ArmNN
 24.08
GpuFsaBackend.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "GpuFsaBackend.hpp"
9 #include "GpuFsaBackendId.hpp"
10 #include "GpuFsaLayerSupport.hpp"
13 
17 #include <Optimizer.hpp>
18 
19 #include <arm_compute/core/CL/CLKernelLibrary.h>
20 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
21 
24 #include "layers/GpuFsaCast.hpp"
29 #include "layers/GpuFsaReshape.hpp"
30 #include "layers/GpuFsaResize.hpp"
31 #include "layers/GpuFsaSoftmax.hpp"
32 
33 namespace armnn
34 {
35 
/// Type-erased deleter used for PreCompiled blob objects: restores the
/// original pointer type before deleting so the proper destructor runs.
template <typename T>
inline void DeleteAsType(const void* const blob)
{
    auto* typedBlob = static_cast<const T*>(blob);
    delete typedBlob;
}
41 
43 {
45  for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
46  {
47  result.push_back(&(*it));
48  }
49  return result;
50 }
51 
53 {
55  for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
56  {
57  result.push_back(&(*it));
58  }
59  return result;
60 }
61 
63  SubgraphView::OutputSlots&& outputs,
64  SubgraphView::Layers&& layers)
65 {
66  return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
67 }
68 
70 {
71  static const BackendId s_Id{GpuFsaBackendId()};
72  return s_Id;
73 }
74 
76 {
78  {
79  return std::make_unique<GpuFsaMemoryManager>(m_CustomAllocator);
80  }
81  return std::make_unique<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
82 }
83 
85  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
86 {
87  return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
88 }
89 
91  TensorHandleFactoryRegistry& registry) const
92 {
93  std::shared_ptr<GpuFsaMemoryManager> memoryManager;
95  {
96  memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
97  }
98  else
99  {
100  memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
101  }
102 
103  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
104 
105  registry.RegisterMemoryManager(memoryManager);
106  registry.RegisterFactory(std::move(factory));
107 
108  return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
109 }
110 
112  TensorHandleFactoryRegistry& registry,
113  const ModelOptions&,
114  MemorySourceFlags inputFlags,
115  MemorySourceFlags outputFlags) const
116 {
117 
118  // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
119  if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
120  {
121  inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
122  }
123  if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
124  {
125  outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
126  }
127 
128  std::shared_ptr<GpuFsaMemoryManager> memoryManager;
130  {
131  memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
132  }
133  else
134  {
135  memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
136  }
137 
138  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
139 
140  registry.RegisterMemoryManager(memoryManager);
141  registry.RegisterFactory(std::move(factory));
142 
143  return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
144 }
145 
146 std::vector<ITensorHandleFactory::FactoryId> GpuFsaBackend::GetHandleFactoryPreferences() const
147 {
148  return std::vector<ITensorHandleFactory::FactoryId> { GpuFsaTensorHandleFactory::GetIdStatic() };
149 }
150 
152 {
153  std::shared_ptr<GpuFsaMemoryManager> memoryManager;
155  {
156  memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
157  }
158  else
159  {
160  memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
161  }
162 
163  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
164  registry.RegisterMemoryManager(memoryManager);
165  registry.RegisterFactory(std::move(factory));
166 
167 }
168 
170  MemorySourceFlags inputFlags,
171  MemorySourceFlags outputFlags)
172 {
173  // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
174  if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
175  {
176  inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
177  }
178  if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
179  {
180  outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
181  }
182 
183  std::shared_ptr<GpuFsaMemoryManager> memoryManager;
185  {
186  memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator);
187  }
188  else
189  {
190  memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
191  }
192 
193  std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
194  registry.RegisterMemoryManager(memoryManager);
195  registry.RegisterFactory(std::move(factory));
196 }
197 
199 {
200  return IBackendContextPtr{new GpuFsaBackendContext{options}};
201 }
202 
205 {
207 }
208 
210 {
211  static ILayerSupportSharedPtr layerSupport{new GpuFsaLayerSupport};
212  return layerSupport;
213 }
214 
215 std::unique_ptr<ICustomAllocator> GpuFsaBackend::GetDefaultAllocator() const
216 {
217  return std::make_unique<GpuFsaBackendDefaultAllocator>();
218 }
219 
221  const ModelOptions& modelOptions) const
222 {
223  OptimizationViews optimizationViews(modelOptions);
224 
225  using namespace arm_compute::experimental::dynamic_fusion;
226 
227  auto it = subgraph.end();
228  std::map<LayerGuid, Layer*> untouched;
229  while (it != subgraph.begin())
230  {
231  --it;
232  Layer& base = *(PolymorphicDowncast<Layer*>(*it));
233  untouched.insert({base.GetGuid(), &base});
234  }
235 
236  GpuFsaLayerSupport supportChecker;
237  it = subgraph.end();
238  arm_compute::CLCompileContext* compileCtx = &(arm_compute::CLKernelLibrary::get().get_compile_context());
239 
240  // Setup the GpuWokloadContext which will exist for the lifetime of the Graph. This contains the TensorInfos
241  std::shared_ptr<GpuWorkloadContext> workloadContext = std::make_shared<GpuWorkloadContext>(compileCtx);
242  while (it != subgraph.begin())
243  {
244  --it;
245  Layer& base = *(PolymorphicDowncast<Layer*>(*it));
246  // Create a GpuFsaPreCompiledBlob, this contains all of the information needed to execute an operator
247  GpuFsaPreCompiledBlob* preCompiledBlobPtr = new GpuFsaPreCompiledBlob();
248  preCompiledBlobPtr->workloadContext = workloadContext;
249  preCompiledBlobPtr->sketch = std::make_unique<GpuWorkloadSketch>(workloadContext.get());
250 
251  // Configure and setup the sketch for each supported op. Their data will be wrapped into a PreCompiled layer
252  switch (base.GetType())
253  {
254  case (LayerType::Activation):
255  {
256  auto desc = PolymorphicDowncast<const ActivationDescriptor*>(&base.GetParameters());
257  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
258  GpuFsaActivationCreateOp(preCompiledBlobPtr, input, *desc);
259  break;
260  }
261  case (LayerType::Cast):
262  {
263  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
264  auto output = base.GetOutputSlot(0).GetTensorInfo();
265  GpuFsaCastCreateOp(preCompiledBlobPtr, input, output);
266  break;
267  }
269  {
270  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
271  auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
272 
273  auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.GetParameters());
274  if (desc->m_BiasEnabled)
275  {
276  auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
277  GpuFsaConvolution2dCreateOp(preCompiledBlobPtr,
278  input,
279  *desc,
280  weights,
281  bias);
282  }
283  else
284  {
285  GpuFsaConvolution2dCreateOp(preCompiledBlobPtr,
286  input,
287  *desc,
288  weights,
289  EmptyOptional());
290  }
291  break;
292  }
293  case (LayerType::BatchMatMul):
294  {
295  auto input0 = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
296  auto input1 = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
297  auto desc = PolymorphicDowncast<const BatchMatMulDescriptor*>(&base.GetParameters());
298  GpuFsaBatchMatMulCreateOp(preCompiledBlobPtr, input0, input1, *desc);
299  break;
300  }
302  {
303  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
304  auto weights = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
305 
306  auto desc = PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&base.GetParameters());
307  if (desc->m_BiasEnabled)
308  {
309  auto bias = base.GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
310  GpuFsaDepthwiseConvolution2dCreateOp(preCompiledBlobPtr,
311  input,
312  *desc,
313  weights,
314  bias);
315  }
316  else
317  {
318  GpuFsaDepthwiseConvolution2dCreateOp(preCompiledBlobPtr,
319  input,
320  *desc,
321  weights,
322  EmptyOptional());
323  }
324  break;
325  }
327  {
328  auto desc = PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&base.GetParameters());
329  auto input0 = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
330  auto input1 = base.GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
331  GpuFsaElementwiseBinaryCreateOp(preCompiledBlobPtr, input0, input1, *desc);
332  break;
333  }
334  case (LayerType::Pooling2d):
335  {
336  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
337  auto desc = PolymorphicDowncast<const Pooling2dDescriptor*>(&base.GetParameters());
338  GpuFsaPooling2dCreateOp(preCompiledBlobPtr, input, *desc);
339  break;
340  }
341  case LayerType::Reshape:
342  {
343  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
344  auto desc = PolymorphicDowncast<const ReshapeDescriptor*>(&base.GetParameters());
345  GpuFsaReshapeCreateOp(preCompiledBlobPtr, input, *desc);
346 
347  break;
348  }
349  case (LayerType::Resize):
350  {
351  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
352  auto desc = PolymorphicDowncast<const ResizeDescriptor*>(&base.GetParameters());
353  GpuFsaResizeCreateOp(preCompiledBlobPtr, input, *desc);
354  break;
355  }
356  case (LayerType::Softmax):
357  {
358  auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
359  auto output = base.GetOutputSlot(0).GetTensorInfo();
360 
361  auto desc = PolymorphicDowncast<const SoftmaxDescriptor*>(&base.GetParameters());
362  GpuFsaSoftmaxCreateOp(preCompiledBlobPtr,
363  input,
364  output,
365  *desc);
366  break;
367  }
368  default:
369  // unsupported layer for GpuFsa backend
370  continue;
371  }
372 
373  auto compiledBlob =
374  std::make_unique<PreCompiledObjectPtr>(preCompiledBlobPtr, DeleteAsType<GpuFsaPreCompiledBlob>);
375 
376  IConnectableLayer* preCompiledLayer = optimizationViews.GetINetwork()->AddPrecompiledLayer(
378  std::move(*compiledBlob),
380  "GpuFsa_Pre_Compiled_Layer");
381 
382  // Copy the output tensor infos from sub-graph
383  for (unsigned int i = 0; i < subgraph.GetNumOutputSlots(); i++)
384  {
385  preCompiledLayer->GetOutputSlot(i).SetTensorInfo(base.GetOutputSlot(i).GetTensorInfo());
386  }
387 
388  SubgraphView::SubgraphViewPtr substituteSubgraph =
390  CreateOutputsFrom(&base),
391  {&base});
392 
393  optimizationViews.AddSubstitution({ std::move(*substituteSubgraph), SubgraphView(preCompiledLayer) });
394 
395  untouched.erase(base.GetGuid());
396  }
397 
398  if (optimizationViews.GetSubstitutions().empty())
399  {
400  optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
401  }
402  else
403  {
404  ReportUntouchedLayers(optimizationViews, untouched);
405  }
406 
407 
408  return optimizationViews;
409 }
410 
411 } // namespace armnn
armnn::MemorySource::Malloc
@ Malloc
armnn::OptimizationViews::AddUntouchedSubgraph
void AddUntouchedSubgraph(SubgraphView &&subgraph)
Definition: OptimizationViews.hpp:48
armnn::GpuFsaSoftmaxCreateOp
void GpuFsaSoftmaxCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
Definition: GpuFsaSoftmax.cpp:63
armnn::GpuFsaElementwiseBinaryCreateOp
void GpuFsaElementwiseBinaryCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)
Definition: GpuFsaElementwiseBinary.cpp:63
armnn::Optional
Definition: Optional.hpp:270
armnn::GpuFsaPooling2dCreateOp
void GpuFsaPooling2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Pooling2dDescriptor &descriptor)
Definition: GpuFsaPooling2d.cpp:40
armnn::GpuFsaLayerSupport
Definition: GpuFsaLayerSupport.hpp:13
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100
armnn::GpuFsaBackend::RegisterTensorHandleFactories
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: GpuFsaBackend.cpp:151
armnn::GpuFsaBackend::GetIdStatic
static const BackendId & GetIdStatic()
Definition: GpuFsaBackend.cpp:69
armnn::GpuFsaBatchMatMulCreateOp
void GpuFsaBatchMatMulCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
Definition: GpuFsaBatchMatMul.cpp:51
armnn::GpuFsaBackend::m_UsingCustomAllocator
bool m_UsingCustomAllocator
Definition: GpuFsaBackend.hpp:304
armnn::IBackendInternal::IMemoryManagerSharedPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
Definition: IBackendInternal.hpp:99
armnn::TensorHandleFactoryRegistry
Definition: TensorHandleFactoryRegistry.hpp:23
armnn::GpuFsaBackend::GetLayerSupport
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: GpuFsaBackend.cpp:209
armnn::SubgraphView::Layers
std::list< Layer * > Layers
Definition: SubgraphView.hpp:61
GpuFsaElementwiseBinary.hpp
armnn::GpuFsaTensorHandleFactory::GetIdStatic
static const FactoryId & GetIdStatic()
Definition: GpuFsaTensorHandleFactory.cpp:86
armnn::GpuFsaActivationCreateOp
void GpuFsaActivationCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ActivationDescriptor &descriptor)
Definition: GpuFsaActivation.cpp:58
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
GpuFsaBackendDefaultAllocator.hpp
armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:339
armnn::TensorHandleFactoryRegistry::RegisterMemoryManager
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
Definition: TensorHandleFactoryRegistry.cpp:34
armnn::GpuFsaBackendId
constexpr const char * GpuFsaBackendId()
Definition: GpuFsaBackendId.hpp:10
armnn::GpuFsaBackend::OptimizeSubgraphView
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: GpuFsaBackend.cpp:220
GpuFsaBackendContext.hpp
armnn::IBackendInternal::IBackendContextPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
Definition: IBackendInternal.hpp:90
Optimizer.hpp
armnn::GpuFsaBackend::GetDefaultAllocator
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
Definition: GpuFsaBackend.cpp:215
armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
armnn::LayerType::ElementwiseBinary
@ ElementwiseBinary
armnn::GpuFsaBackend::GetHandleFactoryPreferences
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: GpuFsaBackend.cpp:146
armnn::SubgraphView::InputSlots
std::vector< InputSlot * > InputSlots
Definition: SubgraphView.hpp:57
GpuFsaWorkloadFactory.hpp
armnn::GpuFsaReshapeCreateOp
void GpuFsaReshapeCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ReshapeDescriptor &descriptor)
Definition: GpuFsaReshape.cpp:49
armnn::Layer
Definition: Layer.hpp:230
armnn::GpuFsaBackend::CreateMemoryManager
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: GpuFsaBackend.cpp:75
armnn::GpuFsaBackend::CreateBackendContext
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: GpuFsaBackend.cpp:198
armnn::SubgraphView::begin
IConnectableLayerIterator begin()
Definition: SubgraphView.cpp:286
GpuFsaSoftmax.hpp
armnn::INetwork::AddPrecompiledLayer
IConnectableLayer * AddPrecompiledLayer(const PreCompiledDescriptor &preCompiledDescriptor, CompiledBlobPtr compiledBlobPtr, const Optional< BackendId > &backend, const char *name=nullptr)
Adds a Precompiled layer to the network.
Definition: Network.cpp:368
armnn::IOutputSlot::SetTensorInfo
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
armnn::MemorySource::Undefined
@ Undefined
IBackendContext.hpp
armnn::EmptyOptional
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
armnn::LayerType::Softmax
@ Softmax
armnn::GpuFsaPreCompiledBlob::sketch
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
Definition: GpuFsaBackend.hpp:34
GpuFsaConvolution2d.hpp
GpuFsaLayerSupport.hpp
armnn::SubgraphView::SubgraphViewPtr
std::shared_ptr< SubgraphView > SubgraphViewPtr
Definition: SubgraphView.hpp:56
armnn::GpuFsaDepthwiseConvolution2dCreateOp
void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
Definition: GpuFsaDepthwiseConvolution2d.cpp:89
armnn::Layer::EndInputSlots
std::vector< InputSlot >::iterator EndInputSlots()
Definition: Layer.hpp:263
armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:343
armnn::Layer::GetNumOutputSlots
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:335
armnn::SubgraphView
The SubgraphView class represents a subgraph of a Graph.
Definition: SubgraphView.hpp:31
armnn::OptimizationViews
Definition: OptimizationViews.hpp:17
GpuFsaActivation.hpp
armnn::GpuFsaBackend::CreateBackendProfilingContext
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: GpuFsaBackend.cpp:203
armnn::GpuFsaBackend::CreateWorkloadFactory
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: GpuFsaBackend.cpp:84
armnn::CreateSubgraphViewFrom
SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots &&inputs, SubgraphView::OutputSlots &&outputs, SubgraphView::Layers &&layers)
Definition: GpuFsaBackend.cpp:62
armnn::OptimizationViews::AddSubstitution
void AddSubstitution(SubstitutionPair &&substitution)
Definition: OptimizationViews.hpp:38
armnn::Layer::BeginInputSlots
std::vector< InputSlot >::iterator BeginInputSlots()
Definition: Layer.hpp:262
armnn::GpuFsaBackend::m_CustomAllocator
std::shared_ptr< GpuFsaBackendCustomAllocatorWrapper > m_CustomAllocator
Definition: GpuFsaBackend.hpp:303
armnn::LayerType::Pooling2d
@ Pooling2d
armnn::IBackendInternal::IBackendProfilingContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace.
Definition: IBackendInternal.hpp:92
armnn::Layer::GetNumInputSlots
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
Definition: Layer.hpp:334
armnn::GpuFsaResizeCreateOp
void GpuFsaResizeCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ResizeDescriptor &descriptor)
Definition: GpuFsaResize.cpp:39
armnn::Layer::GetParameters
virtual const BaseDescriptor & GetParameters() const override
If the layer has a descriptor return it.
Definition: Layer.hpp:378
armnn::DeleteAsType
void DeleteAsType(const void *const blob)
Definition: GpuFsaBackend.cpp:37
GpuFsaResize.hpp
armnn::LayerType::BatchMatMul
@ BatchMatMul
GpuFsaDepthwiseConvolution2d.hpp
armnn::LayerType::DepthwiseConvolution2d
@ DepthwiseConvolution2d
armnn::LayerType::Cast
@ Cast
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::SubgraphView::GetNumOutputSlots
unsigned int GetNumOutputSlots() const
Definition: SubgraphView.cpp:276
armnn::IBackendInternal::IBackendProfilingPtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
Definition: IBackendInternal.hpp:93
armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:78
GpuFsaBackend.hpp
armnn::GpuFsaCastCreateOp
void GpuFsaCastCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output)
Definition: GpuFsaCast.cpp:61
armnn::LayerType::Reshape
@ Reshape
armnn::Layer::BeginOutputSlots
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition: Layer.hpp:266
armnn::GpuFsaBackend::GetId
const BackendId & GetId() const override
Definition: GpuFsaBackend.hpp:67
armnn::IBackendInternal::IMemoryManagerUniquePtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
Definition: IBackendInternal.hpp:98
GpuFsaCast.hpp
armnn::OptimizationViews::GetSubstitutions
const Substitutions & GetSubstitutions() const
Definition: OptimizationViews.hpp:58
armnn::SubgraphView::end
IConnectableLayerIterator end()
Definition: SubgraphView.cpp:291
armnn::BackendId
Definition: BackendId.hpp:75
armnn::SubgraphView::OutputSlots
std::vector< OutputSlot * > OutputSlots
Definition: SubgraphView.hpp:59
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::IConnectableLayer::GetOutputSlot
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::TensorHandleFactoryRegistry::RegisterFactory
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
Definition: TensorHandleFactoryRegistry.cpp:12
GpuFsaBatchMatMul.hpp
armnn::OptimizationViews::GetINetwork
INetwork * GetINetwork()
Definition: OptimizationViews.hpp:69
armnn::GpuFsaBackendContext
Definition: GpuFsaBackendContext.hpp:17
armnn::IBackendInternal::ILayerSupportSharedPtr
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
Definition: IBackendInternal.hpp:94
armnn::GpuFsaPreCompiledBlob::workloadContext
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
Definition: GpuFsaBackend.hpp:35
armnn::IConnectableLayer
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:80
armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18
armnn::LayerType::Resize
@ Resize
armnn::GpuFsaPreCompiledBlob
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
Definition: GpuFsaBackend.hpp:32
armnn::PreCompiledDescriptor
A PreCompiledDescriptor for the PreCompiledLayer.
Definition: Descriptors.hpp:1367
GpuFsaBackendId.hpp
armnn::ReportUntouchedLayers
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
Definition: SubgraphUtils.hpp:220
armnn::LayerType::Convolution2d
@ Convolution2d
armnn::GpuFsaConvolution2dCreateOp
void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
Definition: GpuFsaConvolution2d.cpp:70
armnn::LayerType::Activation
@ Activation
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
SubgraphUtils.hpp
GpuFsaReshape.hpp
GpuFsaTensorHandleFactory.hpp
armnn::CreateOutputsFrom
SubgraphView::OutputSlots CreateOutputsFrom(Layer *layer)
Definition: GpuFsaBackend.cpp:52
GpuFsaPooling2d.hpp
armnn::Layer::EndOutputSlots
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition: Layer.hpp:267
IMemoryManager.hpp
armnn::CreateInputsFrom
SubgraphView::InputSlots CreateInputsFrom(Layer *layer)
Definition: GpuFsaBackend.cpp:42