ArmNN 25.02
CreateWorkload.hpp
//
// Copyright © 2017,2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "TestUtils.hpp"

#include <Graph.hpp>
#include <Network.hpp>
#include <ResolveType.hpp>

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <doctest/doctest.h>

#include <utility>

using namespace armnn;

namespace
{

using namespace std;
// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
                                               const IWorkloadFactory& factory,
                                               const ModelOptions& modelOptions = {})
{
    std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
    CHECK_MESSAGE(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
                  "Cannot convert to derived class");
    std::string reasonIfUnsupported;
    layer.SetBackendId(factory.GetBackendId());
    CHECK(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
    return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}
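
// Illustrative usage (a sketch, not part of the original header): a caller hands
// MakeAndCheckWorkload the layer under test plus a backend factory, e.g. with a
// hypothetical backend workload type:
//
//     auto workload = MakeAndCheckWorkload<RefActivationWorkload>(*layer, factory);
//
// This both downcast-checks the created workload and asserts backend support.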

// Helper function to create tensor handles for workloads, assuming they all use the same factory.
void CreateTensorHandles(armnn::Graph& graph,
                         armnn::IWorkloadFactory& factory)
{
    TensorHandleFactoryRegistry tmpRegistry;
    for (auto&& layer : graph.TopologicalSort())
    {
        layer->CreateTensorHandles(tmpRegistry, factory);
    }
}

/////////////////////////////////////////////////////////////////////////////////////////////
// The following functions are called by backendsCommon/test/CreateWorkload*.cpp
// They build very simple graphs, and then create a workload.
// Some checks are performed on the workload to ensure parameters have been passed correctly.
// They return the created workloads so that backend-specific checks can be performed.
/////////////////////////////////////////////////////////////////////////////////////////////

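// For example, a backend's CreateWorkload tests typically drive these helpers like so
// (an illustrative sketch; RefWorkloadFactory and RefActivationWorkload stand in for a
// concrete backend's factory and workload types):
//
//     armnn::Graph graph;
//     RefWorkloadFactory factory;
//     auto workload = CreateActivationWorkloadTest<RefActivationWorkload,
//                                                  armnn::DataType::Float32>(factory, graph);
//     // ... backend-specific checks on the returned workload ...
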
template <typename ActivationWorkload, armnn::DataType DataType>
std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    ActivationDescriptor layerDesc;
    layerDesc.m_Function = ActivationFunction::ReLu;
    layerDesc.m_A = 3.5f;
    layerDesc.m_B = -10.0f;

    ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer");

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({1, 1}, DataType);

    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, factory);

    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Parameters.m_A == 3.5f);
    CHECK(queueDescriptor.m_Parameters.m_B == -10.0f);
    CHECK((queueDescriptor.m_Parameters.m_Function == ActivationFunction::ReLu));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseWorkloadTest(armnn::IWorkloadFactory& factory,
                                                            armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Layer* const layer = graph.AddLayer<LayerType>("layer");

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    auto queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType, armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseBinaryWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                  armnn::Graph& graph,
                                                                  armnn::BinaryOperation binaryOperation)
{
    // Creates the layer we're testing.
    ElementwiseBinaryDescriptor descriptor(binaryOperation);

    Layer* const layer = graph.AddLayer<ElementwiseBinaryLayer>(descriptor, "layer");

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    auto queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateSubtractionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                    armnn::Graph& graph)
{
    // Creates the layer we're testing.
    SubtractionLayer* const layer = graph.AddLayer<SubtractionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();

    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateMultiplicationWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    // Creates the layer we're testing.
    MultiplicationLayer* const layer = graph.AddLayer<MultiplicationLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload; // Returns so we can do extra, backend-specific tests.
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateAdditionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    AdditionLayer* const layer = graph.AddLayer<AdditionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseUnaryWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph,
                                                                 armnn::UnaryOperation op)
{
    ElementwiseUnaryDescriptor desc(op);
    Layer* const layer = graph.AddLayer<armnn::ElementwiseUnaryLayer>(desc, "layer");

    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    armnn::TensorInfo tensorInfo({ 2, 3 }, DataType);
    Connect(input, layer, tensorInfo, 0, 0);
    Connect(layer, output, tensorInfo, 0, 0);
    CreateTensorHandles(graph, factory);

    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
    DescriptorType queueDescriptor = workload->GetData();

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph,
                                                                       DataLayout dataLayout = DataLayout::NCHW,
                                                                       const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(!queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dFusedActivationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory,
    armnn::Graph& graph,
    DataLayout dataLayout = DataLayout::NCHW,
    const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo({2}, DataType, inputsQScale);
    biasTensorInfo.SetConstant();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();
    bias->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
    bias->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Inputs.size() == 3);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadFastMathTest(armnn::IWorkloadFactory& factory,
                                                                               armnn::Graph& graph,
                                                                               DataLayout dataLayout = DataLayout::NCHW,
                                                                               const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 0;
    layerDesc.m_PadRight = 0;
    layerDesc.m_PadTop = 0;
    layerDesc.m_PadBottom = 0;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = TensorShape{ 32, 32, 3, 3 };
    TensorShape biasShape   = TensorShape{ 32 };
    TensorShape inputShape  = TensorShape{ 1, 32, 149, 149 };
    TensorShape outputShape = TensorShape{ 1, 32, 147, 147 };

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo(biasShape, DataType, inputsQScale);
    biasTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 0);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename LstmWorkload>
std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
    // This parameter setting is for withCifgWithPeepholeNoProjection.
    LstmDescriptor layerDesc;
    layerDesc.m_ActivationFunc = 4;
    layerDesc.m_ClippingThresCell = 0.0f;
    layerDesc.m_ClippingThresProj = 0.0f;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = true;
    layerDesc.m_ProjectionEnabled = false;

    LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer");
    unsigned int batchSize = 2;
    unsigned int inputSize = 2;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();
    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    if (layerDesc.m_PeepholeEnabled)
    {
        layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate();
        layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate();
    }

    // Creates input and output layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");
    Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(3, "output");

    // Connects up.
    armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits * (layerDesc.m_CifgEnabled ? 3 : 4) },
                                                DataType::Float32);
    Connect(input, layer, lstmTensorInfo1, 0, 0);
    Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
    Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
    Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
    Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
    Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
    Connect(layer, output, lstmTensorInfo3, 3, 0);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, factory);
    LstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_ActivationFunc == 4);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresCell == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresProj == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 4);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == TensorInfo({ numUnits, inputSize },
                                                                                 DataType::Float32)));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == TensorInfo({ numUnits },
                                                                           DataType::Float32)));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == TensorInfo({ numUnits }, DataType::Float32)));
    return workload;
}

template <typename QuantizedLstmWorkload>
std::unique_ptr<QuantizedLstmWorkload> CreateQuantizedLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    auto layer = graph.AddLayer<QuantizedLstmLayer>("quantizedLstmlayer");
    unsigned int numBatches = 2;
    unsigned int inputSize = 2;
    unsigned int outputSize = 4;

    // Scale/Offset for input/output, cellState In/Out, weights, bias
    float inputOutputScale = 0.0078125f;
    int32_t inputOutputOffset = 128;

    float cellStateScale = 0.00048828125f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00408021f;
    int32_t weightsOffset = 100;

    float biasScale = 3.1876640625e-05f;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QAsymmU8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QAsymmU8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize},
                               armnn::DataType::Signed32,
                               biasScale,
                               biasOffset);

    // Weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    // Allocate weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
    layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();

    // Create input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(1, "cellStateIn");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(2, "outputStateIn");

    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(0, "cellStateOut");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");

    // Input/output tensor info and quantization info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmU8,
                                inputOutputScale,
                                inputOutputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, outputSize},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmU8,
                                      inputOutputScale,
                                      inputOutputOffset);

    // Connect input/output slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(cellStateIn, layer, cellStateInfo, 0, 1);
    Connect(outputStateIn, layer, outputStateInfo, 0, 2);

    Connect(layer, cellStateOut, cellStateInfo, 0, 0);
    Connect(layer, outputStateOut, outputStateInfo, 1, 0);

    CreateTensorHandles(graph, factory);

    // Create workload and check layer support
    auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, factory);
    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    // Validate input/output sizes
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 2);

    // Validate weight tensor info
    CHECK((queueDescriptor.m_InputToInputWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToInputWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_InputGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template <typename QLstmWorkload>
std::unique_ptr<QLstmWorkload> CreateQLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                       armnn::Graph& graph)
{
    QLstmDescriptor layerDesc;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = false;
    layerDesc.m_ProjectionEnabled = false;
    layerDesc.m_LayerNormEnabled = true;

    layerDesc.m_CellClip = 0.0f;
    layerDesc.m_ProjectionClip = 0.0f;

    layerDesc.m_HiddenStateZeroPoint = 0;
    layerDesc.m_HiddenStateScale = 0.007f;

    layerDesc.m_InputIntermediateScale = 0.007059f;
    layerDesc.m_ForgetIntermediateScale = 0.007812f;
    layerDesc.m_CellIntermediateScale = 0.007059f;
    layerDesc.m_OutputIntermediateScale = 0.007812f;

    QLstmLayer* const layer = graph.AddLayer<QLstmLayer>(layerDesc, "qLstm");

    unsigned int numBatches = 2;
    unsigned int inputSize = 4;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    // Scale/Offset quantization info
    float inputScale = 0.0078125f;
    int32_t inputOffset = 0;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    float outputScale = layerDesc.m_HiddenStateScale;
    int32_t outputOffset = layerDesc.m_HiddenStateZeroPoint;

    float cellStateScale = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    float biasScale = layerNormScale / 1024;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    // Create and allocate tensors
    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_BasicParameters.m_RecurrentToForgetWeights =
        std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToCellWeights =
        std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
        std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
        std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_CellLayerNormWeights =
        std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_OutputLayerNormWeights =
        std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();

    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_CellLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_OutputLayerNormWeights->Allocate();

    // Input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");

    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(0, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(1, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(2, "output");

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    // Connect layers to slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(outputStateIn, layer, outputStateInfo, 0, 1);
    Connect(cellStateIn, layer, cellStateInfo, 0, 2);

    Connect(layer, outputStateOut, outputStateInfo, 0, 0);
    Connect(layer, cellStateOut, cellStateInfo, 1, 0);
    Connect(layer, output, outputStateInfo, 2, 0);

    CreateTensorHandles(graph, factory);

    // Create and check workload
    auto workload = MakeAndCheckWorkload<QLstmWorkload>(*layer, factory);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_CellClip == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ProjectionClip == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 3);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template<typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                             armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 1;
    layerDesc.m_PadRight = 1;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = true;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    TensorShape biasShape = TensorShape{ 2 };
    TensorShape weightShape = TensorShape{ 2, 3, 3, 3 };
    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo(biasShape, GetBiasDataType(DataType), inputsQScale);
    biasTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();
    bias->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
    bias->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);

    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    // Creates the layer we're testing.
    DepthwiseConvolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 1;
    layerDesc.m_PadRight = 2;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 2;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    TensorShape weightShape({1, 4, 4, 2});
    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
                             TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                              TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };

    DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, TensorInfo(weightShape, DataType, inputsQScale, 0.0f, true), 0, 1);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, factory);

    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 2);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == false);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename FullyConnectedWorkload, armnn::DataType DataType>
std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                         armnn::Graph& graph)
{
    // Creates the layer we're testing.
    FullyConnectedDescriptor layerDesc;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_TransposeWeightMatrix = true;

    FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);

    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}
1292 
1293 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1294 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
1295  (armnn::IWorkloadFactory& factory,
1296  armnn::Graph& graph)
1297 {
1298  // Creates the layer we're testing.
1299  FullyConnectedDescriptor layerDesc;
1300  layerDesc.m_BiasEnabled = true;
1301  layerDesc.m_TransposeWeightMatrix = true;
1302 
1303  FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1304 
1305  float inputsQScale = 1.0f;
1306  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0;
1307 
1308  armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
1309  armnn::TensorInfo biasesTensorInfo({7}, GetBiasDataType(DataType), inputsQScale);
1310  weightsTensorInfo.SetConstant();
1311  biasesTensorInfo.SetConstant();
1312 
1313  auto activationDesc = std::make_shared<ActivationDescriptor>();
1314  activationDesc->m_A = 10.0f;
1315  activationDesc->m_B = 5.0f;
1316  activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;
1317 
1318  layer->SetAdditionalInfoForObject(activationDesc);
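 // The descriptor is stored on the layer as an "additional info" blob; backends that fuse
 // activations can retrieve it from the queue descriptor, as the checks below verify.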
1319 
1320  // Check that the additional information can be queried from the layer
1321  std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
1322  ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
1323  ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
1324  ARMNN_ASSERT(static_cast<ActivationFunction>(activationDescPtr->m_Function) ==
1325  armnn::ActivationFunction::BoundedReLu);
1326 
1327  // Creates extra layers.
1328  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1329  auto const weights = graph.AddLayer<ConstantLayer>("weights");
1330  auto const biases = graph.AddLayer<ConstantLayer>("biases");
1331  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1332 
1333  weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1334  weights->m_LayerOutput->Allocate();
1335  biases->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
1336  biases->m_LayerOutput->Allocate();
1337 
1338  // Connects up.
1339  Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1340  Connect(weights, layer, weightsTensorInfo, 0, 1);
1341  Connect(biases, layer, biasesTensorInfo, 0, 2);
1342  Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1343  CreateTensorHandles(graph, factory);
1344 
1345  // Makes the workload and checks it.
1346  auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1347 
1348  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1349 
1350  const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
1351  IgnoreUnused(queueDescBlobPtr);
1352 
1353  ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
1354  ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
1355  ARMNN_ASSERT(
1356  static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
1357  );
1358 
1359  CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1360  CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1361  CHECK(queueDescriptor.m_Inputs.size() == 3);
1362  CHECK(queueDescriptor.m_Outputs.size() == 1);
1363 
1364  // Returns so we can do extra, backend-specific tests.
1365  return workload;
1366 }
1367 
1368 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1369 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
1370  (armnn::IWorkloadFactory& factory,
1371  armnn::Graph& graph)
1372 {
1373  // Creates the layer we're testing.
1374  FullyConnectedDescriptor layerDesc;
1375  layerDesc.m_BiasEnabled = true;
1376  layerDesc.m_TransposeWeightMatrix = true;
1377  layerDesc.m_ConstantWeights = false;
1378 
1379  FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1380 
1381  float inputsQScale = 1.0f;
1382  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
1383 
1384  // Creates extra layers with weights and biases as input layers.
1385  Layer* const input = graph.AddLayer<InputLayer>(1, "input");
1386  Layer* const weights = graph.AddLayer<InputLayer>(2, "weights");
1387  Layer* const biases = graph.AddLayer<InputLayer>(3, "biases");
1388  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1389 
1390  // Connects up.
1391  Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1392  Connect(weights, layer, TensorInfo({7, 20}, DataType, inputsQScale), 0, 1);
1393  Connect(biases, layer, TensorInfo({7}, GetBiasDataType(DataType), inputsQScale), 0, 2);
1394  Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1395  CreateTensorHandles(graph, factory);
1396 
1397  // Makes the workload and checks it.
1398  auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1399 
1400  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1401 
1402  CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1403  CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1404  CHECK(queueDescriptor.m_Parameters.m_ConstantWeights == false);
1405  CHECK(queueDescriptor.m_Inputs.size() == 3);
1406  CHECK(queueDescriptor.m_Outputs.size() == 1);
1407 
1408  // Returns so we can do extra, backend-specific tests.
1409  return workload;
1410 }
1411 
1412 
1413 template <typename NormalizationWorkload, armnn::DataType DataType>
1414 std::unique_ptr<NormalizationWorkload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1415  armnn::Graph& graph,
1416  DataLayout dataLayout = DataLayout::NCHW)
1417 {
1418  // Creates the layer we're testing.
1419  NormalizationDescriptor layerDesc;
1420  layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1421  layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1422  layerDesc.m_NormSize = 3;
1423  layerDesc.m_Alpha = 0.5f;
1424  layerDesc.m_Beta = -1.0f;
1425  layerDesc.m_K = 0.2f;
1426  layerDesc.m_DataLayout = dataLayout;
1427 
1428  NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
1429 
1430  // Creates extra layers.
1431  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1432  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1433 
1434  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1435  TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1436  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1437  TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1438 
1439  // Connects up.
1440  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1441  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1442  Connect(input, layer, inputTensorInfo);
1443  Connect(layer, output, outputTensorInfo);
1444  CreateTensorHandles(graph, factory);
1445 
1446  // Makes the workload and checks it.
1447  auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, factory);
1448 
1449  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1450  CHECK((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across));
1451  CHECK((queueDescriptor.m_Parameters.m_NormMethodType == NormalizationAlgorithmMethod::LocalBrightness));
1452  CHECK(queueDescriptor.m_Parameters.m_NormSize == 3);
1453  CHECK(queueDescriptor.m_Parameters.m_Alpha == 0.5f);
1454  CHECK(queueDescriptor.m_Parameters.m_Beta == -1.0f);
1455  CHECK(queueDescriptor.m_Parameters.m_K == 0.2f);
1456  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1457 
1458  CHECK(queueDescriptor.m_Inputs.size() == 1);
1459  CHECK(queueDescriptor.m_Outputs.size() == 1);
1460 
1461  // Returns so we can do extra, backend-specific tests.
1462  return workload;
1463 }
1464 
1465 template <typename Pooling2dWorkload, armnn::DataType DataType>
1466 std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory,
1467  armnn::Graph& graph,
1468  DataLayout dataLayout = DataLayout::NCHW)
1469 {
1470  // Creates the layer we're testing.
1471  Pooling2dDescriptor layerDesc;
1472  layerDesc.m_PoolType = PoolingAlgorithm::Average;
1473  layerDesc.m_PoolWidth = 3;
1474  layerDesc.m_PoolHeight = 3;
1475  layerDesc.m_PadLeft = 2;
1476  layerDesc.m_PadRight = 2;
1477  layerDesc.m_PadTop = 1;
1478  layerDesc.m_PadBottom = 1;
1479  layerDesc.m_StrideX = 2;
1480  layerDesc.m_StrideY = 3;
1481  layerDesc.m_OutputShapeRounding = OutputShapeRounding::Floor;
1482  layerDesc.m_DataLayout = dataLayout;
1483 
1484  Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer");
1485 
1486  // Create extra layers
1487  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1488  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1489 
1490  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
1491  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
1492 
1493  // Connect up
1494  Connect(input, layer, TensorInfo(inputShape, DataType));
1495  Connect(layer, output, TensorInfo(outputShape, DataType));
1496  CreateTensorHandles(graph, factory);
1497 
1498  // Make the workload and checks it
1499  auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, factory);
1500 
1501  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
1502  CHECK((queueDescriptor.m_Parameters.m_PoolType == PoolingAlgorithm::Average));
1503  CHECK((queueDescriptor.m_Parameters.m_OutputShapeRounding == OutputShapeRounding::Floor));
1504  CHECK(queueDescriptor.m_Parameters.m_PoolWidth == 3);
1505  CHECK(queueDescriptor.m_Parameters.m_PoolHeight == 3);
1506  CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
1507  CHECK(queueDescriptor.m_Parameters.m_StrideY == 3);
1508  CHECK(queueDescriptor.m_Parameters.m_PadLeft == 2);
1509  CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
1510  CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
1511  CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
1512  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1513 
1514  CHECK(queueDescriptor.m_Inputs.size() == 1);
1515  CHECK(queueDescriptor.m_Outputs.size() == 1);
1516 
1517  // Return so we can do extra, backend-specific tests
1518  return workload;
1519 }
1520 
1521 template <typename SoftmaxWorkload, armnn::DataType DataType>
1522 std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1523  armnn::Graph& graph)
1524 {
1525  // Create the layer we're testing.
1526  SoftmaxDescriptor softmaxDescriptor;
1527  // Set Axis to -1 if CL or Neon until further Axes are supported.
1528  if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
1529  {
1530  softmaxDescriptor.m_Axis = -1;
1531  }
1532 
1533  Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer");
1534  // Create extra layers.
1535  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1536  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1537 
1538  // Connect up
1539  armnn::TensorInfo tensorInfo({4, 1}, DataType);
1540  if (DataType == armnn::DataType::QAsymmU8)
1541  {
1542  tensorInfo.SetQuantizationOffset(0);
1543  tensorInfo.SetQuantizationScale(1.f / 256);
1544  }
1545  else if (DataType == armnn::DataType::QAsymmS8)
1546  {
1547  tensorInfo.SetQuantizationOffset(-128);
1548  tensorInfo.SetQuantizationScale(1.f / 256);
1549  }
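 // Softmax outputs lie in [0, 1), so a scale of 1/256 with zero point 0 (QAsymmU8) or
 // -128 (QAsymmS8) maps the full 8-bit range onto that interval.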
1550 
1551  Connect(input, layer, tensorInfo);
1552  Connect(layer, output, tensorInfo);
1553  CreateTensorHandles(graph, factory);
1554 
1555  // Make the workload and checks it.
1556  auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, factory);
1557 
1558  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1559  CHECK(queueDescriptor.m_Inputs.size() == 1);
1560  CHECK(queueDescriptor.m_Outputs.size() == 1);
1561 
1562  // Return so we can do extra, backend-specific tests.
1563  return workload;
1564 }
1565 
1566 template<typename SplitterWorkload, armnn::DataType DataType>
1567 std::unique_ptr<SplitterWorkload>
1568  CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1569 {
1570  // Create the layer we're testing.
1571  // NOTE: we need three dimensions (channels, height/y, width/x) because the Compute
1572  // Library restricts subtensors to have the same x and y dimensions as their
1573  // parent tensors; the origin in the x and y dimensions must therefore be zero
1574  // for every view, so we need a third dimension along which to split.
1575  // NOTE: arguments are: number of views, number of dimensions.
1576  ViewsDescriptor layerDesc(3, 3);
1577  // NOTE: arguments are: view, dimension, value.
1578  layerDesc.SetViewOriginCoord(0, 0, 0);
1579  layerDesc.SetViewOriginCoord(1, 0, 1);
1580  layerDesc.SetViewOriginCoord(2, 0, 3);
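 // Origins 0, 1 and 3 along dimension 0 split the {5, 7, 7} input into views of 1, 2 and
 // 2 channels, matching the three output tensor infos connected below.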
1581 
1582  Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer");
1583 
1584  // Adds extra layers.
1585  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1586  Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0");
1587  Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1588  Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1589 
1590  // Connects up.
1591  armnn::TensorInfo tensorInfo({5, 7, 7}, DataType);
1592  Connect(input, layer, tensorInfo);
1593 
1594  armnn::TensorInfo output0Info({1, 7, 7}, DataType);
1595  armnn::TensorInfo output1Info({2, 7, 7}, DataType);
1596  armnn::TensorInfo output2Info({2, 7, 7}, DataType);
1597 
1598  Connect(layer, output0, output0Info, 0, 0);
1599  Connect(layer, output1, output1Info, 1, 0);
1600  Connect(layer, output2, output2Info, 2, 0);
1601 
1602  CreateTensorHandles(graph, factory);
1603 
1604  // Makes the workload and checks it.
1605  auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, factory);
1606 
1607  SplitterQueueDescriptor queueDescriptor = workload->GetData();
1608  CHECK(queueDescriptor.m_Inputs.size() == 1);
1609  CHECK(queueDescriptor.m_Outputs.size() == 3);
1610  CHECK(queueDescriptor.m_ViewOrigins.size() == 3);
1611 
1612  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[0] == 0);
1613  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[0] == 1);
1614  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[0] == 3);
1615  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[1] == 0);
1616  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[1] == 0);
1617  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[1] == 0);
1618  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[2] == 0);
1619  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[2] == 0);
1620  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[2] == 0);
1621 
1622  // Returns so we can do extra, backend-specific tests.
1623  return workload;
1624 }
1625 
1626 /// This function constructs a graph with both a splitter and a concat, and returns a pair of the workloads.
1627 template<typename SplitterWorkload, typename ConcatWorkload, armnn::DataType DataType>
1628 std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<ConcatWorkload>>
1629  CreateSplitterConcatWorkloadTest(armnn::IWorkloadFactory &factory, armnn::Graph &graph)
1630 {
1631  armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, DataType);
1632 
1633  armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, DataType);
1634  armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, DataType);
1635 
1636  // Constructs the graph.
1637  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1638 
1639  armnn::ViewsDescriptor splitterViews(2);
1640  splitterViews.SetViewOriginCoord(0, 0, 0);
1641  splitterViews.SetViewOriginCoord(0, 1, 0);
1642  splitterViews.SetViewOriginCoord(0, 2, 0);
1643  splitterViews.SetViewOriginCoord(0, 3, 0);
1644 
1645  splitterViews.SetViewOriginCoord(1, 0, 0);
1646  splitterViews.SetViewOriginCoord(1, 1, 1);
1647  splitterViews.SetViewOriginCoord(1, 2, 0);
1648  splitterViews.SetViewOriginCoord(1, 3, 0);
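 // View 0 starts at channel 0 and view 1 at channel 1 of the {1, 2, 100, 10} input; all
 // other origin coordinates are zero, so each view covers one channel.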
1649 
1650  // create splitter layer
1651  Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1652  CHECK(splitter);
1653 
1654  armnn::OriginsDescriptor concatViews(2);
1655  concatViews.SetViewOriginCoord(0, 0, 0);
1656  concatViews.SetViewOriginCoord(0, 1, 1);
1657  concatViews.SetViewOriginCoord(0, 2, 0);
1658  concatViews.SetViewOriginCoord(0, 3, 0);
1659 
1660  concatViews.SetViewOriginCoord(1, 0, 0);
1661  concatViews.SetViewOriginCoord(1, 1, 0);
1662  concatViews.SetViewOriginCoord(1, 2, 0);
1663  concatViews.SetViewOriginCoord(1, 3, 0);
1664 
1665  // create concat layer
1666  Layer* const concat = graph.AddLayer<ConcatLayer>(concatViews, "concat");
1667  CHECK(concat);
1668 
1669  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1670 
1671  // Adds connections.
1672  // connect input to splitter
1673  Connect(input, splitter, inputTensorInfo, 0, 0);
1674  // connect splitter[0] to concat[1]
1675  Connect(splitter, concat, splitTensorInfo1, 0, 1); // The splitter & concat are connected up.
1676  // connect splitter[1] to concat[0]
1677  Connect(splitter, concat, splitTensorInfo2, 1, 0); // So that the outputs are flipped round.
1678  // connect concat to output
1679  Connect(concat, output, inputTensorInfo, 0, 0);
1680 
1681  // create tensor handles
1682  CreateTensorHandles(graph, factory);
1683 
1684  // create splitter workload
1685  auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1686  CHECK(workloadSplitter);
1687  // created concat workload
1688  auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
1689  CHECK(workloadConcat);
1690 
1691  return {std::move(workloadSplitter), std::move(workloadConcat)};
1692 }
1693 
1694 
1695 /// This function constructs a graph with a splitter with two outputs. Each output is then
1696 /// connected to two different activation layers.
1697 template<typename SplitterWorkload, typename ActivationWorkload, armnn::DataType DataType>
1698 void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph,
1699  std::unique_ptr<SplitterWorkload>& wlSplitter,
1700  std::unique_ptr<ActivationWorkload>& wlActiv0_0,
1701  std::unique_ptr<ActivationWorkload>& wlActiv0_1,
1702  std::unique_ptr<ActivationWorkload>& wlActiv1_0,
1703  std::unique_ptr<ActivationWorkload>& wlActiv1_1)
1704 {
1705  armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, DataType);
1706  armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, DataType);
1707  armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, DataType);
1708 
1709  // Constructs the graph.
1710  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1711 
1712  armnn::ViewsDescriptor splitterViews(2);
1713 
1714  splitterViews.SetViewOriginCoord(0, 0, 0);
1715  splitterViews.SetViewOriginCoord(0, 1, 0);
1716  splitterViews.SetViewOriginCoord(0, 2, 0);
1717  splitterViews.SetViewOriginCoord(0, 3, 0);
1718 
1719  splitterViews.SetViewOriginCoord(1, 0, 0);
1720  splitterViews.SetViewOriginCoord(1, 1, 1);
1721  splitterViews.SetViewOriginCoord(1, 2, 0);
1722  splitterViews.SetViewOriginCoord(1, 3, 0);
1723 
1724  Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1725 
1726  armnn::ActivationDescriptor activationDesc;
1727 
1728  Layer* const activ0_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_0");
1729  Layer* const activ0_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_1");
1730  Layer* const activ1_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_0");
1731  Layer* const activ1_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_1");
1732 
1733  Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1734  Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1735  Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3");
1736  Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4");
1737 
1738  // Adds connections.
1739  Connect(input, splitter, inputTensorInfo, 0, 0);
1740  Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
1741  Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
1742 
1743  Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
1744  Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
1745 
1746  Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
1747  Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
1748  Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
1749  Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
1750 
1751  CreateTensorHandles(graph, factory);
1752 
1753  auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1754  auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, factory);
1755  auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, factory);
1756  auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, factory);
1757  auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, factory);
1758 
1759  wlSplitter = std::move(workloadSplitter);
1760  wlActiv0_0 = std::move(workloadActiv0_0);
1761  wlActiv0_1 = std::move(workloadActiv0_1);
1762  wlActiv1_0 = std::move(workloadActiv1_0);
1763  wlActiv1_1 = std::move(workloadActiv1_1);
1764 }
1765 
1766 template <typename ResizeWorkload, armnn::DataType DataType>
1767 std::unique_ptr<ResizeWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory,
1768  armnn::Graph& graph,
1769  DataLayout dataLayout = DataLayout::NCHW)
1770 {
1771  TensorShape inputShape;
1772  TensorShape outputShape;
1773 
1774  switch (dataLayout) {
1775  case DataLayout::NHWC:
1776  inputShape = { 2, 4, 4, 3 };
1777  outputShape = { 2, 2, 2, 3 };
1778  break;
1779  case DataLayout::NCHW:
1780  default:
1781  inputShape = { 2, 3, 4, 4 };
1782  outputShape = { 2, 3, 2, 2 };
1783  }
1784 
1785  // Creates the layer we're testing.
1786  ResizeDescriptor resizeDesc;
1787  armnnUtils::DataLayoutIndexed dimensionIndices = dataLayout;
1788  resizeDesc.m_Method = ResizeMethod::Bilinear;
1789  resizeDesc.m_TargetWidth = outputShape[dimensionIndices.GetWidthIndex()];
1790  resizeDesc.m_TargetHeight = outputShape[dimensionIndices.GetHeightIndex()];
1791  resizeDesc.m_DataLayout = dataLayout;
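 // DataLayoutIndexed resolves the width and height indices for the chosen layout, so the
 // target extents are read from the correct dimensions of outputShape for NCHW and NHWC alike.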
1792  Layer* const layer = graph.AddLayer<ResizeLayer>(resizeDesc, "resize");
1793 
1794  // Creates extra layers.
1795  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1796  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1797 
1798  // Connects up.
1799  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1800  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1801  Connect(input, layer, inputTensorInfo);
1802  Connect(layer, output, outputTensorInfo);
1803  CreateTensorHandles(graph, factory);
1804 
1805  // Makes the workload and checks it.
1806  auto workload = MakeAndCheckWorkload<ResizeWorkload>(*layer, factory);
1807 
1808  auto queueDescriptor = workload->GetData();
1809  CHECK(queueDescriptor.m_Inputs.size() == 1);
1810  CHECK(queueDescriptor.m_Outputs.size() == 1);
1811  CHECK(queueDescriptor.m_Parameters.m_DataLayout == dataLayout);
1812 
1813  // Returns so we can do extra, backend-specific tests.
1814  return workload;
1815 }
1816 
1817 template <typename BatchToSpaceNdWorkload, armnn::DataType DataType>
1818 std::unique_ptr<BatchToSpaceNdWorkload> CreateBatchToSpaceNdWorkloadTest(armnn::IWorkloadFactory& factory,
1819  armnn::Graph& graph)
1820 {
1821  BatchToSpaceNdDescriptor desc;
1822  Layer* const layer = graph.AddLayer<BatchToSpaceNdLayer>(desc, "batchToSpace");
1823 
1824  // Creates extra layers.
1825  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1826  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1827 
1828  // Connects up.
1829  armnn::TensorInfo tensorInfo({1, 1, 1, 1}, DataType);
1830 
1831  Connect(input, layer, tensorInfo);
1832  Connect(layer, output, tensorInfo);
1833 
1834  CreateTensorHandles(graph, factory);
1835 
1836  // Makes the workload and checks it.
1837  auto workload = MakeAndCheckWorkload<BatchToSpaceNdWorkload>(*layer, factory);
1838 
1839  BatchToSpaceNdQueueDescriptor queueDescriptor = workload->GetData();
1840  CHECK(queueDescriptor.m_Inputs.size() == 1);
1841  CHECK(queueDescriptor.m_Outputs.size() == 1);
1842 
1843  return workload;
1844 }
1845 
1846 template <typename LogSoftmaxWorkload, armnn::DataType DataType>
1847 std::unique_ptr<LogSoftmaxWorkload> CreateLogSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1848  armnn::Graph& graph)
1849 {
1850  // Create the layer we're testing.
1851  LogSoftmaxDescriptor logSoftmaxDescriptor;
1852  // Set Axis to -1 if CL or Neon until further Axes are supported.
1853  if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
1854  {
1855  logSoftmaxDescriptor.m_Axis = -1;
1856  }
1857 
1858  Layer* const layer = graph.AddLayer<LogSoftmaxLayer>(logSoftmaxDescriptor, "layer");
1859  // Create extra layers.
1860  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1861  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1862 
1863  // Connect up
1864  armnn::TensorInfo tensorInfo({4, 1}, DataType);
1865 
1866  Connect(input, layer, tensorInfo);
1867  Connect(layer, output, tensorInfo);
1868  CreateTensorHandles(graph, factory);
1869 
1870  // Make the workload and checks it.
1871  auto workload = MakeAndCheckWorkload<LogSoftmaxWorkload>(*layer, factory);
1872 
1873  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1874  CHECK(queueDescriptor.m_Inputs.size() == 1);
1875  CHECK(queueDescriptor.m_Outputs.size() == 1);
1876 
1877  // Return so we can do extra, backend-specific tests.
1878  return workload;
1879 }
1880 
1881 template <typename L2NormalizationWorkload, armnn::DataType DataType>
1882 std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1883  armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
1884 {
1885  // Creates the layer we're testing.
1886  L2NormalizationDescriptor layerDesc;
1887  layerDesc.m_DataLayout = dataLayout;
1888 
1889  Layer* const layer = graph.AddLayer<L2NormalizationLayer>(layerDesc, "l2norm");
1890 
1891  // Creates extra layers.
1892  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1893  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1894 
1895  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1896  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1897  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1898  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1899 
1900  // Connects up.
1901  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1902  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1903  Connect(input, layer, inputTensorInfo);
1904  Connect(layer, output, outputTensorInfo);
1905  CreateTensorHandles(graph, factory);
1906 
1907  // Makes the workload and checks it.
1908  auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, factory);
1909 
1910  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1911  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1912  CHECK(queueDescriptor.m_Inputs.size() == 1);
1913  CHECK(queueDescriptor.m_Outputs.size() == 1);
1914 
1915  // Returns so we can do extra, backend-specific tests.
1916  return workload;
1917 }
1918 
1919 template <typename ReshapeWorkload, armnn::DataType DataType>
1920 std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory,
1921  armnn::Graph& graph)
1922 {
1923  // Creates the layer we're testing.
1924  TensorShape outputShape({ 1, 4 });
1925  ReshapeDescriptor reshapeDesc;
1926  reshapeDesc.m_TargetShape = outputShape;
1927  Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer");
1928 
1929  // Creates extra layers.
1930  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1931  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1932 
1933  // Connects up.
1934  armnn::TensorInfo inputTensorInfo({ 4, 1 }, DataType);
1935  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1936  Connect(input, layer, inputTensorInfo);
1937  Connect(layer, output, outputTensorInfo);
1938  CreateTensorHandles(graph, factory);
1939 
1940  // Makes the workload and checks it.
1941  auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, factory);
1942 
1943  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
1944  CHECK(queueDescriptor.m_Inputs.size() == 1);
1945  CHECK(queueDescriptor.m_Outputs.size() == 1);
1946 
1947  // Returns so we can do extra, backend-specific tests.
1948  return workload;
1949 }
1950 
1951 template <typename ConvertFp16ToFp32Float32Workload>
1952 std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
1953  armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1954 {
1955  // Creates the layer we're testing.
1956  ConvertFp16ToFp32Layer* const layer = graph.AddLayer<ConvertFp16ToFp32Layer>("Fp16ToFp32Converter");
1957 
1958  // Creates extra layers.
1959  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1960  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1961 
1962  // Connects up.
1963  armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1964  armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1965  Connect(input, layer, inputTensorInfo);
1966  Connect(layer, output, outputTensorInfo);
1967  CreateTensorHandles(graph, factory);
1968 
1969  // Makes the workload and checks it.
1970  auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, factory);
1971 
1972  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
1973  CHECK(queueDescriptor.m_Inputs.size() == 1);
1974  CHECK(queueDescriptor.m_Outputs.size() == 1);
1975 
1976  // Returns so we can do extra, backend-specific tests.
1977  return workload;
1978 }
1979 
1980 template <typename ConvertFp32ToFp16Float16Workload>
1981 std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
1982  armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1983 {
1984  // Creates the layer we're testing.
1985  ConvertFp32ToFp16Layer* const layer = graph.AddLayer<ConvertFp32ToFp16Layer>("Fp32ToFp16Converter");
1986 
1987  // Creates extra layers.
1988  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1989  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1990 
1991  // Connects up.
1992  armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1993  armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1994  Connect(input, layer, inputTensorInfo);
1995  Connect(layer, output, outputTensorInfo);
1996  CreateTensorHandles(graph, factory);
1997 
1998  // Makes the workload and checks it.
1999  auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, factory);
2000 
2001  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
2002  CHECK(queueDescriptor.m_Inputs.size() == 1);
2003  CHECK(queueDescriptor.m_Outputs.size() == 1);
2004 
2005  // Returns so we can do extra, backend-specific tests.
2006  return workload;
2007 }
2008 
2009 template <typename MeanWorkload, armnn::DataType DataType>
2010 std::unique_ptr<MeanWorkload> CreateMeanWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
2011 {
2012  // Reduce along the first and second dimensions, and do not keep the reduced dimensions.
2013  MeanDescriptor descriptor({ 1, 2 }, false);
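 // Reducing axes 1 and 2 of the { 1, 3, 7, 4 } input without keeping the reduced
 // dimensions produces the { 1, 4 } output shape used below.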
2014 
2015  // Creates the layer we're testing.
2016  Layer* const layer = graph.AddLayer<MeanLayer>(descriptor, "mean");
2017 
2018  // Creates extra layers.
2019  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
2020  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2021 
2022  // Connects up.
2023  armnn::TensorInfo inputTensorInfo({ 1, 3, 7, 4 }, DataType);
2024  armnn::TensorInfo outputTensorInfo({ 1, 4 }, DataType);
2025  Connect(input, layer, inputTensorInfo);
2026  Connect(layer, output, outputTensorInfo);
2027  CreateTensorHandles(graph, factory);
2028 
2029  // Makes the workload and checks it.
2030  auto workload = MakeAndCheckWorkload<MeanWorkload>(*layer, factory);
2031 
2032  MeanQueueDescriptor queueDescriptor = workload->GetData();
2033  CHECK(queueDescriptor.m_Parameters.m_Axis == descriptor.m_Axis);
2034  CHECK(queueDescriptor.m_Parameters.m_KeepDims == descriptor.m_KeepDims);
2035  CHECK(queueDescriptor.m_Inputs.size() == 1);
2036  CHECK(queueDescriptor.m_Outputs.size() == 1);
2037 
2038  // Returns so we can do extra, backend-specific tests.
2039  return workload;
2040 }
2041 
2042 template<typename ConcatWorkload, armnn::DataType DataType>
2043 std::unique_ptr<ConcatWorkload> CreateConcatWorkloadTest(armnn::IWorkloadFactory &factory,
2044  armnn::Graph &graph,
2045  const armnn::TensorShape &outputShape,
2046  unsigned int concatAxis)
2047 {
2048  armnn::TensorInfo inputTensorInfo({ 2, 3, 2, 5 }, DataType);
2049  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2050 
2051  // Constructs the graph.
2052  Layer* const input0 = graph.AddLayer<InputLayer>(0, "input0");
2053  Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
2054  armnn::OriginsDescriptor descriptor;
2055 
2056  std::vector<armnn::TensorShape> inputShapes{{ 2, 3, 2, 5 }, { 2, 3, 2, 5 }};
2057 
2058  descriptor = CreateDescriptorForConcatenation(inputShapes.begin(),
2059  inputShapes.end(),
2060  concatAxis);
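 // CreateDescriptorForConcatenation accumulates view origins along concatAxis from the
 // input shapes, placing the two { 2, 3, 2, 5 } inputs back to back in the output.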
2061 
2062  // create concat layer
2063  Layer* const concat = graph.AddLayer<ConcatLayer>(descriptor, "concat");
2064  CHECK(concat);
2065 
2066  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2067 
2068  // Adds connections.
2069  // connect input0 to concat
2070  Connect(input0, concat, inputTensorInfo, 0, 0);
2071  // connect input1 to concat
2072  Connect(input1, concat, inputTensorInfo, 0, 1);
2073  // connect concat to output
2074  Connect(concat, output, outputTensorInfo, 0, 0);
2075 
2076  // create tensor handles
2077  CreateTensorHandles(graph, factory);
2078 
2079  // create concat workload
2080  auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
2081  CHECK(workloadConcat);
2082 
2083  return workloadConcat;
2084 }
2085 
2086 template <typename PreCompiledWorkload, armnn::DataType dataType>
2087 std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest(
2088  armnn::IWorkloadFactory& factory,
2089  armnn::Graph& graph,
2090  bool biasEnabled = false)
2091 {
2092  IgnoreUnused(graph);
2093 
2094  // build up the structure of the network
2095  armnn::INetworkPtr net(armnn::INetwork::Create());
2096 
2097  // Add an input layer
2098  armnn::IConnectableLayer* const inputLayer = net->AddInputLayer(0, "input layer");
2099  CHECK(inputLayer);
2100 
2101  // ArmNN weights tensor shape is OIHW (out channels, in channels, height, width) for NCHW
2102  // ArmNN weights tensor shape is OHWI (out channels, height, width, in channels) for NHWC
2103  // this test is using NHWC, so the weights shape is OHWI
2104  TensorInfo weightsTensorInfo(TensorShape({16, 1, 1, 16}), dataType, 0.9f, 0, true);
2105  unsigned int weightsLength = weightsTensorInfo.GetNumElements();
2106 
2107  using WeightType = armnn::ResolveType<dataType>;
2108  std::vector<WeightType> convWeightsData(weightsLength);
2109  for (unsigned int i = 0; i < weightsLength; ++i)
2110  {
2111  convWeightsData[i] = static_cast<WeightType>(i);
2112  }
2113 
2114  armnn::ConstTensor weights(weightsTensorInfo, convWeightsData);
2115 
2116  // Add a layer that can be used in the PreCompiled layer
2117  armnn::Convolution2dDescriptor convDesc2d;
2118  convDesc2d.m_StrideX = 1;
2119  convDesc2d.m_StrideY = 1;
2120  convDesc2d.m_BiasEnabled = biasEnabled;
2121  convDesc2d.m_DataLayout = armnn::DataLayout::NHWC;
2122 
2123 
2124  const std::string convLayerName("conv layer");
2125 
2126  armnn::IConnectableLayer* convLayer = net->AddConvolution2dLayer(convDesc2d, convLayerName.c_str());
2127 
2128  IConnectableLayer* weightsLayer = net->AddConstantLayer(weights);
2129  weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
2130  weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));
2131 
2132  if (biasEnabled)
2133  {
2134  constexpr armnn::DataType biasDataType = ( dataType == armnn::DataType::QAsymmU8) ?
2135  armnn::DataType::Signed32 : armnn::DataType::Float32;
2136 
2137  TensorInfo biasTensorInfo(TensorShape({16}), biasDataType, 0.9f * 0.9f, 0, true);
2138  unsigned int biasLength = biasTensorInfo.GetNumElements();
2139 
2140  using BiasType = armnn::ResolveType<biasDataType>;
2141  std::vector<BiasType> biasData(biasLength);
2142  std::fill(biasData.begin(), biasData.end(), static_cast<BiasType>(0));
2143 
2144  armnn::ConstTensor biases(biasTensorInfo, biasData);
2145 
2146  IConnectableLayer* biasLayer = net->AddConstantLayer(biases);
2147 
2148  biasLayer->GetOutputSlot(0).SetTensorInfo(biases.GetInfo());
2149  biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2u));
2150  }
2151 
2152  CHECK(convLayer);
2153 
2154  // Add an output layer
2155  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output layer");
2156  CHECK(outputLayer);
2157 
2158  // set the tensors in the network (NHWC format)
2159  TensorInfo inputTensorInfo(TensorShape({ 1, 16, 16, 16 }), dataType);
2160  if (dataType == armnn::DataType::QAsymmU8)
2161  {
2162  inputTensorInfo.SetQuantizationOffset(0);
2163  inputTensorInfo.SetQuantizationScale(0.9f);
2164  }
2165 
2166  TensorInfo outputTensorInfo(TensorShape({1, 16, 16, 16}), dataType);
2167  if (dataType == armnn::DataType::QAsymmU8)
2168  {
2169  outputTensorInfo.SetQuantizationOffset(0);
2170  outputTensorInfo.SetQuantizationScale(0.9f);
2171  }
2172 
2173  // Connect the layers
2174  inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
2175  inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
2176 
2177  convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
2178  convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
2179 
2180  // Optimize the network for the backend supported by the factory
2181  std::vector<armnn::BackendId> backends = {factory.GetBackendId()};
2182  armnn::IRuntime::CreationOptions options;
2183  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
2184  armnn::OptimizerOptionsOpaque optimizerOptions;
2185  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec(),
2186  optimizerOptions);
2187  CHECK(optimizedNet != nullptr);
2188 
2189  // Find the PreCompiled layer in the optimised graph
2190  armnn::Graph& optimisedGraph = GetGraphForTesting(optimizedNet.get());
2191  Layer* preCompiledLayer = nullptr;
2192  for (auto& layer : optimisedGraph)
2193  {
2194  if (layer->GetType() == LayerType::PreCompiled)
2195  {
2196  preCompiledLayer = layer;
2197  }
2198  }
2199  CHECK(preCompiledLayer != nullptr);
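 // A backend that supports pre-compilation is expected to replace the convolution subgraph
 // with a single PreCompiled layer during optimization; the test fails here otherwise.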
2200 
2201  // Create the TensorHandles.
2202  CreateTensorHandles(optimisedGraph, factory);
2203 
2204  // Make the workload and check it.
2205  auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, factory);
2206 
2207  PreCompiledQueueDescriptor queueDescriptor = workload->GetData();
2208  CHECK(queueDescriptor.m_Inputs.size() == 1);
2209  CHECK(queueDescriptor.m_Outputs.size() == 1);
2210 
2211  // Returns the workload so we can do extra, backend-specific tests.
2212  // NOTE: We need to return the optimised network as well; otherwise it goes
2213  // out of scope and the tensor handles are destroyed.
2214  return std::make_pair(std::move(optimizedNet), std::move(workload));
2215 }
2216 
2217 template<typename ConstantWorkload, armnn::DataType DataType>
2218 std::unique_ptr<ConstantWorkload> CreateConstantWorkloadTest(armnn::IWorkloadFactory& factory,
2219  armnn::Graph& graph,
2220  const armnn::TensorShape& outputShape)
2221 {
2222  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2223 
2224  // create constant layer
2225  auto constant = graph.AddLayer<ConstantLayer>("constant");
2226  CHECK(constant);
2227  constant->m_LayerOutput = std::make_unique<ScopedTensorHandle>(outputTensorInfo);
2228 
2229  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2230 
2231  // Adds connections.
2232  // connect constant to output
2233  Connect(constant, output, outputTensorInfo, 0, 0);
2234 
2235  // create tensor handles
2236  CreateTensorHandles(graph, factory);
2237 
2238  // create Constant workload
2239  auto workloadConstant = MakeAndCheckWorkload<ConstantWorkload>(*constant, factory);
2240  CHECK(workloadConstant);
2241 
2242  return workloadConstant;
2243 }
2244 
2245 template <typename PreluWorkload>
2246 std::unique_ptr<PreluWorkload> CreatePreluWorkloadTest(armnn::IWorkloadFactory& factory,
2247  armnn::Graph& graph,
2248  const armnn::TensorShape& inputShape,
2249  const armnn::TensorShape& alphaShape,
2250  const armnn::TensorShape& outputShape,
2251  armnn::DataType dataType)
2252 {
2253  // Creates the PReLU layer
2254  Layer* const layer = graph.AddLayer<PreluLayer>("prelu");
2255  CHECK(layer != nullptr);
2256 
2257  // Creates extra layers
2258  Layer* const input = graph.AddLayer<InputLayer> (0, "input");
2259  Layer* const alpha = graph.AddLayer<InputLayer> (1, "alpha");
2260  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2261  CHECK(input != nullptr);
2262  CHECK(alpha != nullptr);
2263  CHECK(output != nullptr);
2264 
2265  // Connects up
2266  armnn::TensorInfo inputTensorInfo (inputShape, dataType);
2267  armnn::TensorInfo alphaTensorInfo (alphaShape, dataType);
2268  armnn::TensorInfo outputTensorInfo(outputShape, dataType);
2269  Connect(input, layer, inputTensorInfo, 0, 0);
2270  Connect(alpha, layer, alphaTensorInfo, 0, 1);
2271  Connect(layer, output, outputTensorInfo, 0, 0);
2272  CreateTensorHandles(graph, factory);
2273 
2274  // Makes the workload and checks it
2275  auto workload = MakeAndCheckWorkload<PreluWorkload>(*layer, factory);
2276 
2277  PreluQueueDescriptor queueDescriptor = workload->GetData();
2278  CHECK(queueDescriptor.m_Inputs.size() == 2);
2279  CHECK(queueDescriptor.m_Outputs.size() == 1);
2280 
2281  // Returns so we can do extra, backend-specific tests.
2282  return workload;
2283 }
2284 
2285 template <typename SpaceToDepthWorkload, armnn::DataType DataType>
2286 std::unique_ptr<SpaceToDepthWorkload> CreateSpaceToDepthWorkloadTest(armnn::IWorkloadFactory& factory,
2287  armnn::Graph& graph)
2288 {
2289  SpaceToDepthDescriptor desc;
2290  desc.m_BlockSize = 2;
2291  Layer* const layer = graph.AddLayer<SpaceToDepthLayer>(desc, "spaceToDepth");
2292 
2293  // Creates extra layers.
2294  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
2295  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2296 
2297  // Connects up.
2298  armnn::TensorInfo inputTensorInfo({ 1, 2, 2, 1 }, DataType);
2299  armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 4 }, DataType);
2300 
2301  Connect(input, layer, inputTensorInfo);
2302  Connect(layer, output, outputTensorInfo);
2303 
2304  CreateTensorHandles(graph, factory);
2305 
2306  // Makes the workload and checks it.
2307  auto workload = MakeAndCheckWorkload<SpaceToDepthWorkload>(*layer, factory);
2308 
2309  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
2310  CHECK(queueDescriptor.m_Inputs.size() == 1);
2311  CHECK(queueDescriptor.m_Outputs.size() == 1);
2312 
2313  return workload;
2314 }
2315 
2316 template <typename StackWorkload, armnn::DataType DataType>
2317 std::unique_ptr<StackWorkload> CreateStackWorkloadTest(armnn::IWorkloadFactory& factory,
2318  armnn::Graph& graph,
2319  const armnn::TensorShape& inputShape,
2320  const armnn::TensorShape& outputShape,
2321  unsigned int axis,
2322  unsigned int numInputs)
2323 {
2324  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
2325  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2326 
2327  // Constructs the Stack layer.
2328  armnn::StackDescriptor descriptor(axis, numInputs, inputShape);
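 // StackDescriptor takes the axis to stack along, the number of inputs and the shape every
 // input must share; the output gains a new dimension of size numInputs at that axis.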
2329  Layer* const stackLayer = graph.AddLayer<StackLayer>(descriptor, "stack");
2330  CHECK(stackLayer != nullptr);
2331 
2332  // Constructs layer inputs and output.
2333  std::vector<Layer*> inputs;
2334  for (unsigned int i=0; i<numInputs; ++i)
2335  {
2336  inputs.push_back(graph.AddLayer<InputLayer>(
2337  static_cast<int>(i),
2338  ("input" + std::to_string(i)).c_str()
2339  ));
2340  CHECK(inputs[i] != nullptr);
2341  }
2342  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2343  CHECK(output != nullptr);
2344 
2345  // Adds connections.
2346  for (unsigned int i=0; i<numInputs; ++i)
2347  {
2348  Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
2349  }
2350  Connect(stackLayer, output, outputTensorInfo, 0, 0);
2351 
2352  CreateTensorHandles(graph, factory);
2353 
2354  auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, factory);
2355  StackQueueDescriptor queueDescriptor = stackWorkload->GetData();
2356  CHECK(queueDescriptor.m_Inputs.size() == numInputs);
2357  CHECK(queueDescriptor.m_Outputs.size() == 1);
2358 
2359  return stackWorkload;
2360 }
2361 
2362 } // Anonymous namespace
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void Connect(armnn::IConnectableLayer *from, armnn::IConnectableLayer *to, const armnn::TensorInfo &tensorInfo, unsigned int fromIndex, unsigned int toIndex)
Definition: TestUtils.cpp:14
This layer represents an activation operation with the specified activation function.
This layer represents an addition operation.
This layer represents a batch normalization operation.
std::shared_ptr< ConstTensorHandle > m_Mean
A unique pointer to store Mean values.
std::shared_ptr< ConstTensorHandle > m_Gamma
A unique pointer to store Gamma values.
std::shared_ptr< ConstTensorHandle > m_Beta
A unique pointer to store Beta values.
std::shared_ptr< ConstTensorHandle > m_Variance
A unique pointer to store Variance values.
This layer represents a BatchToSpaceNd operation.
This layer represents a merge operation.
Definition: ConcatLayer.hpp:14
const TensorInfo & GetTensorInfo() const
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:330
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
This layer converts data type Float 16 to Float 32.
This layer converts data type Float 32 to Float 16.
This layer represents a convolution 2d operation.
This layer represents a depthwise convolution 2d operation.
This layer represents a elementwiseBinary operation.
This layer represents a elementwiseUnary operation.
This layer represents a fully connected operation.
LayerT * AddLayer(Args &&... args)
Adds a new layer, of type LayerType, to the graph constructed with the arguments passed.
Definition: Graph.hpp:466
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:191
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:81
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
static INetworkPtr Create(const NetworkOptions &networkOptions={})
Definition: Network.cpp:682
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
virtual int Connect(IInputSlot &destination)=0
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:52
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
virtual const BackendId & GetBackendId() const =0
A layer user-provided data can be bound to (e.g. inputs, outputs).
Definition: InputLayer.hpp:14
This layer represents a L2 normalization operation.
virtual std::unique_ptr< IWorkload > CreateWorkload(const IWorkloadFactory &factory) const =0
void SetBackendId(const BackendId &id) override
Set the backend of the IConnectableLayer.
Definition: Layer.hpp:291
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry &registry, const IWorkloadFactory &factory, const bool IsMemoryManaged=true)
Definition: Layer.cpp:308
void SetAdditionalInfoForObject(const AdditionalInfoObjectPtr &additionalInfo)
Definition: Layer.hpp:373
DataType GetDataType() const
Definition: Layer.cpp:345
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:368
This layer represents a log softmax operation.
This layer represents a LSTM operation.
Definition: LstmLayer.hpp:17
LstmOptPeepholeParameters m_PeepholeParameters
Definition: LstmLayer.hpp:23
LstmBasicParameters m_BasicParameters
Definition: LstmLayer.hpp:20
This layer represents a mean operation.
Definition: MeanLayer.hpp:15
This layer represents a multiplication operation.
This layer represents a normalization operation.
A layer user-provided data can be bound to (e.g. inputs, outputs).
Definition: OutputLayer.hpp:14
This layer represents a pooling 2d operation.
This layer represents a QLstm operation.
Definition: QLstmLayer.hpp:80
QLstmBasicParameters m_BasicParameters
Definition: QLstmLayer.hpp:83
QLstmOptLayerNormParameters m_LayerNormParameters
Definition: QLstmLayer.hpp:87
This layer represents a QuantizedLstm operation.
This layer represents a reshape operation.
This layer represents a resize operation.
Definition: ResizeLayer.hpp:14
This layer represents a softmax operation.
This layer represents a SpaceToDepth operation.
This layer represents a split operation.
This layer represents a stack operation.
Definition: StackLayer.hpp:14
This layer represents a subtraction operation.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:493
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:518
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
unsigned int GetWidthIndex() const
unsigned int GetHeightIndex() const
Copyright (c) 2021 ARM Limited and Contributors.
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:39
UnaryOperation
Definition: Types.hpp:126
void IgnoreUnused(Ts &&...)
ActivationFunction
Definition: Types.hpp:87
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:494
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:340
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
Definition: TestUtils.cpp:49
OriginsDescriptor CreateDescriptorForConcatenation(TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing...
DataType GetBiasDataType(DataType inputDataType)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:2286
BinaryOperation
Definition: Types.hpp:139
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:339
DataLayout
Definition: Types.hpp:63
@ LocalBrightness
Krichevsky 2012: Local Brightness Normalization.
DataType
Definition: Types.hpp:49
@ CpuAcc
CPU Execution: NEON: ArmCompute.
@ GpuAcc
GPU Execution: OpenCL: ArmCompute.
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:37
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH,...
Definition: Descriptors.hpp:61
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
Definition: Descriptors.hpp:63
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...
Definition: Descriptors.hpp:59
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
float m_Eps
Value to add to the variance. Used to avoid dividing by zero.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
const ConstTensorHandle * m_Variance
A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
uint32_t m_PadTop
Padding top value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
bool m_BiasEnabled
Enable/disable bias.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
uint32_t m_PadTop
Padding top value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
bool m_BiasEnabled
Enable/disable bias.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Depthwise Convolution 2D layer workload data.
A ElementwiseBinaryDescriptor for the ElementwiseBinaryLayer.
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_TransposeWeightMatrix
Enable/disable transpose weight matrix.
bool m_ConstantWeights
Enable/disable constant weights and biases.
bool m_BiasEnabled
Enable/disable bias.
A L2NormalizationDescriptor for the L2NormalizationLayer.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
std::shared_ptr< ConstTensorHandle > m_RecurrentToForgetWeights
A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units].
std::shared_ptr< ConstTensorHandle > m_CellBias
A unique pointer to represent 1D weights tensor with dimensions [num_units].
std::shared_ptr< ConstTensorHandle > m_InputToOutputWeights
A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units].
std::shared_ptr< ConstTensorHandle > m_RecurrentToCellWeights
A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units].
std::shared_ptr< ConstTensorHandle > m_OutputGateBias
A unique pointer to represent 1D weights tensor with dimensions [num_units].
std::shared_ptr< ConstTensorHandle > m_InputToForgetWeights
A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units].
std::shared_ptr< ConstTensorHandle > m_InputToCellWeights
A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units].
std::shared_ptr< ConstTensorHandle > m_RecurrentToOutputWeights
A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units].
std::shared_ptr< ConstTensorHandle > m_ForgetGateBias
A unique pointer to represent 1D weights tensor with dimensions [num_units].
An LstmDescriptor for the LstmLayer.
bool m_PeepholeEnabled
Enable/disable peephole.
float m_ClippingThresCell
Clipping threshold value for the cell state.
bool m_ProjectionEnabled
Enable/disable the projection layer.
float m_ClippingThresProj
Clipping threshold value for the projection.
bool m_CifgEnabled
Enable/disable cifg (coupled input & forget gate).
uint32_t m_ActivationFunc
The activation function to use.
std::shared_ptr< ConstTensorHandle > m_CellToForgetWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units].
std::shared_ptr< ConstTensorHandle > m_CellToOutputWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units].
const ConstTensorHandle * m_OutputGateBias
const ConstTensorHandle * m_CellBias
const ConstTensorHandle * m_InputToForgetWeights
A MeanDescriptor for the MeanLayer.
std::vector< unsigned int > m_Axis
Values for the dimensions to reduce.
bool m_KeepDims
Enable/disable keep dimensions. If true, then the reduced dimensions that are of length 1 are kept.
A NormalizationDescriptor for the NormalizationLayer.
NormalizationAlgorithmMethod m_NormMethodType
Normalization method algorithm to use (LocalBrightness, LocalContrast).
float m_Alpha
Alpha value for the normalization equation.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
float m_Beta
Beta value for the normalization equation.
float m_K
Kappa value used for the across channel normalization equation.
uint32_t m_NormSize
Depth radius value.
NormalizationAlgorithmChannel m_NormChannelType
Normalization channel algorithm to use (Across, Within).
An OriginsDescriptor for the ConcatLayer.
A Pooling2dDescriptor for the Pooling2dLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max. Average, L2).
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadTop
Padding top value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
uint32_t m_PoolWidth
Pooling width value.
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
OutputShapeRounding m_OutputShapeRounding
The rounding method for the output shape. (Floor, Ceiling).
std::shared_ptr< ConstTensorHandle > m_RecurrentToForgetWeights
A shared pointer to the 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8).
Definition: QLstmLayer.hpp:24
std::shared_ptr< ConstTensorHandle > m_CellBias
A shared pointer to the 1D bias tensor with dimensions [num_units] (int32).
Definition: QLstmLayer.hpp:33
std::shared_ptr< ConstTensorHandle > m_InputToOutputWeights
A shared pointer to the 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8).
Definition: QLstmLayer.hpp:21
std::shared_ptr< ConstTensorHandle > m_RecurrentToCellWeights
A shared pointer to the 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8).
Definition: QLstmLayer.hpp:26
std::shared_ptr< ConstTensorHandle > m_OutputGateBias
A shared pointer to the 1D bias tensor with dimensions [num_units] (int32).
Definition: QLstmLayer.hpp:35
std::shared_ptr< ConstTensorHandle > m_InputToForgetWeights
A shared pointer to the 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8).
Definition: QLstmLayer.hpp:17
std::shared_ptr< ConstTensorHandle > m_InputToCellWeights
A shared pointer to the 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8).
Definition: QLstmLayer.hpp:19
std::shared_ptr< ConstTensorHandle > m_RecurrentToOutputWeights
A shared pointer to the 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8).
Definition: QLstmLayer.hpp:28
std::shared_ptr< ConstTensorHandle > m_ForgetGateBias
A shared pointer to the 1D bias tensor with dimensions [num_units] (int32).
Definition: QLstmLayer.hpp:31
A QLstmDescriptor for the QLstmLayer.
float m_CellIntermediateScale
Cell intermediate quantization scale.
float m_InputIntermediateScale
Input intermediate quantization scale.
bool m_PeepholeEnabled
Enable/disable peephole.
int32_t m_HiddenStateZeroPoint
Hidden State zero point.
bool m_LayerNormEnabled
Enable/disable layer normalization.
bool m_ProjectionEnabled
Enable/disable the projection layer.
float m_OutputIntermediateScale
Output intermediate quantization scale.
float m_ProjectionClip
Clipping threshold value for the projection.
float m_CellClip
Clipping threshold value for the cell state.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
float m_HiddenStateScale
Hidden State quantization scale.
float m_ForgetIntermediateScale
Forget intermediate quantization scale.
std::shared_ptr< ConstTensorHandle > m_CellLayerNormWeights
A shared pointer to the 1D weights tensor with dimensions [num_units] (QSymmS16).
Definition: QLstmLayer.hpp:73
std::shared_ptr< ConstTensorHandle > m_OutputLayerNormWeights
A shared pointer to the 1D weights tensor with dimensions [num_units] (QSymmS16).
Definition: QLstmLayer.hpp:75
std::shared_ptr< ConstTensorHandle > m_ForgetLayerNormWeights
A shared pointer to the 1D weights tensor with dimensions [num_units] (QSymmS16).
Definition: QLstmLayer.hpp:71
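The QLstmDescriptor mirrors the float LstmDescriptor options and adds the quantization parameters for the intermediate and hidden-state tensors. A rough sketch; the scales and zero point below are illustrative, not derived from a real model:

armnn::QLstmDescriptor qLstmDesc;
qLstmDesc.m_CifgEnabled       = true;   // couple the input and forget gates
qLstmDesc.m_PeepholeEnabled   = false;
qLstmDesc.m_ProjectionEnabled = false;
qLstmDesc.m_LayerNormEnabled  = true;
qLstmDesc.m_CellClip          = 0.0f;   // 0 disables clipping
qLstmDesc.m_ProjectionClip    = 0.0f;
qLstmDesc.m_InputIntermediateScale  = 0.007f;
qLstmDesc.m_ForgetIntermediateScale = 0.007f;
qLstmDesc.m_CellIntermediateScale   = 0.007f;
qLstmDesc.m_OutputIntermediateScale = 0.007f;
qLstmDesc.m_HiddenStateScale        = 0.007f;
qLstmDesc.m_HiddenStateZeroPoint    = 0;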
const ConstTensorHandle * m_InputToOutputWeights
const ConstTensorHandle * m_RecurrentToInputWeights
const ConstTensorHandle * m_ForgetGateBias
const ConstTensorHandle * m_InputGateBias
const ConstTensorHandle * m_RecurrentToOutputWeights
const ConstTensorHandle * m_OutputGateBias
const ConstTensorHandle * m_CellBias
const ConstTensorHandle * m_InputToCellWeights
const ConstTensorHandle * m_InputToForgetWeights
const ConstTensorHandle * m_InputToInputWeights
const ConstTensorHandle * m_RecurrentToCellWeights
const ConstTensorHandle * m_RecurrentToForgetWeights
const T * GetAdditionalInformation() const
std::vector< ITensorHandle * > m_Inputs
std::vector< ITensorHandle * > m_Outputs
A ReshapeDescriptor for the ReshapeLayer.
TensorShape m_TargetShape
Target shape value.
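A ReshapeDescriptor only needs the target shape; the total element count must match the input tensor. For instance:

armnn::ReshapeDescriptor reshapeDesc;
reshapeDesc.m_TargetShape = armnn::TensorShape({ 1, 4 });
ReshapeLayer* const reshapeLayer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "reshape");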
A ResizeDescriptor for the ResizeLayer.
uint32_t m_TargetHeight
Target height value.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
ResizeMethod m_Method
The interpolation method to use (Bilinear, NearestNeighbor).
uint32_t m_TargetWidth
Target width value.
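An illustrative bilinear resize to 16x16 using these fields:

armnn::ResizeDescriptor resizeDesc;
resizeDesc.m_Method       = armnn::ResizeMethod::Bilinear;
resizeDesc.m_TargetWidth  = 16;
resizeDesc.m_TargetHeight = 16;
resizeDesc.m_DataLayout   = armnn::DataLayout::NHWC;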
A SoftmaxDescriptor for the SoftmaxLayer.
int m_Axis
Scalar, defaulted to the last index (-1), specifying the dimension the activation will be performed on.
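A minimal sketch; besides m_Axis, SoftmaxDescriptor also carries a m_Beta exponent scale (not listed above), which defaults to 1:

armnn::SoftmaxDescriptor softmaxDesc;
softmaxDesc.m_Axis = -1;    // operate over the last dimension (the default)
softmaxDesc.m_Beta = 1.0f;  // exponent scale applied before the softmax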
A SpaceToDepthDescriptor for the SpaceToDepthLayer.
unsigned int m_BlockSize
Scalar specifying the input block size. It must be >= 1.
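For example, a block size of 2 moves each 2x2 spatial patch into the channel dimension, turning an NHWC shape [N, H, W, C] into [N, H/2, W/2, 4*C]:

armnn::SpaceToDepthDescriptor spaceToDepthDesc;
spaceToDepthDesc.m_BlockSize  = 2;  // must be >= 1
spaceToDepthDesc.m_DataLayout = armnn::DataLayout::NHWC;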
std::vector< ViewOrigin > m_ViewOrigins
A StackDescriptor for the StackLayer.
A ViewsDescriptor for the SplitterLayer.
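A ViewsDescriptor is constructed with the number of views and dimensions, after which each view's size and origin are filled in. A sketch splitting a [4, 2] tensor into two [2, 2] halves, assuming the SetViewSize/SetViewOriginCoord interface:

armnn::ViewsDescriptor viewsDesc(2, 2);  // two views, two dimensions each
for (unsigned int d = 0; d < 2; ++d)
{
    viewsDesc.SetViewSize(0, d, 2);      // both views are 2x2
    viewsDesc.SetViewSize(1, d, 2);
}
viewsDesc.SetViewOriginCoord(0, 0, 0);   // first view starts at row 0
viewsDesc.SetViewOriginCoord(1, 0, 2);   // second view starts at row 2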