Compute Library
 22.05
Multithreaded.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "tests/CL/CLAccessor.h"
27 #include "tests/framework/Macros.h"
34 #include <thread>
35 
36 namespace arm_compute
37 {
38 namespace test
39 {
40 namespace validation
41 {
42 TEST_SUITE(CL)
43 TEST_SUITE(UNIT)
44 TEST_SUITE(RuntimeContext)
45 // This test tries scheduling work concurrently from num_threads (16) independent threads.
// Each thread configures, allocates, fills and runs its own CLPixelWiseMultiplication
// followed by its own CLActivationLayer; after all threads are joined, every
// per-thread output is validated against a single reference result.
46 TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL)
47 {
48  constexpr auto num_threads(16u);
 // One function object and one set of tensors per thread, indexed by thread number.
49  std::array<CLActivationLayer, num_threads> func{};
50  std::array<CLPixelWiseMultiplication, num_threads> pmul{};
 // s0/s1: the two inputs of the multiplication.
51  std::array<CLTensor, num_threads> s0{};
52  std::array<CLTensor, num_threads> s1{};
53 
 // st: multiplication output, which is also the activation input; dt: activation output.
54  std::array<CLTensor, num_threads> st{};
55  std::array<CLTensor, num_threads> dt{};
56 
57  const TensorShape tensor_shape(128u, 4u, 5u);
 // NOTE(review): `ainfo` is used below but no declaration is visible in this listing
 // (the embedded numbering skips from 57 to 59) - confirm against the original file.
59  std::array<std::thread, num_threads> threads;
60  auto ctx = parameters->get_ctx<CLTensor>();
61 
62  for(auto i = 0u; i < num_threads; ++i)
63  {
64  s0[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
65  s1[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
66  st[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
67  dt[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
 // The activation function is constructed with ctx; the multiplication is default-constructed.
68  func[i] = CLActivationLayer(ctx);
69  pmul[i] = CLPixelWiseMultiplication();
70  threads[i] =
 // The arrays are captured by reference and the index i by value, so each thread
 // touches only its own slot; the threads are joined below, before these locals
 // go out of scope.
71  std::thread([&,i]
72  {
73  auto &s = st[i];
74  auto &t = dt[i];
75  auto &p0 = s0[i];
76  auto &p1 = s1[i];
 // Both functions are configured before any of the tensors is allocated.
77  pmul[i].configure(&p0, &p1, &s, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP);
78  func[i].configure(&s, &t, ainfo);
79  s.allocator()->allocate();
80  t.allocator()->allocate();
81  p0.allocator()->allocate();
82  p1.allocator()->allocate();
 // Fill both inputs with the same arguments (0, -1.f, 1.f) that the reference tensors use below.
83  library->fill_tensor_uniform(CLAccessor(p0), 0, -1.f, 1.f);
84  library->fill_tensor_uniform(CLAccessor(p1), 0, -1.f, 1.f);
 // Run the multiplication first: its output tensor is the activation's input.
85  pmul[i].run();
86  func[i].run();
87  });
88  }
89 
 // Wait for every worker to finish before any output tensor is read.
90  for(auto &t : threads)
91  {
92  t.join();
93  }
94 
 // Build the expected result once with the reference implementations.
 // NOTE(review): rs is declared but not used in the visible code.
95  SimpleTensor<float> rs{ tensor_shape, DataType::F32, 1 };
96  SimpleTensor<float> ra{ tensor_shape, DataType::F32, 1 };
97  SimpleTensor<float> rb{ tensor_shape, DataType::F32, 1 };
98  library->fill_tensor_uniform(ra, 0, -1.f, 1.f);
99  library->fill_tensor_uniform(rb, 0, -1.f, 1.f);
100  const auto mul = reference::pixel_wise_multiplication<float, float, float>(ra, rb, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP, DataType::F32);
101  const auto golden = reference::activation_layer<float>(mul, ainfo);
 // Every thread's activation output is compared against the same golden tensor.
102  for(auto &d : dt)
103  {
104  validate(CLAccessor(d), golden);
105  }
106 }
107 
108 TEST_SUITE_END() // RuntimeContext
109 TEST_SUITE_END() // UNIT
110 TEST_SUITE_END() // CL
111 } // namespace validation
112 } // namespace test
113 } // namespace arm_compute
Shape of a tensor.
Definition: TensorShape.h:39
Rounds to nearest value; half rounds away from zero.
1 channel, 1 F32 per channel
Basic function to run opencl::kernels::ClActivationKernel.
Activation Layer Information class.
Definition: Types.h:1625
Copyright (c) 2017-2022 Arm Limited.
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
std::unique_ptr< AssetsLibrary > library
Definition: main.cpp:76
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
std::unique_ptr< ParametersLibrary > parameters
Definition: Framework.cpp:46
Accessor implementation for CLTensor objects.
Definition: CLAccessor.h:36
validate(CLAccessor(output_state), expected_output)
Basic function to run opencl::ClMul.
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: SimpleTensor.h:58
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
Basic implementation of the OpenCL tensor interface.
Definition: CLTensor.h:41