Compute Library 21.02
GEMMReshapeRHSMatrix.cpp
/*
 * Copyright (c) 2018-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/Types.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Macros.h"
#include "tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h"

namespace arm_compute
{
namespace test
{
namespace validation
{
namespace
{
// *INDENT-OFF*
// clang-format off
/** Batch size values to test */
const auto b_values = framework::dataset::make("batchsize", 1, 3);

/** N0 values to test */
const auto n0_values_nt_s32 = framework::dataset::make("N0", { 1, 2, 3 });
const auto n0_values_nt_s16 = framework::dataset::make("N0", { 4, 8 });
const auto n0_values_nt_s8  = framework::dataset::make("N0", { 16 });
const auto n0_values_t_s32  = framework::dataset::make("N0", { 4, 8 });
const auto n0_values_t_s16  = framework::dataset::make("N0", { 16 });
const auto n0_values_t_s8   = framework::dataset::make("N0", { 2, 3 });

/** K0 values to test */
const auto k0_values_nt_s32 = framework::dataset::make("K0", { 1, 2 });
const auto k0_values_nt_s16 = framework::dataset::make("K0", { 16 });
const auto k0_values_nt_s8  = framework::dataset::make("K0", { 3, 4 });
const auto k0_values_t_s32  = framework::dataset::make("K0", { 2, 3 });
const auto k0_values_t_s16  = framework::dataset::make("K0", { 4, 8 });
const auto k0_values_t_s8   = framework::dataset::make("K0", { 16 });
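// Note: the N0/K0 sets are split across the S32, S16 and S8 suites below so that, taken
// together, they cover the supported block sizes for each element size without running
// every (N0, K0) combination for every data type.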

/** H0 values to test */
const auto h0_values = framework::dataset::make("H0", 1, 4);

/** Interleave values to test */
const auto i_values = framework::dataset::make("interleave", { true, false });
} // namespace

// Initialize the output tensor with zero and fill the border with zero
using CLGEMMReshapeRHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder<CLGEMMReshapeRHSMatrixKernel, 16>;
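// The wrapper above zero-initialises the destination tensor and gives it a zero constant
// border (border size 16, the second template argument) before running
// CLGEMMReshapeRHSMatrixKernel, so elements the kernel never writes show up during validation.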

template <typename T>
using CLGEMMReshapeRHSMatrixFixture = GEMMReshapeRHSMatrixValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, T>;
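// For each test case the fixture produces _target, computed by running the reshape kernel on
// the OpenCL device, and _reference, computed by the test suite's reference implementation;
// the FIXTURE_DATA_TEST_CASE bodies below compare the two. In the dataset expressions,
// combine() forms the Cartesian product of two datasets and zip() pairs their entries
// element-wise.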

TEST_SUITE(CL)
TEST_SUITE(GEMMReshapeRHSMatrix)

// *INDENT-OFF*
// clang-format off
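// The Validate case below feeds CLGEMMReshapeRHSMatrixKernel::validate() a set of
// deliberately broken configurations (mismatching data types, unsupported N0/K0/H0 values)
// and checks that each one is rejected.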
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // Mismatching data types
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // Wrong n0 value
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // Wrong k0 value
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // Wrong h0 value
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // n0 > 16
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // k0 > 16
                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),  // k0 == 1 && transpose
                                                     }),
                                                       TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
                                                     })),
               framework::dataset::make("N0",{ 4, 0, 4, 4, 4, 17, 4, 4 })),
               framework::dataset::make("K0",{ 4, 4, 0, 4, 4, 4, 17, 1 })),
               framework::dataset::make("H0",{ 4, 4, 4, 0, 4, 4, 4, 4 })),
               framework::dataset::make("Expected", { false, false, false, false, false, false, false })),
               input_info, output_info, n0, k0, h0, expected)
{
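    // GEMMRHSMatrixInfo (see Types.h): n0 is the number of columns processed by the matrix
    // multiplication, k0 the number of partial accumulations, h0 the number of horizontal
    // (k0 x n0) blocks stored on the same output row; transpose transposes each block before
    // it is stored and interleave interleaves the h0 blocks within an output row.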
    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0         = n0;
    rhs_info.k0         = k0;
    rhs_info.h0         = h0;
    rhs_info.transpose  = true;
    rhs_info.interleave = true;

    bool has_error = bool(CLGEMMReshapeRHSMatrixKernel::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), rhs_info));

    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
}
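
// The next case configures the kernel for each input shape / N0 / K0 / H0 / data type
// combination and checks the right-hand padding added to the output tensor, which has to
// match the cl_image row-pitch requirement when the reshaped RHS is exported to a cl_image.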
               framework::dataset::make("InputShape", { TensorShape(32U, 16U, 1U),
                                                        TensorShape(32U, 16U, 2U)
                                                      }),
               framework::dataset::make("N0",{ 4 })),
               framework::dataset::make("K0",{ 4, 8, 16 })),
               framework::dataset::make("H0",{ 1, 2, 4 })),
               input_shape, n0, k0, h0, data_type)
{
    CLTensor input;
    CLTensor output;

    input.info()->init(input_shape, 1, data_type);

    unsigned int padding = 0;

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0         = n0;
    rhs_info.k0         = k0;
    rhs_info.h0         = h0;
    rhs_info.transpose  = true;
    rhs_info.interleave = true;

    if(rhs_info.export_to_cl_image)
    {
        // When the reshaped RHS is exported to a cl_image, each output row must start on a
        // pitch-aligned boundary; the pitch alignment is reported in pixels of 4 floats each.
        const TensorShape output_shape = misc::shape_calculator::compute_rhs_reshaped_shape(*input.info(), rhs_info);

        constexpr unsigned int num_floats_per_pixel = 4;

        const unsigned int pixel_alignment     = get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device());
        const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
        const unsigned int round_up_width      = ((output_shape[0] + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;

        padding = round_up_width - output_shape[0];
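        // Example: with a pitch alignment of 64 pixels, row_pitch_alignment is 64 * 4 = 256
        // elements, so an output row width of 300 rounds up to 512 and padding becomes 212.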
    }

    CLGEMMReshapeRHSMatrixKernel kernel;

    kernel.configure(&input, &output, rhs_info);

    // Only the right-hand side of the output should carry the computed padding
    ARM_COMPUTE_EXPECT(output.info()->padding().right == padding, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*

// Run S32 tests only for transpose = false
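// The reshape kernel only rearranges data, so S32, S16 and S8 act here as representative
// 4-, 2- and 1-byte element types; the same code paths cover the floating-point formats.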
                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                        b_values),
                                                                        framework::dataset::make("DataType", DataType::S32)),
                                                                        n0_values_nt_s32),
                                                                        k0_values_nt_s32),
                                                                        h0_values),
                                                                        i_values),
                                                                        framework::dataset::make("transpose", false)))
{
    // Validate output
    validate(CLAccessor(_target), _reference);
}

// Run S32 tests only for transpose = true
                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                        b_values),
                                                                        framework::dataset::make("DataType", DataType::S32)),
                                                                        n0_values_t_s32),
                                                                        k0_values_t_s32),
                                                                        h0_values),
                                                                        i_values),
                                                                        framework::dataset::make("transpose", true)))
{
    // Validate output
    validate(CLAccessor(_target), _reference);
}

// Run S16 tests only for transpose = false
                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                        b_values),
                                                                        framework::dataset::make("DataType", DataType::S16)),
                                                                        n0_values_nt_s16),
                                                                        k0_values_nt_s16),
                                                                        h0_values),
                                                                        i_values),
                                                                        framework::dataset::make("transpose", false)))
{
    // Validate output
    validate(CLAccessor(_target), _reference);
}

// Run S16 tests only for transpose = true
                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                        b_values),
                                                                        framework::dataset::make("DataType", DataType::S16)),
                                                                        n0_values_t_s16),
                                                                        k0_values_t_s16),
                                                                        h0_values),
                                                                        i_values),
                                                                        framework::dataset::make("transpose", true)))
{
    // Validate output
    validate(CLAccessor(_target), _reference);
}

// Run S8 tests only for transpose = false
                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                        b_values),
                                                                        framework::dataset::make("DataType", DataType::S8)),
                                                                        n0_values_nt_s8),
                                                                        k0_values_nt_s8),
                                                                        h0_values),
                                                                        i_values),
                                                                        framework::dataset::make("transpose", false)))
{
    // Validate output
    validate(CLAccessor(_target), _reference);
}

// Run S8 tests only for transpose = true
                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                        b_values),
                                                                        framework::dataset::make("DataType", DataType::S8)),
                                                                        n0_values_t_s8),
                                                                        k0_values_t_s8),
                                                                        h0_values),
                                                                        i_values),
                                                                        framework::dataset::make("transpose", true)))
{
    // Validate output
    validate(CLAccessor(_target), _reference);
}

TEST_SUITE_END() // GEMMReshapeRHSMatrix
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute