Compute Library
 21.11
GEMMReshapeRHSMatrix.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
29 #include "tests/CL/CLAccessor.h"
30 #include "tests/CL/Helper.h"
32 #include "tests/datasets/ShapeDatasets.h"
34 #include "tests/framework/Macros.h"
37 #include "tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h"
38 
39 namespace arm_compute
40 {
41 namespace test
42 {
43 namespace validation
44 {
45 namespace
46 {
47 // *INDENT-OFF*
48 // clang-format off
49 /** Batch size values to test, given as the two bounds 1 and 3 rather than as a list (NOTE(review): presumably a half-open range, i.e. 1 and 2 - confirm in the framework's range overload of make()) */
50 const auto b_values = framework::dataset::make("batchsize", 1, 3);
51 
52 /** N0 values to test, one set per fixture test case in this file: "nt" sets are paired with transpose = false, "t" sets with transpose = true, and s32/s16/s8 follows the S32/S16/S8 label in each test case's comment */
53 const auto n0_values_nt_s32 = framework::dataset::make("N0", { 1, 2, 3 });
54 const auto n0_values_nt_s16 = framework::dataset::make("N0", { 4, 8 });
55 const auto n0_values_nt_s8 = framework::dataset::make("N0", { 16 });
56 const auto n0_values_t_s32 = framework::dataset::make("N0", { 4, 8 });
57 const auto n0_values_t_s16 = framework::dataset::make("N0", { 16 });
58 const auto n0_values_t_s8 = framework::dataset::make("N0", { 2, 3 });
59 
60 /** K0 values to test, using the same nt/t and s32/s16/s8 suffix convention as the N0 sets; none of the "t" sets contains 1, the K0 value the Validate case flags as "k0 == 1 && transpose" */
61 const auto k0_values_nt_s32 = framework::dataset::make("K0", { 1, 2 });
62 const auto k0_values_nt_s16 = framework::dataset::make("K0", { 16 });
63 const auto k0_values_nt_s8 = framework::dataset::make("K0", { 3,4 });
64 const auto k0_values_t_s32 = framework::dataset::make("K0", { 2, 3 });
65 const auto k0_values_t_s16 = framework::dataset::make("K0", { 4, 8 });
66 const auto k0_values_t_s8 = framework::dataset::make("K0", { 16 });
67 
68 /** H0 values to test, given as the two bounds 1 and 4 (NOTE(review): presumably a half-open range, i.e. 1, 2 and 3 - confirm as for the batch sizes); shared by every fixture test case */
69 const auto h0_values = framework::dataset::make("H0", 1, 4);
70 
71 /** Interleave values to test: both settings, shared by every fixture test case */
72 const auto i_values = framework::dataset::make("interleave", { true, false });
73 } // namespace
74 
76 using namespace arm_compute::opencl::kernels; // Lets the code below name ClGemmReshapeRhsMatrixKernel without qualification
77 
78 // Operator wrapper around ClGemmReshapeRhsMatrixKernel used by the fixture tests: initializes the output tensor with zero and fills the border with zero. NOTE(review): the meaning of the template argument 16 is not visible here - confirm in tests/CL/Helper.h
79 using CLGEMMReshapeRHSMatrix = CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder<ClGemmReshapeRhsMatrixKernel, 16>;
80 
// Validation fixture for element type T, bound to CLTensor, CLAccessor and the CLGEMMReshapeRHSMatrix wrapper.
81 template <typename T>
82 using CLGEMMReshapeRHSMatrixFixture = GEMMReshapeRHSMatrixValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, T>;
83 
84 TEST_SUITE(CL) // Closed by the "TEST_SUITE_END() // CL" line at the end of the file
85 TEST_SUITE(GEMMReshapeRHSMatrix) // Closed by the "TEST_SUITE_END() // GEMMReshapeRHSMatrix" line at the end of the file
86 
// NOTE(review): formatting was already switched off inside the anonymous namespace and is only switched back on after the second data test case, so this pair of markers repeats the earlier one.
87 // *INDENT-OFF*
88 // clang-format off
// Negative checks for ClGemmReshapeRhsMatrixKernel::validate(): every row is meant to be an invalid
// configuration, so "Expected" is false throughout. The datasets are zipped row by row and must therefore
// all have one entry per InputInfo row (8).
// Fixes in this revision:
//  - "Expected" listed only 7 values for 8 rows, leaving the last row (k0 == 1 && transpose) without an
//    expectation (NOTE(review): how zip() treats the surplus row is not visible here; presumably it was dropped).
//  - the zero N0/K0/H0 values sat one row above the comments naming them, so the "Mismatching data types"
//    row also carried n0 = 0 and the "Wrong h0 value" row carried a valid h0; each zero now sits on its own row.
// NOTE(review): listing lines 99, 114 and 122 are absent from this extract (presumably the "OutputInfo"
// dataset opener with its first entry, the rhs_info declaration and the final expectation check); they are
// deliberately not reconstructed here - restore them from the upstream file.
89 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
90  framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
91  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // Mismatching data types
92  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // Wrong n0 value
93  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // Wrong k0 value
94  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // Wrong h0 value
95  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // n0 > 16
96  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // k0 > 16
97  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), // k0 == 1 && transpose
98  }),
100  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F16), // F16 output against F32 input: the "Mismatching data types" row
101  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
102  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
103  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
104  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
105  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
106  TensorInfo(TensorShape(32U, 2U, 2U), 1, DataType::F32),
107  })),
108  framework::dataset::make("N0",{ 4, 4, 0, 4, 4, 17, 4, 4 })), // 0 on the "Wrong n0 value" row, 17 on the "n0 > 16" row
109  framework::dataset::make("K0",{ 4, 4, 4, 0, 4, 4, 17, 1 })), // 0 on the "Wrong k0 value" row, 17 on "k0 > 16", 1 on "k0 == 1 && transpose"
110  framework::dataset::make("H0",{ 4, 4, 4, 4, 0, 4, 4, 4 })), // 0 on the "Wrong h0 value" row
111  framework::dataset::make("Expected", { false, false, false, false, false, false, false, false })), // one value per row
112  input_info, output_info, n0, k0, h0, expected)
113 {
115  rhs_info.n0 = n0;
116  rhs_info.k0 = k0;
117  rhs_info.h0 = h0;
118  rhs_info.transpose = true; // Always transposed, which is what makes the k0 == 1 row invalid
119  rhs_info.interleave = true;
120 
 // A destination with total_size() == 0 is passed as nullptr instead of a tensor info.
 // NOTE(review): despite its name, has_error is compared against "expected", which is false for these
 // invalid rows - so it presumably holds true when validate() succeeds; confirm before renaming.
121  bool has_error = bool(ClGemmReshapeRhsMatrixKernel::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), rhs_info));
123 }
124 
// NOTE(review): the opening line of this data test case (listing line 125) is absent from this extract, as are
// listing lines 132, 142, 148, 152, 162 and 166; the comments below describe only what is visible.
126  framework::dataset::make("InputShape", { TensorShape(32U, 16U, 1U),
127  TensorShape(32U, 16U, 2U)
128  }),
129  framework::dataset::make("N0",{ 4 })),
130  framework::dataset::make("K0",{ 4, 8, 16 })),
131  framework::dataset::make("H0",{ 1, 2, 4 })),
133  input_shape, n0, k0, h0, data_type)
134 {
135  CLTensor input;
136  CLTensor output;
137 
138  input.info()->init(input_shape, 1, data_type); // Only the input is initialised here; output.info() is handed to configure() below as-is
139 
140  unsigned int padding = 0; // Stays 0 unless the export_to_cl_image branch below recomputes it
141 
143  rhs_info.n0 = n0;
144  rhs_info.k0 = k0;
145  rhs_info.h0 = h0;
146  rhs_info.transpose = true;
147  rhs_info.interleave = true;
149 
150  if(rhs_info.export_to_cl_image) // NOTE(review): the line that sets export_to_cl_image (listing line 148) is not visible here
151  {
153  constexpr unsigned int num_floats_per_pixel = 4; // Converts the pitch alignment from pixels into elements
154 
155  const unsigned int pixel_aligment = get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()); // Pitch alignment in pixels for the current device
156  const unsigned int row_pitch_alignment = pixel_aligment * num_floats_per_pixel;
157  const unsigned int round_up_width = ((output_shape[0] + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment; // output_shape[0] rounded up to the next multiple of row_pitch_alignment; NOTE(review): divides by zero if the alignment is 0 - presumably guarded on the missing line 148, confirm
158 
159  padding = round_up_width - output_shape[0]; // Elements needed to extend the first dimension to the rounded-up width
160  }
161 
163 
164  kernel.configure(CLKernelLibrary::get().get_compile_context(), input.info(), output.info(), rhs_info); // "kernel" is declared on a listing line missing from this extract
165 
167 }
168 // clang-format on
169 // *INDENT-ON*
170 
171 // S32 run with transpose = false only: SmallGEMMReshape2DShapes combined with the batch sizes, the non-transposed S32 N0/K0 sets, the H0 values and both interleave settings. NOTE(review): listing lines 172 and 175 (presumably the test-case opener and the data-type dataset) are absent from this extract
173  combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
174  b_values),
176  n0_values_nt_s32),
177  k0_values_nt_s32),
178  h0_values),
179  i_values),
180  framework::dataset::make("transpose", false)))
181 {
182  // Compare the target tensor, read through CLAccessor, against the reference
183  validate(CLAccessor(_target), _reference);
184 }
185 
186 // S32 run with transpose = true only: SmallGEMMReshape2DShapes combined with the batch sizes, the transposed S32 N0/K0 sets, the H0 values and both interleave settings. NOTE(review): listing lines 187 and 190 (presumably the test-case opener and the data-type dataset) are absent from this extract
188  combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
189  b_values),
191  n0_values_t_s32),
192  k0_values_t_s32),
193  h0_values),
194  i_values),
195  framework::dataset::make("transpose", true)))
196 {
197  // Compare the target tensor, read through CLAccessor, against the reference
198  validate(CLAccessor(_target), _reference);
199 }
200 
201 // S16 run with transpose = false only: SmallGEMMReshape2DShapes combined with the batch sizes, the non-transposed S16 N0/K0 sets, the H0 values and both interleave settings. NOTE(review): listing lines 202 and 205 (presumably the test-case opener and the data-type dataset) are absent from this extract
203  combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
204  b_values),
206  n0_values_nt_s16),
207  k0_values_nt_s16),
208  h0_values),
209  i_values),
210  framework::dataset::make("transpose", false)))
211 {
212  // Compare the target tensor, read through CLAccessor, against the reference
213  validate(CLAccessor(_target), _reference);
214 }
215 
216 // S16 run with transpose = true only: SmallGEMMReshape2DShapes combined with the batch sizes, the transposed S16 N0/K0 sets, the H0 values and both interleave settings. NOTE(review): listing lines 217 and 220 (presumably the test-case opener and the data-type dataset) are absent from this extract
218  combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
219  b_values),
221  n0_values_t_s16),
222  k0_values_t_s16),
223  h0_values),
224  i_values),
225  framework::dataset::make("transpose", true)))
226 {
227  // Compare the target tensor, read through CLAccessor, against the reference
228  validate(CLAccessor(_target), _reference);
229 }
230 
231 // S8 run with transpose = false only: SmallGEMMReshape2DShapes combined with the batch sizes, the non-transposed S8 N0/K0 sets, the H0 values and both interleave settings. NOTE(review): listing lines 232 and 235 (presumably the test-case opener and the data-type dataset) are absent from this extract
233  combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
234  b_values),
236  n0_values_nt_s8),
237  k0_values_nt_s8),
238  h0_values),
239  i_values),
240  framework::dataset::make("transpose", false)))
241 {
242  // Compare the target tensor, read through CLAccessor, against the reference
243  validate(CLAccessor(_target), _reference);
244 }
245 
246 // S8 run with transpose = true only: SmallGEMMReshape2DShapes combined with the batch sizes, the transposed S8 N0/K0 sets, the H0 values and both interleave settings. NOTE(review): listing lines 247 and 250 (presumably the test-case opener and the data-type dataset) are absent from this extract
248  combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
249  b_values),
251  n0_values_t_s8),
252  k0_values_t_s8),
253  h0_values),
254  i_values),
255  framework::dataset::make("transpose", true)))
256 {
257  // Compare the target tensor, read through CLAccessor, against the reference
258  validate(CLAccessor(_target), _reference);
259 }
260 
261 TEST_SUITE_END() // GEMMReshapeRHSMatrix
262 TEST_SUITE_END() // CL
263 } // namespace validation
264 } // namespace test
265 } // namespace arm_compute
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:370
Shape of a tensor.
Definition: TensorShape.h:39
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Definition: CLTensor.cpp:41
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
unsigned int h0
Number of horizontal blocks of size (k0xn0) stored on the same output row.
Definition: Types.h:1962
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
PaddingSize padding() const override
Padding of tensor.
Definition: TensorInfo.h:254
void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const GEMMRHSMatrixInfo &rhs_info)
Initialise the kernel's input and output.
bool transpose
True if the (k0xn0) block has to be transposed before being stored.
Definition: Types.h:1963
bool export_to_cl_image
True if the reshaped rhs has to be exported to cl_image.
Definition: Types.h:1965
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
CLSynthetizeOperator< opencl::kernels::ClGemmReshapeRhsMatrixKernel > CLGEMMReshapeRHSMatrix
1 channel, 1 S32 per channel
const DataType data_type
Definition: Im2Col.cpp:150
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
Definition: Types.h:1961
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
const auto input_shape
Validate test suite is to test ARM_COMPUTE_RETURN_ON_* macros we use to check the validity of given a...
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
GEMM RHS (Right Hand Side) matrix information.
Definition: Types.h:1953
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const GEMMRHSMatrixInfo &rhs_info)
Static function to check if given info will lead to a valid configuration.
GEMMReshapeRHSMatrixValidationFixture< CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, T > CLGEMMReshapeRHSMatrixFixture
unsigned int n0
Number of columns processed by the matrix multiplication.
Definition: Types.h:1960
Accessor implementation for CLTensor objects.
Definition: CLAccessor.h:36
TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
Calculate the Right Hand Side matrix reshaped shape.
TEST_SUITE(U8_to_S8) FIXTURE_DATA_TEST_CASE(RunSmall
validate(CLAccessor(output_state), expected_output)
unsigned int right
right of the border
Definition: Types.h:378
1 channel, 1 S16 per channel
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:375
void init(Format format)
Initialize the tensor info with just a format.
Definition: TensorInfo.cpp:103
FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsLayerFixture< half >, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
Definition: AbsLayer.cpp:50
OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication. In particular...
bool interleave
True if the h0 (k0xn0) blocks have to be interleaved in the output row.
Definition: Types.h:1964
Store the tensor's metadata.
Definition: TensorInfo.h:43
zip(zip(framework::dataset::make("Weights", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 1U), 1, DataType::F32), }), framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F16), TensorInfo(TensorShape(5U), 1, DataType::F32), })), framework::dataset::make("Expected", { true, false, false}))
signed 8-bit number
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
Basic implementation of the OpenCL tensor interface.
Definition: CLTensor.h:41