Compute Library
 22.11
ClDirectConvDefaultConfigValhall.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

#include <utility>
33 
34 namespace arm_compute
35 {
36 namespace cl_direct_conv
37 {
39 
42 {
43 }
44 
46 {
47  using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigValhall::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
48 
49  ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDirectConvDefaultConfigValhall::configure_G78_f32,
50  &ClDirectConvDefaultConfigValhall::configure_G78_f16,
51  &ClDirectConvDefaultConfigValhall::configure_G78_u8);
52 
53  ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G57(&ClDirectConvDefaultConfigValhall::configure_G57_f32,
54  &ClDirectConvDefaultConfigValhall::configure_G57_f16,
55  &ClDirectConvDefaultConfigValhall::configure_G78_u8);
56 
57  ConfigurationFunctionExecutorPtr func = nullptr;
58  switch(_target)
59  {
60  case GPUTarget::G57:
61  func = configs_G57.get_function(src->data_type());
62  break;
63  case GPUTarget::G78:
64  default:
65  func = configs_G78.get_function(src->data_type());
66  break;
67  }
68 
69  ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
70  return (this->*func)(src, wei, conv_info);
71 }
72 
73 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
74 {
76 
77  if(src->data_layout() == DataLayout::NHWC)
78  {
79  // Get the output shape
80  const TensorShape wei_shape = wei->tensor_shape();
82  const bool export_weights_to_cl_image = export_to_cl_image(wei);
83 
84  const int32_t ofm = dst_shape[0];
85  const int32_t m = dst_shape[1] * dst_shape[2];
86  const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
87 
88  desc.export_weights_to_cl_image = export_weights_to_cl_image;
89 
90  if(dst_shape[0] <= 4)
91  {
92  if(is_pointwise)
93  {
94  if(ofm == 4)
95  {
96  desc.m0 = 1;
97  desc.n0 = 4;
98  desc.k0 = 16;
99  }
100  else
101  {
102  desc.m0 = 1;
103  desc.n0 = 1;
104  desc.k0 = 16;
105  }
106  }
107  else
108  {
109  desc.m0 = 1;
110  desc.n0 = 2;
111  desc.k0 = 16;
112  }
113  }
114  else
115  {
116  if(m < 64)
117  {
118  desc.m0 = 1;
119  desc.n0 = 1;
120  desc.k0 = 16;
121  }
122  else
123  {
124  desc.m0 = 4;
125  desc.n0 = 4;
126  desc.k0 = 4;
127  }
128  }
129  }
130 
131  return desc;
132 }
133 
134 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
135 {
137 
138  if(src->data_layout() == DataLayout::NHWC)
139  {
140  // Get the output shape
141  const TensorShape wei_shape = wei->tensor_shape();
143  const bool export_weights_to_cl_image = export_to_cl_image(wei);
144 
145  const int32_t ofm = dst_shape[0];
146  const int32_t m = dst_shape[1] * dst_shape[2];
147  const int32_t k = wei_shape[0];
148  const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
149 
150  desc.export_weights_to_cl_image = export_weights_to_cl_image;
151 
152  if(dst_shape[0] <= 4)
153  {
154  // k0 should be as larger as possible. However, we should avoid
155  // having left-over for loops that make the implementation slower.
156  if((k % 16) == 0)
157  {
158  desc.k0 = 16;
159  }
160  else if((k % 8) == 0)
161  {
162  desc.k0 = 8;
163  }
164  else
165  {
166  desc.k0 = 4;
167  }
168 
169  if(is_pointwise)
170  {
171  if(ofm == 4)
172  {
173  desc.m0 = 1;
174  desc.n0 = 4;
175  }
176  else
177  {
178  desc.m0 = 1;
179  desc.n0 = 1;
180  }
181  }
182  else
183  {
184  desc.m0 = 1;
185  desc.n0 = dst_shape[0];
186  }
187  }
188  else
189  {
190  if(m < 64)
191  {
192  desc.m0 = 1;
193  desc.n0 = 1;
194  if((k % 16) == 0)
195  {
196  desc.k0 = 16;
197  }
198  else if((k % 8) == 0)
199  {
200  desc.k0 = 8;
201  }
202  else
203  {
204  desc.k0 = 4;
205  }
206  }
207  else
208  {
209  if(ofm >= 16)
210  {
211  if(m / 6 > 24000)
212  {
213  desc.m0 = 6;
214  }
215  else
216  {
217  desc.m0 = 5;
218  }
219  desc.n0 = 8;
220  desc.k0 = 4;
221  }
222  else
223  {
224  desc.m0 = 2;
225  desc.n0 = 8;
226  if((k % 16) == 0)
227  {
228  desc.k0 = 16;
229  }
230  else if((k % 8) == 0)
231  {
232  desc.k0 = 8;
233  }
234  else
235  {
236  desc.k0 = 4;
237  }
238  }
239  }
240  }
241  }
242 
243  return desc;
244 }
245 
246 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
247 {
249 
250  if(src->data_layout() == DataLayout::NHWC)
251  {
252  // Get the output shape
254 
255  desc.n0 = 4;
256 
257  if(output_shape[0] > 16)
258  {
259  desc.m0 = 4;
260  }
261 
262  desc.k0 = 16;
263 
264  desc.export_weights_to_cl_image = false;
265  }
266 
267  return desc;
268 }
269 
270 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
271 {
273 
274  if(src->data_layout() == DataLayout::NHWC)
275  {
276  // Get the output shape
277  const TensorShape wei_shape = wei->tensor_shape();
279  const bool export_weights_to_cl_image = export_to_cl_image(wei);
280 
281  const int32_t m = dst_shape[1] * dst_shape[2];
282  const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
283 
284  desc.export_weights_to_cl_image = export_weights_to_cl_image;
285 
286  if(dst_shape[0] <= 4)
287  {
288  if(is_pointwise)
289  {
290  desc.m0 = 1;
291  desc.n0 = 1;
292  desc.k0 = 16;
293  }
294  else
295  {
296  desc.m0 = 1;
297  desc.n0 = dst_shape[0];
298  desc.k0 = 16;
299  }
300  }
301  else
302  {
303  if(m < 64)
304  {
305  if(m == 1)
306  {
307  desc.m0 = 1;
308  desc.n0 = 1;
309  desc.k0 = 16;
310  }
311  else
312  {
313  desc.m0 = 4;
314  desc.n0 = 2;
315  desc.k0 = 8;
316  }
317  }
318  else
319  {
320  desc.m0 = 4;
321  desc.n0 = 4;
322  desc.k0 = 4;
323  }
324  }
325  }
326 
327  return desc;
328 }
329 
330 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
331 {
333 
334  if(src->data_layout() == DataLayout::NHWC)
335  {
336  // Get the output shape
337  const TensorShape wei_shape = wei->tensor_shape();
339  const bool export_weights_to_cl_image = export_to_cl_image(wei);
340 
341  const int32_t ofm = dst_shape[0];
342  const int32_t m = dst_shape[1] * dst_shape[2];
343  const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
344 
345  desc.export_weights_to_cl_image = export_weights_to_cl_image;
346 
347  if(dst_shape[0] <= 4)
348  {
349  if(is_pointwise)
350  {
351  desc.m0 = 2;
352  desc.n0 = 1;
353  desc.k0 = 16;
354  }
355  else
356  {
357  desc.m0 = 1;
358  desc.n0 = dst_shape[0];
359  desc.k0 = 16;
360  }
361  }
362  else
363  {
364  if(m < 64)
365  {
366  if(m == 1)
367  {
368  desc.m0 = 1;
369  desc.n0 = 1;
370  desc.k0 = 16;
371  }
372  else
373  {
374  desc.m0 = 4;
375  desc.n0 = 2;
376  desc.k0 = 8;
377  }
378  }
379  else
380  {
381  if(ofm > 16)
382  {
383  desc.m0 = 4;
384  desc.n0 = 8;
385  desc.k0 = 8;
386  }
387  else
388  {
389  desc.m0 = 8;
390  desc.n0 = 4;
391  desc.k0 = 4;
392  }
393  }
394  }
395  }
396 
397  return desc;
398 }
} // namespace cl_direct_conv
400 } // namespace arm_compute
Shape of a tensor.
Definition: TensorShape.h:39
DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) override
This method returns the DirectConvComputeKernelInfo for the given inputs.
Basic container for the OpenCL direct convolution configuration functions.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
T get_function(DataType data_type)
Method to return the direct convolution configuration function based on data type.
Valhall based OpenCL direct convolution configuration.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
bool export_weights_to_cl_image
Flag to export the weights to cl_image.
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
int32_t n0
Number of columns to be processed by the kernel.
int32_t k0
Number of partial accumulations to be processed in a single iteration by the kernel.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
Compute descriptor used by the direct convolution kernel.
Padding and stride information class.
Definition: Types.h:669
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Basic interface for the Direct convolution kernel configuration.
Num samples, height, width, channels.
int32_t m0
Number of rows to be processed by the kernel.
bool export_to_cl_image(const ITensorInfo *tensor)
Definition: CLHelpers.cpp:444
TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
Calculate the deep convolution shape output shape of a tensor.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.