Compute Library
 22.11
CpuWinogradConv2dKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
26 
27 namespace arm_compute
28 {
29 namespace cpu
30 {
// Constructs the input-transform kernel.
// Initialises the members _winograd_impl, _conv_args and _nthreads from w_impl, _c_args and nthreads
// respectively; the constructor body is empty, so nothing else happens at construction time.
// NOTE(review): whether the members hold references or copies is decided by the class declaration,
// which is not visible in this listing — confirm lifetime requirements of w_impl / _c_args there.
31 CpuWinogradConv2dTransformInputKernel::CpuWinogradConv2dTransformInputKernel(arm_conv::winograd::WinogradImpl &w_impl, arm_conv::ConvolutionArgs &_c_args, uint32_t nthreads)
32  : _winograd_impl{ w_impl }, _conv_args{ _c_args }, _nthreads{ nthreads }
33 {
34 }
35 
37 {
// NOTE(review): listing lines 36 and 40 are absent from this extract. Line 36 is presumably the
// signature `void CpuWinogradConv2dTransformInputKernel::run_op(ITensorPack &tensors, const Window &window,
// const ThreadInfo &info)`, and line 40 presumably declares `winograd_input_transform` (used below) —
// confirm both against the original file.
//
// Purpose: gathers the source tensor, the transformed-input buffer and the workspace, converts the
// source strides from bytes to elements, and forwards everything to the input transform's execute().

// The window argument is intentionally unused.
38  ARM_COMPUTE_UNUSED(window);
// Source tensor (ACL_SRC) and workspace (ACL_INT) are looked up in the tensor pack.
39  const ITensor *input_nhwc = tensors.get_const_tensor(TensorType::ACL_SRC);
41  const ITensor *workspace = tensors.get_const_tensor(TensorType::ACL_INT);
42 
// Indices into the stride array for width, height and batch.
// NOTE(review): assumes an NHWC layout in which dimension 0 is the channel dimension — confirm.
43  const unsigned int width_idx = 1;
44  const unsigned int height_idx = 2;
45  const unsigned int batch_idx = 3;
// NOTE(review): element_size() returns size_t, narrowed to a non-const int here — confirm this is intended.
46  int element_size_in_bytes = input_nhwc->info()->element_size();
47  const auto src_strides = input_nhwc->info()->strides_in_bytes();
48 
// Strides are reported in bytes; dividing by the element size yields strides in elements.
49  const size_t input_row_stride = src_strides[height_idx] / element_size_in_bytes;
50  const size_t input_col_stride = src_strides[width_idx] / element_size_in_bytes;
51  const size_t input_batch_stride = src_strides[batch_idx] / element_size_in_bytes;
// Untyped pointers to the first element of the source and of the transformed-input buffer
// (buffer start plus offset_first_element_in_bytes()).
52  const auto input_nhwc_ptr = reinterpret_cast<const void *>(input_nhwc->buffer() + input_nhwc->info()->offset_first_element_in_bytes());
53  auto win_transf_ptr = reinterpret_cast<void *>(winograd_input_transform->buffer() + winograd_input_transform->info()->offset_first_element_in_bytes());
54 
// Run the input transform, passing the calling thread's id and the stored thread count.
// NOTE(review): workspace is dereferenced without a null check — confirm ACL_INT is always in the pack.
55  _winograd_impl.input_transform->execute(
56  _conv_args,
57  input_nhwc_ptr,
58  input_batch_stride,
59  input_row_stride,
60  input_col_stride,
61  win_transf_ptr,
62  _winograd_impl.winograd_spec,
63  workspace->buffer(),
64  info.thread_id,
65  _nthreads);
66 }
67 
// Constructs the output-transform kernel.
// Initialises the members _winograd_impl, _conv_args and _nthreads from w_impl, _c_args and nthreads
// respectively; the constructor body is empty, so nothing else happens at construction time.
// NOTE(review): whether the members hold references or copies is decided by the class declaration,
// which is not visible in this listing — confirm lifetime requirements of w_impl / _c_args there.
68 CpuWinogradConv2dTransformOutputKernel::CpuWinogradConv2dTransformOutputKernel(arm_conv::winograd::WinogradImpl &w_impl, arm_conv::ConvolutionArgs &_c_args, uint32_t nthreads)
69  : _winograd_impl{ w_impl }, _conv_args{ _c_args }, _nthreads{ nthreads }
70 {
71 }
72 
73 // Inherited methods overridden:
75 {
// NOTE(review): listing lines 74 and 78 are absent from this extract. Line 74 is presumably the
// signature `void CpuWinogradConv2dTransformOutputKernel::run_op(ITensorPack &tensors, const Window &window,
// const ThreadInfo &info)`, and line 78 presumably declares `winograd_output_transform` (used below) —
// confirm both against the original file.
//
// Purpose: gathers the destination tensor, the optional bias, the transformed-output buffer and the
// workspace, converts the destination strides from bytes to elements, and forwards everything to the
// output transform's execute().

// The window argument is intentionally unused.
76  ARM_COMPUTE_UNUSED(window);
// NOTE(review): the destination is fetched through get_const_tensor yet its buffer is written to below,
// and the workspace uses get_tensor whereas the input kernel uses get_const_tensor — confirm intended.
77  const ITensor *dst_nhwc = tensors.get_const_tensor(TensorType::ACL_DST);
79  const ITensor *biases = tensors.get_const_tensor(TensorType::ACL_SRC_1);
80  const ITensor *workspace = tensors.get_tensor(TensorType::ACL_INT);
81 
// Indices into the stride array for width, height and batch.
// NOTE(review): assumes an NHWC layout in which dimension 0 is the channel dimension — confirm.
82  const unsigned int width_idx = 1;
83  const unsigned int height_idx = 2;
84  const unsigned int batch_idx = 3;
// NOTE(review): element_size() returns size_t and is narrowed to int here — confirm this is intended.
85  const int element_size_in_bytes = dst_nhwc->info()->element_size();
86  const auto dst_strides = dst_nhwc->info()->strides_in_bytes();
87 
// Strides are reported in bytes; dividing by the element size yields strides in elements.
88  const size_t out_row_stride = dst_strides[height_idx] / element_size_in_bytes;
89  const size_t out_col_stride = dst_strides[width_idx] / element_size_in_bytes;
90  const size_t out_batch_stride = dst_strides[batch_idx] / element_size_in_bytes;
// Untyped pointers to the first element of the transformed-output buffer and of the destination
// (buffer start plus offset_first_element_in_bytes()).
91  const auto wout_transf_ptr = reinterpret_cast<const void *>(winograd_output_transform->buffer() + winograd_output_transform->info()->offset_first_element_in_bytes());
92  auto dst_nhwc_ptr = reinterpret_cast<void *>(dst_nhwc->buffer() + dst_nhwc->info()->offset_first_element_in_bytes());
// The bias is optional: the pointer stays null when the pack returns no tensor for ACL_SRC_1.
93  void *biases_data_ptr = nullptr;
94  if(biases != nullptr)
95  {
96  biases_data_ptr = reinterpret_cast<void *>(biases->buffer() + biases->info()->offset_first_element_in_bytes());
97  }
98 
// NOTE(review): unlike biases, workspace is dereferenced without a null check — confirm ACL_INT is
// always in the pack.
99  // Output transform
100  _winograd_impl.output_transform->execute(
101  _conv_args,
102  wout_transf_ptr,
103  _winograd_impl.winograd_spec,
104  biases_data_ptr,
105  dst_nhwc_ptr,
106  out_batch_stride,
107  out_row_stride,
108  out_col_stride,
109  workspace->buffer(),
110  info.thread_id,
111  _nthreads);
112 }
113 
114 } // namespace cpu
115 } // namespace arm_compute
CpuWinogradConv2dTransformOutputKernel(const CpuWinogradConv2dTransformOutputKernel &)=delete
Prevent instances of this class from being copied (As this class contains pointers) ...
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
CpuWinogradConv2dTransformInputKernel(const CpuWinogradConv2dTransformInputKernel &)=delete
Prevent instances of this class from being copied (As this class contains pointers) ...
Interface for CPU tensor.
Definition: ITensor.h:36
SimpleTensor< T > winograd_output_transform(const SimpleTensor< T > &in, const SimpleTensor< T > &b, const TensorShape &output_shape, const WinogradInfo &winograd_info)
Definition: Winograd.cpp:440
Copyright (c) 2017-2022 Arm Limited.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
virtual size_t offset_first_element_in_bytes() const =0
The offset from the beginning of the memory allocation to the first element of the tensor...
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
Information about executing thread and CPU.
Definition: CPPTypes.h:179
Tensor packing service.
Definition: ITensorPack.h:39
SimpleTensor< T > winograd_input_transform(const SimpleTensor< T > &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)
Definition: Winograd.cpp:236
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
Describe a multidimensional execution window.
Definition: Window.h:39