Compute Library
 21.11
CpuGemmAssemblyWrapperKernel.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
25 #define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
26 
27 #include "arm_compute/core/Utils.h"
31 
32 #include "gemm_common.hpp"
33 
34 namespace arm_compute
35 {
36 class ITensor;
37 
38 namespace cpu
39 {
40 namespace kernel
41 {
42 /** This class is a wrapper for the assembly kernels.
43  *
44  * Some kernels were written in assembly and highly optimised for specific CPUs like A53 or A55.
45  * This class works as a wrapper for these assembly kernels. The arm compute library creates an instance
46  * of CpuGemmAssemblyWrapperKernel and other auxiliary data structures to execute a single assembly kernel
47  * in the context of an NEFunction.
48  *
49  * The actual kernel implemented in assembly is of type arm_gemm::GemmCommon<TypeInput, TypeOutput>, declared as
50  * template<typename To, typename Tr> class GemmCommon
51  *
52  *
53  */
54 template <typename TypeInput, typename TypeOutput>
56 {
57 public:
58  /** Constructor
 *
 * Starts with no assembly kernel attached (_kernel is nullptr) and with the
 * default name "CpuGemmAssemblyWrapperKernel".
59  */
61  : _kernel(nullptr), _name("CpuGemmAssemblyWrapperKernel")
62  {
63  }
64 
68 
/** Name of the kernel.
 *
 * @return Pointer to the characters held by _name. _name starts as
 *         "CpuGemmAssemblyWrapperKernel" and configure() appends "/" plus the
 *         tag when it is given a non-empty kernel_name_tag.
 */
69  const char *name() const override
70  {
71  return _name.c_str();
72  }
73 
/** Execute the kernel on the passed window.
 *
 * No thread locator is supplied by the caller here, so a value-initialised
 * arm_gemm::ndcoord_t is handed to the assembly kernel in its place.
 *
 * @param[in] window Region to execute; converted with arm_gemm::to_ndcoord().
 * @param[in] info   Information about the executing thread; only info.thread_id is read.
 */
74  void run(const Window &window, const ThreadInfo &info) override
75  {
76  ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
// NOTE(review): listing line 77 is absent from this extract; presumably an
// ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL check — confirm against the real header.
78 
79  auto win = arm_gemm::to_ndcoord(window);
80 
// Value-initialised locator: this overload has no thread_locator argument to forward.
81  arm_gemm::ndcoord_t thread_locator{};
82 
83  _kernel->execute(win, thread_locator, info.thread_id);
84  }
85 
86  // Inherited methods overridden:
/** Execute the kernel on the passed window, forwarding the caller's thread locator.
 *
 * @param[in] window         Region to execute; converted with arm_gemm::to_ndcoord().
 * @param[in] info           Information about the executing thread; only info.thread_id is read.
 * @param[in] thread_locator Converted with arm_gemm::to_ndcoord() and passed as the
 *                           second argument of the assembly kernel's execute().
 */
87  void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) override
88  {
89  ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
// NOTE(review): listing line 90 is absent from this extract; presumably an
// ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL check — confirm against the real header.
91 
92  //convert between arm_compute and arm_gemm types
93  auto ndc_win = arm_gemm::to_ndcoord(window);
94  auto ndc_tlc = arm_gemm::to_ndcoord(thread_locator);
95 
96  _kernel->execute(ndc_win, ndc_tlc, info.thread_id);
97  }
98 
99  /** Initialise the kernel's input and output.
100  *
 * Stores the given pointer in _kernel (the kernel object itself is not copied),
 * configures the execution window from kernel->get_window_size() and, when a
 * tag is supplied, extends the kernel name with "/" followed by the tag.
 *
101  * @param[in] kernel Pointer to an assembly kernel implementation. Checked with ARM_COMPUTE_ERROR_ON_NULLPTR.
102  * @param[in] kernel_name_tag Tag to be attached to the kernel's name. An empty tag leaves the name unchanged.
103  */
104  void configure(arm_gemm::GemmCommon<TypeInput, TypeOutput> *kernel, std::string kernel_name_tag)
105  {
106  ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
107  _kernel = kernel;
108 
// The window size is taken from the assembly kernel and converted to an arm_compute Window.
109  Window win = to_window(kernel->get_window_size());
110 
111  INEKernel::configure(win);
112 
113  if(!kernel_name_tag.empty())
114  {
115  _name += "/" + kernel_name_tag;
116  }
117  }
118  /** Return minimum workload size of the relevant kernel
119  *
120  * @param[in] platform The CPU platform used to create the context. Marked unused in this implementation.
121  * @param[in] thread_count Number of threads in the execution. Marked unused in this implementation.
122  *
123  * @return[out] small_network_mws Minimum workload size for requested configuration.
124  */
125  size_t get_mws(const CPUInfo &platform, size_t thread_count) const override
126  {
127  ARM_COMPUTE_UNUSED(platform, thread_count);
128 
// NOTE(review): listing line 129 (the return statement) is absent from this extract;
// presumably it returns ICPPKernel::small_network_mws — confirm against the real header.
130  }
131 
132 private:
134  std::string _name;
135 };
136 } // namespace kernel
137 } // namespace cpu
138 } // namespace arm_compute
139 #endif /* ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H */
arm_compute::Window to_window(const ndrange_t &ndr)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
virtual void execute(const ndcoord_t &work_range, const ndcoord_t &thread_locator, int threadid)=0
Main execute member function.
static constexpr size_t small_network_mws
Definition: ICPPKernel.h:42
Common interface for all kernels implemented in C++.
Definition: ICPPKernel.h:38
Copyright (c) 2017-2021 Arm Limited.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915
CpuGemmAssemblyWrapperKernel & operator=(CpuGemmAssemblyWrapperKernel &)=delete
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
This class is a wrapper for the assembly kernels.
Information about executing thread and CPU.
Definition: CPPTypes.h:158
const char * name() const override
Name of the kernel.
virtual ndrange_t get_window_size() const =0
NDCoordinate builds upon a range, but specifies a starting position in addition to a size which it in...
Definition: ndrange.hpp:151
void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) override
legacy compatibility layer for implementations which do not support thread_locator. In these cases we si...
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
Describe a multidimensional execution window.
Definition: Window.h:39
size_t get_mws(const CPUInfo &platform, size_t thread_count) const override
Return minimum workload size of the relevant kernel.
ndcoord_t to_ndcoord(const arm_compute::Window &win)
Convert an arm_compute::Window to an arm_gemm::NDCoord of the same max dimensions.
void configure(arm_gemm::GemmCommon< TypeInput, TypeOutput > *kernel, std::string kernel_name_tag)
Initialise the kernel's input and output.