Compute Library
 22.11
ClFloorKernelComponent.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
27 #include "arm_compute/core/Error.h"
31 
32 namespace arm_compute
33 {
34 namespace experimental
35 {
36 namespace dynamic_fusion
37 {
39 {
40  return ComponentType::Simple;
41 }
// Returns the set of header file names reported by the floor kernel component:
// the mixed-precision helper header and "tile_helpers.h".
// A new std::set is constructed and returned by value on every call.
// NOTE(review): presumably these are the OpenCL headers that provide the TILE / T_LOAD / T_FLOOR
// macros used by this component's generated code - confirm against the kernel writer.
42 std::set<std::string> ClFloorKernelComponent::get_headers_list() const
43 {
44  return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
45 }
47 {
48  const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
49  ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
50 
51  ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info);
52  auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type());
53 
55  // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
56  // This is in line with the collapsing convention used by Conv2d
57  output_shape.collapse(2U, 1U);
58  const unsigned int vector_size_byte_opencl = 16;
59  const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
60  Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
61 
62  return win;
63 }
65 {
66  bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument;
67  if(is_root)
68  {
69  return R"_(
70  //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
71  // IN_0(src) {{src}}
72  // OUT(dst, accum) {{dst}}
73  TILE({{DATA_TYPE}}, M0, N0, {{dst}});
74  {
75  TILE({{DATA_TYPE}}, M0, N0, src_tile);
76 
77  // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w)
78  {{src}}_offset_first_element_in_bytes += bout * {{src}}_stride_w;
79  T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile);
80 
81  T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}});
82  }
83  //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
84 )_";
85  }
86  else
87  {
88  return R"_(
89  //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
90  // IN_0/Out(Accumulator) {{acc}}
91  // output = floor(input)
92  {
93  T_FLOOR({{DATA_TYPE}}, M0, N0, {{acc}}, {{acc}});
94  }
95  //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
96 )_";
97  }
98 }
100 {
101  CLBuildOptions build_opts{};
102  const auto n0 = _blueprint->impl().get_execution_window().x().step();
103  const auto m0 = _blueprint->impl().get_execution_window().y().step();
104  const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
105  const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
106  build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
107  build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
108  build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
109  return build_opts;
110 }
112 {
113  auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
114  std::string config_id{};
115  config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
116  config_id += "_";
117  config_id += support::cpp11::to_string(t_dst_info->dimension(0));
118  config_id += "_";
119  config_id += support::cpp11::to_string(t_dst_info->dimension(1));
120  config_id += "_";
121  config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
122  return config_id;
123 }
125 {
126  vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
127  vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
128 }
130 {
131  TagLUT lut{};
132  const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
133  // Arguments and global shared variables
134  const bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument;
135 
136  if(is_root)
137  {
138  lut["src"] = vtable.get(_src);
139  lut["dst"] = vtable.get(_dst);
140  }
141  else
142  {
143  lut["acc"] = vtable.get(_src);
144  }
145 
146  lut["meta_kernel_id"] = id();
147  lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
148  return lut;
149 }
150 } // namespace dynamic_fusion
151 } // namespace experimental
152 } // namespace arm_compute
153 #endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Shape of a tensor.
Definition: TensorShape.h:39
SharedVar get(const SharedVarLink &var_link) const
Get the SharedVar associated with var_link.
Definition: Common.h:206
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:353
Copyright (c) 2017-2022 Arm Limited.
Describes all the info required to add a kernel argument at run time.
Definition: ClWorkload.h:70
void add_option(std::string option)
Adds option to the existing build option list.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
A table of all the variables used in the kernel / blueprint Because we limit the DependencyGraph in t...
Definition: Common.h:92
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
std::string generate_config_id() const override
Generate config id of the component.
unsigned int num_elems_processed_per_iteration
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override
Get the tag look-up table used to instantiate the component code.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
TensorInfo src_info(src_shape, 1, data_type)
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Definition: Utils.cpp:123
virtual void allocate_shared_vars(SharedVarTable &vtable) const override
Allocate all shared variables used by the component in the vtable.
void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name="unnamed")
Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID).
Definition: Common.h:153
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
Definition: Utils.h:1222
Describe a multidimensional execution window.
Definition: Window.h:39
void collapse(size_t n, size_t first=0)
Collapse the first n dimensions.
Definition: TensorShape.h:133