Compute Library
 22.05
ClElementwiseAddKernelComponent.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
25 
30 
31 namespace arm_compute
32 {
33 namespace experimental
34 {
35 namespace dynamic_fusion
36 {
38 {
39  return ComponentType::Simple;
40 }
41 
43 {
44  return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
45 }
46 
48 {
49  const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id);
50  const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
51  ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
52 
53  ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info);
54 
55  const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info);
56  const TensorShape &out_shape = broadcast_pair.first;
57 
58  auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type());
59 
60  const unsigned int vector_size_byte_opencl = 16;
61  const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
62  Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration));
63 
64  return win;
65 }
66 
68 {
69  std::string code;
70  const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
71 
72  if(is_root)
73  {
74  return R"_(
75  //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
76  // IN_0(LHS) {{lhs}}
77  // IN_1(RHS) {{rhs}}
78  // OUT(dst, accum) {{dst}}
79 
80  // dst = lhs + rhs (mix-precision, broadcast, boundary aware)
81  TILE({{DATA_TYPE}}, M0, N0, {{dst}});
82  {
83  TILE({{DATA_TYPE}}, M0, N0, lhs_tile);
84  TILE({{DATA_TYPE}}, M0, N0, rhs_tile);
85 
86  T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile);
87  T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile);
88 
89 #if defined(IS_BROADCAST)
90  T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
91 #else // !defined(IS_BROADCAST)
92  T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
93 #endif // defined(IS_BROADCAST)
94 
95  }
96  //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
97 )_";
98  }
99  else
100  {
101  return R"_(
102  //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
103  // IN_0/Out(Accumulator) {{acc}}
104  // IN_1(Addend) {{addend}}
105 
106  // acc = addend + acc (mix-precision, broadcast, boundary aware)
107  {
108  TILE({{DATA_TYPE}}, M0, N0, addend_tile);
109 
110  T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile);
111 
112 #if defined(IS_BROADCAST)
113  T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
114 #else // !defined(IS_BROADCAST)
115  T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
116 #endif // defined(IS_BROADCAST)
117  }
118  //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
119 )_";
120  }
121 }
122 
124 {
125  const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
126  const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
127 
128  CLBuildOptions build_opts{};
129  const auto n0 = _blueprint->impl().get_execution_window().x().step();
130  const auto m0 = _blueprint->impl().get_execution_window().y().step();
131  const bool is_broadcast = t_src_info->tensor_shape() != t_dst_info->tensor_shape();
132 
133  build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
134  build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
135  build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST");
136 
137  return build_opts;
138 }
139 
141 {
142  auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
143  std::string config_id{};
144  config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
145  config_id += "_";
146  config_id += support::cpp11::to_string(t_dst_info->dimension(0));
147  config_id += "_";
148  config_id += support::cpp11::to_string(t_dst_info->dimension(1));
149  config_id += "_";
150  config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
151  return config_id;
152 }
153 
155 {
156  const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
157  vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs");
158  vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs");
159  if(is_root)
160  {
161  vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
162  }
163 }
164 
166 {
167  TagLUT lut{};
168  const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
169  // Arguments and global shared variables
170  const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
171  if(is_root)
172  {
173  lut["lhs"] = vtable.get(_lhs);
174  lut["rhs"] = vtable.get(_rhs);
175  lut["dst"] = vtable.get(_dst);
176  }
177  else
178  {
179  // Determine which link is the accumulator
180  Link accumulator;
181  Link addend;
182  if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic)
183  {
184  accumulator = _lhs;
185  addend = _rhs;
186  }
187  else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic)
188  {
189  accumulator = _rhs;
190  addend = _lhs;
191  }
192  else
193  {
194  ARM_COMPUTE_ERROR("Invalid elementwise component linking");
195  }
196  lut["acc"] = vtable.get(accumulator);
197  lut["addend"] = vtable.get(addend);
198  }
199  // Local build options
200  lut["meta_kernel_id"] = id();
201  lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
202  return lut;
203 }
204 } // namespace dynamic_fusion
205 } // namespace experimental
206 } // namespace arm_compute
207 #endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Shape of a tensor.
Definition: TensorShape.h:39
virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override
Get the tag look-up table used to instantiate the component code.
SharedVar get(const SharedVarLink &var_link) const
Get the SharedVar associated with var_link.
Definition: Common.h:206
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:351
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If the infos are broadcast-compatible tensor infos, return the broadcast shape and the intersection of the valid regions.
Definition: ITensorInfo.h:299
Copyright (c) 2017-2022 Arm Limited.
Describes all the info required to add a kernel argument at run time.
Definition: ClWorkload.h:70
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
A table of all the variables used in the kernel / blueprint Because we limit the DependencyGraph in t...
Definition: Common.h:92
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
unsigned int num_elems_processed_per_iteration
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Definition: Utils.cpp:123
void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name="unnamed")
Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID).
Definition: Common.h:153
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
Definition: Utils.h:1222
virtual void allocate_shared_vars(SharedVarTable &vtable) const override
Allocate all shared variables used by the component in the vtable.
Describe a multidimensional execution window.
Definition: Window.h:39
std::string generate_config_id() const override
Generate config id of the component.