24 #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION 33 namespace experimental
35 namespace dynamic_fusion
44 return std::set<std::string> {
"common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h",
"tile_helpers.h" };
51 ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
56 const TensorShape &out_shape = broadcast_pair.first;
60 const unsigned int vector_size_byte_opencl = 16;
75 //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- 78 // OUT(dst, accum) {{dst}} 80 // dst = lhs + rhs (mix-precision, broadcast, boundary aware) 81 TILE({{DATA_TYPE}}, M0, N0, {{dst}}); 83 TILE({{DATA_TYPE}}, M0, N0, lhs_tile); 84 TILE({{DATA_TYPE}}, M0, N0, rhs_tile); 86 T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile); 87 T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile); 89 #if defined(IS_BROADCAST) 90 T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); 91 #else // !defined(IS_BROADCAST) 92 T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); 93 #endif // defined(IS_BROADCAST) 96 //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- 102 //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- 103 // IN_0/Out(Accumulator) {{acc}} 104 // IN_1(Addend) {{addend}} 106 // acc = addend + acc (mix-precision, broadcast, boundary aware) 108 TILE({{DATA_TYPE}}, M0, N0, addend_tile); 110 T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile); 112 #if defined(IS_BROADCAST) 113 T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); 114 #else // !defined(IS_BROADCAST) 115 T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); 116 #endif // defined(IS_BROADCAST) 118 //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- 125 const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.
arg_id);
126 const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
129 const auto n0 = _blueprint->impl().get_execution_window().x().step();
130 const auto m0 = _blueprint->impl().get_execution_window().y().step();
131 const bool is_broadcast = t_src_info->tensor_shape() !=
t_dst_info->tensor_shape();
135 build_opts.add_option_if(is_broadcast,
"-DIS_BROADCAST");
142 auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
143 std::string config_id{};
168 const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
173 lut[
"lhs"] = vtable.
get(_lhs);
174 lut[
"rhs"] = vtable.
get(_rhs);
175 lut[
"dst"] = vtable.
get(_dst);
196 lut[
"acc"] = vtable.
get(accumulator);
197 lut[
"addend"] = vtable.
get(addend);
200 lut[
"meta_kernel_id"] =
id();
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Window get_window() const override
virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override
Get the tag look-up table used to instantiate the component code.
SharedVar get(const SharedVarLink &var_link) const
Get the SharedVar associated with var_link.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Store the tensor's metadata.
Specifies a shared variable link for a component.
std::string get_component_code() const override
std::string lower_string(const std::string &val)
Lower a given string.
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If infos are broadcast-compatible tensor infos, return the broadcasted shape and the intersection of...
Copyright (c) 2017-2022 Arm Limited.
Describes all the info required to add a kernel argument at run time.
std::set< std::string > get_headers_list() const override
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
A table of all the variables used in the kernel / blueprint. Because we limit the DependencyGraph in t...
Class to describe a number of elements in each dimension.
unsigned int num_elems_processed_per_iteration
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual size_t element_size() const =0
Element size in bytes, calculated as data_size() * num_channels().
ComponentType get_component_type() const override
CLBuildOptions generate_build_options() const override
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name="unnamed")
Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID).
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
virtual void allocate_shared_vars(SharedVarTable &vtable) const override
Allocate all shared variables used by the component in the vtable.
Describe a multidimensional execution window.
std::unordered_map< Tag, TagVal > TagLUT
std::string generate_config_id() const override
Generate config id of the component.