Compute Library
 22.05
arm_compute::experimental::dynamic_fusion Namespace Reference

Data Structures

class  AddContent
 
struct  AddDescriptor
 Descriptor for Addition operation. More...
 
struct  AuxMemoryInfo
 Memory Info for a WorkloadTensor of Auxiliary memory type. More...
 
struct  ClActivationKernelDescriptor
 
struct  ClAddKernel
 
class  ClAuxTensorData
 Holder of any auxiliary CLTensors required by a ClWorkload. More...
 
struct  ClCodeBuilderContext
 All information required for building the ClKernelCode. More...
 
class  ClCompositeKernel
 
class  ClCompositeOperator
 Operator runtime to run a ClWorkload. More...
 
struct  ClDirectConv2dKernel
 
struct  ClDirectConv2dKernelDescriptor
 
class  ClDirectConvolutionKernelComponent
 
class  ClElementwiseAddKernelComponent
 
struct  ClEltwiseAddKernelDescriptor
 
struct  ClExecutionDescriptor
 Descriptor containing information required to run a single ClWorkload. More...
 
struct  ClFusedKernelGraph
 
struct  ClKernel
 
struct  ClKernelArgDescriptor
 Describes all the info required to add a kernel argument at run time. More...
 
class  ClKernelBlueprint
 Intermediate representation of the final, complete kernel source. More...
 
struct  ClKernelCode
 Contains kernel code to be compiled and run in a ClUnitWorkload. More...
 
struct  ClKernelConfig
 Configurations for ClKernel. More...
 
struct  ClKernelFusionGroup
 A const view of a subgraph of the ClKernelGraph to be fused together. More...
 
struct  ClKernelGraph
 
struct  ClKernelTensor
 
class  ClStoreBlockBoundaryAwareKernelComponent
 
class  ClStoreIndirectWidthSelectKernelComponent
 
struct  ClUnitWorkload
 The basic atomic unit in a ClWorkload. More...
 
struct  ClWorkload
 Workload for Cl backend. More...
 
struct  ClWorkloadContext
 Context (device capabilities, platform details) associated with a ClWorkload. More...
 
struct  ClWorkloadTensor
 A descriptor of ClWorkload Tensors. More...
 
struct  Conv2dContent
 
struct  Conv2dDescriptor
 Descriptor for the Conv2d operation. More...
 
class  DependencyGraph
 The dependency graph of a workload, where the nodes are of 2 types: Tensor or Operator Represented as a doubly-linked adjacency list with the differentiation between source and destination. More...
 
struct  GpuInfo
 GPU information for ClWorkloadContext. More...
 
class  IClKernelComponent
 
class  ITensorDescPack
 
struct  IWorkload
 Run-time-agnostic, platform-specific graph that describes everything required to run a workload It can be configured into an Arm Compute Library runtime, integrated into the runtime of another framework, or integrated into the compilation flow. More...
 
class  Operator
 Operator Handle This can be used to further modify an existing operator. More...
 
struct  OperatorContent
 
class  OperatorGraph
 Graph of operators to execute within a Workload. More...
 
class  OpTensor
 Operator Tensor Handle This can be either an argument tensor, or an intermediate tensor linking 2 Operator s. More...
 
struct  OpTensorContent
 
struct  Padding2D
 Padding information for 2D operations like Conv2dDescriptor. More...
 
struct  SharedVarLink
 Specifies a shared variable link for a component. More...
 
class  SharedVarTable
 A table of all the variables used in the kernel / blueprint Because we limit the DependencyGraph in the blueprint to a Linear Sequence for now, we only allow ** a single global variable (the accumulator) **. More...
 
class  TensorPackMap
 Map a kernel (as identified by its unit workload id) to its corresponding tensor pack. More...
 
struct  TileDescriptor
 Component: Store. More...
 
struct  UnitWorkload
 The basic atomic unit in an IWorkload. More...
 
struct  UnitWorkloadStage
 Describes when a Unit Workload is run. More...
 
struct  WorkloadTensor
 A descriptor for IWorkload Tensors. More...
 

Typedefs

using ClKernelArgList = std::map< int, ClKernelArgDescriptor >
 
using AuxMemoryLifetime = MemoryLifetime
 
using OpTensorBinding = std::map< OpTensor, ITensor * >
 Map OpTensor handles to their corresponding ITensor memory. More...
 
using ArgumentID = DependencyGraph::Id
 
using ComponentID = DependencyGraph::Id
 
using ComponentList = std::vector< ComponentID >
 
using ComponentUniquePtr = std::unique_ptr< IClKernelComponent >
 
using Id = DependencyGraph::Id
 

Enumerations

enum  ClKernelTensorArgType : int {
  Scalar, Vector, Image, Image_Reinterpret_As_3D,
  Image_Export_To_ClImage2D, Image_3D, Image_3D_Export_To_ClImage2D, Tensor_3D,
  Tensor_4D, Tensor_4D_t_Buffer, Tensor_4D_t_Image
}
 Verbose and explicit way to enumerate all the tensor arguments variants used by all kernel implementations. More...
 
enum  MemoryType { Core = 0, Auxiliary = 1 }
 Type of memory used by a Workload Tensor. More...
 
enum  SharedVarIO { Input, Output }
 We introduce the concept of Shared Variables in the context of kernel building. More...
 
enum  SharedVarGroup { Argument, Automatic }
 
enum  ComponentType { Simple, Complex, Store }
 
enum  ClippingStrategy { TOP_LEFT, TOP_RIGHT, BOTTOM_LEFT, BOTTOM_RIGHT }
 
enum  StoreType {
  VStore, VStorePartial, StoreRow, ConvertStoreRow,
  StoreBlock, ConvertStoreBlock, StoreRowPartial, StoreBlockPartial,
  StoreBlockBoundaryAware, StoreVectorSelect, TStoreIndirectWidthSelect
}
 
enum  Complexity { Simple, Complex }
 
enum  OperatorComplexity { Complex = 0, Simple }
 

Functions

Status build (ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx)
 Build a ClWorkload from an OperatorGraph. More...
 
template<typename T >
bool is_in (const T &v, const std::vector< T > &vec)
 
Status validate (const OperatorGraph &op_graph)
 Return the validity of op_graph, usually after performing an operation (e.g. More...
 
bool operator< (const OpTensor &t0, const OpTensor &t1)
 Provide order of OpTensor by checking if t0 is "lower than" t1. More...
 
OpTensor add_tensor (OperatorGraph &graph, ITensorInfo &info)
 Associate a TensorInfo with a newly created OpTensor in the graph. More...
 
bool operator< (const Operator &op0, const Operator &op1)
 Provide order of Operator by checking if op0 is "lower than" op1. More...
 
Operator add_op_conv2d (OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst)
 Add op Conv2d to graph. More...
 
Operator add_op_conv2d (OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor dst)
 
void force_conv2d_method (OperatorGraph &graph, Operator conv2d, ConvolutionMethod method)
 (Only for Debugging and Testing) Force a conv2d method. More...
 
Operator add_op_elementwise_add (OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
 Add op Add to graph, and optionally describes fusion through passing of intermediate OpTensor s. More...
 
bool operator== (const OpTensor &t0, const OpTensor &t1)
 
bool operator== (const Padding2D &pad0, const Padding2D &pad1)
 
bool operator== (const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1)
 
bool operator== (const AddDescriptor &, const AddDescriptor &)
 
Status bind_tensors (ClAuxTensorData &aux_tensor_data, TensorPackMap &prepare_pack_map, TensorPackMap &run_pack_map, const ClWorkload &workload, const OpTensorBinding &op_tensors)
 Bind tensor memory to packs used by prepare and run methods. More...
 
Status add_tensor (ClKernelBlueprint &kernel_blueprint, ITensorInfo *tensor_info, ArgumentID &id, ArgumentID merge_point)
 
Status add_kcomp_eltwise_add (ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
 Component: Eltwise Add. More...
 
Status add_kcomp_activation (ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id)
 Component: Activation. More...
 
Status add_kcomp_direct_conv2d (ClKernelBlueprint &, const ClDirectConv2dKernelDescriptor &, ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id)
 Component: Direct Convolution. More...
 
Status add_kcomp_store (ClKernelBlueprint &kernel_blueprint, const StoreType &store_type, ArgumentID src_tile, ArgumentID dst_tile)
 
Status update_merge_point (ClKernelBlueprint &, ArgumentID t_id, ArgumentID merge_point)
 Update existing merge tensor merge_point to point to t_id. More...
 
Status set_tile_info (ClKernelBlueprint &bp, const TileDescriptor &tile_info)
 
Status build (ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint &)
 Build final kernel source from KernelBlueprint. More...
 
DependencyGraph get_dependency_graph (const ClKernelBlueprint &blueprint)
 Get dependency graph. More...
 
Status tune_static (ClExecutionDescriptor &, const ClKernelCode &)
 
bool export_to_cl_image_support (const ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
 
inline ::std::ostream & operator<< (::std::ostream &os, const CLBuildOptions::StringSet &build_opts)
 
inline ::std::ostream & operator<< (::std::ostream &os, const CLBuildOptions &cl_build_opts)
 
std::string to_string (const CLBuildOptions &cl_build_opts)
 
inline ::std::ostream & operator<< (::std::ostream &os, const ClKernelCode &code)
 
std::string to_string (const ClKernelCode &code)
 
std::vector< const ClKernel * > traverse (const ClKernelFusionGroup &group)
 
std::vector< const ClKernelFusionGroup * > traverse (const ClFusedKernelGraph &graph)
 
std::vector< ClKernelFusionGroup * > traverse (ClFusedKernelGraph &graph)
 
std::pair< Status, ClFusedKernelGraph > init_fusion_graph (const ClKernelGraph &kernel_graph)
 
Status fuse (ClFusedKernelGraph &fused_kernel_graph)
 
Status generate_store (ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg)
 
Status generate (ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph)
 
std::vector< const ClKernel * > traverse (const ClKernelGraph &graph)
 
std::vector< ClKernel * > traverse (ClKernelGraph &graph)
 
std::vector< const OperatorContent * > traverse (const OperatorGraph::Implementation &graph)
 
std::vector< OperatorContent * > traverse (OperatorGraph::Implementation &graph)
 
Status translate (ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph)
 

Typedef Documentation

◆ ArgumentID

Definition at line 41 of file ClKernelBuildingAPI.h.

◆ AuxMemoryLifetime

Definition at line 65 of file IWorkload.h.

◆ ClKernelArgList

using ClKernelArgList = std::map<int, ClKernelArgDescriptor>

Definition at line 87 of file ClWorkload.h.

◆ ComponentID

Definition at line 254 of file Common.h.

◆ ComponentList

using ComponentList = std::vector<ComponentID>

Definition at line 255 of file Common.h.

◆ ComponentUniquePtr

using ComponentUniquePtr = std::unique_ptr<IClKernelComponent>

Definition at line 413 of file Common.h.

◆ Id

Definition at line 230 of file ClKernelGraph.h.

◆ OpTensorBinding

using OpTensorBinding = std::map<OpTensor, ITensor *>

Map OpTensor handles to their corresponding ITensor memory.

Definition at line 44 of file ClCompositeOperator.h.

Enumeration Type Documentation

◆ ClippingStrategy

◆ ClKernelTensorArgType

enum ClKernelTensorArgType : int
strong

Verbose and explicit way to enumerate all the tensor arguments variants used by all kernel implementations.

This avoids any ambiguity in what kernel arguments are passed

Enumerator
Scalar 
Vector 
Image 
Image_Reinterpret_As_3D 
Image_Export_To_ClImage2D 
Image_3D 
Image_3D_Export_To_ClImage2D 
Tensor_3D 
Tensor_4D 
Tensor_4D_t_Buffer 
Tensor_4D_t_Image 

Definition at line 46 of file ClWorkload.h.

◆ Complexity

◆ ComponentType

◆ MemoryType

enum MemoryType
strong

Type of memory used by a Workload Tensor.

Enumerator
Core 

Core memory used by the Workload Tensor, e.g.

for argument tensors

Auxiliary 

Auxiliary memory required by the Workload Tensor, e.g.

for temporary tensors

Definition at line 59 of file IWorkload.h.

60 {
61  Core = 0, /**< Core memory used by the Workload Tensor, e.g. for argument tensors */
62  Auxiliary = 1, /**< Auxiliary memory required by the Workload Tensor, e.g. for temporary tensors */
63 };
Core memory used by the Workload Tensor, e.g.
Auxiliary memory required by the Workload Tensor, e.g.

◆ OperatorComplexity

◆ SharedVarGroup

enum SharedVarGroup
strong
Enumerator
Argument 
Automatic 

Definition at line 65 of file Common.h.

66 {
67  Argument, // Parameters to a kernel function == dst or src tensors of the whole blueprint graph
68  Automatic // Automatic variables declared within the kernel body == intermediate tensors of the whole blueprint graph
69 };

◆ SharedVarIO

enum SharedVarIO
strong

We introduce the concept of Shared Variables in the context of kernel building.

They are variables that can be accessed / shared among all the kernel components within a single kernel. For now we consider 2 groups of shared variables: Argument: The argument variables (parameters) of a kernel Automatic: The automatic variables declared inside a kernel All Shared Variables have the same kernel scope, and are thus visible to all kernel components

Enumerator
Input 
Output 

Definition at line 59 of file Common.h.

◆ StoreType

Function Documentation

◆ add_kcomp_activation()

Status add_kcomp_activation ( ClKernelBlueprint ,
const ClActivationKernelDescriptor ,
ArgumentID  ,
ArgumentID  
)

Component: Activation.

Definition at line 70 of file ClKernelBuildingAPI.cpp.

71 {
72  return Status{};
73 }

◆ add_kcomp_direct_conv2d()

Status add_kcomp_direct_conv2d ( ClKernelBlueprint kernel_blueprint,
const ClDirectConv2dKernelDescriptor direct_conv2d_desc,
ArgumentID  src_id,
ArgumentID  weight_id,
ArgumentID  bias_id,
ArgumentID dst_id 
)

Component: Direct Convolution.

Definition at line 75 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::Implementation::add_component(), ClKernelBlueprint::impl(), Input, and Output.

Referenced by ClDirectConv2dKernel::generate().

78 {
79  kernel_blueprint.impl().add_component(
80  std::make_unique<ClDirectConvolutionKernelComponent>(
81  &kernel_blueprint,
82  direct_conv2d_desc,
83  SharedVarLink{ src_id, SharedVarIO::Input },
84  SharedVarLink{ weight_id, SharedVarIO::Input },
85  SharedVarLink{ dst_id, SharedVarIO::Output },
86  SharedVarLink{ bias_id, SharedVarIO::Input }));
87 
88  return Status{};
89 }

◆ add_kcomp_eltwise_add()

Status add_kcomp_eltwise_add ( ClKernelBlueprint kernel_blueprint,
const ClEltwiseAddKernelDescriptor ,
ArgumentID  src0_id,
ArgumentID  src1_id,
ArgumentID dst_id 
)

Component: Eltwise Add.

Definition at line 58 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::Implementation::add_component(), ClKernelBlueprint::impl(), Input, and Output.

Referenced by ClAddKernel::generate().

60 {
61  kernel_blueprint.impl().add_component(
62  std::make_unique<ClElementwiseAddKernelComponent>(
63  &kernel_blueprint,
64  SharedVarLink{ src0_id, SharedVarIO::Input },
65  SharedVarLink{ src1_id, SharedVarIO::Input },
66  SharedVarLink{ dst_id, SharedVarIO::Output }));
67 
68  return Status{};
69 }

◆ add_kcomp_store()

Status add_kcomp_store ( ClKernelBlueprint kernel_blueprint,
const StoreType store_type,
ArgumentID  src_tile,
ArgumentID  dst_tile 
)

Definition at line 91 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::Implementation::add_component(), ARM_COMPUTE_ERROR, ClKernelBlueprint::impl(), Input, Output, StoreBlockBoundaryAware, and TStoreIndirectWidthSelect.

Referenced by generate_store().

92 {
93  switch(store_type)
94  {
95  case StoreType::StoreBlockBoundaryAware:
96  kernel_blueprint.impl().add_component(
97  std::make_unique<ClStoreBlockBoundaryAwareKernelComponent>(
98  &kernel_blueprint,
99  SharedVarLink{ src_tile, SharedVarIO::Input },
100  SharedVarLink{ dst_tile, SharedVarIO::Output }));
101  break;
102  case StoreType::TStoreIndirectWidthSelect:
103  kernel_blueprint.impl().add_component(
104  std::make_unique<ClStoreIndirectWidthSelectKernelComponent>(
105  &kernel_blueprint,
106  SharedVarLink{ src_tile, SharedVarIO::Input },
107  SharedVarLink{ dst_tile, SharedVarIO::Output }));
108  break;
109  default:
110  ARM_COMPUTE_ERROR("Store mode not yet supported.");
111  }
112 
113  return Status{};
114 }
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

◆ add_op_conv2d() [1/2]

Operator add_op_conv2d ( OperatorGraph graph,
const Conv2dDescriptor desc,
OpTensor  input,
OpTensor  weights,
OpTensor  bias,
OpTensor  dst 
)

Add op Conv2d to graph.

Parameters
[in,out]graphOperatorGraph where the operator is added to
[in]descOperator descriptor
[in]inputInput OpTensor
[in]weightsWeights OpTensor
[in]bias(Optional) bias OpTensor
[in]dstDestination OpTensor
Returns
Operator
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 134 of file OperatorGraph.cpp.

References arm_compute::ACL_DST_0, arm_compute::ACL_SRC_0, arm_compute::ACL_SRC_1, arm_compute::ACL_SRC_2, ITensorDescPack< TDesc >::add_const_tensor(), OperatorGraph::Implementation::add_node(), DependencyGraph::add_operator(), arm_compute::auto_init_if_empty(), Padding2D::bottom, arm_compute::misc::shape_calculator::compute_deep_convolution_shape(), arm_compute::FLOOR, DependencyGraph::get_root_ops(), OpTensorContent::get_tensor_info(), OperatorGraph::Implementation::graph, OpTensor::id(), OperatorGraph::impl(), Padding2D::left, Conv2dDescriptor::pad, Padding2D::right, arm_compute::RUNTIME_ERROR, arm_compute::test::validation::shape, arm_compute::test::validation::src, OperatorGraph::Implementation::status, Conv2dDescriptor::stride, OperatorGraph::Implementation::tensors, Padding2D::top, ITensorInfo::total_size(), Size2D::x(), and Size2D::y().

Referenced by add_op_conv2d(), and arm_compute::test::validation::TEST_CASE().

135 {
136  // Check if map is empty as a complex operator can only be root
137  if(!graph.impl()->graph.get_root_ops().empty())
138  {
139  graph.impl()->status = Status{ ErrorCode::RUNTIME_ERROR, "Cannot add multiple complex operators" };
140  return Operator{};
141  }
142 
143  std::pair<Status, DependencyGraph::Id> status_id;
144 
145  if(bias.id() == -1)
146  {
147  status_id = graph.impl()->graph.add_operator({ input.id(), weights.id() }, { dst.id() });
148  }
149  else
150  {
151  status_id = graph.impl()->graph.add_operator({ input.id(), weights.id(), bias.id() }, { dst.id() });
152  }
153 
154  check_dependency_graph_op_success(graph, status_id.first);
155 
156  Operator op_node(status_id.second);
157 
158  // Infer TensorInfo
159  OpTensorContent *dst_tensor = graph.impl()->tensors[dst.id()].get();
160  if(dst_tensor->get_tensor_info()->total_size() == 0)
161  {
162  auto src = graph.impl()->tensors[input.id()]->get_tensor_info();
163  auto wts = graph.impl()->tensors[weights.id()]->get_tensor_info();
164  auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), wts->tensor_shape(), PadStrideInfo(desc.stride.x(), desc.stride.y(), desc.pad.left,
165  desc.pad.right,
166  desc.pad.top, desc.pad.bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
167 
168  auto_init_if_empty(*(dst_tensor->get_tensor_info()), src->clone()->set_tensor_shape(shape));
169  }
170 
171  // Check execution space
172  auto dst_info = dst_tensor->get_tensor_info();
173  check_execution_shape(graph, *dst_info);
174 
175  ITensorDescPack<OpTensorContent> tensors;
176  tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[input.id()].get());
177  tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[weights.id()].get());
178  if(bias.id() != -1)
179  {
180  tensors.add_const_tensor(ACL_SRC_2, graph.impl()->tensors[bias.id()].get());
181  }
182  tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
183 
184  graph.impl()->add_node<Conv2dContent>(status_id.second, desc, tensors);
185  check_multiple_roots(graph);
186 
187  return op_node;
188 }
SimpleTensor< float > src
Definition: DFT.cpp:155
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
Calculate the deep convolution shape output shape of a tensor.
const int32_t * bias

◆ add_op_conv2d() [2/2]

Operator add_op_conv2d ( OperatorGraph graph,
const Conv2dDescriptor desc,
OpTensor  input,
OpTensor  weights,
OpTensor  dst 
)

Definition at line 190 of file OperatorGraph.cpp.

References add_op_conv2d().

191 {
192  return add_op_conv2d(graph, desc, input, weights, OpTensor(-1), dst);
193 }
Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst)
Add op Conv2d to graph.

◆ add_op_elementwise_add()

Operator add_op_elementwise_add ( OperatorGraph graph,
const AddDescriptor desc,
OpTensor  lhs,
OpTensor  rhs,
OpTensor  dst 
)

Add op Add to graph, and optionally describes fusion through passing of intermediate OpTensor s.

Parameters
[in,out]graphOperatorGraph where the operator is added to
[in]descOperator descriptor
[in]lhsLhs OpTensor
[in]rhsRhs OpTensor
[in]dstDestination OpTensor
Returns
Operator
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 201 of file OperatorGraph.cpp.

References arm_compute::ACL_DST_0, arm_compute::ACL_SRC_0, arm_compute::ACL_SRC_1, ITensorDescPack< TDesc >::add_const_tensor(), OperatorGraph::Implementation::add_node(), DependencyGraph::add_operator(), arm_compute::auto_init_if_empty(), ITensorInfo::broadcast_shape_and_valid_region(), OpTensorContent::get_tensor_info(), OperatorGraph::Implementation::graph, OpTensor::id(), OperatorGraph::impl(), OperatorGraph::Implementation::tensors, and ITensorInfo::total_size().

Referenced by arm_compute::test::validation::TEST_CASE().

202 {
203  auto id = graph.impl()->graph.add_operator({ rhs.id(), lhs.id() }, { dst.id() });
204  check_dependency_graph_op_success(graph, id.first);
205 
206  Operator op_node(id.second);
207 
208  // Infer TensorInfo
209  auto node_lhs = graph.impl()->tensors[lhs.id()]->get_tensor_info();
210  auto node_rhs = graph.impl()->tensors[rhs.id()]->get_tensor_info();
211  OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get();
212 
213  if(node_dst->get_tensor_info()->total_size() == 0)
214  {
215  const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*node_rhs, *node_lhs);
216  auto_init_if_empty(*(node_dst->get_tensor_info()), node_lhs->clone()->set_tensor_shape(broadcast_pair.first));
217  }
218 
219  // Check execution space
220  auto dst_info = node_dst->get_tensor_info();
221  check_execution_shape(graph, *dst_info);
222 
223  ITensorDescPack<OpTensorContent> tensors;
224  tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[lhs.id()].get());
225  tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[rhs.id()].get());
226  tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
227  graph.impl()->add_node<AddContent>(id.second, desc, tensors);
228  check_multiple_roots(graph);
229 
230  return op_node;
231 }
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...

◆ add_tensor() [1/2]

Status add_tensor ( ClKernelBlueprint kernel_blueprint,
ITensorInfo tensor_info,
ArgumentID id,
ArgumentID  merge_point 
)

Definition at line 52 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::Implementation::add_kernel_tensor(), and ClKernelBlueprint::impl().

53 {
54  id = kernel_blueprint.impl().add_kernel_tensor(tensor_info, merge_point);
55  return Status{};
56 }

◆ add_tensor() [2/2]

OpTensor add_tensor ( OperatorGraph graph,
ITensorInfo info 
)

Associate a TensorInfo with a newly created OpTensor in the graph.

Note
info needs to remain in scope and valid until the workload has finished building
Can pass in an empty TensorInfo for a destination Tensor, in which case info will be inferred from the source tensors
Parameters
[in,out]graphOperatorGraph where the tensor is added
[in]infoTensorInfo to be associated
Returns
OpTensor
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 126 of file OperatorGraph.cpp.

References DependencyGraph::add_tensor(), OperatorGraph::Implementation::add_tensor(), OperatorGraph::Implementation::graph, and OperatorGraph::impl().

Referenced by ClDirectConv2dKernel::generate(), ClAddKernel::generate(), generate_store(), and arm_compute::test::validation::TEST_CASE().

127 {
128  auto id = graph.impl()->graph.add_tensor();
129  OpTensor op_tensor(id);
130  graph.impl()->add_tensor(id, &info);
131  return op_tensor;
132 }
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)

◆ bind_tensors()

Status bind_tensors ( ClAuxTensorData aux_tensor_data,
TensorPackMap prepare_pack_map,
TensorPackMap run_pack_map,
const ClWorkload workload,
const OpTensorBinding op_tensors 
)

Bind tensor memory to packs used by prepare and run methods.

Create auxiliary tensor objects and their memory requirements if needed

Note
This is the only method for external user to create ClAuxTensorData, and the prepare and run TensorPackMaps
Parameters
[out]aux_tensor_dataAuxiliary Tensors required by the workload
[out]prepare_pack_mapTensorPackMap used by the prepare method
[out]run_pack_mapTensorPackMap used by the run method
[in]workloadClWorkload to bind the tensors to
[in]op_tensorsCLTensor memory objects mapped from Core OpTensors
Returns
Status
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 104 of file ClCompositeOperator.cpp.

References ClAuxTensorData::add_aux_tensor(), ARM_COMPUTE_CREATE_ERROR, ARM_COMPUTE_RETURN_ON_ERROR, Auxiliary, Core, ClWorkload::op_tensor_id_lut, arm_compute::RUNTIME_ERROR, and ClWorkload::tensors.

Referenced by arm_compute::test::validation::TEST_CASE().

105 {
106  for(auto tensor : workload.tensors)
107  {
108  const auto wk_tensor_id = tensor.first; // workload tensor id
109  ICLTensor *tensor_object = nullptr;
110  if(tensor.second.memory_type == MemoryType::Core)
111  {
112  const auto op_tensor_id = workload.op_tensor_id_lut.at(wk_tensor_id);
113  auto op_tensor_find = op_tensors.find(op_tensor_id);
114  if(op_tensor_find == op_tensors.end())
115  {
116  return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Cannot find binding for some operator tensor");
117  }
118  tensor_object = utils::cast::polymorphic_downcast<ICLTensor *>(op_tensor_find->second);
119  }
120  else if(tensor.second.memory_type == MemoryType::Auxiliary)
121  {
122  // Create aux tensor CLTensor object
123  const TensorInfo tensor_info = *tensor.second.info;
124  const auto memory_info = tensor.second.memory_info;
125  tensor_object = aux_tensor_data.add_aux_tensor(wk_tensor_id, tensor_info, memory_info);
126  }
127  else
128  {
129  return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported tensor memory type");
130  }
131 
 132  const auto st = add_tensor_to_tensor_pack(wk_tensor_id, tensor_object, workload, prepare_pack_map, run_pack_map);
 133  ARM_COMPUTE_RETURN_ON_ERROR(st);
 134  }
135  return Status{};
136 }
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
Definition: Error.h:159

◆ build() [1/2]

Status build ( ClKernelCode code,
const ClCodeBuilderContext ,
ClKernelBlueprint kernel_blueprint 
)

Build final kernel source from KernelBlueprint.

Definition at line 126 of file ClKernelBuildingAPI.cpp.

References ClKernelCode::arguments, ClKernelBlueprint::Implementation::build_code(), ClKernelBlueprint::Implementation::build_config_id(), ClKernelBlueprint::Implementation::build_kernel_name(), ClKernelCode::build_options, ClKernelBlueprint::Implementation::build_options(), ClKernelCode::code, ClKernelCode::config_id, ClKernelBlueprint::Implementation::finalize(), ClKernelBlueprint::Implementation::get_arguments(), ClKernelBlueprint::Implementation::get_execution_window(), ClKernelBlueprint::impl(), ClKernelCode::name, and ClKernelCode::window.

127 {
128  kernel_blueprint.impl().finalize();
129  code.name = kernel_blueprint.impl().build_kernel_name();
130  code.code = kernel_blueprint.impl().build_code();
131 
132  code.config_id = kernel_blueprint.impl().build_config_id();
133  code.build_options = kernel_blueprint.impl().build_options();
134  code.window = kernel_blueprint.impl().get_execution_window();
135  code.arguments = kernel_blueprint.impl().get_arguments();
136 
137  return Status{};
138 }

◆ build() [2/2]

Status build ( ClWorkload workload,
const OperatorGraph op_graph,
const ClWorkloadContext ctx 
)

Build a ClWorkload from an OperatorGraph.

Parameters
[out]workload
[in]op_graph
[in]ctx
Returns
Status
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 36 of file ClWorkload.cpp.

References ARM_COMPUTE_RETURN_ON_ERROR, ClWorkload::context, DependencyGraph::dst_tensors(), fuse(), generate(), DependencyGraph::get_merge_points(), IWorkload::graph, OperatorGraph::Implementation::graph, ClKernelGraph::graph, OperatorGraph::impl(), init_fusion_graph(), ClWorkload::op_tensor_id_lut, ClFusedKernelGraph::original_graph, DependencyGraph::src_tensors(), ClWorkload::status, translate(), and validate().

Referenced by Program::binary(), generate(), and arm_compute::test::validation::TEST_CASE().

37 {
38  workload.context = ctx;
39  ClKernelGraph kernel_graph;
 40  workload.status = validate(op_graph);
 41  ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
 42  workload.status = translate(kernel_graph, *op_graph.impl());
 43  ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
 44  ClFusedKernelGraph fused_k_graph;
 45  std::tie(workload.status, fused_k_graph) = init_fusion_graph(kernel_graph);
 46  ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
 47  workload.status = fuse(fused_k_graph);
 48  ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
 49  workload.status = generate(workload, ctx, fused_k_graph);
 50  ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
 51 
52  // Get operator tensor id to workload tensor id map
53  const auto op_tensor_to_kernel_tensor = fused_k_graph.original_graph->graph.get_merge_points();
54  const auto kernel_tensor_to_workload_tensor = workload.graph.get_merge_points();
55  for(const auto op_t : op_graph.impl()->graph.src_tensors())
56  {
57  const auto kernel_t = op_tensor_to_kernel_tensor.at(op_t);
58  const auto workload_t = kernel_tensor_to_workload_tensor.at(kernel_t);
59  workload.op_tensor_id_lut[workload_t] = op_t;
60  }
61  for(const auto op_t : op_graph.impl()->graph.dst_tensors())
62  {
63  const auto kernel_t = op_tensor_to_kernel_tensor.at(op_t);
64  const auto workload_t = kernel_tensor_to_workload_tensor.at(kernel_t);
65  workload.op_tensor_id_lut[workload_t] = op_t;
66  }
67  return workload.status;
68 }
Status validate(const OperatorGraph &op_graph)
Return the validity of op_graph, usually after performing an operation (e.g.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph)
Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph)
Status fuse(ClFusedKernelGraph &fused_kernel_graph)
std::pair< Status, ClFusedKernelGraph > init_fusion_graph(const ClKernelGraph &kernel_graph)

◆ export_to_cl_image_support()

bool arm_compute::experimental::dynamic_fusion::export_to_cl_image_support ( const ITensorInfo tensor,
GPUTarget  gpu_target,
DataLayout  data_layout 
)

Definition at line 251 of file ClDirectConvolutionKernelComponent.cpp.

References ITensorInfo::data_type(), arm_compute::G71, CLKernelLibrary::get(), arm_compute::get_arch_from_target(), arm_compute::get_cl_image_pitch_alignment(), CLKernelLibrary::get_device(), arm_compute::image2d_from_buffer_supported(), arm_compute::is_data_type_float(), arm_compute::MIDGARD, arm_compute::NHWC, and ITensorInfo::tensor_shape().

Referenced by ClDirectConvolutionKernelComponent::allocate_shared_vars(), ClDirectConv2dKernel::configure(), ClDirectConvolutionKernelComponent::generate_build_options(), and ClDirectConv2dKernel::run_op().

252 {
253  if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
254  {
255  return false;
256  }
257 
258  // If not floating point
259  if(!is_data_type_float(tensor->data_type()))
260  {
261  return false;
262  }
263 
264  if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
265  {
266  return false;
267  }
268 
269  // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
270  if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
271  {
272  return false;
273  }
274 
275  // Check cl image pitch alignment
276  if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
277  {
278  return false;
279  }
280 
281  const size_t image_w = tensor->tensor_shape()[0] / 4;
282  const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
283  const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
284  const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
285 
286  if(image_w > max_image_w || image_h > max_image_h)
287  {
288  return false;
289  }
290 
291  return true;
292 }
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:370
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:199
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:375
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:1010

◆ force_conv2d_method()

void force_conv2d_method ( OperatorGraph graph,
Operator  conv2d,
ConvolutionMethod  method 
)

(Only for Debugging and Testing) Force a conv2d method

Parameters
[in,out] graph   OperatorGraph where conv2d op is located
[in]     conv2d  Conv2d Op
[in]     method  Forced ConvolutionMethod
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 195 of file OperatorGraph.cpp.

References Operator::id(), OperatorGraph::impl(), and OperatorGraph::Implementation::operators.

Referenced by arm_compute::test::validation::TEST_CASE().

196 {
197  auto node = utils::cast::polymorphic_downcast<Conv2dContent *>(graph.impl()->operators[conv2d.id()].get());
198  node->set_method(method);
199 }

◆ fuse()

Status fuse ( ClFusedKernelGraph fused_kernel_graph)

Definition at line 96 of file ClFusedKernelGraph.cpp.

References ClFusedKernelGraph::can_fuse(), ClFusedKernelGraph::fuse(), and traverse().

Referenced by build().

97 {
98  // A naive fusion algorithm that's guaranteed to find optimal pattern if there are no branches
99  // If there are branches, the algorithm cannot guarantee optimality as it doesn't perform any searches
100 
101  bool fusion_found = false;
102  do
103  {
104  fusion_found = false;
105  const auto sorted_fgs = traverse(fused_kernel_graph);
106  if(sorted_fgs.size() <= 1)
107  {
108  // Only one or zero fusion group, thus no need to perform fusion
109  return Status{};
110  }
111  auto fgs_combo = get_combinations(sorted_fgs);
112  for(auto fgs : fgs_combo)
113  {
114  auto fg0 = fgs.first;
115  auto fg1 = fgs.second;
116  const auto st = fused_kernel_graph.can_fuse(*fg0, *fg1);
117  if(bool(st))
118  {
119  const auto st = fused_kernel_graph.fuse(*fg0, *fg1);
120  if(!bool(st))
121  {
122  return st;
123  }
124  fusion_found = true;
125  break;
126  }
127  }
128  }
129  while(fusion_found);
130  return Status{};
131 }
std::vector< ClKernelFusionGroup * > traverse(ClFusedKernelGraph &graph)

◆ generate()

Status generate ( ClWorkload workload,
const ClWorkloadContext ctx,
const ClFusedKernelGraph fused_kernel_graph 
)

Definition at line 170 of file ClFusedKernelGraph.cpp.

References ClWorkload::add_unit_workload(), ClWorkload::add_workload_tensor(), build(), ClWorkload::context, DependencyGraph::dst_tensors(), ClFusedKernelGraph::fg_dependency, generate_store(), get_dependency_graph(), ClKernelGraph::get_tensor(), ClWorkloadContext::gpu_info, ClFusedKernelGraph::original_graph, set_tile_info(), DependencyGraph::src_tensors(), and traverse().

Referenced by build(), ClAddKernel::ClAddKernel(), ClDirectConv2dKernel::ClDirectConv2dKernel(), and ClKernel::ClKernel().

171 {
172  workload.context = ctx;
173  for(const auto &fg : traverse(fused_kernel_graph))
174  {
175  ClKernelBlueprint bp{};
176  for(const auto &kernel : traverse(*fg))
177  {
178  const auto st = kernel->generate(bp);
179  if(!bool(st))
180  {
181  return st;
182  }
183  }
184  auto st = set_tile_info(bp, fg->get_root_kernel()->config().tile_desc);
185  if(!bool(st))
186  {
187  return st;
188  }
189  st = generate_store(bp, fused_kernel_graph, *fg);
190  if(!bool(st))
191  {
192  return st;
193  }
194 
195  ClKernelCode code{};
196  st = build(code, ClCodeBuilderContext{ ctx.gpu_info }, bp);
197  if(!bool(st))
198  {
199  return st;
200  }
201  const auto bp_graph = get_dependency_graph(bp);
202 
203  // Get tensor info
204  std::vector<Id> workload_src_tensors{};
205  for(const auto &src_t_id : fused_kernel_graph.fg_dependency.src_tensors(fg->id))
206  {
207  const auto src_t = fused_kernel_graph.original_graph->get_tensor(src_t_id);
208  // Get corresponding kernel arg descriptor
209  const auto arg_desc = code.arguments.at(bp_graph.get_merge_points().at(src_t->id));
210  const auto kernel_t_id = workload.add_workload_tensor(src_t->desc, src_t->memory_type, src_t->memory_info, arg_desc, src_t->id);
211  workload_src_tensors.push_back(kernel_t_id);
212  }
213  std::vector<Id> workload_dst_tensors{};
214  for(const auto &dst_t_id : fused_kernel_graph.fg_dependency.dst_tensors(fg->id))
215  {
216  const auto dst_t = fused_kernel_graph.original_graph->get_tensor(dst_t_id);
217  // Get corresponding kernel arg descriptor
218  const auto arg_desc = code.arguments.at(bp_graph.get_merge_points().at(dst_t->id));
219  const auto kernel_t_id = workload.add_workload_tensor(dst_t->desc, dst_t->memory_type, dst_t->memory_info, arg_desc, dst_t->id);
220  workload_dst_tensors.push_back(kernel_t_id);
221  }
222 
223  workload.add_unit_workload(fg->get_root_kernel()->config().stage, code, workload_src_tensors, workload_dst_tensors);
224  }
225 
226  return Status{};
227 }
DependencyGraph get_dependency_graph(const ClKernelBlueprint &blueprint)
Get dependency graph.
std::vector< ClKernelFusionGroup * > traverse(ClFusedKernelGraph &graph)
Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx)
Build a ClWorkload from an OperatorGraph.
Definition: ClWorkload.cpp:36
Status set_tile_info(ClKernelBlueprint &bp, const TileDescriptor &tile_info)
Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg)

◆ generate_store()

Status generate_store ( ClKernelBlueprint bp,
const ClFusedKernelGraph fused_kernel_graph,
const ClKernelFusionGroup fg 
)

NOTE: dst tensor must have already been added to the blueprint at this point

NOTE: the extra dst tensor is needed as the store kcomp requires 2 tensors. But this is irrelevant to the fused kernel graph since both tensors share the exact same info and kernel arg descriptor

NOTE: Update the merge point map to link dst_dst_id with dst_t->id instead. This is required because the get_arguments() returned by the blueprint returns the dst tensor added by the store component

Definition at line 132 of file ClFusedKernelGraph.cpp.

References add_kcomp_store(), add_tensor(), ClKernel::config(), DependencyGraph::dst_tensors(), ClFusedKernelGraph::fg_dependency, ClKernelFusionGroup::get_root_kernel(), ClKernelGraph::get_tensor(), ClKernelFusionGroup::id, ClFusedKernelGraph::original_graph, ClKernelConfig::store_type, and update_merge_point().

Referenced by generate().

133 {
134  Status st{};
135  for(const auto &dst_t_id : fused_kernel_graph.fg_dependency.dst_tensors(fg.id))
136  {
137  const auto dst_t = fused_kernel_graph.original_graph->get_tensor(dst_t_id);
138 
139  /// NOTE: dst tensor must have already been added to the blueprint at this point
140  ArgumentID dst_id;
141  st = add_tensor(bp, dst_t->desc, dst_id, dst_t->id);
142  if(!bool(st))
143  {
144  return st;
145  }
146  /// NOTE: the extra dst tensor is needed as the store kcomp requires 2 tensors. But this is irrelevant to the fused kernel graph
147  /// since both tensors share the exact same info and kernel arg descriptor
148  ArgumentID dst_dst_id;
149  st = add_tensor(bp, dst_t->desc, dst_dst_id);
150  if(!bool(st))
151  {
152  return st;
153  }
154  /// NOTE: Update the merge point map to link dst_dst_id with dst_t->id instead.
155  /// This is required because the get_arguments() returned by the blueprint returns the dst tensor added by the store component
156  st = update_merge_point(bp, dst_dst_id, dst_t->id);
157  if(!bool(st))
158  {
159  return st;
160  }
161  st = add_kcomp_store(bp, fg.get_root_kernel()->config().store_type, dst_id, dst_dst_id);
162  if(!bool(st))
163  {
164  return st;
165  }
166  }
167  return st;
168 }
Status update_merge_point(ClKernelBlueprint &bp, ArgumentID t_id, ArgumentID merge_point)
Update existing merge tensor merge_point to point to t_id.
OpTensor add_tensor(OperatorGraph &graph, ITensorInfo &info)
Associate a TensorInfo with a newly created OpTensor in the graph.
Status add_kcomp_store(ClKernelBlueprint &kernel_blueprint, const StoreType &store_type, ArgumentID src_tile, ArgumentID dst_tile)

◆ get_dependency_graph()

DependencyGraph get_dependency_graph ( const ClKernelBlueprint blueprint)

Get dependency graph.

Returns
DependencyGraph

Definition at line 139 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::Implementation::get_graph(), and ClKernelBlueprint::impl().

Referenced by generate().

140 {
141  return blueprint.impl().get_graph();
142 }

◆ init_fusion_graph()

std::pair< Status, ClFusedKernelGraph > init_fusion_graph ( const ClKernelGraph kernel_graph)

Definition at line 83 of file ClFusedKernelGraph.cpp.

References ClFusedKernelGraph::original_graph, and traverse().

Referenced by build().

84 {
85  ClFusedKernelGraph fused_kernel_graph{};
86  fused_kernel_graph.original_graph = &kernel_graph; // Store a pointer to the original kernel graph (no copy is made)
87  fused_kernel_graph.fg_dependency = DependencyGraph();
88  // Initialize all fusion groups
89  for(const auto &kernel : traverse(kernel_graph))
90  {
91  fused_kernel_graph.add_fusion_group({ kernel });
92  }
93  return { Status{}, fused_kernel_graph };
94 }
std::vector< ClKernelFusionGroup * > traverse(ClFusedKernelGraph &graph)

◆ is_in()

bool arm_compute::experimental::dynamic_fusion::is_in ( const T &  v,
const std::vector< T > &  vec 
)

Definition at line 41 of file DependencyGraph.h.

References arm_compute::mlgo::parser::end().

Referenced by ClFusedKernelGraph::can_fuse(), ClFusedKernelGraph::fuse(), and DependencyGraph::path_exists_from_op_to_op().

42 {
43  return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
44 }
void end(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:290

◆ operator<() [1/2]

bool operator< ( const OpTensor t0,
const OpTensor t1 
)

Provide order of OpTensor by checking if t0 is "lower than" t1.

Parameters
[in] t0  OpTensor
[in] t1  OpTensor
Returns
true if t0 is lower than t1
false otherwise

Definition at line 84 of file OperatorGraph.cpp.

References OpTensor::id().

85 {
86  return t0.id() < t1.id();
87 }

◆ operator<() [2/2]

bool operator< ( const Operator op0,
const Operator op1 
)

Provide order of Operator by checking if op0 is "lower than" op1.

Parameters
[in] op0  Operator
[in] op1  Operator
Returns
true if op0 is lower than op1
false otherwise

Definition at line 99 of file OperatorGraph.cpp.

References Operator::id().

100 {
101  return op0.id() < op1.id();
102 }

◆ operator<<() [1/3]

inline ::std::ostream& arm_compute::experimental::dynamic_fusion::operator<< ( ::std::ostream &  os,
const CLBuildOptions::StringSet &  build_opts 
)

Definition at line 37 of file Utils.h.

38 {
39  for(const auto &opt : build_opts)
40  {
41  os << opt << ",";
42  }
43  return os;
44 }

◆ operator<<() [2/3]

inline ::std::ostream& arm_compute::experimental::dynamic_fusion::operator<< ( ::std::ostream &  os,
const CLBuildOptions cl_build_opts 
)

Definition at line 45 of file Utils.h.

References CLBuildOptions::options().

46 {
47  os << cl_build_opts.options();
48  return os;
49 }

◆ operator<<() [3/3]

inline ::std::ostream& arm_compute::experimental::dynamic_fusion::operator<< ( ::std::ostream &  os,
const ClKernelCode code 
)

Definition at line 57 of file Utils.h.

References ClKernelCode::build_options, ClKernelCode::code, and ClKernelCode::name.

58 {
59  os << "name: " << code.name << std::endl;
60  os << "code: " << code.code << std::endl;
61  os << "build_opts: " << code.build_options << std::endl;
62  return os;
63 }

◆ operator==() [1/4]

bool operator== ( const OpTensor t0,
const OpTensor t1 
)

Definition at line 103 of file OperatorGraphImpl.cpp.

References OpTensor::id().

Referenced by AddContent::AddContent(), ClAddKernel::ClAddKernel(), ClDirectConv2dKernel::ClDirectConv2dKernel(), ClKernel::ClKernel(), Conv2dContent::Conv2dContent(), and OperatorContent::OperatorContent().

104 {
105  return std::make_tuple(t0.id()) == std::make_tuple(t1.id());
106 }

◆ operator==() [2/4]

bool operator== ( const Padding2D pad0,
const Padding2D pad1 
)

Definition at line 107 of file OperatorGraphImpl.cpp.

References Padding2D::bottom, Padding2D::left, Padding2D::right, and Padding2D::top.

108 {
109  return std::make_tuple(pad0.top, pad0.right, pad0.bottom, pad0.left) == std::make_tuple(pad1.top, pad1.right, pad1.bottom, pad1.left);
110 }

◆ operator==() [3/4]

bool operator== ( const Conv2dDescriptor conv2d0,
const Conv2dDescriptor conv2d1 
)

Definition at line 111 of file OperatorGraphImpl.cpp.

References Conv2dDescriptor::dilation, Conv2dDescriptor::pad, and Conv2dDescriptor::stride.

112 {
113  return std::make_tuple(conv2d0.pad, conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.pad, conv2d1.stride, conv2d1.dilation);
114 }

◆ operator==() [4/4]

bool operator== ( const AddDescriptor ,
const AddDescriptor  
)

Definition at line 116 of file OperatorGraphImpl.cpp.

117 {
118  return std::make_tuple() == std::make_tuple(); // Currently two Add ops are always the same
119 }

◆ set_tile_info()

Status set_tile_info ( ClKernelBlueprint bp,
const TileDescriptor tile_info 
)

Definition at line 121 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::impl(), and ClKernelBlueprint::Implementation::set_tile_info().

Referenced by generate().

122 {
123  bp.impl().set_tile_info(tile_info);
124  return Status{};
125 }

◆ to_string() [1/2]

std::string arm_compute::experimental::dynamic_fusion::to_string ( const CLBuildOptions cl_build_opts)
inline

Definition at line 51 of file Utils.h.

References caffe_data_extractor::str.

Referenced by ClKernelBlueprint::Implementation::get_graph(), ClElementwiseAddKernelComponent::name(), and ClDirectConvolutionKernelComponent::name().

52 {
53  std::stringstream str;
54  str << cl_build_opts;
55  return str.str();
56 }

◆ to_string() [2/2]

std::string arm_compute::experimental::dynamic_fusion::to_string ( const ClKernelCode code)
inline

Definition at line 64 of file Utils.h.

References caffe_data_extractor::str.

65 {
66  std::stringstream str;
67  str << code;
68  return str.str();
69 }

◆ translate()

Status translate ( ClKernelGraph kernel_graph,
const OperatorGraph::Implementation op_graph 
)

Definition at line 373 of file OperatorGraphImpl.cpp.

References ARM_COMPUTE_RETURN_ON_ERROR, and traverse().

Referenced by build(), AddContent::complexity(), OperatorContent::OperatorContent(), and Conv2dContent::set_method().

374 {
375  for(const auto &op : traverse(op_graph))
376  {
377  const auto st = op->translate(kernel_graph);
379  }
380  return Status{};
381 }
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::vector< OperatorContent * > traverse(OperatorGraph::Implementation &graph)

◆ traverse() [1/7]

std::vector< const ClKernel * > traverse ( const ClKernelFusionGroup group)

Definition at line 50 of file ClFusedKernelGraph.cpp.

References ClKernelFusionGroup::fused_kernels, ClKernelFusionGroup::graph, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

Referenced by ClKernelBlueprint::Implementation::build_config_id(), ClKernelBlueprint::Implementation::build_kernel_name(), ClKernelBlueprint::Implementation::build_options(), fuse(), ClFusedKernelGraph::fuse(), generate(), ClKernelBlueprint::Implementation::get_graph(), init_fusion_graph(), and translate().

51 {
52  std::vector<const ClKernel *> kernels;
53  const auto sorted = group.graph.topological_sort();
54  for(const auto &pack : sorted.second)
55  {
56  kernels.push_back(group.fused_kernels.at(pack.op));
57  }
58  return kernels;
59 }

◆ traverse() [2/7]

std::vector< const ClKernelFusionGroup * > traverse ( const ClFusedKernelGraph graph)

Definition at line 61 of file ClFusedKernelGraph.cpp.

References ClFusedKernelGraph::fg_dependency, ClFusedKernelGraph::fusion_groups, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

62 {
63  std::vector<const ClKernelFusionGroup *> kernels;
64  const auto sorted = graph.fg_dependency.topological_sort();
65  for(const auto &pack : sorted.second)
66  {
67  kernels.push_back(graph.fusion_groups.at(pack.op).get());
68  }
69  return kernels;
70 }

◆ traverse() [3/7]

std::vector< ClKernelFusionGroup * > traverse ( ClFusedKernelGraph graph)

Definition at line 72 of file ClFusedKernelGraph.cpp.

References ClFusedKernelGraph::fg_dependency, ClFusedKernelGraph::fusion_groups, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

73 {
74  std::vector<ClKernelFusionGroup *> kernels;
75  const auto sorted = graph.fg_dependency.topological_sort();
76  for(const auto &pack : sorted.second)
77  {
78  kernels.push_back(graph.fusion_groups.at(pack.op).get());
79  }
80  return kernels;
81 }

◆ traverse() [4/7]

std::vector< const ClKernel * > traverse ( const ClKernelGraph graph)

Definition at line 195 of file ClKernelGraph.cpp.

References ClKernelGraph::graph, ClKernelGraph::kernels, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

196 {
197  std::vector<const ClKernel *> kernels;
198  const auto sorted = graph.graph.topological_sort();
199  for(const auto &pack : sorted.second)
200  {
201  kernels.push_back(graph.kernels.at(pack.op).get());
202  }
203  return kernels;
204 }

◆ traverse() [5/7]

std::vector< ClKernel * > traverse ( ClKernelGraph graph)

Definition at line 205 of file ClKernelGraph.cpp.

References ClKernelGraph::graph, ClKernelGraph::kernels, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

206 {
207  std::vector<ClKernel *> kernels;
208  const auto sorted = graph.graph.topological_sort();
209  for(const auto &pack : sorted.second)
210  {
211  kernels.push_back(graph.kernels.at(pack.op).get());
212  }
213  return kernels;
214 }

◆ traverse() [6/7]

std::vector< const OperatorContent * > traverse ( const OperatorGraph::Implementation graph)

Definition at line 351 of file OperatorGraphImpl.cpp.

References OperatorGraph::Implementation::graph, OperatorGraph::Implementation::operators, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

352 {
353  std::vector<const OperatorContent *> ops;
354  const auto sorted = graph.graph.topological_sort();
355  for(const auto &pack : sorted.second)
356  {
357  ops.push_back(graph.operators.at(pack.op).get());
358  }
359  return ops;
360 }

◆ traverse() [7/7]

std::vector< OperatorContent * > traverse ( OperatorGraph::Implementation graph)

Definition at line 362 of file OperatorGraphImpl.cpp.

References OperatorGraph::Implementation::graph, OperatorGraph::Implementation::operators, arm_compute::test::validation::pack, and DependencyGraph::topological_sort().

363 {
364  std::vector<OperatorContent *> ops;
365  const auto sorted = graph.graph.topological_sort();
366  for(const auto &pack : sorted.second)
367  {
368  ops.push_back(graph.operators.at(pack.op).get());
369  }
370  return ops;
371 }

◆ tune_static()

Status tune_static ( ClExecutionDescriptor ,
const ClKernelCode  
)

Definition at line 143 of file ClKernelBuildingAPI.cpp.

144 {
145  return Status{};
146 }

◆ update_merge_point()

Status update_merge_point ( ClKernelBlueprint ,
ArgumentID  t_id,
ArgumentID  merge_point 
)

Update existing merge tensor merge_point to point to t_id.

Parameters
t_id
merge_point
Returns
Status

Definition at line 116 of file ClKernelBuildingAPI.cpp.

References ClKernelBlueprint::impl(), and ClKernelBlueprint::Implementation::update_merge_point().

Referenced by generate_store().

117 {
118  return bp.impl().update_merge_point(t_id, merge_point);
119 }

◆ validate()

Status validate ( const OperatorGraph op_graph)

Return the validity of op_graph, usually after performing an operation (e.g.

add_tensor) on it

Parameters
[in,out] op_graph  OperatorGraph to be validated
Returns
Status
Examples:
dynamic_fusion/cl_fused_conv2d_elementwise_add.cpp.

Definition at line 121 of file OperatorGraph.cpp.

References OperatorGraph::impl(), and OperatorGraph::Implementation::status.

Referenced by build(), ClAddKernel::ClAddKernel(), ClDirectConv2dKernel::ClDirectConv2dKernel(), CpuGemm::configure(), arm_compute::test::validation::DATA_TEST_CASE(), CpuDepthwiseConv2d::get_depthwiseconvolution_function(), arm_compute::test::validation::TEST_CASE(), CPPSplit< CLSlice, ICLTensor >::validate(), CpuDepthwiseConv2d::validate(), CLSynthetizeOperator< ClGemmMatrixMultiplyReshapedOnlyRhsKernel >::validate(), NESynthetizeFunction< K >::validate(), CLSynthetizeFunction< K >::validate(), NEQLSTMLayer::validate(), CLQLSTMLayer::validate(), arm_compute::graph::backends::detail::validate_arg_min_max_layer(), arm_compute::graph::backends::detail::validate_bounding_box_transform_layer(), arm_compute::graph::backends::detail::validate_channel_shuffle_layer(), arm_compute::graph::backends::detail::validate_convolution_layer(), arm_compute::graph::backends::detail::validate_depth_to_space_layer(), arm_compute::graph::backends::detail::validate_depthwise_convolution_layer(), arm_compute::graph::backends::detail::validate_dequantization_layer(), arm_compute::graph::backends::detail::validate_detection_output_layer(), arm_compute::graph::backends::detail::validate_detection_post_process_layer(), arm_compute::graph::backends::detail::validate_eltwise_Layer(), arm_compute::graph::backends::detail::validate_fused_convolution_with_post_op(), arm_compute::graph::backends::detail::validate_generate_proposals_layer(), arm_compute::graph::backends::detail::validate_l2_normalize_layer(), arm_compute::graph::backends::detail::validate_normalize_planar_yuv_layer(), arm_compute::graph::backends::detail::validate_pad_layer(), arm_compute::graph::backends::detail::validate_permute_layer(), arm_compute::graph::backends::detail::validate_prelu_layer(), arm_compute::graph::backends::detail::validate_priorbox_layer(), arm_compute::graph::backends::detail::validate_quantization_layer(), arm_compute::graph::backends::detail::validate_reduction_operation_layer(), 
arm_compute::graph::backends::detail::validate_reorg_layer(), arm_compute::graph::backends::detail::validate_reshape_layer(), arm_compute::graph::backends::detail::validate_roi_align_layer(), arm_compute::graph::backends::detail::validate_slice_layer(), arm_compute::graph::backends::detail::validate_strided_slice_layer(), and arm_compute::graph::backends::detail::validate_unary_eltwise_layer().

122 {
123  return graph.impl()->status;
124 }