Functions
void	configure_transition_manager (Graph &g, GraphContext &ctx, ExecutionWorkload &workload)
	Configures transition manager and execution workload. More...

void	validate_all_nodes (Graph &g)
	Validates all nodes. More...

void	configure_all_tensors (Graph &g)
	Configures all nodes of a graph. More...

void	allocate_all_input_tensors (INode &node)
	Allocates all input tensors of a node. More...

void	allocate_all_output_tensors (INode &node)
	Allocates all output tensors of a node. More...

void	allocate_const_tensors (Graph &g)
	Allocates const tensor of a given graph. More...

void	allocate_all_tensors (Graph &g)
	Allocates all tensors of a graph. More...

ExecutionWorkload	configure_all_nodes (Graph &g, GraphContext &ctx, const std::vector< NodeID > &node_order)
	Configures all nodes of graph. More...

void	release_unused_tensors (Graph &g)
	Release the memory of all unused const nodes. More...

void	call_tensor_accessor (Tensor *tensor)
	Calls accessor of a given tensor. More...

void	call_all_const_node_accessors (Graph &g)
	Call all const node accessors. More...

bool	call_all_input_node_accessors (ExecutionWorkload &workload)
	Call all input node accessors. More...

bool	call_all_output_node_accessors (ExecutionWorkload &workload)
	Call all output node accessors. More...

void	prepare_all_tasks (ExecutionWorkload &workload)
	Prepares all tasks for execution. More...

void	call_all_tasks (ExecutionWorkload &workload)
	Executes all tasks of a workload. More...

bool	all_inputs_are_visited (const INode *node, const std::vector< bool > &visited)
	Checks if all the input dependencies of a node have been visited. More...

void	transfer_driving_nodes_and_remove_old_node (Graph &g, INode new_node, INode old_node, bool add_output_tensor)

void	fuse_convolution_with_batch_normalization (Graph &g, const Edge *output_edge)

void	fuse_depthwise_convolution_with_batch_normalization (Graph &g, const Edge *output_edge)

template<typename N >
void	fuse_node_with_activation (Graph &g, const Edge *output_edge, const std::set< Activation > &supported_fused_activations)

template<typename N >
void	fuse_pad_with_convolution (Graph &g, const Edge *output_edge)

template<typename N1 , typename N2 , typename F , typename... Args>
void	fuse_layer (Graph &g, std::function< bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments)

template<typename N1 , typename F , typename... Args>
void	fuse_layer (Graph &g, std::function< bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments)

Function Documentation

◆ all_inputs_are_visited()

bool arm_compute::graph::detail::all_inputs_are_visited	(	const INode *	node,
		const std::vector< bool > &	visited
	)

inline

Checks if all the input dependencies of a node have been visited.

Parameters

[in]	node	Node to check
[in]	visited	Vector that contains the visited information

Returns: True if all inputs dependencies have been visited else false

Definition at line 46 of file TopologicalSort.cpp.

 {
     ARM_COMPUTE_ERROR_ON(node == nullptr);
     const Graph *graph = node->graph();
     ARM_COMPUTE_ERROR_ON(graph == nullptr);
  
     bool are_all_visited = true;
     for (const auto &input_edge_id : node->input_edges())
     {
         if (input_edge_id != EmptyNodeID)
         {
             const Edge *input_edge = graph->edge(input_edge_id);
             ARM_COMPUTE_ERROR_ON(input_edge == nullptr);
             ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr);
             if (!visited[input_edge->producer_id()])
             {
                 are_all_visited = false;
                 break;
             }
         }
     }
  
     return are_all_visited;
 }

References ARM_COMPUTE_ERROR_ON, Graph::edge(), arm_compute::graph::EmptyNodeID, INode::graph(), INode::input_edges(), Edge::producer(), and Edge::producer_id().

Referenced by arm_compute::graph::bfs(), and arm_compute::graph::dfs().

◆ allocate_all_input_tensors()

void allocate_all_input_tensors ( INode & node )

Allocates all input tensors of a node.

Parameters

[in] node Node to allocate the input tensor of

Definition at line 73 of file ExecutionHelpers.cpp.

 {
     for (unsigned int i = 0; i < node.num_inputs(); ++i)
     {
         Tensor *tensor = node.input(i);
         if (tensor != nullptr && !tensor->bound_edges().empty())
         {
             ARM_COMPUTE_ERROR_ON_MSG(!tensor->handle(), "Tensor handle is not configured!");
             tensor->handle()->allocate();
         }
     }
 }

References ARM_COMPUTE_ERROR_ON_MSG, INode::input(), INode::num_inputs(), and tensor.

Referenced by allocate_const_tensors().

◆ allocate_all_output_tensors()

void allocate_all_output_tensors ( INode & node )

Allocates all output tensors of a node.

Parameters

[in] node Node to allocate the output tensor of

Definition at line 86 of file ExecutionHelpers.cpp.

 {
     for (unsigned int i = 0; i < node.num_outputs(); ++i)
     {
         Tensor *tensor = node.output(i);
         if (tensor != nullptr && !tensor->bound_edges().empty())
         {
             ARM_COMPUTE_ERROR_ON_MSG(!tensor->handle(), "Tensor handle is not configured!");
             tensor->handle()->allocate();
         }
     }
 }

References ARM_COMPUTE_ERROR_ON_MSG, INode::num_outputs(), INode::output(), and tensor.

Referenced by allocate_const_tensors().

◆ allocate_all_tensors()

void allocate_all_tensors ( Graph & g )

Allocates all tensors of a graph.

Parameters

[in] g Graph to allocate the tensors

Definition at line 120 of file ExecutionHelpers.cpp.

 {
     auto &tensors = g.tensors();
  
     for (auto &tensor : tensors)
     {
         if (tensor && !tensor->bound_edges().empty() && tensor->handle() != nullptr &&
             tensor->handle()->tensor().info()->is_resizable() && tensor->handle()->tensor().is_used())
         {
             tensor->handle()->allocate();
         }
     }
 }

References tensor, and Graph::tensors().

Referenced by GraphManager::finalize_graph().

◆ allocate_const_tensors()

void allocate_const_tensors ( Graph & g )

Allocates const tensor of a given graph.

Parameters

[in] g Graph to allocate the tensors

Definition at line 99 of file ExecutionHelpers.cpp.

 {
     for (auto &node : g.nodes())
     {
         if (node != nullptr)
         {
             switch (node->type())
             {
                 case NodeType::Const:
                 case NodeType::Input:
                     allocate_all_output_tensors(*node);
                     break;
                 case NodeType::Output:
                     allocate_all_input_tensors(*node);
                 default:
                     break;
             }
         }
     }
 }

References allocate_all_input_tensors(), allocate_all_output_tensors(), arm_compute::graph::Const, arm_compute::graph::Input, Graph::nodes(), and arm_compute::graph::Output.

Referenced by GraphManager::finalize_graph().

◆ call_all_const_node_accessors()

void call_all_const_node_accessors ( Graph & g )

Call all const node accessors.

Parameters

[in] g Graph containing the const nodes

Definition at line 194 of file ExecutionHelpers.cpp.

 {
     auto &nodes = g.nodes();
  
     for (auto &node : nodes)
     {
         if (node != nullptr && node->type() == NodeType::Const && node->num_outputs())
         {
             if (!node->output(0)->bound_edges().empty())
             {
                 call_tensor_accessor(node->output(0));
             }
         }
     }
 }

References call_tensor_accessor(), arm_compute::graph::Const, and Graph::nodes().

Referenced by GraphManager::finalize_graph().

◆ call_all_input_node_accessors()

bool call_all_input_node_accessors ( ExecutionWorkload & workload )

Call all input node accessors.

Parameters

[in] workload Workload to execute

Returns: True if all the accesses were valid

Definition at line 210 of file ExecutionHelpers.cpp.

 {
     bool is_valid = true;
     std::for_each(std::begin(workload.inputs), std::end(workload.inputs),
                   [&](Tensor *input_tensor)
                   {
                       bool valid_input = (input_tensor != nullptr) && input_tensor->call_accessor();
                       is_valid         = is_valid && valid_input;
                   });
     return is_valid;
 }

References arm_compute::mlgo::parser::end(), arm_compute::utility::for_each(), ExecutionWorkload::inputs, and arm_compute::test::validation::is_valid.

Referenced by GraphManager::execute_graph().

◆ call_all_output_node_accessors()

bool call_all_output_node_accessors ( ExecutionWorkload & workload )

Call all output node accessors.

Parameters

[in] workload Workload to execute

Returns: True if all the accessors expect more data

Definition at line 261 of file ExecutionHelpers.cpp.

 {
     bool is_valid = true;
     std::for_each(std::begin(workload.outputs), std::end(workload.outputs),
                   [&](Tensor *output_tensor)
                   {
                       bool valid_output = (output_tensor != nullptr) && output_tensor->call_accessor();
                       is_valid          = is_valid && valid_output;
                   });
  
     sync_backends();
  
     return is_valid;
 }

References arm_compute::mlgo::parser::end(), arm_compute::utility::for_each(), arm_compute::test::validation::is_valid, ExecutionWorkload::outputs, and arm_compute::graph::sync_backends().

Referenced by GraphManager::execute_graph().

◆ call_all_tasks()

void call_all_tasks ( ExecutionWorkload & workload )

Executes all tasks of a workload.

Parameters

[in] workload Workload to execute

Definition at line 232 of file ExecutionHelpers.cpp.

 {
     ARM_COMPUTE_ERROR_ON(workload.ctx == nullptr);
  
     // Acquire memory for the transition buffers
     for (auto &mm_ctx : workload.ctx->memory_managers())
     {
         if (mm_ctx.second.cross_group != nullptr)
         {
             mm_ctx.second.cross_group->acquire();
         }
     }
  
     // Execute tasks
     for (auto &task : workload.tasks)
     {
         task();
     }
  
     // Release memory for the transition buffers
     for (auto &mm_ctx : workload.ctx->memory_managers())
     {
         if (mm_ctx.second.cross_group != nullptr)
         {
             mm_ctx.second.cross_group->release();
         }
     }
 }

References ARM_COMPUTE_ERROR_ON, ExecutionWorkload::ctx, GraphContext::memory_managers(), and ExecutionWorkload::tasks.

Referenced by GraphManager::execute_graph().

◆ call_tensor_accessor()

void call_tensor_accessor ( Tensor * tensor )

Calls accessor of a given tensor.

Parameters

[in] tensor The tensor of which the accessor should be called

Definition at line 188 of file ExecutionHelpers.cpp.

 {
     ARM_COMPUTE_ERROR_ON(!tensor);
     tensor->call_accessor();
 }

References ARM_COMPUTE_ERROR_ON, and tensor.

Referenced by call_all_const_node_accessors().

◆ configure_all_nodes()

ExecutionWorkload configure_all_nodes	(	Graph &	g,
		GraphContext &	ctx,
		const std::vector< NodeID > &	node_order
	)

Configures all nodes of graph.

Parameters

[in,out]	g	Graph to configure the nodes
[in]	ctx	Graph context to use
[in]	node_order	The order to configure the nodes

Returns: The execution workload

Definition at line 134 of file ExecutionHelpers.cpp.

 {
     ExecutionWorkload workload;
     workload.graph = &g;
     workload.ctx   = &ctx;
  
     // Reserve memory for tasks
     workload.tasks.reserve(node_order.size());
  
     // Create tasks
     for (auto &node_id : node_order)
     {
         auto node = g.node(node_id);
         if (node != nullptr)
         {
             Target                     assigned_target = node->assigned_target();
             backends::IDeviceBackend  &backend         = backends::BackendRegistry::get().get_backend(assigned_target);
             std::unique_ptr<IFunction> func            = backend.configure_node(*node, ctx);
             if (func != nullptr || is_utility_node(node))
             {
                 workload.tasks.emplace_back(ExecutionTask(std::move(func), node));
             }
         }
     }
  
     // Add inputs and outputs
     for (auto &node : g.nodes())
     {
         if (node != nullptr && node->type() == NodeType::Input)
         {
             workload.inputs.push_back(node->output(0));
         }
  
         if (node != nullptr && node->type() == NodeType::Output)
         {
             workload.outputs.push_back(node->input(0));
             continue;
         }
     }
  
     return workload;
 }

References INode::assigned_target(), IDeviceBackend::configure_node(), ExecutionWorkload::ctx, BackendRegistry::get(), BackendRegistry::get_backend(), ExecutionWorkload::graph, arm_compute::graph::Input, ExecutionWorkload::inputs, arm_compute::graph::is_utility_node(), Graph::node(), Graph::nodes(), arm_compute::graph::Output, ExecutionWorkload::outputs, and ExecutionWorkload::tasks.

Referenced by GraphManager::finalize_graph().

◆ configure_all_tensors()

void configure_all_tensors ( Graph & g )

Configures all nodes of a graph.

Parameters

[in] g Graph to configure

Definition at line 56 of file ExecutionHelpers.cpp.

 {
     auto &tensors = g.tensors();
  
     for (auto &tensor : tensors)
     {
         if (tensor && tensor->handle() == nullptr)
         {
             Target                         target  = tensor->desc().target;
             backends::IDeviceBackend      &backend = backends::BackendRegistry::get().get_backend(target);
             std::unique_ptr<ITensorHandle> handle  = backend.create_tensor(*tensor);
             ARM_COMPUTE_ERROR_ON_MSG(!handle, "Couldn't create backend handle!");
             tensor->set_handle(std::move(handle));
         }
     }
 }

References ARM_COMPUTE_ERROR_ON_MSG, IDeviceBackend::create_tensor(), BackendRegistry::get(), BackendRegistry::get_backend(), tensor, and Graph::tensors().

Referenced by GraphManager::finalize_graph().

◆ configure_transition_manager()

void configure_transition_manager	(	Graph &	g,
		GraphContext &	ctx,
		ExecutionWorkload &	workload
	)

Configures transition manager and execution workload.

Parameters

[in]	g	Graph to configure
[in]	ctx	Graph context
[in]	workload	Workload to configure

Definition at line 237 of file CrossLayerMemoryManagerHelpers.cpp.

 {
     // Get const tensors (un-managed)
     std::set<ITensorHandle *> const_tensors = get_const_handles(g);
  
     std::vector<TaskHandles> tasks_handles;
     TargetHandleCounter      target_handle_count;
  
     // Count handles
     for (auto &task : workload.tasks)
     {
         // Populates IO handles
         tasks_handles.push_back(get_transition_handles(ctx, task, const_tensors));
  
         // Count handles
         count_input_handles_per_target(tasks_handles.back(), target_handle_count);
     }
  
     // Setup memory managers
     for (auto &hc : target_handle_count)
     {
         MemoryManagerContext *mm_ctx = ctx.memory_management_ctx(hc.first);
         if (mm_ctx != nullptr)
         {
             if (mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr)
             {
                 // Manage and allocate tensors
                 configure_handle_lifetime(tasks_handles, hc.second);
             }
         }
     }
 }

References MemoryManagerContext::cross_group, MemoryManagerContext::cross_mm, GraphContext::memory_management_ctx(), and ExecutionWorkload::tasks.

Referenced by GraphManager::finalize_graph().

◆ fuse_convolution_with_batch_normalization()

void arm_compute::graph::detail::fuse_convolution_with_batch_normalization	(	Graph &	g,
		const Edge *	output_edge
	)

Definition at line 80 of file NodeFusionMutator.cpp.

 {
     ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
  
     auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer());
     auto *bn_node =
         arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
  
     // Not fusing if number of groups is greater than 1
     if (conv_node->num_groups() > 1)
     {
         return;
     }
  
     ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : "
                                   << output_edge->producer_id() << " with BatchNormalization Layer node with ID : "
                                   << output_edge->consumer_id() << std::endl);
  
     // Prevent fusion if fused node has an output accessor
     if (conv_node->output(0)->accessor() == nullptr)
     {
         const Target assigned_target = conv_node->assigned_target();
  
         // Extract conv inputs
         const auto   conv_input_id   = conv_node->input_edge(0)->producer_id();
         const auto   conv_weights_id = conv_node->input_edge(1)->producer_id();
         const auto   conv_info       = conv_node->convolution_info();
         const auto   conv_method     = conv_node->convolution_method();
         const auto   num_groups      = conv_node->num_groups();
         const auto   act_info        = bn_node->fused_activation();
         FastMathHint fast_math_hint  = conv_node->fast_math_hint();
  
         // Extract bn inputs
         const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
         const auto bn_var_id  = bn_node->input_edge(2)->producer_id();
  
         const auto epsilon = bn_node->epsilon();
  
         // Create the fused node
         const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(
             epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
  
         if (conv_node->input_edge(2) != nullptr)
         {
             auto conv_bias_id = conv_node->input_edge(2)->producer_id();
             g.add_connection(conv_bias_id, 0, fused_id, 2);
         }
  
         // Add connections from the conv/batch_norm inputs to the fused node
         g.add_connection(conv_input_id, 0, fused_id, 0);
         g.add_connection(conv_weights_id, 0, fused_id, 1);
         g.add_connection(bn_mean_id, 0, fused_id, 3);
         g.add_connection(bn_var_id, 0, fused_id, 4);
  
         if (bn_node->input_edge(3) != nullptr)
         {
             const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
             g.add_connection(bn_beta_id, 0, fused_id, 5);
         }
  
         if (bn_node->input_edge(4) != nullptr)
         {
             const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
             g.add_connection(bn_gamma_id, 0, fused_id, 6);
         }
  
         auto fused_node   = g.node(fused_id);
         auto bn_node_name = bn_node->name();
  
         transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);
  
         fused_node->set_assigned_target(assigned_target);
         fused_node->set_common_node_parameters(NodeParams{conv_node->name() + "+" + bn_node_name, assigned_target});
  
         // Remove convolution node
         g.remove_node(conv_node->id());
     }
     else
     {
         ARM_COMPUTE_LOG_GRAPH_VERBOSE(
             "Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
     }
 }

References arm_compute::test::validation::act_info, Graph::add_connection(), Graph::add_node(), ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_LOG_GRAPH_VERBOSE, Edge::consumer(), Edge::consumer_id(), arm_compute::test::validation::conv_info, arm_compute::quantization::epsilon, INode::input_edge(), INode::name(), NodeParams::name, Graph::node(), arm_compute::test::validation::num_groups, Edge::producer(), Edge::producer_id(), Graph::remove_node(), and transfer_driving_nodes_and_remove_old_node().

Referenced by NodeFusionMutator::mutate().

◆ fuse_depthwise_convolution_with_batch_normalization()

void arm_compute::graph::detail::fuse_depthwise_convolution_with_batch_normalization	(	Graph &	g,
		const Edge *	output_edge
	)

Definition at line 164 of file NodeFusionMutator.cpp.

 {
     ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
  
     auto *depth_conv_node =
         arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
     auto *bn_node =
         arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
  
     ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : "
                                   << output_edge->producer_id() << " with BatchNormalization Layer node with ID : "
                                   << output_edge->consumer_id() << std::endl);
  
     // Prevent fusion if fused node has an output accessor
     if (depth_conv_node->output(0)->accessor() == nullptr)
     {
         const Target assigned_target = depth_conv_node->assigned_target();
  
         // Extract conv inputs
         const auto depth_conv_input_id = depth_conv_node->input_edge(0)->producer_id();
         const auto conv_weights_id     = depth_conv_node->input_edge(1)->producer_id();
         const auto conv_info           = depth_conv_node->convolution_info();
         const auto depth_conv_method   = depth_conv_node->depthwise_convolution_method();
         const auto depth_multiplier    = depth_conv_node->depth_multiplier();
         const auto act_info            = bn_node->fused_activation();
  
         // Extract bn inputs
         const auto bn_mean_id  = bn_node->input_edge(1)->producer_id();
         const auto bn_var_id   = bn_node->input_edge(2)->producer_id();
         const auto bn_beta_id  = bn_node->input_edge(3)->producer_id();
         const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
         const auto epsilon     = bn_node->epsilon();
  
         // Create the fused node
         const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(
             epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);
  
         if (depth_conv_node->input_edge(2) != nullptr)
         {
             const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id();
             g.add_connection(conv_bias_id, 0, fused_id, 2);
         }
  
         // Add connections from the conv/batch_norm inputs to the fused node
         g.add_connection(depth_conv_input_id, 0, fused_id, 0);
         g.add_connection(conv_weights_id, 0, fused_id, 1);
         g.add_connection(bn_mean_id, 0, fused_id, 3);
         g.add_connection(bn_var_id, 0, fused_id, 4);
         g.add_connection(bn_beta_id, 0, fused_id, 5);
         g.add_connection(bn_gamma_id, 0, fused_id, 6);
  
         auto fused_node   = g.node(fused_id);
         auto bn_node_name = bn_node->name();
  
         transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);
  
         fused_node->set_assigned_target(assigned_target);
         fused_node->set_common_node_parameters(
             NodeParams{depth_conv_node->name() + "+" + bn_node_name, assigned_target});
  
         // Remove convolution node
         g.remove_node(depth_conv_node->id());
     }
     else
     {
         ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the "
                                       "presence of an output accessor\n");
     }
 }

References arm_compute::test::validation::act_info, Graph::add_connection(), Graph::add_node(), ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_LOG_GRAPH_VERBOSE, Edge::consumer(), Edge::consumer_id(), arm_compute::test::validation::conv_info, arm_compute::quantization::epsilon, INode::input_edge(), INode::name(), NodeParams::name, Graph::node(), Edge::producer(), Edge::producer_id(), Graph::remove_node(), and transfer_driving_nodes_and_remove_old_node().

Referenced by NodeFusionMutator::mutate().

◆ fuse_layer() [1/2]

void arm_compute::graph::detail::fuse_layer	(	Graph &	g,
		std::function< bool(INode &)> const &	prec,
		const F	fuse_fcn,
		Args &&...	optional_arguments
	)

Definition at line 320 of file NodeFusionMutator.cpp.

 {
     // Note that fused nodes may be added to the end of the node list.
     // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
     // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
     for (unsigned int i = 0; i < g.nodes().size(); ++i)
     {
         auto node = g.node(i);
         // Check if the node is of type N1 and not a branching node
         if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
         {
             const auto output_edge_id = *node->output_edges().begin();
             const auto output_edge    = g.edge(output_edge_id);
  
             // Check if following node is a type N2 node
             if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) &&
                 (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
             {
                 fuse_fcn(g, output_edge, optional_arguments...);
             }
         }
     }
 }

References Graph::edge(), Graph::node(), Graph::nodes(), and INode::output_edges().

◆ fuse_layer() [2/2]

void arm_compute::graph::detail::fuse_layer	(	Graph &	g,
		std::function< bool(INode &)> const &	prec,
		const F	fuse_fcn,
		Args &&...	optional_arguments
	)

Definition at line 345 of file NodeFusionMutator.cpp.

 {
     // Note that fused nodes may be added to the end of the node list.
     // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
     // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
     for (unsigned int i = 0; i < g.nodes().size(); ++i)
     {
         auto node = g.node(i);
         // Check if the node is of type N1 and not a branching node
         if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
         {
             const auto output_edge_id = *node->output_edges().begin();
             const auto output_edge    = g.edge(output_edge_id);
  
             // Check if it's the correct target
             if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer()))
             {
                 fuse_fcn(g, output_edge, i, optional_arguments...);
             }
         }
     }
 }

References Graph::edge(), Graph::node(), Graph::nodes(), and INode::output_edges().

◆ fuse_node_with_activation()

void arm_compute::graph::detail::fuse_node_with_activation	(	Graph &	g,
		const Edge *	output_edge,
		const std::set< Activation > &	supported_fused_activations
	)

Definition at line 235 of file NodeFusionMutator.cpp.

 {
     ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
  
     auto *n_node   = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->producer());
     auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->consumer());
  
     ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);
  
     // Check if activation is supported for fusion
     if (supported_fused_activations.count(act_node->activation_info().activation()) == 0)
     {
         return;
     }
  
     // EltwiseLayerNode can only be fused when dataype is float
     if (n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type))
     {
         return;
     }
  
     ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id()
                                                            << " with Activation Layer node with ID : "
                                                            << output_edge->consumer_id() << std::endl);
  
     // Prevent fusion if fused node has an output accessor
     if (n_node->output(0)->accessor() == nullptr)
     {
         // Set activation info to fused node
         n_node->set_fused_activation(act_node->activation_info());
  
         transfer_driving_nodes_and_remove_old_node(g, n_node, act_node, false);
     }
     else
     {
         ARM_COMPUTE_LOG_GRAPH_VERBOSE(
             "Prevented fusion of node with activation due to the presence of an output accessor\n");
     }
 }

References ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_LOG_GRAPH_VERBOSE, Edge::consumer(), Edge::consumer_id(), arm_compute::graph::EltwiseLayer, arm_compute::is_data_type_float(), Edge::producer(), Edge::producer_id(), and transfer_driving_nodes_and_remove_old_node().

◆ fuse_pad_with_convolution()

void arm_compute::graph::detail::fuse_pad_with_convolution	(	Graph &	g,
		const Edge *	output_edge
	)

Definition at line 278 of file NodeFusionMutator.cpp.

 {
     auto *pad_node  = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->producer());
     auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->consumer());
  
     const Edge *input_edge = pad_node->input_edge(0);
     if (input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr &&
         pad_node->pad_value().get<float>() == 0.0)
     {
         const DataLayout  layout       = input_edge->tensor()->desc().layout;
         const PaddingList padding_list = pad_node->padding();
  
         const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT);
         const unsigned int width_index  = get_dimension_idx(layout, DataLayoutDimension::WIDTH);
  
         const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] : PaddingInfo(0, 0);
         const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] : PaddingInfo(0, 0);
  
         if (is_padding_in_height_or_width(layout, padding_list))
         {
             // Add paddings to the convolution node
             const PadStrideInfo conv_info = conv_node->convolution_info();
             const PadStrideInfo new_conv_info(conv_info.stride().first, conv_info.stride().second,
                                               conv_info.pad_left() + pad_w.first, conv_info.pad_right() + pad_w.second,
                                               conv_info.pad_top() + pad_h.first, conv_info.pad_bottom() + pad_h.second,
                                               conv_info.round());
             conv_node->set_convolution_info(new_conv_info);
  
             // Update drivers of the convolution node
             std::vector<NodeIdxPair> pad_driver_nodes = get_driver_nodes(*pad_node);
             g.remove_node(pad_node->id());
  
             // Update fused node inputs
             for (auto &driver_node : pad_driver_nodes)
             {
                 g.add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0);
             }
         }
     }
 }

References Tensor::accessor(), Graph::add_connection(), Edge::consumer(), arm_compute::test::validation::conv_info, Tensor::desc(), arm_compute::graph::get_dimension_idx(), arm_compute::graph::get_driver_nodes(), arm_compute::HEIGHT, arm_compute::graph::is_padding_in_height_or_width(), TensorDescriptor::layout, Edge::producer(), Graph::remove_node(), Edge::tensor(), and arm_compute::WIDTH.

◆ prepare_all_tasks()

void prepare_all_tasks ( ExecutionWorkload & workload )

Prepares all tasks for execution.

Parameters

[in] workload Workload to prepare

Definition at line 222 of file ExecutionHelpers.cpp.

 {
     ARM_COMPUTE_ERROR_ON(workload.graph == nullptr);
     for (auto &task : workload.tasks)
     {
         task.prepare();
         release_unused_tensors(*workload.graph);
     }
 }

References ARM_COMPUTE_ERROR_ON, ExecutionWorkload::graph, release_unused_tensors(), and ExecutionWorkload::tasks.

Referenced by GraphManager::finalize_graph().

◆ release_unused_tensors()

void release_unused_tensors ( Graph & g )

Release the memory of all unused const nodes.

Parameters

[in] g Graph to release the memory from

Definition at line 177 of file ExecutionHelpers.cpp.

 {
     for (auto &tensor : g.tensors())
     {
         if (tensor != nullptr && tensor->handle() != nullptr)
         {
             tensor->handle()->release_if_unused();
         }
     }
 }

References tensor, and Graph::tensors().

Referenced by prepare_all_tasks().

◆ transfer_driving_nodes_and_remove_old_node()

void arm_compute::graph::detail::transfer_driving_nodes_and_remove_old_node	(	Graph &	g,
		INode *	new_node,
		INode *	old_node,
		bool	add_output_tensor
	)

Definition at line 46 of file NodeFusionMutator.cpp.

 {
     if (new_node == nullptr || old_node == nullptr)
     {
         return;
     }
  
     // Get driving nodes of last fusable node
     std::vector<NodeIdxPair> last_driving_nodes = get_driving_nodes(*old_node);
  
     // Extract last fusable node accessor if any
     if (old_node->output(0) == nullptr)
     {
         return;
     }
     auto old_node_accessor = old_node->output(0)->extract_accessor();
  
     // Remove node
     g.remove_node(old_node->id());
  
     // Update fused node outputs
     for (auto &driving_node : last_driving_nodes)
     {
         g.add_connection(new_node->id(), 0, driving_node.node_id, driving_node.index);
         if (add_output_tensor)
         {
             configure_tensor(new_node->output(0));
         }
     }
  
     // Update accessor to fused node
     new_node->output(0)->set_accessor(std::move(old_node_accessor));
 }

References Graph::add_connection(), arm_compute::graph::configure_tensor(), Tensor::extract_accessor(), arm_compute::graph::get_driving_nodes(), INode::id(), INode::output(), Graph::remove_node(), and Tensor::set_accessor().

Referenced by fuse_convolution_with_batch_normalization(), fuse_depthwise_convolution_with_batch_normalization(), and fuse_node_with_activation().

◆ validate_all_nodes()

void validate_all_nodes ( Graph & g )

Validates all nodes.

Parameters

[in] g Graph to validate

Definition at line 39 of file ExecutionHelpers.cpp.

 {
     auto &nodes = g.nodes();
  
     // Create tasks
     for (auto &node : nodes)
     {
         if (node != nullptr)
         {
             Target                    assigned_target = node->assigned_target();
             backends::IDeviceBackend &backend         = backends::BackendRegistry::get().get_backend(assigned_target);
             Status                    status          = backend.validate_node(*node);
             ARM_COMPUTE_ERROR_ON_MSG(!bool(status), status.error_description().c_str());
         }
     }
 }

References ARM_COMPUTE_ERROR_ON_MSG, Status::error_description(), BackendRegistry::get(), BackendRegistry::get_backend(), Graph::nodes(), and IDeviceBackend::validate_node().

Referenced by GraphManager::finalize_graph().

Functions

Function Documentation

◆ all_inputs_are_visited()

◆ allocate_all_input_tensors()

◆ allocate_all_output_tensors()

◆ allocate_all_tensors()

◆ allocate_const_tensors()

◆ call_all_const_node_accessors()

◆ call_all_input_node_accessors()

◆ call_all_output_node_accessors()

◆ call_all_tasks()

◆ call_tensor_accessor()

◆ configure_all_nodes()

◆ configure_all_tensors()

◆ configure_transition_manager()

◆ fuse_convolution_with_batch_normalization()

◆ fuse_depthwise_convolution_with_batch_normalization()

◆ fuse_layer() [1/2]

◆ fuse_layer() [2/2]

◆ fuse_node_with_activation()

◆ fuse_pad_with_convolution()

◆ prepare_all_tasks()

◆ release_unused_tensors()

◆ transfer_driving_nodes_and_remove_old_node()

◆ validate_all_nodes()