Compute Library
 21.08
NodeFusionMutator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
32 
33 #include "support/Cast.h"
34 
35 #include <set>
36 
37 namespace arm_compute
38 {
39 namespace graph
40 {
41 namespace detail
42 {
44 {
45  ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
46 
47  auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer());
48  auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
49 
50  // Not fusing if number of groups is greater than 1
51  if(conv_node->num_groups() > 1)
52  {
53  return;
54  }
55 
56  ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << output_edge->producer_id()
57  << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);
58 
59  // Prevent fusion if fused node has an output accessor
60  if(conv_node->output(0)->accessor() == nullptr)
61  {
62  const Target assigned_target = conv_node->assigned_target();
63 
64  // Extract conv inputs
65  const auto conv_input_id = conv_node->input_edge(0)->producer_id();
66  const auto conv_weights_id = conv_node->input_edge(1)->producer_id();
67  const auto conv_info = conv_node->convolution_info();
68  const auto conv_method = conv_node->convolution_method();
69  const auto num_groups = conv_node->num_groups();
70  const auto act_info = bn_node->fused_activation();
71  FastMathHint fast_math_hint = conv_node->fast_math_hint();
72 
73  // Extract bn inputs
74  const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
75  const auto bn_var_id = bn_node->input_edge(2)->producer_id();
76 
77  const auto epsilon = bn_node->epsilon();
78 
79  // Create the fused node
80  const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
81 
82  if(conv_node->input_edge(2) != nullptr)
83  {
84  auto conv_bias_id = conv_node->input_edge(2)->producer_id();
85  g.add_connection(conv_bias_id, 0, fused_id, 2);
86  }
87 
88  // Add connections from the conv/batch_norm inputs to the fused node
89  g.add_connection(conv_input_id, 0, fused_id, 0);
90  g.add_connection(conv_weights_id, 0, fused_id, 1);
91  g.add_connection(bn_mean_id, 0, fused_id, 3);
92  g.add_connection(bn_var_id, 0, fused_id, 4);
93 
94  if(bn_node->input_edge(3) != nullptr)
95  {
96  const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
97  g.add_connection(bn_beta_id, 0, fused_id, 5);
98  }
99 
100  if(bn_node->input_edge(4) != nullptr)
101  {
102  const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
103  g.add_connection(bn_gamma_id, 0, fused_id, 6);
104  }
105 
106  auto fused_node = g.node(fused_id);
107  std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
108 
109  // Extract batch normalization node accessor if any
110  auto bn_node_accessor = bn_node->output(0)->extract_accessor();
111  auto bn_node_name = bn_node->name();
112 
113  // Remove batch normalization node
114  g.remove_node(bn_node->id());
115 
116  // Get driving nodes of batch normalization node
117  for(auto &driving_node : bn_driving_nodes)
118  {
119  g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index);
120  configure_tensor(fused_node->output(0));
121  }
122  // Update fused node outputs
123  fused_node->output(0)->set_accessor(std::move(bn_node_accessor));
124  fused_node->set_assigned_target(assigned_target);
125  fused_node->set_common_node_parameters(NodeParams{ conv_node->name() + "+" + bn_node_name, assigned_target });
126 
127  // Remove convolution node
128  g.remove_node(conv_node->id());
129  }
130  else
131  {
132  ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
133  }
134 }
135 
137 {
138  ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
139 
140  auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
141  auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
142 
143  ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " << output_edge->producer_id()
144  << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);
145 
146  // Prevent fusion if fused node has an output accessor
147  if(depth_conv_node->output(0)->accessor() == nullptr)
148  {
149  const Target assigned_target = depth_conv_node->assigned_target();
150 
151  // Extract conv inputs
152  const auto depth_conv_input_id = depth_conv_node->input_edge(0)->producer_id();
153  const auto conv_weights_id = depth_conv_node->input_edge(1)->producer_id();
154  const auto conv_info = depth_conv_node->convolution_info();
155  const auto depth_conv_method = depth_conv_node->depthwise_convolution_method();
156  const auto depth_multiplier = depth_conv_node->depth_multiplier();
157  const auto act_info = bn_node->fused_activation();
158 
159  // Extract bn inputs
160  const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
161  const auto bn_var_id = bn_node->input_edge(2)->producer_id();
162  const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
163  const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
164  const auto epsilon = bn_node->epsilon();
165 
166  // Create the fused node
167  const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);
168 
169  if(depth_conv_node->input_edge(2) != nullptr)
170  {
171  const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id();
172  g.add_connection(conv_bias_id, 0, fused_id, 2);
173  }
174 
175  // Add connections from the conv/batch_norm inputs to the fused node
176  g.add_connection(depth_conv_input_id, 0, fused_id, 0);
177  g.add_connection(conv_weights_id, 0, fused_id, 1);
178  g.add_connection(bn_mean_id, 0, fused_id, 3);
179  g.add_connection(bn_var_id, 0, fused_id, 4);
180  g.add_connection(bn_beta_id, 0, fused_id, 5);
181  g.add_connection(bn_gamma_id, 0, fused_id, 6);
182 
183  auto fused_node = g.node(fused_id);
184  std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
185 
186  // Extract batch normalization node accessor if any
187  auto bn_node_accessor = bn_node->output(0)->extract_accessor();
188  auto bn_node_name = bn_node->name();
189 
190  // Remove batch normalization node
191  g.remove_node(bn_node->id());
192 
193  // Get driving nodes of batch normalization node
194  for(auto &driving_node : bn_driving_nodes)
195  {
196  g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index);
197  configure_tensor(fused_node->output(0));
198  }
199  // Update fused node outputs
200  fused_node->output(0)->set_accessor(std::move(bn_node_accessor));
201  fused_node->set_assigned_target(assigned_target);
202  fused_node->set_common_node_parameters(NodeParams{ depth_conv_node->name() + "+" + bn_node_name, assigned_target });
203 
204  // Remove convolution node
205  g.remove_node(depth_conv_node->id());
206  }
207  else
208  {
209  ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n");
210  }
211 }
212 
213 template <typename N>
214 void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set<Activation> &supported_fused_activations)
215 {
216  ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
217 
218  auto *n_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->producer());
219  auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->consumer());
220 
221  ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);
222 
223  // Check if activation is supported for fusion
224  if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
225  {
226  return;
227  }
228 
229  // EltwiseLayerNode can only be fused when dataype is float
230  if(n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type))
231  {
232  return;
233  }
234 
235  ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id()
236  << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
237 
238  // Prevent fusion if fused node has an output accessor
239  if(n_node->output(0)->accessor() == nullptr)
240  {
241  // Get driving nodes of activation node
242  std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
243 
244  // Set activation info to fused node
245  n_node->set_fused_activation(act_node->activation_info());
246 
247  // Extract activation node accessor if any
248  auto act_node_accessor = act_node->output(0)->extract_accessor();
249 
250  // Remove activation node
251  g.remove_node(act_node->id());
252 
253  // Update fused node outputs
254  for(auto &driving_node : act_driving_nodes)
255  {
256  g.add_connection(n_node->id(), 0, driving_node.node_id, driving_node.index);
257  }
258 
259  // Update accessor to fused node
260  n_node->output(0)->set_accessor(std::move(act_node_accessor));
261  }
262  else
263  {
264  ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n");
265  }
266 }
267 
268 template <typename N1, typename N2, typename F, typename... Args>
269 void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
270 {
271  // Note that fused nodes may be added to the end of the node list.
272  // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
273  // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
274  for(unsigned int i = 0; i < g.nodes().size(); ++i)
275  {
276  auto node = g.node(i);
277  // Check if the node is of type N and not a branching node
278  if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
279  {
280  const auto output_edge_id = *node->output_edges().begin();
281  const auto output_edge = g.edge(output_edge_id);
282 
283  // Check if following node is an activation layer node
284  if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
285  {
286  fuse_fcn(g, output_edge, optional_arguments...);
287  }
288  }
289  }
290 }
291 } // namespace detail
292 
294 {
295  return "NodeFusionMutator";
296 }
297 
299 {
301 }
302 
304 {
305  // Supported activations when fusing
306  const std::set<Activation> supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
311  };
312 
313  // Preconditions
314  auto empty_prec = [](INode &)
315  {
316  return true;
317  };
318  auto cl_target_prec = [](INode & n)
319  {
320  return n.assigned_target() == Target::CL;
321  };
322  auto qs8_prec = [&g](INode & n)
323  {
324  ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
325 
326  const auto output_edge_id = *n.output_edges().begin();
327  const auto output_edge = g.edge(output_edge_id);
328  // To perform fusion the two nodes must have same output quantization information
329  const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
330  const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8;
331 
332  return (output_qasymm8 && same_qinfo) || !output_qasymm8;
333  };
334 
335  // Fusion mutations
336  detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
337  detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
338  detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
339  detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
340  detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
341  detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
342  detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
343 }
344 } // namespace graph
345 } // namespace arm_compute
Edge * input_edge(size_t idx) const
Returns the edge of a given input of the node.
Definition: INode.cpp:171
Common node parameters.
Definition: Types.h:218
void configure_tensor(Tensor *tensor)
Configures tensor.
Definition: Utils.cpp:186
INode * consumer() const
Returns consumer node.
Definition: Edge.h:92
const std::set< EdgeID > & output_edges() const
Returns output edge set.
Definition: INode.cpp:132
std::vector< NodeIdxPair > get_driving_nodes(const INode &node)
Get the list of driving nodes of a given node.
Definition: Utils.cpp:166
NodeID add_node(Ts &&... args)
Adds a node to the graph.
Definition: Graph.h:235
void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
Copyright (c) 2017-2021 Arm Limited.
TensorDescriptor & desc()
TensorInfo metadata accessor.
Definition: Tensor.cpp:40
QuantizationInfo quant_info
Quantization info.
Node interface.
Definition: INode.h:45
Tensor * output(size_t idx) const
Returns the tensor of a given output of the node.
Definition: INode.cpp:158
quantized, asymmetric fixed-point 8-bit number unsigned
const unsigned int num_groups
Definition: Im2Col.cpp:153
bool remove_node(NodeID nid)
Remove the node with the given ID.
Definition: Graph.cpp:35
NodeID producer_id() const
Returns producer node id.
Definition: Edge.h:68
void fuse_layer(Graph &g, std::function< bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
EdgeID add_connection(NodeID source, size_t source_idx, NodeID sink, size_t sink_idx)
Adds a connection between two nodes.
Definition: Graph.cpp:69
FastMathHint
Enable or disable fast math for Convolution layer.
Definition: Types.h:142
Graph class.
Definition: Graph.h:53
unsigned int NodeID
Definition: Types.h:68
const std::vector< NodeID > & nodes(NodeType type)
Returns graph input nodes.
Definition: Graph.cpp:174
const char * name() override
Returns mutator name.
Graph Edge.
Definition: Edge.h:39
MutationType type() const override
Returns mutation type.
const INode * node(NodeID id) const
Get node object given its id.
Definition: Graph.cpp:204
std::string name
Node name.
Definition: Types.h:220
#define ARM_COMPUTE_LOG_GRAPH_VERBOSE(x)
Definition: Logger.h:50
const Edge * edge(EdgeID id) const
Get edge object given its id.
Definition: Graph.cpp:214
NodeID consumer_id() const
Returns sink node id.
Definition: Edge.h:76
void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set< Activation > &supported_fused_activations)
OpenCL capable target device.
INode * producer() const
Returns producer node.
Definition: Edge.h:84
virtual void mutate(Graph &g) override
Walk the graph and perform a specific mutation.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:961