// NOTE(review): extraction artifact — every code line below carries its original
// source line number fused at its start, and interior lines are missing.
// Fragment of a helper that appears to rewire previously-driven consumer nodes
// ("driving nodes") from old_node onto new_node's output 0 — TODO confirm
// against the full source.
// Guard: nothing to do if either node pointer is null.
48 if (new_node ==
nullptr || old_node ==
nullptr)
// Guard: old_node must expose an output tensor at index 0.
57 if (old_node->
output(0) ==
nullptr)
// Reconnect each recorded consumer to new_node's output 0.
67 for (
auto &driving_node : last_driving_nodes)
69 g.
add_connection(new_node->
id(), 0, driving_node.node_id, driving_node.index);
// Presumably transfers the output tensor as well when requested — verify
// against the caller.
70 if (add_output_tensor)
// Fragment of fuse_convolution_with_batch_normalization: folds a
// BatchNormalization node into the preceding Convolution node. Interior lines
// are missing from this chunk; comments are hedged accordingly.
// Downcast the edge's producer/consumer to the concrete node types.
84 auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->
producer());
86 arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->
consumer());
// Grouped convolutions are not fused — presumably bails out here; confirm.
89 if (conv_node->num_groups() > 1)
95 << output_edge->
producer_id() <<
" with BatchNormalization Layer node with ID : "
// Fusion only proceeds when no output accessor is attached to the conv output
// (an accessor implies an observer that must see the un-fused tensor).
99 if (conv_node->output(0)->accessor() ==
nullptr)
101 const Target assigned_target = conv_node->assigned_target();
// Collect the convolution's inputs and configuration for the fused node.
104 const auto conv_input_id = conv_node->input_edge(0)->producer_id();
105 const auto conv_weights_id = conv_node->input_edge(1)->producer_id();
106 const auto conv_info = conv_node->convolution_info();
107 const auto conv_method = conv_node->convolution_method();
108 const auto num_groups = conv_node->num_groups();
109 const auto act_info = bn_node->fused_activation();
110 FastMathHint fast_math_hint = conv_node->fast_math_hint();
// Batch-norm statistics inputs: mean (edge 1) and variance (edge 2).
113 const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
114 const auto bn_var_id = bn_node->input_edge(2)->producer_id();
116 const auto epsilon = bn_node->epsilon();
// Optional conv bias at input edge 2.
122 if (conv_node->input_edge(2) !=
nullptr)
// Optional batch-norm beta at input edge 3.
134 if (bn_node->input_edge(3) !=
nullptr)
136 const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
// Optional batch-norm gamma at input edge 4.
140 if (bn_node->input_edge(4) !=
nullptr)
142 const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
// Fetch the newly-created fused node and name it "<conv>+<bn>".
146 auto fused_node = g.
node(fused_id);
147 auto bn_node_name = bn_node->
name();
151 fused_node->set_assigned_target(assigned_target);
152 fused_node->set_common_node_parameters(
NodeParams{conv_node->
name() +
"+" + bn_node_name, assigned_target});
// Log message emitted when an output accessor blocked the fusion.
160 "Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
// Fragment of fuse_depthwise_convolution_with_batch_normalization: mirrors the
// conv+BN fusion above for DepthwiseConvolutionLayerNode. Interior lines are
// missing from this chunk; comments are hedged accordingly.
// Downcast producer/consumer to the concrete node types.
168 auto *depth_conv_node =
169 arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->
producer());
171 arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->
consumer());
174 << output_edge->
producer_id() <<
" with BatchNormalization Layer node with ID : "
// Fuse only when no output accessor observes the depthwise conv output.
178 if (depth_conv_node->output(0)->accessor() ==
nullptr)
180 const Target assigned_target = depth_conv_node->assigned_target();
// Collect the depthwise convolution's inputs and configuration.
183 const auto depth_conv_input_id = depth_conv_node->input_edge(0)->producer_id();
184 const auto conv_weights_id = depth_conv_node->input_edge(1)->producer_id();
185 const auto conv_info = depth_conv_node->convolution_info();
186 const auto depth_conv_method = depth_conv_node->depthwise_convolution_method();
187 const auto depth_multiplier = depth_conv_node->depth_multiplier();
188 const auto act_info = bn_node->fused_activation();
// Batch-norm inputs: mean(1), var(2), beta(3), gamma(4). NOTE(review): unlike
// the conv variant above, edges 3/4 are read unconditionally here — presumably
// guaranteed non-null for this node type; verify in the full source.
191 const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
192 const auto bn_var_id = bn_node->input_edge(2)->producer_id();
193 const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
194 const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
195 const auto epsilon = bn_node->epsilon();
// Optional bias at input edge 2.
201 if (depth_conv_node->input_edge(2) !=
nullptr)
// Fetch the fused node and name it "<depthwise-conv>+<bn>".
215 auto fused_node = g.
node(fused_id);
216 auto bn_node_name = bn_node->
name();
220 fused_node->set_assigned_target(assigned_target);
221 fused_node->set_common_node_parameters(
222 NodeParams{depth_conv_node->
name() +
"+" + bn_node_name, assigned_target});
// Tail of the log message emitted when an output accessor blocked the fusion.
230 "presence of an output accessor\n");
// Fragment of fuse_node_with_activation<N>: absorbs a following Activation
// node into node type N when the activation is in the supported set. The
// function declaration line itself is missing from this chunk.
234 template <
typename N>
236 const Edge *output_edge,
237 const std::set<Activation> &supported_fused_activations)
// Downcast producer (node of type N) and consumer (activation node).
241 auto *n_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->
producer());
242 auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->
consumer());
// Skip activations not in the supported-for-fusion whitelist.
247 if (supported_fused_activations.count(act_node->activation_info().activation()) == 0)
259 <<
" with Activation Layer node with ID : "
// Fuse only when no output accessor observes the producer's output.
263 if (n_node->output(0)->accessor() ==
nullptr)
// Record the activation on the producer node itself.
266 n_node->set_fused_activation(act_node->activation_info());
// Log message emitted when an output accessor blocked the fusion.
273 "Prevented fusion of node with activation due to the presence of an output accessor\n");
// Fragment of fuse_pad_with_convolution<N>: folds a zero-valued Pad node into
// the padding configuration of the following convolution-type node N. The
// function declaration line is missing from this chunk.
277 template <
typename N>
280 auto *pad_node = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->
producer());
281 auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->
consumer());
283 const Edge *input_edge = pad_node->input_edge(0);
// Preconditions: pad node has a live input tensor, its output has no accessor,
// and the pad value is exactly 0 (only zero padding can merge into conv info).
284 if (input_edge !=
nullptr && input_edge->
tensor() !=
nullptr && pad_node->output(0)->
accessor() ==
nullptr &&
285 pad_node->pad_value().get<
float>() == 0.0)
288 const PaddingList padding_list = pad_node->padding();
// Pull W/H padding from the list, defaulting to (0,0) when absent.
293 const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] :
PaddingInfo(0, 0);
294 const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] :
PaddingInfo(0, 0);
// Install the merged padding on the convolution node.
304 conv_node->set_convolution_info(new_conv_info);
// Bypass the pad node: connect its driver nodes directly to the conv input.
311 for (
auto &driver_node : pad_driver_nodes)
313 g.
add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0);
// Fragment of fuse_layer<N1, N2, F, Args...>: generic driver that scans the
// graph for an N1 node with a single output edge whose consumer is an N2 node,
// then invokes fuse_fcn on the pair when the predicate `prec` accepts the
// producer. Interior lines are missing from this chunk.
319 template <
typename N1,
typename N2,
typename F,
typename... Args>
320 void fuse_layer(
Graph &g, std::function<
bool(
INode &)>
const &prec,
const F fuse_fcn, Args &&...optional_arguments)
// Iterate all graph nodes by index (graph may grow during fusion —
// presumably why indexed iteration is used; confirm).
325 for (
unsigned int i = 0; i < g.
nodes().size(); ++i)
327 auto node = g.
node(i);
// Candidate: alive, of type N1, with exactly one output edge.
329 if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
331 const auto output_edge_id = *node->
output_edges().begin();
332 const auto output_edge = g.
edge(output_edge_id);
// Fuse when the edge and consumer exist, the consumer is N2, and the
// predicate accepts the producer.
335 if ((output_edge !=
nullptr) && (output_edge->consumer() !=
nullptr) &&
336 (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
338 fuse_fcn(g, output_edge, optional_arguments...);
// Fragment of the single-node-type fuse_layer<N1, F, Args...> overload: like
// the two-type overload above but does not constrain the consumer's type, and
// passes the node index `i` through to fuse_fcn. Interior lines are missing.
344 template <
typename N1,
typename F,
typename... Args>
345 void fuse_layer(
Graph &g, std::function<
bool(
INode &)>
const &prec,
const F fuse_fcn, Args &&...optional_arguments)
350 for (
unsigned int i = 0; i < g.
nodes().size(); ++i)
352 auto node = g.
node(i);
// Candidate: alive, of type N1, with exactly one output edge.
354 if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
356 const auto output_edge_id = *node->
output_edges().begin();
357 const auto output_edge = g.
edge(output_edge_id);
// No consumer-type check here — any consumer passes, subject to `prec`.
360 if ((output_edge !=
nullptr) && (output_edge->consumer() !=
nullptr) && prec(*output_edge->producer()))
362 fuse_fcn(g, output_edge, i, optional_arguments...);
// Fragment of an accessor (presumably NodeFusionMutator::name()) returning the
// mutator's display name — the surrounding signature is missing from this chunk.
371 return "NodeFusionMutator";
// Fragment of the mutator's mutate() routine: declares the whitelist of
// activations that may be fused, three fusion predicates, and the ordered list
// of fusion passes. Interior lines are missing from this chunk.
// Activations that back-ends support fusing into a preceding layer.
382 const std::set<Activation> supported_fused_activations = {
383 Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
384 Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
385 Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
386 Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
387 Activation::SQUARE, Activation::TANH};
// Predicate that accepts every node.
390 auto empty_prec = [](
INode &) {
return true; };
// Predicate that accepts only nodes assigned to the OpenCL target.
391 auto cl_target_prec = [](
INode &n) {
return n.assigned_target() ==
Target::CL; };
// Quantization-aware predicate: captures the graph to inspect the node's
// output edge; presumably rejects QASYMM8 outputs whose quantization info
// differs from the consumer's — verify against the full lambda body.
392 auto qs8_prec = [&g](
INode &n)
396 const auto output_edge_id = *n.output_edges().begin();
397 const auto output_edge = g.
edge(output_edge_id);
402 return (output_qasymm8 && same_qinfo) || !output_qasymm8;
// Fusion passes. NOTE(review): ordering matters — pad-into-conv runs before
// the activation fusions; confirm intent in the full source.
407 detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec,
408 detail::fuse_pad_with_convolution<ConvolutionLayerNode>);
409 detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>(
410 g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>);
411 detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(
412 g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
413 detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(
414 g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
415 detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(
416 g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
417 detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(
418 g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
// Eltwise+activation fusion is restricted to the CL target.
419 detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(
420 g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
422 detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(
424 detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(