50 if(new_node ==
nullptr || old_node ==
nullptr)
59 if(old_node->
output(0) ==
nullptr)
69 for(
auto &driving_node : last_driving_nodes)
71 g.
add_connection(new_node->
id(), 0, driving_node.node_id, driving_node.index);
86 auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->
producer());
87 auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->
consumer());
90 if(conv_node->num_groups() > 1)
96 <<
" with BatchNormalization Layer node with ID : " << output_edge->
consumer_id() << std::endl);
99 if(conv_node->output(0)->accessor() ==
nullptr)
101 const Target assigned_target = conv_node->assigned_target();
104 const auto conv_input_id = conv_node->input_edge(0)->producer_id();
105 const auto conv_weights_id = conv_node->input_edge(1)->producer_id();
106 const auto conv_info = conv_node->convolution_info();
107 const auto conv_method = conv_node->convolution_method();
108 const auto num_groups = conv_node->num_groups();
109 const auto act_info = bn_node->fused_activation();
110 FastMathHint fast_math_hint = conv_node->fast_math_hint();
113 const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
114 const auto bn_var_id = bn_node->input_edge(2)->producer_id();
116 const auto epsilon = bn_node->epsilon();
121 if(conv_node->input_edge(2) !=
nullptr)
133 if(bn_node->input_edge(3) !=
nullptr)
135 const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
139 if(bn_node->input_edge(4) !=
nullptr)
141 const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
145 auto fused_node = g.
node(fused_id);
146 auto bn_node_name = bn_node->
name();
150 fused_node->set_assigned_target(assigned_target);
151 fused_node->set_common_node_parameters(
NodeParams{ conv_node->
name() +
"+" + bn_node_name, assigned_target });
166 auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->
producer());
167 auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->
consumer());
170 <<
" with BatchNormalization Layer node with ID : " << output_edge->
consumer_id() << std::endl);
173 if(depth_conv_node->output(0)->accessor() ==
nullptr)
175 const Target assigned_target = depth_conv_node->assigned_target();
178 const auto depth_conv_input_id = depth_conv_node->input_edge(0)->producer_id();
179 const auto conv_weights_id = depth_conv_node->input_edge(1)->producer_id();
180 const auto conv_info = depth_conv_node->convolution_info();
181 const auto depth_conv_method = depth_conv_node->depthwise_convolution_method();
182 const auto depth_multiplier = depth_conv_node->depth_multiplier();
183 const auto act_info = bn_node->fused_activation();
186 const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
187 const auto bn_var_id = bn_node->input_edge(2)->producer_id();
188 const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
189 const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
190 const auto epsilon = bn_node->epsilon();
195 if(depth_conv_node->input_edge(2) !=
nullptr)
209 auto fused_node = g.
node(fused_id);
210 auto bn_node_name = bn_node->
name();
214 fused_node->set_assigned_target(assigned_target);
215 fused_node->set_common_node_parameters(
NodeParams{ depth_conv_node->
name() +
"+" + bn_node_name, assigned_target });
222 ARM_COMPUTE_LOG_GRAPH_VERBOSE(
"Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n");
226 template <
typename N>
231 auto *n_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->
producer());
232 auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->
consumer());
237 if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
249 <<
" with Activation Layer node with ID : " << output_edge->
consumer_id() << std::endl);
252 if(n_node->output(0)->accessor() ==
nullptr)
255 n_node->set_fused_activation(act_node->activation_info());
265 template <
typename N>
268 auto *pad_node = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->
producer());
269 auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->
consumer());
271 const Edge *input_edge = pad_node->input_edge(0);
272 if(input_edge !=
nullptr && input_edge->tensor() !=
nullptr && pad_node->output(0)->accessor() ==
nullptr 273 && pad_node->pad_value().get<
float>() == 0.0)
275 const DataLayout layout = input_edge->tensor()->desc().layout;
276 const PaddingList padding_list = pad_node->padding();
281 const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] :
PaddingInfo(0, 0);
282 const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] :
PaddingInfo(0, 0);
290 conv_info.
stride().second,
293 conv_info.
pad_top() + pad_h.first,
296 conv_node->set_convolution_info(new_conv_info);
303 for(
auto &driver_node : pad_driver_nodes)
305 g.
add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0);
311 template <
typename N1,
typename N2,
typename F,
typename... Args>
312 void fuse_layer(
Graph &g, std::function<
bool(
INode &)>
const &prec,
const F fuse_fcn, Args &&... optional_arguments)
317 for(
unsigned int i = 0; i < g.
nodes().size(); ++i)
319 auto node = g.
node(i);
321 if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
323 const auto output_edge_id = *node->
output_edges().begin();
324 const auto output_edge = g.
edge(output_edge_id);
327 if((output_edge !=
nullptr) && (output_edge->consumer() !=
nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
329 fuse_fcn(g, output_edge, optional_arguments...);
345 #define MAX_VALIDE_COMBINATION 4 346 #define MAX_POST_OP_NUM 3 363 for(
int j = 0; j < len; ++j)
365 if(post_op_type[j] != valide_post_op_type[i][j])
381 unsigned int op_idx = 0;
383 for(
const auto &post_op : post_op_node_list)
385 switch(post_op->type())
389 auto *eltwise_node = arm_compute::utils::cast::polymorphic_downcast<EltwiseLayerNode *>(post_op);
392 fused_node->
post_op_info_list().push_back(std::make_unique<ConvPostOpInfoEltwiseAdd>(prev_op_dst_pos, eltwise_node->convert_policy()));
398 auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(post_op);
401 fused_node->
post_op_info_list().push_back(std::make_unique<ConvPostOpInfoActivation>(act_node->activation_info()));
411 if(op_idx == post_op_node_list.size() - 1)
424 std::list<INode *>
get_post_op_list(
Graph &g,
int &eltwise_operand_id,
int &prev_op_dst_pos,
unsigned int conv_node_id,
const std::set<Activation> &supported_fused_activations)
426 std::list<INode *> post_op_node_list = {};
427 NodeID prev_op_dst_id = conv_node_id;
432 auto current_node = g.
node(conv_node_id);
434 while(post_op_node_list.size() < 3)
438 auto current_output_edge_id = current_node->
output_edges().begin();
439 auto current_output_edge = g.
edge(*current_output_edge_id);
440 auto post_op_node = current_output_edge->
consumer();
442 bool fusable_post_op =
false;
443 if(post_op_node !=
nullptr && post_op_node->output_edges().size() > 0)
445 switch(post_op_node->type())
449 auto *eltwise_node = arm_compute::utils::cast::polymorphic_downcast<EltwiseLayerNode *>(post_op_node);
451 if(eltwise_node->output(0)->accessor() ==
nullptr)
453 post_op_node_list.push_back(post_op_node);
454 fusable_post_op =
true;
455 post_op_type_list[post_op_idx++] = eltwise_node->type();
458 const auto eltwise_input_id_0 = eltwise_node->input_edge(0)->producer_id();
459 const auto eltwise_input_id_1 = eltwise_node->input_edge(1)->producer_id();
460 if(eltwise_input_id_0 == prev_op_dst_id)
462 eltwise_operand_id = eltwise_input_id_1;
465 else if(eltwise_input_id_1 == prev_op_dst_id)
467 eltwise_operand_id = eltwise_input_id_0;
479 auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(post_op_node);
482 if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
486 if(act_node->output(0)->accessor() ==
nullptr)
488 post_op_node_list.push_back(post_op_node);
489 fusable_post_op =
true;
490 post_op_type_list[post_op_idx++] = act_node->type();
491 prev_op_dst_id = act_node->id();
506 if(post_op_node->output_edges().size() == 1 && fusable_post_op ==
true)
508 current_node = post_op_node;
518 if(post_op_node_list.size() > 0)
520 bool fuse_with_post_op =
check_post_op_type(post_op_type_list, post_op_node_list.size());
521 if(!fuse_with_post_op)
523 post_op_node_list.clear();
527 return post_op_node_list;
545 auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->
producer());
556 if(conv_node->output(0)->accessor() ==
nullptr)
559 const Edge *input_edge = conv_node->input_edge(1);
560 if(input_edge !=
nullptr && input_edge->
tensor() !=
nullptr)
577 int eltwise_operand_id = 0;
578 int prev_op_dst_pos = 0;
579 std::list<INode *> post_op_node_list =
get_post_op_list(g, eltwise_operand_id, prev_op_dst_pos, conv_node_id, supported_fused_activations);
581 if(post_op_node_list.size() == 0)
587 const Target assigned_target = conv_node->assigned_target();
590 const auto conv_input_id = conv_node->input_edge(0)->producer_id();
591 const auto conv_weights_id = conv_node->input_edge(1)->producer_id();
592 const auto conv_info = conv_node->convolution_info();
593 const auto conv_method = conv_node->convolution_method();
594 const auto num_groups = conv_node->num_groups();
595 FastMathHint fast_math_hint = conv_node->fast_math_hint();
604 if(conv_node->input_edge(2) !=
nullptr)
606 auto conv_bias_id = conv_node->input_edge(2)->producer_id();
610 auto it = std::find_if(post_op_node_list.begin(),
611 post_op_node_list.end(),
612 [&](
const INode * nd)
617 if(it != post_op_node_list.end())
624 auto fused_node = g.
node(fused_id);
630 post_op_node_list.clear();
644 auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(output_edge->
producer());
654 if(conv_node->output(0)->accessor() ==
nullptr)
657 const Edge *input_edge = conv_node->input_edge(1);
658 if(input_edge !=
nullptr && input_edge->
tensor() !=
nullptr)
675 int eltwise_operand_id = 0;
676 int prev_op_dst_pos = 0;
677 std::list<INode *> post_op_node_list =
get_post_op_list(g, eltwise_operand_id, prev_op_dst_pos, conv_node_id, supported_fused_activations);
679 if(post_op_node_list.size() == 0)
685 const Target assigned_target = conv_node->assigned_target();
688 const auto conv_input_id = conv_node->input_edge(0)->producer_id();
689 const auto conv_weights_id = conv_node->input_edge(1)->producer_id();
690 const auto bn_mean_id = conv_node->input_edge(3)->producer_id();
691 const auto bn_var_id = conv_node->input_edge(4)->producer_id();
692 const auto conv_info = conv_node->convolution_info();
693 const auto conv_method = conv_node->convolution_method();
694 const auto num_groups = conv_node->num_groups();
695 FastMathHint fast_math_hint = conv_node->fast_math_hint();
699 const float epsilon = conv_node->epsilon();
708 if(conv_node->input_edge(2) !=
nullptr)
710 auto conv_bias_id = conv_node->input_edge(2)->producer_id();
717 if(conv_node->input_edge(5) !=
nullptr)
719 const auto bn_beta_id = conv_node->input_edge(5)->producer_id();
723 if(conv_node->input_edge(6) !=
nullptr)
725 const auto bn_gamma_id = conv_node->input_edge(6)->producer_id();
730 auto it = std::find_if(post_op_node_list.begin(),
731 post_op_node_list.end(),
732 [&](
const INode * nd)
737 if(it != post_op_node_list.end())
743 auto fused_node = g.
node(fused_id);
746 auto conv_node_name = conv_node->name();
749 std::string post_ops_name =
"";
750 for(
auto &post_op : post_op_node_list)
752 post_ops_name += post_op->name();
754 fused_node->set_common_node_parameters(
NodeParams{ conv_node->
name() +
"+" + post_ops_name, assigned_target });
759 post_op_node_list.clear();
770 template <
typename N1,
typename F,
typename... Args>
771 void fuse_layer(
Graph &g, std::function<
bool(
INode &)>
const &prec,
const F fuse_fcn, Args &&... optional_arguments)
776 for(
unsigned int i = 0; i < g.
nodes().size(); ++i)
778 auto node = g.
node(i);
780 if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
782 const auto output_edge_id = *node->
output_edges().begin();
783 const auto output_edge = g.
edge(output_edge_id);
786 if((output_edge !=
nullptr) && (output_edge->consumer() !=
nullptr) && prec(*output_edge->producer()))
788 fuse_fcn(g, output_edge, i, optional_arguments...);
797 return "NodeFusionMutator";
816 auto empty_prec = [](
INode &)
820 auto cl_target_prec = [](
INode &
n)
824 auto qs8_prec = [&g](
INode &
n)
828 const auto output_edge_id = *
n.output_edges().begin();
829 const auto output_edge = g.
edge(output_edge_id);
831 const bool same_qinfo =
n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
834 return (output_qasymm8 && same_qinfo) || !output_qasymm8;
839 detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec, detail::fuse_pad_with_convolution<ConvolutionLayerNode>);
840 detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>(g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>);
845 detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
846 detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
847 detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
848 detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
849 detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
Edge * input_edge(size_t idx) const
Returns the edge of a given input of the node.
bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list)
Check if padding is in height and/or width dimensions.
DataType data_type
Data type.
TensorShape shape
Tensor shape.
void set_assigned_target(Target target)
Sets the final execution target.
std::string name() const
Returns node's name.
void configure_tensor(Tensor *tensor)
Configures tensor.
INode * consumer() const
Returns consumer node.
const std::set< EdgeID > & output_edges() const
Returns output edge set.
Fused Depthwise Convolution Batch Normalization node.
DimensionRoundingType round() const
Get the rounding type.
static constexpr NodeType node_type
std::vector< NodeIdxPair > get_driving_nodes(const INode &node)
Get the list of driving nodes of a given node.
std::vector< PaddingInfo > PaddingList
List of padding information.
Tensor * tensor() const
Returns the tensor associated with this edge.
NodeID add_node(Ts &&... args)
Adds a node to the graph.
void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception is thrown.
unsigned int pad_top() const
Get the top padding.
std::vector< NodeIdxPair > get_driver_nodes(const INode &node)
Get the list of driver nodes of a given node.
NodeType valide_post_op_type[4][3]
void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
void set_accessor(std::unique_ptr< ITensorAccessor > accessor)
Sets the backend tensor accessor.
Copyright (c) 2017-2022 Arm Limited.
Batch Normalization node.
std::list< INode * > get_post_op_list(Graph &g, int &eltwise_operand_id, int &prev_op_dst_pos, unsigned int conv_node_id, const std::set< Activation > &supported_fused_activations)
TensorDescriptor & desc()
TensorInfo metadata accessor.
void fuse_convolution_batch_normalization_with_post_ops(Graph &g, const Edge *output_edge, unsigned int conv_node_id, const std::set< Activation > &supported_fused_activations)
void fuse_convolution_with_post_ops(Graph &g, const Edge *output_edge, unsigned int conv_node_id, const std::set< Activation > &supported_fused_activations)
Fuses the following operators:
Exponential Linear Unit ( f(x) = x for x > 0, a * (exp(x) - 1) otherwise )
void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode *old_node, bool add_output_tensor)
Tensor * output(size_t idx) const
Returns the tensor of a given output of the node.
quantized, asymmetric, unsigned fixed-point 8-bit number
T z() const
Alias to access the size of the third dimension.
const unsigned int num_groups
bool remove_node(NodeID nid)
Remove the node with the given ID.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
NodeID id() const
Returns node's ID.
NodeID producer_id() const
Returns producer node id.
unsigned int pad_right() const
Get the right padding.
void fuse_layer(Graph &g, std::function< bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
Padding and stride information class.
EdgeID add_connection(NodeID source, size_t source_idx, NodeID sink, size_t sink_idx)
Adds a connection between two nodes.
Batch Normalization node.
FastMathHint
Enable or disable fast math for Convolution layer.
#define MAX_VALIDE_COMBINATION
Check valid combinations:
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
static constexpr NodeType node_type
const std::vector< NodeID > & nodes(NodeType type)
Returns the graph nodes of a given type.
void fuse_convolution_with_post_op(Graph &g, INode *fused_node, std::list< INode *> post_op_node_list, int prev_op_dst_pos)
const char * name() override
Returns mutator name.
Upper Bounded Rectifier ( f(x) = min(a, x) )
MutationType
Mutation type.
MutationType type() const override
Returns mutation type.
std::pair< uint32_t, uint32_t > PaddingInfo
Padding information as a pair of unsigned int start/end.
const INode * node(NodeID id) const
Get node object given its id.
Num samples, height, width, channels.
std::string name
Node name.
#define ARM_COMPUTE_LOG_GRAPH_VERBOSE(x)
ConvolutionMethod
Supported Convolution layer methods.
const Edge * edge(EdgeID id) const
Get edge object given its id.
NodeID consumer_id() const
Returns sink node id.
bool check_post_op_type(NodeType *post_op_type, int len)
T y() const
Alias to access the size of the second dimension.
void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set< Activation > &supported_fused_activations)
unsigned int pad_bottom() const
Get the bottom padding.
DataLayout layout
Data layout.
DataType
Available data types.
unsigned int pad_left() const
Get the left padding.
OpenCL capable target device.
INode * producer() const
Returns producer node.
DataLayout
[DataLayout enum definition]
virtual void mutate(Graph &g) override
Walk the graph and perform a specific mutation.
std::unique_ptr< ITensorAccessor > extract_accessor()
Extracts accessor from the tensor.
size_t get_dimension_idx(DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get index of a tensor's given dimension depending on its layout.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
void fuse_pad_with_convolution(Graph &g, const Edge *output_edge)
const std::list< std::unique_ptr< ConvPostOpInfo > > & post_op_info_list() const
Post operator info list.