Compute Library
 21.02
CLFunctionsFactory.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 #include "src/core/CL/CLKernels.h"
32 #include "support/Cast.h"
33 
34 using namespace arm_compute::utils::cast;
35 
36 namespace arm_compute
37 {
38 namespace graph
39 {
40 namespace backends
41 {
42 /** Target specific information structure used to pass information to the layer templates */
43 struct CLTargetInfo
44 {
46  using SrcTensorType = const arm_compute::ICLTensor;
47  using TensorConcreteType = CLTensor;
48  static Target TargetType;
49 };
50 
51 Target CLTargetInfo::TargetType = Target::CL;
52 
53 /** Collection of CL convolution functions */
54 struct CLConvolutionLayerFunctions
55 {
56  using GenericConvolutionLayer = CLConvolutionLayer;
57  using GEMMConvolutionLayer = CLGEMMConvolutionLayer;
58  using DirectConvolutionLayer = CLDirectConvolutionLayer;
59  using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
60 };
61 
62 /** Collection of CL element-wise functions */
63 struct CLEltwiseFunctions
64 {
65  using Addition = CLArithmeticAddition;
66  using Subtraction = CLArithmeticSubtraction;
67  using Multiplication = CLPixelWiseMultiplication;
68  using Maximum = CLElementwiseMax;
69 };
70 
71 /** Collection of CL unary element-wise functions */
72 struct CLUnaryEltwiseFunctions
73 {
74  using Exp = CLExpLayer;
75 };
76 
77 /** Function and tensor types to be used inside a CL fused convolution/batch normalization layer */
78 struct CLFusedLayerTypes
79 {
80  using ConvolutionLayer = CLConvolutionLayer;
81  using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
82  using FuseBatchNormalization = CLFuseBatchNormalization;
83 };
84 
85 // TODO (isagot01): Remove once we support heterogeneous scheduling at function level
86 /** Wrapper for the CPP Function in the OpenCL backend **/
87 class CPPWrapperFunction : public IFunction
88 {
89 public:
90  /* Default constructor */
91  CPPWrapperFunction()
92  : _tensors(), _func(nullptr)
93  {
94  }
95 
96  void run() override
97  {
98  for(auto &tensor : _tensors)
99  {
100  tensor->map(CLScheduler::get().queue());
101  }
102  _func->run();
103 
104  for(auto &tensor : _tensors)
105  {
106  tensor->unmap(CLScheduler::get().queue());
107  }
108  }
109 
110  void register_tensor(ICLTensor *tensor)
111  {
112  _tensors.push_back(tensor);
113  }
114 
115  void register_function(std::unique_ptr<IFunction> function)
116  {
117  _func = std::move(function);
118  }
119 
120 private:
121  std::vector<arm_compute::ICLTensor *> _tensors;
122  std::unique_ptr<IFunction> _func;
123 };
124 
125 namespace detail
126 {
127 // Specialized functions
128 template <>
130 {
131  validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
132 
133  // Extract IO and info
134  CLTargetInfo::TensorType *input0 = get_backing_tensor<CLTargetInfo>(node.input(0));
135  CLTargetInfo::TensorType *input1 = get_backing_tensor<CLTargetInfo>(node.input(1));
136  CLTargetInfo::TensorType *input2 = get_backing_tensor<CLTargetInfo>(node.input(2));
137  CLTargetInfo::TensorType *output = get_backing_tensor<CLTargetInfo>(node.output(0));
138  const DetectionOutputLayerInfo detect_info = node.detection_output_info();
139 
140  ARM_COMPUTE_ERROR_ON(input0 == nullptr);
141  ARM_COMPUTE_ERROR_ON(input1 == nullptr);
142  ARM_COMPUTE_ERROR_ON(input2 == nullptr);
143  ARM_COMPUTE_ERROR_ON(output == nullptr);
144 
145  // Create and configure function
146  auto func = std::make_unique<CPPDetectionOutputLayer>();
147  func->configure(input0, input1, input2, output, detect_info);
148 
149  // Log info
150  ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
151  << node.name()
152  << " Type: " << node.type()
153  << " Target: " << CLTargetInfo::TargetType
154  << " Data Type: " << input0->info()->data_type()
155  << " Input0 shape: " << input0->info()->tensor_shape()
156  << " Input1 shape: " << input1->info()->tensor_shape()
157  << " Input2 shape: " << input2->info()->tensor_shape()
158  << " Output shape: " << output->info()->tensor_shape()
159  << " DetectionOutputLayer info: " << detect_info
160  << std::endl);
161 
162  auto wrap_function = std::make_unique<CPPWrapperFunction>();
163 
164  wrap_function->register_function(std::move(func));
165  wrap_function->register_tensor(input0);
166  wrap_function->register_tensor(input1);
167  wrap_function->register_tensor(input2);
168  wrap_function->register_tensor(output);
169 
170  return std::move(wrap_function);
171 }
172 template <>
174 {
175  validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
176 
177  // Extract IO and info
178  CLTargetInfo::TensorType *input0 = get_backing_tensor<CLTargetInfo>(node.input(0));
179  CLTargetInfo::TensorType *input1 = get_backing_tensor<CLTargetInfo>(node.input(1));
180  CLTargetInfo::TensorType *input2 = get_backing_tensor<CLTargetInfo>(node.input(2));
181  CLTargetInfo::TensorType *output0 = get_backing_tensor<CLTargetInfo>(node.output(0));
182  CLTargetInfo::TensorType *output1 = get_backing_tensor<CLTargetInfo>(node.output(1));
183  CLTargetInfo::TensorType *output2 = get_backing_tensor<CLTargetInfo>(node.output(2));
184  CLTargetInfo::TensorType *output3 = get_backing_tensor<CLTargetInfo>(node.output(3));
185  const DetectionPostProcessLayerInfo detect_info = node.detection_post_process_info();
186 
187  ARM_COMPUTE_ERROR_ON(input0 == nullptr);
188  ARM_COMPUTE_ERROR_ON(input1 == nullptr);
189  ARM_COMPUTE_ERROR_ON(input2 == nullptr);
190  ARM_COMPUTE_ERROR_ON(output0 == nullptr);
191  ARM_COMPUTE_ERROR_ON(output1 == nullptr);
192  ARM_COMPUTE_ERROR_ON(output2 == nullptr);
193  ARM_COMPUTE_ERROR_ON(output3 == nullptr);
194 
195  // Create and configure function
196  auto func = std::make_unique<CPPDetectionPostProcessLayer>();
197  func->configure(input0, input1, input2, output0, output1, output2, output3, detect_info);
198 
199  // Log info
200  ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
201  << node.name()
202  << " Type: " << node.type()
203  << " Target: " << CLTargetInfo::TargetType
204  << " Data Type: " << input0->info()->data_type()
205  << " Input0 shape: " << input0->info()->tensor_shape()
206  << " Input1 shape: " << input1->info()->tensor_shape()
207  << " Input2 shape: " << input2->info()->tensor_shape()
208  << " Output0 shape: " << output0->info()->tensor_shape()
209  << " Output1 shape: " << output1->info()->tensor_shape()
210  << " Output2 shape: " << output2->info()->tensor_shape()
211  << " Output3 shape: " << output3->info()->tensor_shape()
212  << " DetectionPostProcessLayer info: " << detect_info
213  << std::endl);
214 
215  auto wrap_function = std::make_unique<CPPWrapperFunction>();
216 
217  wrap_function->register_function(std::move(func));
218  wrap_function->register_tensor(input0);
219  wrap_function->register_tensor(input1);
220  wrap_function->register_tensor(input2);
221  wrap_function->register_tensor(output0);
222  wrap_function->register_tensor(output1);
223  wrap_function->register_tensor(output2);
224  wrap_function->register_tensor(output3);
225 
226  return std::move(wrap_function);
227 }
228 } // namespace detail
229 
230 std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
231 {
232  if(node == nullptr)
233  {
234  return nullptr;
235  }
236 
237  NodeType type = node->type();
238  switch(type)
239  {
240  case NodeType::ActivationLayer:
241  return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
242  case NodeType::ArgMinMaxLayer:
243  return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
244  case NodeType::BatchNormalizationLayer:
245  return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
246  case NodeType::BoundingBoxTransformLayer:
247  return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
248  case NodeType::ChannelShuffleLayer:
249  return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
250  case NodeType::ConvolutionLayer:
251  return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
252  case NodeType::DeconvolutionLayer:
253  return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
254  case NodeType::ConcatenateLayer:
255  return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
256  case NodeType::DepthToSpaceLayer:
257  return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
258  case NodeType::DepthwiseConvolutionLayer:
259  return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
260  case NodeType::DequantizationLayer:
261  return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
262  case NodeType::DetectionOutputLayer:
263  return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
264  case NodeType::DetectionPostProcessLayer:
265  return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
266  case NodeType::EltwiseLayer:
267  return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
268  case NodeType::UnaryEltwiseLayer:
269  return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
270  case NodeType::FlattenLayer:
271  return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
272  case NodeType::FullyConnectedLayer:
273  return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
274  case NodeType::FusedConvolutionBatchNormalizationLayer:
275  return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
276  case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
277  return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
278  case NodeType::GenerateProposalsLayer:
279  return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
280  case NodeType::L2NormalizeLayer:
281  return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
282  case NodeType::NormalizationLayer:
283  return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
284  case NodeType::NormalizePlanarYUVLayer:
285  return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
286  case NodeType::PadLayer:
287  return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
288  case NodeType::PermuteLayer:
289  return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
290  case NodeType::PoolingLayer:
291  return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
292  case NodeType::PReluLayer:
293  return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
294  case NodeType::PrintLayer:
295  return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
296  case NodeType::PriorBoxLayer:
297  return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
298  case NodeType::QuantizationLayer:
299  return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
300  case NodeType::ReductionOperationLayer:
301  return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
302  case NodeType::ReorgLayer:
303  return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
304  case NodeType::ReshapeLayer:
305  return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
306  case NodeType::ResizeLayer:
307  return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
308  case NodeType::ROIAlignLayer:
309  return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
310  case NodeType::SliceLayer:
311  return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
312  case NodeType::SoftmaxLayer:
313  return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
314  case NodeType::StackLayer:
315  return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
316  case NodeType::StridedSliceLayer:
317  return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
318  default:
319  return nullptr;
320  }
321 }
322 } // namespace backends
323 } // namespace graph
324 } // namespace arm_compute
std::unique_ptr< IFunction > create_detection_output_layer< CPPDetectionOutputLayer, CLTargetInfo >(DetectionOutputLayerNode &node)
static CLScheduler & get()
Access the scheduler singleton.
TensorType
Memory type.
Definition: Types.h:38
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
decltype(strategy::transforms) typedef type
#define ARM_COMPUTE_LOG_GRAPH_INFO(x)
Definition: Logger.h:54
Copyright (c) 2017-2021 Arm Limited.
std::unique_ptr< IFunction > create_detection_post_process_layer< CPPDetectionPostProcessLayer, CLTargetInfo >(DetectionPostProcessLayerNode &node)
Node interface.
Definition: INode.h:45
NodeType
Supported nodes.
Definition: Types.h:142
FloorUKernelPtr func
Detection Output layer info.
Definition: Types.h:976
DetectionPostProcess Layer node.
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
Detection Output layer info.
Definition: Types.h:1095
virtual NodeType type() const =0
Returns node's type.
Includes all the OpenCL functions at once.