ComputeLibrary/v21.08/_c_l_convolution_layer_8cpp_source.xhtml

 /*
  * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
 #include "src/core/CL/ICLKernel.h"
 #include "src/core/helpers/MemoryHelpers.h"
 #include "src/runtime/gpu/cl/operators/ClConv2d.h"
 #include "support/Cast.h"

 namespace arm_compute
 {
 using namespace arm_compute::misc::shape_calculator;
 using namespace arm_compute::experimental;
 /** Private state of CLConvolutionLayer, reached through the _impl pointer.
  *
  * Exactly one of @ref op or @ref func is populated by configure(), depending on the
  * convolution method that was selected.
  */
 struct CLConvolutionLayer::Impl
 {
     // Memory group given to manage_workspace() and scoped around every run()
     MemoryGroup memory_group{};
     // Memory manager received by the constructor; used at configure time to build the backend
     std::shared_ptr<IMemoryManager> memory_manager{};
     // Operator backend, set by configure() for the WINOGRAD, DIRECT and GEMM methods
     std::unique_ptr<opencl::IClOperator> op{};
     // Tensors passed to op->run()
     ITensorPack run_pack{};
     // Tensors passed to op->prepare()
     ITensorPack prep_pack{};
     // Auxiliary tensors produced by manage_workspace() for the operator backend
     WorkspaceData<CLTensor> workspace{};
     // Auxiliary memory requirements reported by op->workspace()
     experimental::MemoryRequirements aux_mem_req{};
     // Function backend, set by configure() for the FFT method
     std::unique_ptr<IFunction> func{};
 };

 CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _impl(std::make_unique<Impl>())
 {
     _impl->memory_manager = std::move(memory_manager);
 }

 // Defaulted out of line, at a point where Impl is a complete type.
 // NOTE(review): presumably required because _impl owns the Impl through a smart pointer
 // declared in the header — confirm against CLConvolutionLayer.h.
 CLConvolutionLayer::~CLConvolutionLayer() = default;

 /** Configure the layer using the compile context of the CLKernelLibrary singleton.
  *
  * Every argument is forwarded unchanged to the overload that takes an explicit compile context.
  */
 void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                    const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
 {
     const auto &default_context = CLKernelLibrary::get().get_compile_context();

     configure(default_context, input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
 }

 void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                                    const WeightsInfo &weights_info,
                                    const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
                                                             enable_fast_math, num_groups));

     const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);

     switch(opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info,
                                                     weights_info, CLScheduler::get().target()))
     {
         case ConvolutionMethod::WINOGRAD:
         case ConvolutionMethod::DIRECT:
         case ConvolutionMethod::GEMM:
         {
             auto f = std::make_unique<opencl::ClConv2d>();
             f->configure(compile_context, input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info);
             _impl->op = std::move(f);
             break;
         }
         case ConvolutionMethod::FFT:
         {
             auto f = std::make_unique<CLFFTConvolutionLayer>(_impl->memory_manager);
             f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
             _impl->func = std::move(f);
             break;
         }
         default:
             ARM_COMPUTE_ERROR("Not supported.");
             break;
     }

     if(_impl->op)
     {
         _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager));
         _impl->aux_mem_req  = _impl->op->workspace();
         _impl->run_pack     = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } };
         _impl->prep_pack    = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } };
         _impl->workspace    = manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack);
     }
 }

 /** Check whether the given configuration can be handled by CLConvolutionLayer.
  *
  * Rejects null input/weights/output and grouped convolution on a non-NCHW input, then delegates to the
  * validate() of the backend chosen by opencl::ClConv2d::get_convolution_method() for the scheduler's GPU target.
  *
  * @return An empty Status on success, otherwise the error Status produced by one of the checks.
  */
 Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                     const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);

     const bool grouped_on_unsupported_layout = (num_groups != 1) && (input->data_layout() != DataLayout::NCHW);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(grouped_on_unsupported_layout, "Grouping (num_groups != 1) with NHWC data layout is not supported");

     const GPUTarget         target = CLScheduler::get().target();
     const Conv2dInfo        info(conv_info, dilation, act_info, enable_fast_math, num_groups);
     const ConvolutionMethod method = opencl::ClConv2d::get_convolution_method(input, weights, output, info, weights_info, target);

     if(method == ConvolutionMethod::FFT)
     {
         // FFT-based convolution is checked by its own function.
         // NOTE(review): nullptr is passed instead of biases here, so biases are not validated on this path — confirm this is intended.
         ARM_COMPUTE_RETURN_ON_ERROR(CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math));
     }
     else if(method == ConvolutionMethod::WINOGRAD || method == ConvolutionMethod::DIRECT || method == ConvolutionMethod::GEMM)
     {
         // These three methods are all served by the ClConv2d operator
         ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClConv2d::validate(input, weights, biases, output, info, weights_info));
     }
     else
     {
         ARM_COMPUTE_ERROR("Not supported.");
     }

     return Status{};
 }

 /** Report which convolution method opencl::ClConv2d would select for the given configuration.
  *
  * This signature has no group-count parameter; the query is always made with a group count of 1.
  *
  * @return The ConvolutionMethod returned by opencl::ClConv2d::get_convolution_method().
  */
 ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                                              const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math)
 {
     const unsigned int single_group = 1;

     return opencl::ClConv2d::get_convolution_method(input, weights, output,
                                                     Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, single_group),
                                                     weights_info, gpu_target);
 }

 void CLConvolutionLayer::run()
 {
     prepare();

     MemoryGroupResourceScope scope_mg(_impl->memory_group);

     if(_impl->func)
     {
         _impl->func->run();
     }
     else
     {
         _impl->op->run(_impl->run_pack);
     }
 }

 void CLConvolutionLayer::prepare()
 {
     if(_impl->func)
     {
         _impl->func->prepare();
     }
     else
     {
         _impl->op->prepare(_impl->prep_pack);

         // Release temporary tensors that are only used in prepare stage
         release_temporaries(_impl->aux_mem_req, _impl->workspace);
     }
 }
 } // namespace arm_compute
arm_compute::MemoryGroup
Memory group.
Definition: MemoryGroup.h:43

arm_compute::ACL_SRC_2
Definition: Types.h:47

CLConvolutionLayer.h

arm_compute::CLConvolutionLayer::run
void run() override
Run the kernels contained in the function.
Definition: CLConvolutionLayer.cpp:149

arm_compute::ACL_DST
Definition: Types.h:54

arm_compute::CLConvolutionLayer::CLConvolutionLayer
CLConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
Definition: CLConvolutionLayer.cpp:52

arm_compute::CLConvolutionLayer::configure
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Set the input and output tensors.
Definition: CLConvolutionLayer.cpp:60

MemoryHelpers.h

arm_compute::CLScheduler::get
static CLScheduler & get()
Access the scheduler singleton.
Definition: CLScheduler.cpp:102

arm_compute::test::validation::weights_info
weights_info
Definition: BatchNormalizationLayer.cpp:165

ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

arm_compute::CLScheduler::target
GPUTarget target() const
Get the target GPU.
Definition: CLScheduler.cpp:45

arm_compute::test::validation::conv_info
conv_info
Definition: DFT.cpp:151

ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204

arm_compute::CLConvolutionLayer::~CLConvolutionLayer
~CLConvolutionLayer()
Default Destructor.

arm_compute::CLConvolutionLayer::prepare
void prepare() override
Prepare the function for executing.
Definition: CLConvolutionLayer.cpp:165

arm_compute::CLKernelLibrary::get
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Definition: CLKernelLibrary.cpp:39

arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:40

arm_compute::CLFFTConvolutionLayer::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CLFFTConvolutionLayer.
Definition: CLFFTConvolutionLayer.cpp:269

ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455

CLKernelLibrary.h
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...

arm_compute::CLConvolutionLayer::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLConvolutionLayer.
Definition: CLConvolutionLayer.cpp:110

arm_compute::Status
Status class.
Definition: Error.h:52

arm_compute::ConvolutionMethod
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:129

arm_compute::ActivationLayerInfo
Activation Layer Information class.
Definition: Types.h:1475

arm_compute
Copyright (c) 2017-2021 Arm Limited.
Definition: introduction.dox:24

arm_compute::experimental::MemoryRequirements
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:113

arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486

Cast.h

arm_compute::WeightsInfo
Convolution Layer Weights Information class.
Definition: Types.h:1694

ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159

arm_compute::ConvolutionMethod::DIRECT
Direct convolution.

std

arm_compute::test::validation::num_groups
const unsigned int num_groups
Definition: Im2Col.cpp:153

ShapeCalculator.h

arm_compute::ITensor::info
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.

arm_compute::PadStrideInfo
Padding and stride information class.
Definition: Types.h:647

arm_compute::release_temporaries
void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData< TensorType > &workspace)
Utility function to release tensors with lifetime marked as Prepare.
Definition: MemoryHelpers.h:122

arm_compute::ACL_SRC_0
Definition: Types.h:45

arm_compute::Conv2dInfo
Descriptor used by the Convolution function.
Definition: FunctionDescriptors.h:56

arm_compute::DataLayout::NCHW
Num samples, channels, height, width.

arm_compute::CLCompileContext
CLCompileContext class.
Definition: CLCompileContext.h:202

arm_compute::ACL_SRC_1
Definition: Types.h:46

arm_compute::ConvolutionMethod::WINOGRAD
Convolution using Winograd.

ClConv2d.h

arm_compute::MemoryGroupResourceScope
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82

arm_compute::ICLTensor
Interface for OpenCL tensor.
Definition: ICLTensor.h:42

ICLTensor.h

CLFFTConvolutionLayer.h

ICLKernel.h

arm_compute::opencl::ClConv2d::get_convolution_method
static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info, const GPUTarget gpu_target)
Static function to check if given info will return the convolution called by ClConv2d.
Definition: ClConv2d.cpp:157

arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34

arm_compute::Size2D
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34

arm_compute::CLConvolutionLayer::get_convolution_method
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation=Size2D(1U, 1U), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by CLConvolutionLayer.
Definition: CLConvolutionLayer.cpp:142

arm_compute::ConvolutionMethod::FFT
Convolution using FFT.

ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244

ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157

arm_compute::misc::shape_calculator
Definition: ShapeCalculator.h:40

KernelDescriptors.h

arm_compute::experimental
Definition: Types.h:81

arm_compute::ConvolutionMethod::GEMM
Convolution using GEMM.

arm_compute::opencl::ClConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Static function to check if given info will lead to a valid configuration of ClConv2d.
Definition: ClConv2d.cpp:119

arm_compute::ITensorInfo::data_layout
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.