Compute Library
 21.11
CpuDepthwiseConv2dAssemblyDispatch.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
26 
29 #include "src/common/utils/Log.h"
30 #include "src/core/CPP/Validate.h"
34 
35 namespace arm_compute
36 {
37 namespace cpu
38 {
// Private implementation state, owned by the dispatcher through _pImpl.
// NOTE(review): listing line 43 is absent from this view; the member functions
// below read and write _pImpl->mem_req, so that member is presumably declared there.
39 struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl
40 {
 // Assembly wrapper kernel; starts out null and is assigned at the end of configure().
41  std::unique_ptr<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel> asm_kernel{ nullptr };
 // Set to true by prepare() after the parameters have been packed; reset to false by configure().
42  bool is_prepared{ false };
44 };
45 
// Constructor: its only visible action is to allocate the LocalImpl that backs _pImpl.
// NOTE(review): the signature (listing line 47) is missing from this view.
46 #ifndef DOXYGEN_SKIP_THIS
48  : _pImpl(std::make_unique<LocalImpl>())
49 {
50 }
51 #endif /* DOXYGEN_SKIP_THIS */
52 
54 
// configure(src, weights, bias, dst, info): validates the arguments, builds and
// configures the assembly wrapper kernel, and records the auxiliary memory it needs.
// On validation failure it returns without creating a kernel.
// NOTE(review): the first signature line and the `dst` parameter line (listing
// lines 55 and 58) are missing from this view.
56  const ITensorInfo *weights,
57  const ITensorInfo *bias,
59  const ConvolutionInfo &info)
60 {
61  ARM_COMPUTE_LOG_PARAMS(src, weights, bias, dst, info);
 // CPU description and thread count are both taken from the scheduler singleton.
62  const CPUInfo &ci = NEScheduler::get().cpu_info();
63  const unsigned int num_threads = NEScheduler::get().num_threads();
 // A (re)configuration means prepare() has to pack the parameters again.
64  _pImpl->is_prepared = false;
65 
66  // If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
 // validate() returns a Status; the negation relies on Status being testable as a boolean.
67  if(!CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, bias, dst, info))
68  {
69  return;
70  }
71 
72  auto dwc_wrapper = std::make_unique<kernels::CpuDepthwiseConv2dAssemblyWrapperKernel>();
 // NOTE(review): std::make_unique signals allocation failure by throwing, so this check is defensive only.
73  ARM_COMPUTE_ERROR_ON(dwc_wrapper == nullptr);
74  dwc_wrapper->configure(src, weights, bias, dst, info, ci);
75 
76  // Compute memory requirements for assembly kernels
 // Both requests below use the same alignment value (presumably in bytes - TODO confirm).
77  constexpr size_t alignment = 4096;
 // ACL_INT_0: working space, sized from the thread count and src->dimension(0).
 // ACL_INT_1: storage that prepare() later fills via pack_parameters().
 // NOTE(review): mem_req is only appended to here and nothing visible clears it, so a
 // second successful configure() would add two more entries - confirm this is intended.
78  _pImpl->mem_req.push_back({ TensorType::ACL_INT_0, dwc_wrapper->get_working_size(num_threads, src->dimension(0)), alignment });
79  _pImpl->mem_req.push_back({ TensorType::ACL_INT_1, dwc_wrapper->get_storage_size(), alignment });
 // Hand ownership of the fully configured kernel to the implementation object.
80  _pImpl->asm_kernel = std::move(dwc_wrapper);
81 }
82 
// validate(src, weights, bias, dst, info): forwards unchanged to the assembly wrapper
// kernel's own validate() and returns its result.
// NOTE(review): the signature (listing line 83) is missing from this view.
84 {
85  return kernels::CpuDepthwiseConv2dAssemblyWrapperKernel::validate(src, weights, bias, dst, info);
86 }
87 
// workspace(): returns the memory requirements that configure() appended to mem_req.
// NOTE(review): the signature (listing line 88) is missing from this view.
89 {
90  return _pImpl->mem_req;
91 }
92 
// NOTE(review): the signature (listing line 93) and the body (listing lines 95-96) of
// this definition are missing from this view. It is presumably is_activation_supported(),
// which the file's declaration index describes as checking whether an activation is
// supported by the assembly kernels - confirm against the full source.
94 {
97 }
98 
// run(tensors): raises an error if the pack is empty, calls prepare(), then hands
// the assembly kernel to the scheduler.
// NOTE(review): the signature (listing line 99) is missing from this view.
100 {
101  ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
102 
 // prepare() is guarded by is_prepared, so the packing happens on the first call only.
103  prepare(tensors);
104 
 // Schedule the kernel over its own window, passing Window::DimY as the scheduling hint.
 // NOTE(review): asm_kernel is dereferenced unconditionally and is still null if
 // configure() returned early - callers presumably check is_configured() first.
105  NEScheduler::get().schedule_op(_pImpl->asm_kernel.get(), Window::DimY, _pImpl->asm_kernel->window(), tensors);
106 }
107 
// prepare(tensors): packs the weights and optional bias into the ACL_INT_1 storage
// tensor. The work is skipped once is_prepared has been set.
// NOTE(review): the signature (listing line 108) is missing from this view.
109 {
110  if(!_pImpl->is_prepared)
111  {
112  // Pack weights and bias
 // Weights come from ACL_SRC_1, bias from ACL_SRC_2; ACL_INT_1 is the storage
 // slot that configure() sized with get_storage_size().
113  const ITensor *weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
114  const ITensor *bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
115  ITensor *storage = tensors.get_tensor(TensorType::ACL_INT_1);
116 
 // Each pointer is the buffer start plus the offset of the first element.
 // Only bias is null-checked; weights and storage are dereferenced unconditionally.
117  const auto weights_ptr = weights->buffer() + weights->info()->offset_first_element_in_bytes();
118  const auto bias_ptr = (bias) ? bias->buffer() + bias->info()->offset_first_element_in_bytes() : nullptr;
119  auto parameters_ptr = storage->buffer() + storage->info()->offset_first_element_in_bytes();
120 
121  const auto weights_shape = weights->info()->tensor_shape();
122  const auto weights_padding = weights->info()->padding();
123 
 // Leading dimensions include padding: a column spans dimension 0 plus left/right
 // padding, a row spans that many columns times dimension 1 plus top/bottom padding.
 // Presumably counted in elements rather than bytes - TODO confirm against pack_parameters().
124  const size_t ld_weights_col = weights_shape[0] + weights_padding.left + weights_padding.right;
125  const size_t ld_weights_row = ld_weights_col * (weights_shape[1] + weights_padding.top + weights_padding.bottom);
126  _pImpl->asm_kernel->pack_parameters(parameters_ptr, bias_ptr, weights_ptr, ld_weights_col, ld_weights_row);
127 
 // After packing, the original weights and bias tensors are flagged as unused.
128  weights->mark_as_unused();
129  if(bias != nullptr)
130  {
131  bias->mark_as_unused();
132  }
133  _pImpl->is_prepared = true;
134  }
135 }
136 } // namespace cpu
137 } // namespace arm_compute
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const ConvolutionInfo &info)
Indicates whether or not this function can be used to process the given parameters.
bool empty() const
Checks if pack is empty.
Definition: ITensorPack.cpp:80
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CPUInfo & cpu_info()
Get CPU info.
Definition: IScheduler.cpp:40
Status class.
Definition: Error.h:52
Activation Layer Information class.
Definition: Types.h:1509
Interface for CPU tensor.
Definition: ITensor.h:36
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2021 Arm Limited.
const CPUInfo & ci
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:132
void mark_as_unused() const
Marks a tensor as unused.
Definition: ITensor.cpp:168
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
static bool is_activation_supported(const ActivationLayerInfo &activation)
Checks if activation is supported by the assembly kernels.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const ConvolutionInfo &info)
Initialize the function's source, destination, kernels and border_size.
virtual PaddingSize padding() const =0
Padding of tensor.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
virtual size_t offset_first_element_in_bytes() const =0
The offset from the beginning of the memory allocation to the first element of the tensor...
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
Tensor packing service.
Definition: ITensorPack.h:39
#define ARM_COMPUTE_LOG_PARAMS(...)
arm_gemm::Activation map_to_arm_gemm_activation(const ActivationLayerInfo &act)
Performs a mapping between Compute Library ActivationLayerInfo and the assembly Activation structure...
virtual unsigned int num_threads() const =0
Returns the number of threads that the SingleThreadScheduler has in its pool.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration.
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:94