Compute Library
 22.08
depthwise_depthfirst_generic_multiplier_quantized.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
28 
29 #ifdef CYCLE_PROFILING
30 #include "profiler.hpp"
31 #endif
32 
34 
35 namespace arm_conv {
36 namespace depthwise {
37 
38 template <class strategy>
40 {
41  using TInput = typename strategy::input_type;
42  using TWeight = typename strategy::weight_type;
43  using TOutput = typename strategy::return_type;
44  using TAccum = typename strategy::bias_type;
45 
47 
49 
50  public:
52  : Parent(args), m_qp(qp)
53  {
54  }
55 
58 
/**
 * Record the bias pointer and pack the weights into `buffer`.
 *
 * Only the pointer `biases` is stored (in m_qp.bias, viewed as `const TAccum *`);
 * nothing is copied out of the bias array here. execute() later offsets that
 * pointer and hands it to the kernel.
 * NOTE(review): this implies the bias array must stay valid for as long as
 * execute() is called - confirm against the callers.
 *
 * @param buffer        Destination for the packed weights, treated as `TWeight *`.
 * @param biases        Bias values, treated as `const TAccum *`. May be null:
 *                      execute() checks m_qp.bias against nullptr before use.
 * @param weights       Source weights, treated as `const TWeight *`.
 * @param ld_weight_col Forwarded unchanged to Parent::pack_weights.
 * @param ld_weight_row Forwarded unchanged to Parent::pack_weights.
 */
59  void pack_parameters(void *buffer, const void *biases, const void *weights, size_t ld_weight_col, size_t ld_weight_row) override
60  {
    // Keep the bias pointer alongside the other requantisation parameters.
61  m_qp.bias = static_cast<const TAccum *>(biases);
    // The weight packing itself is delegated to the parent class.
62  Parent::pack_weights(static_cast<TWeight *>(buffer), static_cast<const TWeight *>(weights), ld_weight_col, ld_weight_row);
63  }
64 
65  using Parent::execute;
/**
 * Run the quantized depthwise kernel by delegating to Parent::execute_tiles.
 *
 * A `strategy` object is built from this->m_args.cpu_info, wrapped in a
 * per-tile callback (`tile_fn`), and that callback is handed to
 * Parent::execute_tiles together with m_qp.a_offset and every argument of this
 * function, all forwarded unchanged and in the same order.
 *
 * The meaning of the individual arguments (dimensions, strides, working space,
 * thread partitioning) is defined by Parent::execute_tiles, which is not
 * visible in this file; nothing here inspects or modifies them.
 *
 * When CYCLE_PROFILING is defined, an arm_gemm::profiler is created and each
 * kernel invocation is wrapped in a scoped PROFILE_KERNEL measurement.
 */
66  void execute(
67  const unsigned int batches,
68  const unsigned int input_height,
69  const unsigned int input_width,
70  const unsigned int input_channels,
71  const PaddingValues &padding,
72  const void *const _input,
73  const size_t ld_input_col,
74  const size_t ld_input_row,
75  const size_t ld_input_batch,
76  const void *const parameters,
77  const unsigned int output_height,
78  const unsigned int output_width,
79  void *const _output,
80  const size_t ld_output_col,
81  const size_t ld_output_row,
82  const size_t ld_output_batch,
83  void *const _working_space,
84  const unsigned int thread_id,
85  const unsigned int n_threads
86  ) const override
87  {
88  strategy strat(this->m_args.cpu_info);
89 #ifdef CYCLE_PROFILING
90  arm_gemm::profiler prof;
91 #endif
92 
93  // Get a function to call for each point of the output
    // The lambda captures by reference (strat, prof when profiling, and this);
    // it is only used by the execute_tiles call below, within this scope.
    // Its fourth parameter is unnamed and ignored.
94  auto tile_fn = [&] (const TInput **inptrs,
95  TOutput **outptrs,
96  const TWeight *weights,
97  const unsigned int,
98  const unsigned int start_output_channel) {
99 #ifdef CYCLE_PROFILING
    // Work estimate reported to the profiler: output tile size (rows * cols)
    // times channel multiplier times kernel size (rows * cols).
100  auto p = prof.ScopedProfiler(PROFILE_KERNEL, (unsigned long)(strategy::output_rows() * strategy::output_cols() * this->m_args.channel_multiplier * this->m_args.kernel_rows * this->m_args.kernel_cols));
101 #endif
102  strat.kernel(
103  inptrs, outptrs, weights,
    // Bias and per-channel arrays are advanced to start_output_channel;
    // a null pointer is passed through as null rather than being offset.
104  m_qp.bias == nullptr ? nullptr : m_qp.bias + start_output_channel,
105  this->kernel_points(),
106  this->m_args.channel_multiplier,
107  m_qp.per_channel_left_shifts == nullptr ? nullptr : m_qp.per_channel_left_shifts + start_output_channel,
108  m_qp.per_channel_muls == nullptr ? nullptr : m_qp.per_channel_muls + start_output_channel,
109  m_qp.per_channel_right_shifts == nullptr ? nullptr : m_qp.per_channel_right_shifts + start_output_channel,
    // The full requantisation parameter block is passed as well.
110  m_qp
111  );
112  };
113 
    // NOTE(review): m_qp.a_offset is passed as the second argument; how
    // execute_tiles uses it is not visible here (presumably the input offset
    // used for padded values - confirm in Parent::execute_tiles).
114  Parent::execute_tiles(
115  tile_fn, m_qp.a_offset,
116  batches, input_height, input_width, input_channels, padding,
117  _input, ld_input_col, ld_input_row, ld_input_batch,
118  parameters,
119  output_height, output_width,
120  _output, ld_output_col, ld_output_row, ld_output_batch,
121  _working_space, thread_id, n_threads
122  );
123  }
124 };
125 
126 } // namespace depthwise
127 } // namespace arm_conv
const int32_t * bias
Definition: arm_gemm.hpp:172
const int32_t * per_channel_left_shifts
Definition: arm_gemm.hpp:181
const size_t input_height
Definition: impl.cpp:61
const size_t input_width
Definition: impl.cpp:62
void pack_parameters(void *buffer, const void *biases, const void *weights, size_t ld_weight_col, size_t ld_weight_row) override
std::unique_ptr< ParametersLibrary > parameters
Definition: Framework.cpp:46
void execute(const unsigned int batches, const unsigned int input_height, const unsigned int input_width, const unsigned int input_channels, const PaddingValues &padding, const void *const _input, const size_t ld_input_col, const size_t ld_input_row, const size_t ld_input_batch, const void *const parameters, const unsigned int output_height, const unsigned int output_width, void *const _output, const size_t ld_output_col, const size_t ld_output_row, const size_t ld_output_batch, void *const _working_space, const unsigned int thread_id, const unsigned int n_threads) const override
const StratType * strategy
DepthwiseDepthfirstGenericWithMultiplierQuantized & operator=(DepthwiseDepthfirstGenericWithMultiplierQuantized &)=delete
DepthwiseDepthfirstGenericWithMultiplierQuantized(const DepthwiseArgs &args, const arm_gemm::Requantize32 &qp)
const int32_t * per_channel_right_shifts
Definition: arm_gemm.hpp:182
template UniqueDepthwiseCommon< float > depthwise(const DepthwiseArgs &, const Nothing &)
unsigned int batches
const int32_t * per_channel_muls
Definition: arm_gemm.hpp:183