Compute Library
 22.08
depthwise_depthfirst_generic_quantized.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
28 
29 #include "arm_gemm.hpp"
31 
32 #ifdef CYCLE_PROFILING
33 #include "profiler.hpp"
34 #endif
35 
37 
38 namespace arm_conv {
39 namespace depthwise {
40 
41 template <class Strategy, unsigned OutputRows, unsigned int OutputCols>
42 class DepthwiseDepthfirstGenericQuantized : public DepthwiseDepthfirstGenericBase<Strategy, OutputRows, OutputCols>
43 {
44  using Parent = DepthwiseDepthfirstGenericBase<Strategy, OutputRows, OutputCols>;
45  using TInput = typename Parent::TInput;
46  using TAccum = typename Parent::TAccum;
47  using TOutput = typename Parent::TOutput;
48 
49  Requantize32 m_qp;
50 
51  public:
52  DepthwiseDepthfirstGenericQuantized(const DepthwiseArgs &args, const Requantize32 &qp)
53  : Parent(args), m_qp(qp)
54  {
55  }
56 
59 
60  void pack_parameters(void *buffer, const void *biases, const void *weights, size_t ld_weight_col, size_t ld_weight_row) override
61  {
62  m_qp.bias = static_cast<const TAccum *>(biases);
63  Parent::pack_parameters(buffer, biases, weights, ld_weight_col, ld_weight_row);
64  }
65 
66  using DepthwiseDepthfirstGenericBase<Strategy, OutputRows, OutputCols>::execute;
67  void execute(
68  const unsigned int batches,
69  const unsigned int input_height,
70  const unsigned int input_width,
71  const unsigned int input_channels,
72  const PaddingValues &padding,
73  const void *const _input,
74  const size_t ld_input_col,
75  const size_t ld_input_row,
76  const size_t ld_input_batch,
77  const void *const parameters,
78  const unsigned int output_height,
79  const unsigned int output_width,
80  void *const _output,
81  const size_t ld_output_col,
82  const size_t ld_output_row,
83  const size_t ld_output_batch,
84  void *const _working_space,
85  const unsigned int thread_id,
86  const unsigned int n_threads
87  ) const override
88  {
89  Strategy strat(this->m_args.cpu_info);
90 #ifdef CYCLE_PROFILING
91  arm_gemm::profiler prof;
92 #endif
93 
94  // Create a function to initialise the input buffer
95  const auto initialise_input_buffer = [this] (TInput *const buffer, const unsigned int n) {
96  std::memset(buffer, static_cast<TInput>(m_qp.a_offset), n * sizeof(TInput));
97  };
98 
99  // Create a function to execute a tile of work
100  const auto tile_fn = [&] (const TInput *const *const inptrs, TOutput *const * const outptrs) {
101 #ifdef CYCLE_PROFILING
102  auto p = prof.ScopedProfiler(
103  PROFILE_KERNEL,
104  (unsigned long) (OutputRows * OutputCols * this->m_args.kernel_rows* this->m_args.kernel_cols)
105  );
106 #endif
107  strat.kernel(inptrs, outptrs, parameters, m_qp,
108  this->m_args.kernel_rows * this->m_args.kernel_cols,
109  this->m_args.input_channels);
110  };
111 
112  // Call into a parent utility function to do the actual work.
113  Parent::execute_tiles(
114  tile_fn, initialise_input_buffer,
115  batches, input_height, input_width, input_channels, padding,
116  _input, ld_input_col, ld_input_row, ld_input_batch,
117  output_height, output_width,
118  _output, ld_output_col, ld_output_row, ld_output_batch,
119  _working_space, thread_id, n_threads
120  );
121  }
122 };
123 
124 } // namespace depthwise
125 } // namespace arm_conv
const int32_t * bias
Definition: arm_gemm.hpp:172
DepthwiseDepthfirstGenericQuantized(const DepthwiseArgs &args, const Requantize32 &qp)
const size_t input_height
Definition: impl.cpp:61
void pack_parameters(void *_buffer, const int32_t *biases, const T *weights, size_t ld_weight_col, size_t ld_weight_row, const DepthwiseArgs &args, const arm_gemm::Requantize32 &qp, const arm_gemm::VLType vl_type, const unsigned int accumulator_depth_vl)
const size_t input_width
Definition: impl.cpp:62
Strategy
Types.
Definition: GemmTuner.py:41
std::unique_ptr< ParametersLibrary > parameters
Definition: Framework.cpp:46
DepthwiseDepthfirstGenericQuantized & operator=(DepthwiseDepthfirstGenericQuantized &)=delete
void pack_parameters(void *buffer, const void *biases, const void *weights, size_t ld_weight_col, size_t ld_weight_row) override
template UniqueDepthwiseCommon< float > depthwise(const DepthwiseArgs &, const Nothing &)
unsigned int batches
void execute(const unsigned int batches, const unsigned int input_height, const unsigned int input_width, const unsigned int input_channels, const PaddingValues &padding, const void *const _input, const size_t ld_input_col, const size_t ld_input_row, const size_t ld_input_batch, const void *const parameters, const unsigned int output_height, const unsigned int output_width, void *const _output, const size_t ld_output_col, const size_t ld_output_row, const size_t ld_output_batch, void *const _working_space, const unsigned int thread_id, const unsigned int n_threads) const override