Compute Library
 22.08
8b_mla.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "8b_mla.hpp"
26 
28  const VLType vec_type,
29  const unsigned int acc_depth,
30  const unsigned int kernel_rows,
31  const unsigned int kernel_cols,
32  const unsigned int n_input_channels
33 )
34 {
35  const auto per_iter = acc_depth * arm_gemm::utils::get_vector_length<int32_t>(vec_type);
36  return arm_gemm::roundup((long unsigned int) n_input_channels, per_iter) * kernel_rows * kernel_cols * sizeof(int8_t);
37 }
38 
40  const VLType vec_type,
41  const unsigned int acc_depth,
42  const unsigned int kernel_rows,
43  const unsigned int kernel_cols,
44  const unsigned int n_channels,
45  void *_outptr,
46  const void *_weights,
47  size_t ld_weight_col,
48  size_t ld_weight_row
49 )
50 {
51  int8_t *outptr = reinterpret_cast<int8_t *>(_outptr);
52  const int8_t *weights = reinterpret_cast<const int8_t *>(_weights);
53 
54  // Get the strides
55  ld_weight_col = (ld_weight_col == 0) ? n_channels * sizeof(int8_t) : ld_weight_col;
56  ld_weight_row = (ld_weight_row == 0) ? kernel_cols * ld_weight_col : ld_weight_row;
57 
58  // Pack into per-iter chunks.
59  const auto per_iter = acc_depth * arm_gemm::utils::get_vector_length<int32_t>(vec_type);
60  for (unsigned int c = 0; c < n_channels; c += per_iter)
61  {
62  auto weight_row = weights + c;
63  const auto to_copy = std::min<unsigned int>(per_iter, n_channels - c);
64 
65  for (unsigned int i = 0; i < kernel_rows; i++)
66  {
67  auto weight_col = weight_row;
68 
69  for (unsigned int j = 0; j < kernel_cols; j++)
70  {
71  memcpy(outptr, weight_col, to_copy);
72  outptr += per_iter;
73  weight_col += ld_weight_col;
74  }
75 
76  weight_row += ld_weight_row;
77  }
78  }
79 }
80 
81 namespace arm_conv {
82 namespace depthwise {
83 
// Each invocation below passes, in order: ARCH, TYPENAME, TYPE, VEC_TYPE,
// ACC_DEPTH, KERN_ROWS, KERN_COLS (the parameter list of ADD_IMPLEMENTATION,
// which is defined in 8b_mla.hpp). The four lines cover the a64 architecture
// for signed (s8q/int8_t) and unsigned (u8q/uint8_t) types, each with an
// accumulator depth of 2 and with 3x3 and 5x5 kernels.
// NOTE(review): the macro body is not visible here; presumably it emits the
// per-configuration implementation definitions - confirm against 8b_mla.hpp.
84 ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 3, 3)
85 ADD_IMPLEMENTATION(a64, s8q, int8_t, None, 2, 5, 5)
86 ADD_IMPLEMENTATION(a64, u8q, uint8_t, None, 2, 3, 3)
87 ADD_IMPLEMENTATION(a64, u8q, uint8_t, None, 2, 5, 5)
88 
89 } // namespace depthwise
90 } // namespace arm_conv
T roundup(const T a, const T b)
Definition: utils.hpp:70
void generic_pack(const VLType vec_type, const unsigned int acc_depth, const unsigned int kernel_rows, const unsigned int kernel_cols, const unsigned int n_channels, void *_outptr, const void *_weights, size_t ld_weight_col, size_t ld_weight_row)
Definition: 8b_mla.cpp:39
#define ADD_IMPLEMENTATION(ARCH, TYPENAME, TYPE, VEC_TYPE, ACC_DEPTH, KERN_ROWS, KERN_COLS)
Definition: 8b_mla.hpp:53
size_t generic_get_packed_size(const VLType vec_type, const unsigned int acc_depth, const unsigned int kernel_rows, const unsigned int kernel_cols, const unsigned int n_input_channels)
Definition: 8b_mla.cpp:27
template UniqueDepthwiseCommon< float > depthwise(const DepthwiseArgs &, const Nothing &)