Compute Library
 22.11
impl.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
26 #include <arm_neon.h>
27 
28 namespace arm_compute
29 {
30 namespace cpu
31 {
32 void matrix_addition_f32(const ITensor *src, ITensor *dst, const Window &window, float beta)
33 {
35  const float32x4_t beta_f32 = vdupq_n_f32(beta);
36 
37  constexpr int window_step_x = 16;
38  const auto window_start_x = static_cast<int>(window.x().start());
39  const auto window_end_x = static_cast<int>(window.x().end());
40 
41  Window win = window.collapse_if_possible(window, Window::DimZ);
42  win.set(Window::DimX, Window::Dimension(0, 1, 1));
43 
44  Iterator in(src, win);
45  Iterator out(dst, win);
46 
47  execute_window_loop(win, [&](const Coordinates &)
48  {
49  const auto in_ptr = reinterpret_cast<const float *>(in.ptr());
50  const auto out_ptr = reinterpret_cast<float *>(out.ptr());
51 
52  int x = window_start_x;
53  for(; x < (window_end_x - window_step_x); x += window_step_x)
54  {
55  float32x4x4_t alpha_ab = vld4q_f32(out_ptr + x);
56  const float32x4x4_t c = vld4q_f32(in_ptr + x);
57 
58  // Multiply matrix C by its weight and accumulate
59  alpha_ab.val[0] = vmlaq_f32(alpha_ab.val[0], c.val[0], beta_f32);
60  alpha_ab.val[1] = vmlaq_f32(alpha_ab.val[1], c.val[1], beta_f32);
61  alpha_ab.val[2] = vmlaq_f32(alpha_ab.val[2], c.val[2], beta_f32);
62  alpha_ab.val[3] = vmlaq_f32(alpha_ab.val[3], c.val[3], beta_f32);
63 
64  vst4q_f32(out_ptr + x, alpha_ab);
65  }
66 
67  // Left-over loop
68  for(; x < window_end_x; ++x)
69  {
70  *(out_ptr + x) += *(in_ptr + x) * beta;
71  }
72  },
73  in, out);
74 }
75 
76 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
77 void matrix_addition_f16(const ITensor *src, ITensor *dst, const Window &window, float beta)
78 {
80  const float16x8_t beta_f16 = vdupq_n_f16(beta);
81 
82  constexpr int window_step_x = 16;
83  const auto window_start_x = static_cast<int>(window.x().start());
84  const auto window_end_x = static_cast<int>(window.x().end());
85 
86  Window win = window.collapse_if_possible(window, Window::DimZ);
87  win.set(Window::DimX, Window::Dimension(0, 1, 1));
88 
89  Iterator in(src, win);
90  Iterator out(dst, win);
91 
92  execute_window_loop(win, [&](const Coordinates &)
93  {
94  const auto in_ptr = reinterpret_cast<const float16_t *>(in.ptr());
95  const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr());
96 
97  int x = window_start_x;
98  for(; x < (window_end_x - window_step_x); x += window_step_x)
99  {
100  float16x8x2_t alpha_ab = vld2q_f16(out_ptr + x);
101  const float16x8x2_t c = vld2q_f16(in_ptr + x);
102  // Multiply matrix C by its weight and accumulate
103  alpha_ab.val[0] = vaddq_f16(alpha_ab.val[0], vmulq_f16(c.val[0], beta_f16));
104  alpha_ab.val[1] = vaddq_f16(alpha_ab.val[1], vmulq_f16(c.val[1], beta_f16));
105 
106  vst2q_f16(out_ptr + x, alpha_ab);
107  }
108 
109  // Left-over loop
110  for(; x < window_end_x; ++x)
111  {
112  *(out_ptr + x) += *(in_ptr + x) * static_cast<float16_t>(beta);
113  }
114  },
115  in, out);
116 }
117 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
118 } // namespace cpu
119 } // namespace arm_compute
float16x8_t vmulq_f16(float16x8_t, float16x8_t)
Definition: clang-tidy.h:85
float16x8_t vaddq_f16(float16x8_t, float16x8_t)
Definition: clang-tidy.h:75
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:79
Interface for CPU tensor.
Definition: ITensor.h:36
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
void matrix_addition_f32(const ITensor *src, ITensor *dst, const Window &window, float beta)
Definition: impl.cpp:32
Coordinates of an item.
Definition: Coordinates.h:37
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:102
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:97
Describe a multidimensional execution window.
Definition: Window.h:39
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:159