Compute Library
 21.11
elementwise_list.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef SRC_CORE_NEON_KERNELS_ELEMENTWISE_LIST_H
25 #define SRC_CORE_NEON_KERNELS_ELEMENTWISE_LIST_H
26 
27 #include "src/core/NEON/NEAsymm.h"
30 
31 namespace arm_compute
32 {
33 namespace cpu
34 {
35 template <typename InputScalarType, typename OutputScalarType, typename InputVectorType>
36 void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
37  OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
38  int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &, OutputScalarType *, const bool),
39  int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *, OutputScalarType *))
40 {
41  // Create input windows
42  Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
43  Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());
44 
45  // Clear X Dimension on execution window as we handle manually
46  Window win = window;
47  win.set(Window::DimX, Window::Dimension(0, 1, 1));
48 
49  const int window_step_x = std::min(16 / static_cast<int>(sizeof(OutputScalarType)), 8);
50  const auto window_start_x = static_cast<int>(window.x().start());
51  const auto window_end_x = static_cast<int>(window.x().end());
52  const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
53 
54  if(is_broadcast_across_x)
55  {
56  const bool is_broadcast_input_2 = input2_win.x().step() == 0;
57  Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
58  Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
59  const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
60  const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
61 
62  // Clear X Dimension on execution window as we handle manually
63  non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));
64 
65  Iterator broadcast_input(broadcast_tensor, broadcast_win);
66  Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
67  Iterator output(out, win);
68 
69  execute_window_loop(win, [&](const Coordinates &)
70  {
71  auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
72  const auto non_broadcast_input_ptr = reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
73  const InputScalarType broadcast_value = *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
74 
75  int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x, non_broadcast_input_ptr, broadcast_value, output_ptr, !is_broadcast_input_2);
76  for(; x < window_end_x; ++x)
77  {
78  const auto a = *(non_broadcast_input_ptr + x);
79  *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a, !is_broadcast_input_2 ? a : broadcast_value);
80  }
81  },
82  broadcast_input, non_broadcast_input, output);
83  }
84  else
85  {
86  // Clear X Dimension on execution window as we handle manually
87  input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
88  input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
89 
90  Iterator input1(in1, input1_win);
91  Iterator input2(in2, input2_win);
92  Iterator output(out, win);
93 
94  execute_window_loop(win, [&](const Coordinates &)
95  {
96  auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
97  const auto input1_ptr = reinterpret_cast<const InputScalarType *>(input1.ptr());
98  const auto input2_ptr = reinterpret_cast<const InputScalarType *>(input2.ptr());
99 
100  int x = (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr, output_ptr);
101  for(; x < window_end_x; ++x)
102  {
103  const auto a = *(input1_ptr + x);
104  const auto b = *(input2_ptr + x);
105  *(output_ptr + x) = (*scalar_func)(a, b);
106  }
107  },
108  input1, input2, output);
109  }
110 }
111 
112 template <ArithmeticOperation op, typename ScalarType>
113 inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const ScalarType &b)
114 {
115  auto res = ScalarType(0);
116 
117  switch(op)
118  {
120  res = std::max(a, b);
121  break;
123  res = std::min(a, b);
124  break;
126  {
127  res = (a - b) * (a - b);
128  break;
129  }
131  {
132  res = (a > 0 ? a : a * b);
133  break;
134  }
136  {
137  res = a / b;
138  if(std::is_integral<ScalarType>::value)
139  {
140  res = (b == 0) ? 0 : res;
141  if(static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0)))
142  {
143  --res;
144  }
145  }
146  break;
147  }
149  {
150  res = std::pow(a, b);
151  break;
152  }
153  default:
154  ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
155  }
156  return res;
157 }
158 
159 template <ArithmeticOperation op, typename VectorType>
160 inline typename VectorType::type elementwise_arithm_op(const typename VectorType::type &a, const typename VectorType::type &b)
161 {
162  using vec_type = typename VectorType::type;
163  using scalar_type = typename VectorType::scalar_type;
164  using tag_type = typename VectorType::tag_type;
165 
166  vec_type res = wrapper::vdup_n(static_cast<scalar_type>(0), tag_type{});
167 
168  switch(op)
169  {
171  res = wrapper::vmax(a, b);
172  break;
174  res = wrapper::vmin(a, b);
175  break;
177  {
178  const vec_type tmp = wrapper::vsub(a, b);
179  res = wrapper::vmul(tmp, tmp);
180  break;
181  }
183  {
184  const vec_type zero = wrapper::vdup_n(static_cast<scalar_type>(0), tag_type{});
185  const vec_type tmp = wrapper::vmul(a, b);
186  const auto gt = wrapper::vcgt(a, zero);
187 
188  res = wrapper::vbsl(gt, a, tmp);
189  break;
190  }
191 
192  default:
193  ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
194  }
195 
196  return res;
197 }
198 
199 template <>
200 inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b)
201 {
202  return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b))));
203 }
204 
205 template <>
206 inline float32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b)
207 {
208  return wrapper::vdiv(a, b);
209 }
210 
211 template <>
212 inline float32x4_t elementwise_arithm_op<ArithmeticOperation::POWER, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b)
213 {
214  return wrapper::vpow(a, b);
215 }
216 
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** DIV specialisation for 8 x float16: lane-wise a / b. */
template <>
inline float16x8_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float16_t, 8>>(const float16x8_t &a, const float16x8_t &b)
{
    const float16x8_t quotient = wrapper::vdiv(a, b);
    return quotient;
}

/** POWER specialisation for 8 x float16: lane-wise a raised to b. */
template <>
inline float16x8_t elementwise_arithm_op<ArithmeticOperation::POWER, typename wrapper::traits::neon_vector<float16_t, 8>>(const float16x8_t &a, const float16x8_t &b)
{
    const float16x8_t powered = wrapper::vpow(a, b);
    return powered;
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
230 
231 template <ArithmeticOperation op, typename ScalarType, typename VectorType>
232 inline typename VectorType::type elementwise_arithm_op_broadcast(const typename VectorType::type &a, const ScalarType &broadcast_value, const bool reorder)
233 {
234  using tag_type = typename VectorType::tag_type;
235  using vec_type = typename VectorType::type;
236 
237  vec_type broadcast_vector = wrapper::vdup_n(broadcast_value, tag_type{});
238  return elementwise_arithm_op<op, VectorType>(reorder ? broadcast_vector : a, reorder ? a : broadcast_vector);
239 }
240 
241 template <ArithmeticOperation op, typename ScalarType, typename VectorType>
242 inline int elementwise_arithm_op_loop(int window_start_x, int window_end_x, int window_step_x,
243  const ScalarType *input1_ptr, const ScalarType *input2_ptr, ScalarType *output_ptr)
244 {
245  int x = window_start_x;
246  for(; x <= (window_end_x - window_step_x); x += window_step_x)
247  {
248  const auto a = wrapper::vloadq(input1_ptr + x);
249  const auto b = wrapper::vloadq(input2_ptr + x);
250  wrapper::vstore(output_ptr + x, elementwise_arithm_op<op, VectorType>(a, b));
251  }
252  return x;
253 }
254 
255 template <ArithmeticOperation op, typename ScalarType, typename VectorType>
256 inline int elementwise_arithm_op_broadcast_loop(int window_start_x, int window_end_x, int window_step_x,
257  const ScalarType *non_broadcast_input_ptr, const ScalarType &broadcast_value, ScalarType *output_ptr, const bool reorder)
258 {
259  int x = window_start_x;
260  for(; x <= (window_end_x - window_step_x); x += window_step_x)
261  {
262  const auto a = wrapper::vloadq((non_broadcast_input_ptr + x));
263  wrapper::vstore(output_ptr + x, elementwise_arithm_op_broadcast<op, ScalarType, VectorType>(a, broadcast_value, reorder));
264  }
265  return x;
266 }
267 
268 template <ArithmeticOperation op, typename VectorType>
269 void elementwise_arithm_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
270 {
271  using scalar_type = typename VectorType::scalar_type;
272 
273  elementwise_op<scalar_type, scalar_type, VectorType>(in1, in2, out, window,
274  &elementwise_arithm_op_scalar<op, scalar_type>,
275  &elementwise_arithm_op_broadcast_loop<op, scalar_type, VectorType>,
276  &elementwise_arithm_op_loop<op, scalar_type, VectorType>);
277 }
278 
279 template <ComparisonOperation op, typename InputScalarType>
280 inline uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputScalarType &b)
281 {
282  bool res = false;
283 
284  switch(op)
285  {
287  res = (a == b);
288  break;
290  res = (a != b);
291  break;
293  res = (a > b);
294  break;
296  res = (a >= b);
297  break;
299  res = (a < b);
300  break;
302  res = (a <= b);
303  break;
304  default:
305  ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
306  }
307  return res ? ~static_cast<uint8_t>(0) : static_cast<uint8_t>(0);
308 }
309 
310 template <ComparisonOperation op, typename InputVectorType, typename OutputVectorType>
311 inline OutputVectorType elementwise_comp_op(const InputVectorType &a, const InputVectorType &b)
312 {
313  OutputVectorType res = { 0, 0, 0, 0 };
314 
315  switch(op)
316  {
318  res = wrapper::vceq(a, b);
319  break;
321  res = wrapper::vnot(wrapper::vceq(a, b));
322  break;
324  res = wrapper::vcgt(a, b);
325  break;
327  res = wrapper::vcge(a, b);
328  break;
330  res = wrapper::vcgt(b, a);
331  break;
333  res = wrapper::vcge(b, a);
334  break;
335  default:
336  ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
337  }
338 
339  return res;
340 }
341 
342 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType, typename OutputVectorType>
343 inline OutputVectorType elementwise_comp_op_broadcast(const InputVectorType &a, const InputScalarType &broadcast_value, const bool reorder)
344 {
345  InputVectorType broadcast_vector = wrapper::vdup_n(broadcast_value, wrapper::traits::vector_128_tag());
346  return elementwise_comp_op<op, InputVectorType, OutputVectorType>(reorder ? broadcast_vector : a, reorder ? a : broadcast_vector);
347 }
348 
349 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
350 inline int elementwise_comp_op_broadcast_8_loop(int window_start_x, int window_end_x, int window_step_x,
351  const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
352 {
353  int x = window_start_x;
354  for(; x <= (window_end_x - window_step_x); x += window_step_x)
355  {
356  const auto a = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint8x16_t>(wrapper::vloadq((non_broadcast_input_ptr + x)), broadcast_value, reorder);
357  wrapper::vstore(output_ptr + x, a);
358  }
359  return x;
360 }
361 
362 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
363 inline int elementwise_comp_op_broadcast_16_loop(int window_start_x, int window_end_x, int window_step_x,
364  const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
365 {
366  int x = window_start_x;
367  for(; x <= (window_end_x - window_step_x); x += window_step_x)
368  {
369  const auto a = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint16x8_t>(wrapper::vloadq((non_broadcast_input_ptr + x)), broadcast_value, reorder);
370  wrapper::vstore(output_ptr + x, wrapper::vmovn(a));
371  }
372  return x;
373 }
374 
375 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
376 inline int elementwise_comp_op_broadcast_32_loop(int window_start_x, int window_end_x, int window_step_x,
377  const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
378 {
379  int x = window_start_x;
380  for(; x <= (window_end_x - window_step_x); x += window_step_x)
381  {
382  const auto a = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint32x4_t>(wrapper::vloadq(non_broadcast_input_ptr + x), broadcast_value, reorder);
383  const auto b = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint32x4_t>(wrapper::vloadq(non_broadcast_input_ptr + x + 4), broadcast_value, reorder);
385  }
386  if(x <= window_end_x - 4)
387  {
388  const auto a = elementwise_comp_op_broadcast<op, InputScalarType, InputVectorType, uint32x4_t>(wrapper::vloadq((non_broadcast_input_ptr + x)), broadcast_value, reorder);
389  for(int i = 0; i < 4; i++)
390  {
391  *(output_ptr + x + i) = wrapper::vgetlane(a, i);
392  }
393  x = +4;
394  }
395  return x;
396 }
397 
398 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
399 inline int elementwise_comp_op_8_loop(int window_start_x, int window_end_x, int window_step_x,
400  const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
401 {
402  int x = window_start_x;
403  for(; x <= (window_end_x - window_step_x); x += window_step_x)
404  {
405  const auto a = wrapper::vloadq(input1_ptr + x);
406  const auto b = wrapper::vloadq(input2_ptr + x);
407  const auto res = elementwise_comp_op<op, InputVectorType, uint8x16_t>(a, b);
408  wrapper::vstore(output_ptr + x, res);
409  }
410  return x;
411 }
412 
413 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
414 inline int elementwise_comp_op_16_loop(int window_start_x, int window_end_x, int window_step_x,
415  const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
416 {
417  int x = window_start_x;
418  for(; x <= (window_end_x - window_step_x); x += window_step_x)
419  {
420  const auto a = wrapper::vloadq(input1_ptr + x);
421  const auto b = wrapper::vloadq(input2_ptr + x);
422  const auto res = elementwise_comp_op<op, InputVectorType, uint16x8_t>(a, b);
423  wrapper::vstore(output_ptr + x, wrapper::vmovn(res));
424  }
425  return x;
426 }
427 
428 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
429 inline int elementwise_comp_op_32_loop(int window_start_x, int window_end_x, int window_step_x,
430  const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
431 {
432  int x = window_start_x;
433  for(; x <= (window_end_x - window_step_x); x += window_step_x)
434  {
435  auto a = wrapper::vloadq(input1_ptr + x);
436  auto b = wrapper::vloadq(input2_ptr + x);
437  const auto res = elementwise_comp_op<op, InputVectorType, uint32x4_t>(a, b);
438  a = wrapper::vloadq(input1_ptr + x + 4);
439  b = wrapper::vloadq(input2_ptr + x + 4);
440  const auto res2 = elementwise_comp_op<op, InputVectorType, uint32x4_t>(a, b);
442  }
443  if(x <= window_end_x - 4)
444  {
445  const auto a = wrapper::vloadq(input1_ptr + x);
446  const auto b = wrapper::vloadq(input2_ptr + x);
447  const auto res = elementwise_comp_op<op, InputVectorType, uint32x4_t>(a, b);
448  for(int i = 0; i < 4; i++)
449  {
450  *(output_ptr + x + i) = wrapper::vgetlane(res, i);
451  }
452  x = +4;
453  }
454  return x;
455 }
456 
457 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
458 void elementwise_comp_op_8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
459 {
460  elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window,
461  &elementwise_comp_op_scalar<op, InputScalarType>,
462  &elementwise_comp_op_broadcast_8_loop<op, InputScalarType, InputVectorType>,
463  &elementwise_comp_op_8_loop<op, InputScalarType, InputVectorType>);
464 }
465 
466 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
467 void elementwise_comp_op_16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
468 {
469  elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window,
470  &elementwise_comp_op_scalar<op, InputScalarType>,
471  &elementwise_comp_op_broadcast_16_loop<op, InputScalarType, InputVectorType>,
472  &elementwise_comp_op_16_loop<op, InputScalarType, InputVectorType>);
473 }
474 
475 template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
476 void elementwise_comp_op_32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
477 {
478  elementwise_op<InputScalarType, uint8_t, InputVectorType>(in1, in2, out, window,
479  &elementwise_comp_op_scalar<op, InputScalarType>,
480  &elementwise_comp_op_broadcast_32_loop<op, InputScalarType, InputVectorType>,
481  &elementwise_comp_op_32_loop<op, InputScalarType, InputVectorType>);
482 }
483 } // namespace cpu
484 } // namespace arm_compute
485 
486 #endif /* SRC_CORE_NEON_KERNELS_ELEMENTWISE_LIST_H */
float32x2_t vdiv(const float32x2_t &a, const float32x2_t &b)
Definition: div.h:58
int elementwise_comp_op_broadcast_8_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
int elementwise_comp_op_8_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
uint32x2_t vmovn(const uint64x2_t &a)
Definition: movn.h:39
OutputVectorType elementwise_comp_op_broadcast(const InputVectorType &a, const InputScalarType &broadcast_value, const bool reorder)
SimpleTensor< float > b
Definition: DFT.cpp:157
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:104
int elementwise_comp_op_broadcast_32_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
uint8x16_t vloadq(const uint8_t *ptr)
Definition: load.h:58
void elementwise_comp_op_16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
int elementwise_comp_op_32_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
VectorType::type elementwise_arithm_op_broadcast(const typename VectorType::type &a, const ScalarType &broadcast_value, const bool reorder)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Definition: sub.h:39
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputScalarType &b)
int elementwise_comp_op_broadcast_16_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *non_broadcast_input_ptr, const InputScalarType &broadcast_value, uint8_t *output_ptr, const bool reorder)
float32x4_t vpow(const float32x4_t &a, const float32x4_t &b)
Definition: pow.h:40
decltype(strategy::transforms) typedef type
Interface for CPU tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2021 Arm Limited.
float32x4_t vfloorq_f32(float32x4_t val)
Calculate floor of a vector.
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:87
VectorType::type elementwise_arithm_op(const typename VectorType::type &a, const typename VectorType::type &b)
void elementwise_comp_op_8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
uint8_t vgetlane(const uint8x8_t vector, const unsigned int lane)
Definition: getlane.h:91
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
uint8x8_t vnot(const uint8x8_t &a)
Definition: not.h:39
void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, OutputScalarType(*scalar_func)(const InputScalarType &, const InputScalarType &), int(*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &, OutputScalarType *, const bool), int(*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *, OutputScalarType *))
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
uint8x8_t vmin(const uint8x8_t &a, const uint8x8_t &b)
Definition: min.h:39
Coordinates of an item.
Definition: Coordinates.h:37
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
uint8x16_t vcombine(const uint8x8_t &a, const uint8x8_t &b)
Definition: combine.h:39
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less equal to 1.
Definition: Window.inl:120
OutputVectorType elementwise_comp_op(const InputVectorType &a, const InputVectorType &b)
y*x if x < 0, x otherwise
int elementwise_arithm_op_broadcast_loop(int window_start_x, int window_end_x, int window_step_x, const ScalarType *non_broadcast_input_ptr, const ScalarType &broadcast_value, ScalarType *output_ptr, const bool reorder)
uint8x8_t vcgt(const uint8x8_t &a, const uint8x8_t &b)
Definition: cgt.h:39
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
Definition: mul.h:39
uint8x8_t vbsl(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
Definition: bsl.h:39
ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const ScalarType &b)
int elementwise_comp_op_16_loop(int window_start_x, int window_end_x, int window_step_x, const InputScalarType *input1_ptr, const InputScalarType *input2_ptr, uint8_t *output_ptr)
void vstore(uint8_t *ptr, uint8x8_t val)
Definition: store.h:39
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
Definition: dup_n.h:41
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
int elementwise_arithm_op_loop(int window_start_x, int window_end_x, int window_step_x, const ScalarType *input1_ptr, const ScalarType *input2_ptr, ScalarType *output_ptr)
Includes all wrapper headers at once.
void elementwise_comp_op_32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:99
uint8x8_t vcge(const uint8x8_t &a, const uint8x8_t &b)
Definition: cge.h:39
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
uint8x8_t vmax(const uint8x8_t &a, const uint8x8_t &b)
Definition: max.h:39
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:94
Describe a multidimensional execution window.
Definition: Window.h:39
uint8x8_t vceq(const uint8x8_t &a, const uint8x8_t &b)
Definition: ceq.h:39
Sets the macro arm_any if compiling for Aarch32 or Aarch64.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:145