24.02.1
|
Go to the documentation of this file.
24 #ifndef ACL_SRC_CPU_KERNELS_SELECT_GENERIC_NEON_IMPL_H
25 #define ACL_SRC_CPU_KERNELS_SELECT_GENERIC_NEON_IMPL_H
40 template <
typename ScalarType,
typename VectorType>
46 const int window_step_x,
47 const int window_start_x,
48 const int window_end_x,
50 VectorType (*condition_conversion)(
const uint8_t *))
64 auto output_ptr =
reinterpret_cast<ScalarType *
>(output.
ptr());
65 const auto condition_ptr =
reinterpret_cast<const uint8_t *
>(
condition.ptr());
66 const auto input1_ptr =
reinterpret_cast<const ScalarType *
>(input1.
ptr());
67 const auto input2_ptr =
reinterpret_cast<const ScalarType *
>(input2.
ptr());
69 int x = window_start_x;
70 for (; x <= limit; x += window_step_x)
72 const auto c = (*condition_conversion)(condition_ptr + x);
77 for (; x < window_end_x; ++x)
79 const auto c = *(condition_ptr + x);
80 const auto a = *(input1_ptr + x);
81 const auto b = *(input2_ptr + x);
82 *(output_ptr + x) =
static_cast<bool>(c) ? a :
b;
88 template <
typename ScalarType,
typename VectorType>
91 const auto window_step_x = 16 /
sizeof(ScalarType);
92 const auto window_start_x =
static_cast<int>(window.
x().
start());
93 const auto window_end_x =
static_cast<int>(window.
x().
end());
95 select_op<ScalarType, VectorType>(
96 cond, in1, in2, out, window, window_step_x, window_start_x, window_end_x, window_end_x - window_step_x,
97 [](
const uint8_t *condition_ptr) -> VectorType
99 static const auto zero =
105 template <
typename ScalarType,
typename VectorType>
108 const auto window_step_x = 16 /
sizeof(ScalarType);
109 const auto window_start_x =
static_cast<int>(window.
x().
start());
110 const auto window_end_x =
static_cast<int>(window.
x().
end());
112 select_op<ScalarType, VectorType>(
113 cond, in1, in2, out, window, window_step_x, window_start_x, window_end_x, window_end_x - window_step_x,
114 [](
const uint8_t *condition_ptr) -> VectorType
116 static const auto zero =
122 template <
typename ScalarType,
typename VectorType>
125 const auto window_step_x = 16 /
sizeof(ScalarType);
126 const auto window_start_x =
static_cast<int>(window.
x().
start());
127 const auto window_end_x =
static_cast<int>(window.
x().
end());
129 select_op<ScalarType, VectorType>(
130 cond, in1, in2, out, window, window_step_x, window_start_x, window_end_x, window_end_x - window_step_x,
131 [](
const uint8_t *condition_ptr) -> VectorType
133 static const auto zero =
139 template <
typename ScalarType>
145 auto output_ptr =
reinterpret_cast<ScalarType *
>(out->
buffer());
146 const auto condition_ptr =
reinterpret_cast<const uint8_t *
>(cond->
buffer());
147 const auto input1_ptr =
reinterpret_cast<const ScalarType *
>(in1->
buffer());
148 const auto input2_ptr =
reinterpret_cast<const ScalarType *
>(in2->
buffer());
155 for (
int i = 0; i < outer_size; ++i)
158 const auto input_ptr =
static_cast<bool>(*(condition_ptr + i)) ? input1_ptr : input2_ptr;
168 for (; x <
offset + inner_size; ++x)
170 *(output_ptr + x) = *(input_ptr + x);
177 #endif // ACL_SRC_CPU_KERNELS_SELECT_GENERIC_NEON_IMPL_H
void select_op_16(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
constexpr int start() const
Return the start of the dimension.
uint16x8_t vmovl(const uint8x8_t &a)
virtual size_t element_size() const =0
Element size in bytes, calculated as data_size() * num_channels().
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Interface for CPU tensor.
uint8x16_t vloadq(const uint8_t *ptr)
void select_op_not_same_rank(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void select_op_8(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
void select_op_32(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Iterator updated by execute_window_loop for each window element.
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
uint8x8_t vbsl(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Describe one of the image's dimensions with a start, end and step.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
uint8x8_t vgetlow(const uint8x16_t val)
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
void select_op(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, const int window_step_x, const int window_start_x, const int window_end_x, const int limit, VectorType(*condition_conversion)(const uint8_t *))
Copyright (c) 2017-2024 Arm Limited.
uint8x8_t vcgt(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vload(const uint8_t *ptr)
Condition condition(TokenStream &in, bool &valid)
constexpr int end() const
Return the end of the dimension.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory.