37 template <
typename ScalarType,
typename VectorType>
39 const int window_step_x,
const int window_start_x,
const int window_end_x,
const int limit, VectorType (*condition_conversion)(
const uint8_t *))
51 auto output_ptr =
reinterpret_cast<ScalarType *
>(output.
ptr());
52 const auto condition_ptr =
reinterpret_cast<const uint8_t *
>(condition.
ptr());
53 const auto input1_ptr =
reinterpret_cast<const ScalarType *
>(input1.
ptr());
54 const auto input2_ptr =
reinterpret_cast<const ScalarType *
>(input2.
ptr());
56 int x = window_start_x;
57 for(; x <= limit; x += window_step_x)
59 const auto c = (*condition_conversion)(condition_ptr + x);
64 for(; x < window_end_x; ++x)
66 const auto c = *(condition_ptr + x);
67 const auto a = *(input1_ptr + x);
68 const auto b = *(input2_ptr + x);
69 *(output_ptr + x) = static_cast<bool>(c) ? a :
b;
75 template <
typename ScalarType,
typename VectorType>
78 const auto window_step_x = 16 /
sizeof(ScalarType);
79 const auto window_start_x =
static_cast<int>(window.
x().
start());
80 const auto window_end_x =
static_cast<int>(window.
x().
end());
82 select_op<ScalarType, VectorType>(cond, in1, in2, out, window, window_step_x, window_start_x, window_end_x, window_end_x - window_step_x, [](
const uint8_t *condition_ptr) -> VectorType
89 template <
typename ScalarType,
typename VectorType>
92 const auto window_step_x = 16 /
sizeof(ScalarType);
93 const auto window_start_x =
static_cast<int>(window.
x().
start());
94 const auto window_end_x =
static_cast<int>(window.
x().
end());
96 select_op<ScalarType, VectorType>(cond, in1, in2, out, window, window_step_x, window_start_x, window_end_x, window_end_x - window_step_x, [](
const uint8_t *condition_ptr) -> VectorType
103 template <
typename ScalarType,
typename VectorType>
106 const auto window_step_x = 16 /
sizeof(ScalarType);
107 const auto window_start_x =
static_cast<int>(window.
x().
start());
108 const auto window_end_x =
static_cast<int>(window.
x().
end());
110 select_op<ScalarType, VectorType>(cond, in1, in2, out, window, window_step_x, window_start_x, window_end_x, window_end_x - window_step_x, [](
const uint8_t *condition_ptr) -> VectorType
117 template <
typename ScalarType>
122 auto output_ptr =
reinterpret_cast<ScalarType *
>(out->
buffer());
123 const auto condition_ptr =
reinterpret_cast<const uint8_t *
>(cond->
buffer());
124 const auto input1_ptr =
reinterpret_cast<const ScalarType *
>(in1->
buffer());
125 const auto input2_ptr =
reinterpret_cast<const ScalarType *
>(in2->
buffer());
132 for(
int i = 0; i < outer_size; ++i)
135 const auto input_ptr =
static_cast<bool>(*(condition_ptr + i)) ? input1_ptr : input2_ptr;
136 for(; x <= offset + inner_size -
step; x +=
step)
140 if(x <= offset + inner_size - (step / 2))
145 for(; x < offset + inner_size; ++x)
147 *(output_ptr + x) = *(input_ptr + x);
149 offset += inner_size;
161 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
171 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
void select_op_8(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_32< float, uint32x4_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
Condition condition(TokenStream &in, bool &valid)
uint8x16_t vloadq(const uint8_t *ptr)
void select_op_32(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
void select_op_16(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_not_same_rank< uint16_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
Describe one of the image's dimensions with a start, end and step.
Interface for CPU tensor.
Copyright (c) 2017-2022 Arm Limited.
template void select_op_not_same_rank< uint8_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_32< int32_t, uint32x4_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
template void select_op_16< uint16_t, uint16x8_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
template void select_op_not_same_rank< uint32_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_not_same_rank< int8_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
uint8x8_t vgetlow(const uint8x16_t val)
virtual size_t element_size() const =0
Element size in bytes, calculated as data_size() * num_channels().
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
void select_op(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, const int window_step_x, const int window_start_x, const int window_end_x, const int limit, VectorType(*condition_conversion)(const uint8_t *))
template void select_op_not_same_rank< int32_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
uint8x8_t vcgt(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vbsl(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
uint8x8_t vload(const uint8_t *ptr)
void vstore(uint8_t *ptr, uint8x8_t val)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
template void select_op_8< uint8_t, uint8x16_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_8< int8_t, uint8x16_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_not_same_rank< float >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
uint16x8_t vmovl(const uint8x8_t &a)
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
template void select_op_16< int16_t, uint16x8_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
void select_op_not_same_rank(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
template void select_op_32< uint32_t, uint32x4_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
template void select_op_not_same_rank< int16_t >(const ITensor *cond, const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)