// Splat constants shared by the logical micro-kernels below.
// Logical inputs are U8 tensors where any non-zero byte means "true";
// vmin(x, 1) clamps each lane to {0, 1} before the bitwise AND/OR.
static const uint8x8_t  c0_x8  = vdup_n_u8(0);  // 8-lane zero
static const uint8x16_t c0_x16 = vdupq_n_u8(0); // 16-lane zero
static const uint8x8_t  c1_x8  = vdup_n_u8(1);  // 8-lane one
static const uint8x16_t c1_x16 = vdupq_n_u8(1); // 16-lane one

// Elements handled per full-width (128-bit) and half-width (64-bit)
// vector iteration. constexpr: these are plain compile-time ints, so no
// static initialization is needed (unlike the vector constants above,
// which require a runtime vdup).
constexpr int step      = 16;
constexpr int half_step = step / 2;
/** Elementwise logical AND of two U8 buffers.
 *
 * Any non-zero input byte is treated as true; every output byte is
 * exactly 0 or 1 (inputs are clamped with vmin before the bitwise AND).
 *
 * @param[in]  src0 First input buffer.
 * @param[in]  src1 Second input buffer.
 * @param[out] dst  Output buffer; @p len bytes are written.
 * @param[in]  len  Number of elements to process.
 */
void neon_logical_and(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int len)
{
    // 16-lane pass: clamp each operand to {0,1}, then bitwise AND.
    for(; len >= step; len -= step)
    {
        vst1q_u8(dst, vandq_u8(vminq_u8(vld1q_u8(src0), c1_x16), vminq_u8(vld1q_u8(src1), c1_x16)));
        src0 += step;
        src1 += step;
        dst += step;
    }

    // 8-lane pass for the remaining 8..15 elements.
    for(; len >= half_step; len -= half_step)
    {
        vst1_u8(dst, vand_u8(vmin_u8(vld1_u8(src0), c1_x8), vmin_u8(vld1_u8(src1), c1_x8)));
        src0 += half_step;
        src1 += half_step;
        dst += half_step;
    }

    // Scalar tail (< 8 elements): && already yields 0/1.
    for(; len > 0; --len)
    {
        *dst = (*src0) && (*src1);
        ++src0;
        ++src1;
        ++dst;
    }
}
/** Elementwise logical AND of a U8 buffer with a broadcast scalar.
 *
 * @param[in]  src           Input buffer.
 * @param[in]  broadcast_val Scalar operand, broadcast across all elements.
 * @param[out] dst           Output buffer; @p len bytes are written.
 * @param[in]  len           Number of elements to process.
 */
void neon_logical_and_broadcast(const uint8_t *src, uint8_t broadcast_val, uint8_t *dst, int len)
{
    // Clamp the scalar to {0,1} once, then splat it for the vector passes.
    const auto broadcast_val_clamped_s   = std::min<uint8_t>(broadcast_val, 1);
    const auto broadcast_val_clamped_x16 = vdupq_n_u8(broadcast_val_clamped_s);
    const auto broadcast_val_clamped_x8  = vdup_n_u8(broadcast_val_clamped_s);

    // 16-lane pass.
    for(; len >= step; len -= step)
    {
        vst1q_u8(dst, vandq_u8(vminq_u8(vld1q_u8(src), c1_x16), broadcast_val_clamped_x16));
        src += step;
        dst += step;
    }

    // 8-lane pass.
    for(; len >= half_step; len -= half_step)
    {
        vst1_u8(dst, vand_u8(vmin_u8(vld1_u8(src), c1_x8), broadcast_val_clamped_x8));
        src += half_step;
        dst += half_step;
    }

    // Scalar tail.
    for(; len > 0; --len)
    {
        *dst = (*src) && broadcast_val_clamped_s;
        ++src;
        ++dst;
    }
}
/** Elementwise logical OR of two U8 buffers.
 *
 * Any non-zero input byte is treated as true; every output byte is
 * exactly 0 or 1 (inputs are clamped with vmin before the bitwise OR).
 *
 * @param[in]  src0 First input buffer.
 * @param[in]  src1 Second input buffer.
 * @param[out] dst  Output buffer; @p len bytes are written.
 * @param[in]  len  Number of elements to process.
 */
void neon_logical_or(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int len)
{
    // 16-lane pass: clamp each operand to {0,1}, then bitwise OR.
    for(; len >= step; len -= step)
    {
        vst1q_u8(dst, vorrq_u8(vminq_u8(vld1q_u8(src0), c1_x16), vminq_u8(vld1q_u8(src1), c1_x16)));
        src0 += step;
        src1 += step;
        dst += step;
    }

    // 8-lane pass for the remaining 8..15 elements.
    for(; len >= half_step; len -= half_step)
    {
        vst1_u8(dst, vorr_u8(vmin_u8(vld1_u8(src0), c1_x8), vmin_u8(vld1_u8(src1), c1_x8)));
        src0 += half_step;
        src1 += half_step;
        dst += half_step;
    }

    // Scalar tail: || already yields 0/1.
    for(; len > 0; --len)
    {
        *dst = (*src0) || (*src1);
        ++src0;
        ++src1;
        ++dst;
    }
}
/** Elementwise logical OR of a U8 buffer with a broadcast scalar.
 *
 * @param[in]  src           Input buffer.
 * @param[in]  broadcast_val Scalar operand, broadcast across all elements.
 * @param[out] dst           Output buffer; @p len bytes are written.
 * @param[in]  len           Number of elements to process.
 */
void neon_logical_or_broadcast(const uint8_t *src, uint8_t broadcast_val, uint8_t *dst, int len)
{
    // Clamp the scalar to {0,1} once, then splat it for the vector passes.
    const auto broadcast_val_clamped_s   = std::min<uint8_t>(broadcast_val, 1);
    const auto broadcast_val_clamped_x16 = vdupq_n_u8(broadcast_val_clamped_s);
    const auto broadcast_val_clamped_x8  = vdup_n_u8(broadcast_val_clamped_s);

    // 16-lane pass.
    for(; len >= step; len -= step)
    {
        vst1q_u8(dst, vorrq_u8(vminq_u8(vld1q_u8(src), c1_x16), broadcast_val_clamped_x16));
        src += step;
        dst += step;
    }

    // 8-lane pass.
    for(; len >= half_step; len -= half_step)
    {
        vst1_u8(dst, vorr_u8(vmin_u8(vld1_u8(src), c1_x8), broadcast_val_clamped_x8));
        src += half_step;
        dst += half_step;
    }

    // Scalar tail.
    for(; len > 0; --len)
    {
        *dst = (*src) || broadcast_val_clamped_s;
        ++src;
        ++dst;
    }
}
/** Elementwise logical NOT of a U8 buffer.
 *
 * Lanes equal to 0 become 1, everything else becomes 0:
 * vceq against zero produces an all-ones mask where the input is 0, and
 * vbsl then selects 1 where the mask is set, 0 elsewhere.
 *
 * @param[in]  src Input buffer.
 * @param[out] dst Output buffer; @p len bytes are written.
 * @param[in]  len Number of elements to process.
 */
void neon_logical_not(const uint8_t *src, uint8_t *dst, int len)
{
    // 16-lane pass.
    for(; len >= step; len -= step)
    {
        vst1q_u8(dst, vbslq_u8(vceqq_u8(vld1q_u8(src), c0_x16), c1_x16, c0_x16));
        src += step;
        dst += step;
    }

    // 8-lane pass.
    for(; len >= half_step; len -= half_step)
    {
        vst1_u8(dst, vbsl_u8(vceq_u8(vld1_u8(src), c0_x8), c1_x8, c0_x8));
        src += half_step;
        dst += half_step;
    }

    // Scalar tail: restore the epilogue dropped by the extraction,
    // mirroring the tail loops of the sibling kernels above.
    for(; len > 0; --len)
    {
        *dst = !(*src);
        ++src;
        ++dst;
    }
}
// NOTE(review): fragments of run_unary() — the enclosing signature and the
// execute_window_loop wrapper are not visible in this extract; presumably
// `window` is the execution Window and `in`/`out` are the src/dst Iterators
// — confirm against the full file.
// Number of elements along the X dimension covered by the window.
207 const auto len =
static_cast<int>(window.
x().
end()) - static_cast<int>(window.
x().
start());
// Per-row body: apply logical NOT to `len` elements.
214 neon_logical_not(in.
ptr(), out.
ptr(), len);
// NOTE(review): fragments of run_binary() — the enclosing signature and the
// execute_window_loop wrappers are not visible in this extract.
// Number of elements along the X dimension covered by the window.
228 const auto len =
static_cast<int>(window.
x().
end()) - static_cast<int>(window.
x().
start());
// Broadcast path: one input is a scalar replicated along X.
230 if(is_broadcast_across_x)
// Pick the broadcast micro-kernel matching the requested operation.
233 LogicalBroadcastUKernelPtr logical_func = op ==
LogicalOperation::Or ? &neon_logical_or_broadcast : &neon_logical_and_broadcast;
// A window X step of 0 marks the broadcast input (it never advances).
235 const bool is_broadcast_input_1 = src1_win.
x().
step() == 0;
236 Window broadcast_win = is_broadcast_input_1 ? src1_win : src0_win;
237 Window non_broadcast_win = !is_broadcast_input_1 ? src1_win : src0_win;
238 const ITensor *broadcast_tensor = is_broadcast_input_1 ? src1 : src0;
239 const ITensor *non_broadcast_tensor = !is_broadcast_input_1 ? src1 : src0;
242 Iterator broadcast_in(broadcast_tensor, broadcast_win);
243 Iterator non_broadcast_in(non_broadcast_tensor, non_broadcast_win);
// Read the single scalar value, then run the broadcast kernel on the row.
248 const uint8_t broadcast_value = *broadcast_in.
ptr();
249 logical_func(non_broadcast_in.
ptr(), broadcast_value, out.
ptr(), len);
// Iterators advanced by the (not visible) execute_window_loop call.
252 broadcast_in, non_broadcast_in, out);
// Non-broadcast path: both inputs advance elementwise.
257 LogicalUKernelPtr logical_func = op ==
LogicalOperation::Or ? &neon_logical_or : &neon_logical_and;
267 logical_func(in0.
ptr(), in1.
ptr(), out.
ptr(), len);
// NOTE(review): scattered one-line fragments of several NELogicalKernel
// member functions; none of the enclosing signatures are visible here.
// Presumably from name(): kernel identifier string.
275 return "NELogicalKernel";
// Presumably from configure(): adopt the broadcast-compatible output shape.
291 out_shape = broadcast_pair.first;
294 ICPPKernel::configure(win);
// Presumably from validate(): only check an output that is already allocated.
315 if((output !=
nullptr) && (output->
total_size() != 0))
// Presumably from run_op(): dispatch NOT (unary) vs AND/OR (binary).
337 run_unary(window, src0, dst);
341 run_binary(window, src0, src1, dst, _op);
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
const Window & window() const
The maximum window the kernel can be executed on.
bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
Set the data type and number of channels to the specified value if the current data type is unknown...
bool empty() const
Checks if pack is empty.
constexpr int step() const
Return the step of the dimension.
1 channel, 1 U8 per channel
virtual DataType data_type() const =0
Data type used for each element of the tensor.
static TensorShape broadcast_shape(const Shapes &... shapes)
If shapes are broadcast compatible, return the broadcasted shape.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of...
decltype(strategy::transforms) typedef type
Interface for Neon tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, LogicalOperation op)
Static function to check if given info will lead to a valid configuration of NELogicalKernel.
T x() const
Alias to access the size of the first dimension.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Class to describe a number of elements in each dimension.
size_t total_size() const
Collapses all dimensions to a single linear total size.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
Set the shape to the specified value if the current assignment is empty.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ASSERT_NOT_NULLPTR(ptr)
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where the shape is less than or equal to 1.
const char * name() const override
Name of the kernel.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Information about executing thread and CPU.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
#define ARM_COMPUTE_ASSERT(cond)
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, LogicalOperation op)
Initialise the kernel's inputs and output.
LogicalOperation
List of supported logical operations.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.