24.02.1
|
Go to the documentation of this file.
46 constexpr
int window_step_x = 16 /
sizeof(float);
47 const auto window_start_x =
static_cast<int>(window.
x().
start());
48 const auto window_end_x =
static_cast<int>(window.
x().
end());
53 if (is_broadcast_across_x)
55 const bool is_broadcast_input_2 = input2_win.
x().
step() == 0;
56 Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
57 Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
58 const ITensor *broadcast_tensor = is_broadcast_input_2 ? src2 : src1;
59 const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? src2 : src1;
64 Iterator broadcast_input(broadcast_tensor, broadcast_win);
65 Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
72 const auto non_broadcast_input_ptr =
reinterpret_cast<const float *
>(non_broadcast_input.
ptr());
73 const auto output_ptr =
reinterpret_cast<float *
>(
dst.ptr());
75 const float broadcast_value = *
reinterpret_cast<const float *
>(broadcast_input.
ptr());
76 const auto broadcast_value_vec =
wrapper::vdup_n(broadcast_value, ExactTagType{});
80 int x = window_start_x;
81 for (; x <= (window_end_x - window_step_x); x += window_step_x)
83 const auto non_broadcast_v =
wrapper::vloadq(non_broadcast_input_ptr + x);
89 for (; x < window_end_x; ++x)
91 const auto non_broadcast_v = *(non_broadcast_input_ptr + x);
92 *(output_ptr + x) = broadcast_value * non_broadcast_v *
scale;
95 broadcast_input, non_broadcast_input,
dst);
111 const auto input1_ptr =
reinterpret_cast<const float *
>(input1.
ptr());
112 const auto input2_ptr =
reinterpret_cast<const float *
>(input2.
ptr());
113 const auto output_ptr =
reinterpret_cast<float *
>(
dst.ptr());
116 int x = window_start_x;
117 for (; x <= (window_end_x - window_step_x); x += window_step_x)
127 for (; x < window_end_x; ++x)
129 const auto ta1 = *(input1_ptr + x);
130 const auto ta2 = *(input2_ptr + x);
131 *(output_ptr + x) = ta1 * ta2 *
scale;
134 input1, input2,
dst);
constexpr int start() const
Return the start of the dimension.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
constexpr int step() const
Return the step of the dimension.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Interface for CPU tensor.
uint8x16_t vloadq(const uint8_t *ptr)
Includes all wrapper headers at once.
Create the appropriate SIMD vector given its type and size in terms of elements.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Iterator updated by execute_window_loop for each window element.
T x() const
Alias to access the size of the first dimension.
Describe one of the image's dimensions with a start, end and step.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less than or equal to 1.
constexpr int end() const
Return the end of the dimension.
void mul_F32_F32_F32(const ITensor *src1, const ITensor *src2, ITensor *out, const Window &window, float scale)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)