49 : _func(nullptr), _planes{ {
nullptr } }, _output(
nullptr), _output_multi(
nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } }, _num_elems_processed_per_iteration(8),
50 _is_parallelizable(
true)
100 _output_multi =
nullptr;
105 _x_subsampling[1] = 2;
106 _x_subsampling[2] = 2;
109 _num_elems_processed_per_iteration = 8;
110 _is_parallelizable =
true;
113 switch(output_format)
116 _func = &NEChannelCombineKernel::combine_3C;
119 _func = &NEChannelCombineKernel::combine_4C;
122 _num_elems_processed_per_iteration = 16;
123 _func = &NEChannelCombineKernel::combine_YUV_1p<true>;
126 _num_elems_processed_per_iteration = 16;
127 _func = &NEChannelCombineKernel::combine_YUV_1p<false>;
137 AccessWindowHorizontal plane0_access(plane0->
info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[0]);
138 AccessWindowHorizontal plane1_access(plane1->
info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[1]);
139 AccessWindowHorizontal plane2_access(plane2->
info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[2]);
154 if(plane3 !=
nullptr)
161 INEKernel::configure(win);
207 _planes[3] =
nullptr;
209 _output_multi = output;
211 bool has_two_planes =
false;
212 unsigned int num_elems_written_plane1 = 8;
214 _num_elems_processed_per_iteration = 8;
215 _is_parallelizable =
true;
217 switch(output_format)
221 _x_subsampling = { { 1, 2, 2 } };
222 _y_subsampling = { { 1, 2, 2 } };
223 _func = &NEChannelCombineKernel::combine_YUV_2p;
224 has_two_planes =
true;
225 num_elems_written_plane1 = 16;
228 _is_parallelizable =
false;
229 _x_subsampling = { { 1, 2, 2 } };
230 _y_subsampling = { { 1, 2, 2 } };
231 _func = &NEChannelCombineKernel::combine_YUV_3p;
234 _is_parallelizable =
false;
235 _x_subsampling = { { 1, 1, 1 } };
236 _y_subsampling = { { 1, 1, 1 } };
237 _func = &NEChannelCombineKernel::combine_YUV_3p;
244 const unsigned int y_step = *std::max_element(_y_subsampling.begin(), _y_subsampling.end());
248 AccessWindowRectangle output_plane1_access(output->
plane(1)->
info(), 0, 0, num_elems_written_plane1, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
249 AccessWindowRectangle output_plane2_access(has_two_planes ?
nullptr : output->
plane(2)->
info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
253 AccessWindowRectangle(plane1->
info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]),
254 AccessWindowRectangle(plane2->
info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]),
255 output_plane0_access,
256 output_plane1_access,
257 output_plane2_access);
266 INEKernel::configure(win);
271 return _is_parallelizable;
281 (this->*_func)(window);
284 void NEChannelCombineKernel::combine_3C(
const Window &win)
293 const auto p0_ptr =
static_cast<uint8_t *
>(p0.
ptr());
294 const auto p1_ptr =
static_cast<uint8_t *
>(p1.
ptr());
295 const auto p2_ptr =
static_cast<uint8_t *
>(p2.
ptr());
296 const auto out_ptr =
static_cast<uint8_t *
>(out.
ptr());
298 const uint8x8x3_t pixels =
307 vst3_u8(out_ptr, pixels);
312 void NEChannelCombineKernel::combine_4C(
const Window &win)
322 const auto p0_ptr =
static_cast<uint8_t *
>(p0.
ptr());
323 const auto p1_ptr =
static_cast<uint8_t *
>(p1.
ptr());
324 const auto p2_ptr =
static_cast<uint8_t *
>(p2.
ptr());
325 const auto p3_ptr =
static_cast<uint8_t *
>(p3.
ptr());
326 const auto out_ptr =
static_cast<uint8_t *
>(out.
ptr());
328 const uint8x8x4_t pixels =
338 vst4_u8(out_ptr, pixels);
340 p0, p1, p2, p3, out);
343 template <
bool is_uyvy>
344 void NEChannelCombineKernel::combine_YUV_1p(
const Window &win)
356 constexpr
auto shift = is_uyvy ? 1 : 0;
360 const auto p0_ptr =
static_cast<uint8_t *
>(p0.
ptr());
361 const auto p1_ptr =
static_cast<uint8_t *
>(p1.
ptr());
362 const auto p2_ptr =
static_cast<uint8_t *
>(p2.
ptr());
363 const auto out_ptr =
static_cast<uint8_t *
>(out.
ptr());
365 const uint8x8x2_t pixels_y = vld2_u8(p0_ptr);
366 const uint8x8x2_t pixels_uv =
374 uint8x8x4_t pixels{ {} };
375 pixels.val[0 + shift] = pixels_y.val[0];
376 pixels.val[1 - shift] = pixels_uv.val[0];
377 pixels.val[2 + shift] = pixels_y.val[1];
378 pixels.val[3 - shift] = pixels_uv.val[1];
380 vst4_u8(out_ptr, pixels);
385 void NEChannelCombineKernel::combine_YUV_2p(
const Window &win)
407 Iterator p1(_planes[1 + shift], uv_win);
408 Iterator p2(_planes[2 - shift], uv_win);
416 const uint8x8x2_t pixels =
424 vst2_u8(out.ptr(), pixels);
429 void NEChannelCombineKernel::combine_YUV_3p(
const Window &win)
443 tmp_win.set(
Window::DimX,
Window::Dimension(tmp_win.x().start() / _x_subsampling[plane_id], tmp_win.x().end() / _x_subsampling[plane_id], tmp_win.x().step() / _x_subsampling[plane_id]));
444 tmp_win.set(
Window::DimY,
Window::Dimension(tmp_win.y().start() / _y_subsampling[plane_id], tmp_win.y().end() / _y_subsampling[plane_id], 1));
446 Iterator in(_planes[plane_id], tmp_win);
451 const uint8x8_t pixels = vld1_u8(in.
ptr());
453 vst1_u8(out.ptr(), pixels);
A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
bool is_parallelisable() const override
Indicates whether or not the kernel is parallelisable.
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
constexpr int step() const
Return the step of the dimension.
1 channel, 1 U8 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling.
Describe one of the image's dimensions with a start, end and step.
void validate() const
Will validate all the window's dimensions' values when asserts are enabled.
#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t,...)
NEChannelCombineKernel()
Default constructor.
const ValidRegion valid_region
Interface for Neon tensor.
A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling.
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
3 channels, 1 U8 per channel
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(...)
Implementation of a rectangular access pattern.
virtual Format format() const =0
Colour format of the image.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Interface for multi-planar images.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Format
Image colour formats.
Class to describe a number of elements in each dimension.
Implementation of a row access pattern.
void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output)
Configure function's inputs and outputs.
A 3 plane of 8-bit 4:4:4 sampled Y, U, V planes.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
ValidRegion intersect_valid_regions(const Ts &... regions)
Intersect multiple valid regions.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
virtual const MultiImageInfo * info() const =0
Interface to be implemented by the child class to return the multi-planar image's metadata.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
4 channels, 1 U8 per channel
void set_dimension_step(size_t dimension, int step)
Set the step of a given dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
virtual IImage * plane(unsigned int index)=0
Return a pointer to the requested plane of the image.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each window element.
Container for valid region of a window.
constexpr int end() const
Return the end of the dimension.
A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes.
Iterator updated by execute_window_loop for each window element.
Format format() const
Colour format of the image.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
Coordinates anchor
Anchor for the start of the valid region.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.