Compute Library
 23.08
NEColorConvertHelper.inl
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Error.h"
26 #include "arm_compute/core/IMultiImage.h"
27 #include "arm_compute/core/Utils.h"
28 #include "src/core/NEON/NEMath.h"
29 
30 #include <arm_neon.h>
31 
32 namespace
33 {
34 #ifndef DOXYGEN_SKIP_THIS
// YUV -> RGB weights applied to the chroma channels once 128 has been subtracted from them.
constexpr float red_coef_bt709    = 1.5748f;  // weight of V in the red channel
constexpr float green_coef_bt709  = -0.1873f; // weight of U in the green channel
constexpr float green_coef2_bt709 = -0.4681f; // weight of V in the green channel
constexpr float blue_coef_bt709   = 1.8556f;  // weight of U in the blue channel

// RGB -> YUV luma weights.
constexpr float rgb2yuv_bt709_kr = 0.2126f;
constexpr float rgb2yuv_bt709_kb = 0.0722f;
// K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg = 0.7152f;
// RGB -> YUV chroma scale factors.
// C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu = 0.5389f;
// C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv = 0.6350f;

// Per-channel weights of the RGB -> greyscale (U8) conversion.
constexpr float rgb2u8_red_coef   = 0.2126f;
constexpr float rgb2u8_green_coef = 0.7152f;
constexpr float rgb2u8_blue_coef  = 0.0722f;
52 
/** Weighted sum of three colour channels, evaluated independently on each of the four float lanes.
 *
 * @param[in] rcolor Red values.
 * @param[in] gcolor Green values.
 * @param[in] bcolor Blue values.
 * @param[in] rcoef  Weight applied to @p rcolor.
 * @param[in] gcoef  Weight applied to @p gcolor.
 * @param[in] bcoef  Weight applied to @p bcolor.
 *
 * @return rcolor * rcoef + gcolor * gcoef + bcolor * bcoef
 */
inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor,
                                                const float rcoef, const float gcoef, const float bcoef)
{
    const float32x4_t red_term       = vmulq_n_f32(rcolor, rcoef);
    const float32x4_t red_green_term = vmlaq_n_f32(red_term, gcolor, gcoef);
    return vmlaq_n_f32(red_green_term, bcolor, bcoef);
}
61 
/** Convert 16 de-interleaved RGB pixels to 16 greyscale U8 values.
 *
 * greyscale = (rgb2u8_red_coef * R) + (rgb2u8_green_coef * G) + (rgb2u8_blue_coef * B)
 *
 * @param[in]  in  Red, green and blue planes in val[0], val[1] and val[2].
 * @param[out] out Resulting 16 greyscale values.
 */
inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out)
{
    // Widen each 16 x u8 channel to 4 x (4 x f32)
    const float32x4x4_t r_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[0]);
    const float32x4x4_t g_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[1]);
    const float32x4x4_t b_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[2]);

    // Weighted sum of the three channels, four pixels at a time
    float32x4x4_t out_float32;
    for(int i = 0; i < 4; ++i)
    {
        out_float32.val[i] = rgb_to_greyscale_calculation(r_float32.val[i], g_float32.val[i], b_float32.val[i],
                                                          rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
    }

    // Narrow the 16 float results back to u8 and hand them to the caller
    arm_compute::convert_float32x4x4_to_uint8x16(out_float32, out);
}
88 
/** Convert four RGB pixels (one per float lane) to YUV.
 *
 * Y = K_r * R + K_g * G + K_b * B
 * U = 128 + C_u * (B - Y)
 * V = 128 + C_v * (R - Y)
 *
 * which expands to
 *
 * Y'= 0.2126*R' + 0.7152*G' + 0.0722*B'
 * U'=-0.1146*R' - 0.3854*G' + 0.5000*B'
 * V'= 0.5000*R' - 0.4542*G' - 0.0458*B'
 *
 * @param[in]  rvec Red values.
 * @param[in]  gvec Green values.
 * @param[in]  bvec Blue values.
 * @param[out] yvec Luma values.
 * @param[out] uvec U chroma values, offset by 128.
 * @param[out] vvec V chroma values, offset by 128.
 */
inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
                                   float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
{
    const float32x4_t chroma_offset = vdupq_n_f32(128.f);

    // Y = R * K_r + G * (1 - K_r - K_b) + B * K_b
    yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
    yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
    yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);

    // U = 128 + (B - Y) / (2 * (1 - K_b))
    uvec = vmlaq_n_f32(chroma_offset, vsubq_f32(bvec, yvec), rgb2yuv_bt709_cu);

    // V = 128 + (R - Y) / (2 * (1 - K_r))
    vvec = vmlaq_n_f32(chroma_offset, vsubq_f32(rvec, yvec), rgb2yuv_bt709_cv);
}
112 
/** Convert eight YUV pixels that share four chroma samples to RGB(A) and store them.
 *
 * Pixel 2*i uses yvec_val[i] and pixel 2*i+1 uses yyvec_val[i]; both use
 * uvec_val[i] and vvec_val[i].
 *
 * @param[in]  yvec_val   Luma of the four even pixels.
 * @param[in]  uvec_val   U chroma samples (offset by 128).
 * @param[in]  yyvec_val  Luma of the four odd pixels.
 * @param[in]  vvec_val   V chroma samples (offset by 128).
 * @param[out] output_ptr Destination; 24 bytes are written when @p alpha is false, 32 bytes otherwise.
 * @param[in]  alpha      If true, a fourth channel holding 255 is stored after every RGB triplet.
 */
inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
                                    float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
{
    float32x4x3_t rgb1, rgb2;

    // Compute: cb - 128 and cr - 128;
    const auto c128 = vdupq_n_f32(128.f);
    uvec_val        = vsubq_f32(uvec_val, c128);
    vvec_val        = vsubq_f32(vvec_val, c128);

    // Compute:
    //      r =  0.0000f*f_u + 1.5748f*f_v;
    //      g = -0.1873f*f_u - 0.4681f*f_v;
    //      b =  1.8556f*f_u + 0.0000f*f_v;
    const auto red   = vmulq_n_f32(vvec_val, red_coef_bt709);
    const auto blue  = vmulq_n_f32(uvec_val, blue_coef_bt709);
    const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
                                 vmulq_n_f32(vvec_val, green_coef2_bt709));

    // Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
    // The result is held in two float32x4x3_t which are then converted to one uint8x8x3_t
    // and written back to memory using vst3/vst4 lane stores.
    rgb1.val[0] = vaddq_f32(yvec_val, red);
    rgb1.val[1] = vaddq_f32(yvec_val, green);
    rgb1.val[2] = vaddq_f32(yvec_val, blue);

    rgb2.val[0] = vaddq_f32(yyvec_val, red);
    rgb2.val[1] = vaddq_f32(yyvec_val, green);
    rgb2.val[2] = vaddq_f32(yyvec_val, blue);

    // Narrow both float triplets into a single u8 triplet before storing.
    // NOTE(review): the interleaved store order below presumes lanes 0-3 come from rgb1
    // and lanes 4-7 from rgb2 - confirm against convert_float32x4x3_to_uint8x8x3.
    uint8x8x3_t u8_rgb;
    arm_compute::convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);

    if(!alpha)
    {
        vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
        vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
        vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
        vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
        vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
        vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
        vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
        vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
    }
    else
    {
        uint8x8x4_t u8_rgba;
        u8_rgba.val[0] = u8_rgb.val[0];
        u8_rgba.val[1] = u8_rgb.val[1];
        u8_rgba.val[2] = u8_rgb.val[2];
        u8_rgba.val[3] = vdup_n_u8(255);
        vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
        vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
        vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
        vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
        vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
        vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
        vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
        vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
    }
}
175 
/** Load 16 pixels and return their red, green and blue planes.
 *
 * @param[in] ptr   Address of the first pixel.
 * @param[in] alpha True if the pixels are stored with four channels (the fourth is dropped),
 *                  false if they are stored with three.
 *
 * @return The de-interleaved R, G and B channels in val[0], val[1] and val[2].
 */
inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
{
    if(!alpha)
    {
        return vld3q_u8(ptr);
    }

    // Four-channel input: keep the first three planes only
    const uint8x16x4_t four_channels = vld4q_u8(ptr);

    uint8x16x3_t rgb;
    rgb.val[0] = four_channels.val[0];
    rgb.val[1] = four_channels.val[1];
    rgb.val[2] = four_channels.val[2];
    return rgb;
}
194 
/** Convert two rows of 16 RGB pixels to YUV in place.
 *
 * On entry val[0], val[1], val[2] of each argument hold R, G, B;
 * on return they hold Y, U, V.
 *
 * @param[in,out] vec_top    Pixels of the top row.
 * @param[in,out] vec_bottom Pixels of the bottom row.
 */
inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
{
    // Widen every channel of both rows to float (index 0/1/2 = R/G/B)
    float32x4x4_t top_rgb[3];
    float32x4x4_t bottom_rgb[3];
    for(int channel = 0; channel < 3; ++channel)
    {
        top_rgb[channel] = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[channel]);
    }
    for(int channel = 0; channel < 3; ++channel)
    {
        bottom_rgb[channel] = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[channel]);
    }

    // Colour-space conversion, four pixels at a time (index 0/1/2 = Y/U/V)
    float32x4x4_t top_yuv[3];
    float32x4x4_t bottom_yuv[3];
    for(int quad = 0; quad < 4; ++quad)
    {
        rgb_to_yuv_calculation(top_rgb[0].val[quad], top_rgb[1].val[quad], top_rgb[2].val[quad],
                               top_yuv[0].val[quad], top_yuv[1].val[quad], top_yuv[2].val[quad]);
        rgb_to_yuv_calculation(bottom_rgb[0].val[quad], bottom_rgb[1].val[quad], bottom_rgb[2].val[quad],
                               bottom_yuv[0].val[quad], bottom_yuv[1].val[quad], bottom_yuv[2].val[quad]);
    }

    // Narrow back to u8, overwriting the inputs
    for(int channel = 0; channel < 3; ++channel)
    {
        arm_compute::convert_float32x4x4_to_uint8x16(top_yuv[channel], vec_top.val[channel]);
    }
    for(int channel = 0; channel < 3; ++channel)
    {
        arm_compute::convert_float32x4x4_to_uint8x16(bottom_yuv[channel], vec_bottom.val[channel]);
    }
}
224 
/** Convert a 16x2 block of RGB pixels to YUV and store it with 2x2 sub-sampled, interleaved chroma.
 *
 * @param[in]  rvec_top     Red values of the top row.
 * @param[in]  gvec_top     Green values of the top row.
 * @param[in]  bvec_top     Blue values of the top row.
 * @param[in]  rvec_bottom  Red values of the bottom row.
 * @param[in]  gvec_bottom  Green values of the bottom row.
 * @param[in]  bvec_bottom  Blue values of the bottom row.
 * @param[out] out_y_top    Receives the 16 luma bytes of the top row.
 * @param[out] out_y_bottom Receives the 16 luma bytes of the bottom row.
 * @param[out] out_uv       Receives 8 interleaved U/V pairs (16 bytes).
 */
inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
                              const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
                              unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
                              unsigned char *const __restrict out_uv)
{
    uint8x16x3_t top_row    = { { rvec_top, gvec_top, bvec_top } };
    uint8x16x3_t bottom_row = { { rvec_bottom, gvec_bottom, bvec_bottom } };

    // In place: R,G,B -> Y,U,V
    rgb_to_yuv_conversion(top_row, bottom_row);

    // Luma is stored at full resolution
    vst1q_u8(out_y_top, top_row.val[0]);
    vst1q_u8(out_y_bottom, bottom_row.val[0]);

    // Horizontal pass: split even/odd columns and average each neighbouring pair.
    // Low half of the result belongs to the top row, high half to the bottom row.
    const uint8x16x2_t u_even_odd = vuzpq_u8(top_row.val[1], bottom_row.val[1]);
    const uint8x16x2_t v_even_odd = vuzpq_u8(top_row.val[2], bottom_row.val[2]);
    const uint8x16_t   u_horiz    = vrhaddq_u8(u_even_odd.val[0], u_even_odd.val[1]);
    const uint8x16_t   v_horiz    = vrhaddq_u8(v_even_odd.val[0], v_even_odd.val[1]);

    // Vertical pass: average the top-row half with the bottom-row half
    const uint8x8x2_t uv_pairs =
    {
        {
            vhadd_u8(vget_low_u8(u_horiz), vget_high_u8(u_horiz)),
            vhadd_u8(vget_low_u8(v_horiz), vget_high_u8(v_horiz))
        }
    };

    vst2_u8(out_uv, uv_pairs);
}
254 
/** Convert a 16x2 block of RGB pixels to YUV and store it with 2x2 sub-sampled, planar chroma.
 *
 * @param[in]  rvec_top     Red values of the top row.
 * @param[in]  gvec_top     Green values of the top row.
 * @param[in]  bvec_top     Blue values of the top row.
 * @param[in]  rvec_bottom  Red values of the bottom row.
 * @param[in]  gvec_bottom  Green values of the bottom row.
 * @param[in]  bvec_bottom  Blue values of the bottom row.
 * @param[out] out_y_top    Receives the 16 luma bytes of the top row.
 * @param[out] out_y_bottom Receives the 16 luma bytes of the bottom row.
 * @param[out] out_u        Receives 8 U bytes.
 * @param[out] out_v        Receives 8 V bytes.
 */
inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
                              const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
                              unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
                              unsigned char *const __restrict out_u,
                              unsigned char *const __restrict out_v)
{
    uint8x16x3_t top_row    = { { rvec_top, gvec_top, bvec_top } };
    uint8x16x3_t bottom_row = { { rvec_bottom, gvec_bottom, bvec_bottom } };

    // In place: R,G,B -> Y,U,V
    rgb_to_yuv_conversion(top_row, bottom_row);

    // Luma is stored at full resolution
    vst1q_u8(out_y_top, top_row.val[0]);
    vst1q_u8(out_y_bottom, bottom_row.val[0]);

    // Per row: split even/odd columns of U (low half) and V (high half) ...
    const uint8x16x2_t top_even_odd    = vuzpq_u8(top_row.val[1], top_row.val[2]);
    const uint8x16x2_t bottom_even_odd = vuzpq_u8(bottom_row.val[1], bottom_row.val[2]);

    // ... average neighbouring columns, then average the two rows
    const uint8x16_t top_horiz    = vrhaddq_u8(top_even_odd.val[0], top_even_odd.val[1]);
    const uint8x16_t bottom_horiz = vrhaddq_u8(bottom_even_odd.val[0], bottom_even_odd.val[1]);
    const uint8x16_t chroma       = vhaddq_u8(top_horiz, bottom_horiz);

    vst1_u8(out_u, vget_low_u8(chroma));
    vst1_u8(out_v, vget_high_u8(chroma));
}
282 
/** Convert 16 RGB pixels to YUV and store each channel at full resolution.
 *
 * @param[in]  rvec  Red values.
 * @param[in]  gvec  Green values.
 * @param[in]  bvec  Blue values.
 * @param[out] out_y Receives 16 luma bytes.
 * @param[out] out_u Receives 16 U bytes.
 * @param[out] out_v Receives 16 V bytes.
 */
inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
                              unsigned char *const __restrict out_y,
                              unsigned char *const __restrict out_u,
                              unsigned char *const __restrict out_v)
{
    // Convert the uint8x16_t to float32x4x4_t
    const float32x4x4_t frvec = arm_compute::convert_uint8x16_to_float32x4x4(rvec);
    const float32x4x4_t fgvec = arm_compute::convert_uint8x16_to_float32x4x4(gvec);
    const float32x4x4_t fbvec = arm_compute::convert_uint8x16_to_float32x4x4(bvec);

    float32x4x4_t fyvec, fuvec, fvvec;
    for(auto i = 0; i < 4; ++i)
    {
        rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
                               fyvec.val[i], fuvec.val[i], fvvec.val[i]);
    }

    // Narrow the float results to u8; without this the stores below would write
    // uninitialised vectors.
    uint8x16_t yvec, uvec, vvec;
    arm_compute::convert_float32x4x4_to_uint8x16(fyvec, yvec);
    arm_compute::convert_float32x4x4_to_uint8x16(fuvec, uvec);
    arm_compute::convert_float32x4x4_to_uint8x16(fvvec, vvec);

    vst1q_u8(out_y, yvec);
    vst1q_u8(out_u, uvec);
    vst1q_u8(out_v, vvec);
}
309 #endif /* DOXYGEN_SKIP_THIS */
310 }
311 
312 namespace arm_compute
313 {
/** Convert RGB to RGBX.
 *
 * Each window step loads 16 interleaved three-channel pixels and stores them
 * as 16 four-channel pixels whose fourth channel is 255.
 *
 * @param[in]  input  Input RGB data buffer.
 * @param[out] output Output RGBX buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    Iterator in(src_image, win);
    Iterator out(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const uint8x16x3_t rgb  = vld3q_u8(in.ptr());
        const uint8x16x4_t rgbx = { { rgb.val[0], rgb.val[1], rgb.val[2], vdupq_n_u8(255) } };
        vst4q_u8(out.ptr(), rgbx);
    },
    in, out);
}
344 
/** Convert RGB to U8.
 *
 * Each window step loads 16 interleaved RGB pixels, reduces them to one byte
 * per pixel with rgb_to_u8_conversion() and stores the 16 resulting bytes.
 *
 * @param[in]  input  Input RGB data buffer.
 * @param[out] output Output U8 buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    Iterator in(src_image, win);
    Iterator out(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const uint8x16x3_t rgb = vld3q_u8(in.ptr());

        uint8x16_t grey;
        rgb_to_u8_conversion(rgb, grey);

        vst1q_u8(out.ptr(), grey);
    },
    in, out);
}
372 
/** Convert RGBX to RGB.
 *
 * Each window step loads 16 interleaved four-channel pixels and stores their
 * first three channels as 16 interleaved three-channel pixels.
 *
 * @param[in]  input  Input RGBX data buffer.
 * @param[out] output Output RGB buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    Iterator in(src_image, win);
    Iterator out(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const uint8x16x4_t rgbx = vld4q_u8(in.ptr());
        const uint8x16x3_t rgb  = { { rgbx.val[0], rgbx.val[1], rgbx.val[2] } };
        vst3q_u8(out.ptr(), rgb);
    },
    in, out);
}
402 
/** Convert YUYV to RGB.
 *
 * Each window step consumes 64 input bytes (32 pixels) and produces 32 RGB or RGBA pixels.
 *
 * @tparam yuyv  True if the bytes of a pixel pair are ordered Y0 U Y1 V,
 *               false if they are ordered U Y0 V Y1.
 * @tparam alpha True to write four bytes per pixel (the fourth being 255), false to write three.
 *
 * @param[in]  input  Input YUYV data buffer.
 * @param[out] output Output RGB buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool yuyv, bool alpha>
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation() call (8 pixels)
    constexpr auto element_size = alpha ? 32 : 24;
    // Index offset that swaps the luma and chroma planes for the U-first byte order
    constexpr auto shift = yuyv ? 0 : 1;

    Iterator in(src_image, win);
    Iterator out(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // For the Y-first byte order the de-interleaved planes are:
        //planes.val[0] = Y0 Y2 Y4 Y6 ...
        //planes.val[1] = U0 U2 U4 U6 ...
        //planes.val[2] = Y1 Y3 Y5 Y7 ...
        //planes.val[3] = V0 V2 V4 V6 ...
        const uint8x16x4_t planes = vld4q_u8(in.ptr());

        // Convert the uint8x16x4_t to float32x4x4_t
        const float32x4x4_t luma_even = arm_compute::convert_uint8x16_to_float32x4x4(planes.val[0 + shift]);
        const float32x4x4_t chroma_u  = arm_compute::convert_uint8x16_to_float32x4x4(planes.val[1 - shift]);
        const float32x4x4_t luma_odd  = arm_compute::convert_uint8x16_to_float32x4x4(planes.val[2 + shift]);
        const float32x4x4_t chroma_v  = arm_compute::convert_uint8x16_to_float32x4x4(planes.val[3 - shift]);

        for(int quad = 0; quad < 4; ++quad)
        {
            yuyv_to_rgb_calculation(luma_even.val[quad], chroma_u.val[quad], luma_odd.val[quad], chroma_v.val[quad],
                                    out.ptr() + quad * element_size, alpha);
        }
    },
    in, out);
}
446 
/** Convert NV12 to RGB.
 *
 * Each window step converts a block of 32x2 pixels: two rows of 32 luma bytes
 * that share one row of 16 interleaved chroma pairs.
 *
 * @tparam uv    True if the chroma plane is ordered U V, false if it is ordered V U.
 * @tparam alpha True to write four bytes per pixel (the fourth being 255), false to write three.
 *
 * @param[in]  input  Input NV12 data buffer.
 * @param[out] output Output RGB buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool uv, bool alpha>
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto src_planes = static_cast<const IMultiImage *__restrict>(input);
    const auto dst_image  = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation() call (8 pixels)
    constexpr auto element_size = alpha ? 32 : 24;
    const auto     out_stride   = dst_image->info()->strides_in_bytes().y();
    // Index offset that swaps the two chroma planes for the V-first order
    constexpr auto shift = uv ? 0 : 1;

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(src_planes->plane(0), win);
    Iterator in_uv(src_planes->plane(1), win_uv);
    Iterator out(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        //luma.val[0]   = Y0 Y2 Y4 Y6 ...
        //luma.val[1]   = Y1 Y3 Y5 Y7 ...
        //chroma.val[0] = U0 U2 U4 U6 ... (for the U-first order)
        //chroma.val[1] = V0 V2 V4 V6 ...
        const uint8x16x2_t luma_top    = vld2q_u8(in_y.ptr());
        const uint8x16x2_t luma_bottom = vld2q_u8(in_y.ptr() + src_planes->plane(0)->info()->strides_in_bytes().y());
        const uint8x16x2_t chroma      = vld2q_u8(in_uv.ptr());

        // Convert the uint8x16 planes to float32x4x4_t
        float32x4x4_t top_even    = arm_compute::convert_uint8x16_to_float32x4x4(luma_top.val[0]);
        float32x4x4_t top_odd     = arm_compute::convert_uint8x16_to_float32x4x4(luma_top.val[1]);
        float32x4x4_t bottom_even = arm_compute::convert_uint8x16_to_float32x4x4(luma_bottom.val[0]);
        float32x4x4_t bottom_odd  = arm_compute::convert_uint8x16_to_float32x4x4(luma_bottom.val[1]);
        float32x4x4_t chroma_u    = arm_compute::convert_uint8x16_to_float32x4x4(chroma.val[0 + shift]);
        float32x4x4_t chroma_v    = arm_compute::convert_uint8x16_to_float32x4x4(chroma.val[1 - shift]);

        // Top row
        for(int quad = 0; quad < 4; ++quad)
        {
            yuyv_to_rgb_calculation(top_even.val[quad], chroma_u.val[quad], top_odd.val[quad], chroma_v.val[quad],
                                    out.ptr() + quad * element_size, alpha);
        }

        // Bottom row reuses the same chroma samples
        for(int quad = 0; quad < 4; ++quad)
        {
            yuyv_to_rgb_calculation(bottom_even.val[quad], chroma_u.val[quad], bottom_odd.val[quad], chroma_v.val[quad],
                                    out.ptr() + out_stride + quad * element_size, alpha);
        }
    },
    in_y, in_uv, out);
}
508 
/** Convert IYUV to RGB.
 *
 * Each window step converts a block of 32x2 pixels: two rows of 32 luma bytes
 * that share 16 U and 16 V samples read from separate planes.
 *
 * @tparam alpha True to write four bytes per pixel (the fourth being 255), false to write three.
 *
 * @param[in]  input  Input IYUV data buffer.
 * @param[out] output Output RGB buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool alpha>
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation() call (8 pixels)
    constexpr auto element_size = alpha ? 32 : 24;
    const auto     out_stride   = output_ptr->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto *y_top_ptr    = in_y.ptr();
        const auto *y_bottom_ptr = in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y();
        const auto *u_ptr        = in_u.ptr();
        const auto *v_ptr        = in_v.ptr();

        // Work-around issue in gcc 9(>=) where vld2q might cause issues with register allocation.
        // The guard must test __aarch64__ (the macro compilers define for AArch64 targets);
        // vuzp1q_u8 / vuzp2q_u8 are only available there.
#if defined(__aarch64__)
        const auto ta0_y_top    = vld1q_u8(y_top_ptr);
        const auto ta1_y_top    = vld1q_u8(y_top_ptr + 16);
        const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr);
        const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16);
        const auto ta_u         = vld1q_u8(u_ptr);
        const auto ta_v         = vld1q_u8(v_ptr);

        // Split even / odd luma samples and convert everything to float32x4x4_t
        //vuzp1q = Y0 Y2 Y4 Y6 ...
        //vuzp2q = Y1 Y3 Y5 Y7 ...
        float32x4x4_t yvec_top     = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_top, ta1_y_top));
        float32x4x4_t yyvec_top    = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_top, ta1_y_top));
        float32x4x4_t yvec_bottom  = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_bottom, ta1_y_bottom));
        float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_bottom, ta1_y_bottom));
        float32x4x4_t uvec         = arm_compute::convert_uint8x16_to_float32x4x4(ta_u);
        float32x4x4_t vvec         = arm_compute::convert_uint8x16_to_float32x4x4(ta_v);
#else  /* defined(__aarch64__) */
        const auto ta_y_top    = vld2q_u8(y_top_ptr);
        const auto ta_y_bottom = vld2q_u8(y_bottom_ptr);
        const auto ta_u        = vld1q_u8(u_ptr);
        const auto ta_v        = vld1q_u8(v_ptr);
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_u        = U0 U1 U2 U3 ... (one sample per pixel pair)
        //ta_v        = V0 V1 V2 V3 ...

        // Convert the uint8x16 vectors to float32x4x4_t
        float32x4x4_t yvec_top     = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
        float32x4x4_t yyvec_top    = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
        float32x4x4_t yvec_bottom  = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
        float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
        float32x4x4_t uvec         = arm_compute::convert_uint8x16_to_float32x4x4(ta_u);
        float32x4x4_t vvec         = arm_compute::convert_uint8x16_to_float32x4x4(ta_v);
#endif /* defined(__aarch64__) */

        // Top row
        yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
        yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
        yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
        yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);

        // Bottom row reuses the same chroma samples
        yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
        yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
        yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
        yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
    },
    in_y, in_u, in_v, out);
}
594 
/** Convert YUYV to NV12.
 *
 * Each window step converts a block of 32x2 pixels. Luma is copied unchanged;
 * the chroma of the two rows is averaged and stored as interleaved U/V pairs.
 *
 * @tparam yuyv True if the bytes of a pixel pair are ordered Y0 U Y1 V,
 *              false if they are ordered U Y0 V Y1.
 *
 * @param[in]  input  Input YUYV data buffer.
 * @param[out] output Output NV12 buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool yuyv>
void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto src_image  = static_cast<const IImage *__restrict>(input);
    const auto dst_planes = static_cast<IMultiImage *__restrict>(output);

    // Index offset that swaps the luma and chroma planes for the U-first byte order
    constexpr auto shift = yuyv ? 0 : 1;

    // NV12's UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in(src_image, win);
    Iterator out_y(dst_planes->plane(0), win);
    Iterator out_uv(dst_planes->plane(1), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // For the Y-first byte order the de-interleaved planes are:
        //row.val[0] = Y0 Y2 Y4 Y6 ...
        //row.val[1] = U0 U2 U4 U6 ...
        //row.val[2] = Y1 Y3 Y5 Y7 ...
        //row.val[3] = V0 V2 V4 V6 ...
        const uint8x16x4_t top_row    = vld4q_u8(in.ptr());
        const uint8x16x4_t bottom_row = vld4q_u8(in.ptr() + src_image->info()->strides_in_bytes().y());

        // Re-interleave even and odd luma samples of the top row
        const uint8x16x2_t luma_top = { { top_row.val[0 + shift], top_row.val[2 + shift] } };
        vst2q_u8(out_y.ptr(), luma_top);

        // Same for the bottom row, one output row further down
        const uint8x16x2_t luma_bottom = { { bottom_row.val[0 + shift], bottom_row.val[2 + shift] } };
        vst2q_u8(out_y.ptr() + dst_planes->plane(0)->info()->strides_in_bytes().y(), luma_bottom);

        // Average the chroma of both rows and store it as U/V pairs
        const uint8x16x2_t chroma =
        {
            {
                vhaddq_u8(top_row.val[1 - shift], bottom_row.val[1 - shift]),
                vhaddq_u8(top_row.val[3 - shift], bottom_row.val[3 - shift])
            }
        };
        vst2q_u8(out_uv.ptr(), chroma);
    },
    in, out_y, out_uv);
}
650 
/** Convert IYUV to NV12.
 *
 * Each window step copies two rows of 32 luma bytes unchanged and interleaves
 * 16 U and 16 V samples from the separate input planes into 16 U/V pairs.
 *
 * @param[in]  input  Input IYUV data buffer.
 * @param[out] output Output NV12 buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto src_planes = static_cast<const IMultiImage *__restrict>(input);
    const auto dst_planes = static_cast<IMultiImage *__restrict>(output);

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(src_planes->plane(0), win);
    Iterator in_u(src_planes->plane(1), win_uv);
    Iterator in_v(src_planes->plane(2), win_uv);
    Iterator out_y(dst_planes->plane(0), win);
    Iterator out_uv(dst_planes->plane(1), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        //luma.val[0]   = Y0 Y2 Y4 Y6 ...
        //luma.val[1]   = Y1 Y3 Y5 Y7 ...
        const uint8x16x2_t luma_top    = vld2q_u8(in_y.ptr());
        const uint8x16x2_t luma_bottom = vld2q_u8(in_y.ptr() + src_planes->plane(0)->info()->strides_in_bytes().y());

        //chroma.val[0] = 16 consecutive U samples
        //chroma.val[1] = 16 consecutive V samples
        uint8x16x2_t chroma;
        chroma.val[0] = vld1q_u8(in_u.ptr());
        chroma.val[1] = vld1q_u8(in_v.ptr());

        vst2q_u8(out_y.ptr(), luma_top);
        vst2q_u8(out_y.ptr() + dst_planes->plane(0)->info()->strides_in_bytes().y(), luma_bottom);
        // The interleaving store turns the two planes into U/V pairs
        vst2q_u8(out_uv.ptr(), chroma);
    },
    in_y, in_u, in_v, out_y, out_uv);
}
697 
/** Convert NV12 to IYUV.
 *
 * Each window step copies two rows of 32 luma bytes unchanged and splits
 * 16 interleaved chroma pairs into 16 U and 16 V samples.
 *
 * @tparam uv True if the input chroma plane is ordered U V, false if it is ordered V U.
 *
 * @param[in]  input  Input NV12 data buffer.
 * @param[out] output Output IYUV buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool uv>
void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto src_planes = static_cast<const IMultiImage *__restrict>(input);
    const auto dst_planes = static_cast<IMultiImage *__restrict>(output);

    // Index offset that swaps the two chroma planes for the V-first order
    constexpr auto shift = uv ? 0 : 1;

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(src_planes->plane(0), win);
    Iterator in_uv(src_planes->plane(1), win_uv);
    Iterator out_y(dst_planes->plane(0), win);
    Iterator out_u(dst_planes->plane(1), win_uv);
    Iterator out_v(dst_planes->plane(2), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        //luma.val[0]   = Y0 Y2 Y4 Y6 ...
        //luma.val[1]   = Y1 Y3 Y5 Y7 ...
        //chroma.val[0] = first byte of every chroma pair
        //chroma.val[1] = second byte of every chroma pair
        const uint8x16x2_t luma_top    = vld2q_u8(in_y.ptr());
        const uint8x16x2_t luma_bottom = vld2q_u8(in_y.ptr() + src_planes->plane(0)->info()->strides_in_bytes().y());
        const uint8x16x2_t chroma      = vld2q_u8(in_uv.ptr());

        vst2q_u8(out_y.ptr(), luma_top);
        vst2q_u8(out_y.ptr() + dst_planes->plane(0)->info()->strides_in_bytes().y(), luma_bottom);
        vst1q_u8(out_u.ptr(), chroma.val[0 + shift]);
        vst1q_u8(out_v.ptr(), chroma.val[1 - shift]);
    },
    in_y, in_uv, out_y, out_u, out_v);
}
746 
/** Convert YUYV to IYUV.
 *
 * Each window step converts a block of 32x2 pixels. Luma is copied unchanged;
 * the chroma of the two rows is averaged and stored in separate U and V planes.
 *
 * @tparam yuyv True if the bytes of a pixel pair are ordered Y0 U Y1 V,
 *              false if they are ordered U Y0 V Y1.
 *
 * @param[in]  input  Input YUYV data buffer.
 * @param[out] output Output IYUV buffer.
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool yuyv>
void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto src_image  = static_cast<const IImage *__restrict>(input);
    const auto dst_planes = static_cast<IMultiImage *__restrict>(output);

    // Index offset that swaps the luma and chroma planes for the U-first byte order
    constexpr auto shift = yuyv ? 0 : 1;

    // Destination's UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in(src_image, win);
    Iterator out_y(dst_planes->plane(0), win);
    Iterator out_u(dst_planes->plane(1), win_uv);
    Iterator out_v(dst_planes->plane(2), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // For the Y-first byte order the de-interleaved planes are:
        //row.val[0] = Y0 Y2 Y4 Y6 ...
        //row.val[1] = U0 U2 U4 U6 ...
        //row.val[2] = Y1 Y3 Y5 Y7 ...
        //row.val[3] = V0 V2 V4 V6 ...
        const uint8x16x4_t top_row    = vld4q_u8(in.ptr());
        const uint8x16x4_t bottom_row = vld4q_u8(in.ptr() + src_image->info()->strides_in_bytes().y());

        // Re-interleave even and odd luma samples of the top row
        const uint8x16x2_t luma_top = { { top_row.val[0 + shift], top_row.val[2 + shift] } };
        vst2q_u8(out_y.ptr(), luma_top);

        // Same for the bottom row, one output row further down
        const uint8x16x2_t luma_bottom = { { bottom_row.val[0 + shift], bottom_row.val[2 + shift] } };
        vst2q_u8(out_y.ptr() + dst_planes->plane(0)->info()->strides_in_bytes().y(), luma_bottom);

        // Average the chroma of both rows, one plane at a time
        const uint8x16_t u_avg = vhaddq_u8(top_row.val[1 - shift], bottom_row.val[1 - shift]);
        vst1q_u8(out_u.ptr(), u_avg);

        const uint8x16_t v_avg = vhaddq_u8(top_row.val[3 - shift], bottom_row.val[3 - shift]);
        vst1q_u8(out_v.ptr(), v_avg);
    },
    in, out_y, out_u, out_v);
}
806 
807 /** Convert NV12 to YUV4.
808  *
809  * @param[in] input Input NV12 data buffer.
810  * @param[out] output Output YUV4 buffer.
811  * @param[in] win Window for iterating the buffers.
812  *
813  */
814 template <bool uv>
815 void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
816 {
817  ARM_COMPUTE_ERROR_ON(nullptr == input);
818  ARM_COMPUTE_ERROR_ON(nullptr == output);
819  win.validate();
820 
821  const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
822  const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
823 
824  constexpr auto shift = uv ? 0 : 1;
825 
826  // UV's width and height are subsampled
827  Window win_uv(win);
828  win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
829  win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
830  win_uv.validate();
831 
832  Iterator in_y(input_ptr->plane(0), win);
833  Iterator in_uv(input_ptr->plane(1), win_uv);
834  Iterator out_y(output_ptr->plane(0), win);
835  Iterator out_u(output_ptr->plane(1), win);
836  Iterator out_v(output_ptr->plane(2), win);
837 
838  execute_window_loop(win, [&](const Coordinates &)
839  {
840  const auto ta_y_top = vld2q_u8(in_y.ptr());
841  const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
842  const auto ta_uv = vld2q_u8(in_uv.ptr());
843  //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
844  //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
845  //ta_uv.val[0] = U0 U2 U4 U6 ...
846  //ta_uv.val[1] = V0 V2 V4 V6 ...
847 
848  vst2q_u8(out_y.ptr(), ta_y_top);
849  vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
850 
851  uint8x16x2_t uvec;
852  uvec.val[0] = ta_uv.val[0 + shift];
853  uvec.val[1] = ta_uv.val[0 + shift];
854  vst2q_u8(out_u.ptr(), uvec);
855  vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
856 
857  uint8x16x2_t vvec;
858  vvec.val[0] = ta_uv.val[1 - shift];
859  vvec.val[1] = ta_uv.val[1 - shift];
860  vst2q_u8(out_v.ptr(), vvec);
861  vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
862  },
863  in_y, in_uv, out_y, out_u, out_v);
864 }
865 
866 /** Convert IYUV to YUV4.
867  *
868  * @param[in] input Input IYUV data buffer.
869  * @param[out] output Output YUV4 buffer.
870  * @param[in] win Window for iterating the buffers.
871  *
872  */
873 void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
874 {
875  ARM_COMPUTE_ERROR_ON(nullptr == input);
876  ARM_COMPUTE_ERROR_ON(nullptr == output);
877  win.validate();
878 
879  const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
880  const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
881 
882  // UV's width and height are subsampled
883  Window win_uv(win);
884  win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
885  win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
886  win_uv.validate();
887 
888  Iterator in_y(input_ptr->plane(0), win);
889  Iterator in_u(input_ptr->plane(1), win_uv);
890  Iterator in_v(input_ptr->plane(2), win_uv);
891  Iterator out_y(output_ptr->plane(0), win);
892  Iterator out_u(output_ptr->plane(1), win);
893  Iterator out_v(output_ptr->plane(2), win);
894 
895  execute_window_loop(win, [&](const Coordinates &)
896  {
897  const auto ta_y_top = vld2q_u8(in_y.ptr());
898  const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
899  const auto ta_u = vld1q_u8(in_u.ptr());
900  const auto ta_v = vld1q_u8(in_v.ptr());
901  //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
902  //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
903  //ta_u = U0 U2 U4 U6 ...
904  //ta_v = V0 V2 V4 V6 ...
905 
906  vst2q_u8(out_y.ptr(), ta_y_top);
907  vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
908 
909  uint8x16x2_t uvec;
910  uvec.val[0] = ta_u;
911  uvec.val[1] = ta_u;
912  vst2q_u8(out_u.ptr(), uvec);
913  vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
914 
915  uint8x16x2_t vvec;
916  vvec.val[0] = ta_v;
917  vvec.val[1] = ta_v;
918  vst2q_u8(out_v.ptr(), vvec);
919  vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
920  },
921  in_y, in_u, in_v, out_y, out_u, out_v);
922 }
923 
924 /** Convert RGB to NV12.
925  *
926  * @param[in] input Input RGB data buffer.
927  * @param[out] output Output NV12 buffer.
928  * @param[in] win Window for iterating the buffers.
929  *
930  */
931 template <bool alpha>
932 void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
933 {
934  ARM_COMPUTE_ERROR_ON(nullptr == input);
935  ARM_COMPUTE_ERROR_ON(nullptr == output);
936  win.validate();
937 
938  const auto input_ptr = static_cast<const IImage *__restrict>(input);
939  const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
940 
941  // UV's width and height are subsampled
942  Window win_uv(win);
943  win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
944  win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
945  win_uv.validate();
946 
947  Iterator in(input_ptr, win);
948  Iterator out_y(output_ptr->plane(0), win);
949  Iterator out_uv(output_ptr->plane(1), win_uv);
950 
951  execute_window_loop(win, [&](const Coordinates &)
952  {
953  const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
954  const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
955  //ta_rgb.val[0] = R0 R1 R2 R3 ...
956  //ta_rgb.val[1] = G0 G1 G2 G3 ...
957  //ta_rgb.val[2] = B0 B1 B2 B3 ...
958 
959  store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
960  ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
961  out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
962  out_uv.ptr());
963  },
964  in, out_y, out_uv);
965 }
966 
967 /** Convert RGB to IYUV.
968  *
969  * @param[in] input Input RGB data buffer.
970  * @param[out] output Output IYUV buffer.
971  * @param[in] win Window for iterating the buffers.
972  *
973  */
974 template <bool alpha>
975 void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
976 {
977  ARM_COMPUTE_ERROR_ON(nullptr == input);
978  ARM_COMPUTE_ERROR_ON(nullptr == output);
979  win.validate();
980 
981  const auto input_ptr = static_cast<const IImage *__restrict>(input);
982  const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
983 
984  // UV's width and height are subsampled
985  Window win_uv(win);
986  win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
987  win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
988  win_uv.validate();
989 
990  Iterator in(input_ptr, win);
991  Iterator out_y(output_ptr->plane(0), win);
992  Iterator out_u(output_ptr->plane(1), win_uv);
993  Iterator out_v(output_ptr->plane(2), win_uv);
994 
995  execute_window_loop(win, [&](const Coordinates &)
996  {
997  const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
998  const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
999  //ta_rgb.val[0] = R0 R1 R2 R3 ...
1000  //ta_rgb.val[1] = G0 G1 G2 G3 ...
1001  //ta_rgb.val[2] = B0 B1 B2 B3 ...
1002 
1003  store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
1004  ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
1005  out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
1006  out_u.ptr(), out_v.ptr());
1007  },
1008  in, out_y, out_u, out_v);
1009 }
1010 
1011 /** Convert RGB to YUV4.
1012  *
1013  * @param[in] input Input RGB data buffer.
1014  * @param[out] output Output YUV4 buffer.
1015  * @param[in] win Window for iterating the buffers.
1016  *
1017  */
1018 template <bool alpha>
1019 void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
1020 {
1021  ARM_COMPUTE_ERROR_ON(nullptr == input);
1022  ARM_COMPUTE_ERROR_ON(nullptr == output);
1023  win.validate();
1024 
1025  const auto input_ptr = static_cast<const IImage *__restrict>(input);
1026  const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
1027 
1028  Iterator in(input_ptr, win);
1029  Iterator out_y(output_ptr->plane(0), win);
1030  Iterator out_u(output_ptr->plane(1), win);
1031  Iterator out_v(output_ptr->plane(2), win);
1032 
1033  execute_window_loop(win, [&](const Coordinates &)
1034  {
1035  const auto ta_rgb = load_rgb(in.ptr(), alpha);
1036  //ta_rgb.val[0] = R0 R1 R2 R3 ...
1037  //ta_rgb.val[1] = G0 G1 G2 G3 ...
1038  //ta_rgb.val[2] = B0 B1 B2 B3 ...
1039 
1040  store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
1041  out_y.ptr(), out_u.ptr(), out_v.ptr());
1042  },
1043  in, out_y, out_u, out_v);
1044 }
1045 } // namespace arm_compute
arm_compute::colorconvert_iyuv_to_nv12
void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
Convert IYUV to NV12.
Definition: NEColorConvertHelper.inl:658
arm_compute::Window::Dimension::start
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:97
arm_compute::convert_float32x4x3_to_uint8x8x3
void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
Converts from two float32x4x3_t to just one uint8x8x3_t.
Definition: NEMath.inl:457
Helpers.h
arm_compute::test::colorconvert_helper::detail::rgb2yuv_bt709_kr
constexpr float rgb2yuv_bt709_kr
Definition: ColorConvertHelper.h:42
arm_compute::colorconvert_nv12_to_yuv4
void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
Convert NV12 to YUV4.
Definition: NEColorConvertHelper.inl:815
arm_compute::Window::Dimension::step
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:107
arm_compute::test::colorconvert_helper::detail::rgb_to_yuv_calculation
void rgb_to_yuv_calculation(const SimpleTensor< T > rvec, const SimpleTensor< T > gvec, const SimpleTensor< T > bvec, SimpleTensor< T > &yvec, SimpleTensor< T > &uvec_top, SimpleTensor< T > &uvec_bottom, SimpleTensor< T > &vvec_top, SimpleTensor< T > &vvec_bottom)
Definition: ColorConvertHelper.h:81
arm_compute::Window::DimX
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
arm_compute::colorconvert_rgb_to_yuv4
void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to YUV4.
Definition: NEColorConvertHelper.inl:1019
arm_compute::ITensor
Interface for CPU tensor.
Definition: ITensor.h:36
arm_compute::test::colorconvert_helper::detail::yuyv_to_rgb_calculation
void yuyv_to_rgb_calculation(const SimpleTensor< T > yvec, const SimpleTensor< T > vvec, const SimpleTensor< T > yyvec, const SimpleTensor< T > uvec, SimpleTensor< T > &dst)
Definition: ColorConvertHelper.h:159
Error.h
arm_compute::test::colorconvert_helper::detail::green_coef2_bt709
constexpr float green_coef2_bt709
Definition: ColorConvertHelper.h:39
arm_compute::test::colorconvert_helper::detail::rgb2u8_red_coef
constexpr float rgb2u8_red_coef
Definition: ColorConvertHelper.h:51
NEMath.h
arm_compute::convert_uint8x16_to_float32x4x4
float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in)
Converts from uint8x16 to float32x4x4_t.
Definition: NEMath.inl:417
arm_compute::ITensor::info
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:467
arm_compute::Iterator::ptr
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:149
arm_compute::colorconvert_rgbx_to_rgb
void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
Convert RGBX to RGB.
Definition: NEColorConvertHelper.inl:380
arm_compute::colorconvert_rgb_to_rgbx
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to RGBX.
Definition: NEColorConvertHelper.inl:321
arm_compute::colorconvert_rgb_to_iyuv
void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to IYUV.
Definition: NEColorConvertHelper.inl:975
arm_compute::Iterator
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
arm_compute::colorconvert_yuyv_to_rgb
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
Convert YUYV to RGB.
Definition: NEColorConvertHelper.inl:411
arm_compute::colorconvert_rgb_to_u8
void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to U8.
Definition: NEColorConvertHelper.inl:352
arm_compute::Window::y
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:168
arm_compute::test::colorconvert_helper::detail::rgb2yuv_bt709_kg
constexpr float rgb2yuv_bt709_kg
Definition: ColorConvertHelper.h:45
arm_compute::Window::validate
void validate() const
Will validate all the window's dimensions' values when asserts are enabled.
Definition: Window.inl:173
arm_compute::test::colorconvert_helper::detail::blue_coef_bt709
constexpr float blue_coef_bt709
Definition: ColorConvertHelper.h:40
arm_compute::Coordinates
Coordinates of an item.
Definition: Coordinates.h:37
arm_compute::Window::Dimension
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:79
arm_compute::test::colorconvert_helper::detail::rgb2yuv_bt709_kb
constexpr float rgb2yuv_bt709_kb
Definition: ColorConvertHelper.h:43
arm_compute::colorconvert_nv12_to_rgb
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
Convert NV12 to RGB.
Definition: NEColorConvertHelper.inl:455
arm_compute::test::colorconvert_helper::detail::red_coef_bt709
constexpr float red_coef_bt709
Definition: ColorConvertHelper.h:37
arm_compute::Window::set
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
arm_compute::Window::DimY
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
arm_compute::colorconvert_nv12_to_iyuv
void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
Convert NV12 to IYUV.
Definition: NEColorConvertHelper.inl:706
Utils.h
arm_compute::convert_float32x4x4_to_uint8x16
void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out)
Converts from one float32x4x4_t to just one uint8x16_t.
Definition: NEMath.inl:467
arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39
arm_compute::colorconvert_rgb_to_nv12
void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to NV12.
Definition: NEColorConvertHelper.inl:932
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::test::colorconvert_helper::detail::rgb2yuv_bt709_cu
constexpr float rgb2yuv_bt709_cu
Definition: ColorConvertHelper.h:47
arm_compute::colorconvert_yuyv_to_nv12
void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
Convert YUYV to NV12.
Definition: NEColorConvertHelper.inl:603
arm_compute::ITensorInfo::strides_in_bytes
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
arm_compute::test::colorconvert_helper::detail::rgb2u8_blue_coef
constexpr float rgb2u8_blue_coef
Definition: ColorConvertHelper.h:53
arm_compute::colorconvert_iyuv_to_rgb
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
Convert IYUV to RGB.
Definition: NEColorConvertHelper.inl:517
arm_compute::colorconvert_yuyv_to_iyuv
void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
Convert YUYV to IYUV.
Definition: NEColorConvertHelper.inl:755
arm_compute::colorconvert_iyuv_to_yuv4
void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
Convert IYUV to YUV4.
Definition: NEColorConvertHelper.inl:873
arm_compute::execute_window_loop
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
arm_compute::Window::Dimension::end
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:102
arm_compute::Dimensions::y
T y() const
Alias to access the size of the second dimension.
Definition: Dimensions.h:92
arm_compute::Window::x
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:159
arm_compute::test::colorconvert_helper::detail::green_coef_bt709
constexpr float green_coef_bt709
Definition: ColorConvertHelper.h:38
arm_compute::test::colorconvert_helper::detail::rgb2yuv_bt709_cv
constexpr float rgb2yuv_bt709_cv
Definition: ColorConvertHelper.h:49
arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486
arm_compute::test::colorconvert_helper::detail::rgb2u8_green_coef
constexpr float rgb2u8_green_coef
Definition: ColorConvertHelper.h:52