26 #include "arm_compute/core/IMultiImage.h"
35 #ifndef DOXYGEN_SKIP_THIS
// Builds a per-lane weighted sum of three colour vectors:
//   rcolor * rcoef + gcolor * gcoef + bcolor * bcoef
// One multiply by a scalar seeds the accumulator, then two multiply-accumulates add the
// remaining channels.
// NOTE(review): the declarations of rcoef/gcoef/bcoef and the return statement are outside the
// lines visible here; presumably `greyscale` is the returned value - confirm in the full file.
54 inline float32x4_t rgb_to_greyscale_calculation(
const float32x4_t &rcolor,
55 const float32x4_t &gcolor,
56 const float32x4_t &bcolor,
61 float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef);
62 greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef);
63 greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef);
// Greyscale helper taking three de-interleaved 16 x u8 channel vectors (`in`) and a single
// 16 x u8 output vector (`out`).
// Visible work: each of the four float32x4 sub-vectors of r_float32 / g_float32 / b_float32 is
// passed to rgb_to_greyscale_calculation and the result is kept in the matching slot of
// out_float32.
// NOTE(review): how r_float32/g_float32/b_float32 are produced from `in`, the trailing
// coefficient arguments of each call, and the narrowing of out_float32 into `out` are not
// visible in this excerpt - confirm against the full file.
67 inline void rgb_to_u8_conversion(
const uint8x16x3_t &in, uint8x16_t &out)
69 float32x4x4_t out_float32;
78 out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0],
81 out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1],
84 out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2],
87 out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3],
95 const float32x4_t &gvec,
96 const float32x4_t &bvec,
106 const auto c128 = vdupq_n_f32(128.f);
114 uvec = vsubq_f32(bvec, yvec);
118 vvec = vsubq_f32(rvec, yvec);
123 float32x4_t uvec_val,
124 const float32x4_t &yyvec_val,
125 float32x4_t vvec_val,
126 unsigned char *output_ptr,
129 float32x4x3_t rgb1, rgb2;
132 const auto c128 = vdupq_n_f32(128.f);
133 uvec_val = vsubq_f32(uvec_val, c128);
134 vvec_val = vsubq_f32(vvec_val, c128);
148 rgb1.val[0] = vaddq_f32(yvec_val, red);
149 rgb1.val[1] = vaddq_f32(yvec_val, green);
150 rgb1.val[2] = vaddq_f32(yvec_val, blue);
152 rgb2.val[0] = vaddq_f32(yyvec_val, red);
153 rgb2.val[1] = vaddq_f32(yyvec_val, green);
154 rgb2.val[2] = vaddq_f32(yyvec_val, blue);
161 vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
162 vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
163 vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
164 vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
165 vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
166 vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
167 vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
168 vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
173 u8_rgba.val[0] = u8_rgb.val[0];
174 u8_rgba.val[1] = u8_rgb.val[1];
175 u8_rgba.val[2] = u8_rgb.val[2];
176 u8_rgba.val[3] = vdup_n_u8(255);
177 vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
178 vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
179 vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
180 vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
181 vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
182 vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
183 vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
184 vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
// Loads 16 pixels from `ptr` and returns them as three de-interleaved channel vectors.
// The visible path uses a four-way de-interleaving load (vld4q_u8 reads 64 bytes) and copies
// only channels 0..2 into the result, so the fourth channel is discarded.
// NOTE(review): presumably this path is the `alpha == true` case and a three-channel load
// handles the other case; the branch itself and the declaration/return of `rgb` are not
// visible here - confirm.
188 inline uint8x16x3_t load_rgb(
const unsigned char *
const ptr,
const bool alpha)
194 const auto tmp = vld4q_u8(ptr);
195 rgb.val[0] = tmp.val[0];
196 rgb.val[1] = tmp.val[1];
197 rgb.val[2] = tmp.val[2];
// Converts two rows of 16 pixels (top and bottom) in place.
// Both arguments are non-const references; the callers in this file read val[0] as Y and
// val[1]/val[2] as the two chroma channels after the call.
// The work is done in float: for each of the four float32x4 sub-vectors, rgb_to_yuv_calculation
// is invoked once for the top row and once for the bottom row.
// NOTE(review): the u8 -> float widening that fills frvec_*/fgvec_*/fbvec_* and the narrowing
// that writes the results back into vec_top/vec_bottom are outside the visible lines.
207 inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
218 float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
219 float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
221 for (
auto i = 0; i < 4; ++i)
223 rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i], fyvec_top.val[i], fuvec_top.val[i],
225 rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i], fyvec_bottom.val[i],
226 fuvec_bottom.val[i], fvvec_bottom.val[i]);
// Converts a 16x2 block of RGB pixels (one vector per channel per row) and stores it as a
// full-resolution luma plane plus one interleaved, 2x2-subsampled chroma plane.
//
// Parameters:
//   rvec_top/gvec_top/bvec_top          - R, G, B of the 16 pixels in the upper row
//   rvec_bottom/gvec_bottom/bvec_bottom - R, G, B of the 16 pixels in the lower row
//   out_y_top, out_y_bottom             - destinations for 16 luma bytes each
//   out_uv                              - destination for 8 interleaved chroma pairs (16 bytes)
237 inline void store_rgb_to_nv12(
const uint8x16_t &rvec_top,
238 const uint8x16_t &gvec_top,
239 const uint8x16_t &bvec_top,
240 const uint8x16_t &rvec_bottom,
241 const uint8x16_t &gvec_bottom,
242 const uint8x16_t &bvec_bottom,
243 unsigned char *
const __restrict out_y_top,
244 unsigned char *
const __restrict out_y_bottom,
245 unsigned char *
const __restrict out_uv)
// Pack the six channel vectors into two 3-vector bundles so they can be converted in place.
247 uint8x16x3_t vec_top, vec_bottom;
248 vec_top.val[0] = rvec_top;
249 vec_top.val[1] = gvec_top;
250 vec_top.val[2] = bvec_top;
251 vec_bottom.val[0] = rvec_bottom;
252 vec_bottom.val[1] = gvec_bottom;
253 vec_bottom.val[2] = bvec_bottom;
// After this call val[0] is used as luma and val[1]/val[2] as the two chroma channels.
255 rgb_to_yuv_conversion(vec_top, vec_bottom);
// Luma is stored at full resolution, one row each.
257 vst1q_u8(out_y_top, vec_top.val[0]);
258 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
// Chroma subsampling, step 1: unzip puts even-indexed samples in .val[0] and odd-indexed
// samples in .val[1] (top row in the low half, bottom row in the high half), so the rounding
// halving add averages each horizontally adjacent pair.
260 const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
261 const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
262 const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
263 const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
// Step 2: average the top-row result (low half) with the bottom-row result (high half).
// This second average uses the truncating halving add, unlike the rounding one above.
266 uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
267 uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
// Two-way interleaving store: val[0], val[1], val[0], val[1], ...
269 vst2_u8(out_uv, uvvec);
// Converts a 16x2 block of RGB pixels (one vector per channel per row) and stores it as a
// full-resolution luma plane plus two separate 2x2-subsampled chroma planes.
//
// Parameters:
//   rvec_top/gvec_top/bvec_top          - R, G, B of the 16 pixels in the upper row
//   rvec_bottom/gvec_bottom/bvec_bottom - R, G, B of the 16 pixels in the lower row
//   out_y_top, out_y_bottom             - destinations for 16 luma bytes each
//   out_u, out_v                        - destinations for 8 chroma bytes each
272 inline void store_rgb_to_iyuv(
const uint8x16_t &rvec_top,
273 const uint8x16_t &gvec_top,
274 const uint8x16_t &bvec_top,
275 const uint8x16_t &rvec_bottom,
276 const uint8x16_t &gvec_bottom,
277 const uint8x16_t &bvec_bottom,
278 unsigned char *
const __restrict out_y_top,
279 unsigned char *
const __restrict out_y_bottom,
280 unsigned char *
const __restrict out_u,
281 unsigned char *
const __restrict out_v)
// Pack the six channel vectors into two 3-vector bundles so they can be converted in place.
283 uint8x16x3_t vec_top, vec_bottom;
284 vec_top.val[0] = rvec_top;
285 vec_top.val[1] = gvec_top;
286 vec_top.val[2] = bvec_top;
287 vec_bottom.val[0] = rvec_bottom;
288 vec_bottom.val[1] = gvec_bottom;
289 vec_bottom.val[2] = bvec_bottom;
// After this call val[0] is used as luma and val[1]/val[2] as the two chroma channels.
291 rgb_to_yuv_conversion(vec_top, vec_bottom);
// Luma is stored at full resolution, one row each.
293 vst1q_u8(out_y_top, vec_top.val[0]);
294 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
// Unzip the two chroma channels of the same row together: .val[0] holds the even-indexed
// samples and .val[1] the odd-indexed ones, with the first channel in the low half and the
// second channel in the high half. Averaging .val[0] with .val[1] (rounding) halves the width;
// averaging the top and bottom results (truncating) halves the height.
// NOTE(review): the declaration that receives the result of the expression below is not
// visible; the stores that follow read it as `uvvec`.
296 const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
297 const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
299 vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]), vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
// Low half goes to the first chroma plane, high half to the second.
301 vst1_u8(out_u, vget_low_u8(uvvec));
302 vst1_u8(out_v, vget_high_u8(uvvec));
// Converts one row of 16 RGB pixels (one vector per channel) and stores three full-resolution
// planes: 16 bytes each to out_y, out_u and out_v. No chroma subsampling is applied here.
//
// The conversion runs in float, four lanes at a time, through rgb_to_yuv_calculation.
// NOTE(review): the u8 -> float widening that fills frvec/fgvec/fbvec and the narrowing that
// fills yvec/uvec/vvec from fyvec/fuvec/fvvec are outside the visible lines - confirm.
305 inline void store_rgb_to_yuv4(
const uint8x16_t &rvec,
306 const uint8x16_t &gvec,
307 const uint8x16_t &bvec,
308 unsigned char *
const __restrict out_y,
309 unsigned char *
const __restrict out_u,
310 unsigned char *
const __restrict out_v)
317 float32x4x4_t fyvec, fuvec, fvvec;
318 for (
auto i = 0; i < 4; ++i)
320 rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i], fyvec.val[i], fuvec.val[i], fvvec.val[i]);
323 uint8x16_t yvec, uvec, vvec;
328 vst1q_u8(out_y, yvec);
329 vst1q_u8(out_u, uvec);
330 vst1q_u8(out_v, vvec);
349 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
350 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
359 const auto ta1 = vld3q_u8(in.
ptr());
361 ta2.val[0] = ta1.val[0];
362 ta2.val[1] = ta1.val[1];
363 ta2.val[2] = ta1.val[2];
364 ta2.val[3] = vdupq_n_u8(255);
365 vst4q_u8(out.
ptr(), ta2);
382 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
383 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
392 const auto ta1 = vld3q_u8(in.
ptr());
394 rgb_to_u8_conversion(ta1, ta2);
395 vst1q_u8(out.
ptr(), ta2);
412 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
413 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
422 const auto ta1 = vld4q_u8(in.
ptr());
424 ta2.val[0] = ta1.val[0];
425 ta2.val[1] = ta1.val[1];
426 ta2.val[2] = ta1.val[2];
427 vst3q_u8(out.
ptr(), ta2);
439 template <
bool yuyv,
bool alpha>
445 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
446 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
448 constexpr
auto element_size = alpha ? 32 : 24;
449 constexpr
auto shift = yuyv ? 0 : 1;
458 const auto ta = vld4q_u8(in.
ptr());
489 template <
bool uv,
bool alpha>
496 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
497 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
499 constexpr
auto element_size = alpha ? 32 : 24;
501 constexpr
auto shift = uv ? 0 : 1;
509 Iterator in_y(input_ptr->plane(0), win);
510 Iterator in_uv(input_ptr->plane(1), win_uv);
517 const auto ta_y_top = vld2q_u8(in_y.
ptr());
518 const auto ta_y_bottom = vld2q_u8(in_y.
ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
519 const auto ta_uv = vld2q_u8(in_uv.
ptr());
534 out.
ptr() + 0 * element_size, alpha);
536 out.
ptr() + 1 * element_size, alpha);
538 out.
ptr() + 2 * element_size, alpha);
540 out.
ptr() + 3 * element_size, alpha);
543 out.
ptr() + out_stride + 0 * element_size, alpha);
545 out.
ptr() + out_stride + 1 * element_size, alpha);
547 out.
ptr() + out_stride + 2 * element_size, alpha);
549 out.
ptr() + out_stride + 3 * element_size, alpha);
561 template <
bool alpha>
568 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
569 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
571 constexpr
auto element_size = alpha ? 32 : 24;
580 Iterator in_y(input_ptr->plane(0), win);
581 Iterator in_u(input_ptr->plane(1), win_uv);
582 Iterator in_v(input_ptr->plane(2), win_uv);
589 const auto *y_top_ptr = in_y.
ptr();
590 const auto *y_bottom_ptr = in_y.
ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y();
591 const auto *u_ptr = in_u.
ptr();
592 const auto *v_ptr = in_v.
ptr();
// Two alternative ways of loading the same inputs, selected at preprocessing time:
//   - guarded branch: two plain 16-byte loads per luma row (offsets 0 and 16);
//   - other branch:   one two-way de-interleaving load per luma row.
// Both branches load the two chroma vectors with plain 16-byte loads.
//
// NOTE(review): the guard tests `__arch64__`. The macro that ACLE specifies, and that GCC and
// Clang predefine, for 64-bit Arm targets is `__aarch64__`. Unless the build system defines
// `__arch64__` explicitly, the guarded vld1q_u8 branch is never compiled and the vld2q_u8 path
// is always used. This looks like a typo - confirm the intent, and validate the vld1q_u8 path
// before changing the guard, since doing so would enable code that may never have been built.
// NOTE(review): the #else/#endif lines and the right-hand sides of the yvec_bottom /
// yyvec_bottom initialisers are not visible in this excerpt.
595 #
if defined(__arch64__)
596 const auto ta0_y_top = vld1q_u8(y_top_ptr);
597 const auto ta1_y_top = vld1q_u8(y_top_ptr + 16);
598 const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr);
599 const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16);
600 const auto ta_u = vld1q_u8(u_ptr);
601 const auto ta_v = vld1q_u8(v_ptr);
606 float32x4x4_t yvec_bottom =
608 float32x4x4_t yyvec_bottom =
613 const auto ta_y_top = vld2q_u8(y_top_ptr);
614 const auto ta_y_bottom = vld2q_u8(y_bottom_ptr);
615 const auto ta_u = vld1q_u8(u_ptr);
616 const auto ta_v = vld1q_u8(v_ptr);
632 out.
ptr() + 0 * element_size, alpha);
634 out.
ptr() + 1 * element_size, alpha);
636 out.
ptr() + 2 * element_size, alpha);
638 out.
ptr() + 3 * element_size, alpha);
641 out.
ptr() + out_stride + 0 * element_size, alpha);
643 out.
ptr() + out_stride + 1 * element_size, alpha);
645 out.
ptr() + out_stride + 2 * element_size, alpha);
647 out.
ptr() + out_stride + 3 * element_size, alpha);
649 in_y, in_u, in_v, out);
666 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
667 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
669 constexpr
auto shift = yuyv ? 0 : 1;
678 Iterator out_y(output_ptr->plane(0), win);
679 Iterator out_uv(output_ptr->plane(1), win_uv);
685 const auto ta_top = vld4q_u8(in.
ptr());
686 const auto ta_bottom = vld4q_u8(in.
ptr() + input_ptr->info()->strides_in_bytes().y());
693 yvec.val[0] = ta_top.val[0 + shift];
694 yvec.val[1] = ta_top.val[2 + shift];
695 vst2q_u8(out_y.
ptr(), yvec);
698 yyvec.val[0] = ta_bottom.val[0 + shift];
699 yyvec.val[1] = ta_bottom.val[2 + shift];
700 vst2q_u8(out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
703 uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
704 uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
705 vst2q_u8(out_uv.
ptr(), uvvec);
723 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
724 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
732 Iterator in_y(input_ptr->plane(0), win);
733 Iterator in_u(input_ptr->plane(1), win_uv);
734 Iterator in_v(input_ptr->plane(2), win_uv);
735 Iterator out_y(output_ptr->plane(0), win);
736 Iterator out_uv(output_ptr->plane(1), win_uv);
742 const auto ta_y_top = vld2q_u8(in_y.
ptr());
743 const auto ta_y_bottom = vld2q_u8(in_y.
ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
745 ta_uv.val[0] = vld1q_u8(in_u.
ptr());
746 ta_uv.val[1] = vld1q_u8(in_v.
ptr());
752 vst2q_u8(out_y.
ptr(), ta_y_top);
753 vst2q_u8(out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
754 vst2q_u8(out_uv.
ptr(), ta_uv);
756 in_y, in_u, in_v, out_y, out_uv);
773 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
774 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
776 constexpr
auto shift = uv ? 0 : 1;
784 Iterator in_y(input_ptr->plane(0), win);
785 Iterator in_uv(input_ptr->plane(1), win_uv);
786 Iterator out_y(output_ptr->plane(0), win);
787 Iterator out_u(output_ptr->plane(1), win_uv);
788 Iterator out_v(output_ptr->plane(2), win_uv);
794 const auto ta_y_top = vld2q_u8(in_y.
ptr());
795 const auto ta_y_bottom = vld2q_u8(in_y.
ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
796 const auto ta_uv = vld2q_u8(in_uv.
ptr());
802 vst2q_u8(out_y.
ptr(), ta_y_top);
803 vst2q_u8(out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
804 vst1q_u8(out_u.
ptr(), ta_uv.val[0 + shift]);
805 vst1q_u8(out_v.
ptr(), ta_uv.val[1 - shift]);
807 in_y, in_uv, out_y, out_u, out_v);
824 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
825 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
827 constexpr
auto shift = yuyv ? 0 : 1;
836 Iterator out_y(output_ptr->plane(0), win);
837 Iterator out_u(output_ptr->plane(1), win_uv);
838 Iterator out_v(output_ptr->plane(2), win_uv);
844 const auto ta_top = vld4q_u8(in.
ptr());
845 const auto ta_bottom = vld4q_u8(in.
ptr() + input_ptr->info()->strides_in_bytes().y());
852 yvec.val[0] = ta_top.val[0 + shift];
853 yvec.val[1] = ta_top.val[2 + shift];
854 vst2q_u8(out_y.
ptr(), yvec);
857 yyvec.val[0] = ta_bottom.val[0 + shift];
858 yyvec.val[1] = ta_bottom.val[2 + shift];
859 vst2q_u8(out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
862 uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
863 vst1q_u8(out_u.
ptr(), uvec);
866 vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
867 vst1q_u8(out_v.
ptr(), vvec);
869 in, out_y, out_u, out_v);
886 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
887 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
889 constexpr
auto shift = uv ? 0 : 1;
897 Iterator in_y(input_ptr->plane(0), win);
898 Iterator in_uv(input_ptr->plane(1), win_uv);
899 Iterator out_y(output_ptr->plane(0), win);
900 Iterator out_u(output_ptr->plane(1), win);
901 Iterator out_v(output_ptr->plane(2), win);
907 const auto ta_y_top = vld2q_u8(in_y.
ptr());
908 const auto ta_y_bottom = vld2q_u8(in_y.
ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
909 const auto ta_uv = vld2q_u8(in_uv.
ptr());
915 vst2q_u8(out_y.
ptr(), ta_y_top);
916 vst2q_u8(out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
919 uvec.val[0] = ta_uv.val[0 + shift];
920 uvec.val[1] = ta_uv.val[0 + shift];
921 vst2q_u8(out_u.
ptr(), uvec);
922 vst2q_u8(out_u.
ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
925 vvec.val[0] = ta_uv.val[1 - shift];
926 vvec.val[1] = ta_uv.val[1 - shift];
927 vst2q_u8(out_v.
ptr(), vvec);
928 vst2q_u8(out_v.
ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
930 in_y, in_uv, out_y, out_u, out_v);
946 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
947 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
955 Iterator in_y(input_ptr->plane(0), win);
956 Iterator in_u(input_ptr->plane(1), win_uv);
957 Iterator in_v(input_ptr->plane(2), win_uv);
958 Iterator out_y(output_ptr->plane(0), win);
959 Iterator out_u(output_ptr->plane(1), win);
960 Iterator out_v(output_ptr->plane(2), win);
966 const auto ta_y_top = vld2q_u8(in_y.
ptr());
967 const auto ta_y_bottom = vld2q_u8(in_y.
ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
968 const auto ta_u = vld1q_u8(in_u.
ptr());
969 const auto ta_v = vld1q_u8(in_v.
ptr());
975 vst2q_u8(out_y.
ptr(), ta_y_top);
976 vst2q_u8(out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
981 vst2q_u8(out_u.
ptr(), uvec);
982 vst2q_u8(out_u.
ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
987 vst2q_u8(out_v.
ptr(), vvec);
988 vst2q_u8(out_v.
ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
990 in_y, in_u, in_v, out_y, out_u, out_v);
1000 template <
bool alpha>
1007 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
1008 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
1017 Iterator out_y(output_ptr->plane(0), win);
1018 Iterator out_uv(output_ptr->plane(1), win_uv);
1024 const auto ta_rgb_top = load_rgb(in.
ptr(), alpha);
1025 const auto ta_rgb_bottom = load_rgb(in.
ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
1030 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], ta_rgb_bottom.val[0],
1031 ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], out_y.
ptr(),
1032 out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), out_uv.
ptr());
1044 template <
bool alpha>
1051 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
1052 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
1061 Iterator out_y(output_ptr->plane(0), win);
1062 Iterator out_u(output_ptr->plane(1), win_uv);
1063 Iterator out_v(output_ptr->plane(2), win_uv);
1069 const auto ta_rgb_top = load_rgb(in.
ptr(), alpha);
1070 const auto ta_rgb_bottom = load_rgb(in.
ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
1075 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], ta_rgb_bottom.val[0],
1076 ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], out_y.
ptr(),
1077 out_y.
ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), out_u.
ptr(),
1080 in, out_y, out_u, out_v);
1090 template <
bool alpha>
1097 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
1098 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
1101 Iterator out_y(output_ptr->plane(0), win);
1102 Iterator out_u(output_ptr->plane(1), win);
1103 Iterator out_v(output_ptr->plane(2), win);
1109 const auto ta_rgb = load_rgb(in.
ptr(), alpha);
1114 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2], out_y.
ptr(), out_u.
ptr(), out_v.
ptr());
1116 in, out_y, out_u, out_v);