26 #include "arm_compute/core/IMultiImage.h" 34 #ifndef DOXYGEN_SKIP_THIS 53 inline float32x4_t rgb_to_greyscale_calculation(
const float32x4_t &rcolor,
const float32x4_t &gcolor,
const float32x4_t &bcolor,
54 const float rcoef,
const float gcoef,
const float bcoef)
// Per-lane weighted sum of the three colour vectors:
//   greyscale = rcolor * rcoef + gcolor * gcoef + bcolor * bcoef
// The first term is a plain multiply; the other two are multiply-accumulates
// onto the running result. (The return statement is not part of this listing.)
56 float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef);
57 greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef);
58 greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef);
// Converts 16 de-interleaved RGB pixels (`in`) into 16 single-channel values (`out`).
// The greyscale weighting is applied to four float32x4_t groups of four pixels each,
// always with the rgb2u8_{red,green,blue}_coef constants.
// NOTE(review): r_float32 / g_float32 / b_float32 and the final write to `out` are on
// lines not shown in this listing; presumably they are the float expansions of
// in.val[0..2] and a float->u8 narrowing of out_float32 — confirm against the full file.
62 inline void rgb_to_u8_conversion(
const uint8x16x3_t &in, uint8x16_t &out)
64 float32x4x4_t out_float32;
// One call per group of four pixels (val[0] .. val[3]).
73 out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0],
74 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
76 out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1],
77 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
79 out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2],
80 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
82 out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3],
83 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
// Computes Y, U and V for four pixels from their float R, G and B values.
//   yvec = rvec * kr + gvec * kg + bvec * kb
//   uvec = 128 + (bvec - yvec) * cu
//   vvec = 128 + (rvec - yvec) * cv
// using the rgb2yuv_bt709_* constants. Results are written to the three output references.
89 inline void rgb_to_yuv_calculation(
const float32x4_t &rvec,
const float32x4_t &gvec,
const float32x4_t &bvec,
90 float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
// Chroma offset, used as the accumulator of the multiply-accumulates below.
97 const auto c128 = vdupq_n_f32(128.f);
// Luma: weighted sum of R, G and B.
100 yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
101 yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
102 yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
// U: scaled (B - Y) difference, offset by 128.
105 uvec = vsubq_f32(bvec, yvec);
106 uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
// V: scaled (R - Y) difference, offset by 128.
109 vvec = vsubq_f32(rvec, yvec);
110 vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
// Converts two sets of four luma values (yvec_val, yyvec_val) that share the same four
// U/V samples into eight RGB (or RGBA) pixels and stores them at output_ptr.
// uvec_val and vvec_val are taken by value because they are re-centred in place.
// NOTE(review): the declaration/filling of u8_rgb and u8_rgba and the branch that picks
// between the 3-channel and 4-channel stores are on lines not shown here; presumably the
// branch is driven by `alpha` — confirm against the full file.
113 inline void yuyv_to_rgb_calculation(
const float32x4_t &yvec_val, float32x4_t uvec_val,
const float32x4_t &yyvec_val,
114 float32x4_t vvec_val,
unsigned char *output_ptr,
const bool alpha)
116 float32x4x3_t rgb1, rgb2;
// Remove the 128 offset from both chroma vectors.
119 const auto c128 = vdupq_n_f32(128.f);
120 uvec_val = vsubq_f32(uvec_val, c128);
121 vvec_val = vsubq_f32(vvec_val, c128);
// Chroma contributions, computed once and reused for both luma sets:
//   red   = V * red_coef
//   blue  = U * blue_coef
//   green = U * green_coef + V * green_coef2
127 const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
128 const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
129 const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
130 vmulq_n_f32(vvec_val, green_coef2_bt709));
// Add the first luma set to the chroma contributions.
136 rgb1.val[0] = vaddq_f32(yvec_val, red);
137 rgb1.val[1] = vaddq_f32(yvec_val, green);
138 rgb1.val[2] = vaddq_f32(yvec_val, blue);
// Same chroma contributions with the second luma set.
140 rgb2.val[0] = vaddq_f32(yyvec_val, red);
141 rgb2.val[1] = vaddq_f32(yyvec_val, green);
142 rgb2.val[2] = vaddq_f32(yyvec_val, blue);
// 3-channel store: eight pixels, 3 bytes each (24 bytes in total).
// Lanes are written in the order 0,4,1,5,2,6,3,7.
// NOTE(review): presumably lanes 0-3 of u8_rgb come from rgb1 and lanes 4-7 from rgb2,
// so this order alternates the two luma sets pixel by pixel — TODO confirm.
149 vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
150 vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
151 vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
152 vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
153 vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
154 vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
155 vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
156 vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
// 4-channel store: same colour channels plus a constant fourth channel of 255;
// eight pixels, 4 bytes each (32 bytes in total), same lane order as above.
161 u8_rgba.val[0] = u8_rgb.val[0];
162 u8_rgba.val[1] = u8_rgb.val[1];
163 u8_rgba.val[2] = u8_rgb.val[2];
164 u8_rgba.val[3] = vdup_n_u8(255);
165 vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
166 vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
167 vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
168 vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
169 vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
170 vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
171 vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
172 vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
// Loads 16 pixels from `ptr` and returns their first three channels de-interleaved.
// NOTE(review): only the 4-channel path is visible in this listing; presumably it is taken
// when `alpha` is true and a 3-channel load is used otherwise — confirm against the full file.
176 inline uint8x16x3_t load_rgb(
const unsigned char *
const ptr,
const bool alpha)
// De-interleaving 4-channel load; the fourth channel (tmp.val[3]) is dropped.
182 const auto tmp = vld4q_u8(ptr);
183 rgb.val[0] = tmp.val[0];
184 rgb.val[1] = tmp.val[1];
185 rgb.val[2] = tmp.val[2];
// Converts two rows of 16 pixels each (top and bottom). Both arguments are non-const
// references; store_rgb_to_nv12 / store_rgb_to_iyuv read val[0] as Y and val[1], val[2] as
// chroma after calling this, so the vectors are evidently overwritten with YUV.
// NOTE(review): the u8->float expansion, the calls whose argument tails appear below and the
// float->u8 write-back are on lines not shown here; the calls are presumably to
// rgb_to_yuv_calculation — confirm against the full file.
195 inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
// Float Y/U/V holders: four float32x4_t groups per 16-pixel row.
206 float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
207 float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
// One iteration per group of four pixels.
209 for(
auto i = 0; i < 4; ++i)
212 fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
214 fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
// Converts a 16x2 block of RGB pixels (top and bottom rows, channels already de-interleaved)
// and stores it as two rows of 16 Y bytes plus 8 interleaved U/V pairs.
// Each chroma sample is a halved combination of a 2x2 pixel neighbourhood.
225 inline void store_rgb_to_nv12(
const uint8x16_t &rvec_top,
const uint8x16_t &gvec_top,
const uint8x16_t &bvec_top,
226 const uint8x16_t &rvec_bottom,
const uint8x16_t &gvec_bottom,
const uint8x16_t &bvec_bottom,
227 unsigned char *
const __restrict out_y_top,
unsigned char *
const __restrict out_y_bottom,
228 unsigned char *
const __restrict out_uv)
// Pack the channels so they can be converted in place.
230 uint8x16x3_t vec_top, vec_bottom;
231 vec_top.val[0] = rvec_top;
232 vec_top.val[1] = gvec_top;
233 vec_top.val[2] = bvec_top;
234 vec_bottom.val[0] = rvec_bottom;
235 vec_bottom.val[1] = gvec_bottom;
236 vec_bottom.val[2] = bvec_bottom;
// After this call val[0] is used as Y and val[1], val[2] as U, V.
238 rgb_to_yuv_conversion(vec_top, vec_bottom);
// Luma is stored at full resolution, one row per pointer.
240 vst1q_u8(out_y_top, vec_top.val[0]);
241 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
// Split even/odd columns: val[0] holds the even elements (top row in the low half,
// bottom row in the high half), val[1] the odd elements in the same layout.
243 const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
244 const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
// Horizontal step: rounding average of each even/odd column pair.
245 const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
246 const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
// Vertical step: truncating average of the top (low half) and bottom (high half) results.
249 uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
250 uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
// Interleaving store: U0 V0 U1 V1 ... (16 bytes).
252 vst2_u8(out_uv, uvvec);
// Converts a 16x2 block of RGB pixels (top and bottom rows, channels already de-interleaved)
// and stores it as two rows of 16 Y bytes plus 8 U bytes and 8 V bytes in separate buffers.
// Each chroma sample is a halved combination of a 2x2 pixel neighbourhood.
255 inline void store_rgb_to_iyuv(
const uint8x16_t &rvec_top,
const uint8x16_t &gvec_top,
const uint8x16_t &bvec_top,
256 const uint8x16_t &rvec_bottom,
const uint8x16_t &gvec_bottom,
const uint8x16_t &bvec_bottom,
257 unsigned char *
const __restrict out_y_top,
unsigned char *
const __restrict out_y_bottom,
258 unsigned char *
const __restrict out_u,
259 unsigned char *
const __restrict out_v)
// Pack the channels so they can be converted in place.
261 uint8x16x3_t vec_top, vec_bottom;
262 vec_top.val[0] = rvec_top;
263 vec_top.val[1] = gvec_top;
264 vec_top.val[2] = bvec_top;
265 vec_bottom.val[0] = rvec_bottom;
266 vec_bottom.val[1] = gvec_bottom;
267 vec_bottom.val[2] = bvec_bottom;
// After this call val[0] is used as Y and val[1], val[2] as U, V.
269 rgb_to_yuv_conversion(vec_top, vec_bottom);
// Luma is stored at full resolution, one row per pointer.
271 vst1q_u8(out_y_top, vec_top.val[0]);
272 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
// Per row, split even/odd columns of U and V together: in each result vector the low half
// holds U elements and the high half holds V elements.
274 const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
275 const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
// Rounding average of even/odd columns within each row, then truncating average of the
// two rows. The U-low / V-high layout is preserved.
276 const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
277 vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
// Low half -> U buffer, high half -> V buffer (8 bytes each).
279 vst1_u8(out_u, vget_low_u8(uvvec));
280 vst1_u8(out_v, vget_high_u8(uvvec));
// Converts 16 RGB pixels (channels already de-interleaved) and stores 16 bytes each to
// out_y, out_u and out_v — chroma is kept at full resolution, no subsampling here.
// NOTE(review): the u8->float expansion, the call whose argument tail appears below and the
// float->u8 narrowing into yvec/uvec/vvec are on lines not shown here; the call is presumably
// to rgb_to_yuv_calculation — confirm against the full file.
283 inline void store_rgb_to_yuv4(
const uint8x16_t &rvec,
const uint8x16_t &gvec,
const uint8x16_t &bvec,
284 unsigned char *
const __restrict out_y,
285 unsigned char *
const __restrict out_u,
286 unsigned char *
const __restrict out_v)
// Float Y/U/V holders: four float32x4_t groups covering the 16 pixels.
293 float32x4x4_t fyvec, fuvec, fvvec;
// One iteration per group of four pixels.
294 for(
auto i = 0; i < 4; ++i)
297 fyvec.val[i], fuvec.val[i], fvvec.val[i]);
300 uint8x16_t yvec, uvec, vvec;
// One 16-byte store per output buffer.
305 vst1q_u8(out_y, yvec);
306 vst1q_u8(out_u, uvec);
307 vst1q_u8(out_v, vvec);
// Body fragment of a 3-channel -> 4-channel kernel (appears to be colorconvert_rgb_to_rgbx;
// the signature, the iterators `in`/`out`, the loop and the declaration of ta2 are on lines
// not shown in this listing).
// View the untyped input/output arguments as image objects.
326 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
327 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// De-interleaving load of 16 three-channel pixels.
334 const auto ta1 = vld3q_u8(in.
ptr());
// Copy the three channels and fill the fourth with 255.
336 ta2.val[0] = ta1.val[0];
337 ta2.val[1] = ta1.val[1];
338 ta2.val[2] = ta1.val[2];
339 ta2.val[3] = vdupq_n_u8(255);
// Interleaving store of 16 four-channel pixels.
340 vst4q_u8(out.
ptr(), ta2);
// Body fragment of a 3-channel -> single-channel kernel (appears to be colorconvert_rgb_to_u8;
// the signature, the iterators `in`/`out`, the loop and the declaration of ta2 are on lines
// not shown in this listing).
// View the untyped input/output arguments as image objects.
357 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
358 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// De-interleaving load of 16 three-channel pixels.
365 const auto ta1 = vld3q_u8(in.
ptr());
// Weighted RGB -> U8 conversion, then store 16 output bytes.
367 rgb_to_u8_conversion(ta1, ta2);
368 vst1q_u8(out.
ptr(), ta2);
// Body fragment of a 4-channel -> 3-channel kernel (appears to be colorconvert_rgbx_to_rgb;
// the signature, the iterators `in`/`out`, the loop and the declaration of ta2 are on lines
// not shown in this listing).
// View the untyped input/output arguments as image objects.
385 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
386 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// De-interleaving load of 16 four-channel pixels.
393 const auto ta1 = vld4q_u8(in.
ptr());
// Keep the first three channels; ta1.val[3] is discarded.
395 ta2.val[0] = ta1.val[0];
396 ta2.val[1] = ta1.val[1];
397 ta2.val[2] = ta1.val[2];
// Interleaving store of 16 three-channel pixels.
398 vst3q_u8(out.
ptr(), ta2);
// Fragment of a packed-4:2:2 -> RGB kernel (appears to be colorconvert_yuyv_to_rgb; the
// signature, iterators, loop and the conversion calls are on lines not shown in this listing).
// Template flags: `yuyv` selects the byte order of the packed input through `shift`,
// `alpha` selects the output pixel size through `element_size`.
410 template <
bool yuyv,
bool alpha>
// View the untyped input/output arguments as image objects.
416 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
417 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// 32 = 8 pixels * 4 bytes, 24 = 8 pixels * 3 bytes: matches what one
// yuyv_to_rgb_calculation call stores in its 4-channel / 3-channel path.
419 constexpr
auto element_size = alpha ? 32 : 24;
// NOTE(review): `shift` is not used in the visible lines; presumably it offsets the
// channel indices of `ta` for the alternative byte order — confirm.
420 constexpr
auto shift = yuyv ? 0 : 1;
// De-interleave the packed stream into four byte lanes.
427 const auto ta = vld4q_u8(in.
ptr());
// Fragment of a two-plane (Y + interleaved chroma) -> RGB kernel (appears to be
// colorconvert_nv12_to_rgb; the signature, window setup, `out`/`out_stride` and the
// u8->float expansion into yvec_*/yyvec_*/uvec/vvec are on lines not shown in this listing).
// Template flags: `uv` selects the chroma byte order through `shift`,
// `alpha` selects the output pixel size through `element_size`.
454 template <
bool uv,
bool alpha>
// View the untyped arguments as a multi-plane input and a single-plane output.
461 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
462 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// Bytes written by one yuyv_to_rgb_calculation call (8 pixels * 4 or 3 bytes).
464 constexpr
auto element_size = alpha ? 32 : 24;
// NOTE(review): `shift` is not used in the visible lines; presumably it indexes ta_uv — confirm.
466 constexpr
auto shift = uv ? 0 : 1;
// Plane 0 is iterated with `win`, plane 1 with `win_uv`.
474 Iterator in_y(input_ptr->plane(0), win);
475 Iterator in_uv(input_ptr->plane(1), win_uv);
// Two luma rows (the second one a row stride below the first), split into even/odd columns,
// plus one row of chroma split into its two interleaved components.
480 const auto ta_y_top = vld2q_u8(in_y.ptr());
481 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
482 const auto ta_uv = vld2q_u8(in_uv.ptr());
// Top output row: four calls, each writing element_size bytes.
496 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
497 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
498 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
499 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
// Bottom output row: same chroma vectors, destination advanced by out_stride.
501 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
502 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
503 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
504 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
// Fragment of a three-plane (Y, U, V) -> RGB kernel (appears to be colorconvert_iyuv_to_rgb;
// the signature, window setup, `out`/`out_stride` and the u8->float expansion into
// yvec_*/yyvec_*/uvec/vvec are on lines not shown in this listing).
// Template flag `alpha` selects the output pixel size through `element_size`.
516 template <
bool alpha>
// View the untyped arguments as a multi-plane input and a single-plane output.
523 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
524 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// Bytes written by one yuyv_to_rgb_calculation call (8 pixels * 4 or 3 bytes).
526 constexpr
auto element_size = alpha ? 32 : 24;
// Plane 0 is iterated with `win`, planes 1 and 2 with `win_uv`.
535 Iterator in_y(input_ptr->plane(0), win);
536 Iterator in_u(input_ptr->plane(1), win_uv);
537 Iterator in_v(input_ptr->plane(2), win_uv);
// Row pointers: the bottom luma row is one row stride below the top one.
542 const auto *y_top_ptr = in_y.ptr();
543 const auto *y_bottom_ptr = in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().
y();
544 const auto *u_ptr = in_u.ptr();
545 const auto *v_ptr = in_v.ptr();
// Guarded path: luma is read with two plain 16-byte loads per row instead of vld2q_u8.
// NOTE(review): compilers predefine `__aarch64__` for 64-bit Arm; `__arch64__` does not look
// like a predefined macro, so unless the build system defines it this branch is never
// compiled and the vld2q_u8 path below is always used. Confirm the intended spelling.
548 #if defined(__arch64__) 549 const auto ta0_y_top = vld1q_u8(y_top_ptr);
550 const auto ta1_y_top = vld1q_u8(y_top_ptr + 16);
551 const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr);
552 const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16);
553 const auto ta_u = vld1q_u8(u_ptr);
554 const auto ta_v = vld1q_u8(v_ptr);
// Alternative path: de-interleaving luma loads (even/odd columns); the preprocessor lines
// separating the two paths are not shown in this listing.
564 const auto ta_y_top = vld2q_u8(y_top_ptr);
565 const auto ta_y_bottom = vld2q_u8(y_bottom_ptr);
566 const auto ta_u = vld1q_u8(u_ptr);
567 const auto ta_v = vld1q_u8(v_ptr);
// Top output row: four calls, each writing element_size bytes.
582 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
583 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
584 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
585 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
// Bottom output row: same chroma vectors, destination advanced by out_stride.
587 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
588 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
589 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
590 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
// Iterators handed to the window loop (tail of a call whose start is not shown).
592 in_y, in_u, in_v, out);
// Fragment of a packed-4:2:2 -> two-plane kernel (appears to be colorconvert_yuyv_to_nv12;
// the signature, template header, the `in` iterator, the loop and the declarations of
// yvec/yyvec/uvvec are on lines not shown in this listing).
// View the untyped arguments as a single-plane input and a multi-plane output.
609 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
610 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// shift = 0: luma in lanes 0 and 2, chroma in lanes 1 and 3; shift = 1: the opposite.
612 constexpr
auto shift = yuyv ? 0 : 1;
// Plane 0 is iterated with `win`, plane 1 with `win_uv`.
621 Iterator out_y(output_ptr->plane(0), win);
622 Iterator out_uv(output_ptr->plane(1), win_uv);
// Two input rows (the second one a row stride below the first), each split into four byte lanes.
626 const auto ta_top = vld4q_u8(in.
ptr());
627 const auto ta_bottom = vld4q_u8(in.
ptr() + input_ptr->info()->strides_in_bytes().y());
// Top row luma: re-interleave the two luma lanes into 32 consecutive bytes.
634 yvec.val[0] = ta_top.val[0 + shift];
635 yvec.val[1] = ta_top.val[2 + shift];
636 vst2q_u8(out_y.ptr(), yvec);
// Bottom row luma, written one output row stride further.
639 yyvec.val[0] = ta_bottom.val[0 + shift];
640 yyvec.val[1] = ta_bottom.val[2 + shift];
641 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
// Chroma: truncating average of the top and bottom rows, stored interleaved.
644 uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
645 uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
646 vst2q_u8(out_uv.ptr(), uvvec);
// Fragment of a three-plane -> two-plane kernel (appears to be colorconvert_iyuv_to_nv12;
// the signature, window setup, the loop and the declaration of ta_uv are on lines not shown
// in this listing).
// View both untyped arguments as multi-plane images.
664 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
665 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// Luma planes use `win`; every chroma plane uses `win_uv`.
673 Iterator in_y(input_ptr->plane(0), win);
674 Iterator in_u(input_ptr->plane(1), win_uv);
675 Iterator in_v(input_ptr->plane(2), win_uv);
676 Iterator out_y(output_ptr->plane(0), win);
677 Iterator out_uv(output_ptr->plane(1), win_uv);
// Two luma rows; the bottom row is one input row stride below the top one.
681 const auto ta_y_top = vld2q_u8(in_y.ptr());
682 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
// 16 bytes from each separate chroma plane.
684 ta_uv.val[0] = vld1q_u8(in_u.ptr());
685 ta_uv.val[1] = vld1q_u8(in_v.ptr());
// Luma is copied unchanged (vld2q_u8 followed by vst2q_u8 restores the original order).
691 vst2q_u8(out_y.ptr(), ta_y_top);
692 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
// Interleave the two chroma vectors into the single output chroma plane.
693 vst2q_u8(out_uv.ptr(), ta_uv);
// Iterators handed to the window loop (tail of a call whose start is not shown).
695 in_y, in_u, in_v, out_y, out_uv);
// Fragment of a two-plane -> three-plane kernel (appears to be colorconvert_nv12_to_iyuv;
// the signature, template header, window setup and the loop are on lines not shown in this
// listing).
// View both untyped arguments as multi-plane images.
712 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
713 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// Selects which de-interleaved chroma component goes to which output plane.
715 constexpr
auto shift = uv ? 0 : 1;
// Luma planes use `win`; every chroma plane uses `win_uv`.
723 Iterator in_y(input_ptr->plane(0), win);
724 Iterator in_uv(input_ptr->plane(1), win_uv);
725 Iterator out_y(output_ptr->plane(0), win);
726 Iterator out_u(output_ptr->plane(1), win_uv);
727 Iterator out_v(output_ptr->plane(2), win_uv);
// Two luma rows plus one row of interleaved chroma, split into its two components.
731 const auto ta_y_top = vld2q_u8(in_y.ptr());
732 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
733 const auto ta_uv = vld2q_u8(in_uv.ptr());
// Luma is copied unchanged (vld2q_u8 followed by vst2q_u8 restores the original order).
739 vst2q_u8(out_y.ptr(), ta_y_top);
740 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
// With shift = 0 plane 1 receives component 0 and plane 2 component 1; with shift = 1 they swap.
741 vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
742 vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
// Iterators handed to the window loop (tail of a call whose start is not shown).
744 in_y, in_uv, out_y, out_u, out_v);
// Fragment of a packed-4:2:2 -> three-plane kernel (appears to be colorconvert_yuyv_to_iyuv;
// the signature, template header, the `in` iterator, the loop and the declarations of
// yvec/yyvec/uvec/vvec are on lines not shown in this listing).
// View the untyped arguments as a single-plane input and a multi-plane output.
761 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
762 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// shift = 0: luma in lanes 0 and 2, chroma in lanes 1 and 3; shift = 1: the opposite.
764 constexpr
auto shift = yuyv ? 0 : 1;
// Plane 0 is iterated with `win`, planes 1 and 2 with `win_uv`.
773 Iterator out_y(output_ptr->plane(0), win);
774 Iterator out_u(output_ptr->plane(1), win_uv);
775 Iterator out_v(output_ptr->plane(2), win_uv);
// Two input rows (the second one a row stride below the first), each split into four byte lanes.
779 const auto ta_top = vld4q_u8(in.
ptr());
780 const auto ta_bottom = vld4q_u8(in.
ptr() + input_ptr->info()->strides_in_bytes().y());
// Top row luma: re-interleave the two luma lanes into 32 consecutive bytes.
787 yvec.val[0] = ta_top.val[0 + shift];
788 yvec.val[1] = ta_top.val[2 + shift];
789 vst2q_u8(out_y.ptr(), yvec);
// Bottom row luma, written one output row stride further.
792 yyvec.val[0] = ta_bottom.val[0 + shift];
793 yyvec.val[1] = ta_bottom.val[2 + shift];
794 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
// First chroma component: truncating average of the top and bottom rows.
797 uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
798 vst1q_u8(out_u.ptr(), uvec);
// Second chroma component, same vertical averaging.
801 vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
802 vst1q_u8(out_v.ptr(), vvec);
// Iterators handed to the window loop (tail of a call whose start is not shown).
804 in, out_y, out_u, out_v);
// Fragment of a two-plane -> three full-resolution-plane kernel (appears to be
// colorconvert_nv12_to_yuv4; the signature, template header, window setup, the loop and the
// declarations of uvec/vvec are on lines not shown in this listing).
// View both untyped arguments as multi-plane images.
821 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
822 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// Selects which de-interleaved chroma component goes to which output plane.
824 constexpr
auto shift = uv ? 0 : 1;
// Only the input chroma plane uses `win_uv`; all three output planes use `win`.
832 Iterator in_y(input_ptr->plane(0), win);
833 Iterator in_uv(input_ptr->plane(1), win_uv);
834 Iterator out_y(output_ptr->plane(0), win);
835 Iterator out_u(output_ptr->plane(1), win);
836 Iterator out_v(output_ptr->plane(2), win);
// Two luma rows plus one row of interleaved chroma, split into its two components.
840 const auto ta_y_top = vld2q_u8(in_y.ptr());
841 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
842 const auto ta_uv = vld2q_u8(in_uv.ptr());
// Luma is copied unchanged.
848 vst2q_u8(out_y.ptr(), ta_y_top);
849 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
// Upsample by replication: the same vector in both slots of the interleaving store writes
// every sample twice horizontally; storing to two rows repeats it vertically.
852 uvec.val[0] = ta_uv.val[0 + shift];
853 uvec.val[1] = ta_uv.val[0 + shift];
854 vst2q_u8(out_u.ptr(), uvec);
855 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
// Same replication for the other chroma component.
858 vvec.val[0] = ta_uv.val[1 - shift];
859 vvec.val[1] = ta_uv.val[1 - shift];
860 vst2q_u8(out_v.ptr(), vvec);
861 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
// Iterators handed to the window loop (tail of a call whose start is not shown).
863 in_y, in_uv, out_y, out_u, out_v);
// Fragment of a three-plane -> three full-resolution-plane kernel (appears to be
// colorconvert_iyuv_to_yuv4; the signature, window setup, the loop and the construction of
// uvec/vvec are on lines not shown in this listing).
// View both untyped arguments as multi-plane images.
879 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(
input);
880 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// Input chroma planes use `win_uv`; all three output planes use `win`.
888 Iterator in_y(input_ptr->plane(0), win);
889 Iterator in_u(input_ptr->plane(1), win_uv);
890 Iterator in_v(input_ptr->plane(2), win_uv);
891 Iterator out_y(output_ptr->plane(0), win);
892 Iterator out_u(output_ptr->plane(1), win);
893 Iterator out_v(output_ptr->plane(2), win);
// Two luma rows plus 16 bytes from each chroma plane.
897 const auto ta_y_top = vld2q_u8(in_y.ptr());
898 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
899 const auto ta_u = vld1q_u8(in_u.ptr());
900 const auto ta_v = vld1q_u8(in_v.ptr());
// Luma is copied unchanged.
906 vst2q_u8(out_y.ptr(), ta_y_top);
907 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
// Each chroma vector pair is written to two consecutive output rows.
// NOTE(review): uvec/vvec are filled on lines not shown; presumably both slots hold
// ta_u / ta_v so every sample is duplicated horizontally as well — confirm.
912 vst2q_u8(out_u.ptr(), uvec);
913 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
918 vst2q_u8(out_v.ptr(), vvec);
919 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
// Iterators handed to the window loop (tail of a call whose start is not shown).
921 in_y, in_u, in_v, out_y, out_u, out_v);
// Fragment of an RGB -> two-plane kernel (appears to be colorconvert_rgb_to_nv12; the
// signature, window setup, the `in` iterator, the loop and the final argument of the
// store call are on lines not shown in this listing).
// Template flag `alpha` is forwarded to load_rgb.
931 template <
bool alpha>
// View the untyped arguments as a single-plane input and a multi-plane output.
938 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
939 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// Plane 0 is iterated with `win`, plane 1 with `win_uv`.
948 Iterator out_y(output_ptr->plane(0), win);
949 Iterator out_uv(output_ptr->plane(1), win_uv);
// Two input rows of 16 pixels; the second is one input row stride below the first.
953 const auto ta_rgb_top = load_rgb(in.
ptr(), alpha);
954 const auto ta_rgb_bottom = load_rgb(in.
ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
// Convert and store: Y rows go to out_y and out_y + one output row stride.
959 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
960 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
961 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
// Fragment of an RGB -> three-plane kernel (appears to be colorconvert_rgb_to_iyuv; the
// signature, window setup, the `in` iterator and the loop are on lines not shown in this
// listing).
// Template flag `alpha` is forwarded to load_rgb.
974 template <
bool alpha>
// View the untyped arguments as a single-plane input and a multi-plane output.
981 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
982 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// Plane 0 is iterated with `win`, planes 1 and 2 with `win_uv`.
991 Iterator out_y(output_ptr->plane(0), win);
992 Iterator out_u(output_ptr->plane(1), win_uv);
993 Iterator out_v(output_ptr->plane(2), win_uv);
// Two input rows of 16 pixels; the second is one input row stride below the first.
997 const auto ta_rgb_top = load_rgb(in.
ptr(), alpha);
998 const auto ta_rgb_bottom = load_rgb(in.
ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
// Convert and store: Y rows go to out_y and out_y + one output row stride,
// chroma goes to the two separate chroma planes.
1003 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
1004 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
1005 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
1006 out_u.ptr(), out_v.ptr());
// Iterators handed to the window loop (tail of a call whose start is not shown).
1008 in, out_y, out_u, out_v);
// Fragment of an RGB -> three full-resolution-plane kernel (appears to be
// colorconvert_rgb_to_yuv4; the signature, the `in` iterator and the loop are on lines not
// shown in this listing).
// Template flag `alpha` is forwarded to load_rgb.
1018 template <
bool alpha>
// View the untyped arguments as a single-plane input and a multi-plane output.
1025 const auto input_ptr =
static_cast<const IImage *__restrict
>(
input);
1026 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// All three output planes are iterated with the same window `win`.
1029 Iterator out_y(output_ptr->plane(0), win);
1030 Iterator out_u(output_ptr->plane(1), win);
1031 Iterator out_v(output_ptr->plane(2), win);
// One row of 16 pixels per iteration.
1035 const auto ta_rgb = load_rgb(in.
ptr(), alpha);
// Convert and store 16 bytes to each of the three planes.
1040 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
1041 out_y.ptr(), out_u.ptr(), out_v.ptr());
// Iterators handed to the window loop (tail of a call whose start is not shown).
1043 in, out_y, out_u, out_v);
constexpr float blue_coef_bt709
void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
Convert IYUV to NV12.
constexpr float green_coef_bt709
void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to YUV4.
constexpr int step() const
Return the step of the dimension.
constexpr float rgb2yuv_bt709_kr
float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in)
Converts from uint8x16 to float32x4x4_t.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
Convert RGBX to RGB.
constexpr float rgb2yuv_bt709_kg
Describe one of the image's dimensions with a start, end and step.
void validate() const
Will validate all the window's dimensions' values when asserts are enabled.
constexpr float rgb2u8_red_coef
constexpr float green_coef2_bt709
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to U8.
void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to IYUV.
void yuyv_to_rgb_calculation(const SimpleTensor< T > yvec, const SimpleTensor< T > vvec, const SimpleTensor< T > yyvec, const SimpleTensor< T > uvec, SimpleTensor< T > &dst)
constexpr float rgb2u8_blue_coef
constexpr float red_coef_bt709
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to RGBX.
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
Convert NV12 to RGB.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
Convert YUYV to RGB.
void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
Convert RGB to NV12.
void rgb_to_yuv_calculation(const SimpleTensor< T > rvec, const SimpleTensor< T > gvec, const SimpleTensor< T > bvec, SimpleTensor< T > &yvec, SimpleTensor< T > &uvec_top, SimpleTensor< T > &uvec_bottom, SimpleTensor< T > &vvec_top, SimpleTensor< T > &vvec_bottom)
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
constexpr float rgb2u8_green_coef
void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
Convert IYUV to YUV4.
void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
Convert NV12 to IYUV.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each window element.
T y() const
Alias to access the size of the second dimension.
void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out)
Converts from one float32x4x4_t to one uint8x16_t.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
constexpr float rgb2yuv_bt709_cv
void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
Convert YUYV to IYUV.
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
Convert NV12 to YUV4.
void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
Convert YUYV to NV12.
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
Convert IYUV to RGB.
void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
Converts from two float32x4x3_t to just one uint8x8x3_t.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
constexpr float rgb2yuv_bt709_cu
constexpr float rgb2yuv_bt709_kb