# HG changeset patch
# User ludovic pollet
# Date 1176600803 -3600
# Node ID e91a180168c1008ec3b12b525d03184dd4c128f6
# Parent 834e4e9658d615b80bcccff2703928e5a8edf075
Software YUY2 to RGB and MMX acceleration

The software yuy2 -> rgb conversion functions are not MMX accelerated.
As a result, in the current xine-lib, the contrast and saturation
settings do not work when the video is yuy2 (for example after tvtime
post-processing, which seems to change the format), since these settings
only affect the MMX functions.

This patch provides MMX/MMXEXT acceleration for that case, for 32 bit
displays.

Review fixes on top of the original submission:
 - MODE_32_BGR selected mmxext_abgr32, which is the planar yuv420
   converter (different signature), for both the MMX and the MMXEXT
   probe; dedicated mmx/mmxext yuy2 abgr32 entry points are added and
   used instead.
 - the 8 pixel conversion loop no longer runs when dest_width < 8
   (the do/while counter started at 0 and wrapped around).
 - unused locals and commented-out code removed, the duplicated line
   scaling factored into yuy2_scale_line().
 - LOG message for the MMX case now says "yuy22rgb".

NOTE(review): whitespace-only context lines and indentation were
reconstructed from a whitespace-collapsed copy of this patch; apply with
"patch -l" if the context does not match exactly.
NOTE(review): the HAVE_XINE guard added to yuv2rgb_mlib.c is unrelated to
the yuy2 acceleration; please confirm it is intended.

diff --git a/src/video_out/yuv2rgb.c b/src/video_out/yuv2rgb.c
--- a/src/video_out/yuv2rgb.c
+++ b/src/video_out/yuv2rgb.c
@@ -1301,7 +1301,7 @@ static scale_line_func_t find_scale_line
 }
 
 
-static void scale_line_2 (uint8_t *source, uint8_t *dest,
+void scale_line_2 (uint8_t *source, uint8_t *dest,
 			  int width, int step) {
   int p1;
   int p2;
@@ -1328,7 +1328,7 @@ static void scale_line_2 (uint8_t *sourc
   }
 }
 
-static void scale_line_4 (uint8_t *source, uint8_t *dest,
+void scale_line_4 (uint8_t *source, uint8_t *dest,
 			  int width, int step) {
   int p1;
   int p2;
@@ -3254,9 +3254,33 @@ yuv2rgb_factory_t* yuv2rgb_factory_init
   /*
    * auto-probe for the best yuy22rgb function
    */
+  this->yuy22rgb_fun = NULL;
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+  if ((this->yuy22rgb_fun == NULL) && (mm & MM_ACCEL_X86_MMXEXT)) {
 
-  /* FIXME: implement mmx/mlib functions */
-  yuy22rgb_c_init (this);
+    yuy22rgb_init_mmxext (this);
+
+#ifdef LOG
+    if (this->yuy22rgb_fun != NULL)
+      printf ("yuy22rgb: using MMXEXT for colorspace transform\n");
+#endif
+  }
+
+  if ((this->yuy22rgb_fun == NULL) && (mm & MM_ACCEL_X86_MMX)) {
+
+    yuy22rgb_init_mmx (this);
+
+#ifdef LOG
+    if (this->yuy22rgb_fun != NULL)
+      printf ("yuy22rgb: using MMX for colorspace transform\n");
+#endif
+  }
+#endif
+
+  if (this->yuy22rgb_fun == NULL) {
+
+    yuy22rgb_c_init (this);
+  }
 
   /*
    * set up single pixel function
diff --git a/src/video_out/yuv2rgb.h b/src/video_out/yuv2rgb.h
--- a/src/video_out/yuv2rgb.h
+++ b/src/video_out/yuv2rgb.h
@@ -171,4 +171,12 @@ void yuv2rgb_init_mmx (yuv2rgb_factory_t
 void yuv2rgb_init_mmx (yuv2rgb_factory_t *this);
 void yuv2rgb_init_mlib (yuv2rgb_factory_t *this);
 
+void yuy22rgb_init_mmxext (yuv2rgb_factory_t *this);
+void yuy22rgb_init_mmx (yuv2rgb_factory_t *this);
+
+void scale_line_2 (uint8_t *source, uint8_t *dest,
+		   int width, int step);
+void scale_line_4 (uint8_t *source, uint8_t *dest,
+		   int width, int step);
+
 #endif
diff --git a/src/video_out/yuv2rgb_mlib.c b/src/video_out/yuv2rgb_mlib.c
--- a/src/video_out/yuv2rgb_mlib.c
+++ b/src/video_out/yuv2rgb_mlib.c
@@ -22,7 +22,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
  */
 
+#ifdef HAVE_CONFIG_H
 #include "config.h"
+#endif
+
+#ifdef HAVE_XINE
 
 #if HAVE_MLIB
 
@@ -176,3 +180,6 @@ void yuv2rgb_init_mlib (yuv2rgb_factory_
 }
 
 #endif	/* HAVE_MLIB */
+
+#endif	/* HAVE_XINE */
+
diff --git a/src/video_out/yuv2rgb_mmx.c b/src/video_out/yuv2rgb_mmx.c
--- a/src/video_out/yuv2rgb_mmx.c
+++ b/src/video_out/yuv2rgb_mmx.c
@@ -851,7 +851,6 @@ static inline void yuv420_argb32 (yuv2rg
 
 	i = this->dest_width >> 3; img=image;
 	do {
-	  /* printf ("i : %d\n",i); */
 
 	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);
 	  mmx_unpack_32rgb (img, cpu);
@@ -962,7 +961,6 @@ static inline void yuv420_abgr32 (yuv2rg
 
 	i = this->dest_width >> 3; img=image;
 	do {
-	  /* printf ("i : %d\n",i); */
 
 	  mmx_yuv2rgb (y_buf, u_buf, v_buf, this->table_mmx);
 	  mmx_unpack_32bgr (img, cpu);
@@ -1128,5 +1126,202 @@ void yuv2rgb_init_mmx (yuv2rgb_factory_t
   }
 }
 
+/*
+ * MMX/MMXEXT accelerated yuy2 -> 32 bit rgb conversion.
+ *
+ * Each packed yuy2 source line is split into the y/u/v line buffers
+ * (y from byte 0, u from byte 1, v from byte 3) via scale_line_2/4 and
+ * then converted 8 pixels per step with the MMX kernels used for yuv420.
+ */
+
+/* fill the y/u/v line buffers from the packed yuy2 line at _p */
+static inline void yuy2_scale_line (yuv2rgb_t *this, uint8_t *_p)
+{
+  scale_line_4 (_p+1, this->u_buffer,
+                this->dest_width >> 1, this->step_dx);
+  scale_line_4 (_p+3, this->v_buffer,
+                this->dest_width >> 1, this->step_dx);
+  scale_line_2 (_p, this->y_buffer,
+                this->dest_width, this->step_dx);
+}
+
+/*
+ * Convert one slice of packed yuy2 at _p to 32 bit pixels at _dst,
+ * stored with mmx_unpack_32rgb; cpu is passed through to that routine.
+ */
+static void yuy2_argb32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p, int cpu)
+{
+  uint8_t  * py_1, * pu, * pv;
+  uint32_t * dst_1;
+  int        width, height;
+  int        dy;
+
+  /* FIXME: implement unscaled version */
+
+  yuy2_scale_line (this, _p);
+
+  dy = 0;
+  height = this->next_slice (this, &_dst);
+
+  for (;;) {
+    dst_1 = (uint32_t*)_dst;
+    py_1 = this->y_buffer;
+    pu = this->u_buffer;
+    pv = this->v_buffer;
+
+    /* 8 pixels per step; nothing is converted if dest_width < 8 */
+    for (width = this->dest_width >> 3; width > 0; width--) {
+      mmx_yuv2rgb(py_1, pu, pv, this->table_mmx);
+      mmx_unpack_32rgb (dst_1, cpu);
+
+      pu += 4;
+      pv += 4;
+      py_1 += 8;
+      dst_1 += 8;
+    }
+
+    dy += this->step_dy;
+    _dst += this->rgb_stride;
+
+    /* dy has 15 fractional bits: duplicate the output line until dy
+     * reaches the next source line */
+    while (--height > 0 && dy < 32768) {
+
+      xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
+
+      dy += this->step_dy;
+      _dst += this->rgb_stride;
+    }
+
+    if (height <= 0)
+      break;
+
+    _p += this->y_stride*(dy>>15);
+    dy &= 32767;
+
+    yuy2_scale_line (this, _p);
+  }
+}
+
+/*
+ * Convert one slice of packed yuy2 at _p to 32 bit pixels at _dst,
+ * stored with mmx_unpack_32bgr; cpu is passed through to that routine.
+ */
+static void yuy2_abgr32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p, int cpu)
+{
+  uint8_t  * py_1, * pu, * pv;
+  uint32_t * dst_1;
+  int        width, height;
+  int        dy;
+
+  /* FIXME: implement unscaled version */
+
+  yuy2_scale_line (this, _p);
+
+  dy = 0;
+  height = this->next_slice (this, &_dst);
+
+  for (;;) {
+    dst_1 = (uint32_t*)_dst;
+    py_1 = this->y_buffer;
+    pu = this->u_buffer;
+    pv = this->v_buffer;
+
+    /* 8 pixels per step; nothing is converted if dest_width < 8 */
+    for (width = this->dest_width >> 3; width > 0; width--) {
+      mmx_yuv2rgb(py_1, pu, pv, this->table_mmx);
+      mmx_unpack_32bgr (dst_1, cpu);
+
+      pu += 4;
+      pv += 4;
+      py_1 += 8;
+      dst_1 += 8;
+    }
+
+    dy += this->step_dy;
+    _dst += this->rgb_stride;
+
+    /* dy has 15 fractional bits: duplicate the output line until dy
+     * reaches the next source line */
+    while (--height > 0 && dy < 32768) {
+
+      xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
+
+      dy += this->step_dy;
+      _dst += this->rgb_stride;
+    }
+
+    if (height <= 0)
+      break;
+
+    _p += this->y_stride*(dy>>15);
+    dy &= 32767;
+
+    yuy2_scale_line (this, _p);
+  }
+}
+
+static void mmxext_yuy2_argb32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
+{
+  yuy2_argb32 (this, _dst, _p, CPU_MMXEXT);
+  emms();	/* re-initialize x86 FPU after MMX use */
+}
+
+static void mmxext_yuy2_abgr32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
+{
+  yuy2_abgr32 (this, _dst, _p, CPU_MMXEXT);
+  emms();	/* re-initialize x86 FPU after MMX use */
+}
+
+static void mmx_yuy2_argb32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
+{
+  yuy2_argb32 (this, _dst, _p, CPU_MMX);
+  emms();	/* re-initialize x86 FPU after MMX use */
+}
+
+static void mmx_yuy2_abgr32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
+{
+  yuy2_abgr32 (this, _dst, _p, CPU_MMX);
+  emms();	/* re-initialize x86 FPU after MMX use */
+}
+
+/* select an MMXEXT yuy2 converter; yuy22rgb_fun is left untouched if none fits */
+void yuy22rgb_init_mmxext (yuv2rgb_factory_t *this) {
+
+  if (this->swapped)
+    return; /*no swapped pixel output upto now*/
+
+  switch (this->mode) {
+  case MODE_32_RGB:
+    this->yuy22rgb_fun = mmxext_yuy2_argb32;
+    break;
+  case MODE_32_BGR:
+    this->yuy22rgb_fun = mmxext_yuy2_abgr32;
+    break;
+  default:
+    /* 15/16/24 bit modes are not accelerated yet */
+    break;
+  }
+}
+
+/* select an MMX yuy2 converter; yuy22rgb_fun is left untouched if none fits */
+void yuy22rgb_init_mmx (yuv2rgb_factory_t *this) {
+
+  if (this->swapped)
+    return; /*no swapped pixel output upto now*/
+
+  switch (this->mode) {
+  case MODE_32_RGB:
+    this->yuy22rgb_fun = mmx_yuy2_argb32;
+    break;
+  case MODE_32_BGR:
+    this->yuy22rgb_fun = mmx_yuy2_abgr32;
+    break;
+  default:
+    /* 15/16/24 bit modes are not accelerated yet */
+    break;
+  }
+}
+
 
 #endif