Update libmpeg2 to upstream SVN (rev. 1172). diff --git a/configure.ac b/configure.ac --- a/configure.ac +++ b/configure.ac @@ -1246,6 +1246,7 @@ src/libmpeg2new/Makefile src/libmpeg2new/Makefile src/libmpeg2new/include/Makefile src/libmpeg2new/libmpeg2/Makefile +src/libmpeg2new/libmpeg2/convert/Makefile src/video_out/Makefile src/video_out/macosx/Makefile src/xine-utils/Makefile diff --git a/src/libmpeg2new/include/alpha_asm.h b/src/libmpeg2new/include/alpha_asm.h --- a/src/libmpeg2new/include/alpha_asm.h +++ b/src/libmpeg2new/include/alpha_asm.h @@ -20,8 +20,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef ALPHA_ASM_H -#define ALPHA_ASM_H +#ifndef LIBMPEG2_ALPHA_ASM_H +#define LIBMPEG2_ALPHA_ASM_H #include @@ -178,4 +178,4 @@ struct unaligned_long { uint64_t l; } __ #error "Unknown compiler!" #endif -#endif /* ALPHA_ASM_H */ +#endif /* LIBMPEG2_ALPHA_ASM_H */ diff --git a/src/libmpeg2new/include/attributes.h b/src/libmpeg2new/include/attributes.h --- a/src/libmpeg2new/include/attributes.h +++ b/src/libmpeg2new/include/attributes.h @@ -21,6 +21,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifndef LIBMPEG2_ATTRIBUTES_H +#define LIBMPEG2_ATTRIBUTES_H + /* use gcc attribs to align critical data structures */ #ifdef ATTRIBUTE_ALIGNED_MAX #define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? 
ATTRIBUTE_ALIGNED_MAX : align))) @@ -35,3 +38,5 @@ #define likely(x) (x) #define unlikely(x) (x) #endif + +#endif /* LIBMPEG2_ATTRIBUTES_H */ diff --git a/src/libmpeg2new/include/mmx.h b/src/libmpeg2new/include/mmx.h --- a/src/libmpeg2new/include/mmx.h +++ b/src/libmpeg2new/include/mmx.h @@ -20,6 +20,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef LIBMPEG2_MMX_H +#define LIBMPEG2_MMX_H /* * The type of an value that fits in an MMX register (note that long @@ -257,7 +260,33 @@ typedef union { #define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) #define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) + +/* SSE2 */ + +typedef union { + long long q[2]; /* 2 Quadword (64-bit) values */ + unsigned long long uq[2]; /* 2 Unsigned Quadword */ + int d[4]; /* 4 Doubleword (32-bit) values */ + unsigned int ud[4]; /* 4 Unsigned Doubleword */ + short w[8]; /* 8 Word (16-bit) values */ + unsigned short uw[8]; /* 8 Unsigned Word */ + char b[16]; /* 16 Byte (8-bit) values */ + unsigned char ub[16]; /* 16 Unsigned Byte */ + float s[4]; /* 4 Single-precision (32-bit) values */ +} ATTR_ALIGN(16) sse_t; /* On a 16-byte (128-bit) boundary */ + +#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) +#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) +#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) +#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) +#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) +#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) + +#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) + #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) #define sfence() __asm__ __volatile__ ("sfence\n\t") + +#endif /* LIBMPEG2_MMX_H */ diff --git a/src/libmpeg2new/include/mpeg2.h b/src/libmpeg2new/include/mpeg2.h --- a/src/libmpeg2new/include/mpeg2.h +++ 
b/src/libmpeg2new/include/mpeg2.h @@ -21,11 +21,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef MPEG2_H -#define MPEG2_H +#ifndef LIBMPEG2_MPEG2_H +#define LIBMPEG2_MPEG2_H #define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c)) -#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 1) /* 0.4.1 */ +#define MPEG2_RELEASE MPEG2_VERSION (0, 5, 0) /* 0.5.0 */ #define SEQ_FLAG_MPEG2 1 #define SEQ_FLAG_CONSTRAINED_PARAMETERS 2 @@ -82,6 +82,7 @@ typedef struct mpeg2_gop_s { #define PIC_FLAG_COMPOSITE_DISPLAY 32 #define PIC_FLAG_SKIP 64 #define PIC_FLAG_TAGS 128 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256 #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 typedef struct mpeg2_picture_s { @@ -162,6 +163,7 @@ void mpeg2_custom_fbuf (mpeg2dec_t * mpe #define MPEG2_ACCEL_ALPHA_MVI 2 #define MPEG2_ACCEL_SPARC_VIS 1 #define MPEG2_ACCEL_SPARC_VIS2 2 +#define MPEG2_ACCEL_ARM 1 #define MPEG2_ACCEL_DETECT 0x80000000 uint32_t mpeg2_accel (uint32_t accel); @@ -199,4 +201,4 @@ void mpeg2_malloc_hooks (void * malloc ( void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), int free (void *)); -#endif /* MPEG2_H */ +#endif /* LIBMPEG2_MPEG2_H */ diff --git a/src/libmpeg2new/include/mpeg2convert.h b/src/libmpeg2new/include/mpeg2convert.h --- a/src/libmpeg2new/include/mpeg2convert.h +++ b/src/libmpeg2new/include/mpeg2convert.h @@ -21,8 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef MPEG2CONVERT_H -#define MPEG2CONVERT_H +#ifndef LIBMPEG2_MPEG2CONVERT_H +#define LIBMPEG2_MPEG2CONVERT_H mpeg2_convert_t mpeg2convert_rgb32; mpeg2_convert_t mpeg2convert_rgb24; @@ -45,4 +45,4 @@ mpeg2_convert_t * mpeg2convert_rgb (mpeg mpeg2_convert_t mpeg2convert_uyvy; -#endif /* MPEG2CONVERT_H */ +#endif /* LIBMPEG2_MPEG2CONVERT_H */ diff --git a/src/libmpeg2new/include/sse.h b/src/libmpeg2new/include/sse.h --- a/src/libmpeg2new/include/sse.h +++ b/src/libmpeg2new/include/sse.h @@ -19,6 +19,9 @@ * along with this program; if not, 
write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef LIBMPEG2_SSE_H +#define LIBMPEG2_SSE_H typedef union { float sf[4]; /* Single-precision (32-bit) value */ @@ -254,3 +257,4 @@ typedef union { : /* nothing */ \ : "X" (mem)) +#endif /* LIBMPEG2_SSE_H */ diff --git a/src/libmpeg2new/include/tendra.h b/src/libmpeg2new/include/tendra.h --- a/src/libmpeg2new/include/tendra.h +++ b/src/libmpeg2new/include/tendra.h @@ -21,6 +21,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifndef LIBMPEG2_TENDRA_H +#define LIBMPEG2_TENDRA_H + #pragma TenDRA begin #pragma TenDRA longlong type warning @@ -33,3 +36,5 @@ int TenDRA; int TenDRA; #endif /* TenDRA_check */ + +#endif /* LIBMPEG2_TENDRA_H */ diff --git a/src/libmpeg2new/include/video_out.h b/src/libmpeg2new/include/video_out.h --- a/src/libmpeg2new/include/video_out.h +++ b/src/libmpeg2new/include/video_out.h @@ -20,6 +20,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef LIBMPEG2_VIDEO_OUT_H +#define LIBMPEG2_VIDEO_OUT_H struct mpeg2_sequence_s; struct mpeg2_convert_init_s; @@ -48,7 +51,7 @@ typedef vo_instance_t * vo_open_t (void) typedef vo_instance_t * vo_open_t (void); typedef struct { - char * name; + const char * name; vo_open_t * open; } vo_driver_t; @@ -56,3 +59,5 @@ void vo_accel (uint32_t accel); /* return NULL terminated array of all drivers */ vo_driver_t const * vo_drivers (void); + +#endif /* LIBMPEG2_VIDEO_OUT_H */ diff --git a/src/libmpeg2new/include/vis.h b/src/libmpeg2new/include/vis.h --- a/src/libmpeg2new/include/vis.h +++ b/src/libmpeg2new/include/vis.h @@ -40,6 +40,9 @@ * Thus, we do the opcode encoding by hand and output 32-bit words in * the assembler to keep the binary from becoming tainted. 
*/ + +#ifndef LIBMPEG2_VIS_H +#define LIBMPEG2_VIS_H #define vis_opc_base ((0x1 << 31) | (0x36 << 19)) #define vis_opf(X) ((X) << 5) @@ -326,3 +329,5 @@ static inline void vis_alignaddrl_g0(voi /* Pixel component distance. */ #define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) + +#endif /* LIBMPEG2_VIS_H */ diff --git a/src/libmpeg2new/libmpeg2/Makefile.am b/src/libmpeg2new/libmpeg2/Makefile.am --- a/src/libmpeg2new/libmpeg2/Makefile.am +++ b/src/libmpeg2new/libmpeg2/Makefile.am @@ -1,6 +1,6 @@ include $(top_srcdir)/misc/Makefile.comm -include $(top_srcdir)/misc/Makefile.common +SUBDIRS = convert -AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) -I$(top_srcdir)/src/libmpeg2new/include -DACCEL_DETECT noinst_LTLIBRARIES = libmpeg2.la libmpeg2arch.la @@ -8,7 +8,12 @@ libmpeg2_la_LIBADD = libmpeg2arch.la libmpeg2_la_LIBADD = libmpeg2arch.la libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \ - motion_comp_altivec.c idct_altivec.c \ - motion_comp_alpha.c idct_alpha.c \ - motion_comp_vis.c \ - cpu_accel.c cpu_state.c + motion_comp_altivec.c idct_altivec.c \ + motion_comp_alpha.c idct_alpha.c \ + motion_comp_vis.c motion_comp_arm.c \ + cpu_accel.c cpu_state.c +#if ARCH_ARM +#libmpeg2arch_la_SOURCES += motion_comp_arm_s.S +#endif + +EXTRA_DIST = configure.incl vlc.h mpeg2_internal.h diff --git a/src/libmpeg2new/libmpeg2/alloc.c b/src/libmpeg2new/libmpeg2/alloc.c --- a/src/libmpeg2new/libmpeg2/alloc.c +++ b/src/libmpeg2new/libmpeg2/alloc.c @@ -24,7 +24,7 @@ #include #include -#include "../include/mpeg2.h" +#include "mpeg2.h" static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL; static int (* free_hook) (void * buf) = NULL; @@ -62,9 +62,9 @@ void mpeg2_free (void * buf) free (*(((void **)buf) - 1)); } -void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), - int free (void *)) +void mpeg2_malloc_hooks (void * alloc_func (unsigned, mpeg2_alloc_t), + int free_func (void *)) { - 
malloc_hook = malloc; - free_hook = free; + malloc_hook = alloc_func; + free_hook = free_func; } diff --git a/src/libmpeg2new/libmpeg2/convert/Makefile.am b/src/libmpeg2new/libmpeg2/convert/Makefile.am new file mode 100644 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/convert/Makefile.am @@ -0,0 +1,11 @@ +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) -I$(top_srcdir)/src/libmpeg2new/include + +noinst_LTLIBRARIES = libmpeg2convert.la libmpeg2convertarch.la + +libmpeg2convert_la_SOURCES = rgb.c uyvy.c +libmpeg2convert_la_LIBADD = libmpeg2convertarch.la + +libmpeg2convertarch_la_SOURCES = rgb_mmx.c rgb_vis.c +#libmpeg2convertarch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS) + +EXTRA_DIST = convert_internal.h diff --git a/src/libmpeg2new/libmpeg2/convert_internal.h b/src/libmpeg2new/libmpeg2/convert/convert_internal.h rename from src/libmpeg2new/libmpeg2/convert_internal.h rename to src/libmpeg2new/libmpeg2/convert/convert_internal.h diff --git a/src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/libmpeg2new/libmpeg2/convert/libmpeg2convert.pc.in rename from src/libmpeg2new/libmpeg2/libmpeg2convert.pc.in rename to src/libmpeg2new/libmpeg2/convert/libmpeg2convert.pc.in diff --git a/src/libmpeg2new/libmpeg2/rgb.c b/src/libmpeg2new/libmpeg2/convert/rgb.c rename from src/libmpeg2new/libmpeg2/rgb.c rename to src/libmpeg2new/libmpeg2/convert/rgb.c --- a/src/libmpeg2new/libmpeg2/rgb.c +++ b/src/libmpeg2new/libmpeg2/convert/rgb.c @@ -22,7 +22,7 @@ */ #include "config.h" -#include +#include "attributes.h" #include diff --git a/src/libmpeg2new/libmpeg2/rgb_mmx.c b/src/libmpeg2new/libmpeg2/convert/rgb_mmx.c rename from src/libmpeg2new/libmpeg2/rgb_mmx.c rename to src/libmpeg2new/libmpeg2/convert/rgb_mmx.c --- a/src/libmpeg2new/libmpeg2/rgb_mmx.c +++ b/src/libmpeg2new/libmpeg2/convert/rgb_mmx.c @@ -34,7 +34,7 @@ #include "mpeg2.h" #include "mpeg2convert.h" #include "convert_internal.h" -#include +#include "attributes.h" #include "mmx.h" #define CPU_MMXEXT 0 
diff --git a/src/libmpeg2new/libmpeg2/rgb_vis.c b/src/libmpeg2new/libmpeg2/convert/rgb_vis.c rename from src/libmpeg2new/libmpeg2/rgb_vis.c rename to src/libmpeg2new/libmpeg2/convert/rgb_vis.c --- a/src/libmpeg2new/libmpeg2/rgb_vis.c +++ b/src/libmpeg2new/libmpeg2/convert/rgb_vis.c @@ -30,7 +30,7 @@ #include "mpeg2.h" #include "mpeg2convert.h" #include "convert_internal.h" -#include +#include "attributes.h" #include "vis.h" /* Based partially upon the MMX yuv2rgb code, see there for credits. diff --git a/src/libmpeg2new/libmpeg2/uyvy.c b/src/libmpeg2new/libmpeg2/convert/uyvy.c rename from src/libmpeg2new/libmpeg2/uyvy.c rename to src/libmpeg2new/libmpeg2/convert/uyvy.c diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c --- a/src/libmpeg2new/libmpeg2/cpu_accel.c +++ b/src/libmpeg2new/libmpeg2/cpu_accel.c @@ -25,11 +25,11 @@ #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static inline uint32_t arch_accel (uint32_t accel) { if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT)) @@ -46,7 +46,7 @@ static inline uint32_t arch_accel (uint3 uint32_t eax, ebx, ecx, edx; int AMD; -#if !defined(PIC) && !defined(__PIC__) +#if defined(__x86_64__) || (!defined(PIC) && !defined(__PIC__)) #define cpuid(op,eax,ebx,ecx,edx) \ __asm__ ("cpuid" \ : "=a" (eax), \ @@ -55,12 +55,12 @@ static inline uint32_t arch_accel (uint3 "=d" (edx) \ : "a" (op) \ : "cc") -#else /* PIC version : save ebx */ +#else /* PIC version : save ebx (not needed on x86_64) */ #define cpuid(op,eax,ebx,ecx,edx) \ - __asm__ ("push %%ebx\n\t" \ + __asm__ ("pushl %%ebx\n\t" \ "cpuid\n\t" \ "movl %%ebx,%1\n\t" \ - "pop %%ebx" \ + "popl %%ebx" \ : "=a" (eax), \ "=r" (ebx), \ "=c" (ecx), \ @@ -69,6 +69,7 @@ static inline uint32_t arch_accel (uint3 : "cc") #endif +#ifndef __x86_64__ /* x86_64 supports the cpuid op 
*/ __asm__ ("pushf\n\t" "pushf\n\t" "pop %0\n\t" @@ -86,6 +87,7 @@ static inline uint32_t arch_accel (uint3 if (eax == ebx) /* no cpuid */ return accel; +#endif cpuid (0x00000000, eax, ebx, ecx, edx); if (!eax) /* vendor string only */ @@ -98,15 +100,15 @@ static inline uint32_t arch_accel (uint3 return accel; accel |= MPEG2_ACCEL_X86_MMX; - if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ + if (edx & 0x02000000) /* SSE - identical to AMD MMX ext. */ accel |= MPEG2_ACCEL_X86_MMXEXT; - if (edx & 0x04000000) /* SSE2 */ + if (edx & 0x04000000) /* SSE2 */ accel |= MPEG2_ACCEL_X86_SSE2; - - if (ecx & 0x00000001) /* SSE3 */ + + if (ecx & 0x00000001) /* SSE3 */ accel |= MPEG2_ACCEL_X86_SSE3; - + cpuid (0x80000000, eax, ebx, ecx, edx); if (eax < 0x80000001) /* no extended capabilities */ return accel; @@ -123,7 +125,7 @@ static inline uint32_t arch_accel (uint3 return accel; } -#endif /* ARCH_X86 */ +#endif /* ARCH_X86 || ARCH_X86_64 */ #if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC)) #include @@ -145,10 +147,10 @@ static RETSIGTYPE sigill_handler (int si #endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */ #ifdef ARCH_PPC -static inline uint32_t arch_accel (uint32_t accel) +static uint32_t arch_accel (uint32_t accel) { #ifdef ACCEL_DETECT - if (accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT) == + if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) == MPEG2_ACCEL_DETECT) { static RETSIGTYPE (* oldsig) (int); @@ -160,10 +162,10 @@ static inline uint32_t arch_accel (uint3 canjump = 1; -#ifdef HAVE_ALTIVEC_H /* gnu */ +#if defined(__APPLE_CC__) /* apple */ +#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" +#else /* gnu */ #define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" -#else /* apple */ -#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" #endif asm volatile ("mtspr 256, %0\n\t" VAND (0, 0, 0) @@ -182,13 +184,13 @@ static inline uint32_t arch_accel (uint3 #endif /* ARCH_PPC */ #ifdef ARCH_SPARC -static 
inline uint32_t arch_accel (uint32_t accel) +static uint32_t arch_accel (uint32_t accel) { if (accel & MPEG2_ACCEL_SPARC_VIS2) accel |= MPEG2_ACCEL_SPARC_VIS; #ifdef ACCEL_DETECT - if (accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT) == + if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) == MPEG2_ACCEL_DETECT) { static RETSIGTYPE (* oldsig) (int); @@ -251,7 +253,7 @@ static inline uint32_t arch_accel (uint3 uint32_t mpeg2_detect_accel (uint32_t accel) { -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) accel = arch_accel (accel); #endif return accel; diff --git a/src/libmpeg2new/libmpeg2/cpu_state.c b/src/libmpeg2new/libmpeg2/cpu_state.c --- a/src/libmpeg2new/libmpeg2/cpu_state.c +++ b/src/libmpeg2new/libmpeg2/cpu_state.c @@ -26,17 +26,17 @@ #include #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#ifdef ARCH_X86 -#include "../include/mmx.h" +#if defined(ARCH_X86) || defined(ARCH_X86_64) +#include "mmx.h" #endif void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void state_restore_mmx (cpu_state_t * state) { emms (); @@ -44,18 +44,18 @@ static void state_restore_mmx (cpu_state #endif #ifdef ARCH_PPC -#ifdef HAVE_ALTIVEC_H /* gnu */ +#if defined(__APPLE_CC__) /* apple */ +#define LI(a,b) "li r" #a "," #b "\n\t" +#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" +#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" +#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" +#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" +#else /* gnu */ #define LI(a,b) "li " #a "," #b "\n\t" #define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" #define STVX(a,b,c) "stvx " #a 
"," #b "," #c "\n\t" #define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t" #define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t" -#else /* apple */ -#define LI(a,b) "li r" #a "," #b "\n\t" -#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" -#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" -#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" -#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" #endif static void state_save_altivec (cpu_state_t * state) @@ -115,7 +115,7 @@ static void state_restore_altivec (cpu_s void mpeg2_cpu_state_init (uint32_t accel) { -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if (accel & MPEG2_ACCEL_X86_MMX) { mpeg2_cpu_state_restore = state_restore_mmx; } diff --git a/src/libmpeg2new/libmpeg2/decode.c b/src/libmpeg2new/libmpeg2/decode.c --- a/src/libmpeg2new/libmpeg2/decode.c +++ b/src/libmpeg2new/libmpeg2/decode.c @@ -27,8 +27,8 @@ #include #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" static int mpeg2_accels = 0; diff --git a/src/libmpeg2new/libmpeg2/header.c b/src/libmpeg2new/libmpeg2/header.c --- a/src/libmpeg2new/libmpeg2/header.c +++ b/src/libmpeg2new/libmpeg2/header.c @@ -28,8 +28,8 @@ #include /* defines NULL */ #include /* memcmp */ -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" #define SEQ_EXT 2 @@ -239,7 +239,7 @@ static int sequence_ext (mpeg2dec_t * mp sequence->vbv_buffer_size |= buffer[4] << 21; sequence->frame_period = - sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>2)&3)+1); + sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>5)&3)+1); mpeg2dec->ext_state = SEQ_DISPLAY_EXT; @@ -264,9 +264,11 @@ static int sequence_display_ext (mpeg2de if (!(buffer[2] & 2)) /* missing marker_bit */ return 1; - sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2); - sequence->display_height = - ((buffer[2]& 
1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3); + if( (buffer[1] << 6) | (buffer[2] >> 2) ) + sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2); + if( ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3) ) + sequence->display_height = + ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3); return 0; } @@ -406,13 +408,13 @@ int mpeg2_guess_aspect (const mpeg2_sequ return (height == 576) ? 1 : 2; } -static void copy_matrix (mpeg2dec_t * mpeg2dec, int index) +static void copy_matrix (mpeg2dec_t * mpeg2dec, int idx) { - if (memcmp (mpeg2dec->quantizer_matrix[index], - mpeg2dec->new_quantizer_matrix[index], 64)) { - memcpy (mpeg2dec->quantizer_matrix[index], - mpeg2dec->new_quantizer_matrix[index], 64); - mpeg2dec->scaled[index] = -1; + if (memcmp (mpeg2dec->quantizer_matrix[idx], + mpeg2dec->new_quantizer_matrix[idx], 64)) { + memcpy (mpeg2dec->quantizer_matrix[idx], + mpeg2dec->new_quantizer_matrix[idx], 64); + mpeg2dec->scaled[idx] = -1; } } @@ -464,7 +466,7 @@ void mpeg2_header_sequence_finalize (mpe if (mpeg2dec->sequence.width != (unsigned)-1) { /* * According to 6.1.1.6, repeat sequence headers should be - * identical to the original. However some encoders dont + * identical to the original. However some encoders do not * respect that and change various fields (including bitrate * and aspect ratio) in the repeat sequence headers. So we * choose to be as conservative as possible and only restart @@ -583,7 +585,7 @@ int mpeg2_header_picture (mpeg2dec_t * m /* XXXXXX decode extra_information_picture as well */ - mpeg2dec->q_scale_type = 0; + decoder->q_scale_type = 0; decoder->intra_dc_precision = 7; decoder->frame_pred_frame_dct = 1; decoder->concealment_motion_vectors = 0; @@ -620,6 +622,7 @@ static int picture_coding_ext (mpeg2dec_ if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { picture->nb_fields = (buffer[3] & 2) ? 3 : 2; flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + flags |= (buffer[3] & 2) ? 
PIC_FLAG_REPEAT_FIRST_FIELD : 0; } else picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; break; @@ -629,7 +632,7 @@ static int picture_coding_ext (mpeg2dec_ decoder->top_field_first = buffer[3] >> 7; decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; - mpeg2dec->q_scale_type = buffer[3] & 16; + decoder->q_scale_type = buffer[3] & 16; decoder->intra_vlc_format = (buffer[3] >> 3) & 1; decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm; if (!(buffer[4] & 0x80)) @@ -854,7 +857,7 @@ int mpeg2_header_user_data (mpeg2dec_t * return 0; } -static void prescale (mpeg2dec_t * mpeg2dec, int index) +static void prescale (mpeg2dec_t * mpeg2dec, int idx) { static int non_linear_scale [] = { 0, 1, 2, 3, 4, 5, 6, 7, @@ -865,13 +868,13 @@ static void prescale (mpeg2dec_t * mpeg2 int i, j, k; mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); - if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) { - mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; + if (mpeg2dec->scaled[idx] != decoder->q_scale_type) { + mpeg2dec->scaled[idx] = decoder->q_scale_type; for (i = 0; i < 32; i++) { - k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); + k = decoder->q_scale_type ? 
non_linear_scale[i] : (i << 1); for (j = 0; j < 64; j++) - decoder->quantizer_prescale[index][i][j] = - k * mpeg2dec->quantizer_matrix[index][j]; + decoder->quantizer_prescale[idx][i][j] = + k * mpeg2dec->quantizer_matrix[idx][j]; } } } diff --git a/src/libmpeg2new/libmpeg2/idct.c b/src/libmpeg2new/libmpeg2/idct.c --- a/src/libmpeg2new/libmpeg2/idct.c +++ b/src/libmpeg2new/libmpeg2/idct.c @@ -26,8 +26,8 @@ #include #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ @@ -66,7 +66,7 @@ do { \ } while (0) #endif -static void inline idct_row (int16_t * const block) +static inline void idct_row (int16_t * const block) { int d0, d1, d2, d3; int a0, a1, a2, a3, b0, b1, b2, b3; @@ -119,7 +119,7 @@ static void inline idct_row (int16_t * c block[7] = (a0 - b0) >> 12; } -static void inline idct_col (int16_t * const block) +static inline void idct_col (int16_t * const block) { int d0, d1, d2, d3; int a0, a1, a2, a3, b0, b1, b2, b3; @@ -236,7 +236,11 @@ void mpeg2_idct_init (uint32_t accel) void mpeg2_idct_init (uint32_t accel) { #ifdef ARCH_X86 - if (accel & MPEG2_ACCEL_X86_MMXEXT) { + if (accel & MPEG2_ACCEL_X86_SSE2) { + mpeg2_idct_copy = mpeg2_idct_copy_sse2; + mpeg2_idct_add = mpeg2_idct_add_sse2; + mpeg2_idct_mmx_init (); + } else if (accel & MPEG2_ACCEL_X86_MMXEXT) { mpeg2_idct_copy = mpeg2_idct_copy_mmxext; mpeg2_idct_add = mpeg2_idct_add_mmxext; mpeg2_idct_mmx_init (); diff --git a/src/libmpeg2new/libmpeg2/idct_alpha.c b/src/libmpeg2new/libmpeg2/idct_alpha.c --- a/src/libmpeg2new/libmpeg2/idct_alpha.c +++ b/src/libmpeg2new/libmpeg2/idct_alpha.c @@ -30,7 +30,7 @@ #include #include "mpeg2.h" -#include +#include "attributes.h" #include "mpeg2_internal.h" #include "alpha_asm.h" @@ -59,7 +59,7 @@ do { \ } while (0) #endif -static void inline idct_row (int16_t * const block) +static inline void idct_row (int16_t * const 
block) { uint64_t l, r; int_fast32_t d0, d1, d2, d3; @@ -116,7 +116,7 @@ static void inline idct_row (int16_t * c block[7] = (a0 - b0) >> 12; } -static void inline idct_col (int16_t * const block) +static inline void idct_col (int16_t * const block) { int_fast32_t d0, d1, d2, d3; int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; diff --git a/src/libmpeg2new/libmpeg2/idct_altivec.c b/src/libmpeg2new/libmpeg2/idct_altivec.c --- a/src/libmpeg2new/libmpeg2/idct_altivec.c +++ b/src/libmpeg2new/libmpeg2/idct_altivec.c @@ -31,7 +31,7 @@ #include #include "mpeg2.h" -#include +#include "attributes.h" #include "mpeg2_internal.h" typedef vector signed char vector_s8_t; @@ -41,7 +41,7 @@ typedef vector signed int vector_s32_t; typedef vector signed int vector_s32_t; typedef vector unsigned int vector_u32_t; -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) +#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) /* work around gcc <3.3 vec_mergel bug */ static inline vector_s16_t my_vec_mergel (vector_s16_t const A, vector_s16_t const B) @@ -56,10 +56,10 @@ static inline vector_s16_t my_vec_mergel #define vec_mergel my_vec_mergel #endif -#ifdef HAVE_ALTIVEC_H /* gnu */ +#if defined(__APPLE_CC__) /* apple */ +#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) +#else /* gnu */ #define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} -#else /* apple */ -#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) #endif static const vector_s16_t constants ATTR_ALIGN(16) = diff --git a/src/libmpeg2new/libmpeg2/idct_mlib.c b/src/libmpeg2new/libmpeg2/idct_mlib.c deleted file mode 100644 --- a/src/libmpeg2new/libmpeg2/idct_mlib.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * idct_mlib.c - * Copyright (C) 1999-2003 Håkan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. 
- * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include -#include -#include -#include -#include -#include - -#include "../include/mpeg2.h" -#include "mpeg2_internal.h" - -void mpeg2_idct_add_mlib (const int last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT_IEEE_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT8x8_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -#endif diff --git a/src/libmpeg2new/libmpeg2/idct_mmx.c b/src/libmpeg2new/libmpeg2/idct_mmx.c --- a/src/libmpeg2new/libmpeg2/idct_mmx.c +++ b/src/libmpeg2new/libmpeg2/idct_mmx.c @@ -23,24 +23,25 @@ #include "config.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include 
"mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#include "../include/mmx.h" +#include "mmx.h" #define ROW_SHIFT 15 #define COL_SHIFT 6 #define round(bias) ((int)(((bias)+0.5) * (1<> ROW_SHIFT; } #endif + + +/* SSE2 row IDCT */ +#define sse2_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ + c4, -c6, c4, -c2, \ + c4, c6, -c4, -c2, \ + -c4, c2, c4, -c6, \ + c1, c3, c3, -c7, \ + c5, -c1, c7, -c5, \ + c5, c7, -c1, -c5, \ + c7, c3, c3, -c1 } + +#define SSE2_IDCT_2ROW(table, row1, row2, round1, round2) do { \ + /* no scheduling: trust in out of order execution */ \ + /* based on Intel AP-945 */ \ + /* (http://cache-www.intel.com/cd/00/00/01/76/17680_w_idct.pdf) */ \ + \ + /* input */ /* 1: row1= x7 x5 x3 x1 x6 x4 x2 x0 */ \ + pshufd_r2r (row1, xmm1, 0); /* 1: xmm1= x2 x0 x2 x0 x2 x0 x2 x0 */ \ + pmaddwd_m2r (table[0], xmm1); /* 1: xmm1= x2*C + x0*C ... */ \ + pshufd_r2r (row1, xmm3, 0xaa); /* 1: xmm3= x3 x1 x3 x1 x3 x1 x3 x1 */ \ + pmaddwd_m2r (table[2*8], xmm3); /* 1: xmm3= x3*C + x1*C ... */ \ + pshufd_r2r (row1, xmm2, 0x55); /* 1: xmm2= x6 x4 x6 x4 x6 x4 x6 x4 */ \ + pshufd_r2r (row1, row1, 0xff); /* 1: row1= x7 x5 x7 x5 x7 x5 x7 x5 */ \ + pmaddwd_m2r (table[1*8], xmm2); /* 1: xmm2= x6*C + x4*C ... */ \ + paddd_m2r (round1, xmm1); /* 1: xmm1= x2*C + x0*C + round ... */ \ + pmaddwd_m2r (table[3*8], row1); /* 1: row1= x7*C + x5*C ... */ \ + pshufd_r2r (row2, xmm5, 0); /* 2: */ \ + pshufd_r2r (row2, xmm6, 0x55); /* 2: */ \ + pmaddwd_m2r (table[0], xmm5); /* 2: */ \ + paddd_r2r (xmm2, xmm1); /* 1: xmm1= a[] */ \ + movdqa_r2r (xmm1, xmm2); /* 1: xmm2= a[] */ \ + pshufd_r2r (row2, xmm7, 0xaa); /* 2: */ \ + pmaddwd_m2r (table[1*8], xmm6); /* 2: */ \ + paddd_r2r (xmm3, row1); /* 1: row1= b[]= 7*C+5*C+3*C+1*C ... 
*/ \ + pshufd_r2r (row2, row2, 0xff); /* 2: */ \ + psubd_r2r (row1, xmm2); /* 1: xmm2= a[] - b[] */ \ + pmaddwd_m2r (table[2*8], xmm7); /* 2: */ \ + paddd_r2r (xmm1, row1); /* 1: row1= a[] + b[] */ \ + psrad_i2r (ROW_SHIFT, xmm2); /* 1: xmm2= result 4...7 */ \ + paddd_m2r (round2, xmm5); /* 2: */ \ + pmaddwd_m2r (table[3*8], row2); /* 2: */ \ + paddd_r2r (xmm6, xmm5); /* 2: */ \ + movdqa_r2r (xmm5, xmm6); /* 2: */ \ + psrad_i2r (ROW_SHIFT, row1); /* 1: row1= result 0...3 */ \ + pshufd_r2r (xmm2, xmm2, 0x1b); /* 1: [0 1 2 3] -> [3 2 1 0] */ \ + packssdw_r2r (xmm2, row1); /* 1: row1= result[] */ \ + paddd_r2r (xmm7, row2); /* 2: */ \ + psubd_r2r (row2, xmm6); /* 2: */ \ + paddd_r2r (xmm5, row2); /* 2: */ \ + psrad_i2r (ROW_SHIFT, xmm6); /* 2: */ \ + psrad_i2r (ROW_SHIFT, row2); /* 2: */ \ + pshufd_r2r (xmm6, xmm6, 0x1b); /* 2: */ \ + packssdw_r2r (xmm6, row2); /* 2: */ \ +} while (0) /* MMXEXT row IDCT */ @@ -325,7 +381,7 @@ static inline void mmx_row_mid (int16_t #if 0 -/* C column IDCT - its just here to document the MMXEXT and MMX versions */ +/* C column IDCT - it is just here to document the MMXEXT and MMX versions */ static inline void idct_col (int16_t * col, int offset) { /* multiplication - as implemented on mmx */ @@ -396,23 +452,273 @@ static inline void idct_col (int16_t * c #endif -/* MMX column IDCT */ -static inline void idct_col (int16_t * const col, const int offset) -{ #define T1 13036 #define T2 27146 #define T3 43790 #define C4 23170 - static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; - static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; - static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; - static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + +/* SSE2 column IDCT */ +static inline void sse2_idct_col (int16_t * const col) +{ + /* Almost identical to mmxext version: */ + /* just do both 4x8 columns in parallel */ + + static const short t1_vector[] ATTR_ALIGN(16) = {T1,T1,T1,T1,T1,T1,T1,T1}; + static const short t2_vector[] 
ATTR_ALIGN(16) = {T2,T2,T2,T2,T2,T2,T2,T2}; + static const short t3_vector[] ATTR_ALIGN(16) = {T3,T3,T3,T3,T3,T3,T3,T3}; + static const short c4_vector[] ATTR_ALIGN(16) = {C4,C4,C4,C4,C4,C4,C4,C4}; + +#if defined(__x86_64__) + + /* INPUT: block in xmm8 ... xmm15 */ + + movdqa_m2r (*t1_vector, xmm0); /* xmm0 = T1 */ + movdqa_r2r (xmm9, xmm1); /* xmm1 = x1 */ + + movdqa_r2r (xmm0, xmm2); /* xmm2 = T1 */ + pmulhw_r2r (xmm1, xmm0); /* xmm0 = T1*x1 */ + + movdqa_m2r (*t3_vector, xmm5); /* xmm5 = T3 */ + pmulhw_r2r (xmm15, xmm2); /* xmm2 = T1*x7 */ + + movdqa_r2r (xmm5, xmm7); /* xmm7 = T3-1 */ + psubsw_r2r (xmm15, xmm0); /* xmm0 = v17 */ + + movdqa_m2r (*t2_vector, xmm9); /* xmm9 = T2 */ + pmulhw_r2r (xmm11, xmm5); /* xmm5 = (T3-1)*x3 */ + + paddsw_r2r (xmm2, xmm1); /* xmm1 = u17 */ + pmulhw_r2r (xmm13, xmm7); /* xmm7 = (T3-1)*x5 */ + + movdqa_r2r (xmm9, xmm2); /* xmm2 = T2 */ + paddsw_r2r (xmm11, xmm5); /* xmm5 = T3*x3 */ + + pmulhw_r2r (xmm10, xmm9); /* xmm9 = T2*x2 */ + paddsw_r2r (xmm13, xmm7); /* xmm7 = T3*x5 */ + + psubsw_r2r (xmm13, xmm5); /* xmm5 = v35 */ + paddsw_r2r (xmm11, xmm7); /* xmm7 = u35 */ + + movdqa_r2r (xmm0, xmm6); /* xmm6 = v17 */ + pmulhw_r2r (xmm14, xmm2); /* xmm2 = T2*x6 */ + + psubsw_r2r (xmm5, xmm0); /* xmm0 = b3 */ + psubsw_r2r (xmm14, xmm9); /* xmm9 = v26 */ + + paddsw_r2r (xmm6, xmm5); /* xmm5 = v12 */ + movdqa_r2r (xmm0, xmm11); /* xmm11 = b3 */ + + movdqa_r2r (xmm1, xmm6); /* xmm6 = u17 */ + paddsw_r2r (xmm10, xmm2); /* xmm2 = u26 */ + + paddsw_r2r (xmm7, xmm6); /* xmm6 = b0 */ + psubsw_r2r (xmm7, xmm1); /* xmm1 = u12 */ + + movdqa_r2r (xmm1, xmm7); /* xmm7 = u12 */ + paddsw_r2r (xmm5, xmm1); /* xmm1 = u12+v12 */ + + movdqa_m2r (*c4_vector, xmm0); /* xmm0 = C4/2 */ + psubsw_r2r (xmm5, xmm7); /* xmm7 = u12-v12 */ + + movdqa_r2r (xmm6, xmm4); /* xmm4 = b0 */ + pmulhw_r2r (xmm0, xmm1); /* xmm1 = b1/2 */ + + movdqa_r2r (xmm9, xmm6); /* xmm6 = v26 */ + pmulhw_r2r (xmm0, xmm7); /* xmm7 = b2/2 */ + + movdqa_r2r (xmm8, xmm10); /* xmm10 = x0 */ + 
movdqa_r2r (xmm8, xmm0); /* xmm0 = x0 */ + + psubsw_r2r (xmm12, xmm10); /* xmm10 = v04 */ + paddsw_r2r (xmm12, xmm0); /* xmm0 = u04 */ + + paddsw_r2r (xmm10, xmm9); /* xmm9 = a1 */ + movdqa_r2r (xmm0, xmm8); /* xmm8 = u04 */ + + psubsw_r2r (xmm6, xmm10); /* xmm10 = a2 */ + paddsw_r2r (xmm2, xmm8); /* xmm8 = a0 */ + + paddsw_r2r (xmm1, xmm1); /* xmm1 = b1 */ + psubsw_r2r (xmm2, xmm0); /* xmm0 = a3 */ + + paddsw_r2r (xmm7, xmm7); /* xmm7 = b2 */ + movdqa_r2r (xmm10, xmm13); /* xmm13 = a2 */ + + movdqa_r2r (xmm9, xmm14); /* xmm14 = a1 */ + paddsw_r2r (xmm7, xmm10); /* xmm10 = a2+b2 */ + + psraw_i2r (COL_SHIFT,xmm10); /* xmm10 = y2 */ + paddsw_r2r (xmm1, xmm9); /* xmm9 = a1+b1 */ + + psraw_i2r (COL_SHIFT, xmm9); /* xmm9 = y1 */ + psubsw_r2r (xmm1, xmm14); /* xmm14 = a1-b1 */ + + psubsw_r2r (xmm7, xmm13); /* xmm13 = a2-b2 */ + psraw_i2r (COL_SHIFT,xmm14); /* xmm14 = y6 */ + + movdqa_r2r (xmm8, xmm15); /* xmm15 = a0 */ + psraw_i2r (COL_SHIFT,xmm13); /* xmm13 = y5 */ + + paddsw_r2r (xmm4, xmm8); /* xmm8 = a0+b0 */ + psubsw_r2r (xmm4, xmm15); /* xmm15 = a0-b0 */ + + psraw_i2r (COL_SHIFT, xmm8); /* xmm8 = y0 */ + movdqa_r2r (xmm0, xmm12); /* xmm12 = a3 */ + + psubsw_r2r (xmm11, xmm12); /* xmm12 = a3-b3 */ + psraw_i2r (COL_SHIFT,xmm15); /* xmm15 = y7 */ + + paddsw_r2r (xmm0, xmm11); /* xmm11 = a3+b3 */ + psraw_i2r (COL_SHIFT,xmm12); /* xmm12 = y4 */ + + psraw_i2r (COL_SHIFT,xmm11); /* xmm11 = y3 */ + + /* OUTPUT: block in xmm8 ...
xmm15 */ + +#else + movdqa_m2r (*t1_vector, xmm0); /* xmm0 = T1 */ + + movdqa_m2r (*(col+1*8), xmm1); /* xmm1 = x1 */ + movdqa_r2r (xmm0, xmm2); /* xmm2 = T1 */ + + movdqa_m2r (*(col+7*8), xmm4); /* xmm4 = x7 */ + pmulhw_r2r (xmm1, xmm0); /* xmm0 = T1*x1 */ + + movdqa_m2r (*t3_vector, xmm5); /* xmm5 = T3 */ + pmulhw_r2r (xmm4, xmm2); /* xmm2 = T1*x7 */ + + movdqa_m2r (*(col+5*8), xmm6); /* xmm6 = x5 */ + movdqa_r2r (xmm5, xmm7); /* xmm7 = T3-1 */ + + movdqa_m2r (*(col+3*8), xmm3); /* xmm3 = x3 */ + psubsw_r2r (xmm4, xmm0); /* xmm0 = v17 */ + + movdqa_m2r (*t2_vector, xmm4); /* xmm4 = T2 */ + pmulhw_r2r (xmm3, xmm5); /* xmm5 = (T3-1)*x3 */ + + paddsw_r2r (xmm2, xmm1); /* xmm1 = u17 */ + pmulhw_r2r (xmm6, xmm7); /* xmm7 = (T3-1)*x5 */ + + /* slot */ + + movdqa_r2r (xmm4, xmm2); /* xmm2 = T2 */ + paddsw_r2r (xmm3, xmm5); /* xmm5 = T3*x3 */ + + pmulhw_m2r (*(col+2*8), xmm4); /* xmm4 = T2*x2 */ + paddsw_r2r (xmm6, xmm7); /* xmm7 = T3*x5 */ + + psubsw_r2r (xmm6, xmm5); /* xmm5 = v35 */ + paddsw_r2r (xmm3, xmm7); /* xmm7 = u35 */ + + movdqa_m2r (*(col+6*8), xmm3); /* xmm3 = x6 */ + movdqa_r2r (xmm0, xmm6); /* xmm6 = v17 */ + + pmulhw_r2r (xmm3, xmm2); /* xmm2 = T2*x6 */ + psubsw_r2r (xmm5, xmm0); /* xmm0 = b3 */ + + psubsw_r2r (xmm3, xmm4); /* xmm4 = v26 */ + paddsw_r2r (xmm6, xmm5); /* xmm5 = v12 */ + + movdqa_r2m (xmm0, *(col+3*8)); /* save b3 in scratch0 */ + movdqa_r2r (xmm1, xmm6); /* xmm6 = u17 */ + + paddsw_m2r (*(col+2*8), xmm2); /* xmm2 = u26 */ + paddsw_r2r (xmm7, xmm6); /* xmm6 = b0 */ + + psubsw_r2r (xmm7, xmm1); /* xmm1 = u12 */ + movdqa_r2r (xmm1, xmm7); /* xmm7 = u12 */ + + movdqa_m2r (*(col+0*8), xmm3); /* xmm3 = x0 */ + paddsw_r2r (xmm5, xmm1); /* xmm1 = u12+v12 */ + + movdqa_m2r (*c4_vector, xmm0); /* xmm0 = C4/2 */ + psubsw_r2r (xmm5, xmm7); /* xmm7 = u12-v12 */ + + movdqa_r2m (xmm6, *(col+5*8)); /* save b0 in scratch1 */ + pmulhw_r2r (xmm0, xmm1); /* xmm1 = b1/2 */ + + movdqa_r2r (xmm4, xmm6); /* xmm6 = v26 */ + pmulhw_r2r (xmm0, xmm7); /* xmm7 = b2/2 
*/ + + movdqa_m2r (*(col+4*8), xmm5); /* xmm5 = x4 */ + movdqa_r2r (xmm3, xmm0); /* xmm0 = x0 */ + + psubsw_r2r (xmm5, xmm3); /* xmm3 = v04 */ + paddsw_r2r (xmm5, xmm0); /* xmm0 = u04 */ + + paddsw_r2r (xmm3, xmm4); /* xmm4 = a1 */ + movdqa_r2r (xmm0, xmm5); /* xmm5 = u04 */ + + psubsw_r2r (xmm6, xmm3); /* xmm3 = a2 */ + paddsw_r2r (xmm2, xmm5); /* xmm5 = a0 */ + + paddsw_r2r (xmm1, xmm1); /* xmm1 = b1 */ + psubsw_r2r (xmm2, xmm0); /* xmm0 = a3 */ + + paddsw_r2r (xmm7, xmm7); /* xmm7 = b2 */ + movdqa_r2r (xmm3, xmm2); /* xmm2 = a2 */ + + movdqa_r2r (xmm4, xmm6); /* xmm6 = a1 */ + paddsw_r2r (xmm7, xmm3); /* xmm3 = a2+b2 */ + + psraw_i2r (COL_SHIFT, xmm3); /* xmm3 = y2 */ + paddsw_r2r (xmm1, xmm4); /* xmm4 = a1+b1 */ + + psraw_i2r (COL_SHIFT, xmm4); /* xmm4 = y1 */ + psubsw_r2r (xmm1, xmm6); /* xmm6 = a1-b1 */ + + movdqa_m2r (*(col+5*8), xmm1); /* xmm1 = b0 */ + psubsw_r2r (xmm7, xmm2); /* xmm2 = a2-b2 */ + + psraw_i2r (COL_SHIFT, xmm6); /* xmm6 = y6 */ + movdqa_r2r (xmm5, xmm7); /* xmm7 = a0 */ + + movdqa_r2m (xmm4, *(col+1*8)); /* save y1 */ + psraw_i2r (COL_SHIFT, xmm2); /* xmm2 = y5 */ + + movdqa_r2m (xmm3, *(col+2*8)); /* save y2 */ + paddsw_r2r (xmm1, xmm5); /* xmm5 = a0+b0 */ + + movdqa_m2r (*(col+3*8), xmm4); /* xmm4 = b3 */ + psubsw_r2r (xmm1, xmm7); /* xmm7 = a0-b0 */ + + psraw_i2r (COL_SHIFT, xmm5); /* xmm5 = y0 */ + movdqa_r2r (xmm0, xmm3); /* xmm3 = a3 */ + + movdqa_r2m (xmm2, *(col+5*8)); /* save y5 */ + psubsw_r2r (xmm4, xmm3); /* xmm3 = a3-b3 */ + + psraw_i2r (COL_SHIFT, xmm7); /* xmm7 = y7 */ + paddsw_r2r (xmm0, xmm4); /* xmm4 = a3+b3 */ + + movdqa_r2m (xmm5, *(col+0*8)); /* save y0 */ + psraw_i2r (COL_SHIFT, xmm3); /* xmm3 = y4 */ + + movdqa_r2m (xmm6, *(col+6*8)); /* save y6 */ + psraw_i2r (COL_SHIFT, xmm4); /* xmm4 = y3 */ + + movdqa_r2m (xmm7, *(col+7*8)); /* save y7 */ + + movdqa_r2m (xmm3, *(col+4*8)); /* save y4 */ + + movdqa_r2m (xmm4, *(col+3*8)); /* save y3 */ +#endif +} + + +/* MMX column IDCT */ +static inline void idct_col (int16_t * 
const col, const int offset) +{ + static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; /* column code adapted from peter gubanov */ /* http://www.elecard.com/peter/idct.shtml */ - movq_m2r (*_T1, mm0); /* mm0 = T1 */ + movq_m2r (*t1_vector, mm0); /* mm0 = T1 */ movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ movq_r2r (mm0, mm2); /* mm2 = T1 */ @@ -420,7 +726,7 @@ static inline void idct_col (int16_t * c movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */ - movq_m2r (*_T3, mm5); /* mm5 = T3 */ + movq_m2r (*t3_vector, mm5); /* mm5 = T3 */ pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ @@ -429,7 +735,7 @@ static inline void idct_col (int16_t * c movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ psubsw_r2r (mm4, mm0); /* mm0 = v17 */ - movq_m2r (*_T2, mm4); /* mm4 = T2 */ + movq_m2r (*t2_vector, mm4); /* mm4 = T2 */ pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ paddsw_r2r (mm2, mm1); /* mm1 = u17 */ @@ -467,7 +773,7 @@ static inline void idct_col (int16_t * c movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ - movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ + movq_m2r (*c4_vector, mm0); /* mm0 = C4/2 */ psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ @@ -592,6 +898,129 @@ static inline void idct (int16_t * const idct_col (block, 4); \ } +static inline void sse2_idct (int16_t * const block) +{ + static const int16_t table04[] ATTR_ALIGN(16) = + sse2_table (22725, 21407, 19266, 16384, 12873, 8867, 4520); + static const int16_t table17[] ATTR_ALIGN(16) = + sse2_table (31521, 29692, 26722, 22725, 17855, 12299, 6270); + static const int16_t table26[] ATTR_ALIGN(16) = + sse2_table (29692, 27969, 25172, 
21407, 16819, 11585, 5906); + static const int16_t table35[] ATTR_ALIGN(16) = + sse2_table (26722, 25172, 22654, 19266, 15137, 10426, 5315); + + static const int32_t rounder0_128[] ATTR_ALIGN(16) = + rounder_sse2 ((1 << (COL_SHIFT - 1)) - 0.5); + static const int32_t rounder4_128[] ATTR_ALIGN(16) = rounder_sse2 (0); + static const int32_t rounder1_128[] ATTR_ALIGN(16) = + rounder_sse2 (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ + static const int32_t rounder7_128[] ATTR_ALIGN(16) = + rounder_sse2 (-0.25); /* C1*(C7/C4+C7-C1)/2 */ + static const int32_t rounder2_128[] ATTR_ALIGN(16) = + rounder_sse2 (0.60355339059); /* C2 * (C6+C2)/2 */ + static const int32_t rounder6_128[] ATTR_ALIGN(16) = + rounder_sse2 (-0.25); /* C2 * (C6-C2)/2 */ + static const int32_t rounder3_128[] ATTR_ALIGN(16) = + rounder_sse2 (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ + static const int32_t rounder5_128[] ATTR_ALIGN(16) = + rounder_sse2 (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ + +#if defined(__x86_64__) + movdqa_m2r (block[0*8], xmm8); + movdqa_m2r (block[4*8], xmm12); + SSE2_IDCT_2ROW (table04, xmm8, xmm12, *rounder0_128, *rounder4_128); + + movdqa_m2r (block[1*8], xmm9); + movdqa_m2r (block[7*8], xmm15); + SSE2_IDCT_2ROW (table17, xmm9, xmm15, *rounder1_128, *rounder7_128); + + movdqa_m2r (block[2*8], xmm10); + movdqa_m2r (block[6*8], xmm14); + SSE2_IDCT_2ROW (table26, xmm10, xmm14, *rounder2_128, *rounder6_128); + + movdqa_m2r (block[3*8], xmm11); + movdqa_m2r (block[5*8], xmm13); + SSE2_IDCT_2ROW (table35, xmm11, xmm13, *rounder3_128, *rounder5_128); + + /* OUTPUT: block in xmm8 ... 
xmm15 */ + +#else + movdqa_m2r (block[0*8], xmm0); + movdqa_m2r (block[4*8], xmm4); + SSE2_IDCT_2ROW (table04, xmm0, xmm4, *rounder0_128, *rounder4_128); + movdqa_r2m (xmm0, block[0*8]); + movdqa_r2m (xmm4, block[4*8]); + + movdqa_m2r (block[1*8], xmm0); + movdqa_m2r (block[7*8], xmm4); + SSE2_IDCT_2ROW (table17, xmm0, xmm4, *rounder1_128, *rounder7_128); + movdqa_r2m (xmm0, block[1*8]); + movdqa_r2m (xmm4, block[7*8]); + + movdqa_m2r (block[2*8], xmm0); + movdqa_m2r (block[6*8], xmm4); + SSE2_IDCT_2ROW (table26, xmm0, xmm4, *rounder2_128, *rounder6_128); + movdqa_r2m (xmm0, block[2*8]); + movdqa_r2m (xmm4, block[6*8]); + + movdqa_m2r (block[3*8], xmm0); + movdqa_m2r (block[5*8], xmm4); + SSE2_IDCT_2ROW (table35, xmm0, xmm4, *rounder3_128, *rounder5_128); + movdqa_r2m (xmm0, block[3*8]); + movdqa_r2m (xmm4, block[5*8]); +#endif + + sse2_idct_col (block); +} + +static void sse2_block_copy (int16_t * const block, uint8_t * dest, + const int stride) +{ +#if defined(__x86_64__) + /* INPUT: block in xmm8 ... 
xmm15 */ + packuswb_r2r (xmm8, xmm8); + packuswb_r2r (xmm9, xmm9); + movq_r2m (xmm8, *(dest+0*stride)); + packuswb_r2r (xmm10, xmm10); + movq_r2m (xmm9, *(dest+1*stride)); + packuswb_r2r (xmm11, xmm11); + movq_r2m (xmm10, *(dest+2*stride)); + packuswb_r2r (xmm12, xmm12); + movq_r2m (xmm11, *(dest+3*stride)); + packuswb_r2r (xmm13, xmm13); + movq_r2m (xmm12, *(dest+4*stride)); + packuswb_r2r (xmm14, xmm14); + movq_r2m (xmm13, *(dest+5*stride)); + packuswb_r2r (xmm15, xmm15); + movq_r2m (xmm14, *(dest+6*stride)); + movq_r2m (xmm15, *(dest+7*stride)); +#else + movdqa_m2r (*(block+0*8), xmm0); + movdqa_m2r (*(block+1*8), xmm1); + movdqa_m2r (*(block+2*8), xmm2); + packuswb_r2r (xmm0, xmm0); + movdqa_m2r (*(block+3*8), xmm3); + packuswb_r2r (xmm1, xmm1); + movdqa_m2r (*(block+4*8), xmm4); + packuswb_r2r (xmm2, xmm2); + movdqa_m2r (*(block+5*8), xmm5); + packuswb_r2r (xmm3, xmm3); + movdqa_m2r (*(block+6*8), xmm6); + packuswb_r2r (xmm4, xmm4); + movdqa_m2r (*(block+7*8), xmm7); + movq_r2m (xmm0, *(dest+0*stride)); + packuswb_r2r (xmm5, xmm5); + movq_r2m (xmm1, *(dest+1*stride)); + packuswb_r2r (xmm6, xmm6); + movq_r2m (xmm2, *(dest+2*stride)); + packuswb_r2r (xmm7, xmm7); + movq_r2m (xmm3, *(dest+3*stride)); + movq_r2m (xmm4, *(dest+4*stride)); + movq_r2m (xmm5, *(dest+5*stride)); + movq_r2m (xmm6, *(dest+6*stride)); + movq_r2m (xmm7, *(dest+7*stride)); +#endif +} #define COPY_MMX(offset,r0,r1,r2) \ do { \ @@ -621,6 +1050,38 @@ static inline void block_copy (int16_t * movq_r2m (mm2, *(dest+stride)); } +#define ADD_SSE2_2ROW(op, block0, block1)\ +do { \ + movq_m2r (*(dest), xmm1); \ + movq_m2r (*(dest+stride), xmm2); \ + punpcklbw_r2r (xmm0, xmm1); \ + punpcklbw_r2r (xmm0, xmm2); \ + paddsw_##op (block0, xmm1); \ + paddsw_##op (block1, xmm2); \ + packuswb_r2r (xmm1, xmm1); \ + packuswb_r2r (xmm2, xmm2); \ + movq_r2m (xmm1, *(dest)); \ + movq_r2m (xmm2, *(dest+stride)); \ + dest += 2*stride; \ +} while (0) + +static void sse2_block_add (int16_t * const block, uint8_t * 
dest, + const int stride) +{ + pxor_r2r(xmm0, xmm0); +#if defined(__x86_64__) + /* INPUT: block in xmm8 ... xmm15 */ + ADD_SSE2_2ROW(r2r, xmm8, xmm9); + ADD_SSE2_2ROW(r2r, xmm10, xmm11); + ADD_SSE2_2ROW(r2r, xmm12, xmm13); + ADD_SSE2_2ROW(r2r, xmm14, xmm15); +#else + ADD_SSE2_2ROW(m2r, *(block+0*8), *(block+1*8)); + ADD_SSE2_2ROW(m2r, *(block+2*8), *(block+3*8)); + ADD_SSE2_2ROW(m2r, *(block+4*8), *(block+5*8)); + ADD_SSE2_2ROW(m2r, *(block+6*8), *(block+7*8)); +#endif +} #define ADD_MMX(offset,r1,r2,r3,r4) \ do { \ @@ -663,6 +1124,19 @@ static inline void block_add (int16_t * movq_r2m (mm3, *(dest+stride)); } + +static inline void sse2_block_zero (int16_t * const block) +{ + pxor_r2r (xmm0, xmm0); + movdqa_r2m (xmm0, *(block+0*8)); + movdqa_r2m (xmm0, *(block+1*8)); + movdqa_r2m (xmm0, *(block+2*8)); + movdqa_r2m (xmm0, *(block+3*8)); + movdqa_r2m (xmm0, *(block+4*8)); + movdqa_r2m (xmm0, *(block+5*8)); + movdqa_r2m (xmm0, *(block+6*8)); + movdqa_r2m (xmm0, *(block+7*8)); +} static inline void block_zero (int16_t * const block) { @@ -748,6 +1222,25 @@ static inline void block_add_DC (int16_t movq_r2m (mm3, *(dest + 2*stride)); } +void mpeg2_idct_copy_sse2 (int16_t * const block, uint8_t * const dest, + const int stride) +{ + sse2_idct (block); + sse2_block_copy (block, dest, stride); + sse2_block_zero (block); +} + +void mpeg2_idct_add_sse2 (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { + sse2_idct (block); + sse2_block_add (block, dest, stride); + sse2_block_zero (block); + } else + block_add_DC (block, dest, stride, CPU_MMXEXT); +} + declare_idct (mmxext_idct, mmxext_table, mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) diff --git a/src/libmpeg2new/libmpeg2/motion_comp.c b/src/libmpeg2new/libmpeg2/motion_comp.c --- a/src/libmpeg2new/libmpeg2/motion_comp.c +++ b/src/libmpeg2new/libmpeg2/motion_comp.c @@ -25,8 +25,8 @@ #include -#include 
"../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" mpeg2_mc_t mpeg2_mc; @@ -56,6 +56,11 @@ void mpeg2_mc_init (uint32_t accel) if (accel & MPEG2_ACCEL_SPARC_VIS) mpeg2_mc = mpeg2_mc_vis; else +#endif +#ifdef ARCH_ARM + if (accel & MPEG2_ACCEL_ARM) { + mpeg2_mc = mpeg2_mc_arm; + } else #endif mpeg2_mc = mpeg2_mc_c; } diff --git a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c --- a/src/libmpeg2new/libmpeg2/motion_comp_alpha.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_alpha.c @@ -27,7 +27,7 @@ #include #include "mpeg2.h" -#include +#include "attributes.h" #include "mpeg2_internal.h" #include "alpha_asm.h" diff --git a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c --- a/src/libmpeg2new/libmpeg2/motion_comp_altivec.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_altivec.c @@ -31,7 +31,7 @@ #include #include "mpeg2.h" -#include +#include "attributes.h" #include "mpeg2_internal.h" typedef vector signed char vector_s8_t; diff --git a/src/libmpeg2new/libmpeg2/motion_comp_arm.c b/src/libmpeg2new/libmpeg2/motion_comp_arm.c new file mode 100644 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp_arm.c @@ -0,0 +1,185 @@ +/* + * motion_comp_arm.c + * Copyright (C) 2004 AGAWA Koji + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with mpeg2dec; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_ARM + +#include + +#include "mpeg2.h" +#include "attributes.h" +#include "mpeg2_internal.h" + +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +#define predict_o(i) (ref[i]) +#define predict_x(i) (avg2 (ref[i], ref[i+1])) +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ + (ref+stride)[i], (ref+stride)[i+1])) + +#define put(predictor,i) dest[i] = predictor (i) +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) + +/* mc function template: MC_FUNC(op,xy) defines MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c; each applies op (put or avg) with predict_<xy> to the 16 (resp. 8) pixels of a row, then advances ref and dest by stride. The do/while body runs once before --height is tested, so height must be at least 1. */ + +#define MC_FUNC(op,xy) \ +static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +/* definitions of the actual mc functions; put,o and put,x are not instantiated because MC_put_o_*_arm and MC_put_x_*_arm are declared extern in this file rather than wrapping a C routine */ + +MC_FUNC (avg,o) +MC_FUNC (avg,x) +MC_FUNC (put,y) +MC_FUNC (avg,y) +MC_FUNC (put,xy) +MC_FUNC (avg,xy) + +
+extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + + +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_16_c(dest, ref, stride, height); +} + +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_16_c(dest, ref, stride, height); +} + +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); + +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_8_c(dest, ref, stride, height); +} + +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_8_c(dest, ref, stride, height); +} + +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_o_16_c(dest, ref, stride, height); +} + +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_16_c(dest, ref, stride, height); +} + +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_16_c(dest, ref, stride, height); +} + +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_16_c(dest, ref, stride, height); +} + +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_o_8_c(dest, ref, stride, height); +} + +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_8_c(dest, ref, stride, height); +} + +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_8_c(dest, ref, stride, height); +} + +static void 
MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_8_c(dest, ref, stride, height); +} + +MPEG2_MC_EXTERN (arm) + +#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_arm_s.S b/src/libmpeg2new/libmpeg2/motion_comp_arm_s.S new file mode 100644 --- /dev/null +++ b/src/libmpeg2new/libmpeg2/motion_comp_arm_s.S @@ -0,0 +1,323 @@ +@ motion_comp_arm_s.S +@ Copyright (C) 2004 AGAWA Koji +@ +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. +@ See http://libmpeg2.sourceforge.net/ for updates. +@ +@ mpeg2dec is free software; you can redistribute it and/or modify +@ it under the terms of the GNU General Public License as published by +@ the Free Software Foundation; either version 2 of the License, or +@ (at your option) any later version. +@ +@ mpeg2dec is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +@ GNU General Public License for more details. +@ +@ You should have received a copy of the GNU General Public License +@ along with mpeg2dec; if not, write to the Free Software +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + .text + +@ ---------------------------------------------------------------- + .align + .global MC_put_o_16_arm +MC_put_o_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_o_16_arm_align_jt + add r5, r5, r4, lsl #2 + ldr pc, [r5] + +MC_put_o_16_arm_align0: + ldmia r1, {r4-r7} + add r1, r1, r2 + pld [r1] + stmia r0, {r4-r7} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_o_16_arm_align0 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. 
+ +.macro PROC shift + ldmia r1, {r4-r8} + add r1, r1, r2 + mov r9, r4, lsr #(\shift) + pld [r1] + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + mov r11, r6, lsr #(\shift) + orr r10, r10, r6, lsl #(32-\shift) + mov r12, r7, lsr #(\shift) + orr r11, r11, r7, lsl #(32-\shift) + orr r12, r12, r8, lsl #(32-\shift) + stmia r0, {r9-r12} + subs r3, r3, #1 + add r0, r0, r2 +.endm + +MC_put_o_16_arm_align1: + and r1, r1, #0xFFFFFFFC +1: PROC(8) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align2: + and r1, r1, #0xFFFFFFFC +1: PROC(16) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align3: + and r1, r1, #0xFFFFFFFC +1: PROC(24) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align_jt: + .word MC_put_o_16_arm_align0 + .word MC_put_o_16_arm_align1 + .word MC_put_o_16_arm_align2 + .word MC_put_o_16_arm_align3 + +@ ---------------------------------------------------------------- + .align + .global MC_put_o_8_arm +MC_put_o_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r10, lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_o_8_arm_align_jt + add r5, r5, r4, lsl #2 + ldr pc, [r5] +MC_put_o_8_arm_align0: + ldmia r1, {r4-r5} + add r1, r1, r2 + pld [r1] + stmia r0, {r4-r5} + add r0, r0, r2 + subs r3, r3, #1 + bne MC_put_o_8_arm_align0 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +.macro PROC8 shift + ldmia r1, {r4-r6} + add r1, r1, r2 + mov r9, r4, lsr #(\shift) + pld [r1] + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + orr r10, r10, r6, lsl #(32-\shift) + stmia r0, {r9-r10} + subs r3, r3, #1 + add r0, r0, r2 +.endm + +MC_put_o_8_arm_align1: + and r1, r1, #0xFFFFFFFC +1: PROC8(8) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. 
+ +MC_put_o_8_arm_align2: + and r1, r1, #0xFFFFFFFC +1: PROC8(16) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align3: + and r1, r1, #0xFFFFFFFC +1: PROC8(24) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align_jt: + .word MC_put_o_8_arm_align0 + .word MC_put_o_8_arm_align1 + .word MC_put_o_8_arm_align2 + .word MC_put_o_8_arm_align3 + +@ ---------------------------------------------------------------- +.macro AVG_PW rW1, rW2 + mov \rW2, \rW2, lsl #24 + orr \rW2, \rW2, \rW1, lsr #8 + eor r9, \rW1, \rW2 + and \rW2, \rW1, \rW2 + and r10, r9, r12 + add \rW2, \rW2, r10, lsr #1 + and r10, r9, r11 + add \rW2, \rW2, r10 +.endm + + .align + .global MC_put_x_16_arm +MC_put_x_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_16_arm_align_jt + ldr r11, [r5] + mvn r12, r11 + add r5, r5, r4, lsl #2 + ldr pc, [r5, #4] + +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 + mov \R0, \R0, lsr #(\shift) + orr \R0, \R0, \R1, lsl #(32 - \shift) + mov \R1, \R1, lsr #(\shift) + orr \R1, \R1, \R2, lsl #(32 - \shift) + mov \R2, \R2, lsr #(\shift) + orr \R2, \R2, \R3, lsl #(32 - \shift) + mov \R3, \R3, lsr #(\shift) + orr \R3, \R3, \R4, lsl #(32 - \shift) + mov \R4, \R4, lsr #(\shift) +@ and \R4, \R4, #0xFF +.endm + +MC_put_x_16_arm_align0: + ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_x_16_arm_align0 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 
+MC_put_x_16_arm_align1: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align2: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align3: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align_jt: + .word 0x01010101 + .word MC_put_x_16_arm_align0 + .word MC_put_x_16_arm_align1 + .word MC_put_x_16_arm_align2 + .word MC_put_x_16_arm_align3 + +@ ---------------------------------------------------------------- + .align + .global MC_put_x_8_arm +MC_put_x_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_8_arm_align_jt + ldr r11, [r5] + mvn r12, r11 + add r5, r5, r4, lsl #2 + ldr pc, [r5, #4] + +.macro ADJ_ALIGN_DW shift, R0, R1, R2 + mov \R0, \R0, lsr #(\shift) + orr \R0, \R0, \R1, lsl #(32 - \shift) + mov \R1, \R1, lsr #(\shift) + orr \R1, \R1, \R2, lsl #(32 - \shift) + mov \R2, \R2, lsr #(\shift) +@ and \R4, \R4, #0xFF +.endm + +MC_put_x_8_arm_align0: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_x_8_arm_align0 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR 
content. +MC_put_x_8_arm_align1: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DW 8, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align2: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DW 16, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align3: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DW 24, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align_jt: + .word 0x01010101 + .word MC_put_x_8_arm_align0 + .word MC_put_x_8_arm_align1 + .word MC_put_x_8_arm_align2 + .word MC_put_x_8_arm_align3 diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/libmpeg2new/libmpeg2/motion_comp_mlib.c deleted file mode 100644 --- a/src/libmpeg2new/libmpeg2/motion_comp_mlib.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * motion_comp_mlib.c - * Copyright (C) 2000-2003 Håkan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include -#include -#include -#include -#include - -#include "../include/mpeg2.h" -#include "mpeg2_internal.h" - -static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); -} - -static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); -} - -static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void 
MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); -} - -static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); -} - -static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) 
-{ - if (height == 8) - mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, - stride, stride); -} - -MPEG2_MC_EXTERN (mlib) - -#endif diff --git a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c --- a/src/libmpeg2new/libmpeg2/motion_comp_mmx.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_mmx.c @@ -23,14 +23,14 @@ #include "config.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#include "../include/mmx.h" +#include "mmx.h" #define CPU_MMXEXT 0 #define CPU_3DNOW 1 diff --git a/src/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/libmpeg2new/libmpeg2/motion_comp_vis.c --- a/src/libmpeg2new/libmpeg2/motion_comp_vis.c +++ b/src/libmpeg2new/libmpeg2/motion_comp_vis.c @@ -27,7 +27,7 @@ #include #include "mpeg2.h" -#include +#include "attributes.h" #include "mpeg2_internal.h" #include "vis.h" diff --git a/src/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/libmpeg2new/libmpeg2/mpeg2_internal.h --- a/src/libmpeg2new/libmpeg2/mpeg2_internal.h +++ b/src/libmpeg2new/libmpeg2/mpeg2_internal.h @@ -20,6 +20,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + 
+#ifndef LIBMPEG2_MPEG2_INTERNAL_H +#define LIBMPEG2_MPEG2_INTERNAL_H #define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1) @@ -146,6 +149,9 @@ struct mpeg2_decoder_s { int second_field; int mpeg1; + + /* XXX: xine-specific: q_scale_type lives here, not in mpeg2dec_s */ + int8_t q_scale_type; }; typedef struct { @@ -213,7 +219,8 @@ struct mpeg2dec_s { int16_t display_offset_x, display_offset_y; int copy_matrix; - int8_t q_scale_type, scaled[4]; + int8_t scaled[4]; /* XXX: q_scale_type MOVED to mpeg2_decoder_s */ + //int8_t q_scale_type, scaled[4]; uint8_t quantizer_matrix[4][64]; uint8_t new_quantizer_matrix[4][64]; }; @@ -255,6 +262,9 @@ void mpeg2_idct_init (uint32_t accel); void mpeg2_idct_init (uint32_t accel); /* idct_mmx.c */ +void mpeg2_idct_copy_sse2 (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_sse2 (int last, int16_t * block, + uint8_t * dest, int stride); void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); void mpeg2_idct_add_mmxext (int last, int16_t * block, uint8_t * dest, int stride); @@ -300,3 +310,6 @@ extern mpeg2_mc_t mpeg2_mc_altivec; extern mpeg2_mc_t mpeg2_mc_altivec; extern mpeg2_mc_t mpeg2_mc_alpha; extern mpeg2_mc_t mpeg2_mc_vis; +extern mpeg2_mc_t mpeg2_mc_arm; + +#endif /* LIBMPEG2_MPEG2_INTERNAL_H */ diff --git a/src/libmpeg2new/libmpeg2/slice.c b/src/libmpeg2new/libmpeg2/slice.c --- a/src/libmpeg2new/libmpeg2/slice.c +++ b/src/libmpeg2new/libmpeg2/slice.c @@ -26,8 +26,8 @@ #include -#include "../include/mpeg2.h" -#include "../include/attributes.h" +#include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" extern mpeg2_mc_t mpeg2_mc; @@ -503,7 +503,7 @@ static void get_intra_block_B15 (mpeg2_d } else { /* end of block.
I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ + /* do not exit here we will still exit at the later test :) */ /* if (i >= 128) break; */ /* end of block */ @@ -1247,7 +1247,7 @@ static inline void slice_non_intra_DCT ( ref[0] + offset, decoder->stride, 16); \ table[4] (decoder->dest[1] + decoder->offset, \ ref[1] + offset, decoder->stride, 16); \ - table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + table[4] (decoder->dest[2] + decoder->offset, \ ref[2] + offset, decoder->stride, 16) #define bit_buf (decoder->bitstream_buf) @@ -1587,6 +1587,16 @@ do { \ } \ } while (0) +/** + * No-op motion parser installed in the MPEG-1 MC_FIELD and MC_DMV + * slots so a malformed stream cannot trigger a call through NULL. + */ +static void motion_dummy (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ +} + void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) { @@ -1644,7 +1654,9 @@ void mpeg2_init_fbuf (mpeg2_decoder_t * if (decoder->mpeg1) { decoder->motion_parser[0] = motion_zero_420; - decoder->motion_parser[MC_FRAME] = motion_mp1; + decoder->motion_parser[MC_FIELD] = motion_dummy; + decoder->motion_parser[MC_FRAME] = motion_mp1; + decoder->motion_parser[MC_DMV] = motion_dummy; decoder->motion_parser[4] = motion_reuse_420; } else if (decoder->picture_structure == FRAME_PICTURE) { if (decoder->chroma_format == 0) { @@ -1869,6 +1881,14 @@ void mpeg2_slice (mpeg2_decoder_t * cons motion_parser_t * parser; + if ( ((macroblock_modes >> MOTION_TYPE_SHIFT) < 0) + || ((macroblock_modes >> MOTION_TYPE_SHIFT) >= + (int)(sizeof(decoder->motion_parser) + / sizeof(decoder->motion_parser[0]))) + ) { + break; // Illegal: motion type is out of range for motion_parser[]
+ } + parser = decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT]; MOTION_CALL (parser, macroblock_modes); diff --git a/src/libmpeg2new/libmpeg2/vlc.h b/src/libmpeg2new/libmpeg2/vlc.h --- a/src/libmpeg2new/libmpeg2/vlc.h +++ b/src/libmpeg2new/libmpeg2/vlc.h @@ -20,6 +20,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef LIBMPEG2_VLC_H +#define LIBMPEG2_VLC_H #define GETWORD(bit_buf,shift,bit_ptr) \ do { \ @@ -427,3 +430,5 @@ static const MBAtab MBA_11 [] = { { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} }; + +#endif /* LIBMPEG2_VLC_H */