diff -Nurp ffmpeg-old/libavcodec/i386/dsputil_h264_template_mmx.c ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c --- ffmpeg-old/libavcodec/i386/dsputil_h264_template_mmx.c 2007-03-20 11:58:13.000000000 +0000 +++ ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c 2007-10-05 18:10:09.000000000 +0100 @@ -188,8 +188,8 @@ static void H264_CHROMA_MC4_TMPL(uint8_t "pxor %%mm7, %%mm7 \n\t" "movd %5, %%mm2 \n\t" "movd %6, %%mm3 \n\t" - "movq "MANGLE(ff_pw_8)", %%mm4\n\t" - "movq "MANGLE(ff_pw_8)", %%mm5\n\t" + "movq %7, %%mm4 \n\t" + "movq %7, %%mm5 \n\t" "punpcklwd %%mm2, %%mm2 \n\t" "punpcklwd %%mm3, %%mm3 \n\t" "punpcklwd %%mm2, %%mm2 \n\t" @@ -246,7 +246,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t "sub $2, %2 \n\t" "jnz 1b \n\t" : "+r"(dst), "+r"(src), "+r"(h) - : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y) + : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8) ); } diff -Nurp ffmpeg-old/libavcodec/i386/dsputil_mmx.c ffmpeg/libavcodec/i386/dsputil_mmx.c --- ffmpeg-old/libavcodec/i386/dsputil_mmx.c 2007-10-05 17:31:09.000000000 +0100 +++ ffmpeg/libavcodec/i386/dsputil_mmx.c 2007-10-06 01:11:59.000000000 +0100 @@ -664,15 +664,14 @@ static inline void transpose4x4(uint8_t "punpckhwd %%mm2, %%mm1 \n\t" "movd %%mm0, %0 \n\t" "punpckhdq %%mm0, %%mm0 \n\t" - "movd %%mm0, %1 \n\t" - "movd %%mm1, %2 \n\t" + "movd %%mm0, (%0,%1) \n\t" + "movd %%mm1, (%0,%1,2) \n\t" "punpckhdq %%mm1, %%mm1 \n\t" - "movd %%mm1, %3 \n\t" + "lea (%1,%1,2), %1 \n\t" + "movd %%mm1, (%0,%1) \n\t" - : "=m" (*(uint32_t*)(dst + 0*dst_stride)), - "=m" (*(uint32_t*)(dst + 1*dst_stride)), - "=m" (*(uint32_t*)(dst + 2*dst_stride)), - "=m" (*(uint32_t*)(dst + 3*dst_stride)) + : "=r" (*(uint32_t*)(dst)), "+r" (dst_stride) + :: "memory" ); } @@ -1917,7 +1916,7 @@ static int ssd_int8_vs_int16_mmx(int8_t #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ - "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\ + "movq %5, %%mm4 
\n\t" /* 20 */\ "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\ "movq "#in7", " #m3 " \n\t" /* d */\ "movq "#in0", %%mm5 \n\t" /* D */\ @@ -1929,7 +1928,7 @@ static int ssd_int8_vs_int16_mmx(int8_t "paddw " #m5 ", %%mm6 \n\t" /* x2 */\ "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\ "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\ - "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\ + "pmullw %6, %%mm5 \n\t" /* -6x2 + 3x3 */\ "paddw " #rnd ", %%mm4 \n\t" /* x2 */\ "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\ "psraw $5, %%mm5 \n\t"\ @@ -1963,15 +1962,15 @@ static void OPNAME ## mpeg4_qpel16_h_low "paddw %%mm5, %%mm5 \n\t" /* 2b */\ "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ + "pmullw %6, %%mm6 \n\t" /* 3c - 6b */\ "paddw %%mm4, %%mm0 \n\t" /* a */\ "paddw %%mm1, %%mm5 \n\t" /* d */\ - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ + "pmullw %5, %%mm0 \n\t" /* 20a */\ "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ - "paddw %6, %%mm6 \n\t"\ + "paddw %8, %%mm6 \n\t"\ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ "psraw $5, %%mm0 \n\t"\ - "movq %%mm0, %5 \n\t"\ + "movq %%mm0, %7 \n\t"\ /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ \ "movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\ @@ -1989,15 +1988,15 @@ static void OPNAME ## mpeg4_qpel16_h_low "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\ "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\ "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\ - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ + "pmullw %6, %%mm3 \n\t" /* 3c - 6b */\ "paddw %%mm2, %%mm1 \n\t" /* a */\ "paddw %%mm6, %%mm4 \n\t" /* d */\ - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ + "pmullw %5, %%mm1 \n\t" /* 20a */\ "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\ - "paddw %6, %%mm1 \n\t"\ + "paddw %8, %%mm1 \n\t"\ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\ "psraw $5, %%mm3 \n\t"\ - "movq %5, %%mm1 \n\t"\ + "movq %7, %%mm1 \n\t"\ "packuswb %%mm3, %%mm1 \n\t"\ 
OP_MMX2(%%mm1, (%1),%%mm4, q)\ /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\ @@ -2015,7 +2014,7 @@ static void OPNAME ## mpeg4_qpel16_h_low "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\ "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\ "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\ - "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\ + "pmullw %6, %%mm0 \n\t" /* 3c - 6b */\ "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\ "paddw %%mm3, %%mm2 \n\t" /* d */\ "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\ @@ -2023,8 +2022,8 @@ static void OPNAME ## mpeg4_qpel16_h_low "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\ "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\ "paddw %%mm2, %%mm6 \n\t" /* a */\ - "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\ - "paddw %6, %%mm0 \n\t"\ + "pmullw %5, %%mm6 \n\t" /* 20a */\ + "paddw %8, %%mm0 \n\t"\ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ "psraw $5, %%mm0 \n\t"\ /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\ @@ -2038,10 +2037,10 @@ static void OPNAME ## mpeg4_qpel16_h_low "paddw %%mm2, %%mm5 \n\t" /* d */\ "paddw %%mm6, %%mm6 \n\t" /* 2b */\ "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\ - "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\ - "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\ + "pmullw %5, %%mm3 \n\t" /* 20a */\ + "pmullw %6, %%mm4 \n\t" /* 3c - 6b */\ "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\ - "paddw %6, %%mm4 \n\t"\ + "paddw %8, %%mm4 \n\t"\ "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\ "psraw $5, %%mm4 \n\t"\ "packuswb %%mm4, %%mm0 \n\t"\ @@ -2051,8 +2050,8 @@ static void OPNAME ## mpeg4_qpel16_h_low "add %4, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ - : "+a"(src), "+c"(dst), "+m"(h)\ - : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ + : : "a"(src), "c"(dst), "m"(h), \ + "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\ : "memory"\ );\ }\ @@ -2130,12 +2129,12 @@ static void OPNAME ## 
mpeg4_qpel8_h_lowp "paddw %%mm5, %%mm5 \n\t" /* 2b */\ "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ + "pmullw %6, %%mm6 \n\t" /* 3c - 6b */\ "paddw %%mm4, %%mm0 \n\t" /* a */\ "paddw %%mm1, %%mm5 \n\t" /* d */\ - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ + "pmullw %5, %%mm0 \n\t" /* 20a */\ "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ - "paddw %6, %%mm6 \n\t"\ + "paddw %8, %%mm6 \n\t"\ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ "psraw $5, %%mm0 \n\t"\ /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ @@ -2151,10 +2150,10 @@ static void OPNAME ## mpeg4_qpel8_h_lowp "paddw %%mm5, %%mm4 \n\t" /* d */\ "paddw %%mm2, %%mm2 \n\t" /* 2b */\ "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ + "pmullw %5, %%mm1 \n\t" /* 20a */\ + "pmullw %6, %%mm3 \n\t" /* 3c - 6b */\ "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\ - "paddw %6, %%mm1 \n\t"\ + "paddw %8, %%mm1 \n\t"\ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\ "psraw $5, %%mm3 \n\t"\ "packuswb %%mm3, %%mm0 \n\t"\ @@ -2164,8 +2163,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowp "add %4, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ - : "+a"(src), "+c"(dst), "+m"(h)\ - : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ + : : "a"(src), "c"(dst), "m"(h), \ + "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\ : "memory"\ );\ }\ @@ -2244,39 +2243,39 @@ static void OPNAME ## mpeg4_qpel16_v_low "movq 8(%0), %%mm1 \n\t"\ "movq 16(%0), %%mm2 \n\t"\ "movq 24(%0), %%mm3 \n\t"\ - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0), 8(%0), 
(%0), 32(%0), (%1), OP)\ + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ \ - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, 
%%mm1, %7, %8, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ \ - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ "add %4, %1 \n\t" \ - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ \ "add $136, %0 \n\t"\ - "add %6, %1 \n\t"\ + "add %8, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ \ - : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\ + : : "r"(temp_ptr), "r"(dst), "rm"(count), \ + "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*(long)dstStride)\ :"memory"\ );\ }\ @@ -2316,27 +2315,27 @@ static void OPNAME ## mpeg4_qpel8_v_lowp "movq 8(%0), %%mm1 \n\t"\ "movq 16(%0), %%mm2 \n\t"\ "movq 24(%0), %%mm3 \n\t"\ - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm2, 
%%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ \ - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ \ - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ "add %4, %1 \n\t"\ - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ \ "add $72, %0 \n\t"\ - "add %6, %1 \n\t"\ + "add %8, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ \ - : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\ + : : "r"(temp_ptr), "r"(dst), "rm"(count), \ + "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*(long)dstStride)\ : "memory"\ );\ }\ diff -Nurp ffmpeg-old/libavcodec/i386/h264dsp_mmx.c ffmpeg/libavcodec/i386/h264dsp_mmx.c --- ffmpeg-old/libavcodec/i386/h264dsp_mmx.c 2007-05-25 17:31:52.000000000 +0100 +++ ffmpeg/libavcodec/i386/h264dsp_mmx.c 2007-10-05 17:31:30.000000000 +0100 @@ -341,21 +341,21 @@ static void ff_h264_idct8_dc_add_mmx2(ui // in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) // out: 
mm1=p0' mm2=q0' // clobbers: mm0,3-6 -#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\ +#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f, pb_3, pb_A1)\ "movq %%mm1 , %%mm5 \n\t"\ "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\ "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\ "pcmpeqb %%mm4 , %%mm4 \n\t"\ "pxor %%mm4 , %%mm3 \n\t"\ "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\ - "pavgb "MANGLE(ff_pb_3)" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\ + "pavgb "#pb_3" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\ "pxor %%mm1 , %%mm4 \n\t"\ "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\ "pavgb %%mm5 , %%mm3 \n\t"\ "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\ - "movq "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\ + "movq "#pb_A1" , %%mm6 \n\t"\ "psubusb %%mm3 , %%mm6 \n\t"\ - "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\ + "psubusb "#pb_A1" , %%mm3 \n\t"\ "pminub %%mm7 , %%mm6 \n\t"\ "pminub %%mm7 , %%mm3 \n\t"\ "psubusb %%mm6 , %%mm1 \n\t"\ @@ -422,14 +422,14 @@ static inline void h264_loop_filter_luma H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6) /* filter p0, q0 */ - H264_DEBLOCK_P0_Q0(%8, unused) + H264_DEBLOCK_P0_Q0(%8, unused, %9, %10) "movq %%mm1, (%1,%3,2) \n\t" "movq %%mm2, (%2) \n\t" : "=m"(*tmp0) : "r"(pix-3*stride), "r"(pix), "r"((long)stride), "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1), - "m"(mm_bone) + "m"(mm_bone), "m" (ff_pb_3), "m" (ff_pb_A1) ); } @@ -470,13 +470,13 @@ static inline void h264_loop_filter_chro "movd %3, %%mm6 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask - H264_DEBLOCK_P0_Q0(%6, %7) + H264_DEBLOCK_P0_Q0(%6, %7, %8, %9) "movq %%mm1, (%0,%2) \n\t" "movq %%mm2, (%1) \n\t" :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), "r"(*(uint32_t*)tc0), - "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F) + "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F), "m" (ff_pb_3), "m" (ff_pb_A1) ); } @@ -583,22 +583,26 @@ static void h264_loop_filter_strength_mm "paddb %%mm6, 
%%mm1 \n\t" "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] "por %%mm1, %%mm0 \n\t" + ::"m"(ref[l][b_idx]), + "m"(ref[l][b_idx+d_idx]) + : "memory" + ); - "movq %2, %%mm1 \n\t" - "movq %3, %%mm2 \n\t" - "psubw %4, %%mm1 \n\t" - "psubw %5, %%mm2 \n\t" + asm volatile( + "movq %0, %%mm1 \n\t" + "movq %1, %%mm2 \n\t" + "psubw %2, %%mm1 \n\t" + "psubw %3, %%mm2 \n\t" "packsswb %%mm2, %%mm1 \n\t" "paddb %%mm5, %%mm1 \n\t" "pminub %%mm4, %%mm1 \n\t" "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit "por %%mm1, %%mm0 \n\t" - ::"m"(ref[l][b_idx]), - "m"(ref[l][b_idx+d_idx]), - "m"(mv[l][b_idx][0]), + ::"m"(mv[l][b_idx][0]), "m"(mv[l][b_idx+2][0]), "m"(mv[l][b_idx+d_idx][0]), "m"(mv[l][b_idx+d_idx+2][0]) + : "memory" ); } } diff -Nurp ffmpeg-old/libavcodec/i386/motion_est_mmx.c ffmpeg/libavcodec/i386/motion_est_mmx.c --- ffmpeg-old/libavcodec/i386/motion_est_mmx.c 2007-05-25 17:31:52.000000000 +0100 +++ ffmpeg/libavcodec/i386/motion_est_mmx.c 2007-10-06 01:13:07.000000000 +0100 @@ -167,7 +167,7 @@ static inline void sad8_y2a_mmx2(uint8_t static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { asm volatile( - "movq "MANGLE(bone)", %%mm5 \n\t" + "movq %4, %%mm5 \n\t" "movq (%1), %%mm0 \n\t" "pavgb 1(%1), %%mm0 \n\t" "add %3, %1 \n\t" @@ -190,7 +190,7 @@ static inline void sad8_4_mmx2(uint8_t * "sub $2, %0 \n\t" " jg 1b \n\t" : "+r" (h), "+r" (blk1), "+r" (blk2) - : "r" ((long)stride) + : "r" ((long)stride), "m" (bone) ); } @@ -258,7 +258,7 @@ static inline void sad8_4_mmx(uint8_t *b "punpckhbw %%mm7, %%mm5 \n\t" "paddw %%mm4, %%mm2 \n\t" "paddw %%mm5, %%mm3 \n\t" - "movq 16+"MANGLE(round_tab)", %%mm5 \n\t" + "movq %5, %%mm5 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw %%mm3, %%mm1 \n\t" "paddw %%mm5, %%mm0 \n\t" @@ -281,7 +281,7 @@ static inline void sad8_4_mmx(uint8_t *b "add %4, %%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) + : "r" (blk1 - len), "r" (blk1 - len 
+ stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[2]) ); } diff -Nurp ffmpeg-old/libavcodec/i386/mpegvideo_mmx_template.c ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c --- ffmpeg-old/libavcodec/i386/mpegvideo_mmx_template.c 2007-06-16 10:01:26.000000000 +0100 +++ ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c 2007-10-05 18:07:45.000000000 +0100 @@ -154,7 +154,7 @@ static int RENAME(dct_quantize)(MpegEncC SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0 - MOVQ" (%2), "MM"5 \n\t" // qmat[0] + MOVQ" %2, "MM"5 \n\t" // qmat[0] "pxor "MM"6, "MM"6 \n\t" "psubw (%3), "MM"6 \n\t" // -bias[0] "mov $-128, %%"REG_a" \n\t" @@ -178,15 +178,16 @@ static int RENAME(dct_quantize)(MpegEncC "movd "MM"3, %%"REG_a" \n\t" "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) - : "r" (block+64), "r" (qmat), "r" (bias), + : "r" (block+64), "m" (qmat), "r" (bias), "r" (inv_zigzag_direct16+64), "r" (temp_block+64) ); }else{ // FMT_H263 asm volatile( - "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 + "movd %0, %%mm3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0 + "push %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t" ASMALIGN(4) "1: \n\t" @@ -209,9 +210,12 @@ static int RENAME(dct_quantize)(MpegEncC PMAX(MM"3", MM"0") "movd "MM"3, %%"REG_a" \n\t" "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 - : "+a" (last_non_zero_p1) + "mov %%"REG_a", %0 \n\t" + "pop %%"REG_a" \n\t" + : "+m" (last_non_zero_p1) : "r" (block+64), "r" (qmat+64), "r" (bias+64), - "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + "r" (inv_zigzag_direct16+64), "r" (temp_block+64), + "i" (sizeof(long)) ); } asm volatile( diff -Nurp ffmpeg-old/libavcodec/i386/simple_idct_mmx.c ffmpeg/libavcodec/i386/simple_idct_mmx.c --- ffmpeg-old/libavcodec/i386/simple_idct_mmx.c 2007-05-25 17:31:52.000000000 +0100 +++ ffmpeg/libavcodec/i386/simple_idct_mmx.c 2007-10-05 18:12:10.000000000 +0100 @@ -363,7 +363,7 @@ static 
inline void idct(int16_t *block) "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ - "movq "MANGLE(wm1010)", %%mm4 \n\t"\ + "movq %3, %%mm4 \n\t"\ "pand %%mm0, %%mm4 \n\t"\ "por %%mm1, %%mm4 \n\t"\ "por %%mm2, %%mm4 \n\t"\ @@ -437,7 +437,7 @@ static inline void idct(int16_t *block) "jmp 2f \n\t"\ "1: \n\t"\ "pslld $16, %%mm0 \n\t"\ - "#paddd "MANGLE(d40000)", %%mm0 \n\t"\ + "#paddd %4, %%mm0 \n\t"\ "psrad $13, %%mm0 \n\t"\ "packssdw %%mm0, %%mm0 \n\t"\ "movq %%mm0, " #dst " \n\t"\ @@ -471,7 +471,7 @@ COL_IDCT( 24(%1), 88(%1), 56(%1), 120(% "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ - "movq "MANGLE(wm1010)", %%mm4 \n\t"\ + "movq %3, %%mm4 \n\t"\ "pand %%mm0, %%mm4 \n\t"\ "por %%mm1, %%mm4 \n\t"\ "por %%mm2, %%mm4 \n\t"\ @@ -545,7 +545,7 @@ COL_IDCT( 24(%1), 88(%1), 56(%1), 120(% "jmp 2f \n\t"\ "1: \n\t"\ "pslld $16, %%mm0 \n\t"\ - "paddd "MANGLE(d40000)", %%mm0 \n\t"\ + "paddd %4, %%mm0 \n\t"\ "psrad $13, %%mm0 \n\t"\ "packssdw %%mm0, %%mm0 \n\t"\ "movq %%mm0, " #dst " \n\t"\ @@ -1270,7 +1270,7 @@ Temp */ "9: \n\t" - :: "r" (block), "r" (temp), "r" (coeffs) + :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m" (d40000) : "%eax" ); } diff -Nurp ffmpeg-old/libavcodec/i386/snowdsp_mmx.c ffmpeg/libavcodec/i386/snowdsp_mmx.c --- ffmpeg-old/libavcodec/i386/snowdsp_mmx.c 2007-05-25 17:31:52.000000000 +0100 +++ ffmpeg/libavcodec/i386/snowdsp_mmx.c 2007-10-05 17:33:55.000000000 +0100 @@ -629,10 +629,9 @@ void ff_snow_vertical_compose97i_mmx(DWT #define snow_inner_add_yblock_sse2_header \ DWTELEM * * dst_array = sb->line + src_y;\ - long tmp;\ + long tmp = b_h;\ asm volatile(\ - "mov %7, %%"REG_c" \n\t"\ - "mov %6, %2 \n\t"\ + "mov %6, %%"REG_c" \n\t"\ "mov %4, %%"REG_S" \n\t"\ "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\ "pcmpeqd %%xmm3, %%xmm3 \n\t"\ @@ -689,9 +688,9 @@ void 
ff_snow_vertical_compose97i_mmx(DWT #define snow_inner_add_yblock_sse2_end_common2\ "jnz 1b \n\t"\ - :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\ + :"+m"(dst8),"+m"(dst_array),"=m"(tmp)\ :\ - "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ + "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)src_stride):\ "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); #define snow_inner_add_yblock_sse2_end_8\ @@ -705,7 +704,7 @@ void ff_snow_vertical_compose97i_mmx(DWT #define snow_inner_add_yblock_sse2_end_16\ "add $"PTR_SIZE"*1, %1 \n\t"\ snow_inner_add_yblock_sse2_end_common1\ - "dec %2 \n\t"\ + "sub $1, %2 \n\t"\ snow_inner_add_yblock_sse2_end_common2 static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, @@ -795,10 +794,9 @@ snow_inner_add_yblock_sse2_end_16 #define snow_inner_add_yblock_mmx_header \ DWTELEM * * dst_array = sb->line + src_y;\ - long tmp;\ + long tmp = b_h;\ asm volatile(\ - "mov %7, %%"REG_c" \n\t"\ - "mov %6, %2 \n\t"\ + "mov %6, %%"REG_c" \n\t"\ "mov %4, %%"REG_S" \n\t"\ "pxor %%mm7, %%mm7 \n\t" /* 0 */\ "pcmpeqd %%mm3, %%mm3 \n\t"\ @@ -861,11 +859,11 @@ snow_inner_add_yblock_sse2_end_16 "add %%"REG_c", (%%"REG_a") \n\t"\ "add $"PTR_SIZE"*1, %1 \n\t"\ "add %%"REG_c", %0 \n\t"\ - "dec %2 \n\t"\ + "sub $1, %2 \n\t"\ "jnz 1b \n\t"\ - :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\ + :"+m"(dst8),"+m"(dst_array),"=m"(tmp)\ :\ - "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ + "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)src_stride):\ "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, diff -Nurp ffmpeg-old/libpostproc/postprocess_template.c ffmpeg/libpostproc/postprocess_template.c --- ffmpeg-old/libpostproc/postprocess_template.c 2007-06-16 10:01:28.000000000 +0100 +++ 
ffmpeg/libpostproc/postprocess_template.c 2007-10-05 17:31:31.000000000 +0100 @@ -388,16 +388,16 @@ static inline void RENAME(vertRK1Filter) // FIXME rounding asm volatile( "pxor %%mm7, %%mm7 \n\t" // 0 - "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE + "movq %2, %%mm6 \n\t" // MIN_SIGNED_BYTE "leal (%0, %1), %%"REG_a" \n\t" "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 - "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP + "movq %3, %%mm0 \n\t" // QP,..., QP "movq %%mm0, %%mm1 \n\t" // QP,..., QP - "paddusb "MANGLE(b02)", %%mm0 \n\t" + "paddusb %4, %%mm0 \n\t" "psrlw $2, %%mm0 \n\t" - "pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4 + "pand %5, %%mm0 \n\t" // QP/4,..., QP/4 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ... "movq (%0, %1, 4), %%mm2 \n\t" // line 4 "movq (%%"REG_c"), %%mm3 \n\t" // line 5 @@ -426,8 +426,8 @@ static inline void RENAME(vertRK1Filter) "paddb %%mm6, %%mm5 \n\t" "psrlw $2, %%mm5 \n\t" - "pand "MANGLE(b3F)", %%mm5 \n\t" - "psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8 + "pand %5, %%mm5 \n\t" + "psubb %6, %%mm5 \n\t" // (l5-l4)/8 "movq (%%"REG_a", %1, 2), %%mm2 \n\t" "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80 @@ -442,7 +442,7 @@ static inline void RENAME(vertRK1Filter) "movq %%mm2, (%%"REG_c", %1) \n\t" : - : "r" (src), "r" ((long)stride) + : "r" (src), "r" ((long)stride), "m" (b80), "m" (pQPb), "m" (b02), "m" (b3F), "m" (b20) : "%"REG_a, "%"REG_c ); #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) @@ -518,7 +518,7 @@ static inline void RENAME(vertX1Filter)( "paddusb %%mm0, %%mm0 \n\t" "psubusb %%mm0, %%mm4 \n\t" "pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0 - "psubusb "MANGLE(b01)", %%mm3 \n\t" + "psubusb %3, %%mm3 \n\t" "pand %%mm4, %%mm3 \n\t" // d <= QP ? 
d : 0 PAVGB(%%mm7, %%mm3) // d/2 @@ -567,7 +567,7 @@ static inline void RENAME(vertX1Filter)( "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7 : - : "r" (src), "r" ((long)stride), "m" (co->pQPb) + : "r" (src), "r" ((long)stride), "m" (co->pQPb), "m" (b01) : "%"REG_a, "%"REG_c ); #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) @@ -700,17 +700,17 @@ static inline void RENAME(doVertDefFilte PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 "movq %2, %%mm4 \n\t" // QP //FIXME QP+1 ? - "paddusb "MANGLE(b01)", %%mm4 \n\t" + "paddusb %3, %%mm4 \n\t" "pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP "psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 "pand %%mm4, %%mm3 \n\t" "movq %%mm3, %%mm1 \n\t" -// "psubusb "MANGLE(b01)", %%mm3 \n\t" +// "psubusb %3, %%mm3 \n\t" PAVGB(%%mm7, %%mm3) PAVGB(%%mm7, %%mm3) "paddusb %%mm1, %%mm3 \n\t" -// "paddusb "MANGLE(b01)", %%mm3 \n\t" +// "paddusb %3, %%mm3 \n\t" "movq (%%"REG_a", %1, 2), %%mm6 \n\t" //l3 "movq (%0, %1, 4), %%mm5 \n\t" //l4 @@ -723,7 +723,7 @@ static inline void RENAME(doVertDefFilte "pand %%mm0, %%mm3 \n\t" PMINUB(%%mm5, %%mm3, %%mm0) - "psubusb "MANGLE(b01)", %%mm3 \n\t" + "psubusb %3, %%mm3 \n\t" PAVGB(%%mm7, %%mm3) "movq (%%"REG_a", %1, 2), %%mm0 \n\t" @@ -755,7 +755,7 @@ static inline void RENAME(doVertDefFilte "movq (%%"REG_a", %1), %%mm3 \n\t" // l2 "pxor %%mm6, %%mm2 \n\t" // -l5-1 "movq %%mm2, %%mm5 \n\t" // -l5-1 - "movq "MANGLE(b80)", %%mm4 \n\t" // 128 + "movq %4, %%mm4 \n\t" // 128 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 @@ -767,7 +767,7 @@ static inline void RENAME(doVertDefFilte "pxor %%mm6, %%mm2 \n\t" // -l1-1 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 PAVGB((%0), %%mm1) // (l0-l3+256)/2 - "movq "MANGLE(b80)", %%mm3 \n\t" // 128 + "movq %4, %%mm3 \n\t" // 128 PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 
@@ -777,14 +777,14 @@ static inline void RENAME(doVertDefFilte "movq (%%"REG_c", %1, 2), %%mm1 \n\t" // l7 "pxor %%mm6, %%mm1 \n\t" // -l7-1 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 - "movq "MANGLE(b80)", %%mm2 \n\t" // 128 + "movq %4, %%mm2 \n\t" // 128 PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128 // mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 - "movq "MANGLE(b00)", %%mm1 \n\t" // 0 - "movq "MANGLE(b00)", %%mm5 \n\t" // 0 + "movq %5, %%mm1 \n\t" // 0 + "movq %5, %%mm5 \n\t" // 0 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| @@ -793,7 +793,7 @@ static inline void RENAME(doVertDefFilte // mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 - "movq "MANGLE(b00)", %%mm7 \n\t" // 0 + "movq %5, %%mm7 \n\t" // 0 "movq %2, %%mm2 \n\t" // QP PAVGB(%%mm6, %%mm2) // 128 + QP/2 "psubb %%mm6, %%mm2 \n\t" @@ -807,13 +807,13 @@ static inline void RENAME(doVertDefFilte // mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 "movq %%mm4, %%mm3 \n\t" // d - "psubusb "MANGLE(b01)", %%mm4 \n\t" + "psubusb %3, %%mm4 \n\t" PAVGB(%%mm7, %%mm4) // d/32 PAVGB(%%mm7, %%mm4) // (d + 32)/64 "paddb %%mm3, %%mm4 \n\t" // 5d/64 "pand %%mm2, %%mm4 \n\t" - "movq "MANGLE(b80)", %%mm5 \n\t" // 128 + "movq %4, %%mm5 \n\t" // 128 "psubb %%mm0, %%mm5 \n\t" // q "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) @@ -835,7 +835,7 @@ static inline void RENAME(doVertDefFilte "movq %%mm2, (%0, %1, 4) \n\t" : - : "r" (src), "r" ((long)stride), "m" (c->pQPb) + : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m" (b01), "m" (b80), "m" (b00) : "%"REG_a, "%"REG_c ); @@ -1079,10 +1079,10 @@ src-=8; "psubusw %%mm1, %%mm5 \n\t" // ld - "movq "MANGLE(w05)", %%mm2 \n\t" // 5 + "movq %3, %%mm2 \n\t" // 5 "pmullw %%mm2, %%mm4 \n\t" 
"pmullw %%mm2, %%mm5 \n\t" - "movq "MANGLE(w20)", %%mm2 \n\t" // 32 + "movq %4, %%mm2 \n\t" // 32 "paddw %%mm2, %%mm4 \n\t" "paddw %%mm2, %%mm5 \n\t" "psrlw $6, %%mm4 \n\t" @@ -1132,7 +1132,7 @@ src-=8; "movq %%mm0, (%0, %1) \n\t" : "+r" (src) - : "r" ((long)stride), "m" (c->pQPb) + : "r" ((long)stride), "m" (c->pQPb), "m" (w05), "m" (w20) : "%"REG_a, "%"REG_c ); #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) @@ -1276,7 +1276,7 @@ FIND_MIN_MAX((%0, %1, 8)) "movq %%mm6, %%mm0 \n\t" // max "psubb %%mm7, %%mm6 \n\t" // max - min "movd %%mm6, %%ecx \n\t" - "cmpb "MANGLE(deringThreshold)", %%cl \n\t" + "cmpb %4, %%cl \n\t" " jb 1f \n\t" "lea -24(%%"REG_SP"), %%"REG_c" \n\t" "and "ALIGN_MASK", %%"REG_c" \n\t" @@ -1303,9 +1303,9 @@ FIND_MIN_MAX((%0, %1, 8)) "psubusb %%mm7, %%mm0 \n\t" "psubusb %%mm7, %%mm2 \n\t" "psubusb %%mm7, %%mm3 \n\t" - "pcmpeqb "MANGLE(b00)", %%mm0 \n\t" // L10 > a ? 0 : -1 - "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L20 > a ? 0 : -1 - "pcmpeqb "MANGLE(b00)", %%mm3 \n\t" // L00 > a ? 0 : -1 + "pcmpeqb %5, %%mm0 \n\t" // L10 > a ? 0 : -1 + "pcmpeqb %5, %%mm2 \n\t" // L20 > a ? 0 : -1 + "pcmpeqb %5, %%mm3 \n\t" // L00 > a ? 0 : -1 "paddb %%mm2, %%mm0 \n\t" "paddb %%mm3, %%mm0 \n\t" @@ -1326,9 +1326,9 @@ FIND_MIN_MAX((%0, %1, 8)) "psubusb %%mm7, %%mm2 \n\t" "psubusb %%mm7, %%mm4 \n\t" "psubusb %%mm7, %%mm5 \n\t" - "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L11 > a ? 0 : -1 - "pcmpeqb "MANGLE(b00)", %%mm4 \n\t" // L21 > a ? 0 : -1 - "pcmpeqb "MANGLE(b00)", %%mm5 \n\t" // L01 > a ? 0 : -1 + "pcmpeqb %5, %%mm2 \n\t" // L11 > a ? 0 : -1 + "pcmpeqb %5, %%mm4 \n\t" // L21 > a ? 0 : -1 + "pcmpeqb %5, %%mm5 \n\t" // L01 > a ? 0 : -1 "paddb %%mm4, %%mm2 \n\t" "paddb %%mm5, %%mm2 \n\t" // 0, 2, 3, 1 @@ -1353,7 +1353,7 @@ FIND_MIN_MAX((%0, %1, 8)) "psubusb " #lx ", " #t1 " \n\t"\ "psubusb " #lx ", " #t0 " \n\t"\ "psubusb " #lx ", " #sx " \n\t"\ - "movq "MANGLE(b00)", " #lx " \n\t"\ + "movq %5, " #lx " \n\t"\ "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 
0 : -1*/\ "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ @@ -1369,8 +1369,8 @@ FIND_MIN_MAX((%0, %1, 8)) PMINUB(t1, pplx, t0)\ "paddb " #sx ", " #ppsx " \n\t"\ "paddb " #psx ", " #ppsx " \n\t"\ - "#paddb "MANGLE(b02)", " #ppsx " \n\t"\ - "pand "MANGLE(b08)", " #ppsx " \n\t"\ + "#paddb %6, " #ppsx " \n\t"\ + "pand %7, " #ppsx " \n\t"\ "pcmpeqb " #lx ", " #ppsx " \n\t"\ "pand " #ppsx ", " #pplx " \n\t"\ "pandn " #dst ", " #ppsx " \n\t"\ @@ -1406,7 +1406,7 @@ DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) "1: \n\t" - : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) + : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2), "m" (deringThreshold), "m" (b00), "m" (b02), "m" (b08) : "%"REG_a, "%"REG_d, "%"REG_c ); #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) @@ -2284,7 +2284,7 @@ static inline void RENAME(tempNoiseReduc #else //L1_DIFF #if defined (FAST_L2_DIFF) "pcmpeqb %%mm7, %%mm7 \n\t" - "movq "MANGLE(b80)", %%mm6 \n\t" + "movq %4, %%mm6 \n\t" "pxor %%mm0, %%mm0 \n\t" #define REAL_L2_DIFF_CORE(a, b)\ "movq " #a ", %%mm5 \n\t"\ @@ -2533,7 +2533,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc "4: \n\t" - :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast) + :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast), "m" (b80) : "%"REG_a, "%"REG_d, "%"REG_c, "memory" ); #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) @@ -2806,8 +2806,8 @@ asm volatile( "movq %%mm6, %%mm1 \n\t" "psllw $2, %%mm0 \n\t" "psllw $2, %%mm1 \n\t" - "paddw "MANGLE(w04)", %%mm0 \n\t" - "paddw "MANGLE(w04)", %%mm1 \n\t" + "paddw %5, %%mm0 \n\t" + "paddw %5, %%mm1 \n\t" #define NEXT\ "movq (%0), %%mm2 \n\t"\ @@ -2896,7 +2896,7 @@ asm volatile( "mov %4, %0 \n\t" //FIXME : "+&r"(src) - : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) + : "r" ((long)step), "m" (c->pQPb), "r"(sums), 
"g"(src), "m" (w04) ); src+= step; // src points to begin of the 8x8 Block @@ -3113,10 +3113,10 @@ asm volatile( "psubusw %%mm1, %%mm5 \n\t" // ld - "movq "MANGLE(w05)", %%mm2 \n\t" // 5 + "movq %4, %%mm2 \n\t" // 5 "pmullw %%mm2, %%mm4 \n\t" "pmullw %%mm2, %%mm5 \n\t" - "movq "MANGLE(w20)", %%mm2 \n\t" // 32 + "movq %5, %%mm2 \n\t" // 32 "paddw %%mm2, %%mm4 \n\t" "paddw %%mm2, %%mm5 \n\t" "psrlw $6, %%mm4 \n\t" @@ -3168,7 +3168,7 @@ asm volatile( "movq %%mm0, (%0, %1) \n\t" : "+r" (temp_src) - : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask) + : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask), "m" (w05), "m" (w20) : "%"REG_a, "%"REG_c ); } @@ -3199,10 +3199,8 @@ static inline void RENAME(blockCopy)(uin { #ifdef HAVE_MMX asm volatile( - "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset - "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale - "lea (%2,%4), %%"REG_a" \n\t" - "lea (%3,%5), %%"REG_d" \n\t" + "movq (%0), %%mm2 \n\t" // packedYOffset + "movq 8(%0), %%mm3 \n\t" // packedYScale "pxor %%mm4, %%mm4 \n\t" #ifdef HAVE_MMX2 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ @@ -3258,22 +3256,24 @@ static inline void RENAME(blockCopy)(uin #define SCALED_CPY(src1, src2, dst1, dst2)\ REAL_SCALED_CPY(src1, src2, dst1, dst2) -SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5)) -SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2)) -SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4)) - "lea (%%"REG_a",%4,4), %%"REG_a" \n\t" - "lea (%%"REG_d",%5,4), %%"REG_d" \n\t" -SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) - - - : "=&a" (packedOffsetAndScale) - : "0" (packedOffsetAndScale), - "r"(src), - "r"(dst), - "r" ((long)srcStride), - "r" ((long)dstStride) - : "%"REG_d - ); +SCALED_CPY((%1), (%1, %3), (%2), (%2, %4)) + "lea (%1,%3,2), %1 \n\t" + "lea (%2,%4,2), %2 \n\t" +SCALED_CPY((%1), (%1, %3), (%2), (%2, %4)) + "lea (%1,%3,2), %1 \n\t" + "lea (%2,%4,2), %2 \n\t" +SCALED_CPY((%1), (%1, %3), (%2), (%2, %4)) + "lea (%1,%3,2), 
%1 \n\t" + "lea (%2,%4,2), %2 \n\t" +SCALED_CPY((%1), (%1, %3), (%2), (%2, %4)) + + : "+r" (packedOffsetAndScale), + "+r"(src), + "+r"(dst) + : "r" ((long)srcStride), + "r" ((long)dstStride) + : "memory" + ); #else //HAVE_MMX for(i=0; i<8; i++) memcpy( &(dst[dstStride*i]), @@ -3284,8 +3284,6 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2) { #ifdef HAVE_MMX asm volatile( - "lea (%0,%2), %%"REG_a" \n\t" - "lea (%1,%3), %%"REG_d" \n\t" #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ @@ -3296,18 +3294,22 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2) #define SIMPLE_CPY(src1, src2, dst1, dst2)\ REAL_SIMPLE_CPY(src1, src2, dst1, dst2) -SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3)) -SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2)) -SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4)) - "lea (%%"REG_a",%2,4), %%"REG_a" \n\t" - "lea (%%"REG_d",%3,4), %%"REG_d" \n\t" -SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) - - : : "r" (src), - "r" (dst), - "r" ((long)srcStride), - "r" ((long)dstStride) - : "%"REG_a, "%"REG_d +SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3)) + "lea (%0,%2,2), %0 \n\t" + "lea (%1,%3,2), %1 \n\t" +SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3)) + "lea (%0,%2,2), %0 \n\t" + "lea (%1,%3,2), %1 \n\t" +SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3)) + "lea (%0,%2,2), %0 \n\t" + "lea (%1,%3,2), %1 \n\t" +SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3)) + + : "+r" (src), + "+r" (dst) + : "r" ((long)srcStride), + "r" ((long)dstStride) + : "memory" ); #else //HAVE_MMX for(i=0; i<8; i++) diff -Nurp ffmpeg-old/libswscale/rgb2rgb_template.c ffmpeg/libswscale/rgb2rgb_template.c --- ffmpeg-old/libswscale/rgb2rgb_template.c 2007-10-06 02:20:55.000000000 +0100 +++ ffmpeg/libswscale/rgb2rgb_template.c 2007-10-06 02:17:56.000000000 +0100 @@ -1436,9 +1436,9 @@ static inline void RENAME(rgb24tobgr24)( asm volatile ( "test %%"REG_a", %%"REG_a" \n\t" "jns 2f \n\t" - "movq "MANGLE(mask24r)", %%mm5 \n\t" - "movq
"MANGLE(mask24g)", %%mm6 \n\t" - "movq "MANGLE(mask24b)", %%mm7 \n\t" + "movq %3, %%mm5 \n\t" + "movq %4, %%mm6 \n\t" + "movq %5, %%mm7 \n\t" ASMALIGN(4) "1: \n\t" PREFETCH" 32(%1, %%"REG_a") \n\t" @@ -1474,7 +1474,7 @@ static inline void RENAME(rgb24tobgr24)( " js 1b \n\t" "2: \n\t" : "+a" (mmx_size) - : "r" (src-mmx_size), "r"(dst-mmx_size) + : "r" (src-mmx_size), "r"(dst-mmx_size), "m"(mask24r), "m"(mask24g), "m"(mask24b) ); __asm __volatile(SFENCE:::"memory"); @@ -2152,8 +2152,8 @@ static inline void RENAME(rgb24toyv12)(c { asm volatile( "mov %2, %%"REG_a" \n\t" - "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" - "movq "MANGLE(w1111)", %%mm5 \n\t" + "movq %3, %%mm6 \n\t" + "movq %4, %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" ASMALIGN(4) @@ -2211,12 +2211,12 @@ static inline void RENAME(rgb24toyv12)(c "psraw $7, %%mm4 \n\t" "packuswb %%mm4, %%mm0 \n\t" - "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" + "paddusb %5, %%mm0 \n\t" MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" "add $8, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (src+width*3), "r" (ydst+width), "g" (-width) + : : "r" (src+width*3), "r" (ydst+width), "g" (-width), "m" (bgr2YCoeff), "m" (w1111), "m" (bgr2YOffset) : "%"REG_a, "%"REG_d ); ydst += lumStride; @@ -2225,8 +2225,8 @@ static inline void RENAME(rgb24toyv12)(c src -= srcStride*2; asm volatile( "mov %4, %%"REG_a" \n\t" - "movq "MANGLE(w1111)", %%mm5 \n\t" - "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" + "movq %5, %%mm5 \n\t" + "movq %6, %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" "add %%"REG_d", %%"REG_d" \n\t" @@ -2275,8 +2275,8 @@ static inline void RENAME(rgb24toyv12)(c "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" + "movq %7, %%mm1 \n\t" + "movq %7, %%mm3 \n\t" "pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" @@ -2333,12 +2333,12 @@ static inline void RENAME(rgb24toyv12)(c "paddw %%mm1, %%mm5 
\n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(w1111)", %%mm5 \n\t" + "movq %5, %%mm5 \n\t" "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" + "movq %7, %%mm1 \n\t" + "movq %7, %%mm3 \n\t" "pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" @@ -2362,13 +2362,14 @@ static inline void RENAME(rgb24toyv12)(c "punpckldq %%mm4, %%mm0 \n\t" "punpckhdq %%mm4, %%mm1 \n\t" "packsswb %%mm1, %%mm0 \n\t" - "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" + "paddb %8, %%mm0 \n\t" "movd %%mm0, (%2, %%"REG_a") \n\t" "punpckhdq %%mm0, %%mm0 \n\t" "movd %%mm0, (%3, %%"REG_a") \n\t" "add $4, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) + : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), + "m" (w1111), "m" (bgr2UCoeff), "m" (bgr2VCoeff), "m" (bgr2UVOffset) : "%"REG_a, "%"REG_d ); diff -Nurp ffmpeg-old/libswscale/swscale_template.c ffmpeg/libswscale/swscale_template.c --- ffmpeg-old/libswscale/swscale_template.c 2007-10-06 02:20:55.000000000 +0100 +++ ffmpeg/libswscale/swscale_template.c 2007-10-06 02:17:56.000000000 +0100 @@ -233,7 +233,9 @@ #define YSCALEYUV2PACKEDX_END \ :: "r" (&c->redDither), \ "m" (dummy), "m" (dummy), "m" (dummy),\ - "r" (dest), "m" (dstW) \ + "r" (dest), "m" (dstW), \ + "m" (b5Dither), "m" (g5Dither), \ + "m" (r5Dither), "m" (bF8), "m" (bFC), "m" (g6Dither) \ : "%"REG_a, "%"REG_d, "%"REG_S \ ); @@ -687,10 +689,10 @@ " jb 1b \n\t" #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index) -#define REAL_WRITEBGR16(dst, dstw, index) \ - "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ - "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ - "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ +#define REAL_WRITEBGR16(dst, dstw, index, bf8, bfc) \ + "pand "#bf8", %%mm2 \n\t" /* B */\ + "pand "#bfc",
%%mm4 \n\t" /* G */\ + "pand "#bf8", %%mm5 \n\t" /* R */\ "psrlq $3, %%mm2 \n\t"\ \ "movq %%mm2, %%mm1 \n\t"\ @@ -713,12 +715,12 @@ "add $8, "#index" \n\t"\ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index) +#define WRITEBGR16(dst, dstw, index, bf8, bfc) REAL_WRITEBGR16(dst, dstw, index, bf8, bfc) -#define REAL_WRITEBGR15(dst, dstw, index) \ - "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ - "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ - "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ +#define REAL_WRITEBGR15(dst, dstw, index, bf8) \ + "pand "#bf8", %%mm2 \n\t" /* B */\ + "pand "#bf8", %%mm4 \n\t" /* G */\ + "pand "#bf8", %%mm5 \n\t" /* R */\ "psrlq $3, %%mm2 \n\t"\ "psrlq $1, %%mm5 \n\t"\ \ @@ -742,9 +744,9 @@ "add $8, "#index" \n\t"\ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index) +#define WRITEBGR15(dst, dstw, index, bf8) REAL_WRITEBGR15(dst, dstw, index, bf8) -#define WRITEBGR24OLD(dst, dstw, index) \ +#define WRITEBGR24OLD(dst, dstw, index, m24a, m24b, m24c) \ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ "movq %%mm2, %%mm1 \n\t" /* B */\ "movq %%mm5, %%mm6 \n\t" /* R */\ @@ -800,7 +802,7 @@ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR24MMX(dst, dstw, index) \ +#define WRITEBGR24MMX(dst, dstw, index, m24a, m24b, m24c) \ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ "movq %%mm2, %%mm1 \n\t" /* B */\ "movq %%mm5, %%mm6 \n\t" /* R */\ @@ -853,10 +855,10 @@ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR24MMX2(dst, dstw, index) \ +#define WRITEBGR24MMX2(dst, dstw, index, m24a, m24b, m24c) \ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq "MANGLE(M24A)", %%mm0 \n\t"\ - "movq "MANGLE(M24C)", %%mm7 \n\t"\ + "movq "#m24a", %%mm0 \n\t"\ + "movq "#m24c", %%mm7 \n\t"\ "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1
R0 R1 R0 R1 R0 R1 R0 */\ @@ -875,7 +877,7 @@ "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ \ - "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ + "pand "#m24b", %%mm1 \n\t" /* B5 B4 B3 */\ "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ \ @@ -889,7 +891,7 @@ \ "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ - "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ + "pand "#m24b", %%mm6 \n\t" /* R7 R6 R5 */\ \ "por %%mm1, %%mm3 \n\t"\ "por %%mm3, %%mm6 \n\t"\ @@ -903,10 +905,10 @@ #ifdef HAVE_MMX2 #undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) +#define WRITEBGR24(dst, dstw, index, m24a, m24b, m24c) WRITEBGR24MMX2(dst, dstw, index, m24a, m24b, m24c) #else #undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) +#define WRITEBGR24(dst, dstw, index, m24a, m24b, m24c) WRITEBGR24MMX(dst, dstw, index, m24a, m24b, m24c) #endif #define REAL_WRITEYUY2(dst, dstw, index) \ @@ -1053,12 +1055,12 @@ static inline void RENAME(yuv2packedX)(S YSCALEYUV2RGBX "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) + WRITEBGR24(%%REGc, %5, %%REGa, %6, %7, %8) :: "r" (&c->redDither), "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW) + "r" (dest), "m" (dstW), "m" (M24A), "m" (M24B), "m" (M24C) : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S ); return; @@ -1067,12 +1069,12 @@ static inline void RENAME(yuv2packedX)(S YSCALEYUV2RGBX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2\n\t" - "paddusb "MANGLE(g5Dither)", %%mm4\n\t" - "paddusb "MANGLE(r5Dither)", %%mm5\n\t" + "paddusb %6, %%mm2\n\t" + "paddusb %7, %%mm4\n\t" + "paddusb %8, %%mm5\n\t" #endif - WRITEBGR15(%4, %5, %%REGa) + WRITEBGR15(%4, %5, %%REGa, %9) YSCALEYUV2PACKEDX_END return; case PIX_FMT_BGR565:
@@ -1080,12 +1082,12 @@ static inline void RENAME(yuv2packedX)(S YSCALEYUV2RGBX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2\n\t" - "paddusb "MANGLE(g6Dither)", %%mm4\n\t" - "paddusb "MANGLE(r5Dither)", %%mm5\n\t" + "paddusb %6, %%mm2\n\t" + "paddusb %11, %%mm4\n\t" + "paddusb %8, %%mm5\n\t" #endif - WRITEBGR16(%4, %5, %%REGa) + WRITEBGR16(%4, %5, %%REGa, %9, %10) YSCALEYUV2PACKEDX_END return; case PIX_FMT_YUYV422: @@ -1114,11 +1116,11 @@ static inline void RENAME(yuv2packedX)(S YSCALEYUV2RGBX "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) + WRITEBGR24(%%REGc, %5, %%REGa, %6, %7, %8) :: "r" (&c->redDither), "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW) + "r" (dest), "m" (dstW), "m" (M24A), "m" (M24B), "m" (M24C) : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S ); return; @@ -1127,12 +1129,12 @@ static inline void RENAME(yuv2packedX)(S YSCALEYUV2RGBX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2\n\t" + "paddusb %7, %%mm4\n\t" + "paddusb %8, %%mm5\n\t" #endif - WRITEBGR15(%4, %5, %%REGa) + WRITEBGR15(%4, %5, %%REGa, %9) YSCALEYUV2PACKEDX_END return; case PIX_FMT_BGR565: @@ -1140,12 +1142,12 @@ static inline void RENAME(yuv2packedX)(S YSCALEYUV2RGBX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2\n\t" + "paddusb %11, %%mm4\n\t" + "paddusb %8, %%mm5\n\t" #endif - WRITEBGR16(%4, %5, %%REGa) + WRITEBGR16(%4, %5, %%REGa, %9, %10) YSCALEYUV2PACKEDX_END return; case PIX_FMT_YUYV422: @@ -1427,11 +1429,11 @@ FULL_YSCALEYUV2RGB "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) - WRITEBGR24(%%REGb,
8280(%5), %%REGBP) + WRITEBGR24(%%REGb, 8280(%5), %%REGBP, %6, %7, %8) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (M24A), "m" (M24B), "m" (M24C) ); return; case PIX_FMT_BGR555: @@ -1442,17 +1444,18 @@ FULL_YSCALEYUV2RGB YSCALEYUV2RGB(%%REGBP, %5) /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2 \n\t" + "paddusb %7, %%mm4 \n\t" + "paddusb %8, %%mm5 \n\t" #endif - WRITEBGR15(%%REGb, 8280(%5), %%REGBP) + WRITEBGR15(%%REGb, 8280(%5), %%REGBP, %9) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (b5Dither), "m" (g5Dither), "m" (r5Dither), + "m" (bF8) ); return; case PIX_FMT_BGR565: @@ -1463,16 +1466,17 @@ FULL_YSCALEYUV2RGB YSCALEYUV2RGB(%%REGBP, %5) /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2 \n\t" + "paddusb %7, %%mm4 \n\t" + "paddusb %8, %%mm5 \n\t" #endif - WRITEBGR16(%%REGb, 8280(%5), %%REGBP) + WRITEBGR16(%%REGb, 8280(%5), %%REGBP, %9, %10) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (b5Dither), "m" (g6Dither), "m" (r5Dither), + "m" (bF8), "m" (bFC) ); return; case PIX_FMT_YUYV422: @@ -1537,12 +1541,12 @@ static inline void RENAME(yuv2packed1)(S "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB1(%%REGBP, %5) - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + WRITEBGR24(%%REGb, 8280(%5), %%REGBP, %6, %7, %8) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5),
%%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (M24A), "m" (M24B), "m" (M24C) ); return; case PIX_FMT_BGR555: @@ -1553,16 +1557,17 @@ static inline void RENAME(yuv2packed1)(S YSCALEYUV2RGB1(%%REGBP, %5) /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2 \n\t" + "paddusb %7, %%mm4 \n\t" + "paddusb %8, %%mm5 \n\t" #endif - WRITEBGR15(%%REGb, 8280(%5), %%REGBP) + WRITEBGR15(%%REGb, 8280(%5), %%REGBP, %9) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (b5Dither), "m" (g5Dither), "m" (r5Dither), + "m" (bF8) ); return; case PIX_FMT_BGR565: @@ -1573,17 +1578,18 @@ static inline void RENAME(yuv2packed1)(S YSCALEYUV2RGB1(%%REGBP, %5) /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2 \n\t" + "paddusb %7, %%mm4 \n\t" + "paddusb %8, %%mm5 \n\t" #endif - WRITEBGR16(%%REGb, 8280(%5), %%REGBP) + WRITEBGR16(%%REGb, 8280(%5), %%REGBP, %9, %10) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (b5Dither), "m" (g6Dither), "m" (r5Dither), + "m" (bF8), "m" (bFC) ); return; case PIX_FMT_YUYV422: @@ -1626,12 +1632,12 @@ static inline void RENAME(yuv2packed1)(S "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB1b(%%REGBP, %5) - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + WRITEBGR24(%%REGb, 8280(%5), %%REGBP, %6, %7, %8) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m"
(dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (M24A), "m" (M24B), "m" (M24C) ); return; case PIX_FMT_BGR555: @@ -1642,16 +1648,17 @@ static inline void RENAME(yuv2packed1)(S YSCALEYUV2RGB1b(%%REGBP, %5) /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2 \n\t" + "paddusb %7, %%mm4 \n\t" + "paddusb %8, %%mm5 \n\t" #endif - WRITEBGR15(%%REGb, 8280(%5), %%REGBP) + WRITEBGR15(%%REGb, 8280(%5), %%REGBP, %9) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (b5Dither), "m" (g5Dither), "m" (r5Dither), + "m" (bF8) ); return; case PIX_FMT_BGR565: @@ -1662,17 +1669,18 @@ static inline void RENAME(yuv2packed1)(S YSCALEYUV2RGB1b(%%REGBP, %5) /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb %6, %%mm2 \n\t" + "paddusb %7, %%mm4 \n\t" + "paddusb %8, %%mm5 \n\t" #endif - WRITEBGR16(%%REGb, 8280(%5), %%REGBP) + WRITEBGR16(%%REGb, 8280(%5), %%REGBP, %9, %10) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) + "a" (&c->redDither), "m" (b5Dither), "m" (g6Dither), "m" (r5Dither), + "m" (bF8), "m" (bFC) ); return; case PIX_FMT_YUYV422: @@ -1706,7 +1714,7 @@ static inline void RENAME(yuy2ToY)(uint8 { #ifdef HAVE_MMX asm volatile( - "movq "MANGLE(bm01010101)", %%mm2 \n\t" + "movq %3, %%mm2 \n\t" "mov %0, %%"REG_a" \n\t" "1: \n\t" "movq (%1, %%"REG_a",2), %%mm0 \n\t" @@ -1717,7 +1725,7 @@ static inline void RENAME(yuy2ToY)(uint8 "movq %%mm0, (%2, %%"REG_a") \n\t" "add $8, %%"REG_a" \n\t" " js 1b \n\t" - : : "g" (-width), "r" (src+width*2), "r" (dst+width) +
: : "g" (-width), "r" (src+width*2), "r" (dst+width), "m" (bm01010101) : "%"REG_a ); #else @@ -1731,7 +1739,7 @@ static inline void RENAME(yuy2ToUV)(uint { #ifdef HAVE_MMX asm volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" + "movq %4, %%mm4 \n\t" "mov %0, %%"REG_a" \n\t" "1: \n\t" "movq (%1, %%"REG_a",4), %%mm0 \n\t" @@ -1748,7 +1756,7 @@ static inline void RENAME(yuy2ToUV)(uint "movd %%mm1, (%2, %%"REG_a") \n\t" "add $4, %%"REG_a" \n\t" " js 1b \n\t" - : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) + : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width), "m" (bm01010101) : "%"REG_a ); #else @@ -1791,7 +1799,7 @@ static inline void RENAME(uyvyToUV)(uint { #ifdef HAVE_MMX asm volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" + "movq %4, %%mm4 \n\t" "mov %0, %%"REG_a" \n\t" "1: \n\t" "movq (%1, %%"REG_a",4), %%mm0 \n\t" @@ -1808,7 +1816,7 @@ static inline void RENAME(uyvyToUV)(uint "movd %%mm1, (%2, %%"REG_a") \n\t" "add $4, %%"REG_a" \n\t" " js 1b \n\t" - : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) + : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width), "m" (bm01010101) : "%"REG_a ); #else @@ -1859,8 +1867,8 @@ static inline void RENAME(bgr24ToY)(uint #ifdef HAVE_MMX asm volatile( "mov %2, %%"REG_a" \n\t" - "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" - "movq "MANGLE(w1111)", %%mm5 \n\t" + "movq %3, %%mm6 \n\t" + "movq %4, %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" ASMALIGN(4) @@ -1918,12 +1926,13 @@ static inline void RENAME(bgr24ToY)(uint "psraw $7, %%mm4 \n\t" "packuswb %%mm4, %%mm0 \n\t" - "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" + "paddusb %5, %%mm0 \n\t" "movq %%mm0, (%1, %%"REG_a") \n\t" "add $8, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (src+width*3), "r" (dst+width), "g" (-width) + : : "r" (src+width*3), "r" (dst+width), "g" (-width), + "m" (bgr2YCoeff), "m" (w1111), "m" (bgr2YOffset) : "%"REG_a, "%"REG_d ); #else @@ 
-1944,8 +1953,8 @@ static inline void RENAME(bgr24ToUV)(uin #ifdef HAVE_MMX asm volatile( "mov %3, %%"REG_a" \n\t" - "movq "MANGLE(w1111)", %%mm5 \n\t" - "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" + "movq %4, %%mm5 \n\t" + "movq %5, %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" "add %%"REG_d", %%"REG_d" \n\t" @@ -1977,8 +1986,8 @@ static inline void RENAME(bgr24ToUV)(uin "psrlw $1, %%mm0 \n\t" "psrlw $1, %%mm2 \n\t" #endif - "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" + "movq %6, %%mm1 \n\t" + "movq %6, %%mm3 \n\t" "pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" @@ -2019,12 +2028,12 @@ static inline void RENAME(bgr24ToUV)(uin "punpcklbw %%mm7, %%mm5 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(w1111)", %%mm5 \n\t" + "movq %4, %%mm5 \n\t" "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" + "movq %6, %%mm1 \n\t" + "movq %6, %%mm3 \n\t" "pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" @@ -2048,14 +2057,15 @@ static inline void RENAME(bgr24ToUV)(uin "punpckldq %%mm4, %%mm0 \n\t" "punpckhdq %%mm4, %%mm1 \n\t" "packsswb %%mm1, %%mm0 \n\t" - "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" + "paddb %7, %%mm0 \n\t" "movd %%mm0, (%1, %%"REG_a") \n\t" "punpckhdq %%mm0, %%mm0 \n\t" "movd %%mm0, (%2, %%"REG_a") \n\t" "add $4, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) + : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width), + "m" (w1111), "m" (bgr2UCoeff), "m" (bgr2VCoeff), "m" (bgr2UVOffset) : "%"REG_a, "%"REG_d ); #else @@ -2313,7 +2323,7 @@ static inline void RENAME(hScale)(int16_ "push %%"REG_b" \n\t" #endif "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(w02)", %%mm6 \n\t" + "movq %5, %%mm6 \n\t" "push %%"REG_BP" \n\t" // we use 7 regs here ... 
"mov %%"REG_a", %%"REG_BP" \n\t" ASMALIGN(4) @@ -2342,7 +2352,7 @@ static inline void RENAME(hScale)(int16_ "pop %%"REG_b" \n\t" #endif : "+a" (counter) - : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m" (w02) #if !defined(PIC) : "%"REG_b #endif @@ -2359,7 +2369,7 @@ static inline void RENAME(hScale)(int16_ "push %%"REG_b" \n\t" #endif "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(w02)", %%mm6 \n\t" + "movq %5, %%mm6 \n\t" "push %%"REG_BP" \n\t" // we use 7 regs here ... "mov %%"REG_a", %%"REG_BP" \n\t" ASMALIGN(4) @@ -2400,7 +2410,7 @@ static inline void RENAME(hScale)(int16_ "pop %%"REG_b" \n\t" #endif : "+a" (counter) - : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m" (w02) #if !defined(PIC) : "%"REG_b #endif @@ -2415,7 +2425,7 @@ static inline void RENAME(hScale)(int16_ dst-= counter/2; asm volatile( "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(w02)", %%mm6 \n\t" + "movq %7, %%mm6 \n\t" ASMALIGN(4) "1: \n\t" "mov %2, %%"REG_c" \n\t" @@ -2452,7 +2462,7 @@ static inline void RENAME(hScale)(int16_ : "+r" (counter), "+r" (filter) : "m" (filterPos), "m" (dst), "m"(offset), - "m" (src), "r" (filterSize*2) + "m" (src), "r" (filterSize*2), "m"(w02) : "%"REG_a, "%"REG_c, "%"REG_d ); } diff -Nurp ffmpeg-old/libswscale/yuv2rgb_template.c ffmpeg/libswscale/yuv2rgb_template.c --- ffmpeg-old/libswscale/yuv2rgb_template.c 2007-10-06 02:20:55.000000000 +0100 +++ ffmpeg/libswscale/yuv2rgb_template.c 2007-10-06 02:17:56.000000000 +0100 @@ -46,7 +46,7 @@ #define SFENCE "/nop" #endif -#define YUV2RGB \ +#define YUV2RGB(mmx_00ffw) \ /* Do the multiply part of the conversion for even and odd pixels, register usage: mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels, @@ -75,7 +75,7 @@ \ /* convert the luma part */\ "movq %%mm6, %%mm7;" /* Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */\ - "pand "MANGLE(mmx_00ffw)", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\ + "pand 
"#mmx_00ffw", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\ \ "psrlw $8, %%mm7;" /* get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 */\ \ @@ -163,17 +163,17 @@ static inline int RENAME(yuv420_rgb16)(S PREFETCH" 64(%1) \n\t" PREFETCH" 64(%2) \n\t" */ -YUV2RGB +YUV2RGB(%6) #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm0;" - "paddusb "MANGLE(g6Dither)", %%mm2;" - "paddusb "MANGLE(r5Dither)", %%mm1;" + "paddusb %7, %%mm0;" + "paddusb %8, %%mm2;" + "paddusb %9, %%mm1;" #endif /* mask unneeded bits off */ - "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */ - "pand "MANGLE(mmx_grnmask)", %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */ - "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */ + "pand %10, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */ + "pand %11, %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */ + "pand %10, %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */ "psrlw $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */ "pxor %%mm4, %%mm4;" /* zero mm4 */ @@ -208,7 +208,9 @@ YUV2RGB " js 1b \n\t" : "+r" (index), "+r" (_image) - : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index) + : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index), + "m" (mmx_00ffw), "m" (b5Dither), "m" (g6Dither), "m" (r5Dither), + "m" (mmx_redmask), "m" (mmx_grnmask) ); } @@ -252,18 +254,18 @@ static inline int RENAME(yuv420_rgb15)(S "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ //".balign 16 \n\t" "1: \n\t" -YUV2RGB +YUV2RGB(%6) #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm0 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm1 \n\t" + "paddusb %7, %%mm0 \n\t" + "paddusb %8, %%mm2 \n\t" + "paddusb %9, %%mm1 \n\t" #endif /* mask unneeded bits off */ - "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */ - "pand "MANGLE(mmx_redmask)", %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */ - "pand 
"MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */ + "pand %10, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */ + "pand %10, %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */ + "pand %10, %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */ "psrlw $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */ "psrlw $1, %%mm1;" /* 0_r7r6r5 r4r3_0_0 0_r7r6r5 r4r3_0_0 */ @@ -298,7 +300,9 @@ YUV2RGB "add $4, %0 \n\t" " js 1b \n\t" : "+r" (index), "+r" (_image) - : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index) + : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index), + "m" (mmx_00ffw), "m" (b5Dither), "m" (g5Dither), "m" (r5Dither), + "m" (mmx_redmask) ); } @@ -336,11 +340,11 @@ static inline int RENAME(yuv420_rgb24)(S "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ //".balign 16 \n\t" "1: \n\t" -YUV2RGB +YUV2RGB(%6) /* mm0=B, %%mm2=G, %%mm1=R */ #ifdef HAVE_MMX2 - "movq "MANGLE(M24A)", %%mm4 \n\t" - "movq "MANGLE(M24C)", %%mm7 \n\t" + "movq %7, %%mm4 \n\t" + "movq %9, %%mm7 \n\t" "pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */ "pshufw $0x50, %%mm2, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */ "pshufw $0x00, %%mm1, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */ @@ -359,7 +363,7 @@ YUV2RGB "pshufw $0x55, %%mm2, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */ "pshufw $0xA5, %%mm1, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */ - "pand "MANGLE(M24B)", %%mm5 \n\t" /* B5 B4 B3 */ + "pand %8, %%mm5 \n\t" /* B5 B4 B3 */ "pand %%mm7, %%mm3 \n\t" /* G4 G3 */ "pand %%mm4, %%mm6 \n\t" /* R4 R3 R2 */ @@ -374,7 +378,7 @@ YUV2RGB "pand %%mm7, %%mm5 \n\t" /* B7 B6 */ "pand %%mm4, %%mm3 \n\t" /* G7 G6 G5 */ - "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */ + "pand %8, %%mm6 \n\t" /* R7 R6 R5 */ "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \ "por %%mm5, %%mm3 \n\t" @@ -444,7 +448,8 @@ YUV2RGB " js 1b \n\t" : "+r" (index), "+r" (_image) - : "r" (_pu - index), "r" (_pv - index), 
"r"(&c->redDither), "r" (_py - 2*index) + : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index), + "m" (mmx_00ffw), "m" (M24A), "m" (M24B), "m" (M24C) ); } @@ -482,7 +487,7 @@ static inline int RENAME(yuv420_rgb32)(S "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ //".balign 16 \n\t" "1: \n\t" -YUV2RGB +YUV2RGB(%6) /* convert RGB plane to RGB packed format, mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, mm4 -> GB, mm5 -> AR pixel 4-7, @@ -530,7 +535,7 @@ YUV2RGB " js 1b \n\t" : "+r" (index), "+r" (_image) - : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index) + : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index), "m" (mmx_00ffw) ); }