Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 115568 | Differences between
and this patch

Collapse All | Expand All

(-)ffmpeg-old/libavcodec/i386/dsputil_mmx.c (-63 / +62 lines)
Lines 657-671 static inline void transpose4x4(uint8_t Link Here
657
        "punpckhwd %%mm2, %%mm1         \n\t"
657
        "punpckhwd %%mm2, %%mm1         \n\t"
658
        "movd  %%mm0, %0                \n\t"
658
        "movd  %%mm0, %0                \n\t"
659
        "punpckhdq %%mm0, %%mm0         \n\t"
659
        "punpckhdq %%mm0, %%mm0         \n\t"
660
        "movd  %%mm0, %1                \n\t"
660
        "movd  %%mm0, (%0,%1)           \n\t"
661
        "movd  %%mm1, %2                \n\t"
661
        "movd  %%mm1, (%0,%1,2)         \n\t"
662
        "punpckhdq %%mm1, %%mm1         \n\t"
662
        "punpckhdq %%mm1, %%mm1         \n\t"
663
        "movd  %%mm1, %3                \n\t"
663
        "lea (%1,%1,2), %1              \n\t"
664
        "movd  %%mm1, (%0,%1)           \n\t"
664
665
665
        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
666
        : "=r" (*(uint32_t*)(dst)), "+r" (dst_stride)
666
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
667
        :: "memory"
667
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
668
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
669
    );
668
    );
670
}
669
}
671
670
Lines 1745-1751 WARPER8_16_SQ(hadamard8_diff_mmx2, hadam Link Here
1745
1744
1746
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1745
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1747
        "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
1746
        "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
1748
        "movq "MANGLE(ff_pw_20)", %%mm4   \n\t" /* 20 */\
1747
        "movq %5, %%mm4                   \n\t" /* 20 */\
1749
        "pmullw " #m3 ", %%mm4            \n\t" /* 20x1 */\
1748
        "pmullw " #m3 ", %%mm4            \n\t" /* 20x1 */\
1750
        "movq "#in7", " #m3 "             \n\t" /* d */\
1749
        "movq "#in7", " #m3 "             \n\t" /* d */\
1751
        "movq "#in0", %%mm5               \n\t" /* D */\
1750
        "movq "#in0", %%mm5               \n\t" /* D */\
Lines 1757-1763 WARPER8_16_SQ(hadamard8_diff_mmx2, hadam Link Here
1757
        "paddw " #m5 ", %%mm6             \n\t" /* x2 */\
1756
        "paddw " #m5 ", %%mm6             \n\t" /* x2 */\
1758
        "paddw %%mm6, %%mm6               \n\t" /* 2x2 */\
1757
        "paddw %%mm6, %%mm6               \n\t" /* 2x2 */\
1759
        "psubw %%mm6, %%mm5               \n\t" /* -2x2 + x3 */\
1758
        "psubw %%mm6, %%mm5               \n\t" /* -2x2 + x3 */\
1760
        "pmullw "MANGLE(ff_pw_3)", %%mm5  \n\t" /* -6x2 + 3x3 */\
1759
        "pmullw %6, %%mm5                 \n\t" /* -6x2 + 3x3 */\
1761
        "paddw " #rnd ", %%mm4            \n\t" /* x2 */\
1760
        "paddw " #rnd ", %%mm4            \n\t" /* x2 */\
1762
        "paddw %%mm4, %%mm5               \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
1761
        "paddw %%mm4, %%mm5               \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
1763
        "psraw $5, %%mm5                  \n\t"\
1762
        "psraw $5, %%mm5                  \n\t"\
Lines 1791-1805 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1791
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
1790
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
1792
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
1791
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
1793
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
1792
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
1794
        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
1793
        "pmullw %6, %%mm6                 \n\t" /* 3c - 6b */\
1795
        "paddw %%mm4, %%mm0               \n\t" /* a */\
1794
        "paddw %%mm4, %%mm0               \n\t" /* a */\
1796
        "paddw %%mm1, %%mm5               \n\t" /* d */\
1795
        "paddw %%mm1, %%mm5               \n\t" /* d */\
1797
        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
1796
        "pmullw %5, %%mm0                 \n\t" /* 20a */\
1798
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
1797
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
1799
        "paddw %6, %%mm6                  \n\t"\
1798
        "paddw %8, %%mm6                  \n\t"\
1800
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1799
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1801
        "psraw $5, %%mm0                  \n\t"\
1800
        "psraw $5, %%mm0                  \n\t"\
1802
        "movq %%mm0, %5                   \n\t"\
1801
        "movq %%mm0, %7                   \n\t"\
1803
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1802
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1804
        \
1803
        \
1805
        "movq 5(%0), %%mm0                \n\t" /* FGHIJKLM */\
1804
        "movq 5(%0), %%mm0                \n\t" /* FGHIJKLM */\
Lines 1817-1831 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1817
        "psrlq $24, %%mm6                 \n\t" /* IJKLM000 */\
1816
        "psrlq $24, %%mm6                 \n\t" /* IJKLM000 */\
1818
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0F0G0H0I */\
1817
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0F0G0H0I */\
1819
        "punpcklbw %%mm7, %%mm6           \n\t" /* 0I0J0K0L */\
1818
        "punpcklbw %%mm7, %%mm6           \n\t" /* 0I0J0K0L */\
1820
        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
1819
        "pmullw %6, %%mm3                 \n\t" /* 3c - 6b */\
1821
        "paddw %%mm2, %%mm1               \n\t" /* a */\
1820
        "paddw %%mm2, %%mm1               \n\t" /* a */\
1822
        "paddw %%mm6, %%mm4               \n\t" /* d */\
1821
        "paddw %%mm6, %%mm4               \n\t" /* d */\
1823
        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
1822
        "pmullw %5, %%mm1                 \n\t" /* 20a */\
1824
        "psubw %%mm4, %%mm3               \n\t" /* - 6b +3c - d */\
1823
        "psubw %%mm4, %%mm3               \n\t" /* - 6b +3c - d */\
1825
        "paddw %6, %%mm1                  \n\t"\
1824
        "paddw %8, %%mm1                  \n\t"\
1826
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b +3c - d */\
1825
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b +3c - d */\
1827
        "psraw $5, %%mm3                  \n\t"\
1826
        "psraw $5, %%mm3                  \n\t"\
1828
        "movq %5, %%mm1                   \n\t"\
1827
        "movq %7, %%mm1                   \n\t"\
1829
        "packuswb %%mm3, %%mm1            \n\t"\
1828
        "packuswb %%mm3, %%mm1            \n\t"\
1830
        OP_MMX2(%%mm1, (%1),%%mm4, q)\
1829
        OP_MMX2(%%mm1, (%1),%%mm4, q)\
1831
        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
1830
        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
Lines 1843-1849 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1843
        "psubw %%mm5, %%mm0               \n\t" /* c - 2b */\
1842
        "psubw %%mm5, %%mm0               \n\t" /* c - 2b */\
1844
        "movq %%mm3, %%mm5                \n\t" /* JKLMNOPQ */\
1843
        "movq %%mm3, %%mm5                \n\t" /* JKLMNOPQ */\
1845
        "psrlq $24, %%mm3                 \n\t" /* MNOPQ000 */\
1844
        "psrlq $24, %%mm3                 \n\t" /* MNOPQ000 */\
1846
        "pmullw "MANGLE(ff_pw_3)", %%mm0  \n\t" /* 3c - 6b */\
1845
        "pmullw %6, %%mm0                 \n\t" /* 3c - 6b */\
1847
        "punpcklbw %%mm7, %%mm3           \n\t" /* 0M0N0O0P */\
1846
        "punpcklbw %%mm7, %%mm3           \n\t" /* 0M0N0O0P */\
1848
        "paddw %%mm3, %%mm2               \n\t" /* d */\
1847
        "paddw %%mm3, %%mm2               \n\t" /* d */\
1849
        "psubw %%mm2, %%mm0               \n\t" /* -6b + 3c - d */\
1848
        "psubw %%mm2, %%mm0               \n\t" /* -6b + 3c - d */\
Lines 1851-1858 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1851
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0J0K0L0M */\
1850
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0J0K0L0M */\
1852
        "punpckhbw %%mm7, %%mm5           \n\t" /* 0N0O0P0Q */\
1851
        "punpckhbw %%mm7, %%mm5           \n\t" /* 0N0O0P0Q */\
1853
        "paddw %%mm2, %%mm6               \n\t" /* a */\
1852
        "paddw %%mm2, %%mm6               \n\t" /* a */\
1854
        "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
1853
        "pmullw %5, %%mm6                 \n\t" /* 20a */\
1855
        "paddw %6, %%mm0                  \n\t"\
1854
        "paddw %8, %%mm0                  \n\t"\
1856
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1855
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1857
        "psraw $5, %%mm0                  \n\t"\
1856
        "psraw $5, %%mm0                  \n\t"\
1858
        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
1857
        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
Lines 1866-1875 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1866
        "paddw %%mm2, %%mm5               \n\t" /* d */\
1865
        "paddw %%mm2, %%mm5               \n\t" /* d */\
1867
        "paddw %%mm6, %%mm6               \n\t" /* 2b */\
1866
        "paddw %%mm6, %%mm6               \n\t" /* 2b */\
1868
        "psubw %%mm6, %%mm4               \n\t" /* c - 2b */\
1867
        "psubw %%mm6, %%mm4               \n\t" /* c - 2b */\
1869
        "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
1868
        "pmullw %5, %%mm3                 \n\t" /* 20a */\
1870
        "pmullw "MANGLE(ff_pw_3)", %%mm4  \n\t" /* 3c - 6b */\
1869
        "pmullw %6, %%mm4                 \n\t" /* 3c - 6b */\
1871
        "psubw %%mm5, %%mm3               \n\t" /* -6b + 3c - d */\
1870
        "psubw %%mm5, %%mm3               \n\t" /* -6b + 3c - d */\
1872
        "paddw %6, %%mm4                  \n\t"\
1871
        "paddw %8, %%mm4                  \n\t"\
1873
        "paddw %%mm3, %%mm4               \n\t" /* 20a - 6b + 3c - d */\
1872
        "paddw %%mm3, %%mm4               \n\t" /* 20a - 6b + 3c - d */\
1874
        "psraw $5, %%mm4                  \n\t"\
1873
        "psraw $5, %%mm4                  \n\t"\
1875
        "packuswb %%mm4, %%mm0            \n\t"\
1874
        "packuswb %%mm4, %%mm0            \n\t"\
Lines 1879-1886 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1879
        "add %4, %1                       \n\t"\
1878
        "add %4, %1                       \n\t"\
1880
        "decl %2                          \n\t"\
1879
        "decl %2                          \n\t"\
1881
        " jnz 1b                          \n\t"\
1880
        " jnz 1b                          \n\t"\
1882
        : "+a"(src), "+c"(dst), "+m"(h)\
1881
        : : "a"(src), "c"(dst), "m"(h), \
1883
        : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
1882
         "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\
1884
        : "memory"\
1883
        : "memory"\
1885
    );\
1884
    );\
1886
}\
1885
}\
Lines 1958-1969 static void OPNAME ## mpeg4_qpel8_h_lowp Link Here
1958
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
1957
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
1959
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
1958
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
1960
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
1959
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
1961
        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
1960
        "pmullw %6, %%mm6                 \n\t" /* 3c - 6b */\
1962
        "paddw %%mm4, %%mm0               \n\t" /* a */\
1961
        "paddw %%mm4, %%mm0               \n\t" /* a */\
1963
        "paddw %%mm1, %%mm5               \n\t" /* d */\
1962
        "paddw %%mm1, %%mm5               \n\t" /* d */\
1964
        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
1963
        "pmullw %5, %%mm0                 \n\t" /* 20a */\
1965
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
1964
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
1966
        "paddw %6, %%mm6                  \n\t"\
1965
        "paddw %8, %%mm6                  \n\t"\
1967
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1966
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1968
        "psraw $5, %%mm0                  \n\t"\
1967
        "psraw $5, %%mm0                  \n\t"\
1969
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1968
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
Lines 1979-1988 static void OPNAME ## mpeg4_qpel8_h_lowp Link Here
1979
        "paddw %%mm5, %%mm4               \n\t" /* d */\
1978
        "paddw %%mm5, %%mm4               \n\t" /* d */\
1980
        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
1979
        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
1981
        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
1980
        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
1982
        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
1981
        "pmullw %5, %%mm1                 \n\t" /* 20a */\
1983
        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
1982
        "pmullw %6, %%mm3                 \n\t" /* 3c - 6b */\
1984
        "psubw %%mm4, %%mm3               \n\t" /* -6b + 3c - d */\
1983
        "psubw %%mm4, %%mm3               \n\t" /* -6b + 3c - d */\
1985
        "paddw %6, %%mm1                  \n\t"\
1984
        "paddw %8, %%mm1                  \n\t"\
1986
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b + 3c - d */\
1985
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b + 3c - d */\
1987
        "psraw $5, %%mm3                  \n\t"\
1986
        "psraw $5, %%mm3                  \n\t"\
1988
        "packuswb %%mm3, %%mm0            \n\t"\
1987
        "packuswb %%mm3, %%mm0            \n\t"\
Lines 1992-1999 static void OPNAME ## mpeg4_qpel8_h_lowp Link Here
1992
        "add %4, %1                       \n\t"\
1991
        "add %4, %1                       \n\t"\
1993
        "decl %2                          \n\t"\
1992
        "decl %2                          \n\t"\
1994
        " jnz 1b                          \n\t"\
1993
        " jnz 1b                          \n\t"\
1995
        : "+a"(src), "+c"(dst), "+m"(h)\
1994
        : : "a"(src), "c"(dst), "m"(h), \
1996
        : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
1995
         "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\
1997
        : "memory"\
1996
        : "memory"\
1998
    );\
1997
    );\
1999
}\
1998
}\
Lines 2072-2110 static void OPNAME ## mpeg4_qpel16_v_low Link Here
2072
        "movq 8(%0), %%mm1              \n\t"\
2071
        "movq 8(%0), %%mm1              \n\t"\
2073
        "movq 16(%0), %%mm2             \n\t"\
2072
        "movq 16(%0), %%mm2             \n\t"\
2074
        "movq 24(%0), %%mm3             \n\t"\
2073
        "movq 24(%0), %%mm3             \n\t"\
2075
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2074
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2076
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2075
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2077
        "add %4, %1                     \n\t"\
2076
        "add %4, %1                     \n\t"\
2078
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2077
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2079
        \
2078
        \
2080
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2079
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2081
        "add %4, %1                     \n\t"\
2080
        "add %4, %1                     \n\t"\
2082
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2081
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2083
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
2082
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
2084
        "add %4, %1                     \n\t"\
2083
        "add %4, %1                     \n\t"\
2085
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
2084
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
2086
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
2085
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
2087
        "add %4, %1                     \n\t"\
2086
        "add %4, %1                     \n\t"\
2088
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
2087
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
2089
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
2088
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
2090
        "add %4, %1                     \n\t"\
2089
        "add %4, %1                     \n\t"\
2091
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
2090
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
2092
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
2091
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
2093
        "add %4, %1                     \n\t"\
2092
        "add %4, %1                     \n\t"\
2094
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
2093
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
2095
        \
2094
        \
2096
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
2095
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
2097
        "add %4, %1                     \n\t"  \
2096
        "add %4, %1                     \n\t"  \
2098
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
2097
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
2099
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
2098
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
2100
        \
2099
        \
2101
        "add $136, %0                   \n\t"\
2100
        "add $136, %0                   \n\t"\
2102
        "add %6, %1                     \n\t"\
2101
        "add %8, %1                     \n\t"\
2103
        "decl %2                        \n\t"\
2102
        "decl %2                        \n\t"\
2104
        " jnz 1b                        \n\t"\
2103
        " jnz 1b                        \n\t"\
2105
        \
2104
        \
2106
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2105
        : : "r"(temp_ptr), "r"(dst), "g"(count), \
2107
        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
2106
         "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*(long)dstStride)\
2108
        :"memory"\
2107
        :"memory"\
2109
    );\
2108
    );\
2110
}\
2109
}\
Lines 2144-2170 static void OPNAME ## mpeg4_qpel8_v_lowp Link Here
2144
        "movq 8(%0), %%mm1              \n\t"\
2143
        "movq 8(%0), %%mm1              \n\t"\
2145
        "movq 16(%0), %%mm2             \n\t"\
2144
        "movq 16(%0), %%mm2             \n\t"\
2146
        "movq 24(%0), %%mm3             \n\t"\
2145
        "movq 24(%0), %%mm3             \n\t"\
2147
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2146
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2148
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2147
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2149
        "add %4, %1                     \n\t"\
2148
        "add %4, %1                     \n\t"\
2150
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2149
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2151
        \
2150
        \
2152
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2151
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2153
        "add %4, %1                     \n\t"\
2152
        "add %4, %1                     \n\t"\
2154
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2153
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2155
        \
2154
        \
2156
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
2155
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
2157
        "add %4, %1                     \n\t"\
2156
        "add %4, %1                     \n\t"\
2158
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
2157
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
2159
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
2158
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
2160
                \
2159
                \
2161
        "add $72, %0                    \n\t"\
2160
        "add $72, %0                    \n\t"\
2162
        "add %6, %1                     \n\t"\
2161
        "add %8, %1                     \n\t"\
2163
        "decl %2                        \n\t"\
2162
        "decl %2                        \n\t"\
2164
        " jnz 1b                        \n\t"\
2163
        " jnz 1b                        \n\t"\
2165
         \
2164
         \
2166
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2165
        : : "r"(temp_ptr), "r"(dst), "rm"(count), \
2167
        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
2166
         "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*(long)dstStride)\
2168
        : "memory"\
2167
        : "memory"\
2169
   );\
2168
   );\
2170
}\
2169
}\
(-)ffmpeg-old/libavcodec/i386/h264dsp_mmx.c (-7 / +11 lines)
Lines 589-610 static void h264_loop_filter_strength_mm Link Here
589
                        "paddb %%mm6, %%mm1 \n\t"
589
                        "paddb %%mm6, %%mm1 \n\t"
590
                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
590
                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
591
                        "por %%mm1, %%mm0 \n\t"
591
                        "por %%mm1, %%mm0 \n\t"
592
                        ::"m"(ref[l][b_idx]),
593
                          "m"(ref[l][b_idx+d_idx])
594
                        : "memory"
595
                    );
592
596
593
                        "movq %2, %%mm1 \n\t"
597
                    asm volatile(
594
                        "movq %3, %%mm2 \n\t"
598
                        "movq %0, %%mm1 \n\t"
595
                        "psubw %4, %%mm1 \n\t"
599
                        "movq %1, %%mm2 \n\t"
596
                        "psubw %5, %%mm2 \n\t"
600
                        "psubw %2, %%mm1 \n\t"
601
                        "psubw %3, %%mm2 \n\t"
597
                        "packsswb %%mm2, %%mm1 \n\t"
602
                        "packsswb %%mm2, %%mm1 \n\t"
598
                        "paddb %%mm5, %%mm1 \n\t"
603
                        "paddb %%mm5, %%mm1 \n\t"
599
                        "pminub %%mm4, %%mm1 \n\t"
604
                        "pminub %%mm4, %%mm1 \n\t"
600
                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
605
                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
601
                        "por %%mm1, %%mm0 \n\t"
606
                        "por %%mm1, %%mm0 \n\t"
602
                        ::"m"(ref[l][b_idx]),
607
                        ::"m"(mv[l][b_idx][0]),
603
                          "m"(ref[l][b_idx+d_idx]),
604
                          "m"(mv[l][b_idx][0]),
605
                          "m"(mv[l][b_idx+2][0]),
608
                          "m"(mv[l][b_idx+2][0]),
606
                          "m"(mv[l][b_idx+d_idx][0]),
609
                          "m"(mv[l][b_idx+d_idx][0]),
607
                          "m"(mv[l][b_idx+d_idx+2][0])
610
                          "m"(mv[l][b_idx+d_idx+2][0])
611
                        : "memory"
608
                    );
612
                    );
609
                }
613
                }
610
            }
614
            }
(-)ffmpeg-old/libavcodec/i386/motion_est_mmx.c (-2 / +2 lines)
Lines 121-127 static inline void sad8_4_mmx2(uint8_t * Link Here
121
    long len= -(stride*h);
121
    long len= -(stride*h);
122
    asm volatile(
122
    asm volatile(
123
        ASMALIGN(4)
123
        ASMALIGN(4)
124
        "movq "MANGLE(bone)", %%mm5     \n\t"
124
        "movq %5, %%mm5                 \n\t"
125
        "1:                             \n\t"
125
        "1:                             \n\t"
126
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
126
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
127
        "movq (%2, %%"REG_a"), %%mm2    \n\t"
127
        "movq (%2, %%"REG_a"), %%mm2    \n\t"
Lines 149-155 static inline void sad8_4_mmx2(uint8_t * Link Here
149
        "add %4, %%"REG_a"              \n\t"
149
        "add %4, %%"REG_a"              \n\t"
150
        " js 1b                         \n\t"
150
        " js 1b                         \n\t"
151
        : "+a" (len)
151
        : "+a" (len)
152
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
152
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (bone)
153
    );
153
    );
154
}
154
}
155
155
(-)ffmpeg-old/libavcodec/i386/mpegvideo_mmx_template.c (-7 / +14 lines)
Lines 110-116 static int RENAME(dct_quantize)(MpegEncC Link Here
110
            SPREADW(%%mm3)
110
            SPREADW(%%mm3)
111
            "pxor %%mm7, %%mm7                  \n\t" // 0
111
            "pxor %%mm7, %%mm7                  \n\t" // 0
112
            "pxor %%mm4, %%mm4                  \n\t" // 0
112
            "pxor %%mm4, %%mm4                  \n\t" // 0
113
            "movq (%2), %%mm5                   \n\t" // qmat[0]
113
            "movq %2, %%mm5                   \n\t" // qmat[0]
114
            "pxor %%mm6, %%mm6                  \n\t"
114
            "pxor %%mm6, %%mm6                  \n\t"
115
            "psubw (%3), %%mm6                  \n\t" // -bias[0]
115
            "psubw (%3), %%mm6                  \n\t" // -bias[0]
116
            "mov $-128, %%"REG_a"               \n\t"
116
            "mov $-128, %%"REG_a"               \n\t"
Lines 138-144 static int RENAME(dct_quantize)(MpegEncC Link Here
138
            "movd %%mm3, %%"REG_a"              \n\t"
138
            "movd %%mm3, %%"REG_a"              \n\t"
139
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
139
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
140
            : "+a" (last_non_zero_p1)
140
            : "+a" (last_non_zero_p1)
141
            : "r" (block+64), "r" (qmat), "r" (bias),
141
            : "r" (block+64), "m" (qmat), "r" (bias),
142
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
142
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
143
        );
143
        );
144
        // note the asm is split cuz gcc doesnt like that many operands ...
144
        // note the asm is split cuz gcc doesnt like that many operands ...
Lines 153-167 static int RENAME(dct_quantize)(MpegEncC Link Here
153
        );
153
        );
154
    }else{ // FMT_H263
154
    }else{ // FMT_H263
155
        asm volatile(
155
        asm volatile(
156
            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1
156
            "movd %0, %%mm3                     \n\t" // last_non_zero_p1
157
            SPREADW(%%mm3)
157
            SPREADW(%%mm3)
158
            "pxor %%mm7, %%mm7                  \n\t" // 0
158
            "pxor %%mm7, %%mm7                  \n\t" // 0
159
            "pxor %%mm4, %%mm4                  \n\t" // 0
159
            "pxor %%mm4, %%mm4                  \n\t" // 0
160
            "push %%"REG_a"                     \n\t"
160
            "mov $-128, %%"REG_a"               \n\t"
161
            "mov $-128, %%"REG_a"               \n\t"
161
            ASMALIGN(4)
162
            ASMALIGN(4)
162
            "1:                                 \n\t"
163
            "1:                                 \n\t"
163
            "pxor %%mm1, %%mm1                  \n\t" // 0
164
            "pxor %%mm1, %%mm1                  \n\t" // 0
164
            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
165
            "push %%"REG_a"                     \n\t"
166
            "add %c6(%%"REG_SP"), %%"REG_a"     \n\t"
167
            "movq (%%"REG_a"), %%mm0            \n\t" // block[i]
168
            "pop %%"REG_a"                      \n\t"
165
            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] <= 0 ? 0xFF : 0x00
169
            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] <= 0 ? 0xFF : 0x00
166
            "pxor %%mm1, %%mm0                  \n\t"
170
            "pxor %%mm1, %%mm0                  \n\t"
167
            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])
171
            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])
Lines 183-191 static int RENAME(dct_quantize)(MpegEncC Link Here
183
            PMAX(%%mm3, %%mm0)
187
            PMAX(%%mm3, %%mm0)
184
            "movd %%mm3, %%"REG_a"              \n\t"
188
            "movd %%mm3, %%"REG_a"              \n\t"
185
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
189
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
186
            : "+a" (last_non_zero_p1)
190
            "mov %%"REG_a", %0                  \n\t"
187
            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
191
            "pop %%"REG_a"                      \n\t"
188
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
192
            : "+m" (last_non_zero_p1)
193
            : "a" (block+64), "r" (qmat+64), "r" (bias+64),
194
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64),
195
              "i" (sizeof(long))
189
        );
196
        );
190
        // note the asm is split cuz gcc doesnt like that many operands ...
197
        // note the asm is split cuz gcc doesnt like that many operands ...
191
        asm volatile(
198
        asm volatile(
(-)ffmpeg-old/libavcodec/i386/simple_idct_mmx.c (-4 / +4 lines)
Lines 363-369 static inline void idct(int16_t *block) Link Here
363
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
363
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
364
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
364
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
365
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
365
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
366
        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
366
        "movq %3, %%mm4                 \n\t"\
367
        "pand %%mm0, %%mm4              \n\t"\
367
        "pand %%mm0, %%mm4              \n\t"\
368
        "por %%mm1, %%mm4               \n\t"\
368
        "por %%mm1, %%mm4               \n\t"\
369
        "por %%mm2, %%mm4               \n\t"\
369
        "por %%mm2, %%mm4               \n\t"\
Lines 471-477 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(% Link Here
471
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
471
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
472
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
472
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
473
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
473
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
474
        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
474
        "movq %3, %%mm4                 \n\t"\
475
        "pand %%mm0, %%mm4              \n\t"\
475
        "pand %%mm0, %%mm4              \n\t"\
476
        "por %%mm1, %%mm4               \n\t"\
476
        "por %%mm1, %%mm4               \n\t"\
477
        "por %%mm2, %%mm4               \n\t"\
477
        "por %%mm2, %%mm4               \n\t"\
Lines 545-551 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(% Link Here
545
        "jmp 2f                         \n\t"\
545
        "jmp 2f                         \n\t"\
546
        "1:                             \n\t"\
546
        "1:                             \n\t"\
547
        "pslld $16, %%mm0               \n\t"\
547
        "pslld $16, %%mm0               \n\t"\
548
        "paddd "MANGLE(d40000)", %%mm0  \n\t"\
548
        "paddd %4, %%mm0                \n\t"\
549
        "psrad $13, %%mm0               \n\t"\
549
        "psrad $13, %%mm0               \n\t"\
550
        "packssdw %%mm0, %%mm0          \n\t"\
550
        "packssdw %%mm0, %%mm0          \n\t"\
551
        "movq %%mm0, " #dst "           \n\t"\
551
        "movq %%mm0, " #dst "           \n\t"\
Lines 1270-1276 Temp Link Here
1270
*/
1270
*/
1271
1271
1272
"9: \n\t"
1272
"9: \n\t"
1273
                :: "r" (block), "r" (temp), "r" (coeffs)
1273
                :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m" (d40000)
1274
                : "%eax"
1274
                : "%eax"
1275
        );
1275
        );
1276
}
1276
}
(-)ffmpeg-old/libavcodec/i386/snowdsp_mmx.c (-12 / +10 lines)
Lines 629-638 void ff_snow_vertical_compose97i_mmx(DWT Link Here
629
629
630
#define snow_inner_add_yblock_sse2_header \
630
#define snow_inner_add_yblock_sse2_header \
631
    DWTELEM * * dst_array = sb->line + src_y;\
631
    DWTELEM * * dst_array = sb->line + src_y;\
632
    long tmp;\
632
    long tmp = b_h;\
633
    asm volatile(\
633
    asm volatile(\
634
             "mov  %7, %%"REG_c"             \n\t"\
634
             "mov  %6, %%"REG_c"             \n\t"\
635
             "mov  %6, %2                    \n\t"\
636
             "mov  %4, %%"REG_S"             \n\t"\
635
             "mov  %4, %%"REG_S"             \n\t"\
637
             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
636
             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
638
             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
637
             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
Lines 689-697 void ff_snow_vertical_compose97i_mmx(DWT Link Here
689
688
690
#define snow_inner_add_yblock_sse2_end_common2\
689
#define snow_inner_add_yblock_sse2_end_common2\
691
             "jnz 1b                         \n\t"\
690
             "jnz 1b                         \n\t"\
692
             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
691
             :"+m"(dst8),"+m"(dst_array),"=m"(tmp)\
693
             :\
692
             :\
694
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
693
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)src_stride):\
695
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
694
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
696
695
697
#define snow_inner_add_yblock_sse2_end_8\
696
#define snow_inner_add_yblock_sse2_end_8\
Lines 705-711 void ff_snow_vertical_compose97i_mmx(DWT Link Here
705
#define snow_inner_add_yblock_sse2_end_16\
704
#define snow_inner_add_yblock_sse2_end_16\
706
             "add $"PTR_SIZE"*1, %1          \n\t"\
705
             "add $"PTR_SIZE"*1, %1          \n\t"\
707
             snow_inner_add_yblock_sse2_end_common1\
706
             snow_inner_add_yblock_sse2_end_common1\
708
             "dec %2                         \n\t"\
707
             "sub $1, %2                     \n\t"\
709
             snow_inner_add_yblock_sse2_end_common2
708
             snow_inner_add_yblock_sse2_end_common2
710
709
711
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
710
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
Lines 795-804 snow_inner_add_yblock_sse2_end_16 Link Here
795
794
796
#define snow_inner_add_yblock_mmx_header \
795
#define snow_inner_add_yblock_mmx_header \
797
    DWTELEM * * dst_array = sb->line + src_y;\
796
    DWTELEM * * dst_array = sb->line + src_y;\
798
    long tmp;\
797
    long tmp = b_h;\
799
    asm volatile(\
798
    asm volatile(\
800
             "mov  %7, %%"REG_c"             \n\t"\
799
             "mov  %6, %%"REG_c"             \n\t"\
801
             "mov  %6, %2                    \n\t"\
802
             "mov  %4, %%"REG_S"             \n\t"\
800
             "mov  %4, %%"REG_S"             \n\t"\
803
             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
801
             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
804
             "pcmpeqd %%mm3, %%mm3           \n\t"\
802
             "pcmpeqd %%mm3, %%mm3           \n\t"\
Lines 861-871 snow_inner_add_yblock_sse2_end_16 Link Here
861
             "add %%"REG_c", (%%"REG_a")     \n\t"\
859
             "add %%"REG_c", (%%"REG_a")     \n\t"\
862
             "add $"PTR_SIZE"*1, %1          \n\t"\
860
             "add $"PTR_SIZE"*1, %1          \n\t"\
863
             "add %%"REG_c", %0              \n\t"\
861
             "add %%"REG_c", %0              \n\t"\
864
             "dec %2                         \n\t"\
862
             "sub $1, %2                     \n\t"\
865
             "jnz 1b                         \n\t"\
863
             "jnz 1b                         \n\t"\
866
             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
864
             :"+m"(dst8),"+m"(dst_array),"=m"(tmp)\
867
             :\
865
             :\
868
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
866
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)src_stride):\
869
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
867
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
870
868
871
static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
869
static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
(-)ffmpeg-old/libavcodec/liba52/resample_mmx.c (-23 / +23 lines)
Lines 35-44 static int a52_resample_MONO_to_5_MMX(fl Link Here
35
    int32_t * f = (int32_t *) _f;
35
    int32_t * f = (int32_t *) _f;
36
	asm volatile(
36
	asm volatile(
37
		"movl $-512, %%esi		\n\t"
37
		"movl $-512, %%esi		\n\t"
38
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
38
		"movq %2, %%mm7			\n\t"
39
		"movq "MANGLE(wm1100)", %%mm3	\n\t"
39
		"movq %3, %%mm3			\n\t"
40
		"movq "MANGLE(wm0101)", %%mm4	\n\t"
40
		"movq %4, %%mm4			\n\t"
41
		"movq "MANGLE(wm1010)", %%mm5	\n\t"
41
		"movq %5, %%mm5			\n\t"
42
		"pxor %%mm6, %%mm6		\n\t"
42
		"pxor %%mm6, %%mm6		\n\t"
43
		"1:				\n\t"
43
		"1:				\n\t"
44
		"movq (%1, %%esi, 2), %%mm0	\n\t"
44
		"movq (%1, %%esi, 2), %%mm0	\n\t"
Lines 62-68 static int a52_resample_MONO_to_5_MMX(fl Link Here
62
		"addl $8, %%esi			\n\t"
62
		"addl $8, %%esi			\n\t"
63
		" jnz 1b			\n\t"
63
		" jnz 1b			\n\t"
64
		"emms				\n\t"
64
		"emms				\n\t"
65
		:: "r" (s16+1280), "r" (f+256)
65
		:: "r" (s16+1280), "r" (f+256), "m" (magicF2W), "m" (wm1100), "m" (wm0101), "m" (wm1010)
66
		:"%esi", "%edi", "memory"
66
		:"%esi", "%edi", "memory"
67
	);
67
	);
68
    return 5*256;
68
    return 5*256;
Lines 90-96 static int a52_resample_STEREO_to_2_MMX( Link Here
90
	);*/
90
	);*/
91
	asm volatile(
91
	asm volatile(
92
		"movl $-1024, %%esi		\n\t"
92
		"movl $-1024, %%esi		\n\t"
93
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
93
		"movq %2, %%mm7			\n\t"
94
		"1:				\n\t"
94
		"1:				\n\t"
95
		"movq (%1, %%esi), %%mm0	\n\t"
95
		"movq (%1, %%esi), %%mm0	\n\t"
96
		"movq 8(%1, %%esi), %%mm1	\n\t"
96
		"movq 8(%1, %%esi), %%mm1	\n\t"
Lines 110-116 static int a52_resample_STEREO_to_2_MMX( Link Here
110
		"addl $16, %%esi		\n\t"
110
		"addl $16, %%esi		\n\t"
111
		" jnz 1b			\n\t"
111
		" jnz 1b			\n\t"
112
		"emms				\n\t"
112
		"emms				\n\t"
113
		:: "r" (s16+512), "r" (f+256)
113
		:: "r" (s16+512), "r" (f+256), "m" (magicF2W)
114
		:"%esi", "memory"
114
		:"%esi", "memory"
115
	);
115
	);
116
    return 2*256;
116
    return 2*256;
Lines 120-126 static int a52_resample_3F_to_5_MMX(floa Link Here
120
    int32_t * f = (int32_t *) _f;
120
    int32_t * f = (int32_t *) _f;
121
	asm volatile(
121
	asm volatile(
122
		"movl $-1024, %%esi		\n\t"
122
		"movl $-1024, %%esi		\n\t"
123
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
123
		"movq %2, %%mm7			\n\t"
124
		"pxor %%mm6, %%mm6		\n\t"
124
		"pxor %%mm6, %%mm6		\n\t"
125
		"movq %%mm7, %%mm5		\n\t"
125
		"movq %%mm7, %%mm5		\n\t"
126
		"punpckldq %%mm6, %%mm5		\n\t"
126
		"punpckldq %%mm6, %%mm5		\n\t"
Lines 165-171 static int a52_resample_3F_to_5_MMX(floa Link Here
165
		"addl $16, %%esi		\n\t"
165
		"addl $16, %%esi		\n\t"
166
		" jnz 1b			\n\t"
166
		" jnz 1b			\n\t"
167
		"emms				\n\t"
167
		"emms				\n\t"
168
		:: "r" (s16+1280), "r" (f+256)
168
		:: "r" (s16+1280), "r" (f+256), "m" (magicF2W)
169
		:"%esi", "%edi", "memory"
169
		:"%esi", "%edi", "memory"
170
	);
170
	);
171
    return 5*256;
171
    return 5*256;
Lines 175-181 static int a52_resample_2F_2R_to_4_MMX(f Link Here
175
    int32_t * f = (int32_t *) _f;
175
    int32_t * f = (int32_t *) _f;
176
	asm volatile(
176
	asm volatile(
177
		"movl $-1024, %%esi		\n\t"
177
		"movl $-1024, %%esi		\n\t"
178
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
178
		"movq %2, %%mm7			\n\t"
179
		"1:				\n\t"
179
		"1:				\n\t"
180
		"movq (%1, %%esi), %%mm0	\n\t"
180
		"movq (%1, %%esi), %%mm0	\n\t"
181
		"movq 8(%1, %%esi), %%mm1	\n\t"
181
		"movq 8(%1, %%esi), %%mm1	\n\t"
Lines 216-222 static int a52_resample_2F_2R_to_4_MMX(f Link Here
216
		"addl $16, %%esi		\n\t"
216
		"addl $16, %%esi		\n\t"
217
		" jnz 1b			\n\t"
217
		" jnz 1b			\n\t"
218
		"emms				\n\t"
218
		"emms				\n\t"
219
		:: "r" (s16+1024), "r" (f+256)
219
		:: "r" (s16+1024), "r" (f+256), "m" (magicF2W)
220
		:"%esi", "memory"
220
		:"%esi", "memory"
221
	);
221
	);
222
    return 4*256;
222
    return 4*256;
Lines 226-232 static int a52_resample_3F_2R_to_5_MMX(f Link Here
226
    int32_t * f = (int32_t *) _f;
226
    int32_t * f = (int32_t *) _f;
227
	asm volatile(
227
	asm volatile(
228
		"movl $-1024, %%esi		\n\t"
228
		"movl $-1024, %%esi		\n\t"
229
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
229
		"movq %2, %%mm7			\n\t"
230
		"1:				\n\t"
230
		"1:				\n\t"
231
		"movd (%1, %%esi), %%mm0	\n\t"
231
		"movd (%1, %%esi), %%mm0	\n\t"
232
		"punpckldq 2048(%1, %%esi), %%mm0\n\t"
232
		"punpckldq 2048(%1, %%esi), %%mm0\n\t"
Lines 275-281 static int a52_resample_3F_2R_to_5_MMX(f Link Here
275
		"addl $16, %%esi		\n\t"
275
		"addl $16, %%esi		\n\t"
276
		" jnz 1b			\n\t"
276
		" jnz 1b			\n\t"
277
		"emms				\n\t"
277
		"emms				\n\t"
278
		:: "r" (s16+1280), "r" (f+256)
278
		:: "r" (s16+1280), "r" (f+256), "m" (magicF2W)
279
		:"%esi", "%edi", "memory"
279
		:"%esi", "%edi", "memory"
280
	);
280
	);
281
    return 5*256;
281
    return 5*256;
Lines 285-291 static int a52_resample_MONO_LFE_to_6_MM Link Here
285
    int32_t * f = (int32_t *) _f;
285
    int32_t * f = (int32_t *) _f;
286
	asm volatile(
286
	asm volatile(
287
		"movl $-1024, %%esi		\n\t"
287
		"movl $-1024, %%esi		\n\t"
288
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
288
		"movq %2, %%mm7			\n\t"
289
		"pxor %%mm6, %%mm6		\n\t"
289
		"pxor %%mm6, %%mm6		\n\t"
290
		"1:				\n\t"
290
		"1:				\n\t"
291
		"movq 1024(%1, %%esi), %%mm0	\n\t"
291
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 315-321 static int a52_resample_MONO_LFE_to_6_MM Link Here
315
		"addl $16, %%esi		\n\t"
315
		"addl $16, %%esi		\n\t"
316
		" jnz 1b			\n\t"
316
		" jnz 1b			\n\t"
317
		"emms				\n\t"
317
		"emms				\n\t"
318
		:: "r" (s16+1536), "r" (f+256)
318
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
319
		:"%esi", "%edi", "memory"
319
		:"%esi", "%edi", "memory"
320
	);
320
	);
321
    return 6*256;
321
    return 6*256;
Lines 325-331 static int a52_resample_STEREO_LFE_to_6_ Link Here
325
    int32_t * f = (int32_t *) _f;
325
    int32_t * f = (int32_t *) _f;
326
	asm volatile(
326
	asm volatile(
327
		"movl $-1024, %%esi		\n\t"
327
		"movl $-1024, %%esi		\n\t"
328
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
328
		"movq %2, %%mm7			\n\t"
329
		"pxor %%mm6, %%mm6		\n\t"
329
		"pxor %%mm6, %%mm6		\n\t"
330
		"1:				\n\t"
330
		"1:				\n\t"
331
		"movq 1024(%1, %%esi), %%mm0	\n\t"
331
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 353-359 static int a52_resample_STEREO_LFE_to_6_ Link Here
353
		"addl $8, %%esi			\n\t"
353
		"addl $8, %%esi			\n\t"
354
		" jnz 1b			\n\t"
354
		" jnz 1b			\n\t"
355
		"emms				\n\t"
355
		"emms				\n\t"
356
		:: "r" (s16+1536), "r" (f+256)
356
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
357
		:"%esi", "%edi", "memory"
357
		:"%esi", "%edi", "memory"
358
	);
358
	);
359
    return 6*256;
359
    return 6*256;
Lines 363-369 static int a52_resample_3F_LFE_to_6_MMX( Link Here
363
    int32_t * f = (int32_t *) _f;
363
    int32_t * f = (int32_t *) _f;
364
	asm volatile(
364
	asm volatile(
365
		"movl $-1024, %%esi		\n\t"
365
		"movl $-1024, %%esi		\n\t"
366
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
366
		"movq %2, %%mm7			\n\t"
367
		"pxor %%mm6, %%mm6		\n\t"
367
		"pxor %%mm6, %%mm6		\n\t"
368
		"1:				\n\t"
368
		"1:				\n\t"
369
		"movq 1024(%1, %%esi), %%mm0	\n\t"
369
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 393-399 static int a52_resample_3F_LFE_to_6_MMX( Link Here
393
		"addl $8, %%esi			\n\t"
393
		"addl $8, %%esi			\n\t"
394
		" jnz 1b			\n\t"
394
		" jnz 1b			\n\t"
395
		"emms				\n\t"
395
		"emms				\n\t"
396
		:: "r" (s16+1536), "r" (f+256)
396
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
397
		:"%esi", "%edi", "memory"
397
		:"%esi", "%edi", "memory"
398
	);
398
	);
399
    return 6*256;
399
    return 6*256;
Lines 403-409 static int a52_resample_2F_2R_LFE_to_6_M Link Here
403
    int32_t * f = (int32_t *) _f;
403
    int32_t * f = (int32_t *) _f;
404
	asm volatile(
404
	asm volatile(
405
		"movl $-1024, %%esi		\n\t"
405
		"movl $-1024, %%esi		\n\t"
406
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
406
		"movq %2, %%mm7			\n\t"
407
//		"pxor %%mm6, %%mm6		\n\t"
407
//		"pxor %%mm6, %%mm6		\n\t"
408
		"1:				\n\t"
408
		"1:				\n\t"
409
		"movq 1024(%1, %%esi), %%mm0	\n\t"
409
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 439-445 static int a52_resample_2F_2R_LFE_to_6_M Link Here
439
		"addl $8, %%esi			\n\t"
439
		"addl $8, %%esi			\n\t"
440
		" jnz 1b			\n\t"
440
		" jnz 1b			\n\t"
441
		"emms				\n\t"
441
		"emms				\n\t"
442
		:: "r" (s16+1536), "r" (f+256)
442
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
443
		:"%esi", "%edi", "memory"
443
		:"%esi", "%edi", "memory"
444
	);
444
	);
445
    return 6*256;
445
    return 6*256;
Lines 449-455 static int a52_resample_3F_2R_LFE_to_6_M Link Here
449
    int32_t * f = (int32_t *) _f;
449
    int32_t * f = (int32_t *) _f;
450
	asm volatile(
450
	asm volatile(
451
		"movl $-1024, %%esi		\n\t"
451
		"movl $-1024, %%esi		\n\t"
452
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
452
		"movq %2, %%mm7			\n\t"
453
//		"pxor %%mm6, %%mm6		\n\t"
453
//		"pxor %%mm6, %%mm6		\n\t"
454
		"1:				\n\t"
454
		"1:				\n\t"
455
		"movq 1024(%1, %%esi), %%mm0	\n\t"
455
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 487-493 static int a52_resample_3F_2R_LFE_to_6_M Link Here
487
		"addl $8, %%esi			\n\t"
487
		"addl $8, %%esi			\n\t"
488
		" jnz 1b			\n\t"
488
		" jnz 1b			\n\t"
489
		"emms				\n\t"
489
		"emms				\n\t"
490
		:: "r" (s16+1536), "r" (f+256)
490
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
491
		:"%esi", "%edi", "memory"
491
		:"%esi", "%edi", "memory"
492
	);
492
	);
493
    return 6*256;
493
    return 6*256;
(-)ffmpeg-old/libpostproc/postprocess_template.c (-78 / +80 lines)
Lines 387-402 static inline void RENAME(vertRK1Filter) Link Here
387
// FIXME rounding
387
// FIXME rounding
388
        asm volatile(
388
        asm volatile(
389
                "pxor %%mm7, %%mm7                      \n\t" // 0
389
                "pxor %%mm7, %%mm7                      \n\t" // 0
390
                "movq "MANGLE(b80)", %%mm6              \n\t" // MIN_SIGNED_BYTE
390
                "movq %2, %%mm6                         \n\t" // MIN_SIGNED_BYTE
391
                "leal (%0, %1), %%"REG_a"               \n\t"
391
                "leal (%0, %1), %%"REG_a"               \n\t"
392
                "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
392
                "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
393
//      0       1       2       3       4       5       6       7       8       9
393
//      0       1       2       3       4       5       6       7       8       9
394
//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
394
//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
395
                "movq "MANGLE(pQPb)", %%mm0             \n\t" // QP,..., QP
395
                "movq %3, %%mm0                         \n\t" // QP,..., QP
396
                "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
396
                "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
397
                "paddusb "MANGLE(b02)", %%mm0           \n\t"
397
                "paddusb %4, %%mm0                      \n\t"
398
                "psrlw $2, %%mm0                        \n\t"
398
                "psrlw $2, %%mm0                        \n\t"
399
                "pand "MANGLE(b3F)", %%mm0              \n\t" // QP/4,..., QP/4
399
                "pand %5, %%mm0                         \n\t" // QP/4,..., QP/4
400
                "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
400
                "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
401
                "movq (%0, %1, 4), %%mm2                \n\t" // line 4
401
                "movq (%0, %1, 4), %%mm2                \n\t" // line 4
402
                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
402
                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
Lines 425-432 static inline void RENAME(vertRK1Filter) Link Here
425
425
426
                "paddb %%mm6, %%mm5                     \n\t"
426
                "paddb %%mm6, %%mm5                     \n\t"
427
                "psrlw $2, %%mm5                        \n\t"
427
                "psrlw $2, %%mm5                        \n\t"
428
                "pand "MANGLE(b3F)", %%mm5              \n\t"
428
                "pand %5, %%mm5                         \n\t"
429
                "psubb "MANGLE(b20)", %%mm5             \n\t" // (l5-l4)/8
429
                "psubb %6, %%mm5                        \n\t" // (l5-l4)/8
430
430
431
                "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
431
                "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
432
                "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
432
                "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
Lines 441-447 static inline void RENAME(vertRK1Filter) Link Here
441
                "movq %%mm2, (%%"REG_c", %1)            \n\t"
441
                "movq %%mm2, (%%"REG_c", %1)            \n\t"
442
442
443
                :
443
                :
444
                : "r" (src), "r" ((long)stride)
444
                : "r" (src), "r" ((long)stride), "m" (b80), "m" (pQPb), "m" (b02), "m" (b3F), "m" (b20)
445
                : "%"REG_a, "%"REG_c
445
                : "%"REG_a, "%"REG_c
446
        );
446
        );
447
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
447
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 517-523 static inline void RENAME(vertX1Filter)( Link Here
517
                "paddusb %%mm0, %%mm0                   \n\t"
517
                "paddusb %%mm0, %%mm0                   \n\t"
518
                "psubusb %%mm0, %%mm4                   \n\t"
518
                "psubusb %%mm0, %%mm4                   \n\t"
519
                "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= QP ? -1 : 0
519
                "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= QP ? -1 : 0
520
                "psubusb "MANGLE(b01)", %%mm3           \n\t"
520
                "psubusb %3, %%mm3                      \n\t"
521
                "pand %%mm4, %%mm3                      \n\t" // d <= QP ? d : 0
521
                "pand %%mm4, %%mm3                      \n\t" // d <= QP ? d : 0
522
522
523
                PAVGB(%%mm7, %%mm3)                           // d/2
523
                PAVGB(%%mm7, %%mm3)                           // d/2
Lines 566-572 static inline void RENAME(vertX1Filter)( Link Here
566
                "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
566
                "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
567
567
568
                :
568
                :
569
                : "r" (src), "r" ((long)stride), "m" (co->pQPb)
569
                : "r" (src), "r" ((long)stride), "m" (co->pQPb), "m" (b01)
570
                : "%"REG_a, "%"REG_c
570
                : "%"REG_a, "%"REG_c
571
        );
571
        );
572
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
572
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 699-715 static inline void RENAME(doVertDefFilte Link Here
699
699
700
                PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
700
                PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
701
                "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
701
                "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
702
                "paddusb "MANGLE(b01)", %%mm4           \n\t"
702
                "paddusb %3, %%mm4                      \n\t"
703
                "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
703
                "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
704
                "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
704
                "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
705
                "pand %%mm4, %%mm3                      \n\t"
705
                "pand %%mm4, %%mm3                      \n\t"
706
706
707
                "movq %%mm3, %%mm1                      \n\t"
707
                "movq %%mm3, %%mm1                      \n\t"
708
//                "psubusb "MANGLE(b01)", %%mm3           \n\t"
708
//                "psubusb %3, %%mm3           \n\t"
709
                PAVGB(%%mm7, %%mm3)
709
                PAVGB(%%mm7, %%mm3)
710
                PAVGB(%%mm7, %%mm3)
710
                PAVGB(%%mm7, %%mm3)
711
                "paddusb %%mm1, %%mm3                   \n\t"
711
                "paddusb %%mm1, %%mm3                   \n\t"
712
//                "paddusb "MANGLE(b01)", %%mm3           \n\t"
712
//                "paddusb %3, %%mm3           \n\t"
713
713
714
                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
714
                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
715
                "movq (%0, %1, 4), %%mm5                \n\t" //l4
715
                "movq (%0, %1, 4), %%mm5                \n\t" //l4
Lines 722-728 static inline void RENAME(doVertDefFilte Link Here
722
                "pand %%mm0, %%mm3                      \n\t"
722
                "pand %%mm0, %%mm3                      \n\t"
723
                PMINUB(%%mm5, %%mm3, %%mm0)
723
                PMINUB(%%mm5, %%mm3, %%mm0)
724
724
725
                "psubusb "MANGLE(b01)", %%mm3           \n\t"
725
                "psubusb %3, %%mm3                      \n\t"
726
                PAVGB(%%mm7, %%mm3)
726
                PAVGB(%%mm7, %%mm3)
727
727
728
                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
728
                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
Lines 754-760 static inline void RENAME(doVertDefFilte Link Here
754
                "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
754
                "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
755
                "pxor %%mm6, %%mm2                      \n\t" // -l5-1
755
                "pxor %%mm6, %%mm2                      \n\t" // -l5-1
756
                "movq %%mm2, %%mm5                      \n\t" // -l5-1
756
                "movq %%mm2, %%mm5                      \n\t" // -l5-1
757
                "movq "MANGLE(b80)", %%mm4              \n\t" // 128
757
                "movq %4, %%mm4                         \n\t" // 128
758
                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
758
                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
759
                PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
759
                PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
760
                PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
760
                PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
Lines 766-772 static inline void RENAME(doVertDefFilte Link Here
766
                "pxor %%mm6, %%mm2                      \n\t" // -l1-1
766
                "pxor %%mm6, %%mm2                      \n\t" // -l1-1
767
                PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
767
                PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
768
                PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
768
                PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
769
                "movq "MANGLE(b80)", %%mm3              \n\t" // 128
769
                "movq %4, %%mm3                         \n\t" // 128
770
                PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
770
                PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
771
                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
771
                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
772
                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
772
                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
Lines 776-789 static inline void RENAME(doVertDefFilte Link Here
776
                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
776
                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
777
                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
777
                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
778
                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
778
                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
779
                "movq "MANGLE(b80)", %%mm2              \n\t" // 128
779
                "movq %4, %%mm2                         \n\t" // 128
780
                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
780
                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
781
                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
781
                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
782
                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
782
                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
783
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
783
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
784
784
785
                "movq "MANGLE(b00)", %%mm1              \n\t" // 0
785
                "movq %5, %%mm1                         \n\t" // 0
786
                "movq "MANGLE(b00)", %%mm5              \n\t" // 0
786
                "movq %5, %%mm5                         \n\t" // 0
787
                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
787
                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
788
                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
788
                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
789
                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
789
                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
Lines 792-798 static inline void RENAME(doVertDefFilte Link Here
792
792
793
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
793
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
794
794
795
                "movq "MANGLE(b00)", %%mm7              \n\t" // 0
795
                "movq %5, %%mm7                         \n\t" // 0
796
                "movq %2, %%mm2                         \n\t" // QP
796
                "movq %2, %%mm2                         \n\t" // QP
797
                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
797
                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
798
                "psubb %%mm6, %%mm2                     \n\t"
798
                "psubb %%mm6, %%mm2                     \n\t"
Lines 806-818 static inline void RENAME(doVertDefFilte Link Here
806
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
806
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
807
807
808
                "movq %%mm4, %%mm3                      \n\t" // d
808
                "movq %%mm4, %%mm3                      \n\t" // d
809
                "psubusb "MANGLE(b01)", %%mm4           \n\t"
809
                "psubusb %3, %%mm4                      \n\t"
810
                PAVGB(%%mm7, %%mm4)                           // d/32
810
                PAVGB(%%mm7, %%mm4)                           // d/32
811
                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
811
                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
812
                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
812
                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
813
                "pand %%mm2, %%mm4                      \n\t"
813
                "pand %%mm2, %%mm4                      \n\t"
814
814
815
                "movq "MANGLE(b80)", %%mm5              \n\t" // 128
815
                "movq %4, %%mm5                         \n\t" // 128
816
                "psubb %%mm0, %%mm5                     \n\t" // q
816
                "psubb %%mm0, %%mm5                     \n\t" // q
817
                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
817
                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
818
                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
818
                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
Lines 834-840 static inline void RENAME(doVertDefFilte Link Here
834
                "movq %%mm2, (%0, %1, 4)                \n\t"
834
                "movq %%mm2, (%0, %1, 4)                \n\t"
835
835
836
                :
836
                :
837
                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
837
                : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m" (b01), "m" (b80), "m" (b00)
838
                : "%"REG_a, "%"REG_c
838
                : "%"REG_a, "%"REG_c
839
        );
839
        );
840
840
Lines 1078-1087 src-=8; Link Here
1078
                "psubusw %%mm1, %%mm5                   \n\t" // ld
1078
                "psubusw %%mm1, %%mm5                   \n\t" // ld
1079
1079
1080
1080
1081
                "movq "MANGLE(w05)", %%mm2              \n\t" // 5
1081
                "movq %3, %%mm2                         \n\t" // 5
1082
                "pmullw %%mm2, %%mm4                    \n\t"
1082
                "pmullw %%mm2, %%mm4                    \n\t"
1083
                "pmullw %%mm2, %%mm5                    \n\t"
1083
                "pmullw %%mm2, %%mm5                    \n\t"
1084
                "movq "MANGLE(w20)", %%mm2              \n\t" // 32
1084
                "movq %4, %%mm2                         \n\t" // 32
1085
                "paddw %%mm2, %%mm4                     \n\t"
1085
                "paddw %%mm2, %%mm4                     \n\t"
1086
                "paddw %%mm2, %%mm5                     \n\t"
1086
                "paddw %%mm2, %%mm5                     \n\t"
1087
                "psrlw $6, %%mm4                        \n\t"
1087
                "psrlw $6, %%mm4                        \n\t"
Lines 1131-1137 src-=8; Link Here
1131
                "movq %%mm0, (%0, %1)                   \n\t"
1131
                "movq %%mm0, (%0, %1)                   \n\t"
1132
1132
1133
                : "+r" (src)
1133
                : "+r" (src)
1134
                : "r" ((long)stride), "m" (c->pQPb)
1134
                : "r" ((long)stride), "m" (c->pQPb), "m" (w05), "m" (w20)
1135
                : "%"REG_a, "%"REG_c
1135
                : "%"REG_a, "%"REG_c
1136
        );
1136
        );
1137
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1137
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 1275-1281 FIND_MIN_MAX((%0, %1, 8)) Link Here
1275
                "movq %%mm6, %%mm0                      \n\t" // max
1275
                "movq %%mm6, %%mm0                      \n\t" // max
1276
                "psubb %%mm7, %%mm6                     \n\t" // max - min
1276
                "psubb %%mm7, %%mm6                     \n\t" // max - min
1277
                "movd %%mm6, %%ecx                      \n\t"
1277
                "movd %%mm6, %%ecx                      \n\t"
1278
                "cmpb "MANGLE(deringThreshold)", %%cl   \n\t"
1278
                "cmpb %4, %%cl                          \n\t"
1279
                " jb 1f                                 \n\t"
1279
                " jb 1f                                 \n\t"
1280
                "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
1280
                "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
1281
                "and "ALIGN_MASK", %%"REG_c"            \n\t"
1281
                "and "ALIGN_MASK", %%"REG_c"            \n\t"
Lines 1302-1310 FIND_MIN_MAX((%0, %1, 8)) Link Here
1302
                "psubusb %%mm7, %%mm0                   \n\t"
1302
                "psubusb %%mm7, %%mm0                   \n\t"
1303
                "psubusb %%mm7, %%mm2                   \n\t"
1303
                "psubusb %%mm7, %%mm2                   \n\t"
1304
                "psubusb %%mm7, %%mm3                   \n\t"
1304
                "psubusb %%mm7, %%mm3                   \n\t"
1305
                "pcmpeqb "MANGLE(b00)", %%mm0           \n\t" // L10 > a ? 0 : -1
1305
                "pcmpeqb %5, %%mm0                      \n\t" // L10 > a ? 0 : -1
1306
                "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" // L20 > a ? 0 : -1
1306
                "pcmpeqb %5, %%mm2                      \n\t" // L20 > a ? 0 : -1
1307
                "pcmpeqb "MANGLE(b00)", %%mm3           \n\t" // L00 > a ? 0 : -1
1307
                "pcmpeqb %5, %%mm3                      \n\t" // L00 > a ? 0 : -1
1308
                "paddb %%mm2, %%mm0                     \n\t"
1308
                "paddb %%mm2, %%mm0                     \n\t"
1309
                "paddb %%mm3, %%mm0                     \n\t"
1309
                "paddb %%mm3, %%mm0                     \n\t"
1310
1310
Lines 1325-1333 FIND_MIN_MAX((%0, %1, 8)) Link Here
1325
                "psubusb %%mm7, %%mm2                   \n\t"
1325
                "psubusb %%mm7, %%mm2                   \n\t"
1326
                "psubusb %%mm7, %%mm4                   \n\t"
1326
                "psubusb %%mm7, %%mm4                   \n\t"
1327
                "psubusb %%mm7, %%mm5                   \n\t"
1327
                "psubusb %%mm7, %%mm5                   \n\t"
1328
                "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" // L11 > a ? 0 : -1
1328
                "pcmpeqb %5, %%mm2                      \n\t" // L11 > a ? 0 : -1
1329
                "pcmpeqb "MANGLE(b00)", %%mm4           \n\t" // L21 > a ? 0 : -1
1329
                "pcmpeqb %5, %%mm4                      \n\t" // L21 > a ? 0 : -1
1330
                "pcmpeqb "MANGLE(b00)", %%mm5           \n\t" // L01 > a ? 0 : -1
1330
                "pcmpeqb %5, %%mm5                      \n\t" // L01 > a ? 0 : -1
1331
                "paddb %%mm4, %%mm2                     \n\t"
1331
                "paddb %%mm4, %%mm2                     \n\t"
1332
                "paddb %%mm5, %%mm2                     \n\t"
1332
                "paddb %%mm5, %%mm2                     \n\t"
1333
// 0, 2, 3, 1
1333
// 0, 2, 3, 1
Lines 1352-1358 FIND_MIN_MAX((%0, %1, 8)) Link Here
1352
                "psubusb " #lx ", " #t1 "               \n\t"\
1352
                "psubusb " #lx ", " #t1 "               \n\t"\
1353
                "psubusb " #lx ", " #t0 "               \n\t"\
1353
                "psubusb " #lx ", " #t0 "               \n\t"\
1354
                "psubusb " #lx ", " #sx "               \n\t"\
1354
                "psubusb " #lx ", " #sx "               \n\t"\
1355
                "movq "MANGLE(b00)", " #lx "            \n\t"\
1355
                "movq %5, " #lx "                       \n\t"\
1356
                "pcmpeqb " #lx ", " #t1 "               \n\t" /* src[-1] > a ? 0 : -1*/\
1356
                "pcmpeqb " #lx ", " #t1 "               \n\t" /* src[-1] > a ? 0 : -1*/\
1357
                "pcmpeqb " #lx ", " #t0 "               \n\t" /* src[+1] > a ? 0 : -1*/\
1357
                "pcmpeqb " #lx ", " #t0 "               \n\t" /* src[+1] > a ? 0 : -1*/\
1358
                "pcmpeqb " #lx ", " #sx "               \n\t" /* src[0]  > a ? 0 : -1*/\
1358
                "pcmpeqb " #lx ", " #sx "               \n\t" /* src[0]  > a ? 0 : -1*/\
Lines 1368-1375 FIND_MIN_MAX((%0, %1, 8)) Link Here
1368
                PMINUB(t1, pplx, t0)\
1368
                PMINUB(t1, pplx, t0)\
1369
                "paddb " #sx ", " #ppsx "               \n\t"\
1369
                "paddb " #sx ", " #ppsx "               \n\t"\
1370
                "paddb " #psx ", " #ppsx "              \n\t"\
1370
                "paddb " #psx ", " #ppsx "              \n\t"\
1371
                "#paddb "MANGLE(b02)", " #ppsx "        \n\t"\
1371
                "#paddb %6, " #ppsx "                   \n\t"\
1372
                "pand "MANGLE(b08)", " #ppsx "          \n\t"\
1372
                "pand %7, " #ppsx "                     \n\t"\
1373
                "pcmpeqb " #lx ", " #ppsx "             \n\t"\
1373
                "pcmpeqb " #lx ", " #ppsx "             \n\t"\
1374
                "pand " #ppsx ", " #pplx "              \n\t"\
1374
                "pand " #ppsx ", " #pplx "              \n\t"\
1375
                "pandn " #dst ", " #ppsx "              \n\t"\
1375
                "pandn " #dst ", " #ppsx "              \n\t"\
Lines 1405-1411 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) Link Here
1405
DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1405
DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1406
1406
1407
                "1:                        \n\t"
1407
                "1:                        \n\t"
1408
                : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
1408
                : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2), "m" (deringThreshold), "m" (b00), "m" (b02), "m" (b08)
1409
                : "%"REG_a, "%"REG_d, "%"REG_c
1409
                : "%"REG_a, "%"REG_d, "%"REG_c
1410
        );
1410
        );
1411
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1411
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 2283-2289 static inline void RENAME(tempNoiseReduc Link Here
2283
#else //L1_DIFF
2283
#else //L1_DIFF
2284
#if defined (FAST_L2_DIFF)
2284
#if defined (FAST_L2_DIFF)
2285
                "pcmpeqb %%mm7, %%mm7                   \n\t"
2285
                "pcmpeqb %%mm7, %%mm7                   \n\t"
2286
                "movq "MANGLE(b80)", %%mm6              \n\t"
2286
                "movq %4, %%mm6                         \n\t"
2287
                "pxor %%mm0, %%mm0                      \n\t"
2287
                "pxor %%mm0, %%mm0                      \n\t"
2288
#define REAL_L2_DIFF_CORE(a, b)\
2288
#define REAL_L2_DIFF_CORE(a, b)\
2289
                "movq " #a ", %%mm5                     \n\t"\
2289
                "movq " #a ", %%mm5                     \n\t"\
Lines 2532-2538 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc Link Here
2532
2532
2533
                "4:                                     \n\t"
2533
                "4:                                     \n\t"
2534
2534
2535
                :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
2535
                :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast), "m" (b80)
2536
                : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2536
                : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2537
                );
2537
                );
2538
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2538
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 2805-2812 asm volatile( Link Here
2805
                "movq %%mm6, %%mm1                      \n\t"
2805
                "movq %%mm6, %%mm1                      \n\t"
2806
                "psllw $2, %%mm0                        \n\t"
2806
                "psllw $2, %%mm0                        \n\t"
2807
                "psllw $2, %%mm1                        \n\t"
2807
                "psllw $2, %%mm1                        \n\t"
2808
                "paddw "MANGLE(w04)", %%mm0             \n\t"
2808
                "paddw %5, %%mm0                        \n\t"
2809
                "paddw "MANGLE(w04)", %%mm1             \n\t"
2809
                "paddw %5, %%mm1                        \n\t"
2810
2810
2811
#define NEXT\
2811
#define NEXT\
2812
                "movq (%0), %%mm2                       \n\t"\
2812
                "movq (%0), %%mm2                       \n\t"\
Lines 2895-2901 asm volatile( Link Here
2895
                "mov %4, %0                             \n\t" //FIXME
2895
                "mov %4, %0                             \n\t" //FIXME
2896
2896
2897
                : "+&r"(src)
2897
                : "+&r"(src)
2898
                : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
2898
                : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src), "m" (w04)
2899
                );
2899
                );
2900
2900
2901
                src+= step; // src points to begin of the 8x8 Block
2901
                src+= step; // src points to begin of the 8x8 Block
Lines 3112-3121 asm volatile( Link Here
3112
                "psubusw %%mm1, %%mm5                   \n\t" // ld
3112
                "psubusw %%mm1, %%mm5                   \n\t" // ld
3113
3113
3114
3114
3115
                "movq "MANGLE(w05)", %%mm2              \n\t" // 5
3115
                "movq %4, %%mm2                         \n\t" // 5
3116
                "pmullw %%mm2, %%mm4                    \n\t"
3116
                "pmullw %%mm2, %%mm4                    \n\t"
3117
                "pmullw %%mm2, %%mm5                    \n\t"
3117
                "pmullw %%mm2, %%mm5                    \n\t"
3118
                "movq "MANGLE(w20)", %%mm2              \n\t" // 32
3118
                "movq %5, %%mm2                         \n\t" // 32
3119
                "paddw %%mm2, %%mm4                     \n\t"
3119
                "paddw %%mm2, %%mm4                     \n\t"
3120
                "paddw %%mm2, %%mm5                     \n\t"
3120
                "paddw %%mm2, %%mm5                     \n\t"
3121
                "psrlw $6, %%mm4                        \n\t"
3121
                "psrlw $6, %%mm4                        \n\t"
Lines 3167-3173 asm volatile( Link Here
3167
                "movq %%mm0, (%0, %1)                   \n\t"
3167
                "movq %%mm0, (%0, %1)                   \n\t"
3168
3168
3169
                : "+r" (temp_src)
3169
                : "+r" (temp_src)
3170
                : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
3170
                : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask), "m" (w05), "m" (w20)
3171
                : "%"REG_a, "%"REG_c
3171
                : "%"REG_a, "%"REG_c
3172
                );
3172
                );
3173
        }
3173
        }
Lines 3198-3207 static inline void RENAME(blockCopy)(uin Link Here
3198
        {
3198
        {
3199
#ifdef HAVE_MMX
3199
#ifdef HAVE_MMX
3200
                asm volatile(
3200
                asm volatile(
3201
                        "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
3201
                        "movq (%0), %%mm2               \n\t" // packedYOffset
3202
                        "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
3202
                        "movq 8(%0), %%mm3              \n\t" // packedYScale
3203
                        "lea (%2,%4), %%"REG_a"         \n\t"
3204
                        "lea (%3,%5), %%"REG_d"         \n\t"
3205
                        "pxor %%mm4, %%mm4              \n\t"
3203
                        "pxor %%mm4, %%mm4              \n\t"
3206
#ifdef HAVE_MMX2
3204
#ifdef HAVE_MMX2
3207
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
3205
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
Lines 3257-3278 static inline void RENAME(blockCopy)(uin Link Here
3257
#define SCALED_CPY(src1, src2, dst1, dst2)\
3255
#define SCALED_CPY(src1, src2, dst1, dst2)\
3258
   REAL_SCALED_CPY(src1, src2, dst1, dst2)
3256
   REAL_SCALED_CPY(src1, src2, dst1, dst2)
3259
3257
3260
SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
3258
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3261
SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
3259
                        "lea (%1,%3,2), %1              \n\t"
3262
SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
3260
                        "lea (%2,%4,2), %2              \n\t"
3263
                        "lea (%%"REG_a",%4,4), %%"REG_a"        \n\t"
3261
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3264
                        "lea (%%"REG_d",%5,4), %%"REG_d"        \n\t"
3262
                        "lea (%1,%3,2), %1              \n\t"
3265
SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
3263
                        "lea (%2,%4,2), %2              \n\t"
3266
3264
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3267
3265
                        "lea (%1,%3,2), %1              \n\t"
3268
                        : "=&a" (packedOffsetAndScale)
3266
                        "lea (%2,%4,2), %2              \n\t"
3269
                        : "0" (packedOffsetAndScale),
3267
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3270
                        "r"(src),
3268
3271
                        "r"(dst),
3269
                        : "+r" (packedOffsetAndScale),
3272
                        "r" ((long)srcStride),
3270
                          "+r"(src),
3273
                        "r" ((long)dstStride)
3271
                          "+r"(dst)
3274
                        : "%"REG_d
3272
                        : "r" ((long)srcStride),
3275
                                        );
3273
                          "r" ((long)dstStride)
3274
                        : "memory"
3275
                );
3276
#else //HAVE_MMX
3276
#else //HAVE_MMX
3277
        for(i=0; i<8; i++)
3277
        for(i=0; i<8; i++)
3278
                memcpy( &(dst[dstStride*i]),
3278
                memcpy( &(dst[dstStride*i]),
Lines 3283-3290 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2) Link Here
3283
        {
3283
        {
3284
#ifdef HAVE_MMX
3284
#ifdef HAVE_MMX
3285
        asm volatile(
3285
        asm volatile(
3286
                "lea (%0,%2), %%"REG_a"                 \n\t"
3287
                "lea (%1,%3), %%"REG_d"                 \n\t"
3288
3286
3289
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
3287
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
3290
                "movq " #src1 ", %%mm0          \n\t"\
3288
                "movq " #src1 ", %%mm0          \n\t"\
Lines 3295-3312 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2) Link Here
3295
#define SIMPLE_CPY(src1, src2, dst1, dst2)\
3293
#define SIMPLE_CPY(src1, src2, dst1, dst2)\
3296
   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
3294
   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
3297
3295
3298
SIMPLE_CPY((%0)       , (%0, %2)       , (%1)       , (%1, %3))
3296
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3299
SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
3297
                 "lea (%0,%2,2), %0             \n\t"
3300
SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
3298
                 "lea (%1,%3,2), %1             \n\t"
3301
                "lea (%%"REG_a",%2,4), %%"REG_a"        \n\t"
3299
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3302
                "lea (%%"REG_d",%3,4), %%"REG_d"        \n\t"
3300
                 "lea (%0,%2,2), %0             \n\t"
3303
SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
3301
                 "lea (%1,%3,2), %1             \n\t"
3304
3302
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3305
                : : "r" (src),
3303
                 "lea (%0,%2), %0               \n\t"
3306
                "r" (dst),
3304
                 "lea (%1,%3), %1               \n\t"
3307
                "r" ((long)srcStride),
3305
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3308
                "r" ((long)dstStride)
3306
3309
                : "%"REG_a, "%"REG_d
3307
                : "+r" (src),
3308
                  "+r" (dst)
3309
                : "r" ((long)srcStride),
3310
                  "r" ((long)dstStride)
3311
                : "memory"
3310
        );
3312
        );
3311
#else //HAVE_MMX
3313
#else //HAVE_MMX
3312
        for(i=0; i<8; i++)
3314
        for(i=0; i<8; i++)

Return to bug 115568