Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 179872 | Differences between
and this patch

Collapse All | Expand All

(-)ffmpeg-old/libavcodec/i386/dsputil_h264_template_mmx.c (-3 / +3 lines)
Lines 188-195 static void H264_CHROMA_MC4_TMPL(uint8_t Link Here
188
        "pxor   %%mm7, %%mm7        \n\t"
188
        "pxor   %%mm7, %%mm7        \n\t"
189
        "movd %5, %%mm2             \n\t"
189
        "movd %5, %%mm2             \n\t"
190
        "movd %6, %%mm3             \n\t"
190
        "movd %6, %%mm3             \n\t"
191
        "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
191
        "movq %7, %%mm4             \n\t"
192
        "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
192
        "movq %7, %%mm5             \n\t"
193
        "punpcklwd %%mm2, %%mm2     \n\t"
193
        "punpcklwd %%mm2, %%mm2     \n\t"
194
        "punpcklwd %%mm3, %%mm3     \n\t"
194
        "punpcklwd %%mm3, %%mm3     \n\t"
195
        "punpcklwd %%mm2, %%mm2     \n\t"
195
        "punpcklwd %%mm2, %%mm2     \n\t"
Lines 246-252 static void H264_CHROMA_MC4_TMPL(uint8_t Link Here
246
        "sub $2, %2                 \n\t"
246
        "sub $2, %2                 \n\t"
247
        "jnz 1b                     \n\t"
247
        "jnz 1b                     \n\t"
248
        : "+r"(dst), "+r"(src), "+r"(h)
248
        : "+r"(dst), "+r"(src), "+r"(h)
249
        : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
249
        : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8)
250
    );
250
    );
251
}
251
}
252
252
(-)ffmpeg-old/libavcodec/i386/dsputil_mmx.c (-63 / +62 lines)
Lines 664-678 static inline void transpose4x4(uint8_t Link Here
664
        "punpckhwd %%mm2, %%mm1         \n\t"
664
        "punpckhwd %%mm2, %%mm1         \n\t"
665
        "movd  %%mm0, %0                \n\t"
665
        "movd  %%mm0, %0                \n\t"
666
        "punpckhdq %%mm0, %%mm0         \n\t"
666
        "punpckhdq %%mm0, %%mm0         \n\t"
667
        "movd  %%mm0, %1                \n\t"
667
        "movd  %%mm0, (%0,%1)           \n\t"
668
        "movd  %%mm1, %2                \n\t"
668
        "movd  %%mm1, (%0,%1,2)         \n\t"
669
        "punpckhdq %%mm1, %%mm1         \n\t"
669
        "punpckhdq %%mm1, %%mm1         \n\t"
670
        "movd  %%mm1, %3                \n\t"
670
        "lea (%1,%1,2), %1              \n\t"
671
        "movd  %%mm1, (%0,%1)           \n\t"
671
672
672
        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
673
        : "=r" (*(uint32_t*)(dst)), "+r" (dst_stride)
673
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
674
        :: "memory"
674
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
675
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
676
    );
675
    );
677
}
676
}
678
677
Lines 1917-1923 static int ssd_int8_vs_int16_mmx(int8_t Link Here
1917
1916
1918
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1917
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1919
        "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
1918
        "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
1920
        "movq "MANGLE(ff_pw_20)", %%mm4   \n\t" /* 20 */\
1919
        "movq %5, %%mm4                   \n\t" /* 20 */\
1921
        "pmullw " #m3 ", %%mm4            \n\t" /* 20x1 */\
1920
        "pmullw " #m3 ", %%mm4            \n\t" /* 20x1 */\
1922
        "movq "#in7", " #m3 "             \n\t" /* d */\
1921
        "movq "#in7", " #m3 "             \n\t" /* d */\
1923
        "movq "#in0", %%mm5               \n\t" /* D */\
1922
        "movq "#in0", %%mm5               \n\t" /* D */\
Lines 1929-1935 static int ssd_int8_vs_int16_mmx(int8_t Link Here
1929
        "paddw " #m5 ", %%mm6             \n\t" /* x2 */\
1928
        "paddw " #m5 ", %%mm6             \n\t" /* x2 */\
1930
        "paddw %%mm6, %%mm6               \n\t" /* 2x2 */\
1929
        "paddw %%mm6, %%mm6               \n\t" /* 2x2 */\
1931
        "psubw %%mm6, %%mm5               \n\t" /* -2x2 + x3 */\
1930
        "psubw %%mm6, %%mm5               \n\t" /* -2x2 + x3 */\
1932
        "pmullw "MANGLE(ff_pw_3)", %%mm5  \n\t" /* -6x2 + 3x3 */\
1931
        "pmullw %6, %%mm5                 \n\t" /* -6x2 + 3x3 */\
1933
        "paddw " #rnd ", %%mm4            \n\t" /* x2 */\
1932
        "paddw " #rnd ", %%mm4            \n\t" /* x2 */\
1934
        "paddw %%mm4, %%mm5               \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
1933
        "paddw %%mm4, %%mm5               \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
1935
        "psraw $5, %%mm5                  \n\t"\
1934
        "psraw $5, %%mm5                  \n\t"\
Lines 1963-1977 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1963
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
1962
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
1964
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
1963
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
1965
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
1964
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
1966
        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
1965
        "pmullw %6, %%mm6                 \n\t" /* 3c - 6b */\
1967
        "paddw %%mm4, %%mm0               \n\t" /* a */\
1966
        "paddw %%mm4, %%mm0               \n\t" /* a */\
1968
        "paddw %%mm1, %%mm5               \n\t" /* d */\
1967
        "paddw %%mm1, %%mm5               \n\t" /* d */\
1969
        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
1968
        "pmullw %5, %%mm0                 \n\t" /* 20a */\
1970
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
1969
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
1971
        "paddw %6, %%mm6                  \n\t"\
1970
        "paddw %8, %%mm6                  \n\t"\
1972
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1971
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
1973
        "psraw $5, %%mm0                  \n\t"\
1972
        "psraw $5, %%mm0                  \n\t"\
1974
        "movq %%mm0, %5                   \n\t"\
1973
        "movq %%mm0, %7                   \n\t"\
1975
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1974
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1976
        \
1975
        \
1977
        "movq 5(%0), %%mm0                \n\t" /* FGHIJKLM */\
1976
        "movq 5(%0), %%mm0                \n\t" /* FGHIJKLM */\
Lines 1989-2003 static void OPNAME ## mpeg4_qpel16_h_low Link Here
1989
        "psrlq $24, %%mm6                 \n\t" /* IJKLM000 */\
1988
        "psrlq $24, %%mm6                 \n\t" /* IJKLM000 */\
1990
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0F0G0H0I */\
1989
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0F0G0H0I */\
1991
        "punpcklbw %%mm7, %%mm6           \n\t" /* 0I0J0K0L */\
1990
        "punpcklbw %%mm7, %%mm6           \n\t" /* 0I0J0K0L */\
1992
        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
1991
        "pmullw %6, %%mm3                 \n\t" /* 3c - 6b */\
1993
        "paddw %%mm2, %%mm1               \n\t" /* a */\
1992
        "paddw %%mm2, %%mm1               \n\t" /* a */\
1994
        "paddw %%mm6, %%mm4               \n\t" /* d */\
1993
        "paddw %%mm6, %%mm4               \n\t" /* d */\
1995
        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
1994
        "pmullw %5, %%mm1                 \n\t" /* 20a */\
1996
        "psubw %%mm4, %%mm3               \n\t" /* - 6b +3c - d */\
1995
        "psubw %%mm4, %%mm3               \n\t" /* - 6b +3c - d */\
1997
        "paddw %6, %%mm1                  \n\t"\
1996
        "paddw %8, %%mm1                  \n\t"\
1998
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b +3c - d */\
1997
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b +3c - d */\
1999
        "psraw $5, %%mm3                  \n\t"\
1998
        "psraw $5, %%mm3                  \n\t"\
2000
        "movq %5, %%mm1                   \n\t"\
1999
        "movq %7, %%mm1                   \n\t"\
2001
        "packuswb %%mm3, %%mm1            \n\t"\
2000
        "packuswb %%mm3, %%mm1            \n\t"\
2002
        OP_MMX2(%%mm1, (%1),%%mm4, q)\
2001
        OP_MMX2(%%mm1, (%1),%%mm4, q)\
2003
        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
2002
        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
Lines 2015-2021 static void OPNAME ## mpeg4_qpel16_h_low Link Here
2015
        "psubw %%mm5, %%mm0               \n\t" /* c - 2b */\
2014
        "psubw %%mm5, %%mm0               \n\t" /* c - 2b */\
2016
        "movq %%mm3, %%mm5                \n\t" /* JKLMNOPQ */\
2015
        "movq %%mm3, %%mm5                \n\t" /* JKLMNOPQ */\
2017
        "psrlq $24, %%mm3                 \n\t" /* MNOPQ000 */\
2016
        "psrlq $24, %%mm3                 \n\t" /* MNOPQ000 */\
2018
        "pmullw "MANGLE(ff_pw_3)", %%mm0  \n\t" /* 3c - 6b */\
2017
        "pmullw %6, %%mm0                 \n\t" /* 3c - 6b */\
2019
        "punpcklbw %%mm7, %%mm3           \n\t" /* 0M0N0O0P */\
2018
        "punpcklbw %%mm7, %%mm3           \n\t" /* 0M0N0O0P */\
2020
        "paddw %%mm3, %%mm2               \n\t" /* d */\
2019
        "paddw %%mm3, %%mm2               \n\t" /* d */\
2021
        "psubw %%mm2, %%mm0               \n\t" /* -6b + 3c - d */\
2020
        "psubw %%mm2, %%mm0               \n\t" /* -6b + 3c - d */\
Lines 2023-2030 static void OPNAME ## mpeg4_qpel16_h_low Link Here
2023
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0J0K0L0M */\
2022
        "punpcklbw %%mm7, %%mm2           \n\t" /* 0J0K0L0M */\
2024
        "punpckhbw %%mm7, %%mm5           \n\t" /* 0N0O0P0Q */\
2023
        "punpckhbw %%mm7, %%mm5           \n\t" /* 0N0O0P0Q */\
2025
        "paddw %%mm2, %%mm6               \n\t" /* a */\
2024
        "paddw %%mm2, %%mm6               \n\t" /* a */\
2026
        "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
2025
        "pmullw %5, %%mm6                 \n\t" /* 20a */\
2027
        "paddw %6, %%mm0                  \n\t"\
2026
        "paddw %8, %%mm0                  \n\t"\
2028
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
2027
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
2029
        "psraw $5, %%mm0                  \n\t"\
2028
        "psraw $5, %%mm0                  \n\t"\
2030
        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
2029
        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
Lines 2038-2047 static void OPNAME ## mpeg4_qpel16_h_low Link Here
2038
        "paddw %%mm2, %%mm5               \n\t" /* d */\
2037
        "paddw %%mm2, %%mm5               \n\t" /* d */\
2039
        "paddw %%mm6, %%mm6               \n\t" /* 2b */\
2038
        "paddw %%mm6, %%mm6               \n\t" /* 2b */\
2040
        "psubw %%mm6, %%mm4               \n\t" /* c - 2b */\
2039
        "psubw %%mm6, %%mm4               \n\t" /* c - 2b */\
2041
        "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
2040
        "pmullw %5, %%mm3                 \n\t" /* 20a */\
2042
        "pmullw "MANGLE(ff_pw_3)", %%mm4  \n\t" /* 3c - 6b */\
2041
        "pmullw %6, %%mm4                 \n\t" /* 3c - 6b */\
2043
        "psubw %%mm5, %%mm3               \n\t" /* -6b + 3c - d */\
2042
        "psubw %%mm5, %%mm3               \n\t" /* -6b + 3c - d */\
2044
        "paddw %6, %%mm4                  \n\t"\
2043
        "paddw %8, %%mm4                  \n\t"\
2045
        "paddw %%mm3, %%mm4               \n\t" /* 20a - 6b + 3c - d */\
2044
        "paddw %%mm3, %%mm4               \n\t" /* 20a - 6b + 3c - d */\
2046
        "psraw $5, %%mm4                  \n\t"\
2045
        "psraw $5, %%mm4                  \n\t"\
2047
        "packuswb %%mm4, %%mm0            \n\t"\
2046
        "packuswb %%mm4, %%mm0            \n\t"\
Lines 2051-2058 static void OPNAME ## mpeg4_qpel16_h_low Link Here
2051
        "add %4, %1                       \n\t"\
2050
        "add %4, %1                       \n\t"\
2052
        "decl %2                          \n\t"\
2051
        "decl %2                          \n\t"\
2053
        " jnz 1b                          \n\t"\
2052
        " jnz 1b                          \n\t"\
2054
        : "+a"(src), "+c"(dst), "+m"(h)\
2053
        : : "a"(src), "c"(dst), "m"(h), \
2055
        : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
2054
         "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\
2056
        : "memory"\
2055
        : "memory"\
2057
    );\
2056
    );\
2058
}\
2057
}\
Lines 2130-2141 static void OPNAME ## mpeg4_qpel8_h_lowp Link Here
2130
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
2129
        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
2131
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
2130
        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
2132
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
2131
        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
2133
        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
2132
        "pmullw %6, %%mm6                 \n\t" /* 3c - 6b */\
2134
        "paddw %%mm4, %%mm0               \n\t" /* a */\
2133
        "paddw %%mm4, %%mm0               \n\t" /* a */\
2135
        "paddw %%mm1, %%mm5               \n\t" /* d */\
2134
        "paddw %%mm1, %%mm5               \n\t" /* d */\
2136
        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
2135
        "pmullw %5, %%mm0                 \n\t" /* 20a */\
2137
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
2136
        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
2138
        "paddw %6, %%mm6                  \n\t"\
2137
        "paddw %8, %%mm6                  \n\t"\
2139
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
2138
        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
2140
        "psraw $5, %%mm0                  \n\t"\
2139
        "psraw $5, %%mm0                  \n\t"\
2141
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
2140
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
Lines 2151-2160 static void OPNAME ## mpeg4_qpel8_h_lowp Link Here
2151
        "paddw %%mm5, %%mm4               \n\t" /* d */\
2150
        "paddw %%mm5, %%mm4               \n\t" /* d */\
2152
        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
2151
        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
2153
        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
2152
        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
2154
        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
2153
        "pmullw %5, %%mm1                 \n\t" /* 20a */\
2155
        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
2154
        "pmullw %6, %%mm3                 \n\t" /* 3c - 6b */\
2156
        "psubw %%mm4, %%mm3               \n\t" /* -6b + 3c - d */\
2155
        "psubw %%mm4, %%mm3               \n\t" /* -6b + 3c - d */\
2157
        "paddw %6, %%mm1                  \n\t"\
2156
        "paddw %8, %%mm1                  \n\t"\
2158
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b + 3c - d */\
2157
        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b + 3c - d */\
2159
        "psraw $5, %%mm3                  \n\t"\
2158
        "psraw $5, %%mm3                  \n\t"\
2160
        "packuswb %%mm3, %%mm0            \n\t"\
2159
        "packuswb %%mm3, %%mm0            \n\t"\
Lines 2164-2171 static void OPNAME ## mpeg4_qpel8_h_lowp Link Here
2164
        "add %4, %1                       \n\t"\
2163
        "add %4, %1                       \n\t"\
2165
        "decl %2                          \n\t"\
2164
        "decl %2                          \n\t"\
2166
        " jnz 1b                          \n\t"\
2165
        " jnz 1b                          \n\t"\
2167
        : "+a"(src), "+c"(dst), "+m"(h)\
2166
        : : "a"(src), "c"(dst), "m"(h), \
2168
        : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
2167
         "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\
2169
        : "memory"\
2168
        : "memory"\
2170
    );\
2169
    );\
2171
}\
2170
}\
Lines 2244-2282 static void OPNAME ## mpeg4_qpel16_v_low Link Here
2244
        "movq 8(%0), %%mm1              \n\t"\
2243
        "movq 8(%0), %%mm1              \n\t"\
2245
        "movq 16(%0), %%mm2             \n\t"\
2244
        "movq 16(%0), %%mm2             \n\t"\
2246
        "movq 24(%0), %%mm3             \n\t"\
2245
        "movq 24(%0), %%mm3             \n\t"\
2247
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2246
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2248
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2247
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2249
        "add %4, %1                     \n\t"\
2248
        "add %4, %1                     \n\t"\
2250
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2249
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2251
        \
2250
        \
2252
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2251
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2253
        "add %4, %1                     \n\t"\
2252
        "add %4, %1                     \n\t"\
2254
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2253
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2255
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
2254
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
2256
        "add %4, %1                     \n\t"\
2255
        "add %4, %1                     \n\t"\
2257
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
2256
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
2258
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
2257
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
2259
        "add %4, %1                     \n\t"\
2258
        "add %4, %1                     \n\t"\
2260
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
2259
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
2261
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
2260
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
2262
        "add %4, %1                     \n\t"\
2261
        "add %4, %1                     \n\t"\
2263
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
2262
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
2264
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
2263
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
2265
        "add %4, %1                     \n\t"\
2264
        "add %4, %1                     \n\t"\
2266
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
2265
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
2267
        \
2266
        \
2268
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
2267
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
2269
        "add %4, %1                     \n\t"  \
2268
        "add %4, %1                     \n\t"  \
2270
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
2269
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
2271
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
2270
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
2272
        \
2271
        \
2273
        "add $136, %0                   \n\t"\
2272
        "add $136, %0                   \n\t"\
2274
        "add %6, %1                     \n\t"\
2273
        "add %8, %1                     \n\t"\
2275
        "decl %2                        \n\t"\
2274
        "decl %2                        \n\t"\
2276
        " jnz 1b                        \n\t"\
2275
        " jnz 1b                        \n\t"\
2277
        \
2276
        \
2278
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2277
        : : "r"(temp_ptr), "r"(dst), "rm"(count), \
2279
        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
2278
         "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*(long)dstStride)\
2280
        :"memory"\
2279
        :"memory"\
2281
    );\
2280
    );\
2282
}\
2281
}\
Lines 2316-2342 static void OPNAME ## mpeg4_qpel8_v_lowp Link Here
2316
        "movq 8(%0), %%mm1              \n\t"\
2315
        "movq 8(%0), %%mm1              \n\t"\
2317
        "movq 16(%0), %%mm2             \n\t"\
2316
        "movq 16(%0), %%mm2             \n\t"\
2318
        "movq 24(%0), %%mm3             \n\t"\
2317
        "movq 24(%0), %%mm3             \n\t"\
2319
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2318
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2320
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2319
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2321
        "add %4, %1                     \n\t"\
2320
        "add %4, %1                     \n\t"\
2322
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2321
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2323
        \
2322
        \
2324
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2323
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2325
        "add %4, %1                     \n\t"\
2324
        "add %4, %1                     \n\t"\
2326
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2325
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2327
        \
2326
        \
2328
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
2327
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
2329
        "add %4, %1                     \n\t"\
2328
        "add %4, %1                     \n\t"\
2330
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
2329
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
2331
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
2330
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
2332
                \
2331
                \
2333
        "add $72, %0                    \n\t"\
2332
        "add $72, %0                    \n\t"\
2334
        "add %6, %1                     \n\t"\
2333
        "add %8, %1                     \n\t"\
2335
        "decl %2                        \n\t"\
2334
        "decl %2                        \n\t"\
2336
        " jnz 1b                        \n\t"\
2335
        " jnz 1b                        \n\t"\
2337
         \
2336
         \
2338
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2337
        : : "r"(temp_ptr), "r"(dst), "rm"(count), \
2339
        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
2338
         "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*(long)dstStride)\
2340
        : "memory"\
2339
        : "memory"\
2341
   );\
2340
   );\
2342
}\
2341
}\
(-)ffmpeg-old/libavcodec/i386/h264dsp_mmx.c (-15 / +19 lines)
Lines 341-361 static void ff_h264_idct8_dc_add_mmx2(ui Link Here
341
// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
341
// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
342
// out: mm1=p0' mm2=q0'
342
// out: mm1=p0' mm2=q0'
343
// clobbers: mm0,3-6
343
// clobbers: mm0,3-6
344
#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
344
#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f, pb_3, pb_A1)\
345
        "movq    %%mm1              , %%mm5 \n\t"\
345
        "movq    %%mm1              , %%mm5 \n\t"\
346
        "pxor    %%mm2              , %%mm5 \n\t" /* p0^q0*/\
346
        "pxor    %%mm2              , %%mm5 \n\t" /* p0^q0*/\
347
        "pand    "#pb_01"           , %%mm5 \n\t" /* (p0^q0)&1*/\
347
        "pand    "#pb_01"           , %%mm5 \n\t" /* (p0^q0)&1*/\
348
        "pcmpeqb %%mm4              , %%mm4 \n\t"\
348
        "pcmpeqb %%mm4              , %%mm4 \n\t"\
349
        "pxor    %%mm4              , %%mm3 \n\t"\
349
        "pxor    %%mm4              , %%mm3 \n\t"\
350
        "pavgb   %%mm0              , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
350
        "pavgb   %%mm0              , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
351
        "pavgb   "MANGLE(ff_pb_3)"  , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
351
        "pavgb   "#pb_3"            , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
352
        "pxor    %%mm1              , %%mm4 \n\t"\
352
        "pxor    %%mm1              , %%mm4 \n\t"\
353
        "pavgb   %%mm2              , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
353
        "pavgb   %%mm2              , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
354
        "pavgb   %%mm5              , %%mm3 \n\t"\
354
        "pavgb   %%mm5              , %%mm3 \n\t"\
355
        "paddusb %%mm4              , %%mm3 \n\t" /* d+128+33*/\
355
        "paddusb %%mm4              , %%mm3 \n\t" /* d+128+33*/\
356
        "movq    "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\
356
        "movq    "#pb_A1"           , %%mm6 \n\t"\
357
        "psubusb %%mm3              , %%mm6 \n\t"\
357
        "psubusb %%mm3              , %%mm6 \n\t"\
358
        "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\
358
        "psubusb "#pb_A1"           , %%mm3 \n\t"\
359
        "pminub  %%mm7              , %%mm6 \n\t"\
359
        "pminub  %%mm7              , %%mm6 \n\t"\
360
        "pminub  %%mm7              , %%mm3 \n\t"\
360
        "pminub  %%mm7              , %%mm3 \n\t"\
361
        "psubusb %%mm6              , %%mm1 \n\t"\
361
        "psubusb %%mm6              , %%mm1 \n\t"\
Lines 422-435 static inline void h264_loop_filter_luma Link Here
422
        H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
422
        H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
423
423
424
        /* filter p0, q0 */
424
        /* filter p0, q0 */
425
        H264_DEBLOCK_P0_Q0(%8, unused)
425
        H264_DEBLOCK_P0_Q0(%8, unused, %9, %10)
426
        "movq      %%mm1, (%1,%3,2) \n\t"
426
        "movq      %%mm1, (%1,%3,2) \n\t"
427
        "movq      %%mm2, (%2)      \n\t"
427
        "movq      %%mm2, (%2)      \n\t"
428
428
429
        : "=m"(*tmp0)
429
        : "=m"(*tmp0)
430
        : "r"(pix-3*stride), "r"(pix), "r"((long)stride),
430
        : "r"(pix-3*stride), "r"(pix), "r"((long)stride),
431
          "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
431
          "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
432
          "m"(mm_bone)
432
          "m"(mm_bone), "m" (ff_pb_3), "m" (ff_pb_A1)
433
    );
433
    );
434
}
434
}
435
435
Lines 470-482 static inline void h264_loop_filter_chro Link Here
470
        "movd      %3,    %%mm6     \n\t"
470
        "movd      %3,    %%mm6     \n\t"
471
        "punpcklbw %%mm6, %%mm6     \n\t"
471
        "punpcklbw %%mm6, %%mm6     \n\t"
472
        "pand      %%mm6, %%mm7     \n\t" // mm7 = tc&mask
472
        "pand      %%mm6, %%mm7     \n\t" // mm7 = tc&mask
473
        H264_DEBLOCK_P0_Q0(%6, %7)
473
        H264_DEBLOCK_P0_Q0(%6, %7, %8, %9)
474
        "movq      %%mm1, (%0,%2)   \n\t"
474
        "movq      %%mm1, (%0,%2)   \n\t"
475
        "movq      %%mm2, (%1)      \n\t"
475
        "movq      %%mm2, (%1)      \n\t"
476
476
477
        :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
477
        :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
478
           "r"(*(uint32_t*)tc0),
478
           "r"(*(uint32_t*)tc0),
479
           "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F)
479
           "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F), "m" (ff_pb_3), "m" (ff_pb_A1)
480
    );
480
    );
481
}
481
}
482
482
Lines 583-604 static void h264_loop_filter_strength_mm Link Here
583
                        "paddb %%mm6, %%mm1 \n\t"
583
                        "paddb %%mm6, %%mm1 \n\t"
584
                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
584
                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
585
                        "por %%mm1, %%mm0 \n\t"
585
                        "por %%mm1, %%mm0 \n\t"
586
                        ::"m"(ref[l][b_idx]),
587
                          "m"(ref[l][b_idx+d_idx])
588
                        : "memory"
589
                    );
586
590
587
                        "movq %2, %%mm1 \n\t"
591
                    asm volatile(
588
                        "movq %3, %%mm2 \n\t"
592
                        "movq %0, %%mm1 \n\t"
589
                        "psubw %4, %%mm1 \n\t"
593
                        "movq %1, %%mm2 \n\t"
590
                        "psubw %5, %%mm2 \n\t"
594
                        "psubw %2, %%mm1 \n\t"
595
                        "psubw %3, %%mm2 \n\t"
591
                        "packsswb %%mm2, %%mm1 \n\t"
596
                        "packsswb %%mm2, %%mm1 \n\t"
592
                        "paddb %%mm5, %%mm1 \n\t"
597
                        "paddb %%mm5, %%mm1 \n\t"
593
                        "pminub %%mm4, %%mm1 \n\t"
598
                        "pminub %%mm4, %%mm1 \n\t"
594
                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
599
                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
595
                        "por %%mm1, %%mm0 \n\t"
600
                        "por %%mm1, %%mm0 \n\t"
596
                        ::"m"(ref[l][b_idx]),
601
                        ::"m"(mv[l][b_idx][0]),
597
                          "m"(ref[l][b_idx+d_idx]),
598
                          "m"(mv[l][b_idx][0]),
599
                          "m"(mv[l][b_idx+2][0]),
602
                          "m"(mv[l][b_idx+2][0]),
600
                          "m"(mv[l][b_idx+d_idx][0]),
603
                          "m"(mv[l][b_idx+d_idx][0]),
601
                          "m"(mv[l][b_idx+d_idx+2][0])
604
                          "m"(mv[l][b_idx+d_idx+2][0])
605
                        : "memory"
602
                    );
606
                    );
603
                }
607
                }
604
            }
608
            }
(-)ffmpeg-old/libavcodec/i386/motion_est_mmx.c (-4 / +4 lines)
Lines 167-173 static inline void sad8_y2a_mmx2(uint8_t Link Here
167
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
167
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
168
{
168
{
169
    asm volatile(
169
    asm volatile(
170
        "movq "MANGLE(bone)", %%mm5     \n\t"
170
        "movq %4, %%mm5                 \n\t"
171
        "movq (%1), %%mm0               \n\t"
171
        "movq (%1), %%mm0               \n\t"
172
        "pavgb 1(%1), %%mm0             \n\t"
172
        "pavgb 1(%1), %%mm0             \n\t"
173
        "add %3, %1                     \n\t"
173
        "add %3, %1                     \n\t"
Lines 190-196 static inline void sad8_4_mmx2(uint8_t * Link Here
190
        "sub $2, %0                     \n\t"
190
        "sub $2, %0                     \n\t"
191
        " jg 1b                         \n\t"
191
        " jg 1b                         \n\t"
192
        : "+r" (h), "+r" (blk1), "+r" (blk2)
192
        : "+r" (h), "+r" (blk1), "+r" (blk2)
193
        : "r" ((long)stride)
193
        : "r" ((long)stride), "m" (bone)
194
    );
194
    );
195
}
195
}
196
196
Lines 258-264 static inline void sad8_4_mmx(uint8_t *b Link Here
258
        "punpckhbw %%mm7, %%mm5         \n\t"
258
        "punpckhbw %%mm7, %%mm5         \n\t"
259
        "paddw %%mm4, %%mm2             \n\t"
259
        "paddw %%mm4, %%mm2             \n\t"
260
        "paddw %%mm5, %%mm3             \n\t"
260
        "paddw %%mm5, %%mm3             \n\t"
261
        "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
261
        "movq %5, %%mm5                 \n\t"
262
        "paddw %%mm2, %%mm0             \n\t"
262
        "paddw %%mm2, %%mm0             \n\t"
263
        "paddw %%mm3, %%mm1             \n\t"
263
        "paddw %%mm3, %%mm1             \n\t"
264
        "paddw %%mm5, %%mm0             \n\t"
264
        "paddw %%mm5, %%mm0             \n\t"
Lines 281-287 static inline void sad8_4_mmx(uint8_t *b Link Here
281
        "add %4, %%"REG_a"              \n\t"
281
        "add %4, %%"REG_a"              \n\t"
282
        " js 1b                         \n\t"
282
        " js 1b                         \n\t"
283
        : "+a" (len)
283
        : "+a" (len)
284
        : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
284
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[2])
285
    );
285
    );
286
}
286
}
287
287
(-)ffmpeg-old/libavcodec/i386/mpegvideo_mmx_template.c (-5 / +9 lines)
Lines 154-160 static int RENAME(dct_quantize)(MpegEncC Link Here
154
            SPREADW(MM"3")
154
            SPREADW(MM"3")
155
            "pxor "MM"7, "MM"7                  \n\t" // 0
155
            "pxor "MM"7, "MM"7                  \n\t" // 0
156
            "pxor "MM"4, "MM"4                  \n\t" // 0
156
            "pxor "MM"4, "MM"4                  \n\t" // 0
157
            MOVQ" (%2), "MM"5                   \n\t" // qmat[0]
157
            MOVQ" %2, "MM"5                     \n\t" // qmat[0]
158
            "pxor "MM"6, "MM"6                  \n\t"
158
            "pxor "MM"6, "MM"6                  \n\t"
159
            "psubw (%3), "MM"6                  \n\t" // -bias[0]
159
            "psubw (%3), "MM"6                  \n\t" // -bias[0]
160
            "mov $-128, %%"REG_a"               \n\t"
160
            "mov $-128, %%"REG_a"               \n\t"
Lines 178-192 static int RENAME(dct_quantize)(MpegEncC Link Here
178
            "movd "MM"3, %%"REG_a"              \n\t"
178
            "movd "MM"3, %%"REG_a"              \n\t"
179
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
179
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
180
            : "+a" (last_non_zero_p1)
180
            : "+a" (last_non_zero_p1)
181
            : "r" (block+64), "r" (qmat), "r" (bias),
181
            : "r" (block+64), "m" (qmat), "r" (bias),
182
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
182
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
183
        );
183
        );
184
    }else{ // FMT_H263
184
    }else{ // FMT_H263
185
        asm volatile(
185
        asm volatile(
186
            "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1
186
            "movd %0, %%mm3                     \n\t" // last_non_zero_p1
187
            SPREADW(MM"3")
187
            SPREADW(MM"3")
188
            "pxor "MM"7, "MM"7                  \n\t" // 0
188
            "pxor "MM"7, "MM"7                  \n\t" // 0
189
            "pxor "MM"4, "MM"4                  \n\t" // 0
189
            "pxor "MM"4, "MM"4                  \n\t" // 0
190
            "push %%"REG_a"                     \n\t"
190
            "mov $-128, %%"REG_a"               \n\t"
191
            "mov $-128, %%"REG_a"               \n\t"
191
            ASMALIGN(4)
192
            ASMALIGN(4)
192
            "1:                                 \n\t"
193
            "1:                                 \n\t"
Lines 209-217 static int RENAME(dct_quantize)(MpegEncC Link Here
209
            PMAX(MM"3", MM"0")
210
            PMAX(MM"3", MM"0")
210
            "movd "MM"3, %%"REG_a"              \n\t"
211
            "movd "MM"3, %%"REG_a"              \n\t"
211
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
212
            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
212
            : "+a" (last_non_zero_p1)
213
            "mov %%"REG_a", %0                  \n\t"
214
            "pop %%"REG_a"                      \n\t"
215
            : "+m" (last_non_zero_p1)
213
            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
216
            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
214
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
217
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64),
218
              "i" (sizeof(long))
215
        );
219
        );
216
    }
220
    }
217
    asm volatile(
221
    asm volatile(
(-)ffmpeg-old/libavcodec/i386/simple_idct_mmx.c (-5 / +5 lines)
Lines 363-369 static inline void idct(int16_t *block) Link Here
363
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
363
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
364
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
364
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
365
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
365
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
366
        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
366
        "movq %3, %%mm4                 \n\t"\
367
        "pand %%mm0, %%mm4              \n\t"\
367
        "pand %%mm0, %%mm4              \n\t"\
368
        "por %%mm1, %%mm4               \n\t"\
368
        "por %%mm1, %%mm4               \n\t"\
369
        "por %%mm2, %%mm4               \n\t"\
369
        "por %%mm2, %%mm4               \n\t"\
Lines 437-443 static inline void idct(int16_t *block) Link Here
437
        "jmp 2f                         \n\t"\
437
        "jmp 2f                         \n\t"\
438
        "1:                             \n\t"\
438
        "1:                             \n\t"\
439
        "pslld $16, %%mm0               \n\t"\
439
        "pslld $16, %%mm0               \n\t"\
440
        "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
440
        "#paddd %4, %%mm0                \n\t"\
441
        "psrad $13, %%mm0               \n\t"\
441
        "psrad $13, %%mm0               \n\t"\
442
        "packssdw %%mm0, %%mm0          \n\t"\
442
        "packssdw %%mm0, %%mm0          \n\t"\
443
        "movq %%mm0, " #dst "           \n\t"\
443
        "movq %%mm0, " #dst "           \n\t"\
Lines 471-477 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(% Link Here
471
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
471
        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
472
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
472
        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
473
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
473
        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
474
        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
474
        "movq %3, %%mm4                 \n\t"\
475
        "pand %%mm0, %%mm4              \n\t"\
475
        "pand %%mm0, %%mm4              \n\t"\
476
        "por %%mm1, %%mm4               \n\t"\
476
        "por %%mm1, %%mm4               \n\t"\
477
        "por %%mm2, %%mm4               \n\t"\
477
        "por %%mm2, %%mm4               \n\t"\
Lines 545-551 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(% Link Here
545
        "jmp 2f                         \n\t"\
545
        "jmp 2f                         \n\t"\
546
        "1:                             \n\t"\
546
        "1:                             \n\t"\
547
        "pslld $16, %%mm0               \n\t"\
547
        "pslld $16, %%mm0               \n\t"\
548
        "paddd "MANGLE(d40000)", %%mm0  \n\t"\
548
        "paddd %4, %%mm0                \n\t"\
549
        "psrad $13, %%mm0               \n\t"\
549
        "psrad $13, %%mm0               \n\t"\
550
        "packssdw %%mm0, %%mm0          \n\t"\
550
        "packssdw %%mm0, %%mm0          \n\t"\
551
        "movq %%mm0, " #dst "           \n\t"\
551
        "movq %%mm0, " #dst "           \n\t"\
Lines 1270-1276 Temp Link Here
1270
*/
1270
*/
1271
1271
1272
"9: \n\t"
1272
"9: \n\t"
1273
                :: "r" (block), "r" (temp), "r" (coeffs)
1273
                :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m" (d40000)
1274
                : "%eax"
1274
                : "%eax"
1275
        );
1275
        );
1276
}
1276
}
(-)ffmpeg-old/libavcodec/i386/snowdsp_mmx.c (-12 / +10 lines)
Lines 629-638 void ff_snow_vertical_compose97i_mmx(DWT Link Here
629
629
630
#define snow_inner_add_yblock_sse2_header \
630
#define snow_inner_add_yblock_sse2_header \
631
    DWTELEM * * dst_array = sb->line + src_y;\
631
    DWTELEM * * dst_array = sb->line + src_y;\
632
    long tmp;\
632
    long tmp = b_h;\
633
    asm volatile(\
633
    asm volatile(\
634
             "mov  %7, %%"REG_c"             \n\t"\
634
             "mov  %6, %%"REG_c"             \n\t"\
635
             "mov  %6, %2                    \n\t"\
636
             "mov  %4, %%"REG_S"             \n\t"\
635
             "mov  %4, %%"REG_S"             \n\t"\
637
             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
636
             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
638
             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
637
             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
Lines 689-697 void ff_snow_vertical_compose97i_mmx(DWT Link Here
689
688
690
#define snow_inner_add_yblock_sse2_end_common2\
689
#define snow_inner_add_yblock_sse2_end_common2\
691
             "jnz 1b                         \n\t"\
690
             "jnz 1b                         \n\t"\
692
             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
691
             :"+m"(dst8),"+m"(dst_array),"=m"(tmp)\
693
             :\
692
             :\
694
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
693
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)src_stride):\
695
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
694
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
696
695
697
#define snow_inner_add_yblock_sse2_end_8\
696
#define snow_inner_add_yblock_sse2_end_8\
Lines 705-711 void ff_snow_vertical_compose97i_mmx(DWT Link Here
705
#define snow_inner_add_yblock_sse2_end_16\
704
#define snow_inner_add_yblock_sse2_end_16\
706
             "add $"PTR_SIZE"*1, %1          \n\t"\
705
             "add $"PTR_SIZE"*1, %1          \n\t"\
707
             snow_inner_add_yblock_sse2_end_common1\
706
             snow_inner_add_yblock_sse2_end_common1\
708
             "dec %2                         \n\t"\
707
             "sub $1, %2                     \n\t"\
709
             snow_inner_add_yblock_sse2_end_common2
708
             snow_inner_add_yblock_sse2_end_common2
710
709
711
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
710
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
Lines 795-804 snow_inner_add_yblock_sse2_end_16 Link Here
795
794
796
#define snow_inner_add_yblock_mmx_header \
795
#define snow_inner_add_yblock_mmx_header \
797
    DWTELEM * * dst_array = sb->line + src_y;\
796
    DWTELEM * * dst_array = sb->line + src_y;\
798
    long tmp;\
797
    long tmp = b_h;\
799
    asm volatile(\
798
    asm volatile(\
800
             "mov  %7, %%"REG_c"             \n\t"\
799
             "mov  %6, %%"REG_c"             \n\t"\
801
             "mov  %6, %2                    \n\t"\
802
             "mov  %4, %%"REG_S"             \n\t"\
800
             "mov  %4, %%"REG_S"             \n\t"\
803
             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
801
             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
804
             "pcmpeqd %%mm3, %%mm3           \n\t"\
802
             "pcmpeqd %%mm3, %%mm3           \n\t"\
Lines 861-871 snow_inner_add_yblock_sse2_end_16 Link Here
861
             "add %%"REG_c", (%%"REG_a")     \n\t"\
859
             "add %%"REG_c", (%%"REG_a")     \n\t"\
862
             "add $"PTR_SIZE"*1, %1          \n\t"\
860
             "add $"PTR_SIZE"*1, %1          \n\t"\
863
             "add %%"REG_c", %0              \n\t"\
861
             "add %%"REG_c", %0              \n\t"\
864
             "dec %2                         \n\t"\
862
             "sub $1, %2                     \n\t"\
865
             "jnz 1b                         \n\t"\
863
             "jnz 1b                         \n\t"\
866
             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
864
             :"+m"(dst8),"+m"(dst_array),"=m"(tmp)\
867
             :\
865
             :\
868
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
866
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)src_stride):\
869
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
867
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
870
868
871
static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
869
static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
(-)ffmpeg-old/libpostproc/postprocess_template.c (-78 / +80 lines)
Lines 388-403 static inline void RENAME(vertRK1Filter) Link Here
388
// FIXME rounding
388
// FIXME rounding
389
        asm volatile(
389
        asm volatile(
390
                "pxor %%mm7, %%mm7                      \n\t" // 0
390
                "pxor %%mm7, %%mm7                      \n\t" // 0
391
                "movq "MANGLE(b80)", %%mm6              \n\t" // MIN_SIGNED_BYTE
391
                "movq %2, %%mm6                         \n\t" // MIN_SIGNED_BYTE
392
                "leal (%0, %1), %%"REG_a"               \n\t"
392
                "leal (%0, %1), %%"REG_a"               \n\t"
393
                "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
393
                "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
394
//      0       1       2       3       4       5       6       7       8       9
394
//      0       1       2       3       4       5       6       7       8       9
395
//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
395
//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
396
                "movq "MANGLE(pQPb)", %%mm0             \n\t" // QP,..., QP
396
                "movq %3, %%mm0                         \n\t" // QP,..., QP
397
                "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
397
                "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
398
                "paddusb "MANGLE(b02)", %%mm0           \n\t"
398
                "paddusb %4, %%mm0                      \n\t"
399
                "psrlw $2, %%mm0                        \n\t"
399
                "psrlw $2, %%mm0                        \n\t"
400
                "pand "MANGLE(b3F)", %%mm0              \n\t" // QP/4,..., QP/4
400
                "pand %5, %%mm0                         \n\t" // QP/4,..., QP/4
401
                "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
401
                "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
402
                "movq (%0, %1, 4), %%mm2                \n\t" // line 4
402
                "movq (%0, %1, 4), %%mm2                \n\t" // line 4
403
                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
403
                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
Lines 426-433 static inline void RENAME(vertRK1Filter) Link Here
426
426
427
                "paddb %%mm6, %%mm5                     \n\t"
427
                "paddb %%mm6, %%mm5                     \n\t"
428
                "psrlw $2, %%mm5                        \n\t"
428
                "psrlw $2, %%mm5                        \n\t"
429
                "pand "MANGLE(b3F)", %%mm5              \n\t"
429
                "pand %5, %%mm5                         \n\t"
430
                "psubb "MANGLE(b20)", %%mm5             \n\t" // (l5-l4)/8
430
                "psubb %6, %%mm5                        \n\t" // (l5-l4)/8
431
431
432
                "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
432
                "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
433
                "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
433
                "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
Lines 442-448 static inline void RENAME(vertRK1Filter) Link Here
442
                "movq %%mm2, (%%"REG_c", %1)            \n\t"
442
                "movq %%mm2, (%%"REG_c", %1)            \n\t"
443
443
444
                :
444
                :
445
                : "r" (src), "r" ((long)stride)
445
                : "r" (src), "r" ((long)stride), "m" (b80), "m" (pQPb), "m" (b02), "m" (b3F), "m" (b20)
446
                : "%"REG_a, "%"REG_c
446
                : "%"REG_a, "%"REG_c
447
        );
447
        );
448
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
448
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 518-524 static inline void RENAME(vertX1Filter)( Link Here
518
                "paddusb %%mm0, %%mm0                   \n\t"
518
                "paddusb %%mm0, %%mm0                   \n\t"
519
                "psubusb %%mm0, %%mm4                   \n\t"
519
                "psubusb %%mm0, %%mm4                   \n\t"
520
                "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= QP ? -1 : 0
520
                "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= QP ? -1 : 0
521
                "psubusb "MANGLE(b01)", %%mm3           \n\t"
521
                "psubusb %3, %%mm3                      \n\t"
522
                "pand %%mm4, %%mm3                      \n\t" // d <= QP ? d : 0
522
                "pand %%mm4, %%mm3                      \n\t" // d <= QP ? d : 0
523
523
524
                PAVGB(%%mm7, %%mm3)                           // d/2
524
                PAVGB(%%mm7, %%mm3)                           // d/2
Lines 567-573 static inline void RENAME(vertX1Filter)( Link Here
567
                "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
567
                "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
568
568
569
                :
569
                :
570
                : "r" (src), "r" ((long)stride), "m" (co->pQPb)
570
                : "r" (src), "r" ((long)stride), "m" (co->pQPb), "m" (b01)
571
                : "%"REG_a, "%"REG_c
571
                : "%"REG_a, "%"REG_c
572
        );
572
        );
573
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
573
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 700-716 static inline void RENAME(doVertDefFilte Link Here
700
700
701
                PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
701
                PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
702
                "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
702
                "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
703
                "paddusb "MANGLE(b01)", %%mm4           \n\t"
703
                "paddusb %3, %%mm4                      \n\t"
704
                "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
704
                "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
705
                "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
705
                "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
706
                "pand %%mm4, %%mm3                      \n\t"
706
                "pand %%mm4, %%mm3                      \n\t"
707
707
708
                "movq %%mm3, %%mm1                      \n\t"
708
                "movq %%mm3, %%mm1                      \n\t"
709
//                "psubusb "MANGLE(b01)", %%mm3           \n\t"
709
//                "psubusb %3, %%mm3           \n\t"
710
                PAVGB(%%mm7, %%mm3)
710
                PAVGB(%%mm7, %%mm3)
711
                PAVGB(%%mm7, %%mm3)
711
                PAVGB(%%mm7, %%mm3)
712
                "paddusb %%mm1, %%mm3                   \n\t"
712
                "paddusb %%mm1, %%mm3                   \n\t"
713
//                "paddusb "MANGLE(b01)", %%mm3           \n\t"
713
//                "paddusb %3, %%mm3           \n\t"
714
714
715
                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
715
                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
716
                "movq (%0, %1, 4), %%mm5                \n\t" //l4
716
                "movq (%0, %1, 4), %%mm5                \n\t" //l4
Lines 723-729 static inline void RENAME(doVertDefFilte Link Here
723
                "pand %%mm0, %%mm3                      \n\t"
723
                "pand %%mm0, %%mm3                      \n\t"
724
                PMINUB(%%mm5, %%mm3, %%mm0)
724
                PMINUB(%%mm5, %%mm3, %%mm0)
725
725
726
                "psubusb "MANGLE(b01)", %%mm3           \n\t"
726
                "psubusb %3, %%mm3                      \n\t"
727
                PAVGB(%%mm7, %%mm3)
727
                PAVGB(%%mm7, %%mm3)
728
728
729
                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
729
                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
Lines 755-761 static inline void RENAME(doVertDefFilte Link Here
755
                "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
755
                "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
756
                "pxor %%mm6, %%mm2                      \n\t" // -l5-1
756
                "pxor %%mm6, %%mm2                      \n\t" // -l5-1
757
                "movq %%mm2, %%mm5                      \n\t" // -l5-1
757
                "movq %%mm2, %%mm5                      \n\t" // -l5-1
758
                "movq "MANGLE(b80)", %%mm4              \n\t" // 128
758
                "movq %4, %%mm4                         \n\t" // 128
759
                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
759
                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
760
                PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
760
                PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
761
                PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
761
                PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
Lines 767-773 static inline void RENAME(doVertDefFilte Link Here
767
                "pxor %%mm6, %%mm2                      \n\t" // -l1-1
767
                "pxor %%mm6, %%mm2                      \n\t" // -l1-1
768
                PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
768
                PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
769
                PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
769
                PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
770
                "movq "MANGLE(b80)", %%mm3              \n\t" // 128
770
                "movq %4, %%mm3                         \n\t" // 128
771
                PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
771
                PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
772
                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
772
                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
773
                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
773
                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
Lines 777-790 static inline void RENAME(doVertDefFilte Link Here
777
                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
777
                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
778
                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
778
                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
779
                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
779
                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
780
                "movq "MANGLE(b80)", %%mm2              \n\t" // 128
780
                "movq %4, %%mm2                         \n\t" // 128
781
                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
781
                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
782
                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
782
                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
783
                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
783
                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
784
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
784
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
785
785
786
                "movq "MANGLE(b00)", %%mm1              \n\t" // 0
786
                "movq %5, %%mm1                         \n\t" // 0
787
                "movq "MANGLE(b00)", %%mm5              \n\t" // 0
787
                "movq %5, %%mm5                         \n\t" // 0
788
                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
788
                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
789
                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
789
                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
790
                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
790
                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
Lines 793-799 static inline void RENAME(doVertDefFilte Link Here
793
793
794
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
794
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
795
795
796
                "movq "MANGLE(b00)", %%mm7              \n\t" // 0
796
                "movq %5, %%mm7                         \n\t" // 0
797
                "movq %2, %%mm2                         \n\t" // QP
797
                "movq %2, %%mm2                         \n\t" // QP
798
                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
798
                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
799
                "psubb %%mm6, %%mm2                     \n\t"
799
                "psubb %%mm6, %%mm2                     \n\t"
Lines 807-819 static inline void RENAME(doVertDefFilte Link Here
807
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
807
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
808
808
809
                "movq %%mm4, %%mm3                      \n\t" // d
809
                "movq %%mm4, %%mm3                      \n\t" // d
810
                "psubusb "MANGLE(b01)", %%mm4           \n\t"
810
                "psubusb %3, %%mm4                      \n\t"
811
                PAVGB(%%mm7, %%mm4)                           // d/32
811
                PAVGB(%%mm7, %%mm4)                           // d/32
812
                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
812
                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
813
                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
813
                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
814
                "pand %%mm2, %%mm4                      \n\t"
814
                "pand %%mm2, %%mm4                      \n\t"
815
815
816
                "movq "MANGLE(b80)", %%mm5              \n\t" // 128
816
                "movq %4, %%mm5                         \n\t" // 128
817
                "psubb %%mm0, %%mm5                     \n\t" // q
817
                "psubb %%mm0, %%mm5                     \n\t" // q
818
                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
818
                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
819
                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
819
                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
Lines 835-841 static inline void RENAME(doVertDefFilte Link Here
835
                "movq %%mm2, (%0, %1, 4)                \n\t"
835
                "movq %%mm2, (%0, %1, 4)                \n\t"
836
836
837
                :
837
                :
838
                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
838
                : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m" (b01), "m" (b80), "m" (b00)
839
                : "%"REG_a, "%"REG_c
839
                : "%"REG_a, "%"REG_c
840
        );
840
        );
841
841
Lines 1079-1088 src-=8; Link Here
1079
                "psubusw %%mm1, %%mm5                   \n\t" // ld
1079
                "psubusw %%mm1, %%mm5                   \n\t" // ld
1080
1080
1081
1081
1082
                "movq "MANGLE(w05)", %%mm2              \n\t" // 5
1082
                "movq %3, %%mm2                         \n\t" // 5
1083
                "pmullw %%mm2, %%mm4                    \n\t"
1083
                "pmullw %%mm2, %%mm4                    \n\t"
1084
                "pmullw %%mm2, %%mm5                    \n\t"
1084
                "pmullw %%mm2, %%mm5                    \n\t"
1085
                "movq "MANGLE(w20)", %%mm2              \n\t" // 32
1085
                "movq %4, %%mm2                         \n\t" // 32
1086
                "paddw %%mm2, %%mm4                     \n\t"
1086
                "paddw %%mm2, %%mm4                     \n\t"
1087
                "paddw %%mm2, %%mm5                     \n\t"
1087
                "paddw %%mm2, %%mm5                     \n\t"
1088
                "psrlw $6, %%mm4                        \n\t"
1088
                "psrlw $6, %%mm4                        \n\t"
Lines 1132-1138 src-=8; Link Here
1132
                "movq %%mm0, (%0, %1)                   \n\t"
1132
                "movq %%mm0, (%0, %1)                   \n\t"
1133
1133
1134
                : "+r" (src)
1134
                : "+r" (src)
1135
                : "r" ((long)stride), "m" (c->pQPb)
1135
                : "r" ((long)stride), "m" (c->pQPb), "m" (w05), "m" (w20)
1136
                : "%"REG_a, "%"REG_c
1136
                : "%"REG_a, "%"REG_c
1137
        );
1137
        );
1138
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1138
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 1276-1282 FIND_MIN_MAX((%0, %1, 8)) Link Here
1276
                "movq %%mm6, %%mm0                      \n\t" // max
1276
                "movq %%mm6, %%mm0                      \n\t" // max
1277
                "psubb %%mm7, %%mm6                     \n\t" // max - min
1277
                "psubb %%mm7, %%mm6                     \n\t" // max - min
1278
                "movd %%mm6, %%ecx                      \n\t"
1278
                "movd %%mm6, %%ecx                      \n\t"
1279
                "cmpb "MANGLE(deringThreshold)", %%cl   \n\t"
1279
                "cmpb %4, %%cl                          \n\t"
1280
                " jb 1f                                 \n\t"
1280
                " jb 1f                                 \n\t"
1281
                "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
1281
                "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
1282
                "and "ALIGN_MASK", %%"REG_c"            \n\t"
1282
                "and "ALIGN_MASK", %%"REG_c"            \n\t"
Lines 1303-1311 FIND_MIN_MAX((%0, %1, 8)) Link Here
1303
                "psubusb %%mm7, %%mm0                   \n\t"
1303
                "psubusb %%mm7, %%mm0                   \n\t"
1304
                "psubusb %%mm7, %%mm2                   \n\t"
1304
                "psubusb %%mm7, %%mm2                   \n\t"
1305
                "psubusb %%mm7, %%mm3                   \n\t"
1305
                "psubusb %%mm7, %%mm3                   \n\t"
1306
                "pcmpeqb "MANGLE(b00)", %%mm0           \n\t" // L10 > a ? 0 : -1
1306
                "pcmpeqb %5, %%mm0                      \n\t" // L10 > a ? 0 : -1
1307
                "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" // L20 > a ? 0 : -1
1307
                "pcmpeqb %5, %%mm2                      \n\t" // L20 > a ? 0 : -1
1308
                "pcmpeqb "MANGLE(b00)", %%mm3           \n\t" // L00 > a ? 0 : -1
1308
                "pcmpeqb %5, %%mm3                      \n\t" // L00 > a ? 0 : -1
1309
                "paddb %%mm2, %%mm0                     \n\t"
1309
                "paddb %%mm2, %%mm0                     \n\t"
1310
                "paddb %%mm3, %%mm0                     \n\t"
1310
                "paddb %%mm3, %%mm0                     \n\t"
1311
1311
Lines 1326-1334 FIND_MIN_MAX((%0, %1, 8)) Link Here
1326
                "psubusb %%mm7, %%mm2                   \n\t"
1326
                "psubusb %%mm7, %%mm2                   \n\t"
1327
                "psubusb %%mm7, %%mm4                   \n\t"
1327
                "psubusb %%mm7, %%mm4                   \n\t"
1328
                "psubusb %%mm7, %%mm5                   \n\t"
1328
                "psubusb %%mm7, %%mm5                   \n\t"
1329
                "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" // L11 > a ? 0 : -1
1329
                "pcmpeqb %5, %%mm2                      \n\t" // L11 > a ? 0 : -1
1330
                "pcmpeqb "MANGLE(b00)", %%mm4           \n\t" // L21 > a ? 0 : -1
1330
                "pcmpeqb %5, %%mm4                      \n\t" // L21 > a ? 0 : -1
1331
                "pcmpeqb "MANGLE(b00)", %%mm5           \n\t" // L01 > a ? 0 : -1
1331
                "pcmpeqb %5, %%mm5                      \n\t" // L01 > a ? 0 : -1
1332
                "paddb %%mm4, %%mm2                     \n\t"
1332
                "paddb %%mm4, %%mm2                     \n\t"
1333
                "paddb %%mm5, %%mm2                     \n\t"
1333
                "paddb %%mm5, %%mm2                     \n\t"
1334
// 0, 2, 3, 1
1334
// 0, 2, 3, 1
Lines 1353-1359 FIND_MIN_MAX((%0, %1, 8)) Link Here
1353
                "psubusb " #lx ", " #t1 "               \n\t"\
1353
                "psubusb " #lx ", " #t1 "               \n\t"\
1354
                "psubusb " #lx ", " #t0 "               \n\t"\
1354
                "psubusb " #lx ", " #t0 "               \n\t"\
1355
                "psubusb " #lx ", " #sx "               \n\t"\
1355
                "psubusb " #lx ", " #sx "               \n\t"\
1356
                "movq "MANGLE(b00)", " #lx "            \n\t"\
1356
                "movq %5, " #lx "                       \n\t"\
1357
                "pcmpeqb " #lx ", " #t1 "               \n\t" /* src[-1] > a ? 0 : -1*/\
1357
                "pcmpeqb " #lx ", " #t1 "               \n\t" /* src[-1] > a ? 0 : -1*/\
1358
                "pcmpeqb " #lx ", " #t0 "               \n\t" /* src[+1] > a ? 0 : -1*/\
1358
                "pcmpeqb " #lx ", " #t0 "               \n\t" /* src[+1] > a ? 0 : -1*/\
1359
                "pcmpeqb " #lx ", " #sx "               \n\t" /* src[0]  > a ? 0 : -1*/\
1359
                "pcmpeqb " #lx ", " #sx "               \n\t" /* src[0]  > a ? 0 : -1*/\
Lines 1369-1376 FIND_MIN_MAX((%0, %1, 8)) Link Here
1369
                PMINUB(t1, pplx, t0)\
1369
                PMINUB(t1, pplx, t0)\
1370
                "paddb " #sx ", " #ppsx "               \n\t"\
1370
                "paddb " #sx ", " #ppsx "               \n\t"\
1371
                "paddb " #psx ", " #ppsx "              \n\t"\
1371
                "paddb " #psx ", " #ppsx "              \n\t"\
1372
                "#paddb "MANGLE(b02)", " #ppsx "        \n\t"\
1372
                "#paddb %6, " #ppsx "                   \n\t"\
1373
                "pand "MANGLE(b08)", " #ppsx "          \n\t"\
1373
                "pand %7, " #ppsx "                     \n\t"\
1374
                "pcmpeqb " #lx ", " #ppsx "             \n\t"\
1374
                "pcmpeqb " #lx ", " #ppsx "             \n\t"\
1375
                "pand " #ppsx ", " #pplx "              \n\t"\
1375
                "pand " #ppsx ", " #pplx "              \n\t"\
1376
                "pandn " #dst ", " #ppsx "              \n\t"\
1376
                "pandn " #dst ", " #ppsx "              \n\t"\
Lines 1406-1412 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) Link Here
1406
DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1406
DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1407
1407
1408
                "1:                        \n\t"
1408
                "1:                        \n\t"
1409
                : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
1409
                : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2), "m" (deringThreshold), "m" (b00), "m" (b02), "m" (b08)
1410
                : "%"REG_a, "%"REG_d, "%"REG_c
1410
                : "%"REG_a, "%"REG_d, "%"REG_c
1411
        );
1411
        );
1412
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1412
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 2284-2290 static inline void RENAME(tempNoiseReduc Link Here
2284
#else //L1_DIFF
2284
#else //L1_DIFF
2285
#if defined (FAST_L2_DIFF)
2285
#if defined (FAST_L2_DIFF)
2286
                "pcmpeqb %%mm7, %%mm7                   \n\t"
2286
                "pcmpeqb %%mm7, %%mm7                   \n\t"
2287
                "movq "MANGLE(b80)", %%mm6              \n\t"
2287
                "movq %4, %%mm6                         \n\t"
2288
                "pxor %%mm0, %%mm0                      \n\t"
2288
                "pxor %%mm0, %%mm0                      \n\t"
2289
#define REAL_L2_DIFF_CORE(a, b)\
2289
#define REAL_L2_DIFF_CORE(a, b)\
2290
                "movq " #a ", %%mm5                     \n\t"\
2290
                "movq " #a ", %%mm5                     \n\t"\
Lines 2533-2539 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc Link Here
2533
2533
2534
                "4:                                     \n\t"
2534
                "4:                                     \n\t"
2535
2535
2536
                :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
2536
                :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast), "m" (b80)
2537
                : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2537
                : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2538
                );
2538
                );
2539
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2539
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
Lines 2806-2813 asm volatile( Link Here
2806
                "movq %%mm6, %%mm1                      \n\t"
2806
                "movq %%mm6, %%mm1                      \n\t"
2807
                "psllw $2, %%mm0                        \n\t"
2807
                "psllw $2, %%mm0                        \n\t"
2808
                "psllw $2, %%mm1                        \n\t"
2808
                "psllw $2, %%mm1                        \n\t"
2809
                "paddw "MANGLE(w04)", %%mm0             \n\t"
2809
                "paddw %5, %%mm0                        \n\t"
2810
                "paddw "MANGLE(w04)", %%mm1             \n\t"
2810
                "paddw %5, %%mm1                        \n\t"
2811
2811
2812
#define NEXT\
2812
#define NEXT\
2813
                "movq (%0), %%mm2                       \n\t"\
2813
                "movq (%0), %%mm2                       \n\t"\
Lines 2896-2902 asm volatile( Link Here
2896
                "mov %4, %0                             \n\t" //FIXME
2896
                "mov %4, %0                             \n\t" //FIXME
2897
2897
2898
                : "+&r"(src)
2898
                : "+&r"(src)
2899
                : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
2899
                : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src), "m" (w04)
2900
                );
2900
                );
2901
2901
2902
                src+= step; // src points to begin of the 8x8 Block
2902
                src+= step; // src points to begin of the 8x8 Block
Lines 3113-3122 asm volatile( Link Here
3113
                "psubusw %%mm1, %%mm5                   \n\t" // ld
3113
                "psubusw %%mm1, %%mm5                   \n\t" // ld
3114
3114
3115
3115
3116
                "movq "MANGLE(w05)", %%mm2              \n\t" // 5
3116
                "movq %4, %%mm2                         \n\t" // 5
3117
                "pmullw %%mm2, %%mm4                    \n\t"
3117
                "pmullw %%mm2, %%mm4                    \n\t"
3118
                "pmullw %%mm2, %%mm5                    \n\t"
3118
                "pmullw %%mm2, %%mm5                    \n\t"
3119
                "movq "MANGLE(w20)", %%mm2              \n\t" // 32
3119
                "movq %5, %%mm2                         \n\t" // 32
3120
                "paddw %%mm2, %%mm4                     \n\t"
3120
                "paddw %%mm2, %%mm4                     \n\t"
3121
                "paddw %%mm2, %%mm5                     \n\t"
3121
                "paddw %%mm2, %%mm5                     \n\t"
3122
                "psrlw $6, %%mm4                        \n\t"
3122
                "psrlw $6, %%mm4                        \n\t"
Lines 3168-3174 asm volatile( Link Here
3168
                "movq %%mm0, (%0, %1)                   \n\t"
3168
                "movq %%mm0, (%0, %1)                   \n\t"
3169
3169
3170
                : "+r" (temp_src)
3170
                : "+r" (temp_src)
3171
                : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
3171
                : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask), "m" (w05), "m" (w20)
3172
                : "%"REG_a, "%"REG_c
3172
                : "%"REG_a, "%"REG_c
3173
                );
3173
                );
3174
        }
3174
        }
Lines 3199-3208 static inline void RENAME(blockCopy)(uin Link Here
3199
        {
3199
        {
3200
#ifdef HAVE_MMX
3200
#ifdef HAVE_MMX
3201
                asm volatile(
3201
                asm volatile(
3202
                        "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
3202
                        "movq (%0), %%mm2               \n\t" // packedYOffset
3203
                        "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
3203
                        "movq 8(%0), %%mm3              \n\t" // packedYScale
3204
                        "lea (%2,%4), %%"REG_a"         \n\t"
3205
                        "lea (%3,%5), %%"REG_d"         \n\t"
3206
                        "pxor %%mm4, %%mm4              \n\t"
3204
                        "pxor %%mm4, %%mm4              \n\t"
3207
#ifdef HAVE_MMX2
3205
#ifdef HAVE_MMX2
3208
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
3206
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
Lines 3258-3279 static inline void RENAME(blockCopy)(uin Link Here
3258
#define SCALED_CPY(src1, src2, dst1, dst2)\
3256
#define SCALED_CPY(src1, src2, dst1, dst2)\
3259
   REAL_SCALED_CPY(src1, src2, dst1, dst2)
3257
   REAL_SCALED_CPY(src1, src2, dst1, dst2)
3260
3258
3261
SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
3259
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3262
SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
3260
                        "lea (%1,%3,2), %1              \n\t"
3263
SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
3261
                        "lea (%2,%4,2), %2              \n\t"
3264
                        "lea (%%"REG_a",%4,4), %%"REG_a"        \n\t"
3262
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3265
                        "lea (%%"REG_d",%5,4), %%"REG_d"        \n\t"
3263
                        "lea (%1,%3,2), %1              \n\t"
3266
SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
3264
                        "lea (%2,%4,2), %2              \n\t"
3267
3265
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3268
3266
                        "lea (%1,%3,2), %1              \n\t"
3269
                        : "=&a" (packedOffsetAndScale)
3267
                        "lea (%2,%4,2), %2              \n\t"
3270
                        : "0" (packedOffsetAndScale),
3268
SCALED_CPY((%1), (%1, %3), (%2), (%2, %4))
3271
                        "r"(src),
3269
3272
                        "r"(dst),
3270
                        : "+r" (packedOffsetAndScale),
3273
                        "r" ((long)srcStride),
3271
                          "+r"(src),
3274
                        "r" ((long)dstStride)
3272
                          "+r"(dst)
3275
                        : "%"REG_d
3273
                        : "r" ((long)srcStride),
3276
                                        );
3274
                          "r" ((long)dstStride)
3275
                        : "memory"
3276
                );
3277
#else //HAVE_MMX
3277
#else //HAVE_MMX
3278
        for(i=0; i<8; i++)
3278
        for(i=0; i<8; i++)
3279
                memcpy( &(dst[dstStride*i]),
3279
                memcpy( &(dst[dstStride*i]),
Lines 3284-3291 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2) Link Here
3284
        {
3284
        {
3285
#ifdef HAVE_MMX
3285
#ifdef HAVE_MMX
3286
        asm volatile(
3286
        asm volatile(
3287
                "lea (%0,%2), %%"REG_a"                 \n\t"
3288
                "lea (%1,%3), %%"REG_d"                 \n\t"
3289
3287
3290
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
3288
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
3291
                "movq " #src1 ", %%mm0          \n\t"\
3289
                "movq " #src1 ", %%mm0          \n\t"\
Lines 3296-3313 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2) Link Here
3296
#define SIMPLE_CPY(src1, src2, dst1, dst2)\
3294
#define SIMPLE_CPY(src1, src2, dst1, dst2)\
3297
   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
3295
   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
3298
3296
3299
SIMPLE_CPY((%0)       , (%0, %2)       , (%1)       , (%1, %3))
3297
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3300
SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
3298
                 "lea (%0,%2,2), %0             \n\t"
3301
SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
3299
                 "lea (%1,%3,2), %1             \n\t"
3302
                "lea (%%"REG_a",%2,4), %%"REG_a"        \n\t"
3300
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3303
                "lea (%%"REG_d",%3,4), %%"REG_d"        \n\t"
3301
                 "lea (%0,%2,2), %0             \n\t"
3304
SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
3302
                 "lea (%1,%3,2), %1             \n\t"
3305
3303
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3306
                : : "r" (src),
3304
                 "lea (%0,%2), %0               \n\t"
3307
                "r" (dst),
3305
                 "lea (%1,%3), %1               \n\t"
3308
                "r" ((long)srcStride),
3306
SIMPLE_CPY((%0), (%0, %2), (%1), (%1, %3))
3309
                "r" ((long)dstStride)
3307
3310
                : "%"REG_a, "%"REG_d
3308
                : "+r" (src),
3309
                  "+r" (dst)
3310
                : "r" ((long)srcStride),
3311
                  "r" ((long)dstStride)
3312
                : "memory"
3311
        );
3313
        );
3312
#else //HAVE_MMX
3314
#else //HAVE_MMX
3313
        for(i=0; i<8; i++)
3315
        for(i=0; i<8; i++)
(-)ffmpeg-old/libswscale/rgb2rgb_template.c (-17 / +18 lines)
Lines 1436-1444 static inline void RENAME(rgb24tobgr24)( Link Here
1436
    asm volatile (
1436
    asm volatile (
1437
    "test             %%"REG_a", %%"REG_a"          \n\t"
1437
    "test             %%"REG_a", %%"REG_a"          \n\t"
1438
    "jns                     2f                     \n\t"
1438
    "jns                     2f                     \n\t"
1439
    "movq     "MANGLE(mask24r)", %%mm5              \n\t"
1439
    "movq                    %3, %%mm5              \n\t"
1440
    "movq     "MANGLE(mask24g)", %%mm6              \n\t"
1440
    "movq                    %4, %%mm6              \n\t"
1441
    "movq     "MANGLE(mask24b)", %%mm7              \n\t"
1441
    "movq                    %5, %%mm7              \n\t"
1442
    ASMALIGN(4)
1442
    ASMALIGN(4)
1443
    "1:                                             \n\t"
1443
    "1:                                             \n\t"
1444
    PREFETCH" 32(%1, %%"REG_a")                     \n\t"
1444
    PREFETCH" 32(%1, %%"REG_a")                     \n\t"
Lines 1474-1480 static inline void RENAME(rgb24tobgr24)( Link Here
1474
    " js                     1b                     \n\t"
1474
    " js                     1b                     \n\t"
1475
    "2:                                             \n\t"
1475
    "2:                                             \n\t"
1476
    : "+a" (mmx_size)
1476
    : "+a" (mmx_size)
1477
    : "r" (src-mmx_size), "r"(dst-mmx_size)
1477
    : "r" (src-mmx_size), "r"(dst-mmx_size), "m"(mask24r), "m"(mask24g), "m"(mask24b)
1478
    );
1478
    );
1479
1479
1480
    __asm __volatile(SFENCE:::"memory");
1480
    __asm __volatile(SFENCE:::"memory");
Lines 2152-2159 static inline void RENAME(rgb24toyv12)(c Link Here
2152
        {
2152
        {
2153
            asm volatile(
2153
            asm volatile(
2154
            "mov                        %2, %%"REG_a"   \n\t"
2154
            "mov                        %2, %%"REG_a"   \n\t"
2155
            "movq     "MANGLE(bgr2YCoeff)", %%mm6       \n\t"
2155
            "movq                       %3, %%mm6       \n\t"
2156
            "movq          "MANGLE(w1111)", %%mm5       \n\t"
2156
            "movq                       %4, %%mm5       \n\t"
2157
            "pxor                    %%mm7, %%mm7       \n\t"
2157
            "pxor                    %%mm7, %%mm7       \n\t"
2158
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2158
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2159
            ASMALIGN(4)
2159
            ASMALIGN(4)
Lines 2211-2222 static inline void RENAME(rgb24toyv12)(c Link Here
2211
            "psraw                      $7, %%mm4       \n\t"
2211
            "psraw                      $7, %%mm4       \n\t"
2212
2212
2213
            "packuswb                %%mm4, %%mm0       \n\t"
2213
            "packuswb                %%mm4, %%mm0       \n\t"
2214
            "paddusb "MANGLE(bgr2YOffset)", %%mm0       \n\t"
2214
            "paddusb                    %5, %%mm0       \n\t"
2215
2215
2216
            MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
2216
            MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
2217
            "add                        $8,      %%"REG_a"  \n\t"
2217
            "add                        $8,      %%"REG_a"  \n\t"
2218
            " js                        1b                  \n\t"
2218
            " js                        1b                  \n\t"
2219
            : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
2219
            : : "r" (src+width*3), "r" (ydst+width), "g" (-width), "m" (bgr2YCoeff), "m" (w1111), "m" (bgr2YOffset)
2220
            : "%"REG_a, "%"REG_d
2220
            : "%"REG_a, "%"REG_d
2221
            );
2221
            );
2222
            ydst += lumStride;
2222
            ydst += lumStride;
Lines 2225-2232 static inline void RENAME(rgb24toyv12)(c Link Here
2225
        src -= srcStride*2;
2225
        src -= srcStride*2;
2226
        asm volatile(
2226
        asm volatile(
2227
        "mov                        %4, %%"REG_a"   \n\t"
2227
        "mov                        %4, %%"REG_a"   \n\t"
2228
        "movq          "MANGLE(w1111)", %%mm5       \n\t"
2228
        "movq                       %5, %%mm5       \n\t"
2229
        "movq     "MANGLE(bgr2UCoeff)", %%mm6       \n\t"
2229
        "movq                       %6, %%mm6       \n\t"
2230
        "pxor                    %%mm7, %%mm7       \n\t"
2230
        "pxor                    %%mm7, %%mm7       \n\t"
2231
        "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2231
        "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2232
        "add                 %%"REG_d", %%"REG_d"   \n\t"
2232
        "add                 %%"REG_d", %%"REG_d"   \n\t"
Lines 2275-2282 static inline void RENAME(rgb24toyv12)(c Link Here
2275
        "psrlw                      $2, %%mm0       \n\t"
2275
        "psrlw                      $2, %%mm0       \n\t"
2276
        "psrlw                      $2, %%mm2       \n\t"
2276
        "psrlw                      $2, %%mm2       \n\t"
2277
#endif
2277
#endif
2278
        "movq     "MANGLE(bgr2VCoeff)", %%mm1       \n\t"
2278
        "movq                       %7, %%mm1       \n\t"
2279
        "movq     "MANGLE(bgr2VCoeff)", %%mm3       \n\t"
2279
        "movq                       %7, %%mm3       \n\t"
2280
2280
2281
        "pmaddwd                 %%mm0, %%mm1       \n\t"
2281
        "pmaddwd                 %%mm0, %%mm1       \n\t"
2282
        "pmaddwd                 %%mm2, %%mm3       \n\t"
2282
        "pmaddwd                 %%mm2, %%mm3       \n\t"
Lines 2333-2344 static inline void RENAME(rgb24toyv12)(c Link Here
2333
        "paddw                   %%mm1, %%mm5       \n\t"
2333
        "paddw                   %%mm1, %%mm5       \n\t"
2334
        "paddw                   %%mm3, %%mm2       \n\t"
2334
        "paddw                   %%mm3, %%mm2       \n\t"
2335
        "paddw                   %%mm5, %%mm2       \n\t"
2335
        "paddw                   %%mm5, %%mm2       \n\t"
2336
        "movq          "MANGLE(w1111)", %%mm5       \n\t"
2336
        "movq                       %5, %%mm5       \n\t"
2337
        "psrlw                      $2, %%mm4       \n\t"
2337
        "psrlw                      $2, %%mm4       \n\t"
2338
        "psrlw                      $2, %%mm2       \n\t"
2338
        "psrlw                      $2, %%mm2       \n\t"
2339
#endif
2339
#endif
2340
        "movq     "MANGLE(bgr2VCoeff)", %%mm1       \n\t"
2340
        "movq                       %7, %%mm1       \n\t"
2341
        "movq     "MANGLE(bgr2VCoeff)", %%mm3       \n\t"
2341
        "movq                       %7, %%mm3       \n\t"
2342
2342
2343
        "pmaddwd                 %%mm4, %%mm1       \n\t"
2343
        "pmaddwd                 %%mm4, %%mm1       \n\t"
2344
        "pmaddwd                 %%mm2, %%mm3       \n\t"
2344
        "pmaddwd                 %%mm2, %%mm3       \n\t"
Lines 2362-2374 static inline void RENAME(rgb24toyv12)(c Link Here
2362
        "punpckldq               %%mm4, %%mm0           \n\t"
2362
        "punpckldq               %%mm4, %%mm0           \n\t"
2363
        "punpckhdq               %%mm4, %%mm1           \n\t"
2363
        "punpckhdq               %%mm4, %%mm1           \n\t"
2364
        "packsswb                %%mm1, %%mm0           \n\t"
2364
        "packsswb                %%mm1, %%mm0           \n\t"
2365
        "paddb  "MANGLE(bgr2UVOffset)", %%mm0           \n\t"
2365
        "paddb                      %8, %%mm0           \n\t"
2366
        "movd                    %%mm0, (%2, %%"REG_a") \n\t"
2366
        "movd                    %%mm0, (%2, %%"REG_a") \n\t"
2367
        "punpckhdq               %%mm0, %%mm0           \n\t"
2367
        "punpckhdq               %%mm0, %%mm0           \n\t"
2368
        "movd                    %%mm0, (%3, %%"REG_a") \n\t"
2368
        "movd                    %%mm0, (%3, %%"REG_a") \n\t"
2369
        "add                        $4, %%"REG_a"       \n\t"
2369
        "add                        $4, %%"REG_a"       \n\t"
2370
        " js                        1b                  \n\t"
2370
        " js                        1b                  \n\t"
2371
        : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
2371
        : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth),
2372
            "m" (w1111), "m" (bgr2UCoeff), "m" (bgr2VCoeff), "m" (bgr2UVOffset)
2372
        : "%"REG_a, "%"REG_d
2373
        : "%"REG_a, "%"REG_d
2373
        );
2374
        );
2374
2375
(-)ffmpeg-old/libswscale/swscale_template.c (-101 / +111 lines)
Lines 233-239 Link Here
233
#define YSCALEYUV2PACKEDX_END                 \
233
#define YSCALEYUV2PACKEDX_END                 \
234
    :: "r" (&c->redDither),                   \
234
    :: "r" (&c->redDither),                   \
235
        "m" (dummy), "m" (dummy), "m" (dummy),\
235
        "m" (dummy), "m" (dummy), "m" (dummy),\
236
        "r" (dest), "m" (dstW)                \
236
        "r" (dest), "m" (dstW),               \
237
        "m" (b5Dither), "m" (g5Dither),       \
238
        "m" (r5Dither), "m" (bF8), "m" (bFC)  \
237
    : "%"REG_a, "%"REG_d, "%"REG_S            \
239
    : "%"REG_a, "%"REG_d, "%"REG_S            \
238
    );
240
    );
239
241
Lines 687-696 Link Here
687
    " jb      1b                \n\t"
689
    " jb      1b                \n\t"
688
#define WRITEBGR32(dst, dstw, index)  REAL_WRITEBGR32(dst, dstw, index)
690
#define WRITEBGR32(dst, dstw, index)  REAL_WRITEBGR32(dst, dstw, index)
689
691
690
#define REAL_WRITEBGR16(dst, dstw, index) \
692
#define REAL_WRITEBGR16(dst, dstw, index, bf8, bfc) \
691
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
693
    "pand        "#bf8", %%mm2  \n\t" /* B */\
692
    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
694
    "pand        "#bfc", %%mm4  \n\t" /* G */\
693
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
695
    "pand        "#bf8", %%mm5  \n\t" /* R */\
694
    "psrlq           $3, %%mm2  \n\t"\
696
    "psrlq           $3, %%mm2  \n\t"\
695
\
697
\
696
    "movq         %%mm2, %%mm1  \n\t"\
698
    "movq         %%mm2, %%mm1  \n\t"\
Lines 713-724 Link Here
713
    "add             $8, "#index"   \n\t"\
715
    "add             $8, "#index"   \n\t"\
714
    "cmp        "#dstw", "#index"   \n\t"\
716
    "cmp        "#dstw", "#index"   \n\t"\
715
    " jb             1b             \n\t"
717
    " jb             1b             \n\t"
716
#define WRITEBGR16(dst, dstw, index)  REAL_WRITEBGR16(dst, dstw, index)
718
#define WRITEBGR16(dst, dstw, index, bf8, bfc)  REAL_WRITEBGR16(dst, dstw, index, bf8, bfc)
717
719
718
#define REAL_WRITEBGR15(dst, dstw, index) \
720
#define REAL_WRITEBGR15(dst, dstw, index, bf8) \
719
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
721
    "pand        "#bf8", %%mm2  \n\t" /* B */\
720
    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
722
    "pand        "#bf8", %%mm4  \n\t" /* G */\
721
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
723
    "pand        "#bf8", %%mm5  \n\t" /* R */\
722
    "psrlq           $3, %%mm2  \n\t"\
724
    "psrlq           $3, %%mm2  \n\t"\
723
    "psrlq           $1, %%mm5  \n\t"\
725
    "psrlq           $1, %%mm5  \n\t"\
724
\
726
\
Lines 742-750 Link Here
742
    "add             $8, "#index"   \n\t"\
744
    "add             $8, "#index"   \n\t"\
743
    "cmp        "#dstw", "#index"   \n\t"\
745
    "cmp        "#dstw", "#index"   \n\t"\
744
    " jb             1b             \n\t"
746
    " jb             1b             \n\t"
745
#define WRITEBGR15(dst, dstw, index)  REAL_WRITEBGR15(dst, dstw, index)
747
#define WRITEBGR15(dst, dstw, index, bf8)  REAL_WRITEBGR15(dst, dstw, index, bf8)
746
748
747
#define WRITEBGR24OLD(dst, dstw, index) \
749
#define WRITEBGR24OLD(dst, dstw, index, m24a, m24b, m24c) \
748
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
750
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
749
    "movq      %%mm2, %%mm1             \n\t" /* B */\
751
    "movq      %%mm2, %%mm1             \n\t" /* B */\
750
    "movq      %%mm5, %%mm6             \n\t" /* R */\
752
    "movq      %%mm5, %%mm6             \n\t" /* R */\
Lines 800-806 Link Here
800
    "cmp     "#dstw", "#index"          \n\t"\
802
    "cmp     "#dstw", "#index"          \n\t"\
801
    " jb          1b                    \n\t"
803
    " jb          1b                    \n\t"
802
804
803
#define WRITEBGR24MMX(dst, dstw, index) \
805
#define WRITEBGR24MMX(dst, dstw, index, m24a, m24b, m24c) \
804
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
806
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
805
    "movq      %%mm2, %%mm1     \n\t" /* B */\
807
    "movq      %%mm2, %%mm1     \n\t" /* B */\
806
    "movq      %%mm5, %%mm6     \n\t" /* R */\
808
    "movq      %%mm5, %%mm6     \n\t" /* R */\
Lines 853-862 Link Here
853
    "cmp     "#dstw", "#index"  \n\t"\
855
    "cmp     "#dstw", "#index"  \n\t"\
854
    " jb          1b            \n\t"
856
    " jb          1b            \n\t"
855
857
856
#define WRITEBGR24MMX2(dst, dstw, index) \
858
#define WRITEBGR24MMX2(dst, dstw, index, m24a, m24b, m24c) \
857
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
859
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
858
    "movq "MANGLE(M24A)", %%mm0 \n\t"\
860
    "movq "#m24a", %%mm0 \n\t"\
859
    "movq "MANGLE(M24C)", %%mm7 \n\t"\
861
    "movq "#m24c", %%mm7 \n\t"\
860
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
862
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
861
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
863
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
862
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
864
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
Lines 875-881 Link Here
875
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
877
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
876
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
878
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
877
\
879
\
878
    "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
880
    "pand "#m24b", %%mm1 \n\t" /* B5       B4        B3    */\
879
    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
881
    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
880
    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
882
    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
881
\
883
\
Lines 889-895 Link Here
889
\
891
\
890
    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
892
    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
891
    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
893
    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
892
    "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
894
    "pand "#m24b", %%mm6 \n\t" /* R7       R6        R5    */\
893
\
895
\
894
    "por    %%mm1, %%mm3        \n\t"\
896
    "por    %%mm1, %%mm3        \n\t"\
895
    "por    %%mm3, %%mm6        \n\t"\
897
    "por    %%mm3, %%mm6        \n\t"\
Lines 903-912 Link Here
903
905
904
#ifdef HAVE_MMX2
906
#ifdef HAVE_MMX2
905
#undef WRITEBGR24
907
#undef WRITEBGR24
906
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
908
#define WRITEBGR24(dst, dstw, index, m24a, m24b, m24c)  WRITEBGR24MMX2(dst, dstw, index, m24a, m24b, m24c)
907
#else
909
#else
908
#undef WRITEBGR24
910
#undef WRITEBGR24
909
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
911
#define WRITEBGR24(dst, dstw, index, m24a, m24b, m24c)  WRITEBGR24MMX(dst, dstw, index, m24a, m24b, m24c)
910
#endif
912
#endif
911
913
912
#define REAL_WRITEYUY2(dst, dstw, index) \
914
#define REAL_WRITEYUY2(dst, dstw, index) \
Lines 1053-1064 static inline void RENAME(yuv2packedX)(S Link Here
1053
            YSCALEYUV2RGBX
1055
            YSCALEYUV2RGBX
1054
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1056
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1055
            "add %4, %%"REG_c"                        \n\t"
1057
            "add %4, %%"REG_c"                        \n\t"
1056
            WRITEBGR24(%%REGc, %5, %%REGa)
1058
            WRITEBGR24(%%REGc, %5, %%REGa, %6, %7, %8)
1057
1059
1058
1060
1059
            :: "r" (&c->redDither),
1061
            :: "r" (&c->redDither),
1060
               "m" (dummy), "m" (dummy), "m" (dummy),
1062
               "m" (dummy), "m" (dummy), "m" (dummy),
1061
               "r" (dest), "m" (dstW)
1063
               "r" (dest), "m" (dstW), "m" (M24A), "m" (M24B), "m" (M24C)
1062
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1064
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1063
            );
1065
            );
1064
            return;
1066
            return;
Lines 1067-1078 static inline void RENAME(yuv2packedX)(S Link Here
1067
            YSCALEYUV2RGBX
1069
            YSCALEYUV2RGBX
1068
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1070
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1069
#ifdef DITHER1XBPP
1071
#ifdef DITHER1XBPP
1070
            "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1072
            "paddusb                 %6, %%mm2\n\t"
1071
            "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1073
            "paddusb                 %7, %%mm4\n\t"
1072
            "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1074
            "paddusb                 %8, %%mm5\n\t"
1073
#endif
1075
#endif
1074
1076
1075
            WRITEBGR15(%4, %5, %%REGa)
1077
            WRITEBGR15(%4, %5, %%REGa, %9)
1076
            YSCALEYUV2PACKEDX_END
1078
            YSCALEYUV2PACKEDX_END
1077
            return;
1079
            return;
1078
        case PIX_FMT_BGR565:
1080
        case PIX_FMT_BGR565:
Lines 1080-1091 static inline void RENAME(yuv2packedX)(S Link Here
1080
            YSCALEYUV2RGBX
1082
            YSCALEYUV2RGBX
1081
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1083
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1082
#ifdef DITHER1XBPP
1084
#ifdef DITHER1XBPP
1083
            "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1085
            "paddusb                 %6, %%mm2\n\t"
1084
            "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1086
            "paddusb                 %7, %%mm4\n\t"
1085
            "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1087
            "paddusb                 %8, %%mm5\n\t"
1086
#endif
1088
#endif
1087
1089
1088
            WRITEBGR16(%4, %5, %%REGa)
1090
            WRITEBGR16(%4, %5, %%REGa, %9, %10)
1089
            YSCALEYUV2PACKEDX_END
1091
            YSCALEYUV2PACKEDX_END
1090
            return;
1092
            return;
1091
        case PIX_FMT_YUYV422:
1093
        case PIX_FMT_YUYV422:
Lines 1114-1124 static inline void RENAME(yuv2packedX)(S Link Here
1114
            YSCALEYUV2RGBX
1116
            YSCALEYUV2RGBX
1115
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
1117
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
1116
            "add                        %4, %%"REG_c"   \n\t"
1118
            "add                        %4, %%"REG_c"   \n\t"
1117
            WRITEBGR24(%%REGc, %5, %%REGa)
1119
            WRITEBGR24(%%REGc, %5, %%REGa, %6, %7, %8)
1118
1120
1119
            :: "r" (&c->redDither),
1121
            :: "r" (&c->redDither),
1120
               "m" (dummy), "m" (dummy), "m" (dummy),
1122
               "m" (dummy), "m" (dummy), "m" (dummy),
1121
               "r" (dest),  "m" (dstW)
1123
               "r" (dest),  "m" (dstW), "m" (M24A), "m" (M24B), "m" (M24C)
1122
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1124
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1123
            );
1125
            );
1124
            return;
1126
            return;
Lines 1127-1138 static inline void RENAME(yuv2packedX)(S Link Here
1127
            YSCALEYUV2RGBX
1129
            YSCALEYUV2RGBX
1128
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1130
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1129
#ifdef DITHER1XBPP
1131
#ifdef DITHER1XBPP
1130
            "paddusb "MANGLE(b5Dither)", %%mm2  \n\t"
1132
            "paddusb                 %6, %%mm2\n\t"
1131
            "paddusb "MANGLE(g5Dither)", %%mm4  \n\t"
1133
            "paddusb                 %7, %%mm4\n\t"
1132
            "paddusb "MANGLE(r5Dither)", %%mm5  \n\t"
1134
            "paddusb                 %8, %%mm5\n\t"
1133
#endif
1135
#endif
1134
1136
1135
            WRITEBGR15(%4, %5, %%REGa)
1137
            WRITEBGR15(%4, %5, %%REGa, %9)
1136
            YSCALEYUV2PACKEDX_END
1138
            YSCALEYUV2PACKEDX_END
1137
            return;
1139
            return;
1138
        case PIX_FMT_BGR565:
1140
        case PIX_FMT_BGR565:
Lines 1140-1151 static inline void RENAME(yuv2packedX)(S Link Here
1140
            YSCALEYUV2RGBX
1142
            YSCALEYUV2RGBX
1141
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1143
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1142
#ifdef DITHER1XBPP
1144
#ifdef DITHER1XBPP
1143
            "paddusb "MANGLE(b5Dither)", %%mm2  \n\t"
1145
            "paddusb                 %6, %%mm2\n\t"
1144
            "paddusb "MANGLE(g6Dither)", %%mm4  \n\t"
1146
            "paddusb                 %7, %%mm4\n\t"
1145
            "paddusb "MANGLE(r5Dither)", %%mm5  \n\t"
1147
            "paddusb                 %8, %%mm5\n\t"
1146
#endif
1148
#endif
1147
1149
1148
            WRITEBGR16(%4, %5, %%REGa)
1150
            WRITEBGR16(%4, %5, %%REGa, %9, %10)
1149
            YSCALEYUV2PACKEDX_END
1151
            YSCALEYUV2PACKEDX_END
1150
            return;
1152
            return;
1151
        case PIX_FMT_YUYV422:
1153
        case PIX_FMT_YUYV422:
Lines 1427-1437 FULL_YSCALEYUV2RGB Link Here
1427
                "mov        %4, %%"REG_b"               \n\t"
1429
                "mov        %4, %%"REG_b"               \n\t"
1428
                "push %%"REG_BP"                        \n\t"
1430
                "push %%"REG_BP"                        \n\t"
1429
                YSCALEYUV2RGB(%%REGBP, %5)
1431
                YSCALEYUV2RGB(%%REGBP, %5)
1430
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1432
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP, %6, %7, %8)
1431
                "pop %%"REG_BP"                         \n\t"
1433
                "pop %%"REG_BP"                         \n\t"
1432
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1434
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1433
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1435
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1434
                "a" (&c->redDither)
1436
                "a" (&c->redDither), "m" (M24A), "m" (M24B), "m" (M24C)
1435
                );
1437
                );
1436
                return;
1438
                return;
1437
            case PIX_FMT_BGR555:
1439
            case PIX_FMT_BGR555:
Lines 1442-1458 FULL_YSCALEYUV2RGB Link Here
1442
                YSCALEYUV2RGB(%%REGBP, %5)
1444
                YSCALEYUV2RGB(%%REGBP, %5)
1443
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1445
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1444
#ifdef DITHER1XBPP
1446
#ifdef DITHER1XBPP
1445
                "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1447
                "paddusb                 %6, %%mm2      \n\t"
1446
                "paddusb "MANGLE(g5Dither)", %%mm4      \n\t"
1448
                "paddusb                 %7, %%mm4      \n\t"
1447
                "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1449
                "paddusb                 %8, %%mm5      \n\t"
1448
#endif
1450
#endif
1449
1451
1450
                WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1452
                WRITEBGR15(%%REGb, 8280(%5), %%REGBP, %9)
1451
                "pop %%"REG_BP"                         \n\t"
1453
                "pop %%"REG_BP"                         \n\t"
1452
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1454
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1453
1455
1454
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1456
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1455
                "a" (&c->redDither)
1457
                "a" (&c->redDither), "m" (b5Dither),  "m" (g6Dither), "m" (r5Dither),
1458
                "m" (bF8)
1456
                );
1459
                );
1457
                return;
1460
                return;
1458
            case PIX_FMT_BGR565:
1461
            case PIX_FMT_BGR565:
Lines 1463-1478 FULL_YSCALEYUV2RGB Link Here
1463
                YSCALEYUV2RGB(%%REGBP, %5)
1466
                YSCALEYUV2RGB(%%REGBP, %5)
1464
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1467
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1465
#ifdef DITHER1XBPP
1468
#ifdef DITHER1XBPP
1466
                "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1469
                "paddusb                 %6, %%mm2      \n\t"
1467
                "paddusb "MANGLE(g6Dither)", %%mm4      \n\t"
1470
                "paddusb                 %7, %%mm4      \n\t"
1468
                "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1471
                "paddusb                 %8, %%mm5      \n\t"
1469
#endif
1472
#endif
1470
1473
1471
                WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1474
                WRITEBGR16(%%REGb, 8280(%5), %%REGBP, %9, %10)
1472
                "pop %%"REG_BP"                         \n\t"
1475
                "pop %%"REG_BP"                         \n\t"
1473
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1476
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1474
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1477
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1475
                "a" (&c->redDither)
1478
                "a" (&c->redDither), "m" (b5Dither),  "m" (g6Dither), "m" (r5Dither),
1479
                "m" (bF8), "m" (bFC)
1476
                );
1480
                );
1477
                return;
1481
                return;
1478
            case PIX_FMT_YUYV422:
1482
            case PIX_FMT_YUYV422:
Lines 1537-1548 static inline void RENAME(yuv2packed1)(S Link Here
1537
            "mov        %4, %%"REG_b"               \n\t"
1541
            "mov        %4, %%"REG_b"               \n\t"
1538
            "push %%"REG_BP"                        \n\t"
1542
            "push %%"REG_BP"                        \n\t"
1539
            YSCALEYUV2RGB1(%%REGBP, %5)
1543
            YSCALEYUV2RGB1(%%REGBP, %5)
1540
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1544
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP, %6, %7, %8)
1541
            "pop %%"REG_BP"                         \n\t"
1545
            "pop %%"REG_BP"                         \n\t"
1542
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1546
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1543
1547
1544
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1548
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1545
            "a" (&c->redDither)
1549
            "a" (&c->redDither), "m" (M24A), "m" (M24B), "m" (M24C)
1546
            );
1550
            );
1547
            return;
1551
            return;
1548
        case PIX_FMT_BGR555:
1552
        case PIX_FMT_BGR555:
Lines 1553-1568 static inline void RENAME(yuv2packed1)(S Link Here
1553
            YSCALEYUV2RGB1(%%REGBP, %5)
1557
            YSCALEYUV2RGB1(%%REGBP, %5)
1554
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1558
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1555
#ifdef DITHER1XBPP
1559
#ifdef DITHER1XBPP
1556
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1560
            "paddusb                 %6, %%mm2      \n\t"
1557
            "paddusb "MANGLE(g5Dither)", %%mm4      \n\t"
1561
            "paddusb                 %7, %%mm4      \n\t"
1558
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1562
            "paddusb                 %8, %%mm5      \n\t"
1559
#endif
1563
#endif
1560
            WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1564
            WRITEBGR15(%%REGb, 8280(%5), %%REGBP, %9)
1561
            "pop %%"REG_BP"                         \n\t"
1565
            "pop %%"REG_BP"                         \n\t"
1562
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1566
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1563
1567
1564
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1568
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1565
            "a" (&c->redDither)
1569
            "a" (&c->redDither), "m" (b5Dither),  "m" (g6Dither), "m" (r5Dither),
1570
            "m" (bF8)
1566
            );
1571
            );
1567
            return;
1572
            return;
1568
        case PIX_FMT_BGR565:
1573
        case PIX_FMT_BGR565:
Lines 1573-1589 static inline void RENAME(yuv2packed1)(S Link Here
1573
            YSCALEYUV2RGB1(%%REGBP, %5)
1578
            YSCALEYUV2RGB1(%%REGBP, %5)
1574
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1579
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1575
#ifdef DITHER1XBPP
1580
#ifdef DITHER1XBPP
1576
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1581
            "paddusb                 %6, %%mm2      \n\t"
1577
            "paddusb "MANGLE(g6Dither)", %%mm4      \n\t"
1582
            "paddusb                 %7, %%mm4      \n\t"
1578
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1583
            "paddusb                 %8, %%mm5      \n\t"
1579
#endif
1584
#endif
1580
1585
1581
            WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1586
            WRITEBGR16(%%REGb, 8280(%5), %%REGBP, %9, %10)
1582
            "pop %%"REG_BP"                         \n\t"
1587
            "pop %%"REG_BP"                         \n\t"
1583
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1588
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1584
1589
1585
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1590
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1586
            "a" (&c->redDither)
1591
            "a" (&c->redDither), "m" (b5Dither),  "m" (g6Dither), "m" (r5Dither),
1592
            "m" (bF8), "m" (bFC)
1587
            );
1593
            );
1588
            return;
1594
            return;
1589
        case PIX_FMT_YUYV422:
1595
        case PIX_FMT_YUYV422:
Lines 1626-1637 static inline void RENAME(yuv2packed1)(S Link Here
1626
            "mov        %4, %%"REG_b"               \n\t"
1632
            "mov        %4, %%"REG_b"               \n\t"
1627
            "push %%"REG_BP"                        \n\t"
1633
            "push %%"REG_BP"                        \n\t"
1628
            YSCALEYUV2RGB1b(%%REGBP, %5)
1634
            YSCALEYUV2RGB1b(%%REGBP, %5)
1629
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1635
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP, %6, %7, %8)
1630
            "pop %%"REG_BP"                         \n\t"
1636
            "pop %%"REG_BP"                         \n\t"
1631
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1637
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1632
1638
1633
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1639
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1634
            "a" (&c->redDither)
1640
            "a" (&c->redDither), "m" (M24A), "m" (M24B), "m" (M24C)
1635
            );
1641
            );
1636
            return;
1642
            return;
1637
        case PIX_FMT_BGR555:
1643
        case PIX_FMT_BGR555:
Lines 1642-1657 static inline void RENAME(yuv2packed1)(S Link Here
1642
            YSCALEYUV2RGB1b(%%REGBP, %5)
1648
            YSCALEYUV2RGB1b(%%REGBP, %5)
1643
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1649
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1644
#ifdef DITHER1XBPP
1650
#ifdef DITHER1XBPP
1645
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1651
            "paddusb                 %6, %%mm2      \n\t"
1646
            "paddusb "MANGLE(g5Dither)", %%mm4      \n\t"
1652
            "paddusb                 %7, %%mm4      \n\t"
1647
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1653
            "paddusb                 %8, %%mm5      \n\t"
1648
#endif
1654
#endif
1649
            WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1655
            WRITEBGR15(%%REGb, 8280(%5), %%REGBP, %9)
1650
            "pop %%"REG_BP"                         \n\t"
1656
            "pop %%"REG_BP"                         \n\t"
1651
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1657
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1652
1658
1653
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1659
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1654
            "a" (&c->redDither)
1660
            "a" (&c->redDither), "m" (b5Dither),  "m" (g6Dither), "m" (r5Dither),
1661
            "m" (bF8)
1655
            );
1662
            );
1656
            return;
1663
            return;
1657
        case PIX_FMT_BGR565:
1664
        case PIX_FMT_BGR565:
Lines 1662-1678 static inline void RENAME(yuv2packed1)(S Link Here
1662
            YSCALEYUV2RGB1b(%%REGBP, %5)
1669
            YSCALEYUV2RGB1b(%%REGBP, %5)
1663
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1670
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1664
#ifdef DITHER1XBPP
1671
#ifdef DITHER1XBPP
1665
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1672
            "paddusb                 %6, %%mm2      \n\t"
1666
            "paddusb "MANGLE(g6Dither)", %%mm4      \n\t"
1673
            "paddusb                 %7, %%mm4      \n\t"
1667
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1674
            "paddusb                 %8, %%mm5      \n\t"
1668
#endif
1675
#endif
1669
1676
1670
            WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1677
            WRITEBGR16(%%REGb, 8280(%5), %%REGBP, %9, %10)
1671
            "pop %%"REG_BP"                         \n\t"
1678
            "pop %%"REG_BP"                         \n\t"
1672
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1679
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1673
1680
1674
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1681
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1675
            "a" (&c->redDither)
1682
            "a" (&c->redDither), "m" (b5Dither),  "m" (g6Dither), "m" (r5Dither),
1683
            "m" (bF8), "m" (bFC)
1676
            );
1684
            );
1677
            return;
1685
            return;
1678
        case PIX_FMT_YUYV422:
1686
        case PIX_FMT_YUYV422:
Lines 1706-1712 static inline void RENAME(yuy2ToY)(uint8 Link Here
1706
{
1714
{
1707
#ifdef HAVE_MMX
1715
#ifdef HAVE_MMX
1708
    asm volatile(
1716
    asm volatile(
1709
    "movq "MANGLE(bm01010101)", %%mm2           \n\t"
1717
    "movq                   %3, %%mm2           \n\t"
1710
    "mov                    %0, %%"REG_a"       \n\t"
1718
    "mov                    %0, %%"REG_a"       \n\t"
1711
    "1:                                         \n\t"
1719
    "1:                                         \n\t"
1712
    "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1720
    "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
Lines 1717-1723 static inline void RENAME(yuy2ToY)(uint8 Link Here
1717
    "movq                %%mm0, (%2, %%"REG_a") \n\t"
1725
    "movq                %%mm0, (%2, %%"REG_a") \n\t"
1718
    "add                    $8, %%"REG_a"       \n\t"
1726
    "add                    $8, %%"REG_a"       \n\t"
1719
    " js                    1b                  \n\t"
1727
    " js                    1b                  \n\t"
1720
    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1728
    : : "g" (-width), "r" (src+width*2), "r" (dst+width), "m" (bm01010101)
1721
    : "%"REG_a
1729
    : "%"REG_a
1722
    );
1730
    );
1723
#else
1731
#else
Lines 1731-1737 static inline void RENAME(yuy2ToUV)(uint Link Here
1731
{
1739
{
1732
#ifdef HAVE_MMX
1740
#ifdef HAVE_MMX
1733
    asm volatile(
1741
    asm volatile(
1734
    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1742
    "movq                   %4, %%mm4           \n\t"
1735
    "mov                    %0, %%"REG_a"       \n\t"
1743
    "mov                    %0, %%"REG_a"       \n\t"
1736
    "1:                                         \n\t"
1744
    "1:                                         \n\t"
1737
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1745
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
Lines 1748-1754 static inline void RENAME(yuy2ToUV)(uint Link Here
1748
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1756
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1749
    "add                    $4, %%"REG_a"       \n\t"
1757
    "add                    $4, %%"REG_a"       \n\t"
1750
    " js                    1b                  \n\t"
1758
    " js                    1b                  \n\t"
1751
    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1759
    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width), "m" (bm01010101)
1752
    : "%"REG_a
1760
    : "%"REG_a
1753
    );
1761
    );
1754
#else
1762
#else
Lines 1791-1797 static inline void RENAME(uyvyToUV)(uint Link Here
1791
{
1799
{
1792
#ifdef HAVE_MMX
1800
#ifdef HAVE_MMX
1793
    asm volatile(
1801
    asm volatile(
1794
    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1802
    "movq                   %4, %%mm4           \n\t"
1795
    "mov                    %0, %%"REG_a"       \n\t"
1803
    "mov                    %0, %%"REG_a"       \n\t"
1796
    "1:                                         \n\t"
1804
    "1:                                         \n\t"
1797
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1805
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
Lines 1808-1814 static inline void RENAME(uyvyToUV)(uint Link Here
1808
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1816
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1809
    "add                    $4, %%"REG_a"       \n\t"
1817
    "add                    $4, %%"REG_a"       \n\t"
1810
    " js                    1b                  \n\t"
1818
    " js                    1b                  \n\t"
1811
    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1819
    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width), "m" (bm01010101)
1812
    : "%"REG_a
1820
    : "%"REG_a
1813
    );
1821
    );
1814
#else
1822
#else
Lines 1859-1866 static inline void RENAME(bgr24ToY)(uint Link Here
1859
#ifdef HAVE_MMX
1867
#ifdef HAVE_MMX
1860
    asm volatile(
1868
    asm volatile(
1861
    "mov                        %2, %%"REG_a"   \n\t"
1869
    "mov                        %2, %%"REG_a"   \n\t"
1862
    "movq     "MANGLE(bgr2YCoeff)", %%mm6       \n\t"
1870
    "movq                       %3, %%mm6       \n\t"
1863
    "movq          "MANGLE(w1111)", %%mm5       \n\t"
1871
    "movq                       %4, %%mm5       \n\t"
1864
    "pxor                    %%mm7, %%mm7       \n\t"
1872
    "pxor                    %%mm7, %%mm7       \n\t"
1865
    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
1873
    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
1866
    ASMALIGN(4)
1874
    ASMALIGN(4)
Lines 1918-1929 static inline void RENAME(bgr24ToY)(uint Link Here
1918
    "psraw                      $7, %%mm4       \n\t"
1926
    "psraw                      $7, %%mm4       \n\t"
1919
1927
1920
    "packuswb                %%mm4, %%mm0       \n\t"
1928
    "packuswb                %%mm4, %%mm0       \n\t"
1921
    "paddusb "MANGLE(bgr2YOffset)", %%mm0       \n\t"
1929
    "paddusb                    %5, %%mm0       \n\t"
1922
1930
1923
    "movq                    %%mm0, (%1, %%"REG_a") \n\t"
1931
    "movq                    %%mm0, (%1, %%"REG_a") \n\t"
1924
    "add                        $8, %%"REG_a"   \n\t"
1932
    "add                        $8, %%"REG_a"   \n\t"
1925
    " js                        1b              \n\t"
1933
    " js                        1b              \n\t"
1926
    : : "r" (src+width*3), "r" (dst+width), "g" (-width)
1934
    : : "r" (src+width*3), "r" (dst+width), "g" (-width),
1935
        "m" (bgr2YCoeff), "m" (w1111), "m" (bgr2YOffset)
1927
    : "%"REG_a, "%"REG_d
1936
    : "%"REG_a, "%"REG_d
1928
    );
1937
    );
1929
#else
1938
#else
Lines 1944-1951 static inline void RENAME(bgr24ToUV)(uin Link Here
1944
#ifdef HAVE_MMX
1953
#ifdef HAVE_MMX
1945
    asm volatile(
1954
    asm volatile(
1946
    "mov                        %3, %%"REG_a"   \n\t"
1955
    "mov                        %3, %%"REG_a"   \n\t"
1947
    "movq          "MANGLE(w1111)", %%mm5       \n\t"
1956
    "movq                       %4, %%mm5       \n\t"
1948
    "movq     "MANGLE(bgr2UCoeff)", %%mm6       \n\t"
1957
    "movq                       %5, %%mm6       \n\t"
1949
    "pxor                    %%mm7, %%mm7       \n\t"
1958
    "pxor                    %%mm7, %%mm7       \n\t"
1950
    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
1959
    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
1951
    "add                 %%"REG_d", %%"REG_d"   \n\t"
1960
    "add                 %%"REG_d", %%"REG_d"   \n\t"
Lines 1977-1984 static inline void RENAME(bgr24ToUV)(uin Link Here
1977
    "psrlw                      $1, %%mm0       \n\t"
1986
    "psrlw                      $1, %%mm0       \n\t"
1978
    "psrlw                      $1, %%mm2       \n\t"
1987
    "psrlw                      $1, %%mm2       \n\t"
1979
#endif
1988
#endif
1980
    "movq     "MANGLE(bgr2VCoeff)", %%mm1       \n\t"
1989
    "movq                       %6, %%mm1       \n\t"
1981
    "movq     "MANGLE(bgr2VCoeff)", %%mm3       \n\t"
1990
    "movq                       %6, %%mm3       \n\t"
1982
1991
1983
    "pmaddwd                 %%mm0, %%mm1       \n\t"
1992
    "pmaddwd                 %%mm0, %%mm1       \n\t"
1984
    "pmaddwd                 %%mm2, %%mm3       \n\t"
1993
    "pmaddwd                 %%mm2, %%mm3       \n\t"
Lines 2019-2030 static inline void RENAME(bgr24ToUV)(uin Link Here
2019
    "punpcklbw              %%mm7, %%mm5       \n\t"
2028
    "punpcklbw              %%mm7, %%mm5       \n\t"
2020
    "punpcklbw              %%mm7, %%mm2       \n\t"
2029
    "punpcklbw              %%mm7, %%mm2       \n\t"
2021
    "paddw                  %%mm5, %%mm2       \n\t"
2030
    "paddw                  %%mm5, %%mm2       \n\t"
2022
    "movq         "MANGLE(w1111)", %%mm5       \n\t"
2031
    "movq                      %4, %%mm5       \n\t"
2023
    "psrlw                     $2, %%mm4       \n\t"
2032
    "psrlw                     $2, %%mm4       \n\t"
2024
    "psrlw                     $2, %%mm2       \n\t"
2033
    "psrlw                     $2, %%mm2       \n\t"
2025
#endif
2034
#endif
2026
    "movq    "MANGLE(bgr2VCoeff)", %%mm1       \n\t"
2035
    "movq                      %6, %%mm1       \n\t"
2027
    "movq    "MANGLE(bgr2VCoeff)", %%mm3       \n\t"
2036
    "movq                      %6, %%mm3       \n\t"
2028
2037
2029
    "pmaddwd                %%mm4, %%mm1       \n\t"
2038
    "pmaddwd                %%mm4, %%mm1       \n\t"
2030
    "pmaddwd                %%mm2, %%mm3       \n\t"
2039
    "pmaddwd                %%mm2, %%mm3       \n\t"
Lines 2048-2061 static inline void RENAME(bgr24ToUV)(uin Link Here
2048
    "punpckldq              %%mm4, %%mm0       \n\t"
2057
    "punpckldq              %%mm4, %%mm0       \n\t"
2049
    "punpckhdq              %%mm4, %%mm1       \n\t"
2058
    "punpckhdq              %%mm4, %%mm1       \n\t"
2050
    "packsswb               %%mm1, %%mm0       \n\t"
2059
    "packsswb               %%mm1, %%mm0       \n\t"
2051
    "paddb "MANGLE(bgr2UVOffset)", %%mm0       \n\t"
2060
    "paddb                     %7, %%mm0       \n\t"
2052
2061
2053
    "movd                   %%mm0, (%1, %%"REG_a")  \n\t"
2062
    "movd                   %%mm0, (%1, %%"REG_a")  \n\t"
2054
    "punpckhdq              %%mm0, %%mm0            \n\t"
2063
    "punpckhdq              %%mm0, %%mm0            \n\t"
2055
    "movd                   %%mm0, (%2, %%"REG_a")  \n\t"
2064
    "movd                   %%mm0, (%2, %%"REG_a")  \n\t"
2056
    "add                       $4, %%"REG_a"        \n\t"
2065
    "add                       $4, %%"REG_a"        \n\t"
2057
    " js                       1b                   \n\t"
2066
    " js                       1b                   \n\t"
2058
    : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
2067
    : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width),
2068
        "m" (w1111), "m" (bgr2UCoeff), "m" (bgr2VCoeff), "m" (bgr2UVOffset)
2059
    : "%"REG_a, "%"REG_d
2069
    : "%"REG_a, "%"REG_d
2060
    );
2070
    );
2061
#else
2071
#else
Lines 2313-2319 static inline void RENAME(hScale)(int16_ Link Here
2313
        "push            %%"REG_b"              \n\t"
2323
        "push            %%"REG_b"              \n\t"
2314
#endif
2324
#endif
2315
        "pxor                %%mm7, %%mm7       \n\t"
2325
        "pxor                %%mm7, %%mm7       \n\t"
2316
        "movq        "MANGLE(w02)", %%mm6       \n\t"
2326
        "movq                   %5, %%mm6       \n\t"
2317
        "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
2327
        "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
2318
        "mov             %%"REG_a", %%"REG_BP"  \n\t"
2328
        "mov             %%"REG_a", %%"REG_BP"  \n\t"
2319
        ASMALIGN(4)
2329
        ASMALIGN(4)
Lines 2342-2348 static inline void RENAME(hScale)(int16_ Link Here
2342
        "pop             %%"REG_b"              \n\t"
2352
        "pop             %%"REG_b"              \n\t"
2343
#endif
2353
#endif
2344
        : "+a" (counter)
2354
        : "+a" (counter)
2345
        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
2355
        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m" (w02)
2346
#if !defined(PIC)
2356
#if !defined(PIC)
2347
        : "%"REG_b
2357
        : "%"REG_b
2348
#endif
2358
#endif
Lines 2359-2365 static inline void RENAME(hScale)(int16_ Link Here
2359
        "push             %%"REG_b"             \n\t"
2369
        "push             %%"REG_b"             \n\t"
2360
#endif
2370
#endif
2361
        "pxor                 %%mm7, %%mm7      \n\t"
2371
        "pxor                 %%mm7, %%mm7      \n\t"
2362
        "movq         "MANGLE(w02)", %%mm6      \n\t"
2372
        "movq                    %5, %%mm6      \n\t"
2363
        "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
2373
        "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
2364
        "mov              %%"REG_a", %%"REG_BP" \n\t"
2374
        "mov              %%"REG_a", %%"REG_BP" \n\t"
2365
        ASMALIGN(4)
2375
        ASMALIGN(4)
Lines 2400-2406 static inline void RENAME(hScale)(int16_ Link Here
2400
        "pop              %%"REG_b"             \n\t"
2410
        "pop              %%"REG_b"             \n\t"
2401
#endif
2411
#endif
2402
        : "+a" (counter)
2412
        : "+a" (counter)
2403
        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
2413
        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m" (w02)
2404
#if !defined(PIC)
2414
#if !defined(PIC)
2405
        : "%"REG_b
2415
        : "%"REG_b
2406
#endif
2416
#endif
Lines 2415-2421 static inline void RENAME(hScale)(int16_ Link Here
2415
        dst-= counter/2;
2425
        dst-= counter/2;
2416
        asm volatile(
2426
        asm volatile(
2417
        "pxor                  %%mm7, %%mm7     \n\t"
2427
        "pxor                  %%mm7, %%mm7     \n\t"
2418
        "movq          "MANGLE(w02)", %%mm6     \n\t"
2428
        "movq                     %7, %%mm6     \n\t"
2419
        ASMALIGN(4)
2429
        ASMALIGN(4)
2420
        "1:                                     \n\t"
2430
        "1:                                     \n\t"
2421
        "mov                      %2, %%"REG_c" \n\t"
2431
        "mov                      %2, %%"REG_c" \n\t"
Lines 2452-2458 static inline void RENAME(hScale)(int16_ Link Here
2452
2462
2453
        : "+r" (counter), "+r" (filter)
2463
        : "+r" (counter), "+r" (filter)
2454
        : "m" (filterPos), "m" (dst), "m"(offset),
2464
        : "m" (filterPos), "m" (dst), "m"(offset),
2455
          "m" (src), "r" (filterSize*2)
2465
          "m" (src), "r" (filterSize*2), "m"(w02)
2456
        : "%"REG_a, "%"REG_c, "%"REG_d
2466
        : "%"REG_a, "%"REG_c, "%"REG_d
2457
        );
2467
        );
2458
    }
2468
    }
(-)ffmpeg-old/libswscale/yuv2rgb_template.c (-26 / +31 lines)
Lines 46-52 Link Here
46
#define SFENCE "/nop"
46
#define SFENCE "/nop"
47
#endif
47
#endif
48
48
49
#define YUV2RGB \
49
#define YUV2RGB(mmx_00ffw) \
50
    /* Do the multiply part of the conversion for even and odd pixels,
50
    /* Do the multiply part of the conversion for even and odd pixels,
51
       register usage:
51
       register usage:
52
       mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
52
       mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
Lines 75-81 Link Here
75
\
75
\
76
    /* convert the luma part */\
76
    /* convert the luma part */\
77
    "movq %%mm6, %%mm7;" /* Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */\
77
    "movq %%mm6, %%mm7;" /* Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */\
78
    "pand "MANGLE(mmx_00ffw)", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\
78
    "pand "#mmx_00ffw", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\
79
\
79
\
80
    "psrlw $8, %%mm7;" /* get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 */\
80
    "psrlw $8, %%mm7;" /* get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 */\
81
\
81
\
Lines 163-179 static inline int RENAME(yuv420_rgb16)(S Link Here
163
        PREFETCH" 64(%1) \n\t"
163
        PREFETCH" 64(%1) \n\t"
164
        PREFETCH" 64(%2) \n\t"
164
        PREFETCH" 64(%2) \n\t"
165
        */
165
        */
166
YUV2RGB
166
YUV2RGB(%6)
167
167
168
#ifdef DITHER1XBPP
168
#ifdef DITHER1XBPP
169
        "paddusb "MANGLE(b5Dither)", %%mm0;"
169
        "paddusb                 %7, %%mm0;"
170
        "paddusb "MANGLE(g6Dither)", %%mm2;"
170
        "paddusb                 %8, %%mm2;"
171
        "paddusb "MANGLE(r5Dither)", %%mm1;"
171
        "paddusb                 %9, %%mm1;"
172
#endif
172
#endif
173
        /* mask unneeded bits off */
173
        /* mask unneeded bits off */
174
        "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
174
        "pand                   %10, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
175
        "pand "MANGLE(mmx_grnmask)", %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
175
        "pand                   %11, %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
176
        "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
176
        "pand                   %10, %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
177
177
178
        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
178
        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
179
        "pxor %%mm4, %%mm4;" /* zero mm4 */
179
        "pxor %%mm4, %%mm4;" /* zero mm4 */
Lines 208-214 YUV2RGB Link Here
208
        " js  1b        \n\t"
208
        " js  1b        \n\t"
209
209
210
        : "+r" (index), "+r" (_image)
210
        : "+r" (index), "+r" (_image)
211
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
211
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index),
212
          "m" (mmx_00ffw), "m" (b5Dither), "m" (g6Dither), "m" (r5Dither),
213
          "m" (mmx_redmask), "m" (mmx_grnmask)
212
        );
214
        );
213
    }
215
    }
214
216
Lines 252-269 static inline int RENAME(yuv420_rgb15)(S Link Here
252
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
254
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
253
        //".balign 16     \n\t"
255
        //".balign 16     \n\t"
254
        "1:             \n\t"
256
        "1:             \n\t"
255
YUV2RGB
257
YUV2RGB(%6)
256
258
257
#ifdef DITHER1XBPP
259
#ifdef DITHER1XBPP
258
        "paddusb "MANGLE(b5Dither)", %%mm0  \n\t"
260
        "paddusb                 %7, %%mm0  \n\t"
259
        "paddusb "MANGLE(g5Dither)", %%mm2  \n\t"
261
        "paddusb                 %8, %%mm2  \n\t"
260
        "paddusb "MANGLE(r5Dither)", %%mm1  \n\t"
262
        "paddusb                 %9, %%mm1  \n\t"
261
#endif
263
#endif
262
264
263
        /* mask unneeded bits off */
265
        /* mask unneeded bits off */
264
        "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
266
        "pand                   %10, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
265
        "pand "MANGLE(mmx_redmask)", %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
267
        "pand                   %10, %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
266
        "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
268
        "pand                   %10, %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
267
269
268
        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
270
        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
269
        "psrlw   $1, %%mm1;" /* 0_r7r6r5  r4r3_0_0 0_r7r6r5 r4r3_0_0 */
271
        "psrlw   $1, %%mm1;" /* 0_r7r6r5  r4r3_0_0 0_r7r6r5 r4r3_0_0 */
Lines 298-304 YUV2RGB Link Here
298
        "add $4, %0             \n\t"
300
        "add $4, %0             \n\t"
299
        " js 1b                 \n\t"
301
        " js 1b                 \n\t"
300
        : "+r" (index), "+r" (_image)
302
        : "+r" (index), "+r" (_image)
301
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
303
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index),
304
          "m" (mmx_00ffw), "m" (b5Dither), "m" (g5Dither), "m" (r5Dither),
305
          "m" (mmx_redmask)
302
        );
306
        );
303
    }
307
    }
304
308
Lines 336-346 static inline int RENAME(yuv420_rgb24)(S Link Here
336
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
340
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
337
        //".balign 16     \n\t"
341
        //".balign 16     \n\t"
338
        "1:             \n\t"
342
        "1:             \n\t"
339
YUV2RGB
343
YUV2RGB(%6)
340
        /* mm0=B, %%mm2=G, %%mm1=R */
344
        /* mm0=B, %%mm2=G, %%mm1=R */
341
#ifdef HAVE_MMX2
345
#ifdef HAVE_MMX2
342
        "movq "MANGLE(M24A)", %%mm4     \n\t"
346
        "movq             %7, %%mm4     \n\t"
343
        "movq "MANGLE(M24C)", %%mm7     \n\t"
347
        "movq             %9, %%mm7     \n\t"
344
        "pshufw $0x50, %%mm0, %%mm5     \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */
348
        "pshufw $0x50, %%mm0, %%mm5     \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */
345
        "pshufw $0x50, %%mm2, %%mm3     \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */
349
        "pshufw $0x50, %%mm2, %%mm3     \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */
346
        "pshufw $0x00, %%mm1, %%mm6     \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */
350
        "pshufw $0x00, %%mm1, %%mm6     \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */
Lines 359-365 YUV2RGB Link Here
359
        "pshufw $0x55, %%mm2, %%mm3     \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */
363
        "pshufw $0x55, %%mm2, %%mm3     \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */
360
        "pshufw $0xA5, %%mm1, %%mm6     \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */
364
        "pshufw $0xA5, %%mm1, %%mm6     \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */
361
365
362
        "pand "MANGLE(M24B)", %%mm5     \n\t" /* B5       B4        B3    */
366
        "pand             %8, %%mm5     \n\t" /* B5       B4        B3    */
363
        "pand          %%mm7, %%mm3     \n\t" /*       G4        G3       */
367
        "pand          %%mm7, %%mm3     \n\t" /*       G4        G3       */
364
        "pand          %%mm4, %%mm6     \n\t" /*    R4        R3       R2 */
368
        "pand          %%mm4, %%mm6     \n\t" /*    R4        R3       R2 */
365
369
Lines 374-380 YUV2RGB Link Here
374
378
375
        "pand          %%mm7, %%mm5     \n\t" /*       B7        B6       */
379
        "pand          %%mm7, %%mm5     \n\t" /*       B7        B6       */
376
        "pand          %%mm4, %%mm3     \n\t" /*    G7        G6       G5 */
380
        "pand          %%mm4, %%mm3     \n\t" /*    G7        G6       G5 */
377
        "pand "MANGLE(M24B)", %%mm6     \n\t" /* R7       R6        R5    */
381
        "pand             %8, %%mm6     \n\t" /* R7       R6        R5    */
378
        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
382
        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
379
\
383
\
380
        "por          %%mm5, %%mm3      \n\t"
384
        "por          %%mm5, %%mm3      \n\t"
Lines 444-450 YUV2RGB Link Here
444
        " js  1b        \n\t"
448
        " js  1b        \n\t"
445
449
446
        : "+r" (index), "+r" (_image)
450
        : "+r" (index), "+r" (_image)
447
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
451
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index),
452
          "m" (mmx_00ffw), "m" (M24A), "m" (M24B), "m" (M24C)
448
        );
453
        );
449
    }
454
    }
450
455
Lines 482-488 static inline int RENAME(yuv420_rgb32)(S Link Here
482
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
487
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
483
        //".balign 16     \n\t"
488
        //".balign 16     \n\t"
484
        "1:             \n\t"
489
        "1:             \n\t"
485
YUV2RGB
490
YUV2RGB(%6)
486
        /* convert RGB plane to RGB packed format,
491
        /* convert RGB plane to RGB packed format,
487
           mm0 ->  B, mm1 -> R, mm2 -> G, mm3 -> 0,
492
           mm0 ->  B, mm1 -> R, mm2 -> G, mm3 -> 0,
488
           mm4 -> GB, mm5 -> AR pixel 4-7,
493
           mm4 -> GB, mm5 -> AR pixel 4-7,
Lines 530-536 YUV2RGB Link Here
530
        " js  1b        \n\t"
535
        " js  1b        \n\t"
531
536
532
        : "+r" (index), "+r" (_image)
537
        : "+r" (index), "+r" (_image)
533
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
538
        : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index), "m" (mmx_00ffw)
534
        );
539
        );
535
    }
540
    }
536
541

Return to bug 179872