Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 115568 | Differences between
and this patch

Collapse All | Expand All

(-)libavcodec/Makefile (-1 / +1 lines)
Lines 7-13 Link Here
7
VPATH=$(SRC_PATH)/libavcodec
7
VPATH=$(SRC_PATH)/libavcodec
8
8
9
# NOTE: -I.. is needed to include config.h
9
# NOTE: -I.. is needed to include config.h
10
CFLAGS=$(OPTFLAGS) -DHAVE_AV_CONFIG_H -I.. -I$(SRC_PATH)/libavutil -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE $(AMR_CFLAGS)
10
CFLAGS=$(OPTFLAGS) $(PIC) -DHAVE_AV_CONFIG_H -I.. -I$(SRC_PATH)/libavutil -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE $(AMR_CFLAGS)
11
11
12
OBJS= bitstream.o utils.o mem.o allcodecs.o \
12
OBJS= bitstream.o utils.o mem.o allcodecs.o \
13
      mpegvideo.o jrevdct.o jfdctfst.o jfdctint.o\
13
      mpegvideo.o jrevdct.o jfdctfst.o jfdctint.o\
(-)libavcodec/i386/dsputil_mmx.c (-70 / +71 lines)
Lines 613-648 Link Here
613
          "+m" (*(uint64_t*)(src - 1*stride)),
613
          "+m" (*(uint64_t*)(src - 1*stride)),
614
          "+m" (*(uint64_t*)(src + 0*stride)),
614
          "+m" (*(uint64_t*)(src + 0*stride)),
615
          "+m" (*(uint64_t*)(src + 1*stride))
615
          "+m" (*(uint64_t*)(src + 1*stride))
616
        : "g" (2*strength), "m"(ff_pb_FC)
616
        : "g" (2*(long)strength), "m"(ff_pb_FC)
617
    );
617
    );
618
}
618
}
619
619
620
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
620
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
621
    long dummy;
621
    asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
622
    asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
622
        "movd  %4, %%mm0		\n\t"
623
        "movd  (%3), %%mm0		\n\t"
623
        "movd  %5, %%mm1		\n\t"
624
        "movd  (%3, %4), %%mm1		\n\t"
624
        "movd  %6, %%mm2		\n\t"
625
        "movd  (%3, %4, 2), %%mm2	\n\t"
625
        "movd  %7, %%mm3		\n\t"
626
        "lea  (%4, %4, 2), %0		\n\t"
627
        "movd  (%3, %0), %%mm3		\n\t"
626
        "punpcklbw %%mm1, %%mm0		\n\t"
628
        "punpcklbw %%mm1, %%mm0		\n\t"
627
        "punpcklbw %%mm3, %%mm2		\n\t"
629
        "punpcklbw %%mm3, %%mm2		\n\t"
628
        "movq %%mm0, %%mm1		\n\t"
630
        "movq %%mm0, %%mm1		\n\t"
629
        "punpcklwd %%mm2, %%mm0		\n\t"
631
        "punpcklwd %%mm2, %%mm0		\n\t"
630
        "punpckhwd %%mm2, %%mm1		\n\t"
632
        "punpckhwd %%mm2, %%mm1		\n\t"
631
        "movd  %%mm0, %0		\n\t"
633
        "movd  %%mm0, (%1)		\n\t"
632
        "punpckhdq %%mm0, %%mm0		\n\t"
634
        "punpckhdq %%mm0, %%mm0		\n\t"
633
        "movd  %%mm0, %1		\n\t"
635
        "movd  %%mm0, (%1, %2)		\n\t"
634
        "movd  %%mm1, %2		\n\t"
636
        "movd  %%mm1, (%1, %2, 2)	\n\t"
635
        "punpckhdq %%mm1, %%mm1		\n\t"
637
        "punpckhdq %%mm1, %%mm1		\n\t"
636
        "movd  %%mm1, %3		\n\t"
638
        "lea  (%2, %2, 2), %0		\n\t"
639
        "movd  %%mm1, (%1, %0)		\n\t"
637
        
640
        
638
        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
641
        : "=&r" (dummy)
639
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
642
        :  "r" (dst),
640
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
643
           "r" ((long)dst_stride),
641
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
644
           "r" (src),
642
        :  "m" (*(uint32_t*)(src + 0*src_stride)),
645
           "r" ((long)src_stride)
643
           "m" (*(uint32_t*)(src + 1*src_stride)),
646
        : "memory"
644
           "m" (*(uint32_t*)(src + 2*src_stride)),
645
           "m" (*(uint32_t*)(src + 3*src_stride))
646
    );
647
    );
647
}
648
}
648
649
Lines 662-668 Link Here
662
          "+m" (temp[1]),
663
          "+m" (temp[1]),
663
          "+m" (temp[2]),
664
          "+m" (temp[2]),
664
          "+m" (temp[3])
665
          "+m" (temp[3])
665
        : "g" (2*strength), "m"(ff_pb_FC)
666
        : "g" (2*(long)strength), "m"(ff_pb_FC)
666
    );
667
    );
667
668
668
    asm volatile(
669
    asm volatile(
Lines 1727-1733 Link Here
1727
1728
1728
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1729
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
1729
        "paddw " #m4 ", " #m3 "		\n\t" /* x1 */\
1730
        "paddw " #m4 ", " #m3 "		\n\t" /* x1 */\
1730
        "movq "MANGLE(ff_pw_20)", %%mm4		\n\t" /* 20 */\
1731
        "movq %5, %%mm4			\n\t" /* 20 */\
1731
        "pmullw " #m3 ", %%mm4		\n\t" /* 20x1 */\
1732
        "pmullw " #m3 ", %%mm4		\n\t" /* 20x1 */\
1732
        "movq "#in7", " #m3 "		\n\t" /* d */\
1733
        "movq "#in7", " #m3 "		\n\t" /* d */\
1733
        "movq "#in0", %%mm5		\n\t" /* D */\
1734
        "movq "#in0", %%mm5		\n\t" /* D */\
Lines 1739-1745 Link Here
1739
        "paddw " #m5 ", %%mm6		\n\t" /* x2 */\
1740
        "paddw " #m5 ", %%mm6		\n\t" /* x2 */\
1740
        "paddw %%mm6, %%mm6		\n\t" /* 2x2 */\
1741
        "paddw %%mm6, %%mm6		\n\t" /* 2x2 */\
1741
        "psubw %%mm6, %%mm5		\n\t" /* -2x2 + x3 */\
1742
        "psubw %%mm6, %%mm5		\n\t" /* -2x2 + x3 */\
1742
        "pmullw "MANGLE(ff_pw_3)", %%mm5	\n\t" /* -6x2 + 3x3 */\
1743
        "pmullw %6, %%mm5		\n\t" /* -6x2 + 3x3 */\
1743
        "paddw " #rnd ", %%mm4		\n\t" /* x2 */\
1744
        "paddw " #rnd ", %%mm4		\n\t" /* x2 */\
1744
        "paddw %%mm4, %%mm5		\n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
1745
        "paddw %%mm4, %%mm5		\n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
1745
        "psraw $5, %%mm5		\n\t"\
1746
        "psraw $5, %%mm5		\n\t"\
Lines 1773-1787 Link Here
1773
        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
1774
        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
1774
        "psubw %%mm5, %%mm6		\n\t" /* c - 2b */\
1775
        "psubw %%mm5, %%mm6		\n\t" /* c - 2b */\
1775
        "pshufw $0x06, %%mm0, %%mm5	\n\t" /* 0C0B0A0A */\
1776
        "pshufw $0x06, %%mm0, %%mm5	\n\t" /* 0C0B0A0A */\
1776
        "pmullw "MANGLE(ff_pw_3)", %%mm6		\n\t" /* 3c - 6b */\
1777
        "pmullw %6, %%mm6		\n\t" /* 3c - 6b */\
1777
        "paddw %%mm4, %%mm0		\n\t" /* a */\
1778
        "paddw %%mm4, %%mm0		\n\t" /* a */\
1778
        "paddw %%mm1, %%mm5		\n\t" /* d */\
1779
        "paddw %%mm1, %%mm5		\n\t" /* d */\
1779
        "pmullw "MANGLE(ff_pw_20)", %%mm0		\n\t" /* 20a */\
1780
        "pmullw %5, %%mm0		\n\t" /* 20a */\
1780
        "psubw %%mm5, %%mm0		\n\t" /* 20a - d */\
1781
        "psubw %%mm5, %%mm0		\n\t" /* 20a - d */\
1781
        "paddw %6, %%mm6		\n\t"\
1782
        "paddw %8, %%mm6		\n\t"\
1782
        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
1783
        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
1783
        "psraw $5, %%mm0		\n\t"\
1784
        "psraw $5, %%mm0		\n\t"\
1784
        "movq %%mm0, %5			\n\t"\
1785
        "movq %%mm0, %7			\n\t"\
1785
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1786
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1786
        \
1787
        \
1787
        "movq 5(%0), %%mm0		\n\t" /* FGHIJKLM */\
1788
        "movq 5(%0), %%mm0		\n\t" /* FGHIJKLM */\
Lines 1799-1813 Link Here
1799
        "psrlq $24, %%mm6		\n\t" /* IJKLM000 */\
1800
        "psrlq $24, %%mm6		\n\t" /* IJKLM000 */\
1800
        "punpcklbw %%mm7, %%mm2		\n\t" /* 0F0G0H0I */\
1801
        "punpcklbw %%mm7, %%mm2		\n\t" /* 0F0G0H0I */\
1801
        "punpcklbw %%mm7, %%mm6		\n\t" /* 0I0J0K0L */\
1802
        "punpcklbw %%mm7, %%mm6		\n\t" /* 0I0J0K0L */\
1802
        "pmullw "MANGLE(ff_pw_3)", %%mm3		\n\t" /* 3c - 6b */\
1803
        "pmullw %6, %%mm3		\n\t" /* 3c - 6b */\
1803
        "paddw %%mm2, %%mm1		\n\t" /* a */\
1804
        "paddw %%mm2, %%mm1		\n\t" /* a */\
1804
        "paddw %%mm6, %%mm4		\n\t" /* d */\
1805
        "paddw %%mm6, %%mm4		\n\t" /* d */\
1805
        "pmullw "MANGLE(ff_pw_20)", %%mm1		\n\t" /* 20a */\
1806
        "pmullw %5, %%mm1		\n\t" /* 20a */\
1806
        "psubw %%mm4, %%mm3		\n\t" /* - 6b +3c - d */\
1807
        "psubw %%mm4, %%mm3		\n\t" /* - 6b +3c - d */\
1807
        "paddw %6, %%mm1		\n\t"\
1808
        "paddw %8, %%mm1		\n\t"\
1808
        "paddw %%mm1, %%mm3		\n\t" /* 20a - 6b +3c - d */\
1809
        "paddw %%mm1, %%mm3		\n\t" /* 20a - 6b +3c - d */\
1809
        "psraw $5, %%mm3		\n\t"\
1810
        "psraw $5, %%mm3		\n\t"\
1810
        "movq %5, %%mm1			\n\t"\
1811
        "movq %7, %%mm1			\n\t"\
1811
        "packuswb %%mm3, %%mm1		\n\t"\
1812
        "packuswb %%mm3, %%mm1		\n\t"\
1812
        OP_MMX2(%%mm1, (%1),%%mm4, q)\
1813
        OP_MMX2(%%mm1, (%1),%%mm4, q)\
1813
        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
1814
        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
Lines 1825-1831 Link Here
1825
        "psubw %%mm5, %%mm0		\n\t" /* c - 2b */\
1826
        "psubw %%mm5, %%mm0		\n\t" /* c - 2b */\
1826
        "movq %%mm3, %%mm5		\n\t" /* JKLMNOPQ */\
1827
        "movq %%mm3, %%mm5		\n\t" /* JKLMNOPQ */\
1827
        "psrlq $24, %%mm3		\n\t" /* MNOPQ000 */\
1828
        "psrlq $24, %%mm3		\n\t" /* MNOPQ000 */\
1828
        "pmullw "MANGLE(ff_pw_3)", %%mm0		\n\t" /* 3c - 6b */\
1829
        "pmullw %6, %%mm0		\n\t" /* 3c - 6b */\
1829
        "punpcklbw %%mm7, %%mm3		\n\t" /* 0M0N0O0P */\
1830
        "punpcklbw %%mm7, %%mm3		\n\t" /* 0M0N0O0P */\
1830
        "paddw %%mm3, %%mm2		\n\t" /* d */\
1831
        "paddw %%mm3, %%mm2		\n\t" /* d */\
1831
        "psubw %%mm2, %%mm0		\n\t" /* -6b + 3c - d */\
1832
        "psubw %%mm2, %%mm0		\n\t" /* -6b + 3c - d */\
Lines 1833-1840 Link Here
1833
        "punpcklbw %%mm7, %%mm2		\n\t" /* 0J0K0L0M */\
1834
        "punpcklbw %%mm7, %%mm2		\n\t" /* 0J0K0L0M */\
1834
        "punpckhbw %%mm7, %%mm5		\n\t" /* 0N0O0P0Q */\
1835
        "punpckhbw %%mm7, %%mm5		\n\t" /* 0N0O0P0Q */\
1835
        "paddw %%mm2, %%mm6		\n\t" /* a */\
1836
        "paddw %%mm2, %%mm6		\n\t" /* a */\
1836
        "pmullw "MANGLE(ff_pw_20)", %%mm6		\n\t" /* 20a */\
1837
        "pmullw %5, %%mm6		\n\t" /* 20a */\
1837
        "paddw %6, %%mm0		\n\t"\
1838
        "paddw %8, %%mm0		\n\t"\
1838
        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
1839
        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
1839
        "psraw $5, %%mm0		\n\t"\
1840
        "psraw $5, %%mm0		\n\t"\
1840
        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
1841
        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
Lines 1848-1857 Link Here
1848
        "paddw %%mm2, %%mm5		\n\t" /* d */\
1849
        "paddw %%mm2, %%mm5		\n\t" /* d */\
1849
        "paddw %%mm6, %%mm6		\n\t" /* 2b */\
1850
        "paddw %%mm6, %%mm6		\n\t" /* 2b */\
1850
        "psubw %%mm6, %%mm4		\n\t" /* c - 2b */\
1851
        "psubw %%mm6, %%mm4		\n\t" /* c - 2b */\
1851
        "pmullw "MANGLE(ff_pw_20)", %%mm3		\n\t" /* 20a */\
1852
        "pmullw %5, %%mm3		\n\t" /* 20a */\
1852
        "pmullw "MANGLE(ff_pw_3)", %%mm4		\n\t" /* 3c - 6b */\
1853
        "pmullw %6, %%mm4		\n\t" /* 3c - 6b */\
1853
        "psubw %%mm5, %%mm3		\n\t" /* -6b + 3c - d */\
1854
        "psubw %%mm5, %%mm3		\n\t" /* -6b + 3c - d */\
1854
        "paddw %6, %%mm4		\n\t"\
1855
        "paddw %8, %%mm4		\n\t"\
1855
        "paddw %%mm3, %%mm4		\n\t" /* 20a - 6b + 3c - d */\
1856
        "paddw %%mm3, %%mm4		\n\t" /* 20a - 6b + 3c - d */\
1856
        "psraw $5, %%mm4		\n\t"\
1857
        "psraw $5, %%mm4		\n\t"\
1857
        "packuswb %%mm4, %%mm0		\n\t"\
1858
        "packuswb %%mm4, %%mm0		\n\t"\
Lines 1862-1868 Link Here
1862
        "decl %2			\n\t"\
1863
        "decl %2			\n\t"\
1863
        " jnz 1b				\n\t"\
1864
        " jnz 1b				\n\t"\
1864
        : "+a"(src), "+c"(dst), "+m"(h)\
1865
        : "+a"(src), "+c"(dst), "+m"(h)\
1865
        : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
1866
        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\
1866
        : "memory"\
1867
        : "memory"\
1867
    );\
1868
    );\
1868
}\
1869
}\
Lines 1940-1951 Link Here
1940
        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
1941
        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
1941
        "psubw %%mm5, %%mm6		\n\t" /* c - 2b */\
1942
        "psubw %%mm5, %%mm6		\n\t" /* c - 2b */\
1942
        "pshufw $0x06, %%mm0, %%mm5	\n\t" /* 0C0B0A0A */\
1943
        "pshufw $0x06, %%mm0, %%mm5	\n\t" /* 0C0B0A0A */\
1943
        "pmullw "MANGLE(ff_pw_3)", %%mm6		\n\t" /* 3c - 6b */\
1944
        "pmullw %6, %%mm6		\n\t" /* 3c - 6b */\
1944
        "paddw %%mm4, %%mm0		\n\t" /* a */\
1945
        "paddw %%mm4, %%mm0		\n\t" /* a */\
1945
        "paddw %%mm1, %%mm5		\n\t" /* d */\
1946
        "paddw %%mm1, %%mm5		\n\t" /* d */\
1946
        "pmullw "MANGLE(ff_pw_20)", %%mm0		\n\t" /* 20a */\
1947
        "pmullw %5, %%mm0		\n\t" /* 20a */\
1947
        "psubw %%mm5, %%mm0		\n\t" /* 20a - d */\
1948
        "psubw %%mm5, %%mm0		\n\t" /* 20a - d */\
1948
        "paddw %6, %%mm6		\n\t"\
1949
        "paddw %8, %%mm6		\n\t"\
1949
        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
1950
        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
1950
        "psraw $5, %%mm0		\n\t"\
1951
        "psraw $5, %%mm0		\n\t"\
1951
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
1952
        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
Lines 1961-1970 Link Here
1961
        "paddw %%mm5, %%mm4		\n\t" /* d */\
1962
        "paddw %%mm5, %%mm4		\n\t" /* d */\
1962
        "paddw %%mm2, %%mm2		\n\t" /* 2b */\
1963
        "paddw %%mm2, %%mm2		\n\t" /* 2b */\
1963
        "psubw %%mm2, %%mm3		\n\t" /* c - 2b */\
1964
        "psubw %%mm2, %%mm3		\n\t" /* c - 2b */\
1964
        "pmullw "MANGLE(ff_pw_20)", %%mm1		\n\t" /* 20a */\
1965
        "pmullw %5, %%mm1		\n\t" /* 20a */\
1965
        "pmullw "MANGLE(ff_pw_3)", %%mm3		\n\t" /* 3c - 6b */\
1966
        "pmullw %6, %%mm3		\n\t" /* 3c - 6b */\
1966
        "psubw %%mm4, %%mm3		\n\t" /* -6b + 3c - d */\
1967
        "psubw %%mm4, %%mm3		\n\t" /* -6b + 3c - d */\
1967
        "paddw %6, %%mm1		\n\t"\
1968
        "paddw %8, %%mm1		\n\t"\
1968
        "paddw %%mm1, %%mm3		\n\t" /* 20a - 6b + 3c - d */\
1969
        "paddw %%mm1, %%mm3		\n\t" /* 20a - 6b + 3c - d */\
1969
        "psraw $5, %%mm3		\n\t"\
1970
        "psraw $5, %%mm3		\n\t"\
1970
        "packuswb %%mm3, %%mm0		\n\t"\
1971
        "packuswb %%mm3, %%mm0		\n\t"\
Lines 1975-1981 Link Here
1975
        "decl %2			\n\t"\
1976
        "decl %2			\n\t"\
1976
        " jnz 1b			\n\t"\
1977
        " jnz 1b			\n\t"\
1977
        : "+a"(src), "+c"(dst), "+m"(h)\
1978
        : "+a"(src), "+c"(dst), "+m"(h)\
1978
        : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
1979
        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\
1979
        : "memory"\
1980
        : "memory"\
1980
    );\
1981
    );\
1981
}\
1982
}\
Lines 2054-2092 Link Here
2054
        "movq 8(%0), %%mm1		\n\t"\
2055
        "movq 8(%0), %%mm1		\n\t"\
2055
        "movq 16(%0), %%mm2		\n\t"\
2056
        "movq 16(%0), %%mm2		\n\t"\
2056
        "movq 24(%0), %%mm3		\n\t"\
2057
        "movq 24(%0), %%mm3		\n\t"\
2057
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2058
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2058
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2059
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2059
        "add %4, %1			\n\t"\
2060
        "add %4, %1			\n\t"\
2060
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2061
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2061
        \
2062
        \
2062
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2063
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2063
        "add %4, %1			\n\t"\
2064
        "add %4, %1			\n\t"\
2064
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2065
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2065
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
2066
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
2066
        "add %4, %1			\n\t"\
2067
        "add %4, %1			\n\t"\
2067
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
2068
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
2068
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
2069
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
2069
        "add %4, %1			\n\t"\
2070
        "add %4, %1			\n\t"\
2070
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
2071
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
2071
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
2072
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
2072
        "add %4, %1			\n\t"\
2073
        "add %4, %1			\n\t"\
2073
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
2074
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
2074
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
2075
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
2075
        "add %4, %1			\n\t"\
2076
        "add %4, %1			\n\t"\
2076
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
2077
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
2077
        \
2078
        \
2078
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
2079
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
2079
        "add %4, %1			\n\t"  \
2080
        "add %4, %1			\n\t"  \
2080
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
2081
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
2081
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
2082
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
2082
        \
2083
        \
2083
        "add $136, %0			\n\t"\
2084
        "add $136, %0			\n\t"\
2084
        "add %6, %1			\n\t"\
2085
        "add %8, %1			\n\t"\
2085
        "decl %2			\n\t"\
2086
        "decl %2			\n\t"\
2086
        " jnz 1b			\n\t"\
2087
        " jnz 1b			\n\t"\
2087
        \
2088
        \
2088
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2089
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2089
        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
2090
        : "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*(long)dstStride)\
2090
        :"memory"\
2091
        :"memory"\
2091
    );\
2092
    );\
2092
}\
2093
}\
Lines 2126-2152 Link Here
2126
        "movq 8(%0), %%mm1		\n\t"\
2127
        "movq 8(%0), %%mm1		\n\t"\
2127
        "movq 16(%0), %%mm2		\n\t"\
2128
        "movq 16(%0), %%mm2		\n\t"\
2128
        "movq 24(%0), %%mm3		\n\t"\
2129
        "movq 24(%0), %%mm3		\n\t"\
2129
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2130
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
2130
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2131
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
2131
        "add %4, %1			\n\t"\
2132
        "add %4, %1			\n\t"\
2132
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2133
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
2133
        \
2134
        \
2134
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2135
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
2135
        "add %4, %1			\n\t"\
2136
        "add %4, %1			\n\t"\
2136
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2137
        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
2137
        \
2138
        \
2138
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
2139
        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
2139
        "add %4, %1			\n\t"\
2140
        "add %4, %1			\n\t"\
2140
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
2141
        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
2141
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
2142
        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
2142
                \
2143
                \
2143
        "add $72, %0			\n\t"\
2144
        "add $72, %0			\n\t"\
2144
        "add %6, %1			\n\t"\
2145
        "add %8, %1			\n\t"\
2145
        "decl %2			\n\t"\
2146
        "decl %2			\n\t"\
2146
        " jnz 1b			\n\t"\
2147
        " jnz 1b			\n\t"\
2147
         \
2148
         \
2148
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2149
        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
2149
        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
2150
        : "r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*(long)dstStride)\
2150
        : "memory"\
2151
        : "memory"\
2151
   );\
2152
   );\
2152
}\
2153
}\
(-)libavcodec/i386/motion_est_mmx.c (-2 / +2 lines)
Lines 119-125 Link Here
119
    long len= -(stride*h);
119
    long len= -(stride*h);
120
    asm volatile(
120
    asm volatile(
121
        ".balign 16			\n\t"
121
        ".balign 16			\n\t"
122
        "movq "MANGLE(bone)", %%mm5	\n\t"
122
        "movq %5, %%mm5			\n\t"
123
        "1:				\n\t"
123
        "1:				\n\t"
124
        "movq (%1, %%"REG_a"), %%mm0	\n\t"
124
        "movq (%1, %%"REG_a"), %%mm0	\n\t"
125
        "movq (%2, %%"REG_a"), %%mm2	\n\t"
125
        "movq (%2, %%"REG_a"), %%mm2	\n\t"
Lines 147-153 Link Here
147
        "add %4, %%"REG_a"		\n\t"
147
        "add %4, %%"REG_a"		\n\t"
148
        " js 1b				\n\t"
148
        " js 1b				\n\t"
149
        : "+a" (len)
149
        : "+a" (len)
150
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
150
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (bone)
151
    );
151
    );
152
}
152
}
153
153
(-)libavcodec/i386/mpegvideo_mmx_template.c (-10 / +10 lines)
Lines 103-109 Link Here
103
    if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
103
    if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
104
    
104
    
105
        asm volatile(
105
        asm volatile(
106
            "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
106
            "movd %6, %%mm3			\n\t" // last_non_zero_p1
107
            SPREADW(%%mm3)
107
            SPREADW(%%mm3)
108
            "pxor %%mm7, %%mm7			\n\t" // 0
108
            "pxor %%mm7, %%mm7			\n\t" // 0
109
            "pxor %%mm4, %%mm4			\n\t" // 0
109
            "pxor %%mm4, %%mm4			\n\t" // 0
Lines 132-142 Link Here
132
            "add $8, %%"REG_a"			\n\t"
132
            "add $8, %%"REG_a"			\n\t"
133
            " js 1b				\n\t"
133
            " js 1b				\n\t"
134
	    PMAX(%%mm3, %%mm0)
134
	    PMAX(%%mm3, %%mm0)
135
            "movd %%mm3, %%"REG_a"		\n\t"
135
            "movd %%mm3, %0			\n\t"
136
            "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
136
            "movzb %b0, %0			\n\t" // last_non_zero_p1
137
	    : "+a" (last_non_zero_p1)
137
	    : "=q" (last_non_zero_p1)
138
            : "r" (block+64), "r" (qmat), "r" (bias),
138
            : "r" (block+64), "r" (qmat), "r" (bias),
139
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
139
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64), "m" (last_non_zero_p1)
140
        );
140
        );
141
        // note the asm is split cuz gcc doesnt like that many operands ...
141
        // note the asm is split cuz gcc doesnt like that many operands ...
142
        asm volatile(
142
        asm volatile(
Lines 150-156 Link Here
150
        );
150
        );
151
    }else{ // FMT_H263
151
    }else{ // FMT_H263
152
        asm volatile(
152
        asm volatile(
153
            "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
153
            "movd %6, %%mm3			\n\t" // last_non_zero_p1
154
            SPREADW(%%mm3)
154
            SPREADW(%%mm3)
155
            "pxor %%mm7, %%mm7			\n\t" // 0
155
            "pxor %%mm7, %%mm7			\n\t" // 0
156
            "pxor %%mm4, %%mm4			\n\t" // 0
156
            "pxor %%mm4, %%mm4			\n\t" // 0
Lines 178-188 Link Here
178
            "add $8, %%"REG_a"			\n\t"
178
            "add $8, %%"REG_a"			\n\t"
179
            " js 1b				\n\t"
179
            " js 1b				\n\t"
180
	    PMAX(%%mm3, %%mm0)
180
	    PMAX(%%mm3, %%mm0)
181
            "movd %%mm3, %%"REG_a"		\n\t"
181
            "movd %%mm3, %0			\n\t"
182
            "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
182
            "movzb %b0, %0			\n\t" // last_non_zero_p1
183
	    : "+a" (last_non_zero_p1)
183
	    : "=q" (last_non_zero_p1)
184
            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
184
            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
185
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
185
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64), "m" (last_non_zero_p1)
186
        );
186
        );
187
        // note the asm is split cuz gcc doesnt like that many operands ...
187
        // note the asm is split cuz gcc doesnt like that many operands ...
188
        asm volatile(
188
        asm volatile(
(-)libavcodec/i386/simple_idct_mmx.c (-4 / +4 lines)
Lines 363-369 Link Here
363
	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
363
	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
364
	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
364
	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
365
	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
365
	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
366
	"movq "MANGLE(wm1010)", %%mm4		\n\t"\
366
	"movq %3, %%mm4				\n\t"\
367
	"pand %%mm0, %%mm4			\n\t"\
367
	"pand %%mm0, %%mm4			\n\t"\
368
	"por %%mm1, %%mm4			\n\t"\
368
	"por %%mm1, %%mm4			\n\t"\
369
	"por %%mm2, %%mm4			\n\t"\
369
	"por %%mm2, %%mm4			\n\t"\
Lines 471-477 Link Here
471
	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
471
	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
472
	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
472
	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
473
	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
473
	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
474
	"movq "MANGLE(wm1010)", %%mm4		\n\t"\
474
	"movq %3, %%mm4				\n\t"\
475
	"pand %%mm0, %%mm4			\n\t"\
475
	"pand %%mm0, %%mm4			\n\t"\
476
	"por %%mm1, %%mm4			\n\t"\
476
	"por %%mm1, %%mm4			\n\t"\
477
	"por %%mm2, %%mm4			\n\t"\
477
	"por %%mm2, %%mm4			\n\t"\
Lines 545-551 Link Here
545
	"jmp 2f					\n\t"\
545
	"jmp 2f					\n\t"\
546
	"1:					\n\t"\
546
	"1:					\n\t"\
547
	"pslld $16, %%mm0			\n\t"\
547
	"pslld $16, %%mm0			\n\t"\
548
	"paddd "MANGLE(d40000)", %%mm0		\n\t"\
548
	"paddd %4, %%mm0			\n\t"\
549
	"psrad $13, %%mm0			\n\t"\
549
	"psrad $13, %%mm0			\n\t"\
550
	"packssdw %%mm0, %%mm0			\n\t"\
550
	"packssdw %%mm0, %%mm0			\n\t"\
551
	"movq %%mm0, " #dst "			\n\t"\
551
	"movq %%mm0, " #dst "			\n\t"\
Lines 1290-1296 Link Here
1290
*/
1290
*/
1291
1291
1292
"9: \n\t"
1292
"9: \n\t"
1293
		:: "r" (block), "r" (temp), "r" (coeffs)
1293
		:: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m" (d40000)
1294
		: "%eax"
1294
		: "%eax"
1295
	);
1295
	);
1296
}
1296
}
(-)libavcodec/liba52/resample_mmx.c (-23 / +23 lines)
Lines 16-25 Link Here
16
    int32_t * f = (int32_t *) _f;
16
    int32_t * f = (int32_t *) _f;
17
	asm volatile(
17
	asm volatile(
18
		"movl $-512, %%esi		\n\t"
18
		"movl $-512, %%esi		\n\t"
19
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
19
		"movq %2, %%mm7			\n\t"
20
		"movq "MANGLE(wm1100)", %%mm3	\n\t"
20
		"movq %3, %%mm3			\n\t"
21
		"movq "MANGLE(wm0101)", %%mm4	\n\t"
21
		"movq %4, %%mm4			\n\t"
22
		"movq "MANGLE(wm1010)", %%mm5	\n\t"
22
		"movq %5, %%mm5			\n\t"
23
		"pxor %%mm6, %%mm6		\n\t"
23
		"pxor %%mm6, %%mm6		\n\t"
24
		"1:				\n\t"
24
		"1:				\n\t"
25
		"movq (%1, %%esi, 2), %%mm0	\n\t"
25
		"movq (%1, %%esi, 2), %%mm0	\n\t"
Lines 43-49 Link Here
43
		"addl $8, %%esi			\n\t"
43
		"addl $8, %%esi			\n\t"
44
		" jnz 1b			\n\t"
44
		" jnz 1b			\n\t"
45
		"emms				\n\t"
45
		"emms				\n\t"
46
		:: "r" (s16+1280), "r" (f+256)
46
		:: "r" (s16+1280), "r" (f+256), "m" (magicF2W), "m" (wm1100), "m" (wm0101), "m" (wm1010)
47
		:"%esi", "%edi", "memory"
47
		:"%esi", "%edi", "memory"
48
	);
48
	);
49
    return 5*256;
49
    return 5*256;
Lines 71-77 Link Here
71
	);*/
71
	);*/
72
	asm volatile(
72
	asm volatile(
73
		"movl $-1024, %%esi		\n\t"
73
		"movl $-1024, %%esi		\n\t"
74
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
74
		"movq %2, %%mm7			\n\t"
75
		"1:				\n\t"
75
		"1:				\n\t"
76
		"movq (%1, %%esi), %%mm0	\n\t"
76
		"movq (%1, %%esi), %%mm0	\n\t"
77
		"movq 8(%1, %%esi), %%mm1	\n\t"
77
		"movq 8(%1, %%esi), %%mm1	\n\t"
Lines 91-97 Link Here
91
		"addl $16, %%esi		\n\t"
91
		"addl $16, %%esi		\n\t"
92
		" jnz 1b			\n\t"
92
		" jnz 1b			\n\t"
93
		"emms				\n\t"
93
		"emms				\n\t"
94
		:: "r" (s16+512), "r" (f+256)
94
		:: "r" (s16+512), "r" (f+256), "m" (magicF2W)
95
		:"%esi", "memory"
95
		:"%esi", "memory"
96
	);
96
	);
97
    return 2*256;
97
    return 2*256;
Lines 101-107 Link Here
101
    int32_t * f = (int32_t *) _f;
101
    int32_t * f = (int32_t *) _f;
102
	asm volatile(
102
	asm volatile(
103
		"movl $-1024, %%esi		\n\t"
103
		"movl $-1024, %%esi		\n\t"
104
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
104
		"movq %2, %%mm7			\n\t"
105
		"pxor %%mm6, %%mm6		\n\t"
105
		"pxor %%mm6, %%mm6		\n\t"
106
		"movq %%mm7, %%mm5		\n\t"
106
		"movq %%mm7, %%mm5		\n\t"
107
		"punpckldq %%mm6, %%mm5		\n\t"
107
		"punpckldq %%mm6, %%mm5		\n\t"
Lines 146-152 Link Here
146
		"addl $16, %%esi		\n\t"
146
		"addl $16, %%esi		\n\t"
147
		" jnz 1b			\n\t"
147
		" jnz 1b			\n\t"
148
		"emms				\n\t"
148
		"emms				\n\t"
149
		:: "r" (s16+1280), "r" (f+256)
149
		:: "r" (s16+1280), "r" (f+256), "m" (magicF2W)
150
		:"%esi", "%edi", "memory"
150
		:"%esi", "%edi", "memory"
151
	);
151
	);
152
    return 5*256;
152
    return 5*256;
Lines 156-162 Link Here
156
    int32_t * f = (int32_t *) _f;
156
    int32_t * f = (int32_t *) _f;
157
	asm volatile(
157
	asm volatile(
158
		"movl $-1024, %%esi		\n\t"
158
		"movl $-1024, %%esi		\n\t"
159
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
159
		"movq %2, %%mm7			\n\t"
160
		"1:				\n\t"
160
		"1:				\n\t"
161
		"movq (%1, %%esi), %%mm0	\n\t"
161
		"movq (%1, %%esi), %%mm0	\n\t"
162
		"movq 8(%1, %%esi), %%mm1	\n\t"
162
		"movq 8(%1, %%esi), %%mm1	\n\t"
Lines 197-203 Link Here
197
		"addl $16, %%esi		\n\t"
197
		"addl $16, %%esi		\n\t"
198
		" jnz 1b			\n\t"
198
		" jnz 1b			\n\t"
199
		"emms				\n\t"
199
		"emms				\n\t"
200
		:: "r" (s16+1024), "r" (f+256)
200
		:: "r" (s16+1024), "r" (f+256), "m" (magicF2W)
201
		:"%esi", "memory"
201
		:"%esi", "memory"
202
	);
202
	);
203
    return 4*256;
203
    return 4*256;
Lines 207-213 Link Here
207
    int32_t * f = (int32_t *) _f;
207
    int32_t * f = (int32_t *) _f;
208
	asm volatile(
208
	asm volatile(
209
		"movl $-1024, %%esi		\n\t"
209
		"movl $-1024, %%esi		\n\t"
210
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
210
		"movq %2, %%mm7			\n\t"
211
		"1:				\n\t"
211
		"1:				\n\t"
212
		"movd (%1, %%esi), %%mm0	\n\t"
212
		"movd (%1, %%esi), %%mm0	\n\t"
213
		"punpckldq 2048(%1, %%esi), %%mm0\n\t"
213
		"punpckldq 2048(%1, %%esi), %%mm0\n\t"
Lines 256-262 Link Here
256
		"addl $16, %%esi		\n\t"
256
		"addl $16, %%esi		\n\t"
257
		" jnz 1b			\n\t"
257
		" jnz 1b			\n\t"
258
		"emms				\n\t"
258
		"emms				\n\t"
259
		:: "r" (s16+1280), "r" (f+256)
259
		:: "r" (s16+1280), "r" (f+256), "m" (magicF2W)
260
		:"%esi", "%edi", "memory"
260
		:"%esi", "%edi", "memory"
261
	);
261
	);
262
    return 5*256;
262
    return 5*256;
Lines 266-272 Link Here
266
    int32_t * f = (int32_t *) _f;
266
    int32_t * f = (int32_t *) _f;
267
	asm volatile(
267
	asm volatile(
268
		"movl $-1024, %%esi		\n\t"
268
		"movl $-1024, %%esi		\n\t"
269
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
269
		"movq %2, %%mm7			\n\t"
270
		"pxor %%mm6, %%mm6		\n\t"
270
		"pxor %%mm6, %%mm6		\n\t"
271
		"1:				\n\t"
271
		"1:				\n\t"
272
		"movq 1024(%1, %%esi), %%mm0	\n\t"
272
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 296-302 Link Here
296
		"addl $16, %%esi		\n\t"
296
		"addl $16, %%esi		\n\t"
297
		" jnz 1b			\n\t"
297
		" jnz 1b			\n\t"
298
		"emms				\n\t"
298
		"emms				\n\t"
299
		:: "r" (s16+1536), "r" (f+256)
299
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
300
		:"%esi", "%edi", "memory"
300
		:"%esi", "%edi", "memory"
301
	);
301
	);
302
    return 6*256;
302
    return 6*256;
Lines 306-312 Link Here
306
    int32_t * f = (int32_t *) _f;
306
    int32_t * f = (int32_t *) _f;
307
	asm volatile(
307
	asm volatile(
308
		"movl $-1024, %%esi		\n\t"
308
		"movl $-1024, %%esi		\n\t"
309
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
309
		"movq %2, %%mm7			\n\t"
310
		"pxor %%mm6, %%mm6		\n\t"
310
		"pxor %%mm6, %%mm6		\n\t"
311
		"1:				\n\t"
311
		"1:				\n\t"
312
		"movq 1024(%1, %%esi), %%mm0	\n\t"
312
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 334-340 Link Here
334
		"addl $8, %%esi			\n\t"
334
		"addl $8, %%esi			\n\t"
335
		" jnz 1b			\n\t"
335
		" jnz 1b			\n\t"
336
		"emms				\n\t"
336
		"emms				\n\t"
337
		:: "r" (s16+1536), "r" (f+256)
337
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
338
		:"%esi", "%edi", "memory"
338
		:"%esi", "%edi", "memory"
339
	);
339
	);
340
    return 6*256;
340
    return 6*256;
Lines 344-350 Link Here
344
    int32_t * f = (int32_t *) _f;
344
    int32_t * f = (int32_t *) _f;
345
	asm volatile(
345
	asm volatile(
346
		"movl $-1024, %%esi		\n\t"
346
		"movl $-1024, %%esi		\n\t"
347
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
347
		"movq %2, %%mm7			\n\t"
348
		"pxor %%mm6, %%mm6		\n\t"
348
		"pxor %%mm6, %%mm6		\n\t"
349
		"1:				\n\t"
349
		"1:				\n\t"
350
		"movq 1024(%1, %%esi), %%mm0	\n\t"
350
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 374-380 Link Here
374
		"addl $8, %%esi			\n\t"
374
		"addl $8, %%esi			\n\t"
375
		" jnz 1b			\n\t"
375
		" jnz 1b			\n\t"
376
		"emms				\n\t"
376
		"emms				\n\t"
377
		:: "r" (s16+1536), "r" (f+256)
377
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
378
		:"%esi", "%edi", "memory"
378
		:"%esi", "%edi", "memory"
379
	);
379
	);
380
    return 6*256;
380
    return 6*256;
Lines 384-390 Link Here
384
    int32_t * f = (int32_t *) _f;
384
    int32_t * f = (int32_t *) _f;
385
	asm volatile(
385
	asm volatile(
386
		"movl $-1024, %%esi		\n\t"
386
		"movl $-1024, %%esi		\n\t"
387
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
387
		"movq %2, %%mm7			\n\t"
388
//		"pxor %%mm6, %%mm6		\n\t"
388
//		"pxor %%mm6, %%mm6		\n\t"
389
		"1:				\n\t"
389
		"1:				\n\t"
390
		"movq 1024(%1, %%esi), %%mm0	\n\t"
390
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 420-426 Link Here
420
		"addl $8, %%esi			\n\t"
420
		"addl $8, %%esi			\n\t"
421
		" jnz 1b			\n\t"
421
		" jnz 1b			\n\t"
422
		"emms				\n\t"
422
		"emms				\n\t"
423
		:: "r" (s16+1536), "r" (f+256)
423
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
424
		:"%esi", "%edi", "memory"
424
		:"%esi", "%edi", "memory"
425
	);
425
	);
426
    return 6*256;
426
    return 6*256;
Lines 430-436 Link Here
430
    int32_t * f = (int32_t *) _f;
430
    int32_t * f = (int32_t *) _f;
431
	asm volatile(
431
	asm volatile(
432
		"movl $-1024, %%esi		\n\t"
432
		"movl $-1024, %%esi		\n\t"
433
		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
433
		"movq %2, %%mm7			\n\t"
434
//		"pxor %%mm6, %%mm6		\n\t"
434
//		"pxor %%mm6, %%mm6		\n\t"
435
		"1:				\n\t"
435
		"1:				\n\t"
436
		"movq 1024(%1, %%esi), %%mm0	\n\t"
436
		"movq 1024(%1, %%esi), %%mm0	\n\t"
Lines 468-474 Link Here
468
		"addl $8, %%esi			\n\t"
468
		"addl $8, %%esi			\n\t"
469
		" jnz 1b			\n\t"
469
		" jnz 1b			\n\t"
470
		"emms				\n\t"
470
		"emms				\n\t"
471
		:: "r" (s16+1536), "r" (f+256)
471
		:: "r" (s16+1536), "r" (f+256), "m" (magicF2W)
472
		:"%esi", "%edi", "memory"
472
		:"%esi", "%edi", "memory"
473
	);
473
	);
474
    return 6*256;
474
    return 6*256;
(-)libavcodec/libpostproc/Makefile (-1 / +1 lines)
Lines 20-26 Link Here
20
PPOBJS=postprocess.o
20
PPOBJS=postprocess.o
21
SPPOBJS=postprocess_pic.o
21
SPPOBJS=postprocess_pic.o
22
22
23
CFLAGS  = $(OPTFLAGS) $(MLIB_INC) -I. -I.. $(EXTRA_INC)
23
CFLAGS  = $(OPTFLAGS) $(PIC) $(MLIB_INC) -I. -I.. $(EXTRA_INC)
24
# -I/usr/X11R6/include/
24
# -I/usr/X11R6/include/
25
25
26
.SUFFIXES: .c .o
26
.SUFFIXES: .c .o
(-)libavcodec/libpostproc/postprocess_template.c (-62 / +57 lines)
Lines 515-521 Link Here
515
                "paddusb %%mm0, %%mm0				\n\t"
515
                "paddusb %%mm0, %%mm0				\n\t"
516
		"psubusb %%mm0, %%mm4				\n\t"
516
		"psubusb %%mm0, %%mm4				\n\t"
517
		"pcmpeqb %%mm7, %%mm4				\n\t" // d <= QP ? -1 : 0
517
		"pcmpeqb %%mm7, %%mm4				\n\t" // d <= QP ? -1 : 0
518
		"psubusb "MANGLE(b01)", %%mm3			\n\t"
518
		"psubusb %3, %%mm3				\n\t"
519
		"pand %%mm4, %%mm3				\n\t" // d <= QP ? d : 0
519
		"pand %%mm4, %%mm3				\n\t" // d <= QP ? d : 0
520
520
521
		PAVGB(%%mm7, %%mm3)				      // d/2
521
		PAVGB(%%mm7, %%mm3)				      // d/2
Lines 564-570 Link Here
564
		"movq %%mm0, (%%"REG_c", %1, 2)			\n\t" // line 7
564
		"movq %%mm0, (%%"REG_c", %1, 2)			\n\t" // line 7
565
565
566
		:
566
		:
567
		: "r" (src), "r" ((long)stride), "m" (co->pQPb)
567
		: "r" (src), "r" ((long)stride), "m" (co->pQPb), "m" (b01)
568
		: "%"REG_a, "%"REG_c
568
		: "%"REG_a, "%"REG_c
569
	);
569
	);
570
#else
570
#else
Lines 697-713 Link Here
697
697
698
		PMINUB(%%mm2, %%mm1, %%mm4)			      // MIN(|lenergy|,|renergy|)/8
698
		PMINUB(%%mm2, %%mm1, %%mm4)			      // MIN(|lenergy|,|renergy|)/8
699
		"movq %2, %%mm4					\n\t" // QP //FIXME QP+1 ?
699
		"movq %2, %%mm4					\n\t" // QP //FIXME QP+1 ?
700
		"paddusb "MANGLE(b01)", %%mm4			\n\t"
700
		"paddusb %3, %%mm4				\n\t"
701
		"pcmpgtb %%mm3, %%mm4				\n\t" // |menergy|/8 < QP
701
		"pcmpgtb %%mm3, %%mm4				\n\t" // |menergy|/8 < QP
702
		"psubusb %%mm1, %%mm3				\n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
702
		"psubusb %%mm1, %%mm3				\n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
703
		"pand %%mm4, %%mm3				\n\t"
703
		"pand %%mm4, %%mm3				\n\t"
704
704
705
		"movq %%mm3, %%mm1				\n\t"
705
		"movq %%mm3, %%mm1				\n\t"
706
//		"psubusb "MANGLE(b01)", %%mm3			\n\t"
706
//		"psubusb %3, %%mm3				\n\t"
707
		PAVGB(%%mm7, %%mm3)
707
		PAVGB(%%mm7, %%mm3)
708
		PAVGB(%%mm7, %%mm3)
708
		PAVGB(%%mm7, %%mm3)
709
		"paddusb %%mm1, %%mm3				\n\t"
709
		"paddusb %%mm1, %%mm3				\n\t"
710
//		"paddusb "MANGLE(b01)", %%mm3			\n\t"
710
//		"paddusb %3, %%mm3				\n\t"
711
711
712
		"movq (%%"REG_a", %1, 2), %%mm6			\n\t" //l3
712
		"movq (%%"REG_a", %1, 2), %%mm6			\n\t" //l3
713
		"movq (%0, %1, 4), %%mm5			\n\t" //l4
713
		"movq (%0, %1, 4), %%mm5			\n\t" //l4
Lines 720-726 Link Here
720
		"pand %%mm0, %%mm3				\n\t"
720
		"pand %%mm0, %%mm3				\n\t"
721
		PMINUB(%%mm5, %%mm3, %%mm0)
721
		PMINUB(%%mm5, %%mm3, %%mm0)
722
722
723
		"psubusb "MANGLE(b01)", %%mm3			\n\t"
723
		"psubusb %3, %%mm3				\n\t"
724
		PAVGB(%%mm7, %%mm3)
724
		PAVGB(%%mm7, %%mm3)
725
725
726
		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
726
		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
Lines 752-758 Link Here
752
		"movq (%%"REG_a", %1), %%mm3			\n\t" // l2
752
		"movq (%%"REG_a", %1), %%mm3			\n\t" // l2
753
		"pxor %%mm6, %%mm2				\n\t" // -l5-1
753
		"pxor %%mm6, %%mm2				\n\t" // -l5-1
754
		"movq %%mm2, %%mm5				\n\t" // -l5-1
754
		"movq %%mm2, %%mm5				\n\t" // -l5-1
755
		"movq "MANGLE(b80)", %%mm4			\n\t" // 128
755
		"movq %4, %%mm4					\n\t" // 128
756
		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
756
		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
757
		PAVGB(%%mm3, %%mm2)				      // (l2-l5+256)/2
757
		PAVGB(%%mm3, %%mm2)				      // (l2-l5+256)/2
758
		PAVGB(%%mm0, %%mm4)				      // ~(l4-l3)/4 + 128
758
		PAVGB(%%mm0, %%mm4)				      // ~(l4-l3)/4 + 128
Lines 764-770 Link Here
764
		"pxor %%mm6, %%mm2				\n\t" // -l1-1
764
		"pxor %%mm6, %%mm2				\n\t" // -l1-1
765
		PAVGB(%%mm3, %%mm2)				      // (l2-l1+256)/2
765
		PAVGB(%%mm3, %%mm2)				      // (l2-l1+256)/2
766
		PAVGB((%0), %%mm1)				      // (l0-l3+256)/2
766
		PAVGB((%0), %%mm1)				      // (l0-l3+256)/2
767
		"movq "MANGLE(b80)", %%mm3			\n\t" // 128
767
		"movq %4, %%mm3					\n\t" // 128
768
		PAVGB(%%mm2, %%mm3)				      // ~(l2-l1)/4 + 128
768
		PAVGB(%%mm2, %%mm3)				      // ~(l2-l1)/4 + 128
769
		PAVGB(%%mm1, %%mm3)				      // ~(l0-l3)/4 +(l2-l1)/8 + 128
769
		PAVGB(%%mm1, %%mm3)				      // ~(l0-l3)/4 +(l2-l1)/8 + 128
770
		PAVGB(%%mm2, %%mm3)				      // ~(l0-l3)/8 +5(l2-l1)/16 + 128
770
		PAVGB(%%mm2, %%mm3)				      // ~(l0-l3)/8 +5(l2-l1)/16 + 128
Lines 774-787 Link Here
774
		"movq (%%"REG_c", %1, 2), %%mm1			\n\t" // l7
774
		"movq (%%"REG_c", %1, 2), %%mm1			\n\t" // l7
775
		"pxor %%mm6, %%mm1				\n\t" // -l7-1
775
		"pxor %%mm6, %%mm1				\n\t" // -l7-1
776
		PAVGB((%0, %1, 4), %%mm1)			      // (l4-l7+256)/2
776
		PAVGB((%0, %1, 4), %%mm1)			      // (l4-l7+256)/2
777
		"movq "MANGLE(b80)", %%mm2			\n\t" // 128
777
		"movq %4, %%mm2					\n\t" // 128
778
		PAVGB(%%mm5, %%mm2)				      // ~(l6-l5)/4 + 128
778
		PAVGB(%%mm5, %%mm2)				      // ~(l6-l5)/4 + 128
779
		PAVGB(%%mm1, %%mm2)				      // ~(l4-l7)/4 +(l6-l5)/8 + 128
779
		PAVGB(%%mm1, %%mm2)				      // ~(l4-l7)/4 +(l6-l5)/8 + 128
780
		PAVGB(%%mm5, %%mm2)				      // ~(l4-l7)/8 +5(l6-l5)/16 + 128
780
		PAVGB(%%mm5, %%mm2)				      // ~(l4-l7)/8 +5(l6-l5)/16 + 128
781
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
781
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
782
782
783
		"movq "MANGLE(b00)", %%mm1			\n\t" // 0
783
		"movq %5, %%mm1					\n\t" // 0
784
		"movq "MANGLE(b00)", %%mm5			\n\t" // 0
784
		"movq %5, %%mm5					\n\t" // 0
785
		"psubb %%mm2, %%mm1				\n\t" // 128 - renergy/16
785
		"psubb %%mm2, %%mm1				\n\t" // 128 - renergy/16
786
		"psubb %%mm3, %%mm5				\n\t" // 128 - lenergy/16
786
		"psubb %%mm3, %%mm5				\n\t" // 128 - lenergy/16
787
		PMAXUB(%%mm1, %%mm2)				      // 128 + |renergy/16|
787
		PMAXUB(%%mm1, %%mm2)				      // 128 + |renergy/16|
Lines 790-796 Link Here
790
790
791
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
791
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
792
792
793
		"movq "MANGLE(b00)", %%mm7			\n\t" // 0
793
		"movq %5, %%mm7					\n\t" // 0
794
		"movq %2, %%mm2					\n\t" // QP
794
		"movq %2, %%mm2					\n\t" // QP
795
		PAVGB(%%mm6, %%mm2)				      // 128 + QP/2
795
		PAVGB(%%mm6, %%mm2)				      // 128 + QP/2
796
		"psubb %%mm6, %%mm2				\n\t"
796
		"psubb %%mm6, %%mm2				\n\t"
Lines 804-816 Link Here
804
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
804
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
805
805
806
		"movq %%mm4, %%mm3				\n\t" // d
806
		"movq %%mm4, %%mm3				\n\t" // d
807
		"psubusb "MANGLE(b01)", %%mm4			\n\t"
807
		"psubusb %3, %%mm4				\n\t"
808
		PAVGB(%%mm7, %%mm4)				      // d/32
808
		PAVGB(%%mm7, %%mm4)				      // d/32
809
		PAVGB(%%mm7, %%mm4)				      // (d + 32)/64
809
		PAVGB(%%mm7, %%mm4)				      // (d + 32)/64
810
		"paddb %%mm3, %%mm4				\n\t" // 5d/64
810
		"paddb %%mm3, %%mm4				\n\t" // 5d/64
811
		"pand %%mm2, %%mm4				\n\t"
811
		"pand %%mm2, %%mm4				\n\t"
812
812
813
		"movq "MANGLE(b80)", %%mm5			\n\t" // 128
813
		"movq %4, %%mm5					\n\t" // 128
814
		"psubb %%mm0, %%mm5				\n\t" // q
814
		"psubb %%mm0, %%mm5				\n\t" // q
815
		"paddsb %%mm6, %%mm5				\n\t" // fix bad rounding
815
		"paddsb %%mm6, %%mm5				\n\t" // fix bad rounding
816
		"pcmpgtb %%mm5, %%mm7				\n\t" // SIGN(q)
816
		"pcmpgtb %%mm5, %%mm7				\n\t" // SIGN(q)
Lines 832-838 Link Here
832
		"movq %%mm2, (%0, %1, 4)			\n\t"
832
		"movq %%mm2, (%0, %1, 4)			\n\t"
833
833
834
		:
834
		:
835
		: "r" (src), "r" ((long)stride), "m" (c->pQPb)
835
		: "r" (src), "r" ((long)stride), "m" (c->pQPb), "m" (b01), "m" (b80), "m" (b00)
836
		: "%"REG_a, "%"REG_c
836
		: "%"REG_a, "%"REG_c
837
	);
837
	);
838
838
Lines 1076-1085 Link Here
1076
		"psubusw %%mm1, %%mm5				\n\t" // ld
1076
		"psubusw %%mm1, %%mm5				\n\t" // ld
1077
1077
1078
1078
1079
		"movq "MANGLE(w05)", %%mm2			\n\t" // 5
1079
		"movq %3, %%mm2					\n\t" // 5
1080
		"pmullw %%mm2, %%mm4				\n\t"
1080
		"pmullw %%mm2, %%mm4				\n\t"
1081
		"pmullw %%mm2, %%mm5				\n\t"
1081
		"pmullw %%mm2, %%mm5				\n\t"
1082
		"movq "MANGLE(w20)", %%mm2			\n\t" // 32
1082
		"movq %4, %%mm2					\n\t" // 32
1083
		"paddw %%mm2, %%mm4				\n\t"
1083
		"paddw %%mm2, %%mm4				\n\t"
1084
		"paddw %%mm2, %%mm5				\n\t"
1084
		"paddw %%mm2, %%mm5				\n\t"
1085
		"psrlw $6, %%mm4				\n\t"
1085
		"psrlw $6, %%mm4				\n\t"
Lines 1129-1135 Link Here
1129
		"movq %%mm0, (%0, %1)				\n\t"
1129
		"movq %%mm0, (%0, %1)				\n\t"
1130
1130
1131
		: "+r" (src)
1131
		: "+r" (src)
1132
		: "r" ((long)stride), "m" (c->pQPb)
1132
		: "r" ((long)stride), "m" (c->pQPb), "m" (w05), "m" (w20)
1133
		: "%"REG_a, "%"REG_c
1133
		: "%"REG_a, "%"REG_c
1134
	);
1134
	);
1135
#else
1135
#else
Lines 1273-1279 Link Here
1273
		"movq %%mm6, %%mm0				\n\t" // max
1273
		"movq %%mm6, %%mm0				\n\t" // max
1274
		"psubb %%mm7, %%mm6				\n\t" // max - min
1274
		"psubb %%mm7, %%mm6				\n\t" // max - min
1275
		"movd %%mm6, %%ecx				\n\t"
1275
		"movd %%mm6, %%ecx				\n\t"
1276
		"cmpb "MANGLE(deringThreshold)", %%cl		\n\t"
1276
		"cmpb %4, %%cl					\n\t"
1277
		" jb 1f						\n\t"
1277
		" jb 1f						\n\t"
1278
		"lea -24(%%"REG_SP"), %%"REG_c"			\n\t"
1278
		"lea -24(%%"REG_SP"), %%"REG_c"			\n\t"
1279
		"and "ALIGN_MASK", %%"REG_c"			\n\t" 
1279
		"and "ALIGN_MASK", %%"REG_c"			\n\t" 
Lines 1300-1308 Link Here
1300
		"psubusb %%mm7, %%mm0				\n\t"
1300
		"psubusb %%mm7, %%mm0				\n\t"
1301
		"psubusb %%mm7, %%mm2				\n\t"
1301
		"psubusb %%mm7, %%mm2				\n\t"
1302
		"psubusb %%mm7, %%mm3				\n\t"
1302
		"psubusb %%mm7, %%mm3				\n\t"
1303
		"pcmpeqb "MANGLE(b00)", %%mm0			\n\t" // L10 > a ? 0 : -1
1303
		"pcmpeqb %5, %%mm0				\n\t" // L10 > a ? 0 : -1
1304
		"pcmpeqb "MANGLE(b00)", %%mm2			\n\t" // L20 > a ? 0 : -1
1304
		"pcmpeqb %5, %%mm2				\n\t" // L20 > a ? 0 : -1
1305
		"pcmpeqb "MANGLE(b00)", %%mm3			\n\t" // L00 > a ? 0 : -1
1305
		"pcmpeqb %5, %%mm3				\n\t" // L00 > a ? 0 : -1
1306
		"paddb %%mm2, %%mm0				\n\t"
1306
		"paddb %%mm2, %%mm0				\n\t"
1307
		"paddb %%mm3, %%mm0				\n\t"
1307
		"paddb %%mm3, %%mm0				\n\t"
1308
1308
Lines 1323-1331 Link Here
1323
		"psubusb %%mm7, %%mm2				\n\t"
1323
		"psubusb %%mm7, %%mm2				\n\t"
1324
		"psubusb %%mm7, %%mm4				\n\t"
1324
		"psubusb %%mm7, %%mm4				\n\t"
1325
		"psubusb %%mm7, %%mm5				\n\t"
1325
		"psubusb %%mm7, %%mm5				\n\t"
1326
		"pcmpeqb "MANGLE(b00)", %%mm2			\n\t" // L11 > a ? 0 : -1
1326
		"pcmpeqb %5, %%mm2				\n\t" // L11 > a ? 0 : -1
1327
		"pcmpeqb "MANGLE(b00)", %%mm4			\n\t" // L21 > a ? 0 : -1
1327
		"pcmpeqb %5, %%mm4				\n\t" // L21 > a ? 0 : -1
1328
		"pcmpeqb "MANGLE(b00)", %%mm5			\n\t" // L01 > a ? 0 : -1
1328
		"pcmpeqb %5, %%mm5				\n\t" // L01 > a ? 0 : -1
1329
		"paddb %%mm4, %%mm2				\n\t"
1329
		"paddb %%mm4, %%mm2				\n\t"
1330
		"paddb %%mm5, %%mm2				\n\t"
1330
		"paddb %%mm5, %%mm2				\n\t"
1331
// 0, 2, 3, 1
1331
// 0, 2, 3, 1
Lines 1350-1356 Link Here
1350
		"psubusb " #lx ", " #t1 "			\n\t"\
1350
		"psubusb " #lx ", " #t1 "			\n\t"\
1351
		"psubusb " #lx ", " #t0 "			\n\t"\
1351
		"psubusb " #lx ", " #t0 "			\n\t"\
1352
		"psubusb " #lx ", " #sx "			\n\t"\
1352
		"psubusb " #lx ", " #sx "			\n\t"\
1353
		"movq "MANGLE(b00)", " #lx "			\n\t"\
1353
		"movq %5, " #lx 	"			\n\t"\
1354
		"pcmpeqb " #lx ", " #t1 "			\n\t" /* src[-1] > a ? 0 : -1*/\
1354
		"pcmpeqb " #lx ", " #t1 "			\n\t" /* src[-1] > a ? 0 : -1*/\
1355
		"pcmpeqb " #lx ", " #t0 "			\n\t" /* src[+1] > a ? 0 : -1*/\
1355
		"pcmpeqb " #lx ", " #t0 "			\n\t" /* src[+1] > a ? 0 : -1*/\
1356
		"pcmpeqb " #lx ", " #sx "			\n\t" /* src[0]  > a ? 0 : -1*/\
1356
		"pcmpeqb " #lx ", " #sx "			\n\t" /* src[0]  > a ? 0 : -1*/\
Lines 1366-1373 Link Here
1366
		PMINUB(t1, pplx, t0)\
1366
		PMINUB(t1, pplx, t0)\
1367
		"paddb " #sx ", " #ppsx "			\n\t"\
1367
		"paddb " #sx ", " #ppsx "			\n\t"\
1368
		"paddb " #psx ", " #ppsx "			\n\t"\
1368
		"paddb " #psx ", " #ppsx "			\n\t"\
1369
		"#paddb "MANGLE(b02)", " #ppsx "		\n\t"\
1369
		"#paddb %6, " #ppsx "				\n\t"\
1370
		"pand "MANGLE(b08)", " #ppsx "			\n\t"\
1370
		"pand %7, " #ppsx "				\n\t"\
1371
		"pcmpeqb " #lx ", " #ppsx "			\n\t"\
1371
		"pcmpeqb " #lx ", " #ppsx "			\n\t"\
1372
		"pand " #ppsx ", " #pplx "			\n\t"\
1372
		"pand " #ppsx ", " #pplx "			\n\t"\
1373
		"pandn " #dst ", " #ppsx "			\n\t"\
1373
		"pandn " #dst ", " #ppsx "			\n\t"\
Lines 1403-1409 Link Here
1403
DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1403
DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1404
1404
1405
		"1:			\n\t"
1405
		"1:			\n\t"
1406
		: : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
1406
		: : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2), "m" (deringThreshold), "m" (b00), "m" (b02), "m" (b08)
1407
		: "%"REG_a, "%"REG_d, "%"REG_c
1407
		: "%"REG_a, "%"REG_d, "%"REG_c
1408
	);
1408
	);
1409
#else
1409
#else
Lines 2281-2287 Link Here
2281
#else
2281
#else
2282
#if defined (FAST_L2_DIFF)
2282
#if defined (FAST_L2_DIFF)
2283
		"pcmpeqb %%mm7, %%mm7				\n\t"
2283
		"pcmpeqb %%mm7, %%mm7				\n\t"
2284
		"movq "MANGLE(b80)", %%mm6			\n\t"
2284
		"movq %4, %%mm6					\n\t"
2285
		"pxor %%mm0, %%mm0				\n\t"
2285
		"pxor %%mm0, %%mm0				\n\t"
2286
#define REAL_L2_DIFF_CORE(a, b)\
2286
#define REAL_L2_DIFF_CORE(a, b)\
2287
		"movq " #a ", %%mm5				\n\t"\
2287
		"movq " #a ", %%mm5				\n\t"\
Lines 2530-2536 Link Here
2530
2530
2531
		"4:						\n\t"
2531
		"4:						\n\t"
2532
2532
2533
		:: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
2533
		:: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast), "m" (b80)
2534
		: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2534
		: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2535
		);
2535
		);
2536
//printf("%d\n", test);
2536
//printf("%d\n", test);
Lines 2805-2812 Link Here
2805
		"movq %%mm6, %%mm1				\n\t"
2805
		"movq %%mm6, %%mm1				\n\t"
2806
		"psllw $2, %%mm0				\n\t"
2806
		"psllw $2, %%mm0				\n\t"
2807
		"psllw $2, %%mm1				\n\t"
2807
		"psllw $2, %%mm1				\n\t"
2808
		"paddw "MANGLE(w04)", %%mm0			\n\t"
2808
		"paddw %5, %%mm0				\n\t"
2809
		"paddw "MANGLE(w04)", %%mm1			\n\t"
2809
		"paddw %5, %%mm1				\n\t"
2810
2810
2811
#define NEXT\
2811
#define NEXT\
2812
		"movq (%0), %%mm2				\n\t"\
2812
		"movq (%0), %%mm2				\n\t"\
Lines 2895-2901 Link Here
2895
		"mov %4, %0					\n\t" //FIXME
2895
		"mov %4, %0					\n\t" //FIXME
2896
2896
2897
		: "+&r"(src)
2897
		: "+&r"(src)
2898
		: "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
2898
		: "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src), "m" (w04)
2899
		);
2899
		);
2900
2900
2901
		src+= step; // src points to begin of the 8x8 Block
2901
		src+= step; // src points to begin of the 8x8 Block
Lines 3112-3121 Link Here
3112
		"psubusw %%mm1, %%mm5				\n\t" // ld
3112
		"psubusw %%mm1, %%mm5				\n\t" // ld
3113
3113
3114
3114
3115
		"movq "MANGLE(w05)", %%mm2			\n\t" // 5
3115
		"movq %4, %%mm2					\n\t" // 5
3116
		"pmullw %%mm2, %%mm4				\n\t"
3116
		"pmullw %%mm2, %%mm4				\n\t"
3117
		"pmullw %%mm2, %%mm5				\n\t"
3117
		"pmullw %%mm2, %%mm5				\n\t"
3118
		"movq "MANGLE(w20)", %%mm2			\n\t" // 32
3118
		"movq %5, %%mm2					\n\t" // 32
3119
		"paddw %%mm2, %%mm4				\n\t"
3119
		"paddw %%mm2, %%mm4				\n\t"
3120
		"paddw %%mm2, %%mm5				\n\t"
3120
		"paddw %%mm2, %%mm5				\n\t"
3121
		"psrlw $6, %%mm4				\n\t"
3121
		"psrlw $6, %%mm4				\n\t"
Lines 3167-3173 Link Here
3167
		"movq %%mm0, (%0, %1)				\n\t"
3167
		"movq %%mm0, (%0, %1)				\n\t"
3168
3168
3169
		: "+r" (temp_src)
3169
		: "+r" (temp_src)
3170
		: "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
3170
		: "r" ((long)step), "m" (c->pQPb), "m"(eq_mask), "m" (w05), "m" (w20)
3171
		: "%"REG_a, "%"REG_c
3171
		: "%"REG_a, "%"REG_c
3172
		);
3172
		);
3173
	}
3173
	}
Lines 3198-3207 Link Here
3198
	{
3198
	{
3199
#ifdef HAVE_MMX
3199
#ifdef HAVE_MMX
3200
					asm volatile(
3200
					asm volatile(
3201
						"movq (%%"REG_a"), %%mm2	\n\t" // packedYOffset
3201
						"movq (%0), %%mm2	\n\t" // packedYOffset
3202
						"movq 8(%%"REG_a"), %%mm3	\n\t" // packedYScale
3202
						"movq 8(%0), %%mm3	\n\t" // packedYScale
3203
						"lea (%2,%4), %%"REG_a"	\n\t"
3204
						"lea (%3,%5), %%"REG_d"	\n\t"
3205
						"pxor %%mm4, %%mm4	\n\t"
3203
						"pxor %%mm4, %%mm4	\n\t"
3206
#ifdef HAVE_MMX2
3204
#ifdef HAVE_MMX2
3207
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)					\
3205
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)					\
Lines 3257-3277 Link Here
3257
#define SCALED_CPY(src1, src2, dst1, dst2)\
3255
#define SCALED_CPY(src1, src2, dst1, dst2)\
3258
   REAL_SCALED_CPY(src1, src2, dst1, dst2)
3256
   REAL_SCALED_CPY(src1, src2, dst1, dst2)
3259
3257
3260
SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
3258
SCALED_CPY((%1)       , (%1, %3)      , (%2)       , (%2, %4))
3261
SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
3259
SCALED_CPY((%1, %3, 2), (%1, %3, 4)   , (%2, %4, 2), (%2, %4, 4))
3262
SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
3260
						"lea (%1,%3,2), %1	\n\t"
3263
						"lea (%%"REG_a",%4,4), %%"REG_a"	\n\t"
3261
						"lea (%2,%4,2), %2	\n\t"
3264
						"lea (%%"REG_d",%5,4), %%"REG_d"	\n\t"
3262
SCALED_CPY((%1, %3, 2), (%1, %3, 4)   , (%2, %4, 2), (%2, %4, 4))
3265
SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
3263
						"lea (%1,%3), %1	\n\t"
3266
3264
						"lea (%2,%4), %2	\n\t"
3267
3265
SCALED_CPY((%1, %3, 2), (%1, %3, 4)   , (%2, %4, 2), (%2, %4, 4))
3268
						: "=&a" (packedOffsetAndScale)
3266
						: : "r" (packedOffsetAndScale),
3269
						: "0" (packedOffsetAndScale),
3270
						"r"(src),
3267
						"r"(src),
3271
						"r"(dst),
3268
						"r"(dst),
3272
						"r" ((long)srcStride),
3269
						"r" ((long)srcStride),
3273
						"r" ((long)dstStride)
3270
						"r" ((long)dstStride)
3274
						: "%"REG_d
3271
						: "memory"
3275
					);
3272
					);
3276
#else
3273
#else
3277
				for(i=0; i<8; i++)
3274
				for(i=0; i<8; i++)
Lines 3283-3291 Link Here
3283
	{
3280
	{
3284
#ifdef HAVE_MMX
3281
#ifdef HAVE_MMX
3285
					asm volatile(
3282
					asm volatile(
3286
						"lea (%0,%2), %%"REG_a"	\n\t"
3287
						"lea (%1,%3), %%"REG_d"	\n\t"
3288
3289
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)				\
3283
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)				\
3290
						"movq " #src1 ", %%mm0	\n\t"\
3284
						"movq " #src1 ", %%mm0	\n\t"\
3291
						"movq " #src2 ", %%mm1	\n\t"\
3285
						"movq " #src2 ", %%mm1	\n\t"\
Lines 3296-3312 Link Here
3296
   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
3290
   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
3297
3291
3298
SIMPLE_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
3292
SIMPLE_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
3299
SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
3293
SIMPLE_CPY((%0, %2, 2), (%0, %2, 4)   , (%1, %3, 2), (%1, %3, 4))
3300
SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
3294
						"lea (%0,%2,2), %0	\n\t"
3301
						"lea (%%"REG_a",%2,4), %%"REG_a"	\n\t"
3295
						"lea (%1,%3,2), %1	\n\t"
3302
						"lea (%%"REG_d",%3,4), %%"REG_d"	\n\t"
3296
SIMPLE_CPY((%0, %2)   , (%0, %2, 4)   , (%1, %3)   , (%1, %3, 4))
3303
SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
3297
						"lea (%0,%2), %0	\n\t"
3304
3298
						"lea (%1,%3), %1	\n\t"
3299
SIMPLE_CPY((%0, %2, 2), (%0, %2, 4)   , (%1, %3, 2), (%1, %3, 4))
3305
						: : "r" (src),
3300
						: : "r" (src),
3306
						"r" (dst),
3301
						"r" (dst),
3307
						"r" ((long)srcStride),
3302
						"r" ((long)srcStride),
3308
						"r" ((long)dstStride)
3303
						"r" ((long)dstStride)
3309
						: "%"REG_a, "%"REG_d
3304
						: "memory"
3310
					);
3305
					);
3311
#else
3306
#else
3312
				for(i=0; i<8; i++)
3307
				for(i=0; i<8; i++)
(-)libavformat/Makefile (-1 / +1 lines)
Lines 6-12 Link Here
6
6
7
VPATH=$(SRC_PATH)/libavformat
7
VPATH=$(SRC_PATH)/libavformat
8
8
9
CFLAGS=$(OPTFLAGS) -I.. -I$(SRC_PATH) -I$(SRC_PATH)/libavutil -I$(SRC_PATH)/libavcodec -DHAVE_AV_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE
9
CFLAGS=$(OPTFLAGS) $(PIC) -I.. -I$(SRC_PATH) -I$(SRC_PATH)/libavutil -I$(SRC_PATH)/libavcodec -DHAVE_AV_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE
10
10
11
OBJS= utils.o cutils.o os_support.o allformats.o
11
OBJS= utils.o cutils.o os_support.o allformats.o
12
PPOBJS=
12
PPOBJS=
(-)libavutil/Makefile (-1 / +1 lines)
Lines 6-12 Link Here
6
VPATH=$(SRC_PATH)/libavutil
6
VPATH=$(SRC_PATH)/libavutil
7
7
8
# NOTE: -I.. is needed to include config.h
8
# NOTE: -I.. is needed to include config.h
9
CFLAGS=$(OPTFLAGS) -DHAVE_AV_CONFIG_H -I.. -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE
9
CFLAGS=$(OPTFLAGS) $(PIC) -DHAVE_AV_CONFIG_H -I.. -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE
10
10
11
OBJS= mathematics.o \
11
OBJS= mathematics.o \
12
      integer.o \
12
      integer.o \

Return to bug 115568