Lines 657-671
static inline void transpose4x4(uint8_t
Link Here
|
657 |
"punpckhwd %%mm2, %%mm1 \n\t" |
657 |
"punpckhwd %%mm2, %%mm1 \n\t" |
658 |
"movd %%mm0, %0 \n\t" |
658 |
"movd %%mm0, %0 \n\t" |
659 |
"punpckhdq %%mm0, %%mm0 \n\t" |
659 |
"punpckhdq %%mm0, %%mm0 \n\t" |
660 |
"movd %%mm0, %1 \n\t" |
660 |
"movd %%mm0, (%0,%1) \n\t" |
661 |
"movd %%mm1, %2 \n\t" |
661 |
"movd %%mm1, (%0,%1,2) \n\t" |
662 |
"punpckhdq %%mm1, %%mm1 \n\t" |
662 |
"punpckhdq %%mm1, %%mm1 \n\t" |
663 |
"movd %%mm1, %3 \n\t" |
663 |
"lea (%1,%1,2), %1 \n\t" |
|
|
664 |
"movd %%mm1, (%0,%1) \n\t" |
664 |
|
665 |
|
665 |
: "=m" (*(uint32_t*)(dst + 0*dst_stride)), |
666 |
: "=r" (*(uint32_t*)(dst)), "+r" (dst_stride) |
666 |
"=m" (*(uint32_t*)(dst + 1*dst_stride)), |
667 |
:: "memory" |
667 |
"=m" (*(uint32_t*)(dst + 2*dst_stride)), |
|
|
668 |
"=m" (*(uint32_t*)(dst + 3*dst_stride)) |
669 |
); |
668 |
); |
670 |
} |
669 |
} |
671 |
|
670 |
|
Lines 1745-1751
WARPER8_16_SQ(hadamard8_diff_mmx2, hadam
Link Here
|
1745 |
|
1744 |
|
1746 |
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ |
1745 |
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ |
1747 |
"paddw " #m4 ", " #m3 " \n\t" /* x1 */\ |
1746 |
"paddw " #m4 ", " #m3 " \n\t" /* x1 */\ |
1748 |
"movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\ |
1747 |
"movq %5, %%mm4 \n\t" /* 20 */\ |
1749 |
"pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\ |
1748 |
"pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\ |
1750 |
"movq "#in7", " #m3 " \n\t" /* d */\ |
1749 |
"movq "#in7", " #m3 " \n\t" /* d */\ |
1751 |
"movq "#in0", %%mm5 \n\t" /* D */\ |
1750 |
"movq "#in0", %%mm5 \n\t" /* D */\ |
Lines 1757-1763
WARPER8_16_SQ(hadamard8_diff_mmx2, hadam
Link Here
|
1757 |
"paddw " #m5 ", %%mm6 \n\t" /* x2 */\ |
1756 |
"paddw " #m5 ", %%mm6 \n\t" /* x2 */\ |
1758 |
"paddw %%mm6, %%mm6 \n\t" /* 2x2 */\ |
1757 |
"paddw %%mm6, %%mm6 \n\t" /* 2x2 */\ |
1759 |
"psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\ |
1758 |
"psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\ |
1760 |
"pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\ |
1759 |
"pmullw %6, %%mm5 \n\t" /* -6x2 + 3x3 */\ |
1761 |
"paddw " #rnd ", %%mm4 \n\t" /* x2 */\ |
1760 |
"paddw " #rnd ", %%mm4 \n\t" /* x2 */\ |
1762 |
"paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\ |
1761 |
"paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\ |
1763 |
"psraw $5, %%mm5 \n\t"\ |
1762 |
"psraw $5, %%mm5 \n\t"\ |
Lines 1791-1805
static void OPNAME ## mpeg4_qpel16_h_low
Link Here
|
1791 |
"paddw %%mm5, %%mm5 \n\t" /* 2b */\ |
1790 |
"paddw %%mm5, %%mm5 \n\t" /* 2b */\ |
1792 |
"psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ |
1791 |
"psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ |
1793 |
"pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ |
1792 |
"pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ |
1794 |
"pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ |
1793 |
"pmullw %6, %%mm6 \n\t" /* 3c - 6b */\ |
1795 |
"paddw %%mm4, %%mm0 \n\t" /* a */\ |
1794 |
"paddw %%mm4, %%mm0 \n\t" /* a */\ |
1796 |
"paddw %%mm1, %%mm5 \n\t" /* d */\ |
1795 |
"paddw %%mm1, %%mm5 \n\t" /* d */\ |
1797 |
"pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ |
1796 |
"pmullw %5, %%mm0 \n\t" /* 20a */\ |
1798 |
"psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
1797 |
"psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
1799 |
"paddw %6, %%mm6 \n\t"\ |
1798 |
"paddw %8, %%mm6 \n\t"\ |
1800 |
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1799 |
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1801 |
"psraw $5, %%mm0 \n\t"\ |
1800 |
"psraw $5, %%mm0 \n\t"\ |
1802 |
"movq %%mm0, %5 \n\t"\ |
1801 |
"movq %%mm0, %7 \n\t"\ |
1803 |
/* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ |
1802 |
/* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ |
1804 |
\ |
1803 |
\ |
1805 |
"movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\ |
1804 |
"movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\ |
Lines 1817-1831
static void OPNAME ## mpeg4_qpel16_h_low
Link Here
|
1817 |
"psrlq $24, %%mm6 \n\t" /* IJKLM000 */\ |
1816 |
"psrlq $24, %%mm6 \n\t" /* IJKLM000 */\ |
1818 |
"punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\ |
1817 |
"punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\ |
1819 |
"punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\ |
1818 |
"punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\ |
1820 |
"pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ |
1819 |
"pmullw %6, %%mm3 \n\t" /* 3c - 6b */\ |
1821 |
"paddw %%mm2, %%mm1 \n\t" /* a */\ |
1820 |
"paddw %%mm2, %%mm1 \n\t" /* a */\ |
1822 |
"paddw %%mm6, %%mm4 \n\t" /* d */\ |
1821 |
"paddw %%mm6, %%mm4 \n\t" /* d */\ |
1823 |
"pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ |
1822 |
"pmullw %5, %%mm1 \n\t" /* 20a */\ |
1824 |
"psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\ |
1823 |
"psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\ |
1825 |
"paddw %6, %%mm1 \n\t"\ |
1824 |
"paddw %8, %%mm1 \n\t"\ |
1826 |
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\ |
1825 |
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\ |
1827 |
"psraw $5, %%mm3 \n\t"\ |
1826 |
"psraw $5, %%mm3 \n\t"\ |
1828 |
"movq %5, %%mm1 \n\t"\ |
1827 |
"movq %7, %%mm1 \n\t"\ |
1829 |
"packuswb %%mm3, %%mm1 \n\t"\ |
1828 |
"packuswb %%mm3, %%mm1 \n\t"\ |
1830 |
OP_MMX2(%%mm1, (%1),%%mm4, q)\ |
1829 |
OP_MMX2(%%mm1, (%1),%%mm4, q)\ |
1831 |
/* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\ |
1830 |
/* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\ |
Lines 1843-1849
static void OPNAME ## mpeg4_qpel16_h_low
Link Here
|
1843 |
"psubw %%mm5, %%mm0 \n\t" /* c - 2b */\ |
1842 |
"psubw %%mm5, %%mm0 \n\t" /* c - 2b */\ |
1844 |
"movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\ |
1843 |
"movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\ |
1845 |
"psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\ |
1844 |
"psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\ |
1846 |
"pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\ |
1845 |
"pmullw %6, %%mm0 \n\t" /* 3c - 6b */\ |
1847 |
"punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\ |
1846 |
"punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\ |
1848 |
"paddw %%mm3, %%mm2 \n\t" /* d */\ |
1847 |
"paddw %%mm3, %%mm2 \n\t" /* d */\ |
1849 |
"psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\ |
1848 |
"psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\ |
Lines 1851-1858
static void OPNAME ## mpeg4_qpel16_h_low
Link Here
|
1851 |
"punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\ |
1850 |
"punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\ |
1852 |
"punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\ |
1851 |
"punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\ |
1853 |
"paddw %%mm2, %%mm6 \n\t" /* a */\ |
1852 |
"paddw %%mm2, %%mm6 \n\t" /* a */\ |
1854 |
"pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\ |
1853 |
"pmullw %5, %%mm6 \n\t" /* 20a */\ |
1855 |
"paddw %6, %%mm0 \n\t"\ |
1854 |
"paddw %8, %%mm0 \n\t"\ |
1856 |
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1855 |
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1857 |
"psraw $5, %%mm0 \n\t"\ |
1856 |
"psraw $5, %%mm0 \n\t"\ |
1858 |
/* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\ |
1857 |
/* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\ |
Lines 1866-1875
static void OPNAME ## mpeg4_qpel16_h_low
Link Here
|
1866 |
"paddw %%mm2, %%mm5 \n\t" /* d */\ |
1865 |
"paddw %%mm2, %%mm5 \n\t" /* d */\ |
1867 |
"paddw %%mm6, %%mm6 \n\t" /* 2b */\ |
1866 |
"paddw %%mm6, %%mm6 \n\t" /* 2b */\ |
1868 |
"psubw %%mm6, %%mm4 \n\t" /* c - 2b */\ |
1867 |
"psubw %%mm6, %%mm4 \n\t" /* c - 2b */\ |
1869 |
"pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\ |
1868 |
"pmullw %5, %%mm3 \n\t" /* 20a */\ |
1870 |
"pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\ |
1869 |
"pmullw %6, %%mm4 \n\t" /* 3c - 6b */\ |
1871 |
"psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\ |
1870 |
"psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\ |
1872 |
"paddw %6, %%mm4 \n\t"\ |
1871 |
"paddw %8, %%mm4 \n\t"\ |
1873 |
"paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\ |
1872 |
"paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\ |
1874 |
"psraw $5, %%mm4 \n\t"\ |
1873 |
"psraw $5, %%mm4 \n\t"\ |
1875 |
"packuswb %%mm4, %%mm0 \n\t"\ |
1874 |
"packuswb %%mm4, %%mm0 \n\t"\ |
Lines 1879-1886
static void OPNAME ## mpeg4_qpel16_h_low
Link Here
|
1879 |
"add %4, %1 \n\t"\ |
1878 |
"add %4, %1 \n\t"\ |
1880 |
"decl %2 \n\t"\ |
1879 |
"decl %2 \n\t"\ |
1881 |
" jnz 1b \n\t"\ |
1880 |
" jnz 1b \n\t"\ |
1882 |
: "+a"(src), "+c"(dst), "+m"(h)\ |
1881 |
: : "a"(src), "c"(dst), "m"(h), \ |
1883 |
: "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ |
1882 |
"d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\ |
1884 |
: "memory"\ |
1883 |
: "memory"\ |
1885 |
);\ |
1884 |
);\ |
1886 |
}\ |
1885 |
}\ |
Lines 1958-1969
static void OPNAME ## mpeg4_qpel8_h_lowp
Link Here
|
1958 |
"paddw %%mm5, %%mm5 \n\t" /* 2b */\ |
1957 |
"paddw %%mm5, %%mm5 \n\t" /* 2b */\ |
1959 |
"psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ |
1958 |
"psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ |
1960 |
"pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ |
1959 |
"pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ |
1961 |
"pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ |
1960 |
"pmullw %6, %%mm6 \n\t" /* 3c - 6b */\ |
1962 |
"paddw %%mm4, %%mm0 \n\t" /* a */\ |
1961 |
"paddw %%mm4, %%mm0 \n\t" /* a */\ |
1963 |
"paddw %%mm1, %%mm5 \n\t" /* d */\ |
1962 |
"paddw %%mm1, %%mm5 \n\t" /* d */\ |
1964 |
"pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ |
1963 |
"pmullw %5, %%mm0 \n\t" /* 20a */\ |
1965 |
"psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
1964 |
"psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
1966 |
"paddw %6, %%mm6 \n\t"\ |
1965 |
"paddw %8, %%mm6 \n\t"\ |
1967 |
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1966 |
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1968 |
"psraw $5, %%mm0 \n\t"\ |
1967 |
"psraw $5, %%mm0 \n\t"\ |
1969 |
/* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ |
1968 |
/* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ |
Lines 1979-1988
static void OPNAME ## mpeg4_qpel8_h_lowp
Link Here
|
1979 |
"paddw %%mm5, %%mm4 \n\t" /* d */\ |
1978 |
"paddw %%mm5, %%mm4 \n\t" /* d */\ |
1980 |
"paddw %%mm2, %%mm2 \n\t" /* 2b */\ |
1979 |
"paddw %%mm2, %%mm2 \n\t" /* 2b */\ |
1981 |
"psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ |
1980 |
"psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ |
1982 |
"pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ |
1981 |
"pmullw %5, %%mm1 \n\t" /* 20a */\ |
1983 |
"pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ |
1982 |
"pmullw %6, %%mm3 \n\t" /* 3c - 6b */\ |
1984 |
"psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\ |
1983 |
"psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\ |
1985 |
"paddw %6, %%mm1 \n\t"\ |
1984 |
"paddw %8, %%mm1 \n\t"\ |
1986 |
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\ |
1985 |
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\ |
1987 |
"psraw $5, %%mm3 \n\t"\ |
1986 |
"psraw $5, %%mm3 \n\t"\ |
1988 |
"packuswb %%mm3, %%mm0 \n\t"\ |
1987 |
"packuswb %%mm3, %%mm0 \n\t"\ |
Lines 1992-1999
static void OPNAME ## mpeg4_qpel8_h_lowp
Link Here
|
1992 |
"add %4, %1 \n\t"\ |
1991 |
"add %4, %1 \n\t"\ |
1993 |
"decl %2 \n\t"\ |
1992 |
"decl %2 \n\t"\ |
1994 |
" jnz 1b \n\t"\ |
1993 |
" jnz 1b \n\t"\ |
1995 |
: "+a"(src), "+c"(dst), "+m"(h)\ |
1994 |
: : "a"(src), "c"(dst), "m"(h), \ |
1996 |
: "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ |
1995 |
"S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(temp), "m"(ROUNDER)\ |
1997 |
: "memory"\ |
1996 |
: "memory"\ |
1998 |
);\ |
1997 |
);\ |
1999 |
}\ |
1998 |
}\ |
Lines 2072-2110
static void OPNAME ## mpeg4_qpel16_v_low
Link Here
|
2072 |
"movq 8(%0), %%mm1 \n\t"\ |
2071 |
"movq 8(%0), %%mm1 \n\t"\ |
2073 |
"movq 16(%0), %%mm2 \n\t"\ |
2072 |
"movq 16(%0), %%mm2 \n\t"\ |
2074 |
"movq 24(%0), %%mm3 \n\t"\ |
2073 |
"movq 24(%0), %%mm3 \n\t"\ |
2075 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
2074 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
2076 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
2075 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
2077 |
"add %4, %1 \n\t"\ |
2076 |
"add %4, %1 \n\t"\ |
2078 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
2077 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
2079 |
\ |
2078 |
\ |
2080 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
2079 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
2081 |
"add %4, %1 \n\t"\ |
2080 |
"add %4, %1 \n\t"\ |
2082 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
2081 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
2083 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ |
2082 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ |
2084 |
"add %4, %1 \n\t"\ |
2083 |
"add %4, %1 \n\t"\ |
2085 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ |
2084 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ |
2086 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ |
2085 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ |
2087 |
"add %4, %1 \n\t"\ |
2086 |
"add %4, %1 \n\t"\ |
2088 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ |
2087 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ |
2089 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ |
2088 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ |
2090 |
"add %4, %1 \n\t"\ |
2089 |
"add %4, %1 \n\t"\ |
2091 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ |
2090 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ |
2092 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ |
2091 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ |
2093 |
"add %4, %1 \n\t"\ |
2092 |
"add %4, %1 \n\t"\ |
2094 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ |
2093 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ |
2095 |
\ |
2094 |
\ |
2096 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ |
2095 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ |
2097 |
"add %4, %1 \n\t" \ |
2096 |
"add %4, %1 \n\t" \ |
2098 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ |
2097 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ |
2099 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ |
2098 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ |
2100 |
\ |
2099 |
\ |
2101 |
"add $136, %0 \n\t"\ |
2100 |
"add $136, %0 \n\t"\ |
2102 |
"add %6, %1 \n\t"\ |
2101 |
"add %8, %1 \n\t"\ |
2103 |
"decl %2 \n\t"\ |
2102 |
"decl %2 \n\t"\ |
2104 |
" jnz 1b \n\t"\ |
2103 |
" jnz 1b \n\t"\ |
2105 |
\ |
2104 |
\ |
2106 |
: "+r"(temp_ptr), "+r"(dst), "+g"(count)\ |
2105 |
: : "r"(temp_ptr), "r"(dst), "g"(count), \ |
2107 |
: "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\ |
2106 |
"r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-14*(long)dstStride)\ |
2108 |
:"memory"\ |
2107 |
:"memory"\ |
2109 |
);\ |
2108 |
);\ |
2110 |
}\ |
2109 |
}\ |
Lines 2144-2170
static void OPNAME ## mpeg4_qpel8_v_lowp
Link Here
|
2144 |
"movq 8(%0), %%mm1 \n\t"\ |
2143 |
"movq 8(%0), %%mm1 \n\t"\ |
2145 |
"movq 16(%0), %%mm2 \n\t"\ |
2144 |
"movq 16(%0), %%mm2 \n\t"\ |
2146 |
"movq 24(%0), %%mm3 \n\t"\ |
2145 |
"movq 24(%0), %%mm3 \n\t"\ |
2147 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
2146 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
2148 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
2147 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
2149 |
"add %4, %1 \n\t"\ |
2148 |
"add %4, %1 \n\t"\ |
2150 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
2149 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
2151 |
\ |
2150 |
\ |
2152 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
2151 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
2153 |
"add %4, %1 \n\t"\ |
2152 |
"add %4, %1 \n\t"\ |
2154 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
2153 |
QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %7, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
2155 |
\ |
2154 |
\ |
2156 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ |
2155 |
QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %7, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ |
2157 |
"add %4, %1 \n\t"\ |
2156 |
"add %4, %1 \n\t"\ |
2158 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ |
2157 |
QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %7, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ |
2159 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ |
2158 |
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %7, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ |
2160 |
\ |
2159 |
\ |
2161 |
"add $72, %0 \n\t"\ |
2160 |
"add $72, %0 \n\t"\ |
2162 |
"add %6, %1 \n\t"\ |
2161 |
"add %8, %1 \n\t"\ |
2163 |
"decl %2 \n\t"\ |
2162 |
"decl %2 \n\t"\ |
2164 |
" jnz 1b \n\t"\ |
2163 |
" jnz 1b \n\t"\ |
2165 |
\ |
2164 |
\ |
2166 |
: "+r"(temp_ptr), "+r"(dst), "+g"(count)\ |
2165 |
: : "r"(temp_ptr), "r"(dst), "rm"(count), \ |
2167 |
: "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\ |
2166 |
"r"((long)dstStride), "r"(2*(long)dstStride), "m"(ff_pw_20), "m"(ff_pw_3), "m"(ROUNDER), "g"(4-6*(long)dstStride)\ |
2168 |
: "memory"\ |
2167 |
: "memory"\ |
2169 |
);\ |
2168 |
);\ |
2170 |
}\ |
2169 |
}\ |