Lines 1138-1144
Link Here
|
1138 |
int h=8;\ |
1138 |
int h=8;\ |
1139 |
__asm__ volatile(\ |
1139 |
__asm__ volatile(\ |
1140 |
"pxor %%mm7, %%mm7 \n\t"\ |
1140 |
"pxor %%mm7, %%mm7 \n\t"\ |
1141 |
"movq %5, %%mm6 \n\t"\ |
1141 |
"movq "MANGLE(ff_pw_5) ", %%mm6\n\t"\ |
1142 |
"1: \n\t"\ |
1142 |
"1: \n\t"\ |
1143 |
"movq (%0), %%mm0 \n\t"\ |
1143 |
"movq (%0), %%mm0 \n\t"\ |
1144 |
"movq 1(%0), %%mm2 \n\t"\ |
1144 |
"movq 1(%0), %%mm2 \n\t"\ |
Lines 1172-1178
Link Here
|
1172 |
"punpcklbw %%mm7, %%mm5 \n\t"\ |
1172 |
"punpcklbw %%mm7, %%mm5 \n\t"\ |
1173 |
"paddw %%mm3, %%mm2 \n\t"\ |
1173 |
"paddw %%mm3, %%mm2 \n\t"\ |
1174 |
"paddw %%mm5, %%mm4 \n\t"\ |
1174 |
"paddw %%mm5, %%mm4 \n\t"\ |
1175 |
"movq %6, %%mm5 \n\t"\ |
1175 |
"movq "MANGLE(ff_pw_16) ", %%mm5\n\t"\ |
1176 |
"paddw %%mm5, %%mm2 \n\t"\ |
1176 |
"paddw %%mm5, %%mm2 \n\t"\ |
1177 |
"paddw %%mm5, %%mm4 \n\t"\ |
1177 |
"paddw %%mm5, %%mm4 \n\t"\ |
1178 |
"paddw %%mm2, %%mm0 \n\t"\ |
1178 |
"paddw %%mm2, %%mm0 \n\t"\ |
Lines 1186-1192
Link Here
|
1186 |
"decl %2 \n\t"\ |
1186 |
"decl %2 \n\t"\ |
1187 |
" jnz 1b \n\t"\ |
1187 |
" jnz 1b \n\t"\ |
1188 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1188 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1189 |
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
1189 |
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ |
1190 |
: "memory"\ |
1190 |
: "memory"\ |
1191 |
);\ |
1191 |
);\ |
1192 |
}\ |
1192 |
}\ |
Lines 1593-1600
Link Here
|
1593 |
int h=8;\ |
1593 |
int h=8;\ |
1594 |
__asm__ volatile(\ |
1594 |
__asm__ volatile(\ |
1595 |
"pxor %%xmm7, %%xmm7 \n\t"\ |
1595 |
"pxor %%xmm7, %%xmm7 \n\t"\ |
1596 |
"movdqa %0, %%xmm6 \n\t"\ |
1596 |
"movdqa "MANGLE(ff_pw_5) ", %%xmm6 \n\t"\ |
1597 |
:: "m"(ff_pw_5)\ |
1597 |
::\ |
1598 |
);\ |
1598 |
);\ |
1599 |
do{\ |
1599 |
do{\ |
1600 |
__asm__ volatile(\ |
1600 |
__asm__ volatile(\ |
Lines 1617-1623
Link Here
|
1617 |
"psllw $2, %%xmm2 \n\t"\ |
1617 |
"psllw $2, %%xmm2 \n\t"\ |
1618 |
"movq (%2), %%xmm3 \n\t"\ |
1618 |
"movq (%2), %%xmm3 \n\t"\ |
1619 |
"psubw %%xmm1, %%xmm2 \n\t"\ |
1619 |
"psubw %%xmm1, %%xmm2 \n\t"\ |
1620 |
"paddw %5, %%xmm0 \n\t"\ |
1620 |
"paddw "MANGLE(ff_pw_16)", %%xmm0 \n\t"\ |
1621 |
"pmullw %%xmm6, %%xmm2 \n\t"\ |
1621 |
"pmullw %%xmm6, %%xmm2 \n\t"\ |
1622 |
"paddw %%xmm0, %%xmm2 \n\t"\ |
1622 |
"paddw %%xmm0, %%xmm2 \n\t"\ |
1623 |
"psraw $5, %%xmm2 \n\t"\ |
1623 |
"psraw $5, %%xmm2 \n\t"\ |
Lines 1628-1635
Link Here
|
1628 |
"add %4, %1 \n\t"\ |
1628 |
"add %4, %1 \n\t"\ |
1629 |
"add %3, %2 \n\t"\ |
1629 |
"add %3, %2 \n\t"\ |
1630 |
: "+a"(src), "+c"(dst), "+d"(src2)\ |
1630 |
: "+a"(src), "+c"(dst), "+d"(src2)\ |
1631 |
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ |
1631 |
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ |
1632 |
"m"(ff_pw_16)\ |
|
|
1633 |
: "memory"\ |
1632 |
: "memory"\ |
1634 |
);\ |
1633 |
);\ |
1635 |
}while(--h);\ |
1634 |
}while(--h);\ |
Lines 1640-1646
Link Here
|
1640 |
int h=8;\ |
1639 |
int h=8;\ |
1641 |
__asm__ volatile(\ |
1640 |
__asm__ volatile(\ |
1642 |
"pxor %%xmm7, %%xmm7 \n\t"\ |
1641 |
"pxor %%xmm7, %%xmm7 \n\t"\ |
1643 |
"movdqa %5, %%xmm6 \n\t"\ |
1642 |
"movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\ |
1644 |
"1: \n\t"\ |
1643 |
"1: \n\t"\ |
1645 |
"lddqu -2(%0), %%xmm1 \n\t"\ |
1644 |
"lddqu -2(%0), %%xmm1 \n\t"\ |
1646 |
"movdqa %%xmm1, %%xmm0 \n\t"\ |
1645 |
"movdqa %%xmm1, %%xmm0 \n\t"\ |
Lines 1660-1666
Link Here
|
1660 |
"paddw %%xmm4, %%xmm1 \n\t"\ |
1659 |
"paddw %%xmm4, %%xmm1 \n\t"\ |
1661 |
"psllw $2, %%xmm2 \n\t"\ |
1660 |
"psllw $2, %%xmm2 \n\t"\ |
1662 |
"psubw %%xmm1, %%xmm2 \n\t"\ |
1661 |
"psubw %%xmm1, %%xmm2 \n\t"\ |
1663 |
"paddw %6, %%xmm0 \n\t"\ |
1662 |
"paddw "MANGLE(ff_pw_16)", %%xmm0 \n\t"\ |
1664 |
"pmullw %%xmm6, %%xmm2 \n\t"\ |
1663 |
"pmullw %%xmm6, %%xmm2 \n\t"\ |
1665 |
"paddw %%xmm0, %%xmm2 \n\t"\ |
1664 |
"paddw %%xmm0, %%xmm2 \n\t"\ |
1666 |
"psraw $5, %%xmm2 \n\t"\ |
1665 |
"psraw $5, %%xmm2 \n\t"\ |
Lines 1671-1678
Link Here
|
1671 |
"decl %2 \n\t"\ |
1670 |
"decl %2 \n\t"\ |
1672 |
" jnz 1b \n\t"\ |
1671 |
" jnz 1b \n\t"\ |
1673 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1672 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1674 |
: "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\ |
1673 |
: "D"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ |
1675 |
"m"(ff_pw_5), "m"(ff_pw_16)\ |
|
|
1676 |
: "memory"\ |
1674 |
: "memory"\ |
1677 |
);\ |
1675 |
);\ |
1678 |
}\ |
1676 |
}\ |