Lines 1136-1144
Link Here
|
1136 |
\ |
1136 |
\ |
1137 |
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1137 |
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1138 |
int h=8;\ |
1138 |
int h=8;\ |
|
|
1139 |
const xmm_reg xmm_consts[2] __attribute__((aligned (16))) = { ff_pw_5, ff_pw_16 }; \ |
1140 |
\ |
1139 |
__asm__ volatile(\ |
1141 |
__asm__ volatile(\ |
1140 |
"pxor %%mm7, %%mm7 \n\t"\ |
1142 |
"pxor %%mm7, %%mm7 \n\t"\ |
1141 |
"movq %5, %%mm6 \n\t"\ |
1143 |
"movq (%5), %%mm6 \n\t"\ |
|
|
1144 |
"add $16, %5 \n\t"\ |
1142 |
"1: \n\t"\ |
1145 |
"1: \n\t"\ |
1143 |
"movq (%0), %%mm0 \n\t"\ |
1146 |
"movq (%0), %%mm0 \n\t"\ |
1144 |
"movq 1(%0), %%mm2 \n\t"\ |
1147 |
"movq 1(%0), %%mm2 \n\t"\ |
Lines 1172-1178
Link Here
|
1172 |
"punpcklbw %%mm7, %%mm5 \n\t"\ |
1175 |
"punpcklbw %%mm7, %%mm5 \n\t"\ |
1173 |
"paddw %%mm3, %%mm2 \n\t"\ |
1176 |
"paddw %%mm3, %%mm2 \n\t"\ |
1174 |
"paddw %%mm5, %%mm4 \n\t"\ |
1177 |
"paddw %%mm5, %%mm4 \n\t"\ |
1175 |
"movq %6, %%mm5 \n\t"\ |
1178 |
"movq (%5), %%mm5 \n\t"\ |
1176 |
"paddw %%mm5, %%mm2 \n\t"\ |
1179 |
"paddw %%mm5, %%mm2 \n\t"\ |
1177 |
"paddw %%mm5, %%mm4 \n\t"\ |
1180 |
"paddw %%mm5, %%mm4 \n\t"\ |
1178 |
"paddw %%mm2, %%mm0 \n\t"\ |
1181 |
"paddw %%mm2, %%mm0 \n\t"\ |
Lines 1186-1192
Link Here
|
1186 |
"decl %2 \n\t"\ |
1189 |
"decl %2 \n\t"\ |
1187 |
" jnz 1b \n\t"\ |
1190 |
" jnz 1b \n\t"\ |
1188 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1191 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1189 |
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
1192 |
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "r"(xmm_consts)\ |
1190 |
: "memory"\ |
1193 |
: "memory"\ |
1191 |
);\ |
1194 |
);\ |
1192 |
}\ |
1195 |
}\ |
Lines 1638-1646
Link Here
|
1638 |
\ |
1641 |
\ |
1639 |
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1642 |
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1640 |
int h=8;\ |
1643 |
int h=8;\ |
|
|
1644 |
const xmm_reg xmm_consts[2] __attribute__((aligned (16))) = { ff_pw_5, ff_pw_16 }; \ |
1641 |
__asm__ volatile(\ |
1645 |
__asm__ volatile(\ |
1642 |
"pxor %%xmm7, %%xmm7 \n\t"\ |
1646 |
"pxor %%xmm7, %%xmm7 \n\t"\ |
1643 |
"movdqa %5, %%xmm6 \n\t"\ |
1647 |
"movdqa (%5), %%xmm6 \n\t"\ |
|
|
1648 |
"add $16, %5 \n\t"\ |
1644 |
"1: \n\t"\ |
1649 |
"1: \n\t"\ |
1645 |
"lddqu -2(%0), %%xmm1 \n\t"\ |
1650 |
"lddqu -2(%0), %%xmm1 \n\t"\ |
1646 |
"movdqa %%xmm1, %%xmm0 \n\t"\ |
1651 |
"movdqa %%xmm1, %%xmm0 \n\t"\ |
Lines 1660-1666
Link Here
|
1660 |
"paddw %%xmm4, %%xmm1 \n\t"\ |
1665 |
"paddw %%xmm4, %%xmm1 \n\t"\ |
1661 |
"psllw $2, %%xmm2 \n\t"\ |
1666 |
"psllw $2, %%xmm2 \n\t"\ |
1662 |
"psubw %%xmm1, %%xmm2 \n\t"\ |
1667 |
"psubw %%xmm1, %%xmm2 \n\t"\ |
1663 |
"paddw %6, %%xmm0 \n\t"\ |
1668 |
"paddw (%5), %%xmm0 \n\t"\ |
1664 |
"pmullw %%xmm6, %%xmm2 \n\t"\ |
1669 |
"pmullw %%xmm6, %%xmm2 \n\t"\ |
1665 |
"paddw %%xmm0, %%xmm2 \n\t"\ |
1670 |
"paddw %%xmm0, %%xmm2 \n\t"\ |
1666 |
"psraw $5, %%xmm2 \n\t"\ |
1671 |
"psraw $5, %%xmm2 \n\t"\ |
Lines 1672-1678
Link Here
|
1672 |
" jnz 1b \n\t"\ |
1677 |
" jnz 1b \n\t"\ |
1673 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1678 |
: "+a"(src), "+c"(dst), "+g"(h)\ |
1674 |
: "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\ |
1679 |
: "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\ |
1675 |
"m"(ff_pw_5), "m"(ff_pw_16)\ |
1680 |
"r"(xmm_consts)\ |
1676 |
: "memory"\ |
1681 |
: "memory"\ |
1677 |
);\ |
1682 |
);\ |
1678 |
}\ |
1683 |
}\ |