|
Lines 57-65
static void FilterHoriz__mmx(unsigned ch
|
Link Here
|
|---|
|
"psubw %%mm3,%%mm1\n" /* mm1 = pix[0]-pix[3] mm1 - mm3 */ \ | "psubw %%mm3,%%mm1\n" /* mm1 = pix[0]-pix[3] mm1 - mm3 */ \ |
"movq %%mm0,%%mm7\n" /* mm7 = pix[2]*/ \ | "movq %%mm0,%%mm7\n" /* mm7 = pix[2]*/ \ |
"psubw %%mm5,%%mm0\n" /* mm0 = pix[2]-pix[1] mm0 - mm5*/ \ | "psubw %%mm5,%%mm0\n" /* mm0 = pix[2]-pix[1] mm0 - mm5*/ \ |
"PMULLW "MANGLE(V3)",%%mm0\n" /* *3 */ \ |
"PMULLW %3,%%mm0\n" /* *3 */ \ |
"paddw %%mm0,%%mm1\n" /* mm1 has f[0] ... f[4]*/ \ | "paddw %%mm0,%%mm1\n" /* mm1 has f[0] ... f[4]*/ \ |
"paddw "MANGLE(V804)",%%mm1\n"/* add 4 */ /* add 256 after shift */ \ |
"paddw %4,%%mm1\n"/* add 4 */ /* add 256 after shift */ \ |
"psraw $3,%%mm1\n" /* >>3 */ \ | "psraw $3,%%mm1\n" /* >>3 */ \ |
" pextrw $0,%%mm1,%%esi\n" /* In MM1 we have 4 f coefs (16bits) */ \ | " pextrw $0,%%mm1,%%esi\n" /* In MM1 we have 4 f coefs (16bits) */ \ |
" pextrw $1,%%mm1,%%edi\n" /* now perform MM4 = *(_bv+ f) */ \ | " pextrw $1,%%mm1,%%edi\n" /* now perform MM4 = *(_bv+ f) */ \ |
|
Lines 87-93
static void FilterHoriz__mmx(unsigned ch
|
Link Here
|
|---|
|
" shrl $16,%%eax\n" \ | " shrl $16,%%eax\n" \ |
" movw %%ax,1(%0,%%esi)\n" \ | " movw %%ax,1(%0,%%esi)\n" \ |
: \ | : \ |
: "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256) \ |
: "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804) \ |
: "esi", "edi" , "memory", "eax" \ | : "esi", "edi" , "memory", "eax" \ |
); | ); |
| |
|
Lines 126-137
static void FilterVert__mmx(unsigned cha
|
Link Here
|
|---|
|
"psubw %%mm5,%%mm3\n" | "psubw %%mm5,%%mm3\n" |
"psubw %%mm4,%%mm2\n" | "psubw %%mm4,%%mm2\n" |
/* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */ | /* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */ |
"PMULLW "MANGLE(V3)",%%mm3\n" /* *3 */ |
"PMULLW %3,%%mm3\n" /* *3 */ |
"PMULLW "MANGLE(V3)",%%mm2\n" /* *3 */ |
"PMULLW %3,%%mm2\n" /* *3 */ |
"paddw %%mm7,%%mm3\n" /* highpart */ | "paddw %%mm7,%%mm3\n" /* highpart */ |
"paddw %%mm6,%%mm2\n" /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]); */ | "paddw %%mm6,%%mm2\n" /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]); */ |
"paddw "MANGLE(V804)",%%mm3\n" /* add 4 */ /* add 256 after shift */ |
"paddw %4,%%mm3\n" /* add 4 */ /* add 256 after shift */ |
"paddw "MANGLE(V804)",%%mm2\n" /* add 4 */ /* add 256 after shift */ |
"paddw %4,%%mm2\n" /* add 4 */ /* add 256 after shift */ |
"psraw $3,%%mm3\n" /* >>3 f coefs high */ | "psraw $3,%%mm3\n" /* >>3 f coefs high */ |
"psraw $3,%%mm2\n" /* >>3 f coefs low */ | "psraw $3,%%mm2\n" /* >>3 f coefs low */ |
| |
|
Lines 168-174
static void FilterVert__mmx(unsigned cha
|
Link Here
|
|---|
|
"movq %%mm4,(%0,%1)\n" /* pix[ystride]= */ | "movq %%mm4,(%0,%1)\n" /* pix[ystride]= */ |
"emms\n" | "emms\n" |
: | : |
: "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256) |
: "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804) |
: "esi", "edi" , "memory" | : "esi", "edi" , "memory" |
); | ); |
} | } |