Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 200549
Collapse All | Expand All

(-)libtheora-1.0beta2-orig/lib/enc/x86_32/dct_decode_mmx.c (-18 / +17 lines)
Lines 57-65 static void FilterHoriz__mmx(unsigned ch Link Here
57
    "psubw %%mm3,%%mm1\n"       /* mm1 = pix[0]-pix[3] mm1 - mm3 */     \
57
    "psubw %%mm3,%%mm1\n"       /* mm1 = pix[0]-pix[3] mm1 - mm3 */     \
58
    "movq %%mm0,%%mm7\n"        /* mm7 = pix[2]*/                       \
58
    "movq %%mm0,%%mm7\n"        /* mm7 = pix[2]*/                       \
59
    "psubw %%mm5,%%mm0\n"       /* mm0 = pix[2]-pix[1] mm0 - mm5*/      \
59
    "psubw %%mm5,%%mm0\n"       /* mm0 = pix[2]-pix[1] mm0 - mm5*/      \
60
    "PMULLW "MANGLE(V3)",%%mm0\n" /* *3 */                              \
60
    "PMULLW %3,%%mm0\n" /* *3 */                              \
61
    "paddw %%mm0,%%mm1\n"         /* mm1 has f[0] ... f[4]*/            \
61
    "paddw %%mm0,%%mm1\n"         /* mm1 has f[0] ... f[4]*/            \
62
    "paddw "MANGLE(V804)",%%mm1\n"/* add 4 */ /* add 256 after shift */ \
62
    "paddw %4,%%mm1\n"/* add 4 */ /* add 256 after shift */ \
63
    "psraw $3,%%mm1\n"          /* >>3 */                               \
63
    "psraw $3,%%mm1\n"          /* >>3 */                               \
64
    " pextrw $0,%%mm1,%%esi\n"  /* In MM1 we have 4 f coefs (16bits) */ \
64
    " pextrw $0,%%mm1,%%esi\n"  /* In MM1 we have 4 f coefs (16bits) */ \
65
    " pextrw $1,%%mm1,%%edi\n"  /* now perform MM4 = *(_bv+ f) */       \
65
    " pextrw $1,%%mm1,%%edi\n"  /* now perform MM4 = *(_bv+ f) */       \
Lines 75-94 static void FilterHoriz__mmx(unsigned ch Link Here
75
    " packuswb %%mm0,%%mm5\n"   /* mm5 = x x x x newpix1 */             \
75
    " packuswb %%mm0,%%mm5\n"   /* mm5 = x x x x newpix1 */             \
76
    " packuswb %%mm0,%%mm7\n"   /* mm7 = x x x x newpix2 */             \
76
    " packuswb %%mm0,%%mm7\n"   /* mm7 = x x x x newpix2 */             \
77
    " punpcklbw %%mm7,%%mm5\n"  /* 2 1 2 1 2 1 2 1 */                   \
77
    " punpcklbw %%mm7,%%mm5\n"  /* 2 1 2 1 2 1 2 1 */                   \
78
    " movd %%mm5,%%eax\n"       /* eax = newpix21 */                    \
78
    " movd %%mm5,%%edi\n"       /* edi = newpix21 */                    \
79
    " movw %%ax,1(%0)\n"                                                \
79
    " movw %%di,1(%0)\n"                                                \
80
    " psrlq $32,%%mm5\n"        /* why is so big stall here ? */        \
80
    " psrlq $32,%%mm5\n"        /* why is so big stall here ? */        \
81
    " shrl $16,%%eax\n"                                                 \
81
    " shrl $16,%%edi\n"                                                 \
82
    " lea 1(%0,%1,2),%%edi\n"                                           \
82
    " movw %%di,1(%0,%1,1)\n"                                           \
83
    " movw %%ax,1(%0,%1,1)\n"                                           \
83
    " movd %%mm5,%%edi\n"       /* edi = newpix21 high part */          \
84
    " movd %%mm5,%%eax\n"       /* eax = newpix21 high part */          \
85
    " lea (%1,%1,2),%%esi\n"                                            \
84
    " lea (%1,%1,2),%%esi\n"                                            \
86
    " movw %%ax,(%%edi)\n"                                              \
85
    " movw %%di,1(%0,%1,2)\n"                                              \
87
    " shrl $16,%%eax\n"                                                 \
86
    " shrl $16,%%edi\n"                                                 \
88
    " movw %%ax,1(%0,%%esi)\n"                                          \
87
    " movw %%di,1(%0,%%esi)\n"                                          \
89
    :                                                                   \
88
    :                                                                   \
90
    : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256)      \
89
    : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804) \
91
    : "esi", "edi" , "memory", "eax"                                    \
90
    : "esi", "edi" , "memory"                                           \
92
    );
91
    );
93
92
94
    OC_LOOP_H_4x4
93
    OC_LOOP_H_4x4
Lines 126-137 static void FilterVert__mmx(unsigned cha Link Here
126
    "psubw %%mm5,%%mm3\n"
125
    "psubw %%mm5,%%mm3\n"
127
    "psubw %%mm4,%%mm2\n"
126
    "psubw %%mm4,%%mm2\n"
128
                /* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */
127
                /* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */
129
    "PMULLW "MANGLE(V3)",%%mm3\n"    /* *3 */
128
    "PMULLW %3,%%mm3\n"    /* *3 */
130
    "PMULLW "MANGLE(V3)",%%mm2\n"    /* *3 */
129
    "PMULLW %3,%%mm2\n"    /* *3 */
131
    "paddw %%mm7,%%mm3\n"            /* highpart */
130
    "paddw %%mm7,%%mm3\n"            /* highpart */
132
    "paddw %%mm6,%%mm2\n"            /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]);  */
131
    "paddw %%mm6,%%mm2\n"            /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]);  */
133
    "paddw "MANGLE(V804)",%%mm3\n"   /* add 4 */ /* add 256 after shift */
132
    "paddw %4,%%mm3\n"   /* add 4 */ /* add 256 after shift */
134
    "paddw "MANGLE(V804)",%%mm2\n"   /* add 4 */ /* add 256 after shift */
133
    "paddw %4,%%mm2\n"   /* add 4 */ /* add 256 after shift */
135
    "psraw $3,%%mm3\n"               /* >>3 f coefs high */
134
    "psraw $3,%%mm3\n"               /* >>3 f coefs high */
136
    "psraw $3,%%mm2\n"               /* >>3 f coefs low */
135
    "psraw $3,%%mm2\n"               /* >>3 f coefs low */
137
136
Lines 168-174 static void FilterVert__mmx(unsigned cha Link Here
168
    "movq %%mm4,(%0,%1)\n"      /* pix[ystride]= */
167
    "movq %%mm4,(%0,%1)\n"      /* pix[ystride]= */
169
    "emms\n"
168
    "emms\n"
170
    :
169
    :
171
    : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256)
170
    : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804)
172
    : "esi", "edi" , "memory"
171
    : "esi", "edi" , "memory"
173
    );
172
    );
174
}
173
}

Return to bug 200549