View | Details | Raw Unified
Collapse All | Expand All

(-) mjpegtools-1.6.2.orig/yuvdenoise/deinterlace.c (-4 / +4 lines)
 Lines 161-167    Link Here 
                  " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
                  " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
                  "                                      /*                                                    */\n"
                  "                                      /*                                                    */\n"
                  " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
                  " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
                  " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
                  " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
                  " movl         %3    , %%ecx;          /* load frameadress into ecx                          */\n"
                  " movl         %3    , %%ecx;          /* load frameadress into ecx                          */\n"
                  "                                      /*                                                    */\n"
                  "                                      /*                                                    */\n"
                  " .rept 3                              /* repeat 3 times                                     */\n"
                  " .rept 3                              /* repeat 3 times                                     */\n"
 Lines 177-183    Link Here 
                  " paddusw      %%mm1 , %%mm0;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
                  " paddusw      %%mm1 , %%mm0;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
                  " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
                  " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
                  "                                      /*                                                    */\n"
                  "                                      /*                                                    */\n"
                  " movq        (%%ebx), %%mm1;          /* 8 Pixels from line                                 */\n"
                  " movq        (%%esi), %%mm1;          /* 8 Pixels from line                                 */\n"
                  " movq        (%%ecx), %%mm2;          /* 8 Pixels from displaced line                       */\n"
                  " movq        (%%ecx), %%mm2;          /* 8 Pixels from displaced line                       */\n"
                  " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
                  " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
                  " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
                  " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
 Lines 189-202    Link Here 
                  " paddusw      %%mm1 , %%mm0;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
                  " paddusw      %%mm1 , %%mm0;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
                  " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
                  " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
                  " addl         $8    , %%eax;          /* add 8 to frameaddress                              */\n"
                  " addl         $8    , %%eax;          /* add 8 to frameaddress                              */\n"
                  " addl         $8    , %%ebx;          /* add 8 to frameaddress                              */\n"
                  " addl         $8    , %%esi;          /* add 8 to frameaddress                              */\n"
                  " addl         $8    , %%ecx;          /* add 8 to frameaddress                              */\n"
                  " addl         $8    , %%ecx;          /* add 8 to frameaddress                              */\n"
                  " .endr                                /* end loop                                           */\n"
                  " .endr                                /* end loop                                           */\n"
                  "                                      /*                                                    */\n"
                  "                                      /*                                                    */\n"
                  " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
                  " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
                  :"=m" (a)
                  :"=m" (a)
                  :"m" (ref1), "m" (ref2), "m" (ref3)
                  :"m" (ref1), "m" (ref2), "m" (ref3)
                  :"%eax", "%ebx", "%ecx"
                  :"%eax", "%esi", "%ecx"
                );
                );
              
              
              d=a[0]+a[1]+a[2]+a[3];
              d=a[0]+a[1]+a[2]+a[3];
(-) mjpegtools-1.6.2.orig/utils/mmxsse/mblock_sumsq_mmx.s (-35 / +86 lines)
 Lines 146-157    Link Here 
    " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
    " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
    " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    ".rept 8                    ;          /* Loop for 8 lines                                   */\n"
    ".rept 8                    ;          /* Loop for 8 lines                                   */\n"
    " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */\n"
    " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */\n"
    " movq        (%%ebx), %%mm2;          /* 8 Pixels from reference frame to mm2               */\n"
    " movq        (%%esi), %%mm2;          /* 8 Pixels from reference frame to mm2               */\n"
    " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
    " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
    " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
    " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
    " psubusb      %%mm2 , %%mm1;          /* positive differences between mm1 and mm3           */\n"
    " psubusb      %%mm2 , %%mm1;          /* positive differences between mm1 and mm3           */\n"
 Lines 162-174    Link Here 
    " paddusw      %%mm1 , %%mm0;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
    " paddusw      %%mm1 , %%mm0;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
    " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
    " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%ebx;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%esi;          /* add framewidth to frameaddress                     */\n"
    " .endr                                /* end loop                                           */\n"
    " .endr                                /* end loop                                           */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    :"=m" (a)     
    :"=m" (a)     
    :"m" (frm), "m" (ref), "m" (denoiser.frame.w)
    :"m" (frm), "m" (ref), "m" (denoiser.frame.w)
    :"%eax", "%ebx", "%ecx"
    :"%eax", "%esi", "%ecx"
    );
    );
#endif
#endif
 Lines 191-211    Link Here 
    (
    (
    " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
    " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
    " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    "                           ;          /*                                                    */\n"
    "                           ;          /*                                                    */\n"
    " .rept 8                   ;          /*                                                    */\n"
    " .rept 8                   ;          /*                                                    */\n"
    " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */\n"
    " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */\n"
    " psadbw      (%%ebx), %%mm1;          /* 8 Pixels difference to mm1                         */\n"
    " psadbw      (%%esi), %%mm1;          /* 8 Pixels difference to mm1                         */\n"
    " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
    " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%ebx;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%esi;          /* add framewidth to frameaddress                     */\n"
    " .endr                     ;          /*                                                    */\n"
    " .endr                     ;          /*                                                    */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    :"=m" (a)     
    :"=m" (a)     
    :"m" (frm), "m" (ref), "m" (denoiser.frame.w)
    :"m" (frm), "m" (ref), "m" (denoiser.frame.w)
    :"%eax", "%ebx", "%ecx"
    :"%eax", "%esi", "%ecx"
    );
    );
#endif
#endif
  return a;
  return a;
 Lines 254-265    Link Here 
    " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
    " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
    " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    ".rept 4                    ;          /* Loop for 4 lines                                   */\n"
    ".rept 4                    ;          /* Loop for 4 lines                                   */\n"
    " movd        (%%eax), %%mm1;          /* 4 Pixels from filtered frame to mm1                */\n"
    " movd        (%%eax), %%mm1;          /* 4 Pixels from filtered frame to mm1                */\n"
    " movd        (%%ebx), %%mm2;          /* 4 Pixels from reference frame to mm2               */\n"
    " movd        (%%esi), %%mm2;          /* 4 Pixels from reference frame to mm2               */\n"
    " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
    " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
    " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
    " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
    " psubusb      %%mm2 , %%mm1;          /* positive differences between mm1 and mm3           */\n"
    " psubusb      %%mm2 , %%mm1;          /* positive differences between mm1 and mm3           */\n"
 Lines 270-282    Link Here 
    " paddusw      %%mm1 , %%mm2;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
    " paddusw      %%mm1 , %%mm2;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
    " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
    " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%ebx;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%esi;          /* add framewidth to frameaddress                     */\n"
    " .endr                                /* end loop                                           */\n"
    " .endr                                /* end loop                                           */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    :"=m" (a)     
    :"=m" (a)     
    :"m" (frm), "m" (ref), "m" (halfwidth)
    :"m" (frm), "m" (ref), "m" (halfwidth)
    :"%eax", "%ebx", "%ecx"
    :"%eax", "%esi", "%ecx"
    );
    );
#endif
#endif
  return (uint32_t)(a[0]+a[1]+a[2]+a[3]);
  return (uint32_t)(a[0]+a[1]+a[2]+a[3]);
 Lines 300-312    Link Here 
    " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
    " pxor        %%mm7 , %%mm7;           /* clear mm7                                          */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
    " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    ".rept 8                    ;          /* Loop for 8 lines                                   */\n"
    ".rept 8                    ;          /* Loop for 8 lines                                   */\n"
    " movw        (%%eax),  %%dx;          /*                                                    */\n"
    " movw        (%%eax),  %%dx;          /*                                                    */\n"
    " movd         %%edx , %%mm1;          /* 2 Pixels from filtered frame to mm1                */\n"
    " movd         %%edx , %%mm1;          /* 2 Pixels from filtered frame to mm1                */\n"
    " movw        (%%ebx),  %%dx;          /*                                                    */\n"
    " movw        (%%esi),  %%dx;          /*                                                    */\n"
    " movd         %%edx , %%mm2;          /* 2 Pixels from reference frame to mm2               */\n"
    " movd         %%edx , %%mm2;          /* 2 Pixels from reference frame to mm2               */\n"
    " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
    " movq         %%mm2 , %%mm3;          /* hold a copy of mm2 in mm3                          */\n"
    " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
    " psubusb      %%mm1 , %%mm3;          /* positive differences between mm2 and mm1           */\n"
 Lines 318-330    Link Here 
    " paddusw      %%mm1 , %%mm2;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
    " paddusw      %%mm1 , %%mm2;          /* add mm1 (stored in mm1 and mm2...)                 */\n"
    " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
    " paddusw      %%mm2 , %%mm0;          /* to mm0                                             */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%ebx;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%esi;          /* add framewidth to frameaddress                     */\n"
    " .endr                                /* end loop                                           */\n"
    " .endr                                /* end loop                                           */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    :"=m" (a)     
    :"=m" (a)     
    :"m" (frm), "m" (ref), "m" (W2)
    :"m" (frm), "m" (ref), "m" (W2)
    :"%eax", "%ebx", "%ecx"
    :"%eax", "%esi", "%ecx"
    );
    );
#endif
#endif
  return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]);
  return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]);
 Lines 348-369    Link Here 
    (
    (
    " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
    " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
    " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    "                           ;          /*                                                    */\n"
    "                           ;          /*                                                    */\n"
    " .rept 4                   ;          /*                                                    */\n"
    " .rept 4                   ;          /*                                                    */\n"
    " movd        (%%eax), %%mm1;          /* 4 Pixels from filtered frame to mm1                */\n"
    " movd        (%%eax), %%mm1;          /* 4 Pixels from filtered frame to mm1                */\n"
    " movd        (%%ebx), %%mm2;          /* 4 Pixels from filtered frame to mm2                */\n"
    " movd        (%%esi), %%mm2;          /* 4 Pixels from filtered frame to mm2                */\n"
    " psadbw       %%mm2 , %%mm1;          /* 4 Pixels difference to mm1                         */\n"
    " psadbw       %%mm2 , %%mm1;          /* 4 Pixels difference to mm1                         */\n"
    " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
    " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%ebx;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%esi;          /* add framewidth to frameaddress                     */\n"
    " .endr                     ;          /*                                                    */\n"
    " .endr                     ;          /*                                                    */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    :"=m" (a)     
    :"=m" (a)     
    :"m" (frm), "m" (ref), "m" (halfwidth)
    :"m" (frm), "m" (ref), "m" (halfwidth)
    :"%eax", "%ebx", "%ecx"
    :"%eax", "%esi", "%ecx"
    );
    );
#endif
#endif
  return a;
  return a;
 Lines 385-408    Link Here 
    (
    (
    " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
    " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
    " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
    " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    " movl         %3    , %%ecx;          /* load width       into ecx                          */\n"
    "                           ;          /*                                                    */\n"
    "                           ;          /*                                                    */\n"
    " .rept 8                   ;          /*                                                    */\n"
    " .rept 8                   ;          /*                                                    */\n"
    " movw        (%%eax),  %%dx;          /*                                                    */\n"
    " movw        (%%eax),  %%dx;          /*                                                    */\n"
    " movd         %%edx , %%mm1;          /* 2 Pixels from filtered frame to mm1                */\n"
    " movd         %%edx , %%mm1;          /* 2 Pixels from filtered frame to mm1                */\n"
    " movw        (%%ebx),  %%dx;          /*                                                    */\n"
    " movw        (%%esi),  %%dx;          /*                                                    */\n"
    " movd         %%edx , %%mm2;          /* 2 Pixels from filtered frame to mm2                */\n"
    " movd         %%edx , %%mm2;          /* 2 Pixels from filtered frame to mm2                */\n"
    " psadbw       %%mm2 , %%mm1;          /* 2 Pixels difference to mm1                         */\n"
    " psadbw       %%mm2 , %%mm1;          /* 2 Pixels difference to mm1                         */\n"
    " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
    " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%eax;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%ebx;          /* add framewidth to frameaddress                     */\n"
    " addl         %%ecx , %%esi;          /* add framewidth to frameaddress                     */\n"
    " .endr                     ;          /*                                                    */\n"
    " .endr                     ;          /*                                                    */\n"
    "                                      /*                                                    */\n"
    "                                      /*                                                    */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
    :"=m" (a)     
    :"=m" (a)     
    :"m" (frm), "m" (ref), "m" (W2)
    :"m" (frm), "m" (ref), "m" (W2)
    :"%eax", "%ebx", "%ecx"
    :"%eax", "%esi", "%ecx"
    );
    );
#endif
#endif
  return a;
  return a;
 Lines 448-460    Link Here 
      (
      (
	  " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */"
	  " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */"
	  " movl         %1    , %%eax;          /* load frameadress into eax                          */"
	  " movl         %1    , %%eax;          /* load frameadress into eax                          */"
	  " movl         %2    , %%ebx;          /* load frameadress into ebx                          */"
	  " movl         %2    , %%esi;          /* load frameadress into esi                          */"
	  " movl         %3    , %%ecx;          /* load frameadress into ecx                          */"
	  " movl         %3    , %%ecx;          /* load frameadress into ecx                          */"
	  " movl         %4    , %%edx;          /* load width       into edx                          */"
	  " movl         %4    , %%edx;          /* load width       into edx                          */"
	  "                           ;          /*                                                    */"
	  "                           ;          /*                                                    */"
	  " .rept 8                   ;          /*                                                    */"
	  " .rept 8                   ;          /*                                                    */"
	  " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */"
	  " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */"
	  " movq        (%%ebx), %%mm2;          /* 8 Pixels from filtered frame to mm2 (displaced)    */"
	  " movq        (%%esi), %%mm2;          /* 8 Pixels from filtered frame to mm2 (displaced)    */"
	  " movq        (%%ecx), %%mm3;          /* reference to mm3                                   */"
	  " movq        (%%ecx), %%mm3;          /* reference to mm3                                   */"
	  " psrlq        $1    , %%mm1;          /* average source pixels                              */"
	  " psrlq        $1    , %%mm1;          /* average source pixels                              */"
	  " psrlq        $1    , %%mm2;          /* shift right by one (divide by two)                 */"
	  " psrlq        $1    , %%mm2;          /* shift right by one (divide by two)                 */"
 Lines 468-481    Link Here 
	  " paddusb      %%mm3 , %%mm1;          /* mm1 now contains abs(mm1-mm2) */"
	  " paddusb      %%mm3 , %%mm1;          /* mm1 now contains abs(mm1-mm2) */"
	  " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */"
	  " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */"
	  " addl         %%edx , %%eax;          /* add framewidth to frameaddress                     */"
	  " addl         %%edx , %%eax;          /* add framewidth to frameaddress                     */"
	  " addl         %%edx , %%ebx;          /* add framewidth to frameaddress                     */"
	  " addl         %%edx , %%esi;          /* add framewidth to frameaddress                     */"
	  " addl         %%edx , %%ecx;          /* add framewidth to frameaddress                     */"
	  " addl         %%edx , %%ecx;          /* add framewidth to frameaddress                     */"
	  " .endr                     ;          /*                                                    */"
	  " .endr                     ;          /*                                                    */"
	  "                                      /*                                                    */"
	  "                                      /*                                                    */"
	  " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */"
	  " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */"
	  :"=m" (a)     
	  :"=m" (a)     
	  :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask)
	  :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask)
	  :"%eax", "%ebx", "%ecx", "%edx"
	  :"%eax", "%esi", "%ecx", "%edx"
	  );
	  );
#endif
#endif
  return a;
  return a;
 Lines 497-522    Link Here 
      (
      (
	  " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
	  " pxor         %%mm0 , %%mm0;          /* clear mm0                                          */\n"
	  " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
	  " movl         %1    , %%eax;          /* load frameadress into eax                          */\n"
	  " movl         %2    , %%ebx;          /* load frameadress into ebx                          */\n"
	  " movl         %2    , %%esi;          /* load frameadress into esi                          */\n"
	  " movl         %3    , %%ecx;          /* load frameadress into ecx                          */\n"
	  " movl         %3    , %%ecx;          /* load frameadress into ecx                          */\n"
	  " movl         %4    , %%edx;          /* load width       into edx                          */\n"
	  " movl         %4    , %%edx;          /* load width       into edx                          */\n"
	  "                           ;          /*                                                    */\n"
	  "                           ;          /*                                                    */\n"
	  " .rept 8                   ;          /*                                                    */\n"
	  " .rept 8                   ;          /*                                                    */\n"
	  " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */\n"
	  " movq        (%%eax), %%mm1;          /* 8 Pixels from filtered frame to mm1                */\n"
	  " movq        (%%ebx), %%mm2;          /* 8 Pixels from filtered frame to mm2 (displaced)    */\n"
	  " movq        (%%esi), %%mm2;          /* 8 Pixels from filtered frame to mm2 (displaced)    */\n"
	  " movq        (%%ecx), %%mm3;          /* 8 Pixels from reference frame to mm3               */\n"
	  " movq        (%%ecx), %%mm3;          /* 8 Pixels from reference frame to mm3               */\n"
	  " pavgb        %%mm2 , %%mm1;          /* average source pixels                              */\n"
	  " pavgb        %%mm2 , %%mm1;          /* average source pixels                              */\n"
	  " psadbw       %%mm3 , %%mm1;          /* 8 Pixels difference to mm1                         */\n"
	  " psadbw       %%mm3 , %%mm1;          /* 8 Pixels difference to mm1                         */\n"
	  " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
	  " paddusw      %%mm1 , %%mm0;          /* add result to mm0                                  */\n"
	  " addl         %%edx , %%eax;          /* add framewidth to frameaddress                     */\n"
	  " addl         %%edx , %%eax;          /* add framewidth to frameaddress                     */\n"
	  " addl         %%edx , %%ebx;          /* add framewidth to frameaddress                     */\n"
	  " addl         %%edx , %%esi;          /* add framewidth to frameaddress                     */\n"
	  " addl         %%edx , %%ecx;          /* add framewidth to frameaddress                     */\n"
	  " addl         %%edx , %%ecx;          /* add framewidth to frameaddress                     */\n"
	  " .endr                     ;          /*                                                    */\n"
	  " .endr                     ;          /*                                                    */\n"
	  "                                      /*                                                    */\n"
	  "                                      /*                                                    */\n"
	  " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
	  " movq         %%mm0 , %0   ;          /* make mm0 available to gcc ...                      */\n"
	  :"=m" (a)     
	  :"=m" (a)     
	  :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w)
	  :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w)
	  :"%eax", "%ebx", "%ecx", "%edx"
	  :"%eax", "%esi", "%ecx", "%edx"
	  );
	  );
#endif
#endif
  return a;
  return a;
 Lines 18-23    Link Here 
;
;
;
;
;
;
; Kevin F. Quinn 06 Feb 2005 - add GOT for local data reference, in order
; to avoid TEXTREL caused by local data "overflim".  Changes marked with
; 'KFQ start' and 'KFQ end'.
; KFQ start
; Add reference to GOT for PIC-ability
extern _GLOBAL_OFFSET_TABLE_
; KFQ end
;  quantize_ni_mmx.s:  MMX optimized coefficient quantization sub-routine
;  quantize_ni_mmx.s:  MMX optimized coefficient quantization sub-routine
 Lines 75-80    Link Here 
	push esi     
	push esi     
	push edi
	push edi
; KFQ start - new code segment
; Retrieve the GOT, and get overflim location relative to it
        call .get_GOT
.get_GOT:
        pop esi
        add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT wrt ..gotpc                                           
	movq  mm6, [esi+overflim wrt ..gotoff]; overflow limit
; Note: the GOT is not needed again, so reusing esi from here on is ok.
; KFQ end
	mov edi, [ebp+8]    ; get dst
	mov edi, [ebp+8]    ; get dst
	mov esi, [ebp+12]	; get psrc
	mov esi, [ebp+12]	; get psrc
	mov ebx, [ebp+16]	; get pqm
	mov ebx, [ebp+16]	; get pqm
 Lines 84-90    Link Here 
	punpcklwd mm0, mm1  
	punpcklwd mm0, mm1  
	punpcklwd mm0, mm0    ; mm0 = [imquant|0..3]W
	punpcklwd mm0, mm0    ; mm0 = [imquant|0..3]W
	
	
	movq  mm6, [overflim]; overflow limit
; KFQ start
; moved up, to retrieve via GOT before all the registers are in use
;	movq  mm6, [overflim]; overflow limit
; KFQ end
	movd mm1, [ebp+32]  ; sat_limit
	movd mm1, [ebp+32]  ; sat_limit
	movq mm2, mm1
	movq mm2, mm1
 Lines 17-22    Link Here 
;  along with this program; if not, write to the Free Software
;  along with this program; if not, write to the Free Software
;  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
;  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
;
;
; Kevin F. Quinn 06 Feb 2005 - add GOT for local data reference, in order
; to avoid TEXTREL caused by local data "twos".  Changes marked with
; 'KFQ start' and 'KFQ end'
; KFQ start
; Add reference to GOT for PIC-ability
extern _GLOBAL_OFFSET_TABLE_
; KFQ end
; total squared difference between two (16*h) blocks
; total squared difference between two (16*h) blocks
; including optional half pel interpolation of [ebp+8] ; blk1 (hx,hy)
; including optional half pel interpolation of [ebp+8] ; blk1 (hx,hy)
 Lines 338-344    Link Here 
	paddw	  mm3, mm6
	paddw	  mm3, mm6
	paddw	  mm0, mm2
	paddw	  mm0, mm2
	paddw	  mm1, mm3
	paddw	  mm1, mm3
	movq      mm6, [twos]
; KFQ start
; Retrieve the GOT, and get twos location relative to it
        call .get_GOT1
.get_GOT1:
        pop       esi	; note; esi now overwritten so need to restore
        add       esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT1 wrt ..gotpc
	movq      mm6, [esi+twos wrt ..gotoff]
	mov       esi, [ebp+16] ; restore lx; this is only ever read
;	movq      mm6, [twos]
; KFQ end
	paddw	  mm0, mm6    ; round mm0
	paddw	  mm0, mm6    ; round mm0
	paddw	  mm1, mm6    ; round mm1
	paddw	  mm1, mm6    ; round mm1
	psrlw	  mm0, 2
	psrlw	  mm0, 2
 Lines 383-389    Link Here 
	paddw	  mm1, mm3
	paddw	  mm1, mm3
	paddw	  mm2, mm4
	paddw	  mm2, mm4
	movq      mm6, [twos]
; KFQ start
; Retrieve the GOT, and get twos location relative to it
        call .get_GOT2
.get_GOT2:
        pop       esi	; note; esi now overwritten so need to restore
        add       esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT2 wrt ..gotpc
	movq      mm6, [esi+twos wrt ..gotoff]
	mov       esi, [ebp+16] ; restore lx; this is only ever read
;	movq      mm6, [twos]
; KFQ end
	paddw     mm1, mm6
	paddw     mm1, mm6
	paddw	  mm2, mm6
	paddw	  mm2, mm6