diff -ur mjpegtools-1.6.2.orig/yuvdenoise/deinterlace.c mjpegtools-1.6.2/yuvdenoise/deinterlace.c --- mjpegtools-1.6.2.orig/yuvdenoise/deinterlace.c 2005-02-04 21:46:03.000000000 +0100 +++ mjpegtools-1.6.2/yuvdenoise/deinterlace.c 2005-02-04 21:46:50.327682376 +0100 @@ -161,7 +161,7 @@ " pxor %%mm7 , %%mm7; /* clear mm7 */\n" " /* */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load frameadress into ecx */\n" " /* */\n" " .rept 3 /* repeat 3 times */\n" @@ -177,7 +177,7 @@ " paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" " paddusw %%mm2 , %%mm0; /* to mm0 */\n" " /* */\n" - " movq (%%ebx), %%mm1; /* 8 Pixels from line */\n" + " movq (%%esi), %%mm1; /* 8 Pixels from line */\n" " movq (%%ecx), %%mm2; /* 8 Pixels from displaced line */\n" " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" @@ -189,14 +189,14 @@ " paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" " paddusw %%mm2 , %%mm0; /* to mm0 */\n" " addl $8 , %%eax; /* add 8 to frameaddress */\n" - " addl $8 , %%ebx; /* add 8 to frameaddress */\n" + " addl $8 , %%esi; /* add 8 to frameaddress */\n" " addl $8 , %%ecx; /* add 8 to frameaddress */\n" " .endr /* end loop */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (ref1), "m" (ref2), "m" (ref3) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); d=a[0]+a[1]+a[2]+a[3]; diff -ur mjpegtools-1.6.2.orig/yuvdenoise/motion.c mjpegtools-1.6.2/yuvdenoise/motion.c --- mjpegtools-1.6.2.orig/yuvdenoise/motion.c 2005-02-04 21:46:03.000000000 +0100 +++ mjpegtools-1.6.2/yuvdenoise/motion.c 2005-02-04 21:47:37.045580176 +0100 @@ -146,12 +146,12 @@ " pxor %%mm7 , %%mm7; /* clear mm7 */\n" " /* */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load width into ecx */\n" " /* */\n" ".rept 8 ; /* Loop for 8 lines */\n" " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" - " movq (%%ebx), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" + " movq (%%esi), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" " psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" @@ -162,13 +162,13 @@ " paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" " paddusw %%mm2 , %%mm0; /* to mm0 */\n" " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" " .endr /* end loop */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm), "m" (ref), "m" (denoiser.frame.w) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); #endif @@ -191,21 +191,21 @@ ( " pxor %%mm0 , %%mm0; /* clear mm0 */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load width into ecx */\n" " ; /* */\n" " .rept 8 ; /* */\n" " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" - " psadbw (%%ebx), %%mm1; /* 8 Pixels difference to mm1 */\n" + " psadbw (%%esi), %%mm1; /* 8 Pixels difference to mm1 */\n" " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" " .endr ; /* */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm), "m" (ref), "m" (denoiser.frame.w) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); #endif return a; @@ -254,12 +254,12 @@ " pxor %%mm7 , %%mm7; /* clear mm7 */\n" " /* */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load width into ecx */\n" " /* */\n" ".rept 4 ; /* Loop for 4 lines */\n" " movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" - " movd (%%ebx), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" + " movd (%%esi), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" " psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" @@ -270,13 +270,13 @@ " paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" " paddusw %%mm2 , %%mm0; /* to mm0 */\n" " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" " .endr /* end loop */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm), "m" (ref), "m" (halfwidth) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); #endif return (uint32_t)(a[0]+a[1]+a[2]+a[3]); @@ -300,13 +300,13 @@ " pxor %%mm7 , %%mm7; /* clear mm7 */\n" " /* */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load width into ecx */\n" " /* */\n" ".rept 8 ; /* Loop for 8 lines */\n" " movw (%%eax), %%dx; /* */\n" " movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" - " movw (%%ebx), %%dx; /* */\n" + " movw (%%esi), %%dx; /* */\n" " movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" @@ -318,13 +318,13 @@ " paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" " paddusw %%mm2 , %%mm0; /* to mm0 */\n" " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" " .endr /* end loop */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm), "m" (ref), "m" (W2) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); #endif return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); @@ -348,22 +348,22 @@ ( " pxor %%mm0 , %%mm0; /* clear mm0 */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load width into ecx */\n" " ; /* */\n" " .rept 4 ; /* */\n" " movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" - " movd (%%ebx), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" + " movd (%%esi), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" " psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" " .endr ; /* */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm), "m" (ref), "m" (halfwidth) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); #endif return a; @@ -385,24 +385,24 @@ ( " pxor %%mm0 , %%mm0; /* clear mm0 */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load width into ecx */\n" " ; /* */\n" " .rept 8 ; /* */\n" " movw (%%eax), %%dx; /* */\n" " movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" - " movw (%%ebx), %%dx; /* */\n" + " movw (%%esi), %%dx; /* */\n" " movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" " psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" " .endr ; /* */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm), "m" (ref), "m" (W2) - :"%eax", "%ebx", "%ecx" + :"%eax", "%esi", "%ecx" ); #endif return a; @@ -448,13 +448,13 @@ ( " pxor %%mm0 , %%mm0; /* clear mm0 */" " movl %1 , %%eax; /* load frameadress into eax */" - " movl %2 , %%ebx; /* load frameadress into ebx */" + " movl %2 , %%esi; /* load frameadress into esi */" " movl %3 , %%ecx; /* load frameadress into ecx */" " movl %4 , %%edx; /* load width into edx */" " ; /* */" " .rept 8 ; /* */" " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" - " movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" + " movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" " movq (%%ecx), %%mm3; /* reference to mm3 */" " psrlq $1 , %%mm1; /* average source pixels */" " psrlq $1 , %%mm2; /* shift right by one (divide by two) */" @@ -468,14 +468,14 @@ " paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" " paddusw %%mm1 , %%mm0; /* add result to mm0 */" " addl %%edx , %%eax; /* add framewidth to frameaddress */" - " addl %%edx , %%ebx; /* add framewidth to frameaddress */" + " addl %%edx , %%esi; /* add framewidth to frameaddress */" " addl %%edx , %%ecx; /* add framewidth to frameaddress */" " .endr ; /* */" " /* */" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" :"=m" (a) :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) - :"%eax", "%ebx", "%ecx", "%edx" + :"%eax", "%esi", "%ecx", "%edx" ); #endif return a; @@ -497,26 +497,26 @@ ( " pxor %%mm0 , %%mm0; /* clear mm0 */\n" " movl %1 , %%eax; /* load frameadress into eax */\n" - " movl %2 , %%ebx; /* load frameadress into ebx */\n" + " movl %2 , %%esi; /* load frameadress into esi */\n" " movl %3 , %%ecx; /* load frameadress into ecx */\n" " movl %4 , %%edx; /* load width into edx */\n" " ; /* */\n" " .rept 8 ; /* */\n" " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" - " movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" + " movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" " movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" " pavgb %%mm2 , %%mm1; /* average source pixels */\n" " psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" " addl %%edx , %%eax; /* add framewidth to frameaddress */\n" - " addl %%edx , %%ebx; /* add framewidth to frameaddress */\n" + " addl %%edx , %%esi; /* add framewidth to frameaddress */\n" " addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" " .endr ; /* */\n" " /* */\n" " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" :"=m" (a) :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) - :"%eax", "%ebx", "%ecx", "%edx" + :"%eax", "%esi", "%ecx", "%edx" ); #endif return a; --- mjpegtools-1.6.2.orig/mpeg2enc/quant_mmx.s 2005-02-04 22:58:53.000000000 +0100 +++ mjpegtools-1.6.2/mpeg2enc/quant_mmx.s 2005-02-05 17:40:20.729373952 +0100 @@ -18,6 +18,16 @@ ; ; ; +; Kevin F. Quinn 06 Feb 2005 - add GOT for local data reference, in order +; to avoid TEXTREL caused by local data overflim. Changes marked with +; 'KFQ start' and 'KFQ end' + + +; KFQ start +; Add reference to GOT for PIC-ability +extern _GLOBAL_OFFSET_TABLE_ +; KFQ end + ; quantize_ni_mmx.s: MMX optimized coefficient quantization sub-routine @@ -75,6 +85,16 @@ push esi push edi +; KFQ start - new code segment +; Retrieve the GOT, and get overflim location relative to it + call .get_GOT +.get_GOT: + pop esi + add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT wrt ..gotpc + movq mm6, [esi+overflim wrt ..gotoff]; overflow limit +; note; don't need GOT again, so reuse of esi from here on is ok. +; KFQ end + mov edi, [ebp+8] ; get dst mov esi, [ebp+12] ; get psrc mov ebx, [ebp+16] ; get pqm @@ -84,7 +104,10 @@ punpcklwd mm0, mm1 punpcklwd mm0, mm0 ; mm0 = [imquant|0..3]W - movq mm6, [overflim]; overflow limit +; KFQ start +; moved up, to retrieve via GOT before all the registers are in use +; movq mm6, [overflim]; overflow limit +; KFQ end movd mm1, [ebp+32] ; sat_limit movq mm2, mm1 --- mjpegtools-1.6.2.orig/utils/mmxsse/mblock_sumsq_mmx.s 2005-02-05 18:02:28.000000000 +0100 +++ mjpegtools-1.6.2/utils/mmxsse/mblock_sumsq_mmx.s 2005-02-05 19:00:50.758097976 +0100 @@ -17,6 +17,15 @@ ; along with this program; if not, write to the Free Software ; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ; +; Kevin F. Quinn 06 Feb 2005 - add GOT for local data reference, in order +; to avoid TEXTREL caused by local data "twos". Changes marked with +; 'KFQ start' and 'KFQ end' + +; KFQ start +; Add reference to GOT for PIC-ability +extern _GLOBAL_OFFSET_TABLE_ +; KFQ end + ; total squared difference between two (16*h) blocks ; including optional half pel interpolation of [ebp+8] ; blk1 (hx,hy) @@ -338,7 +347,17 @@ paddw mm3, mm6 paddw mm0, mm2 paddw mm1, mm3 - movq mm6, [twos] +; KFQ start +; Retrieve the GOT, and get twos location relative to it + call .get_GOT1 +.get_GOT1: + pop esi ; note; esi now overwritten so need to restore + add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT1 wrt ..gotpc + movq mm6, [esi+twos wrt ..gotoff] + mov esi, [ebp+16] ; restore lx; this is only ever read +; movq mm6, [twos] +; KFQ end + paddw mm0, mm6 ; round mm0 paddw mm1, mm6 ; round mm1 psrlw mm0, 2 @@ -383,7 +402,16 @@ paddw mm1, mm3 paddw mm2, mm4 - movq mm6, [twos] +; KFQ start +; Retrieve the GOT, and get twos location relative to it + call .get_GOT2 +.get_GOT2: + pop esi ; note; esi now overwritten so need to restore + add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT2 wrt ..gotpc + movq mm6, [esi+twos wrt ..gotoff] + mov esi, [ebp+16] ; restore lx; this is only ever read +; movq mm6, [twos] +; KFQ end paddw mm1, mm6 paddw mm2, mm6