|
|
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" | " pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
" /* */\n" | " /* */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load width into ecx */\n" | " movl %3 , %%ecx; /* load width into ecx */\n" |
" /* */\n" | " /* */\n" |
".rept 8 ; /* Loop for 8 lines */\n" | ".rept 8 ; /* Loop for 8 lines */\n" |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" | " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
" movq (%%ebx), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
" movq (%%esi), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" | " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" | " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" | " psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
|
|
" paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" | " paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" | " paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
" .endr /* end loop */\n" | " .endr /* end loop */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) | :"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
:"%eax", "%ebx", "%ecx" |
:"%eax", "%esi", "%ecx" |
); | ); |
#endif | #endif |
| |
|
|
( | ( |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" | " pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load width into ecx */\n" | " movl %3 , %%ecx; /* load width into ecx */\n" |
" ; /* */\n" | " ; /* */\n" |
" .rept 8 ; /* */\n" | " .rept 8 ; /* */\n" |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" | " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
" psadbw (%%ebx), %%mm1; /* 8 Pixels difference to mm1 */\n" |
" psadbw (%%esi), %%mm1; /* 8 Pixels difference to mm1 */\n" |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" | " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
" .endr ; /* */\n" | " .endr ; /* */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) | :"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
:"%eax", "%ebx", "%ecx" |
:"%eax", "%esi", "%ecx" |
); | ); |
#endif | #endif |
return a; | return a; |
|
|
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" | " pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
" /* */\n" | " /* */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load width into ecx */\n" | " movl %3 , %%ecx; /* load width into ecx */\n" |
" /* */\n" | " /* */\n" |
".rept 4 ; /* Loop for 4 lines */\n" | ".rept 4 ; /* Loop for 4 lines */\n" |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" | " movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
" movd (%%ebx), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
" movd (%%esi), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" | " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" | " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" | " psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
|
|
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" | " paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" | " paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
" .endr /* end loop */\n" | " .endr /* end loop */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm), "m" (ref), "m" (halfwidth) | :"m" (frm), "m" (ref), "m" (halfwidth) |
:"%eax", "%ebx", "%ecx" |
:"%eax", "%esi", "%ecx" |
); | ); |
#endif | #endif |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]); | return (uint32_t)(a[0]+a[1]+a[2]+a[3]); |
|
|
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" | " pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
" /* */\n" | " /* */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load width into ecx */\n" | " movl %3 , %%ecx; /* load width into ecx */\n" |
" /* */\n" | " /* */\n" |
".rept 8 ; /* Loop for 8 lines */\n" | ".rept 8 ; /* Loop for 8 lines */\n" |
" movw (%%eax), %%dx; /* */\n" | " movw (%%eax), %%dx; /* */\n" |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" | " movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
" movw (%%ebx), %%dx; /* */\n" |
" movw (%%esi), %%dx; /* */\n" |
" movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" | " movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" | " movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" | " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
|
|
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" | " paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" | " paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
" .endr /* end loop */\n" | " .endr /* end loop */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm), "m" (ref), "m" (W2) | :"m" (frm), "m" (ref), "m" (W2) |
:"%eax", "%ebx", "%ecx" |
:"%eax", "%esi", "%ecx" |
); | ); |
#endif | #endif |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); | return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); |
|
|
( | ( |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" | " pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load width into ecx */\n" | " movl %3 , %%ecx; /* load width into ecx */\n" |
" ; /* */\n" | " ; /* */\n" |
" .rept 4 ; /* */\n" | " .rept 4 ; /* */\n" |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" | " movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
" movd (%%ebx), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
" movd (%%esi), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
" psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" | " psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" | " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
" .endr ; /* */\n" | " .endr ; /* */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm), "m" (ref), "m" (halfwidth) | :"m" (frm), "m" (ref), "m" (halfwidth) |
:"%eax", "%ebx", "%ecx" |
:"%eax", "%esi", "%ecx" |
); | ); |
#endif | #endif |
return a; | return a; |
|
|
( | ( |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" | " pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load width into ecx */\n" | " movl %3 , %%ecx; /* load width into ecx */\n" |
" ; /* */\n" | " ; /* */\n" |
" .rept 8 ; /* */\n" | " .rept 8 ; /* */\n" |
" movw (%%eax), %%dx; /* */\n" | " movw (%%eax), %%dx; /* */\n" |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" | " movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
" movw (%%ebx), %%dx; /* */\n" |
" movw (%%esi), %%dx; /* */\n" |
" movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" | " movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" |
" psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" | " psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" | " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
" .endr ; /* */\n" | " .endr ; /* */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm), "m" (ref), "m" (W2) | :"m" (frm), "m" (ref), "m" (W2) |
:"%eax", "%ebx", "%ecx" |
:"%eax", "%esi", "%ecx" |
); | ); |
#endif | #endif |
return a; | return a; |
|
|
( | ( |
" pxor %%mm0 , %%mm0; /* clear mm0 */" | " pxor %%mm0 , %%mm0; /* clear mm0 */" |
" movl %1 , %%eax; /* load frameadress into eax */" | " movl %1 , %%eax; /* load frameadress into eax */" |
" movl %2 , %%ebx; /* load frameadress into ebx */" |
" movl %2 , %%esi; /* load frameadress into esi */" |
" movl %3 , %%ecx; /* load frameadress into ecx */" | " movl %3 , %%ecx; /* load frameadress into ecx */" |
" movl %4 , %%edx; /* load width into edx */" | " movl %4 , %%edx; /* load width into edx */" |
" ; /* */" | " ; /* */" |
" .rept 8 ; /* */" | " .rept 8 ; /* */" |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" | " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
" movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
" movq (%%ecx), %%mm3; /* reference to mm3 */" | " movq (%%ecx), %%mm3; /* reference to mm3 */" |
" psrlq $1 , %%mm1; /* average source pixels */" | " psrlq $1 , %%mm1; /* average source pixels */" |
" psrlq $1 , %%mm2; /* shift right by one (divide by two) */" | " psrlq $1 , %%mm2; /* shift right by one (divide by two) */" |
|
|
" paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" | " paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */" | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
" addl %%edx , %%eax; /* add framewidth to frameaddress */" | " addl %%edx , %%eax; /* add framewidth to frameaddress */" |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */" |
" addl %%edx , %%esi; /* add framewidth to frameaddress */" |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */" | " addl %%edx , %%ecx; /* add framewidth to frameaddress */" |
" .endr ; /* */" | " .endr ; /* */" |
" /* */" | " /* */" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" |
:"=m" (a) | :"=m" (a) |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) | :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) |
:"%eax", "%ebx", "%ecx", "%edx" |
:"%eax", "%esi", "%ecx", "%edx" |
); | ); |
#endif | #endif |
return a; | return a; |
|
|
( | ( |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" | " pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
" movl %1 , %%eax; /* load frameadress into eax */\n" | " movl %1 , %%eax; /* load frameadress into eax */\n" |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
" movl %3 , %%ecx; /* load frameadress into ecx */\n" | " movl %3 , %%ecx; /* load frameadress into ecx */\n" |
" movl %4 , %%edx; /* load width into edx */\n" | " movl %4 , %%edx; /* load width into edx */\n" |
" ; /* */\n" | " ; /* */\n" |
" .rept 8 ; /* */\n" | " .rept 8 ; /* */\n" |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" | " movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
" movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
" movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" | " movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" |
" pavgb %%mm2 , %%mm1; /* average source pixels */\n" | " pavgb %%mm2 , %%mm1; /* average source pixels */\n" |
" psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" | " psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" | " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
" addl %%edx , %%eax; /* add framewidth to frameaddress */\n" | " addl %%edx , %%eax; /* add framewidth to frameaddress */\n" |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */\n" |
" addl %%edx , %%esi; /* add framewidth to frameaddress */\n" |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" | " addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" |
" .endr ; /* */\n" | " .endr ; /* */\n" |
" /* */\n" | " /* */\n" |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" | " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
:"=m" (a) | :"=m" (a) |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) | :"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) |
:"%eax", "%ebx", "%ecx", "%edx" |
:"%eax", "%esi", "%ecx", "%edx" |
); | ); |
#endif | #endif |
return a; | return a; |
|
|
; | ; |
; | ; |
; | ; |
|
; Kevin F. Quinn 06 Feb 2005 - add GOT for local data reference, in order |
|
; to avoid TEXTREL caused by local data "overflim". Changes marked with
|
; 'KFQ start' and 'KFQ end' |
|
|
|
|
|
; KFQ start |
|
; Add reference to GOT for PIC-ability |
|
extern _GLOBAL_OFFSET_TABLE_ |
|
; KFQ end |
|
|
; quantize_ni_mmx.s: MMX optimized coefficient quantization sub-routine | ; quantize_ni_mmx.s: MMX optimized coefficient quantization sub-routine |
| |
| |
|
|
push esi | push esi |
push edi | push edi |
| |
|
; KFQ start - new code segment |
|
; Retrieve the GOT, and get overflim location relative to it |
|
call .get_GOT |
|
.get_GOT: |
|
pop esi |
|
add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT wrt ..gotpc |
|
movq mm6, [esi+overflim wrt ..gotoff]; overflow limit |
|
; Note: the GOT is not needed again, so reusing esi from here on is OK.
|
; KFQ end |
|
|
mov edi, [ebp+8] ; get dst | mov edi, [ebp+8] ; get dst |
mov esi, [ebp+12] ; get psrc | mov esi, [ebp+12] ; get psrc |
mov ebx, [ebp+16] ; get pqm | mov ebx, [ebp+16] ; get pqm |
|
|
punpcklwd mm0, mm1 | punpcklwd mm0, mm1 |
punpcklwd mm0, mm0 ; mm0 = [imquant|0..3]W | punpcklwd mm0, mm0 ; mm0 = [imquant|0..3]W |
| |
movq mm6, [overflim]; overflow limit |
; KFQ start |
|
; The overflim load was moved up so it is fetched via the GOT before all the registers are in use.
|
; movq mm6, [overflim]; overflow limit |
|
; KFQ end |
| |
movd mm1, [ebp+32] ; sat_limit | movd mm1, [ebp+32] ; sat_limit |
movq mm2, mm1 | movq mm2, mm1 |
|
|
; along with this program; if not, write to the Free Software | ; along with this program; if not, write to the Free Software |
; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
; | ; |
|
; Kevin F. Quinn 06 Feb 2005 - add GOT for local data reference, in order |
|
; to avoid TEXTREL caused by local data "twos". Changes marked with |
|
; 'KFQ start' and 'KFQ end' |
|
|
|
; KFQ start |
|
; Add reference to GOT for PIC-ability |
|
extern _GLOBAL_OFFSET_TABLE_ |
|
; KFQ end |
|
|
| |
; total squared difference between two (16*h) blocks | ; total squared difference between two (16*h) blocks |
; including optional half pel interpolation of [ebp+8] ; blk1 (hx,hy) | ; including optional half pel interpolation of [ebp+8] ; blk1 (hx,hy) |
|
|
paddw mm3, mm6 | paddw mm3, mm6 |
paddw mm0, mm2 | paddw mm0, mm2 |
paddw mm1, mm3 | paddw mm1, mm3 |
movq mm6, [twos] |
; KFQ start |
|
; Retrieve the GOT, and get twos location relative to it |
|
call .get_GOT1 |
|
.get_GOT1: |
|
pop esi ; note; esi now overwritten so need to restore |
|
add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT1 wrt ..gotpc |
|
movq mm6, [esi+twos wrt ..gotoff] |
|
mov esi, [ebp+16] ; restore lx; this is only ever read |
|
; movq mm6, [twos] |
|
; KFQ end |
|
|
paddw mm0, mm6 ; round mm0 | paddw mm0, mm6 ; round mm0 |
paddw mm1, mm6 ; round mm1 | paddw mm1, mm6 ; round mm1 |
psrlw mm0, 2 | psrlw mm0, 2 |
|
|
paddw mm1, mm3 | paddw mm1, mm3 |
paddw mm2, mm4 | paddw mm2, mm4 |
| |
movq mm6, [twos] |
; KFQ start |
|
; Retrieve the GOT, and get twos location relative to it |
|
call .get_GOT2 |
|
.get_GOT2: |
|
pop esi ; note; esi now overwritten so need to restore |
|
add esi, _GLOBAL_OFFSET_TABLE_+$$-.get_GOT2 wrt ..gotpc |
|
movq mm6, [esi+twos wrt ..gotoff] |
|
mov esi, [ebp+16] ; restore lx; this is only ever read |
|
; movq mm6, [twos] |
|
; KFQ end |
paddw mm1, mm6 | paddw mm1, mm6 |
paddw mm2, mm6 | paddw mm2, mm6 |
| |