diff -urp libmpeg3-1.5.2-old/video/mmxidct.S libmpeg3-1.5.2/video/mmxidct.S --- libmpeg3-1.5.2-old/video/mmxidct.S 2002-06-21 14:35:24.000000000 +0200 +++ libmpeg3-1.5.2/video/mmxidct.S 2005-12-22 23:46:25.000000000 +0100 @@ -63,12 +63,25 @@ x0: .align 8 .text + +# undef __i686 /* gcc define gets in our way */ + .section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits +.globl __i686.get_pc_thunk.bp + .hidden __i686.get_pc_thunk.bp + .type __i686.get_pc_thunk.bp,@function +__i686.get_pc_thunk.bp: + movl (%esp), %ebp + ret + .align 4 .globl IDCT_mmx .type IDCT_mmx, @function IDCT_mmx: pushl %ebp - movl %esp, %ebp + + call __i686.get_pc_thunk.bp + addl $_GLOBAL_OFFSET_TABLE_, %ebp + pushl %ebx pushl %ecx pushl %edx @@ -84,8 +97,8 @@ IDCT_mmx: pushl $0 pushl $0 - movl 8(%ebp), %esi /* source matrix */ - leal preSC, %ecx + movl 8+13*4(%esp), %esi /* source matrix */ + leal preSC@GOTOFF(%ebp), %ecx /* column 0: even part * use V4, V12, V0, V8 to produce V22..V25 */ @@ -101,7 +114,7 @@ IDCT_mmx: movq %mm1, %mm2 /* added 11/1/96 */ pmulhw 8*8(%esi),%mm5 /* V8 */ psubsw %mm0, %mm1 /* V16 */ - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V18 */ paddsw %mm0, %mm2 /* V17 */ movq %mm2, %mm0 /* duplicate V17 */ psraw $1, %mm2 /* t75=t82 */ @@ -142,7 +155,7 @@ IDCT_mmx: paddsw %mm0, %mm3 /* V29 ; free mm0 */ movq %mm7, %mm1 /* duplicate V26 */ psraw $1, %mm3 /* t91=t94 */ - pmulhw x539f539f539f539f,%mm7 /* V33 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp),%mm7 /* V33 */ psraw $1, %mm1 /* t96 */ movq %mm5, %mm0 /* duplicate V2 */ psraw $2, %mm4 /* t85=t87 */ @@ -150,15 +163,15 @@ IDCT_mmx: psubsw %mm4, %mm0 /* V28 ; free mm4 */ movq %mm0, %mm2 /* duplicate V28 */ psraw $1, %mm5 /* t90=t93 */ - pmulhw x4546454645464546,%mm0 /* V35 */ + pmulhw x4546454645464546@GOTOFF(%ebp),%mm0 /* V35 */ psraw $1, %mm2 /* t97 */ movq %mm5, %mm4 /* duplicate t90=t93 */ psubsw %mm2, %mm1 /* V32 ; free mm2 */ - pmulhw x61f861f861f861f8,%mm1 /* V36 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp),%mm1 /* V36 */ psllw $1, %mm7 /* t107 */ paddsw %mm3, %mm5 /* V31 */ psubsw %mm3, %mm4 /* V30 ; free mm3 */ - pmulhw x5a825a825a825a82,%mm4 /* V34 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp),%mm4 /* V34 */ nop psubsw %mm1, %mm0 /* V38 */ psubsw %mm7, %mm1 /* V37 ; free mm7 */ @@ -225,7 +238,7 @@ IDCT_mmx: psubsw %mm7, %mm1 /* V50 */ pmulhw 8*9(%esi), %mm5 /* V9 */ paddsw %mm7, %mm2 /* V51 */ - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V52 */ movq %mm2, %mm6 /* duplicate V51 */ psraw $1, %mm2 /* t138=t144 */ movq %mm3, %mm4 /* duplicate V1 */ @@ -266,11 +279,11 @@ IDCT_mmx: * even more by doing the correction step in a later stage when the number * is actually multiplied by 16 */ - paddw x0005000200010001, %mm4 + paddw x0005000200010001@GOTOFF(%ebp), %mm4 psubsw %mm6, %mm3 /* V60 ; free mm6 */ psraw $1, %mm0 /* t154=t156 */ movq %mm3, %mm1 /* duplicate V60 */ - pmulhw x539f539f539f539f, %mm1 /* V67 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm1 /* V67 */ movq %mm5, %mm6 /* duplicate V3 */ psraw $2, %mm4 /* t148=t150 */ paddsw %mm4, %mm5 /* V61 */ @@ -279,13 +292,13 @@ IDCT_mmx: psllw $1, %mm1 /* t169 */ paddsw %mm0, %mm5 /* V65 -> result */ psubsw %mm0, %mm4 /* V64 ; free mm0 */ - pmulhw x5a825a825a825a82, %mm4 /* V68 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm4 /* V68 */ psraw $1, %mm3 /* t158 */ psubsw %mm6, %mm3 /* V66 */ movq %mm5, %mm2 /* duplicate V65 */ - pmulhw x61f861f861f861f8, %mm3 /* V70 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* V70 */ psllw $1, %mm6 /* t165 */ - pmulhw x4546454645464546, %mm6 /* V69 */ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm6 /* V69 */ psraw $1, %mm2 /* t172 */ /* moved from next block */ movq 8*5(%esi), %mm0 /* V56 */ @@ -410,7 +423,7 @@ IDCT_mmx: * movq 8*13(%esi), %mm4 tmt13 */ psubsw %mm4, %mm3 /* V134 */ - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm3 /* 23170 ->V136 */ movq 8*9(%esi), %mm6 /* tmt9 */ paddsw %mm4, %mm5 /* V135 ; mm4 free */ movq %mm0, %mm4 /* duplicate tmt1 */ @@ -439,17 +452,17 @@ IDCT_mmx: psubsw %mm7, %mm0 /* V144 */ movq %mm0, %mm3 /* duplicate V144 */ paddsw %mm7, %mm2 /* V147 ; free mm7 */ - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V151 */ movq %mm1, %mm7 /* duplicate tmt3 */ paddsw %mm5, %mm7 /* V145 */ psubsw %mm5, %mm1 /* V146 ; free mm5 */ psubsw %mm1, %mm3 /* V150 */ movq %mm7, %mm5 /* duplicate V145 */ - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm1 /* 17734-> V153 */ psubsw %mm2, %mm5 /* V148 */ - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* 25080-> V154 */ psllw $2, %mm0 /* t311 */ - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm5 /* 23170-> V152 */ paddsw %mm2, %mm7 /* V149 ; free mm2 */ psllw $1, %mm1 /* t313 */ nop /* without the nop - freeze here for one clock */ @@ -557,15 +570,15 @@ IDCT_mmx: paddsw %mm4, %mm3 /* V113 ; free mm4 */ movq %mm0, %mm4 /* duplicate V110 */ paddsw %mm1, %mm2 /* V111 */ - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ + pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V117 */ psubsw %mm1, %mm5 /* V112 ; free mm1 */ psubsw %mm5, %mm4 /* V116 */ movq %mm2, %mm1 /* duplicate V111 */ - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ + pmulhw x4546454645464546@GOTOFF(%ebp), %mm5 /* 17734-> V119 */ psubsw %mm3, %mm2 /* V114 */ - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ + pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm4 /* 25080-> V120 */ paddsw %mm3, %mm1 /* V115 ; free mm3 */ - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm2 /* 23170-> V118 */ psllw $2, %mm0 /* t266 */ movq %mm1, (%esi) /* save V115 */ psllw $1, %mm5 /* t268 */ @@ -583,7 +596,7 @@ IDCT_mmx: movq %mm6, %mm3 /* duplicate tmt4 */ psubsw %mm0, %mm6 /* V100 */ paddsw %mm0, %mm3 /* V101 ; free mm0 */ - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ + pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm6 /* 23170 ->V102 */ movq %mm7, %mm5 /* duplicate tmt0 */ movq 8*8(%esi), %mm1 /* tmt8 */ paddsw %mm1, %mm7 /* V103 */ @@ -667,9 +680,8 @@ IDCT_mmx: popl %edx popl %ecx popl %ebx - movl %ebp, %esp popl %ebp ret .Lfe1: .size IDCT_mmx,.Lfe1-IDCT_mmx diff -urp libmpeg3-1.5.2-old/video/reconmmx.s libmpeg3-1.5.2/video/reconmmx.s --- libmpeg3-1.5.2-old/video/reconmmx.s 2002-06-21 14:35:24.000000000 +0200 +++ libmpeg3-1.5.2/video/reconmmx.s 2005-12-22 23:53:17.000000000 +0100 @@ -32,18 +32,28 @@ global add_block_mmx global set_block_mmx +extern _GLOBAL_OFFSET_TABLE_ +get_pc.bp: + mov ebp, [esp] + retn + align 16 rech_mmx: push esi push edi push ecx push ebx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] - movq mm5, [MASK_AND] - movq mm6, [ADD_1] + movq mm5, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .rech1: movq mm0,[esi] movq mm1,[esi+1] @@ -68,6 +78,7 @@ rech_mmx: dec ecx jnz .rech1 emms + pop ebp pop ebx pop ecx pop edi @@ -80,13 +91,18 @@ rechc_mmx: push edi push ecx push ebx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + ; sub esp, LocalFrameSize mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] - movq mm5, [MASK_AND] - movq mm6, [ADD_1] + movq mm5, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .rechc1: movq mm0,[esi] movq mm1,[esi+1] @@ -103,6 +119,7 @@ rechc_mmx: jnz .rechc1 emms ; add esp, LocalFrameSize + pop ebp pop ebx pop ecx pop edi @@ -125,13 +142,18 @@ recva_mmx: push ecx push ebx push edx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] mov edx, [esp+lx] - movq mm7, [MASK_AND] - movq mm6, [ADD_1] + movq mm7, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .recva1: movq mm0,[esi] movq mm1,[esi+edx] @@ -170,6 +192,7 @@ recva_mmx: dec ecx jnz near .recva1 emms + pop ebp pop edx pop ebx pop ecx @@ -184,13 +207,18 @@ recvac_mmx: push ecx push ebx push edx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+source] mov edi, [esp+dest] mov ecx, [esp+h] mov ebx, [esp+lx2] mov edx, [esp+lx] - movq mm5, [MASK_AND] - movq mm6, [ADD_1] + movq mm5, [ebp + MASK_AND wrt ..gotoff] + movq mm6, [ebp + ADD_1 wrt ..gotoff] .recvac1: movq mm0,[esi] movq mm1,[esi+edx] @@ -213,6 +241,7 @@ recvac_mmx: dec ecx jnz .recvac1 emms + pop ebp pop edx pop ebx pop ecx @@ -269,10 +298,15 @@ set_block_mmx: push ecx push ebx push edx + push ebp + + call get_pc.bp + add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + mov esi, [esp+bp] mov edi, [esp+rfp] mov ebx, [esp+iincr] - movq mm7, [PLUS_128] + movq mm7, [ebp + PLUS_128 wrt ..gotoff] %rep 4 movq mm0, [esi] movq mm1, [esi+8] @@ -291,6 +325,7 @@ set_block_mmx: add edi, ebx %endrep emms + pop ebp pop edx pop ebx pop ecx