Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 337724 Details for
Bug 452482
media-video/libav-0.8.5 ABI=x32 - libavutil/x86/cpu.c:52: Error: invalid instruction suffix for `pushf'
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
=media-video/libav-0.8.5 x32 abi support
libav-0.8.5-x32.patch (text/plain), 36.55 KB, created by
camper
on 2013-02-03 00:04:06 UTC
(
hide
)
Description:
=media-video/libav-0.8.5 x32 abi support
Filename:
libav-0.8.5-x32.patch
MIME Type:
text/plain
Creator:
camper
Created:
2013-02-03 00:04:06 UTC
Size:
36.55 KB
patch
obsolete
>From adb22e7ae3bd65e96c4f4bcf7f011532979ab9a7 Mon Sep 17 00:00:00 2001 >From: =?UTF-8?q?Matthias=20R=C3=A4ncker?= <theonetruecamper@gmx.de> >Date: Sat, 2 Feb 2013 22:13:14 +0100 >Subject: [PATCH] add x32 abi support >MIME-Version: 1.0 >Content-Type: text/plain; charset=UTF-8 >Content-Transfer-Encoding: 8bit > > >Signed-off-by: Matthias Räncker <theonetruecamper@gmx.de> >--- > configure | 12 +++++- > libavcodec/x86/dsputil_yasm.asm | 40 +++++++++---------- > libavcodec/x86/fft_mmx.asm | 30 +++++--------- > libavcodec/x86/fmtconvert.asm | 32 +++++++-------- > libavcodec/x86/h264_idct.asm | 18 ++++----- > libavcodec/x86/h264_idct_10bit.asm | 6 +-- > libavcodec/x86/mlpdsp.c | 4 +- > libavutil/x86/cpu.c | 4 +- > libavutil/x86/x86inc.asm | 34 +++++++++++++++- > libavutil/x86_cpu.h | 20 +++++++++- > libswscale/x86/output.asm | 4 +- > libswscale/x86/swscale_template.c | 80 +++++++++++++++++++------------------- > 12 files changed, 164 insertions(+), 120 deletions(-) > >diff --git a/configure b/configure >index 2d17ce0..21c5645 100755 >--- a/configure >+++ b/configure >@@ -1023,6 +1023,8 @@ ARCH_LIST=' > x86 > x86_32 > x86_64 >+ x86_64_x32 >+ x86_64_x64 > ' > > ARCH_EXT_LIST=' >@@ -2398,9 +2400,14 @@ case "$arch" in > x86) > subarch="x86_32" > check_cc <<EOF && subarch="x86_64" >- int test[(int)sizeof(char*) - 7]; >+ #ifndef __x86_64__ >+ int test[-1]; >+ #endif > EOF > if test "$subarch" = "x86_64"; then >+ check_cc <<EOF && subarch="x86_64 x86_64_x64" || subarch="x86_64 x86_64_x32" >+ int test[(int)sizeof(char*) - 7]; >+EOF > spic=$shared > fi > ;; >@@ -2770,7 +2777,8 @@ EOF > > if ! 
disabled_any asm mmx yasm; then > if check_cmd $yasmexe --version; then >- enabled x86_64 && yasm_extra="-m amd64" >+ enabled x86_64_x64 && yasm_extra="-m amd64" >+ enabled x86_64_x32 && yasm_extra="-m x32" > yasm_debug="-g dwarf2" > elif check_cmd nasm -v; then > yasmexe=nasm >diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm >index 4d2fb6a..57c8f45 100644 >--- a/libavcodec/x86/dsputil_yasm.asm >+++ b/libavcodec/x86/dsputil_yasm.asm >@@ -505,13 +505,13 @@ cglobal emu_edge_core, 6, 7, 1 > %else > %define w_reg r6 > cglobal emu_edge_core, 2, 7, 0 >- mov r4, r4m ; end_y >- mov r5, r5m ; block_h >+ mov r4p, r4m ; end_y >+ mov r5p, r5m ; block_h > %endif > > ; start with vertical extend (top/bottom) and body pixel copy >- mov w_reg, r7m >- sub w_reg, r6m ; w = start_x - end_x >+ mov preg(w_reg), r7m >+ sub preg(w_reg), r6m ; w = start_x - end_x > sub r5, r4 > %ifdef ARCH_X86_64 > sub r4, r3 >@@ -534,7 +534,7 @@ cglobal emu_edge_core, 2, 7, 0 > .v_extend_end: > > ; horizontal extend (left/right) >- mov w_reg, r6m ; start_x >+ mov preg(w_reg), r6m ; start_x > sub r0, w_reg > %ifdef ARCH_X86_64 > mov r3, r0 ; backup of buf+block_h*linesize >@@ -568,8 +568,8 @@ cglobal emu_edge_core, 2, 7, 0 > mov r0, r0m > mov r5, r5m > %endif >- mov w_reg, r7m ; end_x >- mov r1, r8m ; block_w >+ mov preg(w_reg), r7m ; end_x >+ mov r1p, r8m ; block_w > mov r4, r1 > sub r1, w_reg > jz .h_extend_end ; if (end_x == block_w) goto h_extend_end >@@ -750,7 +750,7 @@ ALIGN 128 > READ_NUM_BYTES top, %%n ; read bytes > .emuedge_extend_top_ %+ %%n %+ _loop: ; do { > WRITE_NUM_BYTES top, %%n ; write bytes >- add r0 , r2 ; dst += linesize >+ add r0p , r2p ; dst += linesize > %ifdef ARCH_X86_64 > dec r3d > %else ; ARCH_X86_32 >@@ -762,19 +762,19 @@ ALIGN 128 > .emuedge_copy_body_ %+ %%n %+ _loop: ; do { > READ_NUM_BYTES body, %%n ; read bytes > WRITE_NUM_BYTES body, %%n ; write bytes >- add r0 , r2 ; dst += linesize >- add r1 , r2 ; src += linesize >+ add r0p , r2p ; dst += 
linesize >+ add r1p , r2p ; src += linesize > dec r4d > jnz .emuedge_copy_body_ %+ %%n %+ _loop ; } while (--end_y) > > ; copy bottom pixels > test r5 , r5 ; if (!block_h) > jz .emuedge_v_extend_end_ %+ %%n ; goto end >- sub r1 , r2 ; src -= linesize >+ sub r1p , r2p ; src -= linesize > READ_NUM_BYTES bottom, %%n ; read bytes > .emuedge_extend_bottom_ %+ %%n %+ _loop: ; do { > WRITE_NUM_BYTES bottom, %%n ; write bytes >- add r0 , r2 ; dst += linesize >+ add r0p , r2p ; dst += linesize > dec r5d > jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h) > >@@ -836,7 +836,7 @@ ALIGN 128 > %rep 11 > ALIGN 64 > .emuedge_extend_left_ %+ %%n: ; do { >- sub r0, r2 ; dst -= linesize >+ sub r0p, r2p ; dst -= linesize > READ_V_PIXEL %%n, [r0+r1] ; read pixels > WRITE_V_PIXEL %%n, r0 ; write pixels > dec r5 >@@ -857,7 +857,7 @@ ALIGN 64 > ALIGN 64 > .emuedge_extend_right_ %+ %%n: ; do { > %ifdef ARCH_X86_64 >- sub r3, r2 ; dst -= linesize >+ sub r3p, r2p ; dst -= linesize > READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels > WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels > dec r11 >@@ -907,7 +907,7 @@ ALIGN 64 > > %macro V_COPY_ROW 2 > %ifidn %1, bottom >- sub r1, linesize >+ sub r1p, linesize > %endif > .%1_copy_loop: > xor cnt_reg, cnt_reg >@@ -917,7 +917,7 @@ ALIGN 64 > %else ; sse > V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0 > %ifdef ARCH_X86_64 >-%define linesize r2 >+%define linesize r2p > V_COPY_NPX %1, rax , mov, 8 > %else ; ARCH_X86_32 > %define linesize r2m >@@ -929,9 +929,9 @@ ALIGN 64 > V_COPY_NPX %1, vall, mov, 1 > mov w_reg, cnt_reg > %ifidn %1, body >- add r1, linesize >+ add r1p, linesize > %endif >- add r0, linesize >+ add r0p, linesize > dec %2 > jnz .%1_copy_loop > %endmacro >@@ -978,7 +978,7 @@ ALIGN 64 > .slow_left_extend_loop: > ; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r10/r6=start_x > mov r4, 8 >- sub r0, linesize >+ sub r0p, linesize > READ_V_PIXEL 8, [r0+w_reg] > .left_extend_8px_loop: > movq [r0+r4-8], mm0 >@@ 
-1014,7 +1014,7 @@ ALIGN 64 > %define bh_reg r5 > %endif > lea r1, [r4-8] >- sub buf_reg, linesize >+ sub preg(buf_reg), linesize > READ_V_PIXEL 8, [buf_reg+w_reg-1] > .right_extend_8px_loop: > movq [buf_reg+r1], mm0 >diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm >index d6d07b8..6b9b9c2 100644 >--- a/libavcodec/x86/fft_mmx.asm >+++ b/libavcodec/x86/fft_mmx.asm >@@ -30,21 +30,15 @@ > > %include "x86inc.asm" > >-%ifdef ARCH_X86_64 >-%define pointer resq >-%else >-%define pointer resd >-%endif >- > struc FFTContext > .nbits: resd 1 > .reverse: resd 1 >- .revtab: pointer 1 >- .tmpbuf: pointer 1 >+ .revtab: resp 1 >+ .tmpbuf: resp 1 > .mdctsize: resd 1 > .mdctbits: resd 1 >- .tcos: pointer 1 >- .tsin: pointer 1 >+ .tcos: resp 1 >+ .tsin: resp 1 > endstruc > > SECTION_RODATA >@@ -73,12 +67,6 @@ cextern cos_ %+ i > %assign i i<<1 > %endrep > >-%ifdef ARCH_X86_64 >- %define pointer dq >-%else >- %define pointer dd >-%endif >- > %macro IF0 1+ > %endmacro > %macro IF1 1+ >@@ -584,7 +572,7 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0 > > %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs > lea r2, [dispatch_tab%1] >- mov r2, [r2 + (%2q-2)*gprsize] >+ mov r2p, [r2 + (%2q-2)*ptrsize] > %ifdef PIC > lea r3, [$$] > add r2, r3 >@@ -624,7 +612,7 @@ fft %+ n %+ %3%2: > %undef n > > align 8 >-dispatch_tab%3%2: pointer list_of_fft >+dispatch_tab%3%2: dp list_of_fft > > section .text > >@@ -765,8 +753,8 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * > mov r3d, [r0+FFTContext.mdctsize] > add r2, r3 > shr r3, 1 >- mov rtcos, [r0+FFTContext.tcos] >- mov rtsin, [r0+FFTContext.tsin] >+ mov preg(rtcos), [r0+FFTContext.tcos] >+ mov preg(rtsin), [r0+FFTContext.tsin] > add rtcos, r3 > add rtsin, r3 > %ifndef ARCH_X86_64 >@@ -774,7 +762,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * > push rtsin > %endif > shr r3, 1 >- mov rrevtab, [r0+FFTContext.revtab] >+ mov preg(rrevtab), [r0+FFTContext.revtab] > 
add rrevtab, r3 > %ifndef ARCH_X86_64 > push rrevtab >diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm >index d621089..32dc500 100644 >--- a/libavcodec/x86/fmtconvert.asm >+++ b/libavcodec/x86/fmtconvert.asm >@@ -122,8 +122,8 @@ FLOAT_TO_INT16 3dnow, 0 > %macro FLOAT_TO_INT16_INTERLEAVE2 1 > cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len > lea lenq, [4*r2q] >- mov src1q, [src0q+gprsize] >- mov src0q, [src0q] >+ mov src1p, [src0q+ptrsize] >+ mov src0p, [src0q] > add dstq, lenq > add src0q, lenq > add src1q, lenq >@@ -186,12 +186,12 @@ cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, > %else > %define lend dword r2m > %endif >- mov src1q, [srcq+1*gprsize] >- mov src2q, [srcq+2*gprsize] >- mov src3q, [srcq+3*gprsize] >- mov src4q, [srcq+4*gprsize] >- mov src5q, [srcq+5*gprsize] >- mov srcq, [srcq] >+ mov src1p, [srcq+1*ptrsize] >+ mov src2p, [srcq+2*ptrsize] >+ mov src3p, [srcq+3*ptrsize] >+ mov src4p, [srcq+4*ptrsize] >+ mov src5p, [srcq+5*ptrsize] >+ mov srcp, [srcq] > sub src1q, srcq > sub src2q, srcq > sub src3q, srcq >@@ -247,12 +247,12 @@ cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5 > %else > %define lend dword r2m > %endif >- mov src1q, [srcq+1*gprsize] >- mov src2q, [srcq+2*gprsize] >- mov src3q, [srcq+3*gprsize] >- mov src4q, [srcq+4*gprsize] >- mov src5q, [srcq+5*gprsize] >- mov srcq, [srcq] >+ mov src1p, [srcq+1*ptrsize] >+ mov src2p, [srcq+2*ptrsize] >+ mov src3p, [srcq+3*ptrsize] >+ mov src4p, [srcq+4*ptrsize] >+ mov src5p, [srcq+5*ptrsize] >+ mov srcp, [srcq] > sub src1q, srcq > sub src2q, srcq > sub src3q, srcq >@@ -325,8 +325,8 @@ FLOAT_INTERLEAVE6 sse, 7 > > %macro FLOAT_INTERLEAVE2 2 > cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1 >- mov src1q, [srcq+gprsize] >- mov srcq, [srcq ] >+ mov src1p, [srcq+ptrsize] >+ mov srcp, [srcq ] > sub src1q, srcq > .loop > MOVPS m0, [srcq ] >diff --git a/libavcodec/x86/h264_idct.asm 
b/libavcodec/x86/h264_idct.asm >index 37c2c90..f44d76a 100644 >--- a/libavcodec/x86/h264_idct.asm >+++ b/libavcodec/x86/h264_idct.asm >@@ -624,9 +624,9 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0 > mov r5, 32 > add r2, 384 > %ifdef ARCH_X86_64 >- add r10, gprsize >+ add r10, ptrsize > %else >- add r0mp, gprsize >+ add r0mp, ptrsize > %endif > call h264_idct_add8_mmx_plane > RET >@@ -639,7 +639,7 @@ h264_idct_add8_mmx2_plane > jz .try_dc > %ifdef ARCH_X86_64 > mov r0d, dword [r1+r5*4] >- add r0, [r10] >+ add r0p, [r10] > %else > mov r0, r1m ; XXX r1m here is actually r0m of the calling func > mov r0, [r0] >@@ -687,9 +687,9 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0 > mov r5, 32 > add r2, 384 > %ifdef ARCH_X86_64 >- add r10, gprsize >+ add r10, ptrsize > %else >- add r0mp, gprsize >+ add r0mp, ptrsize > %endif > call h264_idct_add8_mmx2_plane > RET >@@ -819,7 +819,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 > jz .try%1dc > %ifdef ARCH_X86_64 > mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] >- add r0, [r10] >+ add r0p, [r10] > %else > mov r0, r0m > mov r0, [r0] >@@ -833,7 +833,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 > jz .cycle%1end > %ifdef ARCH_X86_64 > mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] >- add r0, [r10] >+ add r0p, [r10] > %else > mov r0, r0m > mov r0, [r0] >@@ -858,9 +858,9 @@ cglobal h264_idct_add8_8_sse2, 5, 7, 8 > add8_sse2_cycle 0, 0x34 > add8_sse2_cycle 1, 0x3c > %ifdef ARCH_X86_64 >- add r10, gprsize >+ add r10, ptrsize > %else >- add r0mp, gprsize >+ add r0mp, ptrsize > %endif > add8_sse2_cycle 2, 0x5c > add8_sse2_cycle 3, 0x64 >diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm >index 54636a9..fc26e18 100644 >--- a/libavcodec/x86/h264_idct_10bit.asm >+++ b/libavcodec/x86/h264_idct_10bit.asm >@@ -320,15 +320,15 @@ cglobal h264_idct_add8_10_%1,5,7 > mov r10, r0 > %endif > add r2, 1024 >- mov r0, [r0] >+ mov r0p, [r0] > ADD16_OP_INTRA %1, 16, 4+ 6*8 > ADD16_OP_INTRA %1, 18, 4+ 7*8 > add r2, 1024-128*2 > 
%ifdef ARCH_X86_64 >- mov r0, [r10+gprsize] >+ mov r0p, [r10+ptrsize] > %else > mov r0, r0m >- mov r0, [r0+gprsize] >+ mov r0, [r0+ptrsize] > %endif > ADD16_OP_INTRA %1, 32, 4+11*8 > ADD16_OP_INTRA %1, 34, 4+12*8 >diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c >index 400855d..75c3db1 100644 >--- a/libavcodec/x86/mlpdsp.c >+++ b/libavcodec/x86/mlpdsp.c >@@ -156,8 +156,8 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff, > /* 2*/"+r"(sample_buffer), > #if ARCH_X86_64 > /* 3*/"+r"(blocksize) >- : /* 4*/"r"((x86_reg)mask), /* 5*/"r"(firjump), >- /* 6*/"r"(iirjump) , /* 7*/"c"(filter_shift) >+ : /* 4*/"r"((x86_native_reg)mask), /* 5*/"r"((x86_native_reg)(uintptr_t)firjump), >+ /* 6*/"r"((x86_native_reg)(uintptr_t)iirjump), /* 7*/"c"(filter_shift) > , /* 8*/"r"((int64_t)coeff[0]) > , /* 9*/"r"((int64_t)coeff[1]) > , /*10*/"r"((int64_t)coeff[2]) >diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c >index 2424fe4..dad80d5 100644 >--- a/libavutil/x86/cpu.c >+++ b/libavutil/x86/cpu.c >@@ -28,9 +28,9 @@ > /* ebx saving is necessary for PIC. 
gcc seems unable to see it alone */ > #define cpuid(index,eax,ebx,ecx,edx)\ > __asm__ volatile\ >- ("mov %%"REG_b", %%"REG_S"\n\t"\ >+ ("mov %%"REG_rb", %%"REG_rS"\n\t"\ > "cpuid\n\t"\ >- "xchg %%"REG_b", %%"REG_S\ >+ "xchg %%"REG_rb", %%"REG_rS\ > : "=a" (eax), "=S" (ebx),\ > "=c" (ecx), "=d" (edx)\ > : "0" (index)); >diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm >index 6941c1a..a25662b 100644 >--- a/libavutil/x86/x86inc.asm >+++ b/libavutil/x86/x86inc.asm >@@ -120,12 +120,17 @@ > > ; registers: > ; rN and rNq are the native-size register holding function argument N >-; rNd, rNw, rNb are dword, word, and byte size >+; rNp, rNd, rNw, rNb are pointer, dword, word, and byte size > ; rNm is the original location of arg N (a register or on the stack), dword > ; rNmp is native size > > %macro DECLARE_REG 6 > %define r%1q %2 >+ %ifdef ARCH_X86_64_X64 >+ %define r%1p %2 >+ %else >+ %define r%1p %3 >+ %endif > %define r%1d %3 > %define r%1w %4 > %define r%1b %5 >@@ -143,6 +148,12 @@ > %macro DECLARE_REG_SIZE 2 > %define r%1q r%1 > %define e%1q r%1 >+ %ifdef ARCH_X86_64_X64 >+ %define r%1p r%1 >+ %else >+ %define r%1p e%1 >+ %endif >+ %define e%1p e%1 > %define r%1d e%1 > %define e%1d e%1 > %define r%1w %1 >@@ -176,6 +187,11 @@ DECLARE_REG_SIZE bp, bpl > %macro DECLARE_REG_TMP_SIZE 0-* > %rep %0 > %define t%1q t%1 %+ q >+ %ifdef ARCH_X86_64_X64 >+ %define t%1p t%1 >+ %else >+ %define t%1p t%1 %+ d >+ %endif > %define t%1d t%1 %+ d > %define t%1w t%1 %+ w > %define t%1b t%1 %+ b >@@ -191,6 +207,20 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 > %define gprsize 4 > %endif > >+%ifdef ARCH_X86_64_X64 >+ %define ptrsize 8 >+ %define pword qword >+ %define dp dq >+ %define resp resq >+ %define preg(x) x >+%else >+ %define ptrsize 4 >+ %define pword dword >+ %define dp dd >+ %define resp resd >+ %define preg(x) x %+ d >+%endif >+ > %macro PUSH 1 > push %1 > %assign stack_offset stack_offset+gprsize >@@ -238,6 +268,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 > 
%assign %%i 0 > %rep n_arg_names > CAT_UNDEF arg_name %+ %%i, q >+ CAT_UNDEF arg_name %+ %%i, p > CAT_UNDEF arg_name %+ %%i, d > CAT_UNDEF arg_name %+ %%i, w > CAT_UNDEF arg_name %+ %%i, b >@@ -250,6 +281,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 > %assign %%i 0 > %rep %0 > %xdefine %1q r %+ %%i %+ q >+ %xdefine %1p r %+ %%i %+ p > %xdefine %1d r %+ %%i %+ d > %xdefine %1w r %+ %%i %+ w > %xdefine %1b r %+ %%i %+ b >diff --git a/libavutil/x86_cpu.h b/libavutil/x86_cpu.h >index f84eba6..50f2646 100644 >--- a/libavutil/x86_cpu.h >+++ b/libavutil/x86_cpu.h >@@ -24,7 +24,7 @@ > #include <stdint.h> > #include "config.h" > >-#if ARCH_X86_64 >+#if ARCH_X86_64_X64 > # define OPSIZE "q" > # define REG_a "rax" > # define REG_b "rbx" >@@ -32,6 +32,7 @@ > # define REG_d "rdx" > # define REG_D "rdi" > # define REG_S "rsi" >+# define REG_8 "r8" > # define PTR_SIZE "8" > typedef int64_t x86_reg; > >@@ -43,8 +44,9 @@ typedef int64_t x86_reg; > # define REGc rcx > # define REGd rdx > # define REGSP rsp >+# define REG8 r8 > >-#elif ARCH_X86_32 >+#elif ARCH_X86_32 || ARCH_X86_64_X32 > > # define OPSIZE "l" > # define REG_a "eax" >@@ -53,6 +55,7 @@ typedef int64_t x86_reg; > # define REG_d "edx" > # define REG_D "edi" > # define REG_S "esi" >+# define REG_8 "r8d" > # define PTR_SIZE "4" > typedef int32_t x86_reg; > >@@ -64,10 +67,23 @@ typedef int32_t x86_reg; > # define REGc ecx > # define REGd edx > # define REGSP esp >+# define REG8 r8d > #else > typedef int x86_reg; > #endif > >+#if ARCH_X86_64 >+# define REG_rb "rbx" >+# define REG_rS "rsi" >+# define REG_rBP "rbp" >+typedef int64_t x86_native_reg; >+#elif ARCH_X86_32 >+# define REG_rb "ebx" >+# define REG_rS "esi" >+# define REG_rBP "ebp" >+typedef int32_t x86_native_reg; >+#endif >+ > #define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE)) > #define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE)) > >diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm >index 
ae2929c..7e18d1b 100644 >--- a/libswscale/x86/output.asm >+++ b/libswscale/x86/output.asm >@@ -149,14 +149,14 @@ cglobal yuv2planeX_%1, %3, 7, %2 > movsx cntr_reg, r1m > .filterloop_ %+ %%i: > ; input pixels >- mov r6, [r2+gprsize*cntr_reg-2*gprsize] >+ mov r6p, [r2+ptrsize*cntr_reg-2*ptrsize] > %if %1 == 16 > mova m3, [r6+r5*4] > mova m5, [r6+r5*4+mmsize] > %else ; %1 == 8/9/10 > mova m3, [r6+r5*2] > %endif ; %1 == 8/9/10/16 >- mov r6, [r2+gprsize*cntr_reg-gprsize] >+ mov r6p, [r2+ptrsize*cntr_reg-ptrsize] > %if %1 == 16 > mova m4, [r6+r5*4] > mova m6, [r6+r5*4+mmsize] >diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c >index 40188d8..d037738 100644 >--- a/libswscale/x86/swscale_template.c >+++ b/libswscale/x86/swscale_template.c >@@ -774,12 +774,12 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], > const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1]; > #if ARCH_X86_64 > __asm__ volatile( >- YSCALEYUV2RGB(%%r8, %5) >- YSCALEYUV2RGB_YA(%%r8, %5, %6, %7) >+ YSCALEYUV2RGB(%%REG8, %5) >+ YSCALEYUV2RGB_YA(%%REG8, %5, %6, %7) > "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ > "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ > "packuswb %%mm7, %%mm1 \n\t" >- WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) >+ WRITEBGR32(%4, 8280(%5), %%REG8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest), > "a" (&c->redDither), > "r" (abuf0), "r" (abuf1) >@@ -791,7 +791,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB(%%REGBP, %5) > "push %0 \n\t" > "push %1 \n\t" >@@ -804,7 +804,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], > "pop %1 \n\t" > "pop %0 \n\t" > WRITEBGR32(%%REGb, 8280(%5), %%REGBP, 
%%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -814,11 +814,11 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB(%%REGBP, %5) > "pcmpeqd %%mm7, %%mm7 \n\t" > WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -838,11 +838,11 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > WRITEBGR24(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -861,7 +861,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ >@@ -871,7 +871,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], > "paddusb "RED_DITHER"(%5), %%mm5 \n\t" > #endif > WRITERGB15(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" 
(&c->redDither) >@@ -890,7 +890,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ >@@ -900,7 +900,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], > "paddusb "RED_DITHER"(%5), %%mm5 \n\t" > #endif > WRITERGB16(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -959,10 +959,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2PACKED(%%REGBP, %5) > WRITEYUY2(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1100,11 +1100,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1(%%REGBP, %5) > YSCALEYUV2RGB1_ALPHA(%%REGBP) > WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1113,11 +1113,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" 
\n\t" > YSCALEYUV2RGB1(%%REGBP, %5) > "pcmpeqd %%mm7, %%mm7 \n\t" > WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1128,11 +1128,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1b(%%REGBP, %5) > YSCALEYUV2RGB1_ALPHA(%%REGBP) > WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1141,11 +1141,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1b(%%REGBP, %5) > "pcmpeqd %%mm7, %%mm7 \n\t" > WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1166,11 +1166,11 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > WRITEBGR24(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" 
(&c->redDither) >@@ -1179,11 +1179,11 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1b(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > WRITEBGR24(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1203,7 +1203,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ >@@ -1213,7 +1213,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, > "paddusb "RED_DITHER"(%5), %%mm5 \n\t" > #endif > WRITERGB15(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1222,7 +1222,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1b(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ >@@ -1232,7 +1232,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, > "paddusb "RED_DITHER"(%5), %%mm5 \n\t" > #endif > WRITERGB15(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1252,7 +1252,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const 
int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ >@@ -1262,7 +1262,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, > "paddusb "RED_DITHER"(%5), %%mm5 \n\t" > #endif > WRITERGB16(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1271,7 +1271,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2RGB1b(%%REGBP, %5) > "pxor %%mm7, %%mm7 \n\t" > /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ >@@ -1281,7 +1281,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, > "paddusb "RED_DITHER"(%5), %%mm5 \n\t" > #endif > WRITERGB16(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1338,10 +1338,10 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push %%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2PACKED1(%%REGBP, %5) > WRITEYUY2(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1350,10 +1350,10 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, > __asm__ volatile( > "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" > "mov %4, %%"REG_b" \n\t" >- "push 
%%"REG_BP" \n\t" >+ "push %%"REG_rBP" \n\t" > YSCALEYUV2PACKED1b(%%REGBP, %5) > WRITEYUY2(%%REGb, 8280(%5), %%REGBP) >- "pop %%"REG_BP" \n\t" >+ "pop %%"REG_rBP" \n\t" > "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" > :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), > "a" (&c->redDither) >@@ -1510,7 +1510,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, > { > int32_t *filterPos = c->hLumFilterPos; > int16_t *filter = c->hLumFilter; >- void *mmx2FilterCode= c->lumMmx2FilterCode; >+ x86_native_reg mmx2FilterCode = (uintptr_t)c->lumMmx2FilterCode; > int i; > #if defined(PIC) > uint64_t ebxsave; >-- >1.8.1.2 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 452482
:
335800
|
335802
| 337724 |
337730