From c9e46cbb8fab6a655b6da6e8b53db3e5734a40ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20R=C3=A4ncker?= Date: Sun, 3 Feb 2013 00:52:49 +0100 Subject: [PATCH] add x32 abi support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Matthias Räncker --- configure | 14 +++- libavcodec/x86/cabac.h | 2 +- libavcodec/x86/fft.asm | 44 +++++-------- libavcodec/x86/fmtconvert.asm | 32 ++++----- libavcodec/x86/h264_idct.asm | 20 +++--- libavcodec/x86/h264_idct_10bit.asm | 6 +- libavcodec/x86/mlpdsp.c | 4 +- libavcodec/x86/videodsp.asm | 40 +++++------ libavresample/x86/audio_convert.asm | 128 ++++++++++++++++++------------------ libavresample/x86/audio_mix.asm | 48 +++++++------- libavutil/x86/asm.h | 28 +++++++- libavutil/x86/cpu.c | 4 +- libavutil/x86/x86inc.asm | 34 +++++++++- libswscale/x86/output.asm | 4 +- libswscale/x86/swscale_template.c | 80 +++++++++++----------- 15 files changed, 271 insertions(+), 217 deletions(-) diff --git a/configure b/configure index e87a326..a3337fe 100755 --- a/configure +++ b/configure @@ -1112,6 +1112,8 @@ ARCH_LIST=' x86 x86_32 x86_64 + x86_64_x32 + x86_64_x64 ' ARCH_EXT_LIST_ARM=' @@ -2808,7 +2810,14 @@ case "$arch" in spic=$shared ;; x86) - check_64bit x86_32 x86_64 'sizeof(void *) > 4' + check_64bit x86_32 'x86_64 x86_64_x64' 'sizeof(void *) > 4' + if test "$subarch" = "x86_32"; then + check_64bit x86_32 'x86_64 x86_64_x32' ' + #ifdef __x86_64__ + 1 + #endif + ' + fi if test "$subarch" = "x86_64"; then spic=$shared fi @@ -3274,7 +3283,8 @@ EOF if ! disabled_any asm mmx yasm; then if check_cmd $yasmexe --version; then - enabled x86_64 && yasm_extra="-m amd64" + enabled x86_64_x64 && yasm_extra="-m amd64" + enabled x86_64_x32 && yasm_extra="-m x32" yasm_debug="-g dwarf2" elif check_cmd nasm -v; then yasmexe=nasm diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index a74cf0b..b7ab738 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -30,7 +30,7 @@ #if HAVE_INLINE_ASM #ifdef BROKEN_RELOCATIONS -#define TABLES_ARG , "r"(tables) +#define TABLES_ARG , "r"((x86_native_reg)(uintptr_t)tables) #if HAVE_FAST_CMOV #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \ diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index c87752b..8c5eda8 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -30,25 +30,19 @@ %include "libavutil/x86/x86util.asm" -%if ARCH_X86_64 -%define pointer resq -%else -%define pointer resd -%endif - struc FFTContext .nbits: resd 1 .reverse: resd 1 - .revtab: pointer 1 - .tmpbuf: pointer 1 + .revtab: resp 1 + .tmpbuf: resp 1 .mdctsize: resd 1 .mdctbits: resd 1 - .tcos: pointer 1 - .tsin: pointer 1 - .fftperm: pointer 1 - .fftcalc: pointer 1 - .imdctcalc:pointer 1 - .imdcthalf:pointer 1 + .tcos: resp 1 + .tsin: resp 1 + .fftperm: resp 1 + .fftcalc: resp 1 + .imdctcalc:resp 1 + .imdcthalf:resp 1 endstruc SECTION_RODATA @@ -78,12 +72,6 @@ cextern cos_ %+ i %assign i i<<1 %endrep -%if ARCH_X86_64 - %define pointer dq -%else - %define pointer dd -%endif - %macro IF0 1+ %endmacro %macro IF1 1+ @@ -527,7 +515,7 @@ DEFINE_ARGS zc, w, n, o1, o3 %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs lea r2, [dispatch_tab%1] - mov r2, [r2 + (%2q-2)*gprsize] + mov r2p, [r2 + (%2q-2)*ptrsize] %ifdef PIC lea r3, [$$] add r2, r3 @@ -623,8 +611,8 @@ INIT_XMM sse FFT_CALC_FUNC cglobal fft_permute, 2,7,1 - mov r4, [r0 + FFTContext.revtab] - mov r5, [r0 + FFTContext.tmpbuf] + mov r4p, [r0 + FFTContext.revtab] + mov r5p, [r0 + FFTContext.tmpbuf] mov ecx, [r0 + FFTContext.nbits] mov r2, 1 shl r2, cl @@ -658,7 +646,7 @@ cglobal fft_permute, 2,7,1 %macro IMDCT_CALC_FUNC 0 cglobal imdct_calc, 3,5,3 mov r3d, [r0 + FFTContext.mdctsize] - mov r4, [r0 + FFTContext.imdcthalf] + mov r4p, [r0 + FFTContext.imdcthalf] add r1, r3 PUSH r3 PUSH r1 @@ -773,7 +761,7 @@ fft %+ n %+ fullsuffix: %undef n align 8 -dispatch_tab %+ fullsuffix: pointer list_of_fft +dispatch_tab %+ fullsuffix: dp list_of_fft %endmacro ; DECL_FFT INIT_YMM avx @@ -967,8 +955,8 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i mov r3d, [r0+FFTContext.mdctsize] add r2, r3 shr r3, 1 - mov rtcos, [r0+FFTContext.tcos] - mov rtsin, [r0+FFTContext.tsin] + mov preg(rtcos), [r0+FFTContext.tcos] + mov preg(rtsin), [r0+FFTContext.tsin] add rtcos, r3 add rtsin, r3 %if ARCH_X86_64 == 0 @@ -976,7 +964,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i push rtsin %endif shr r3, 1 - mov rrevtab, [r0+FFTContext.revtab] + mov preg(rrevtab), [r0+FFTContext.revtab] add rrevtab, r3 %if ARCH_X86_64 == 0 push rrevtab diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 8267bd4..5fa1301 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -201,8 +201,8 @@ FLOAT_TO_INT16_STEP 0 %macro FLOAT_TO_INT16_INTERLEAVE2 0 cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len lea lenq, [4*r2q] - mov src1q, [src0q+gprsize] - mov src0q, [src0q] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q] add dstq, lenq add src0q, lenq add src1q, lenq @@ -251,12 +251,12 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s %else %define lend dword r2m %endif - mov src1q, [srcq+1*gprsize] - mov src2q, [srcq+2*gprsize] - mov src3q, [srcq+3*gprsize] - mov src4q, [srcq+4*gprsize] - mov src5q, [srcq+5*gprsize] - mov srcq, [srcq] + mov src1p, [srcq+1*ptrsize] + mov src2p, [srcq+2*ptrsize] + mov src3p, [srcq+3*ptrsize] + mov src4p, [srcq+4*ptrsize] + mov src5p, [srcq+5*ptrsize] + mov srcp, [srcq] sub src1q, srcq sub src2q, srcq sub src3q, srcq @@ -309,12 +309,12 @@ cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len %else %define lend dword r2m %endif - mov src1q, [srcq+1*gprsize] - mov src2q, [srcq+2*gprsize] - mov src3q, [srcq+3*gprsize] - mov src4q, [srcq+4*gprsize] - mov src5q, [srcq+5*gprsize] - mov srcq, [srcq] + mov src1p, [srcq+1*ptrsize] + mov src2p, [srcq+2*ptrsize] + mov src3p, [srcq+3*ptrsize] + mov src4p, [srcq+4*ptrsize] + mov src5p, [srcq+5*ptrsize] + mov srcp, [srcq] sub src1q, srcq sub src2q, srcq sub src3q, srcq @@ -387,8 +387,8 @@ FLOAT_INTERLEAVE6 7 %macro FLOAT_INTERLEAVE2 1 cglobal float_interleave2, 3, 4, %1, dst, src, len, src1 - mov src1q, [srcq+gprsize] - mov srcq, [srcq ] + mov src1p, [srcq+ptrsize] + mov srcp, [srcq ] sub src1q, srcq .loop: mova m0, [srcq ] diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index 30cecd9..7486ac8 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -619,9 +619,9 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, mov r5, 32 add r2, 384 %if ARCH_X86_64 - add dst2q, gprsize + add dst2q, ptrsize %else - add r0mp, gprsize + add r0mp, ptrsize %endif call h264_idct_add8_mmx_plane RET @@ -634,7 +634,7 @@ h264_idct_add8_mmxext_plane: jz .try_dc %if ARCH_X86_64 mov r0d, dword [r1+r5*4] - add r0, [dst2q] + add r0p, [dst2q] %else mov r0, r1m ; XXX r1m here is actually r0m of the calling func mov r0, [r0] @@ -653,7 +653,7 @@ h264_idct_add8_mmxext_plane: DC_ADD_MMXEXT_INIT r2, r3, r6 %if ARCH_X86_64 mov r0d, dword [r1+r5*4] - add r0, [dst2q] + add r0p, [dst2q] %else mov r0, r1m ; XXX r1m here is actually r0m of the calling func mov r0, [r0] @@ -683,9 +683,9 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, mov r5, 32 add r2, 384 %if ARCH_X86_64 - add dst2q, gprsize + add dst2q, ptrsize %else - add r0mp, gprsize + add r0mp, ptrsize %endif call h264_idct_add8_mmxext_plane RET @@ -814,7 +814,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 jz .try%1dc %if ARCH_X86_64 mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] - add r0, [r7] + add r0p, [r7] %else mov r0, r0m mov r0, [r0] @@ -828,7 +828,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 jz .cycle%1end %if ARCH_X86_64 mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] - add r0, [r7] + add r0p, [r7] %else mov r0, r0m mov r0, [r0] @@ -853,9 +853,9 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8 add8_sse2_cycle 0, 0x34 add8_sse2_cycle 1, 0x3c %if ARCH_X86_64 - add r7, gprsize + add r7, ptrsize %else - add r0mp, gprsize + add r0mp, ptrsize %endif add8_sse2_cycle 2, 0x5c add8_sse2_cycle 3, 0x64 diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm index 51965f0..4cf5cf7 100644 --- a/libavcodec/x86/h264_idct_10bit.asm +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -291,15 +291,15 @@ cglobal h264_idct_add8_10,5,8,7 mov r7, r0 %endif add r2, 1024 - mov r0, [r0] + mov r0p, [r0] ADD16_OP_INTRA 16, 4+ 6*8 ADD16_OP_INTRA 18, 4+ 7*8 add r2, 1024-128*2 %if ARCH_X86_64 - mov r0, [r7+gprsize] + mov r0p, [r7+ptrsize] %else mov r0, r0m - mov r0, [r0+gprsize] + mov r0, [r0+ptrsize] %endif ADD16_OP_INTRA 32, 4+11*8 ADD16_OP_INTRA 34, 4+12*8 diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c index a18e9fa..f9b98c3 100644 --- a/libavcodec/x86/mlpdsp.c +++ b/libavcodec/x86/mlpdsp.c @@ -157,8 +157,8 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff, /* 2*/"+r"(sample_buffer), #if ARCH_X86_64 /* 3*/"+r"(blocksize) - : /* 4*/"r"((x86_reg)mask), /* 5*/"r"(firjump), - /* 6*/"r"(iirjump) , /* 7*/"c"(filter_shift) + : /* 4*/"r"((x86_native_reg)mask), /* 5*/"r"((x86_native_reg)(uintptr_t)firjump), + /* 6*/"r"((x86_native_reg)(uintptr_t)iirjump), /* 7*/"c"(filter_shift) , /* 8*/"r"((int64_t)coeff[0]) , /* 9*/"r"((int64_t)coeff[1]) , /*10*/"r"((int64_t)coeff[2]) diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm index 19b910b..72b4cea 100644 --- a/libavcodec/x86/videodsp.asm +++ b/libavcodec/x86/videodsp.asm @@ -48,13 +48,13 @@ cglobal emu_edge_core, 6, 9, 1 %else %define w_reg r6 cglobal emu_edge_core, 2, 7, 0 - mov r4, r4m ; end_y - mov r5, r5m ; block_h + mov r4p, r4m ; end_y + mov r5p, r5m ; block_h %endif ; start with vertical extend (top/bottom) and body pixel copy - mov w_reg, r7m - sub w_reg, r6m ; w = start_x - end_x + mov preg(w_reg), r7m + sub preg(w_reg), r6m ; w = start_x - end_x sub r5, r4 %if ARCH_X86_64 sub r4, r3 @@ -77,7 +77,7 @@ cglobal emu_edge_core, 2, 7, 0 .v_extend_end: ; horizontal extend (left/right) - mov w_reg, r6m ; start_x + mov preg(w_reg), r6m ; start_x sub r0, w_reg %if ARCH_X86_64 mov r3, r0 ; backup of buf+block_h*linesize @@ -111,8 +111,8 @@ cglobal emu_edge_core, 2, 7, 0 mov r0, r0m mov r5, r5m %endif - mov w_reg, r7m ; end_x - mov r1, r8m ; block_w + mov preg(w_reg), r7m ; end_x + mov r1p, r8m ; block_w mov r4, r1 sub r1, w_reg jz .h_extend_end ; if (end_x == block_w) goto h_extend_end @@ -293,7 +293,7 @@ ALIGN 128 READ_NUM_BYTES top, %%n ; read bytes .emuedge_extend_top_ %+ %%n %+ _loop: ; do { WRITE_NUM_BYTES top, %%n ; write bytes - add r0 , r2 ; dst += linesize + add r0p , r2p ; dst += linesize %if ARCH_X86_64 dec r3d %else ; ARCH_X86_32 @@ -305,19 +305,19 @@ ALIGN 128 .emuedge_copy_body_ %+ %%n %+ _loop: ; do { READ_NUM_BYTES body, %%n ; read bytes WRITE_NUM_BYTES body, %%n ; write bytes - add r0 , r2 ; dst += linesize - add r1 , r2 ; src += linesize + add r0p , r2p ; dst += linesize + add r1p , r2p ; src += linesize dec r4d jnz .emuedge_copy_body_ %+ %%n %+ _loop ; } while (--end_y) ; copy bottom pixels test r5 , r5 ; if (!block_h) jz .emuedge_v_extend_end_ %+ %%n ; goto end - sub r1 , r2 ; src -= linesize + sub r1p , r2p ; src -= linesize READ_NUM_BYTES bottom, %%n ; read bytes .emuedge_extend_bottom_ %+ %%n %+ _loop: ; do { WRITE_NUM_BYTES bottom, %%n ; write bytes - add r0 , r2 ; dst += linesize + add r0p , r2p ; dst += linesize dec r5d jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h) @@ -379,7 +379,7 @@ ALIGN 128 %rep 11 ALIGN 64 .emuedge_extend_left_ %+ %%n: ; do { - sub r0, r2 ; dst -= linesize + sub r0p, r2p ; dst -= linesize READ_V_PIXEL %%n, [r0+r1] ; read pixels WRITE_V_PIXEL %%n, r0 ; write pixels dec r5 @@ -400,7 +400,7 @@ ALIGN 64 ALIGN 64 .emuedge_extend_right_ %+ %%n: ; do { %if ARCH_X86_64 - sub r3, r2 ; dst -= linesize + sub r3p, r2p ; dst -= linesize READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels dec r8 @@ -450,7 +450,7 @@ ALIGN 64 %macro V_COPY_ROW 2 %ifidn %1, bottom - sub r1, linesize + sub r1p, linesize %endif .%1_copy_loop: xor cnt_reg, cnt_reg @@ -460,7 +460,7 @@ ALIGN 64 %else ; sse V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0 %if ARCH_X86_64 -%define linesize r2 +%define linesize r2p V_COPY_NPX %1, rax , mov, 8 %else ; ARCH_X86_32 %define linesize r2m @@ -472,9 +472,9 @@ ALIGN 64 V_COPY_NPX %1, vall, mov, 1 mov w_reg, cnt_reg %ifidn %1, body - add r1, linesize + add r1p, linesize %endif - add r0, linesize + add r0p, linesize dec %2 jnz .%1_copy_loop %endmacro @@ -521,7 +521,7 @@ ALIGN 64 .slow_left_extend_loop: ; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r7/r6=start_x mov r4, 8 - sub r0, linesize + sub r0p, linesize READ_V_PIXEL 8, [r0+w_reg] .left_extend_8px_loop: movq [r0+r4-8], mm0 @@ -557,7 +557,7 @@ ALIGN 64 %define bh_reg r5 %endif lea r1, [r4-8] - sub buf_reg, linesize + sub preg(buf_reg), linesize READ_V_PIXEL 8, [buf_reg+w_reg-1] .right_extend_8px_loop: movq [buf_reg+r1], mm0 diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 1af1429..b140aca 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -236,8 +236,8 @@ CONV_FLT_TO_S32 %macro CONV_S16P_TO_S16_2CH 0 cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1 - mov src1q, [src0q+gprsize] - mov src0q, [src0q ] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q ] lea lenq, [2*lend] add src0q, lenq add src1q, lenq @@ -285,12 +285,12 @@ cglobal conv_s16p_to_s16_6ch, 3,8,7, dst, src0, len, src1, src2, src3, src4, src cglobal conv_s16p_to_s16_6ch, 2,7,7, dst, src0, src1, src2, src3, src4, src5 %define lend dword r2m %endif - mov src1q, [src0q+1*gprsize] - mov src2q, [src0q+2*gprsize] - mov src3q, [src0q+3*gprsize] - mov src4q, [src0q+4*gprsize] - mov src5q, [src0q+5*gprsize] - mov src0q, [src0q] + mov src1p, [src0q+1*ptrsize] + mov src2p, [src0q+2*ptrsize] + mov src3p, [src0q+3*ptrsize] + mov src4p, [src0q+4*ptrsize] + mov src5p, [src0q+5*ptrsize] + mov src0p, [src0q] sub src1q, src0q sub src2q, src0q sub src3q, src0q @@ -393,8 +393,8 @@ CONV_S16P_TO_S16_6CH %macro CONV_S16P_TO_FLT_2CH 0 cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 lea lenq, [2*lend] - mov src1q, [src0q+gprsize] - mov src0q, [src0q ] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q ] lea dstq, [dstq+4*lenq] add src0q, lenq add src1q, lenq @@ -444,12 +444,12 @@ cglobal conv_s16p_to_flt_6ch, 3,8,8, dst, src, len, src1, src2, src3, src4, src5 cglobal conv_s16p_to_flt_6ch, 2,7,8, dst, src, src1, src2, src3, src4, src5 %define lend dword r2m %endif - mov src1q, [srcq+1*gprsize] - mov src2q, [srcq+2*gprsize] - mov src3q, [srcq+3*gprsize] - mov src4q, [srcq+4*gprsize] - mov src5q, [srcq+5*gprsize] - mov srcq, [srcq] + mov src1p, [srcq+1*ptrsize] + mov src2p, [srcq+2*ptrsize] + mov src3p, [srcq+3*ptrsize] + mov src4p, [srcq+4*ptrsize] + mov src5p, [srcq+5*ptrsize] + mov srcp, [srcq] sub src1q, srcq sub src2q, srcq sub src3q, srcq @@ -542,8 +542,8 @@ CONV_S16P_TO_FLT_6CH %macro CONV_FLTP_TO_S16_2CH 0 cglobal conv_fltp_to_s16_2ch, 3,4,3, dst, src0, len, src1 lea lenq, [4*lend] - mov src1q, [src0q+gprsize] - mov src0q, [src0q ] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q ] add dstq, lenq add src0q, lenq add src1q, lenq @@ -588,12 +588,12 @@ cglobal conv_fltp_to_s16_6ch, 3,8,7, dst, src, len, src1, src2, src3, src4, src5 cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5 %define lend dword r2m %endif - mov src1q, [srcq+1*gprsize] - mov src2q, [srcq+2*gprsize] - mov src3q, [srcq+3*gprsize] - mov src4q, [srcq+4*gprsize] - mov src5q, [srcq+5*gprsize] - mov srcq, [srcq] + mov src1p, [srcq+1*ptrsize] + mov src2p, [srcq+2*ptrsize] + mov src3p, [srcq+3*ptrsize] + mov src4p, [srcq+4*ptrsize] + mov src5p, [srcq+5*ptrsize] + mov srcp, [srcq] sub src1q, srcq sub src2q, srcq sub src3q, srcq @@ -695,8 +695,8 @@ CONV_FLTP_TO_S16_6CH %macro CONV_FLTP_TO_FLT_2CH 0 cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1 - mov src1q, [src0q+gprsize] - mov src0q, [src0q] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q] lea lenq, [4*lend] add src0q, lenq add src1q, lenq @@ -735,12 +735,12 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len %else %define lend dword r2m %endif - mov src1q, [srcq+1*gprsize] - mov src2q, [srcq+2*gprsize] - mov src3q, [srcq+3*gprsize] - mov src4q, [srcq+4*gprsize] - mov src5q, [srcq+5*gprsize] - mov srcq, [srcq] + mov src1p, [srcq+1*ptrsize] + mov src2p, [srcq+2*ptrsize] + mov src3p, [srcq+3*ptrsize] + mov src4p, [srcq+4*ptrsize] + mov src5p, [srcq+5*ptrsize] + mov srcp, [srcq] sub src1q, srcq sub src2q, srcq sub src3q, srcq @@ -810,8 +810,8 @@ CONV_FLTP_TO_FLT_6CH %macro CONV_S16_TO_S16P_2CH 0 cglobal conv_s16_to_s16p_2ch, 3,4,4, dst0, src, len, dst1 lea lenq, [2*lend] - mov dst1q, [dst0q+gprsize] - mov dst0q, [dst0q ] + mov dst1p, [dst0q+ptrsize] + mov dst0p, [dst0q ] lea srcq, [srcq+2*lenq] add dst0q, lenq add dst1q, lenq @@ -861,12 +861,12 @@ cglobal conv_s16_to_s16p_6ch, 3,8,5, dst, src, len, dst1, dst2, dst3, dst4, dst5 cglobal conv_s16_to_s16p_6ch, 2,7,5, dst, src, dst1, dst2, dst3, dst4, dst5 %define lend dword r2m %endif - mov dst1q, [dstq+ gprsize] - mov dst2q, [dstq+2*gprsize] - mov dst3q, [dstq+3*gprsize] - mov dst4q, [dstq+4*gprsize] - mov dst5q, [dstq+5*gprsize] - mov dstq, [dstq ] + mov dst1p, [dstq+ ptrsize] + mov dst2p, [dstq+2*ptrsize] + mov dst3p, [dstq+3*ptrsize] + mov dst4p, [dstq+4*ptrsize] + mov dst5p, [dstq+5*ptrsize] + mov dstp, [dstq ] sub dst1q, dstq sub dst2q, dstq sub dst3q, dstq @@ -914,8 +914,8 @@ CONV_S16_TO_S16P_6CH %macro CONV_S16_TO_FLTP_2CH 0 cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1 lea lenq, [4*lend] - mov dst1q, [dst0q+gprsize] - mov dst0q, [dst0q ] + mov dst1p, [dst0q+ptrsize] + mov dst0p, [dst0q ] add srcq, lenq add dst0q, lenq add dst1q, lenq @@ -954,12 +954,12 @@ cglobal conv_s16_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 %define lend dword r2m %endif - mov dst1q, [dstq+ gprsize] - mov dst2q, [dstq+2*gprsize] - mov dst3q, [dstq+3*gprsize] - mov dst4q, [dstq+4*gprsize] - mov dst5q, [dstq+5*gprsize] - mov dstq, [dstq ] + mov dst1p, [dstq+ ptrsize] + mov dst2p, [dstq+2*ptrsize] + mov dst3p, [dstq+3*ptrsize] + mov dst4p, [dstq+4*ptrsize] + mov dst5p, [dstq+5*ptrsize] + mov dstp, [dstq ] sub dst1q, dstq sub dst2q, dstq sub dst3q, dstq @@ -1029,8 +1029,8 @@ CONV_S16_TO_FLTP_6CH %macro CONV_FLT_TO_S16P_2CH 0 cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1 lea lenq, [2*lend] - mov dst1q, [dst0q+gprsize] - mov dst0q, [dst0q ] + mov dst1p, [dst0q+ptrsize] + mov dst0p, [dst0q ] lea srcq, [srcq+4*lenq] add dst0q, lenq add dst1q, lenq @@ -1077,12 +1077,12 @@ cglobal conv_flt_to_s16p_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 cglobal conv_flt_to_s16p_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 %define lend dword r2m %endif - mov dst1q, [dstq+ gprsize] - mov dst2q, [dstq+2*gprsize] - mov dst3q, [dstq+3*gprsize] - mov dst4q, [dstq+4*gprsize] - mov dst5q, [dstq+5*gprsize] - mov dstq, [dstq ] + mov dst1p, [dstq+ ptrsize] + mov dst2p, [dstq+2*ptrsize] + mov dst3p, [dstq+3*ptrsize] + mov dst4p, [dstq+4*ptrsize] + mov dst5p, [dstq+5*ptrsize] + mov dstp, [dstq ] sub dst1q, dstq sub dst2q, dstq sub dst3q, dstq @@ -1143,8 +1143,8 @@ CONV_FLT_TO_S16P_6CH %macro CONV_FLT_TO_FLTP_2CH 0 cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1 lea lenq, [4*lend] - mov dst1q, [dst0q+gprsize] - mov dst0q, [dst0q ] + mov dst1p, [dst0q+ptrsize] + mov dst0p, [dst0q ] lea srcq, [srcq+2*lenq] add dst0q, lenq add dst1q, lenq @@ -1177,12 +1177,12 @@ cglobal conv_flt_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 %define lend dword r2m %endif - mov dst1q, [dstq+ gprsize] - mov dst2q, [dstq+2*gprsize] - mov dst3q, [dstq+3*gprsize] - mov dst4q, [dstq+4*gprsize] - mov dst5q, [dstq+5*gprsize] - mov dstq, [dstq ] + mov dst1p, [dstq+ ptrsize] + mov dst2p, [dstq+2*ptrsize] + mov dst3p, [dstq+3*ptrsize] + mov dst4p, [dstq+4*ptrsize] + mov dst5p, [dstq+5*ptrsize] + mov dstp, [dstq ] sub dst1q, dstq sub dst2q, dstq sub dst3q, dstq diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm index 8a298e2..4a85adc 100644 --- a/libavresample/x86/audio_mix.asm +++ b/libavresample/x86/audio_mix.asm @@ -31,10 +31,10 @@ SECTION_TEXT %macro MIX_2_TO_1_FLTP_FLT 0 cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1 - mov src1q, [srcq+gprsize] - mov srcq, [srcq ] + mov src1p, [srcq+ptrsize] + mov srcp, [srcq ] sub src1q, srcq - mov matrixq, [matrixq ] + mov matrixp, [matrixq ] VBROADCASTSS m4, [matrixq ] VBROADCASTSS m5, [matrixq+4] ALIGN 16 @@ -65,10 +65,10 @@ MIX_2_TO_1_FLTP_FLT %macro MIX_2_TO_1_S16P_FLT 0 cglobal mix_2_to_1_s16p_flt, 3,4,6, src, matrix, len, src1 - mov src1q, [srcq+gprsize] - mov srcq, [srcq] + mov src1p, [srcq+ptrsize] + mov srcp, [srcq] sub src1q, srcq - mov matrixq, [matrixq ] + mov matrixp, [matrixq ] VBROADCASTSS m4, [matrixq ] VBROADCASTSS m5, [matrixq+4] ALIGN 16 @@ -109,10 +109,10 @@ MIX_2_TO_1_S16P_FLT INIT_XMM sse2 cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1 - mov src1q, [srcq+gprsize] - mov srcq, [srcq] + mov src1p, [srcq+ptrsize] + mov srcp, [srcq] sub src1q, srcq - mov matrixq, [matrixq] + mov matrixp, [matrixq] movd m4, [matrixq] movd m5, [matrixq] SPLATW m4, m4, 0 @@ -150,11 +150,11 @@ cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1 %macro MIX_1_TO_2_FLTP_FLT 0 cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1 - mov src1q, [src0q+gprsize] - mov src0q, [src0q] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q] sub src1q, src0q - mov matrix1q, [matrix0q+gprsize] - mov matrix0q, [matrix0q] + mov matrix1p, [matrix0q+ptrsize] + mov matrix0p, [matrix0q] VBROADCASTSS m2, [matrix0q] VBROADCASTSS m3, [matrix1q] ALIGN 16 @@ -182,11 +182,11 @@ MIX_1_TO_2_FLTP_FLT %macro MIX_1_TO_2_S16P_FLT 0 cglobal mix_1_to_2_s16p_flt, 3,5,6, src0, matrix0, len, src1, matrix1 - mov src1q, [src0q+gprsize] - mov src0q, [src0q] + mov src1p, [src0q+ptrsize] + mov src0p, [src0q] sub src1q, src0q - mov matrix1q, [matrix0q+gprsize] - mov matrix0q, [matrix0q] + mov matrix1p, [matrix0q+ptrsize] + mov matrix0p, [matrix0q] VBROADCASTSS m4, [matrix0q] VBROADCASTSS m5, [matrix1q] ALIGN 16 @@ -277,12 +277,12 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n %endif ; load matrix pointers -%define matrix0q r1q -%define matrix1q r3q +%define matrix0q r1 +%define matrix1q r3 %if stereo - mov matrix1q, [matrix0q+gprsize] + mov preg(matrix1q), [matrix0q+ptrsize] %endif - mov matrix0q, [matrix0q] + mov preg(matrix0q), [matrix0q] ; define matrix coeff names %assign %%i 0 @@ -341,16 +341,16 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n %assign %%i 1 %rep (in_channels - 1) %if ARCH_X86_32 && in_channels >= 7 && %%i >= 5 - mov src5q, [src0q+%%i*gprsize] + mov src5p, [src0q+%%i*ptrsize] add src5q, lenq mov src %+ %%i %+ m, src5q %else - mov src %+ %%i %+ q, [src0q+%%i*gprsize] + mov src %+ %%i %+ p, [src0q+%%i*ptrsize] add src %+ %%i %+ q, lenq %endif %assign %%i %%i+1 %endrep - mov src0q, [src0q] + mov src0p, [src0q] add src0q, lenq neg lenq .loop: diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h index a43ab3c..234a1e2 100644 --- a/libavutil/x86/asm.h +++ b/libavutil/x86/asm.h @@ -24,7 +24,7 @@ #include #include "config.h" -#if ARCH_X86_64 +#if ARCH_X86_64_X64 # define OPSIZE "q" # define REG_a "rax" # define REG_b "rbx" @@ -32,6 +32,7 @@ # define REG_d "rdx" # define REG_D "rdi" # define REG_S "rsi" +# define REG_8 "r8" # define PTR_SIZE "8" typedef int64_t x86_reg; @@ -43,8 +44,9 @@ typedef int64_t x86_reg; # define REGc rcx # define REGd rdx # define REGSP rsp +# define REG8 r8 -#elif ARCH_X86_32 +#elif ARCH_X86_32 || ARCH_X86_64_X32 # define OPSIZE "l" # define REG_a "eax" @@ -53,6 +55,7 @@ typedef int64_t x86_reg; # define REG_d "edx" # define REG_D "edi" # define REG_S "esi" +# define REG_8 "r8d" # define PTR_SIZE "4" typedef int32_t x86_reg; @@ -64,10 +67,31 @@ typedef int32_t x86_reg; # define REGc ecx # define REGd edx # define REGSP esp +# define REG8 r8d #else typedef int x86_reg; #endif +#if ARCH_X86_64 +# define REG_ra "rax" +# define REG_rb "rbx" +# define REG_rc "rcx" +# define REG_rd "rdx" +# define REG_rD "rdi" +# define REG_rS "rsi" +# define REG_rBP "rbp" +typedef int64_t x86_native_reg; +#elif ARCH_X86_32 +# define REG_ra "eax" +# define REG_rb "ebx" +# define REG_rc "ecx" +# define REG_rd "edx" +# define REG_rD "edi" +# define REG_rS "esi" +# define REG_rBP "ebp" +typedef int32_t x86_native_reg; +#endif + #define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE)) #define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE)) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 3b36fd0..0b6f5ff 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -40,9 +40,9 @@ /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ #define cpuid(index, eax, ebx, ecx, edx) \ __asm__ volatile ( \ - "mov %%"REG_b", %%"REG_S" \n\t" \ + "mov %%"REG_rb", %%"REG_rS" \n\t" \ "cpuid \n\t" \ - "xchg %%"REG_b", %%"REG_S \ + "xchg %%"REG_rb", %%"REG_rS \ : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \ : "0" (index)) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 2617cdf..d1474a6 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -136,13 +136,18 @@ CPUNOP amdnop ; registers: ; rN and rNq are the native-size register holding function argument N -; rNd, rNw, rNb are dword, word, and byte size +; rNp, rNd, rNw, rNb are pointer, dword, word, and byte size ; rNh is the high 8 bits of the word size ; rNm is the original location of arg N (a register or on the stack), dword ; rNmp is native size %macro DECLARE_REG 2-3 %define r%1q %2 + %if ARCH_X86_64_X64 + %define r%1p %2 + %else + %define r%1p %2d + %endif %define r%1d %2d %define r%1w %2w %define r%1b %2b @@ -164,6 +169,12 @@ CPUNOP amdnop %macro DECLARE_REG_SIZE 3 %define r%1q r%1 %define e%1q r%1 + %if ARCH_X86_64_X64 + %define r%1p r%1 + %else + %define r%1p e%1 + %endif + %define e%1p e%1 %define r%1d e%1 %define e%1d e%1 %define r%1w %1 @@ -199,6 +210,11 @@ DECLARE_REG_SIZE bp, bpl, null %macro DECLARE_REG_TMP_SIZE 0-* %rep %0 %define t%1q t%1 %+ q + %if ARCH_X86_64_X64 + %define t%1p t%1 + %else + %define t%1p t%1 %+ d + %endif %define t%1d t%1 %+ d %define t%1w t%1 %+ w %define t%1h t%1 %+ h @@ -215,6 +231,20 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %define gprsize 4 %endif +%if ARCH_X86_64_X64 + %define ptrsize 8 + %define pword qword + %define dp dq + %define resp resq + %define preg(x) x +%else + %define ptrsize 4 + %define pword dword + %define dp dd + %define resp resd + %define preg(x) x %+ d +%endif + %macro PUSH 1 push %1 %ifidn rstk, rsp @@ -293,6 +323,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %assign %%i 0 %rep n_arg_names CAT_UNDEF arg_name %+ %%i, q + CAT_UNDEF arg_name %+ %%i, p CAT_UNDEF arg_name %+ %%i, d CAT_UNDEF arg_name %+ %%i, w CAT_UNDEF arg_name %+ %%i, h @@ -309,6 +340,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %assign %%i 0 %rep %0 %xdefine %1q r %+ %%i %+ q + %xdefine %1p r %+ %%i %+ p %xdefine %1d r %+ %%i %+ d %xdefine %1w r %+ %%i %+ w %xdefine %1h r %+ %%i %+ h diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index e1ceded..c144e76 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -152,14 +152,14 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset movsx cntr_reg, fltsizem .filterloop_ %+ %%i: ; input pixels - mov r6, [srcq+gprsize*cntr_reg-2*gprsize] + mov r6p, [srcq+ptrsize*cntr_reg-2*ptrsize] %if %1 == 16 mova m3, [r6+r5*4] mova m5, [r6+r5*4+mmsize] %else ; %1 == 8/9/10 mova m3, [r6+r5*2] %endif ; %1 == 8/9/10/16 - mov r6, [srcq+gprsize*cntr_reg-gprsize] + mov r6p, [srcq+ptrsize*cntr_reg-ptrsize] %if %1 == 16 mova m4, [r6+r5*4] mova m6, [r6+r5*4+mmsize] diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index d89a26f..c6535ce 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -774,12 +774,12 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1]; #if ARCH_X86_64 __asm__ volatile( - YSCALEYUV2RGB(%%r8, %5) - YSCALEYUV2RGB_YA(%%r8, %5, %6, %7) + YSCALEYUV2RGB(%%REG8, %5) + YSCALEYUV2RGB_YA(%%REG8, %5, %6, %7) "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) + WRITEBGR32(%4, 8280(%5), %%REG8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest), "a" (&c->redDither), "r" (abuf0), "r" (abuf1) @@ -791,7 +791,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) "push %0 \n\t" "push %1 \n\t" @@ -804,7 +804,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], "pop %1 \n\t" "pop %0 \n\t" WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -814,11 +814,11 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) "pcmpeqd %%mm7, %%mm7 \n\t" WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -838,11 +838,11 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -861,7 +861,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -871,7 +871,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -890,7 +890,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -900,7 +900,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -959,10 +959,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2PACKED(%%REGBP, %5) WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1101,11 +1101,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1(%%REGBP, %5) YSCALEYUV2RGB1_ALPHA(%%REGBP) WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1114,11 +1114,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1(%%REGBP, %5) "pcmpeqd %%mm7, %%mm7 \n\t" WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1130,11 +1130,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1b(%%REGBP, %5) YSCALEYUV2RGB1_ALPHA(%%REGBP) WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1143,11 +1143,11 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1b(%%REGBP, %5) "pcmpeqd %%mm7, %%mm7 \n\t" WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1169,11 +1169,11 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1183,11 +1183,11 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1b(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1208,7 +1208,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -1218,7 +1218,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1228,7 +1228,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1b(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -1238,7 +1238,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1259,7 +1259,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -1269,7 +1269,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1279,7 +1279,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2RGB1b(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -1289,7 +1289,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1347,10 +1347,10 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2PACKED1(%%REGBP, %5) WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1360,10 +1360,10 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" + "push %%"REG_rBP" \n\t" YSCALEYUV2PACKED1b(%%REGBP, %5) WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" + "pop %%"REG_rBP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) @@ -1378,7 +1378,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, { int32_t *filterPos = c->hLumFilterPos; int16_t *filter = c->hLumFilter; - void *mmxextFilterCode = c->lumMmxextFilterCode; + x86_native_reg mmxextFilterCode = (uintptr_t)c->lumMmxextFilterCode; int i; #if defined(PIC) uint64_t ebxsave; -- 1.8.1.2