Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 452482 | Differences between
and this patch

Collapse All | Expand All

(-)a/configure (-2 / +12 lines)
Lines 1112-1117 ARCH_LIST=' Link Here
1112
    x86
1112
    x86
1113
    x86_32
1113
    x86_32
1114
    x86_64
1114
    x86_64
1115
    x86_64_x32
1116
    x86_64_x64
1115
'
1117
'
1116
1118
1117
ARCH_EXT_LIST_ARM='
1119
ARCH_EXT_LIST_ARM='
Lines 2808-2814 case "$arch" in Link Here
2808
        spic=$shared
2810
        spic=$shared
2809
    ;;
2811
    ;;
2810
    x86)
2812
    x86)
2811
        check_64bit x86_32 x86_64 'sizeof(void *) > 4'
2813
        check_64bit x86_32 'x86_64 x86_64_x64' 'sizeof(void *) > 4'
2814
        if test "$subarch" = "x86_32"; then
2815
            check_64bit x86_32 'x86_64 x86_64_x32' '
2816
                #ifdef __x86_64__
2817
                1
2818
                #endif
2819
            '
2820
        fi
2812
        if test "$subarch" = "x86_64"; then
2821
        if test "$subarch" = "x86_64"; then
2813
            spic=$shared
2822
            spic=$shared
2814
        fi
2823
        fi
Lines 3274-3280 EOF Link Here
3274
3283
3275
    if ! disabled_any asm mmx yasm; then
3284
    if ! disabled_any asm mmx yasm; then
3276
        if check_cmd $yasmexe --version; then
3285
        if check_cmd $yasmexe --version; then
3277
            enabled x86_64 && yasm_extra="-m amd64"
3286
            enabled x86_64_x64 && yasm_extra="-m amd64"
3287
            enabled x86_64_x32 && yasm_extra="-m x32"
3278
            yasm_debug="-g dwarf2"
3288
            yasm_debug="-g dwarf2"
3279
        elif check_cmd nasm -v; then
3289
        elif check_cmd nasm -v; then
3280
            yasmexe=nasm
3290
            yasmexe=nasm
(-)a/libavcodec/x86/cabac.h (-1 / +1 lines)
Lines 30-36 Link Here
30
#if HAVE_INLINE_ASM
30
#if HAVE_INLINE_ASM
31
31
32
#ifdef BROKEN_RELOCATIONS
32
#ifdef BROKEN_RELOCATIONS
33
#define TABLES_ARG , "r"(tables)
33
#define TABLES_ARG , "r"((x86_native_reg)(uintptr_t)tables)
34
34
35
#if HAVE_FAST_CMOV
35
#if HAVE_FAST_CMOV
36
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
36
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
(-)a/libavcodec/x86/fft.asm (-28 / +16 lines)
Lines 30-54 Link Here
30
30
31
%include "libavutil/x86/x86util.asm"
31
%include "libavutil/x86/x86util.asm"
32
32
33
%if ARCH_X86_64
34
%define pointer resq
35
%else
36
%define pointer resd
37
%endif
38
39
struc FFTContext
33
struc FFTContext
40
    .nbits:    resd 1
34
    .nbits:    resd 1
41
    .reverse:  resd 1
35
    .reverse:  resd 1
42
    .revtab:   pointer 1
36
    .revtab:   resp 1
43
    .tmpbuf:   pointer 1
37
    .tmpbuf:   resp 1
44
    .mdctsize: resd 1
38
    .mdctsize: resd 1
45
    .mdctbits: resd 1
39
    .mdctbits: resd 1
46
    .tcos:     pointer 1
40
    .tcos:     resp 1
47
    .tsin:     pointer 1
41
    .tsin:     resp 1
48
    .fftperm:  pointer 1
42
    .fftperm:  resp 1
49
    .fftcalc:  pointer 1
43
    .fftcalc:  resp 1
50
    .imdctcalc:pointer 1
44
    .imdctcalc:resp 1
51
    .imdcthalf:pointer 1
45
    .imdcthalf:resp 1
52
endstruc
46
endstruc
53
47
54
SECTION_RODATA
48
SECTION_RODATA
Lines 78-89 cextern cos_ %+ i Link Here
78
%assign i i<<1
72
%assign i i<<1
79
%endrep
73
%endrep
80
74
81
%if ARCH_X86_64
82
    %define pointer dq
83
%else
84
    %define pointer dd
85
%endif
86
87
%macro IF0 1+
75
%macro IF0 1+
88
%endmacro
76
%endmacro
89
%macro IF1 1+
77
%macro IF1 1+
Lines 527-533 DEFINE_ARGS zc, w, n, o1, o3 Link Here
527
515
528
%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
516
%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
529
    lea r2, [dispatch_tab%1]
517
    lea r2, [dispatch_tab%1]
530
    mov r2, [r2 + (%2q-2)*gprsize]
518
    mov r2p, [r2 + (%2q-2)*ptrsize]
531
%ifdef PIC
519
%ifdef PIC
532
    lea r3, [$$]
520
    lea r3, [$$]
533
    add r2, r3
521
    add r2, r3
Lines 623-630 INIT_XMM sse Link Here
623
FFT_CALC_FUNC
611
FFT_CALC_FUNC
624
612
625
cglobal fft_permute, 2,7,1
613
cglobal fft_permute, 2,7,1
626
    mov     r4,  [r0 + FFTContext.revtab]
614
    mov     r4p, [r0 + FFTContext.revtab]
627
    mov     r5,  [r0 + FFTContext.tmpbuf]
615
    mov     r5p, [r0 + FFTContext.tmpbuf]
628
    mov     ecx, [r0 + FFTContext.nbits]
616
    mov     ecx, [r0 + FFTContext.nbits]
629
    mov     r2, 1
617
    mov     r2, 1
630
    shl     r2, cl
618
    shl     r2, cl
Lines 658-664 cglobal fft_permute, 2,7,1 Link Here
658
%macro IMDCT_CALC_FUNC 0
646
%macro IMDCT_CALC_FUNC 0
659
cglobal imdct_calc, 3,5,3
647
cglobal imdct_calc, 3,5,3
660
    mov     r3d, [r0 + FFTContext.mdctsize]
648
    mov     r3d, [r0 + FFTContext.mdctsize]
661
    mov     r4,  [r0 + FFTContext.imdcthalf]
649
    mov     r4p, [r0 + FFTContext.imdcthalf]
662
    add     r1,  r3
650
    add     r1,  r3
663
    PUSH    r3
651
    PUSH    r3
664
    PUSH    r1
652
    PUSH    r1
Lines 773-779 fft %+ n %+ fullsuffix: Link Here
773
%undef n
761
%undef n
774
762
775
align 8
763
align 8
776
dispatch_tab %+ fullsuffix: pointer list_of_fft
764
dispatch_tab %+ fullsuffix: dp list_of_fft
777
%endmacro ; DECL_FFT
765
%endmacro ; DECL_FFT
778
766
779
INIT_YMM avx
767
INIT_YMM avx
Lines 967-974 cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i Link Here
967
    mov   r3d, [r0+FFTContext.mdctsize]
955
    mov   r3d, [r0+FFTContext.mdctsize]
968
    add   r2, r3
956
    add   r2, r3
969
    shr   r3, 1
957
    shr   r3, 1
970
    mov   rtcos, [r0+FFTContext.tcos]
958
    mov   preg(rtcos), [r0+FFTContext.tcos]
971
    mov   rtsin, [r0+FFTContext.tsin]
959
    mov   preg(rtsin), [r0+FFTContext.tsin]
972
    add   rtcos, r3
960
    add   rtcos, r3
973
    add   rtsin, r3
961
    add   rtsin, r3
974
%if ARCH_X86_64 == 0
962
%if ARCH_X86_64 == 0
Lines 976-982 cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i Link Here
976
    push  rtsin
964
    push  rtsin
977
%endif
965
%endif
978
    shr   r3, 1
966
    shr   r3, 1
979
    mov   rrevtab, [r0+FFTContext.revtab]
967
    mov   preg(rrevtab), [r0+FFTContext.revtab]
980
    add   rrevtab, r3
968
    add   rrevtab, r3
981
%if ARCH_X86_64 == 0
969
%if ARCH_X86_64 == 0
982
    push  rrevtab
970
    push  rrevtab
(-)a/libavcodec/x86/fmtconvert.asm (-16 / +16 lines)
Lines 201-208 FLOAT_TO_INT16_STEP 0 Link Here
201
%macro FLOAT_TO_INT16_INTERLEAVE2 0
201
%macro FLOAT_TO_INT16_INTERLEAVE2 0
202
cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
202
cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
203
    lea      lenq, [4*r2q]
203
    lea      lenq, [4*r2q]
204
    mov     src1q, [src0q+gprsize]
204
    mov     src1p, [src0q+ptrsize]
205
    mov     src0q, [src0q]
205
    mov     src0p, [src0q]
206
    add      dstq, lenq
206
    add      dstq, lenq
207
    add     src0q, lenq
207
    add     src0q, lenq
208
    add     src1q, lenq
208
    add     src1q, lenq
Lines 251-262 cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s Link Here
251
%else
251
%else
252
    %define lend dword r2m
252
    %define lend dword r2m
253
%endif
253
%endif
254
    mov src1q, [srcq+1*gprsize]
254
    mov src1p, [srcq+1*ptrsize]
255
    mov src2q, [srcq+2*gprsize]
255
    mov src2p, [srcq+2*ptrsize]
256
    mov src3q, [srcq+3*gprsize]
256
    mov src3p, [srcq+3*ptrsize]
257
    mov src4q, [srcq+4*gprsize]
257
    mov src4p, [srcq+4*ptrsize]
258
    mov src5q, [srcq+5*gprsize]
258
    mov src5p, [srcq+5*ptrsize]
259
    mov srcq,  [srcq]
259
    mov srcp,  [srcq]
260
    sub src1q, srcq
260
    sub src1q, srcq
261
    sub src2q, srcq
261
    sub src2q, srcq
262
    sub src3q, srcq
262
    sub src3q, srcq
Lines 309-320 cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len Link Here
309
%else
309
%else
310
    %define lend dword r2m
310
    %define lend dword r2m
311
%endif
311
%endif
312
    mov    src1q, [srcq+1*gprsize]
312
    mov    src1p, [srcq+1*ptrsize]
313
    mov    src2q, [srcq+2*gprsize]
313
    mov    src2p, [srcq+2*ptrsize]
314
    mov    src3q, [srcq+3*gprsize]
314
    mov    src3p, [srcq+3*ptrsize]
315
    mov    src4q, [srcq+4*gprsize]
315
    mov    src4p, [srcq+4*ptrsize]
316
    mov    src5q, [srcq+5*gprsize]
316
    mov    src5p, [srcq+5*ptrsize]
317
    mov     srcq, [srcq]
317
    mov     srcp, [srcq]
318
    sub    src1q, srcq
318
    sub    src1q, srcq
319
    sub    src2q, srcq
319
    sub    src2q, srcq
320
    sub    src3q, srcq
320
    sub    src3q, srcq
Lines 387-394 FLOAT_INTERLEAVE6 7 Link Here
387
387
388
%macro FLOAT_INTERLEAVE2 1
388
%macro FLOAT_INTERLEAVE2 1
389
cglobal float_interleave2, 3, 4, %1, dst, src, len, src1
389
cglobal float_interleave2, 3, 4, %1, dst, src, len, src1
390
    mov     src1q, [srcq+gprsize]
390
    mov     src1p, [srcq+ptrsize]
391
    mov      srcq, [srcq        ]
391
    mov      srcp, [srcq        ]
392
    sub     src1q, srcq
392
    sub     src1q, srcq
393
.loop:
393
.loop:
394
    mova       m0, [srcq             ]
394
    mova       m0, [srcq             ]
(-)a/libavcodec/x86/h264_idct.asm (-10 / +10 lines)
Lines 619-627 cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, Link Here
619
    mov          r5, 32
619
    mov          r5, 32
620
    add          r2, 384
620
    add          r2, 384
621
%if ARCH_X86_64
621
%if ARCH_X86_64
622
    add       dst2q, gprsize
622
    add       dst2q, ptrsize
623
%else
623
%else
624
    add        r0mp, gprsize
624
    add        r0mp, ptrsize
625
%endif
625
%endif
626
    call         h264_idct_add8_mmx_plane
626
    call         h264_idct_add8_mmx_plane
627
    RET
627
    RET
Lines 634-640 h264_idct_add8_mmxext_plane: Link Here
634
    jz .try_dc
634
    jz .try_dc
635
%if ARCH_X86_64
635
%if ARCH_X86_64
636
    mov         r0d, dword [r1+r5*4]
636
    mov         r0d, dword [r1+r5*4]
637
    add          r0, [dst2q]
637
    add         r0p, [dst2q]
638
%else
638
%else
639
    mov          r0, r1m ; XXX r1m here is actually r0m of the calling func
639
    mov          r0, r1m ; XXX r1m here is actually r0m of the calling func
640
    mov          r0, [r0]
640
    mov          r0, [r0]
Lines 653-659 h264_idct_add8_mmxext_plane: Link Here
653
    DC_ADD_MMXEXT_INIT r2, r3, r6
653
    DC_ADD_MMXEXT_INIT r2, r3, r6
654
%if ARCH_X86_64
654
%if ARCH_X86_64
655
    mov         r0d, dword [r1+r5*4]
655
    mov         r0d, dword [r1+r5*4]
656
    add          r0, [dst2q]
656
    add         r0p, [dst2q]
657
%else
657
%else
658
    mov          r0, r1m ; XXX r1m here is actually r0m of the calling func
658
    mov          r0, r1m ; XXX r1m here is actually r0m of the calling func
659
    mov          r0, [r0]
659
    mov          r0, [r0]
Lines 683-691 cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, Link Here
683
    mov          r5, 32
683
    mov          r5, 32
684
    add          r2, 384
684
    add          r2, 384
685
%if ARCH_X86_64
685
%if ARCH_X86_64
686
    add       dst2q, gprsize
686
    add       dst2q, ptrsize
687
%else
687
%else
688
    add        r0mp, gprsize
688
    add        r0mp, ptrsize
689
%endif
689
%endif
690
    call h264_idct_add8_mmxext_plane
690
    call h264_idct_add8_mmxext_plane
691
    RET
691
    RET
Lines 814-820 cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 Link Here
814
    jz .try%1dc
814
    jz .try%1dc
815
%if ARCH_X86_64
815
%if ARCH_X86_64
816
    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
816
    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
817
    add         r0, [r7]
817
    add        r0p, [r7]
818
%else
818
%else
819
    mov         r0, r0m
819
    mov         r0, r0m
820
    mov         r0, [r0]
820
    mov         r0, [r0]
Lines 828-834 cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 Link Here
828
    jz .cycle%1end
828
    jz .cycle%1end
829
%if ARCH_X86_64
829
%if ARCH_X86_64
830
    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
830
    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
831
    add         r0, [r7]
831
    add        r0p, [r7]
832
%else
832
%else
833
    mov         r0, r0m
833
    mov         r0, r0m
834
    mov         r0, [r0]
834
    mov         r0, [r0]
Lines 853-861 cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8 Link Here
853
    add8_sse2_cycle 0, 0x34
853
    add8_sse2_cycle 0, 0x34
854
    add8_sse2_cycle 1, 0x3c
854
    add8_sse2_cycle 1, 0x3c
855
%if ARCH_X86_64
855
%if ARCH_X86_64
856
    add          r7, gprsize
856
    add          r7, ptrsize
857
%else
857
%else
858
    add        r0mp, gprsize
858
    add        r0mp, ptrsize
859
%endif
859
%endif
860
    add8_sse2_cycle 2, 0x5c
860
    add8_sse2_cycle 2, 0x5c
861
    add8_sse2_cycle 3, 0x64
861
    add8_sse2_cycle 3, 0x64
(-)a/libavcodec/x86/h264_idct_10bit.asm (-3 / +3 lines)
Lines 291-305 cglobal h264_idct_add8_10,5,8,7 Link Here
291
    mov      r7, r0
291
    mov      r7, r0
292
%endif
292
%endif
293
    add      r2, 1024
293
    add      r2, 1024
294
    mov      r0, [r0]
294
    mov     r0p, [r0]
295
    ADD16_OP_INTRA 16, 4+ 6*8
295
    ADD16_OP_INTRA 16, 4+ 6*8
296
    ADD16_OP_INTRA 18, 4+ 7*8
296
    ADD16_OP_INTRA 18, 4+ 7*8
297
    add      r2, 1024-128*2
297
    add      r2, 1024-128*2
298
%if ARCH_X86_64
298
%if ARCH_X86_64
299
    mov      r0, [r7+gprsize]
299
    mov     r0p, [r7+ptrsize]
300
%else
300
%else
301
    mov      r0, r0m
301
    mov      r0, r0m
302
    mov      r0, [r0+gprsize]
302
    mov      r0, [r0+ptrsize]
303
%endif
303
%endif
304
    ADD16_OP_INTRA 32, 4+11*8
304
    ADD16_OP_INTRA 32, 4+11*8
305
    ADD16_OP_INTRA 34, 4+12*8
305
    ADD16_OP_INTRA 34, 4+12*8
(-)a/libavcodec/x86/mlpdsp.c (-2 / +2 lines)
Lines 157-164 static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff, Link Here
157
          /* 2*/"+r"(sample_buffer),
157
          /* 2*/"+r"(sample_buffer),
158
#if ARCH_X86_64
158
#if ARCH_X86_64
159
          /* 3*/"+r"(blocksize)
159
          /* 3*/"+r"(blocksize)
160
        : /* 4*/"r"((x86_reg)mask), /* 5*/"r"(firjump),
160
        : /* 4*/"r"((x86_native_reg)mask), /* 5*/"r"((x86_native_reg)(uintptr_t)firjump),
161
          /* 6*/"r"(iirjump)      , /* 7*/"c"(filter_shift)
161
          /* 6*/"r"((x86_native_reg)(uintptr_t)iirjump), /* 7*/"c"(filter_shift)
162
        , /* 8*/"r"((int64_t)coeff[0])
162
        , /* 8*/"r"((int64_t)coeff[0])
163
        , /* 9*/"r"((int64_t)coeff[1])
163
        , /* 9*/"r"((int64_t)coeff[1])
164
        , /*10*/"r"((int64_t)coeff[2])
164
        , /*10*/"r"((int64_t)coeff[2])
(-)a/libavcodec/x86/videodsp.asm (-20 / +20 lines)
Lines 48-60 cglobal emu_edge_core, 6, 9, 1 Link Here
48
%else
48
%else
49
%define w_reg r6
49
%define w_reg r6
50
cglobal emu_edge_core, 2, 7, 0
50
cglobal emu_edge_core, 2, 7, 0
51
    mov         r4, r4m         ; end_y
51
    mov        r4p, r4m         ; end_y
52
    mov         r5, r5m         ; block_h
52
    mov        r5p, r5m         ; block_h
53
%endif
53
%endif
54
54
55
    ; start with vertical extend (top/bottom) and body pixel copy
55
    ; start with vertical extend (top/bottom) and body pixel copy
56
    mov      w_reg, r7m
56
    mov preg(w_reg), r7m
57
    sub      w_reg, r6m         ; w = start_x - end_x
57
    sub preg(w_reg), r6m        ; w = start_x - end_x
58
    sub         r5, r4
58
    sub         r5, r4
59
%if ARCH_X86_64
59
%if ARCH_X86_64
60
    sub         r4, r3
60
    sub         r4, r3
Lines 77-83 cglobal emu_edge_core, 2, 7, 0 Link Here
77
.v_extend_end:
77
.v_extend_end:
78
78
79
    ; horizontal extend (left/right)
79
    ; horizontal extend (left/right)
80
    mov      w_reg, r6m         ; start_x
80
    mov preg(w_reg), r6m        ; start_x
81
    sub         r0, w_reg
81
    sub         r0, w_reg
82
%if ARCH_X86_64
82
%if ARCH_X86_64
83
    mov         r3, r0          ; backup of buf+block_h*linesize
83
    mov         r3, r0          ; backup of buf+block_h*linesize
Lines 111-118 cglobal emu_edge_core, 2, 7, 0 Link Here
111
    mov         r0, r0m
111
    mov         r0, r0m
112
    mov         r5, r5m
112
    mov         r5, r5m
113
%endif
113
%endif
114
    mov      w_reg, r7m         ; end_x
114
    mov preg(w_reg), r7m        ; end_x
115
    mov         r1, r8m         ; block_w
115
    mov        r1p, r8m         ; block_w
116
    mov         r4, r1
116
    mov         r4, r1
117
    sub         r1, w_reg
117
    sub         r1, w_reg
118
    jz .h_extend_end            ; if (end_x == block_w) goto h_extend_end
118
    jz .h_extend_end            ; if (end_x == block_w) goto h_extend_end
Lines 293-299 ALIGN 128 Link Here
293
    READ_NUM_BYTES  top,    %%n              ; read bytes
293
    READ_NUM_BYTES  top,    %%n              ; read bytes
294
.emuedge_extend_top_ %+ %%n %+ _loop:        ; do {
294
.emuedge_extend_top_ %+ %%n %+ _loop:        ; do {
295
    WRITE_NUM_BYTES top,    %%n              ;   write bytes
295
    WRITE_NUM_BYTES top,    %%n              ;   write bytes
296
    add            r0 , r2                   ;   dst += linesize
296
    add           r0p , r2p                  ;   dst += linesize
297
%if ARCH_X86_64
297
%if ARCH_X86_64
298
    dec            r3d
298
    dec            r3d
299
%else ; ARCH_X86_32
299
%else ; ARCH_X86_32
Lines 305-323 ALIGN 128 Link Here
305
.emuedge_copy_body_ %+ %%n %+ _loop:         ; do {
305
.emuedge_copy_body_ %+ %%n %+ _loop:         ; do {
306
    READ_NUM_BYTES  body,   %%n              ;   read bytes
306
    READ_NUM_BYTES  body,   %%n              ;   read bytes
307
    WRITE_NUM_BYTES body,   %%n              ;   write bytes
307
    WRITE_NUM_BYTES body,   %%n              ;   write bytes
308
    add            r0 , r2                   ;   dst += linesize
308
    add           r0p , r2p                  ;   dst += linesize
309
    add            r1 , r2                   ;   src += linesize
309
    add           r1p , r2p                  ;   src += linesize
310
    dec            r4d
310
    dec            r4d
311
    jnz .emuedge_copy_body_ %+ %%n %+ _loop  ; } while (--end_y)
311
    jnz .emuedge_copy_body_ %+ %%n %+ _loop  ; } while (--end_y)
312
312
313
    ; copy bottom pixels
313
    ; copy bottom pixels
314
    test           r5 , r5                   ; if (!block_h)
314
    test           r5 , r5                   ; if (!block_h)
315
    jz .emuedge_v_extend_end_ %+ %%n         ;   goto end
315
    jz .emuedge_v_extend_end_ %+ %%n         ;   goto end
316
    sub            r1 , r2                   ; src -= linesize
316
    sub           r1p , r2p                  ; src -= linesize
317
    READ_NUM_BYTES  bottom, %%n              ; read bytes
317
    READ_NUM_BYTES  bottom, %%n              ; read bytes
318
.emuedge_extend_bottom_ %+ %%n %+ _loop:     ; do {
318
.emuedge_extend_bottom_ %+ %%n %+ _loop:     ; do {
319
    WRITE_NUM_BYTES bottom, %%n              ;   write bytes
319
    WRITE_NUM_BYTES bottom, %%n              ;   write bytes
320
    add            r0 , r2                   ;   dst += linesize
320
    add           r0p , r2p                  ;   dst += linesize
321
    dec            r5d
321
    dec            r5d
322
    jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h)
322
    jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h)
323
323
Lines 379-385 ALIGN 128 Link Here
379
%rep 11
379
%rep 11
380
ALIGN 64
380
ALIGN 64
381
.emuedge_extend_left_ %+ %%n:          ; do {
381
.emuedge_extend_left_ %+ %%n:          ; do {
382
    sub         r0, r2                 ;   dst -= linesize
382
    sub        r0p, r2p                ;   dst -= linesize
383
    READ_V_PIXEL  %%n, [r0+r1]         ;   read pixels
383
    READ_V_PIXEL  %%n, [r0+r1]         ;   read pixels
384
    WRITE_V_PIXEL %%n, r0              ;   write pixels
384
    WRITE_V_PIXEL %%n, r0              ;   write pixels
385
    dec         r5
385
    dec         r5
Lines 400-406 ALIGN 64 Link Here
400
ALIGN 64
400
ALIGN 64
401
.emuedge_extend_right_ %+ %%n:          ; do {
401
.emuedge_extend_right_ %+ %%n:          ; do {
402
%if ARCH_X86_64
402
%if ARCH_X86_64
403
    sub        r3, r2                   ;   dst -= linesize
403
    sub       r3p, r2p                  ;   dst -= linesize
404
    READ_V_PIXEL  %%n, [r3+w_reg-1]     ;   read pixels
404
    READ_V_PIXEL  %%n, [r3+w_reg-1]     ;   read pixels
405
    WRITE_V_PIXEL %%n, r3+r4-%%n        ;   write pixels
405
    WRITE_V_PIXEL %%n, r3+r4-%%n        ;   write pixels
406
    dec       r8
406
    dec       r8
Lines 450-456 ALIGN 64 Link Here
450
450
451
%macro V_COPY_ROW 2
451
%macro V_COPY_ROW 2
452
%ifidn %1, bottom
452
%ifidn %1, bottom
453
    sub         r1, linesize
453
    sub        r1p, linesize
454
%endif
454
%endif
455
.%1_copy_loop:
455
.%1_copy_loop:
456
    xor    cnt_reg, cnt_reg
456
    xor    cnt_reg, cnt_reg
Lines 460-466 ALIGN 64 Link Here
460
%else ; sse
460
%else ; sse
461
    V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0
461
    V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0
462
%if ARCH_X86_64
462
%if ARCH_X86_64
463
%define linesize r2
463
%define linesize r2p
464
    V_COPY_NPX %1, rax , mov,     8
464
    V_COPY_NPX %1, rax , mov,     8
465
%else ; ARCH_X86_32
465
%else ; ARCH_X86_32
466
%define linesize r2m
466
%define linesize r2m
Lines 472-480 ALIGN 64 Link Here
472
    V_COPY_NPX %1, vall, mov,     1
472
    V_COPY_NPX %1, vall, mov,     1
473
    mov      w_reg, cnt_reg
473
    mov      w_reg, cnt_reg
474
%ifidn %1, body
474
%ifidn %1, body
475
    add         r1, linesize
475
    add        r1p, linesize
476
%endif
476
%endif
477
    add         r0, linesize
477
    add        r0p, linesize
478
    dec         %2
478
    dec         %2
479
    jnz .%1_copy_loop
479
    jnz .%1_copy_loop
480
%endmacro
480
%endmacro
Lines 521-527 ALIGN 64 Link Here
521
.slow_left_extend_loop:
521
.slow_left_extend_loop:
522
; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r7/r6=start_x
522
; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r7/r6=start_x
523
    mov         r4, 8
523
    mov         r4, 8
524
    sub         r0, linesize
524
    sub        r0p, linesize
525
    READ_V_PIXEL 8, [r0+w_reg]
525
    READ_V_PIXEL 8, [r0+w_reg]
526
.left_extend_8px_loop:
526
.left_extend_8px_loop:
527
    movq [r0+r4-8], mm0
527
    movq [r0+r4-8], mm0
Lines 557-563 ALIGN 64 Link Here
557
%define bh_reg r5
557
%define bh_reg r5
558
%endif
558
%endif
559
    lea         r1, [r4-8]
559
    lea         r1, [r4-8]
560
    sub    buf_reg, linesize
560
    sub preg(buf_reg), linesize
561
    READ_V_PIXEL 8, [buf_reg+w_reg-1]
561
    READ_V_PIXEL 8, [buf_reg+w_reg-1]
562
.right_extend_8px_loop:
562
.right_extend_8px_loop:
563
    movq [buf_reg+r1], mm0
563
    movq [buf_reg+r1], mm0
(-)a/libavresample/x86/audio_convert.asm (-64 / +64 lines)
Lines 236-243 CONV_FLT_TO_S32 Link Here
236
236
237
%macro CONV_S16P_TO_S16_2CH 0
237
%macro CONV_S16P_TO_S16_2CH 0
238
cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1
238
cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1
239
    mov       src1q, [src0q+gprsize]
239
    mov       src1p, [src0q+ptrsize]
240
    mov       src0q, [src0q        ]
240
    mov       src0p, [src0q        ]
241
    lea        lenq, [2*lend]
241
    lea        lenq, [2*lend]
242
    add       src0q, lenq
242
    add       src0q, lenq
243
    add       src1q, lenq
243
    add       src1q, lenq
Lines 285-296 cglobal conv_s16p_to_s16_6ch, 3,8,7, dst, src0, len, src1, src2, src3, src4, src Link Here
285
cglobal conv_s16p_to_s16_6ch, 2,7,7, dst, src0, src1, src2, src3, src4, src5
285
cglobal conv_s16p_to_s16_6ch, 2,7,7, dst, src0, src1, src2, src3, src4, src5
286
%define lend dword r2m
286
%define lend dword r2m
287
%endif
287
%endif
288
    mov      src1q, [src0q+1*gprsize]
288
    mov      src1p, [src0q+1*ptrsize]
289
    mov      src2q, [src0q+2*gprsize]
289
    mov      src2p, [src0q+2*ptrsize]
290
    mov      src3q, [src0q+3*gprsize]
290
    mov      src3p, [src0q+3*ptrsize]
291
    mov      src4q, [src0q+4*gprsize]
291
    mov      src4p, [src0q+4*ptrsize]
292
    mov      src5q, [src0q+5*gprsize]
292
    mov      src5p, [src0q+5*ptrsize]
293
    mov      src0q, [src0q]
293
    mov      src0p, [src0q]
294
    sub      src1q, src0q
294
    sub      src1q, src0q
295
    sub      src2q, src0q
295
    sub      src2q, src0q
296
    sub      src3q, src0q
296
    sub      src3q, src0q
Lines 393-400 CONV_S16P_TO_S16_6CH Link Here
393
%macro CONV_S16P_TO_FLT_2CH 0
393
%macro CONV_S16P_TO_FLT_2CH 0
394
cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
394
cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
395
    lea       lenq, [2*lend]
395
    lea       lenq, [2*lend]
396
    mov      src1q, [src0q+gprsize]
396
    mov      src1p, [src0q+ptrsize]
397
    mov      src0q, [src0q        ]
397
    mov      src0p, [src0q        ]
398
    lea       dstq, [dstq+4*lenq]
398
    lea       dstq, [dstq+4*lenq]
399
    add      src0q, lenq
399
    add      src0q, lenq
400
    add      src1q, lenq
400
    add      src1q, lenq
Lines 444-455 cglobal conv_s16p_to_flt_6ch, 3,8,8, dst, src, len, src1, src2, src3, src4, src5 Link Here
444
cglobal conv_s16p_to_flt_6ch, 2,7,8, dst, src, src1, src2, src3, src4, src5
444
cglobal conv_s16p_to_flt_6ch, 2,7,8, dst, src, src1, src2, src3, src4, src5
445
%define lend dword r2m
445
%define lend dword r2m
446
%endif
446
%endif
447
    mov     src1q, [srcq+1*gprsize]
447
    mov     src1p, [srcq+1*ptrsize]
448
    mov     src2q, [srcq+2*gprsize]
448
    mov     src2p, [srcq+2*ptrsize]
449
    mov     src3q, [srcq+3*gprsize]
449
    mov     src3p, [srcq+3*ptrsize]
450
    mov     src4q, [srcq+4*gprsize]
450
    mov     src4p, [srcq+4*ptrsize]
451
    mov     src5q, [srcq+5*gprsize]
451
    mov     src5p, [srcq+5*ptrsize]
452
    mov      srcq, [srcq]
452
    mov      srcp, [srcq]
453
    sub     src1q, srcq
453
    sub     src1q, srcq
454
    sub     src2q, srcq
454
    sub     src2q, srcq
455
    sub     src3q, srcq
455
    sub     src3q, srcq
Lines 542-549 CONV_S16P_TO_FLT_6CH Link Here
542
%macro CONV_FLTP_TO_S16_2CH 0
542
%macro CONV_FLTP_TO_S16_2CH 0
543
cglobal conv_fltp_to_s16_2ch, 3,4,3, dst, src0, len, src1
543
cglobal conv_fltp_to_s16_2ch, 3,4,3, dst, src0, len, src1
544
    lea      lenq, [4*lend]
544
    lea      lenq, [4*lend]
545
    mov     src1q, [src0q+gprsize]
545
    mov     src1p, [src0q+ptrsize]
546
    mov     src0q, [src0q        ]
546
    mov     src0p, [src0q        ]
547
    add      dstq, lenq
547
    add      dstq, lenq
548
    add     src0q, lenq
548
    add     src0q, lenq
549
    add     src1q, lenq
549
    add     src1q, lenq
Lines 588-599 cglobal conv_fltp_to_s16_6ch, 3,8,7, dst, src, len, src1, src2, src3, src4, src5 Link Here
588
cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5
588
cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5
589
%define lend dword r2m
589
%define lend dword r2m
590
%endif
590
%endif
591
    mov        src1q, [srcq+1*gprsize]
591
    mov        src1p, [srcq+1*ptrsize]
592
    mov        src2q, [srcq+2*gprsize]
592
    mov        src2p, [srcq+2*ptrsize]
593
    mov        src3q, [srcq+3*gprsize]
593
    mov        src3p, [srcq+3*ptrsize]
594
    mov        src4q, [srcq+4*gprsize]
594
    mov        src4p, [srcq+4*ptrsize]
595
    mov        src5q, [srcq+5*gprsize]
595
    mov        src5p, [srcq+5*ptrsize]
596
    mov         srcq, [srcq]
596
    mov         srcp, [srcq]
597
    sub        src1q, srcq
597
    sub        src1q, srcq
598
    sub        src2q, srcq
598
    sub        src2q, srcq
599
    sub        src3q, srcq
599
    sub        src3q, srcq
Lines 695-702 CONV_FLTP_TO_S16_6CH Link Here
695
695
696
%macro CONV_FLTP_TO_FLT_2CH 0
696
%macro CONV_FLTP_TO_FLT_2CH 0
697
cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1
697
cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1
698
    mov  src1q, [src0q+gprsize]
698
    mov  src1p, [src0q+ptrsize]
699
    mov  src0q, [src0q]
699
    mov  src0p, [src0q]
700
    lea   lenq, [4*lend]
700
    lea   lenq, [4*lend]
701
    add  src0q, lenq
701
    add  src0q, lenq
702
    add  src1q, lenq
702
    add  src1q, lenq
Lines 735-746 cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len Link Here
735
%else
735
%else
736
    %define lend dword r2m
736
    %define lend dword r2m
737
%endif
737
%endif
738
    mov    src1q, [srcq+1*gprsize]
738
    mov    src1p, [srcq+1*ptrsize]
739
    mov    src2q, [srcq+2*gprsize]
739
    mov    src2p, [srcq+2*ptrsize]
740
    mov    src3q, [srcq+3*gprsize]
740
    mov    src3p, [srcq+3*ptrsize]
741
    mov    src4q, [srcq+4*gprsize]
741
    mov    src4p, [srcq+4*ptrsize]
742
    mov    src5q, [srcq+5*gprsize]
742
    mov    src5p, [srcq+5*ptrsize]
743
    mov     srcq, [srcq]
743
    mov     srcp, [srcq]
744
    sub    src1q, srcq
744
    sub    src1q, srcq
745
    sub    src2q, srcq
745
    sub    src2q, srcq
746
    sub    src3q, srcq
746
    sub    src3q, srcq
Lines 810-817 CONV_FLTP_TO_FLT_6CH Link Here
810
%macro CONV_S16_TO_S16P_2CH 0
810
%macro CONV_S16_TO_S16P_2CH 0
811
cglobal conv_s16_to_s16p_2ch, 3,4,4, dst0, src, len, dst1
811
cglobal conv_s16_to_s16p_2ch, 3,4,4, dst0, src, len, dst1
812
    lea       lenq, [2*lend]
812
    lea       lenq, [2*lend]
813
    mov      dst1q, [dst0q+gprsize]
813
    mov      dst1p, [dst0q+ptrsize]
814
    mov      dst0q, [dst0q        ]
814
    mov      dst0p, [dst0q        ]
815
    lea       srcq, [srcq+2*lenq]
815
    lea       srcq, [srcq+2*lenq]
816
    add      dst0q, lenq
816
    add      dst0q, lenq
817
    add      dst1q, lenq
817
    add      dst1q, lenq
Lines 861-872 cglobal conv_s16_to_s16p_6ch, 3,8,5, dst, src, len, dst1, dst2, dst3, dst4, dst5 Link Here
861
cglobal conv_s16_to_s16p_6ch, 2,7,5, dst, src, dst1, dst2, dst3, dst4, dst5
861
cglobal conv_s16_to_s16p_6ch, 2,7,5, dst, src, dst1, dst2, dst3, dst4, dst5
862
%define lend dword r2m
862
%define lend dword r2m
863
%endif
863
%endif
864
    mov     dst1q, [dstq+  gprsize]
864
    mov     dst1p, [dstq+  ptrsize]
865
    mov     dst2q, [dstq+2*gprsize]
865
    mov     dst2p, [dstq+2*ptrsize]
866
    mov     dst3q, [dstq+3*gprsize]
866
    mov     dst3p, [dstq+3*ptrsize]
867
    mov     dst4q, [dstq+4*gprsize]
867
    mov     dst4p, [dstq+4*ptrsize]
868
    mov     dst5q, [dstq+5*gprsize]
868
    mov     dst5p, [dstq+5*ptrsize]
869
    mov      dstq, [dstq          ]
869
    mov      dstp, [dstq          ]
870
    sub     dst1q, dstq
870
    sub     dst1q, dstq
871
    sub     dst2q, dstq
871
    sub     dst2q, dstq
872
    sub     dst3q, dstq
872
    sub     dst3q, dstq
Lines 914-921 CONV_S16_TO_S16P_6CH Link Here
914
%macro CONV_S16_TO_FLTP_2CH 0
914
%macro CONV_S16_TO_FLTP_2CH 0
915
cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1
915
cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1
916
    lea       lenq, [4*lend]
916
    lea       lenq, [4*lend]
917
    mov      dst1q, [dst0q+gprsize]
917
    mov      dst1p, [dst0q+ptrsize]
918
    mov      dst0q, [dst0q        ]
918
    mov      dst0p, [dst0q        ]
919
    add       srcq, lenq
919
    add       srcq, lenq
920
    add      dst0q, lenq
920
    add      dst0q, lenq
921
    add      dst1q, lenq
921
    add      dst1q, lenq
Lines 954-965 cglobal conv_s16_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 Link Here
954
cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
954
cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
955
%define lend dword r2m
955
%define lend dword r2m
956
%endif
956
%endif
957
    mov     dst1q, [dstq+  gprsize]
957
    mov     dst1p, [dstq+  ptrsize]
958
    mov     dst2q, [dstq+2*gprsize]
958
    mov     dst2p, [dstq+2*ptrsize]
959
    mov     dst3q, [dstq+3*gprsize]
959
    mov     dst3p, [dstq+3*ptrsize]
960
    mov     dst4q, [dstq+4*gprsize]
960
    mov     dst4p, [dstq+4*ptrsize]
961
    mov     dst5q, [dstq+5*gprsize]
961
    mov     dst5p, [dstq+5*ptrsize]
962
    mov      dstq, [dstq          ]
962
    mov      dstp, [dstq          ]
963
    sub     dst1q, dstq
963
    sub     dst1q, dstq
964
    sub     dst2q, dstq
964
    sub     dst2q, dstq
965
    sub     dst3q, dstq
965
    sub     dst3q, dstq
Lines 1029-1036 CONV_S16_TO_FLTP_6CH Link Here
1029
%macro CONV_FLT_TO_S16P_2CH 0
1029
%macro CONV_FLT_TO_S16P_2CH 0
1030
cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
1030
cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
1031
    lea       lenq, [2*lend]
1031
    lea       lenq, [2*lend]
1032
    mov      dst1q, [dst0q+gprsize]
1032
    mov      dst1p, [dst0q+ptrsize]
1033
    mov      dst0q, [dst0q        ]
1033
    mov      dst0p, [dst0q        ]
1034
    lea       srcq, [srcq+4*lenq]
1034
    lea       srcq, [srcq+4*lenq]
1035
    add      dst0q, lenq
1035
    add      dst0q, lenq
1036
    add      dst1q, lenq
1036
    add      dst1q, lenq
Lines 1077-1088 cglobal conv_flt_to_s16p_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 Link Here
1077
cglobal conv_flt_to_s16p_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
1077
cglobal conv_flt_to_s16p_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
1078
%define lend dword r2m
1078
%define lend dword r2m
1079
%endif
1079
%endif
1080
    mov     dst1q, [dstq+  gprsize]
1080
    mov     dst1p, [dstq+  ptrsize]
1081
    mov     dst2q, [dstq+2*gprsize]
1081
    mov     dst2p, [dstq+2*ptrsize]
1082
    mov     dst3q, [dstq+3*gprsize]
1082
    mov     dst3p, [dstq+3*ptrsize]
1083
    mov     dst4q, [dstq+4*gprsize]
1083
    mov     dst4p, [dstq+4*ptrsize]
1084
    mov     dst5q, [dstq+5*gprsize]
1084
    mov     dst5p, [dstq+5*ptrsize]
1085
    mov      dstq, [dstq          ]
1085
    mov      dstp, [dstq          ]
1086
    sub     dst1q, dstq
1086
    sub     dst1q, dstq
1087
    sub     dst2q, dstq
1087
    sub     dst2q, dstq
1088
    sub     dst3q, dstq
1088
    sub     dst3q, dstq
Lines 1143-1150 CONV_FLT_TO_S16P_6CH Link Here
1143
%macro CONV_FLT_TO_FLTP_2CH 0
1143
%macro CONV_FLT_TO_FLTP_2CH 0
1144
cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1
1144
cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1
1145
    lea    lenq, [4*lend]
1145
    lea    lenq, [4*lend]
1146
    mov   dst1q, [dst0q+gprsize]
1146
    mov   dst1p, [dst0q+ptrsize]
1147
    mov   dst0q, [dst0q        ]
1147
    mov   dst0p, [dst0q        ]
1148
    lea    srcq, [srcq+2*lenq]
1148
    lea    srcq, [srcq+2*lenq]
1149
    add   dst0q, lenq
1149
    add   dst0q, lenq
1150
    add   dst1q, lenq
1150
    add   dst1q, lenq
Lines 1177-1188 cglobal conv_flt_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 Link Here
1177
cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
1177
cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
1178
%define lend dword r2m
1178
%define lend dword r2m
1179
%endif
1179
%endif
1180
    mov     dst1q, [dstq+  gprsize]
1180
    mov     dst1p, [dstq+  ptrsize]
1181
    mov     dst2q, [dstq+2*gprsize]
1181
    mov     dst2p, [dstq+2*ptrsize]
1182
    mov     dst3q, [dstq+3*gprsize]
1182
    mov     dst3p, [dstq+3*ptrsize]
1183
    mov     dst4q, [dstq+4*gprsize]
1183
    mov     dst4p, [dstq+4*ptrsize]
1184
    mov     dst5q, [dstq+5*gprsize]
1184
    mov     dst5p, [dstq+5*ptrsize]
1185
    mov      dstq, [dstq          ]
1185
    mov      dstp, [dstq          ]
1186
    sub     dst1q, dstq
1186
    sub     dst1q, dstq
1187
    sub     dst2q, dstq
1187
    sub     dst2q, dstq
1188
    sub     dst3q, dstq
1188
    sub     dst3q, dstq
(-)a/libavresample/x86/audio_mix.asm (-24 / +24 lines)
Lines 31-40 SECTION_TEXT Link Here
31
31
32
%macro MIX_2_TO_1_FLTP_FLT 0
32
%macro MIX_2_TO_1_FLTP_FLT 0
33
cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
33
cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
34
    mov       src1q, [srcq+gprsize]
34
    mov       src1p, [srcq+ptrsize]
35
    mov        srcq, [srcq        ]
35
    mov        srcp, [srcq        ]
36
    sub       src1q, srcq
36
    sub       src1q, srcq
37
    mov     matrixq, [matrixq  ]
37
    mov     matrixp, [matrixq  ]
38
    VBROADCASTSS m4, [matrixq  ]
38
    VBROADCASTSS m4, [matrixq  ]
39
    VBROADCASTSS m5, [matrixq+4]
39
    VBROADCASTSS m5, [matrixq+4]
40
    ALIGN 16
40
    ALIGN 16
Lines 65-74 MIX_2_TO_1_FLTP_FLT Link Here
65
65
66
%macro MIX_2_TO_1_S16P_FLT 0
66
%macro MIX_2_TO_1_S16P_FLT 0
67
cglobal mix_2_to_1_s16p_flt, 3,4,6, src, matrix, len, src1
67
cglobal mix_2_to_1_s16p_flt, 3,4,6, src, matrix, len, src1
68
    mov       src1q, [srcq+gprsize]
68
    mov       src1p, [srcq+ptrsize]
69
    mov        srcq, [srcq]
69
    mov        srcp, [srcq]
70
    sub       src1q, srcq
70
    sub       src1q, srcq
71
    mov     matrixq, [matrixq  ]
71
    mov     matrixp, [matrixq  ]
72
    VBROADCASTSS m4, [matrixq  ]
72
    VBROADCASTSS m4, [matrixq  ]
73
    VBROADCASTSS m5, [matrixq+4]
73
    VBROADCASTSS m5, [matrixq+4]
74
    ALIGN 16
74
    ALIGN 16
Lines 109-118 MIX_2_TO_1_S16P_FLT Link Here
109
109
110
INIT_XMM sse2
110
INIT_XMM sse2
111
cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1
111
cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1
112
    mov       src1q, [srcq+gprsize]
112
    mov       src1p, [srcq+ptrsize]
113
    mov        srcq, [srcq]
113
    mov        srcp, [srcq]
114
    sub       src1q, srcq
114
    sub       src1q, srcq
115
    mov     matrixq, [matrixq]
115
    mov     matrixp, [matrixq]
116
    movd         m4, [matrixq]
116
    movd         m4, [matrixq]
117
    movd         m5, [matrixq]
117
    movd         m5, [matrixq]
118
    SPLATW       m4, m4, 0
118
    SPLATW       m4, m4, 0
Lines 150-160 cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1 Link Here
150
150
151
%macro MIX_1_TO_2_FLTP_FLT 0
151
%macro MIX_1_TO_2_FLTP_FLT 0
152
cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
152
cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
153
    mov       src1q, [src0q+gprsize]
153
    mov       src1p, [src0q+ptrsize]
154
    mov       src0q, [src0q]
154
    mov       src0p, [src0q]
155
    sub       src1q, src0q
155
    sub       src1q, src0q
156
    mov    matrix1q, [matrix0q+gprsize]
156
    mov    matrix1p, [matrix0q+ptrsize]
157
    mov    matrix0q, [matrix0q]
157
    mov    matrix0p, [matrix0q]
158
    VBROADCASTSS m2, [matrix0q]
158
    VBROADCASTSS m2, [matrix0q]
159
    VBROADCASTSS m3, [matrix1q]
159
    VBROADCASTSS m3, [matrix1q]
160
    ALIGN 16
160
    ALIGN 16
Lines 182-192 MIX_1_TO_2_FLTP_FLT Link Here
182
182
183
%macro MIX_1_TO_2_S16P_FLT 0
183
%macro MIX_1_TO_2_S16P_FLT 0
184
cglobal mix_1_to_2_s16p_flt, 3,5,6, src0, matrix0, len, src1, matrix1
184
cglobal mix_1_to_2_s16p_flt, 3,5,6, src0, matrix0, len, src1, matrix1
185
    mov       src1q, [src0q+gprsize]
185
    mov       src1p, [src0q+ptrsize]
186
    mov       src0q, [src0q]
186
    mov       src0p, [src0q]
187
    sub       src1q, src0q
187
    sub       src1q, src0q
188
    mov    matrix1q, [matrix0q+gprsize]
188
    mov    matrix1p, [matrix0q+ptrsize]
189
    mov    matrix0q, [matrix0q]
189
    mov    matrix0p, [matrix0q]
190
    VBROADCASTSS m4, [matrix0q]
190
    VBROADCASTSS m4, [matrix0q]
191
    VBROADCASTSS m5, [matrix1q]
191
    VBROADCASTSS m5, [matrix1q]
192
    ALIGN 16
192
    ALIGN 16
Lines 277-288 cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n Link Here
277
%endif
277
%endif
278
278
279
; load matrix pointers
279
; load matrix pointers
280
%define matrix0q r1q
280
%define matrix0q r1
281
%define matrix1q r3q
281
%define matrix1q r3
282
%if stereo
282
%if stereo
283
    mov      matrix1q, [matrix0q+gprsize]
283
    mov preg(matrix1q), [matrix0q+ptrsize]
284
%endif
284
%endif
285
    mov      matrix0q, [matrix0q]
285
    mov preg(matrix0q), [matrix0q]
286
286
287
; define matrix coeff names
287
; define matrix coeff names
288
%assign %%i 0
288
%assign %%i 0
Lines 341-356 cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n Link Here
341
%assign %%i 1
341
%assign %%i 1
342
%rep (in_channels - 1)
342
%rep (in_channels - 1)
343
    %if ARCH_X86_32 && in_channels >= 7 && %%i >= 5
343
    %if ARCH_X86_32 && in_channels >= 7 && %%i >= 5
344
    mov         src5q, [src0q+%%i*gprsize]
344
    mov         src5p, [src0q+%%i*ptrsize]
345
    add         src5q, lenq
345
    add         src5q, lenq
346
    mov         src %+ %%i %+ m, src5q
346
    mov         src %+ %%i %+ m, src5q
347
    %else
347
    %else
348
    mov         src %+ %%i %+ q, [src0q+%%i*gprsize]
348
    mov         src %+ %%i %+ p, [src0q+%%i*ptrsize]
349
    add         src %+ %%i %+ q, lenq
349
    add         src %+ %%i %+ q, lenq
350
    %endif
350
    %endif
351
    %assign %%i %%i+1
351
    %assign %%i %%i+1
352
%endrep
352
%endrep
353
    mov         src0q, [src0q]
353
    mov         src0p, [src0q]
354
    add         src0q, lenq
354
    add         src0q, lenq
355
    neg          lenq
355
    neg          lenq
356
.loop:
356
.loop:
(-)a/libavutil/x86/asm.h (-2 / +26 lines)
Lines 24-30 Link Here
24
#include <stdint.h>
24
#include <stdint.h>
25
#include "config.h"
25
#include "config.h"
26
26
27
#if ARCH_X86_64
27
#if ARCH_X86_64_X64
28
#    define OPSIZE "q"
28
#    define OPSIZE "q"
29
#    define REG_a "rax"
29
#    define REG_a "rax"
30
#    define REG_b "rbx"
30
#    define REG_b "rbx"
Lines 32-37 Link Here
32
#    define REG_d "rdx"
32
#    define REG_d "rdx"
33
#    define REG_D "rdi"
33
#    define REG_D "rdi"
34
#    define REG_S "rsi"
34
#    define REG_S "rsi"
35
#    define REG_8 "r8"
35
#    define PTR_SIZE "8"
36
#    define PTR_SIZE "8"
36
typedef int64_t x86_reg;
37
typedef int64_t x86_reg;
37
38
Lines 43-50 typedef int64_t x86_reg; Link Here
43
#    define REGc    rcx
44
#    define REGc    rcx
44
#    define REGd    rdx
45
#    define REGd    rdx
45
#    define REGSP   rsp
46
#    define REGSP   rsp
47
#    define REG8    r8
46
48
47
#elif ARCH_X86_32
49
#elif ARCH_X86_32 || ARCH_X86_64_X32
48
50
49
#    define OPSIZE "l"
51
#    define OPSIZE "l"
50
#    define REG_a "eax"
52
#    define REG_a "eax"
Lines 53-58 typedef int64_t x86_reg; Link Here
53
#    define REG_d "edx"
55
#    define REG_d "edx"
54
#    define REG_D "edi"
56
#    define REG_D "edi"
55
#    define REG_S "esi"
57
#    define REG_S "esi"
58
#    define REG_8 "r8d"
56
#    define PTR_SIZE "4"
59
#    define PTR_SIZE "4"
57
typedef int32_t x86_reg;
60
typedef int32_t x86_reg;
58
61
Lines 64-73 typedef int32_t x86_reg; Link Here
64
#    define REGc    ecx
67
#    define REGc    ecx
65
#    define REGd    edx
68
#    define REGd    edx
66
#    define REGSP   esp
69
#    define REGSP   esp
70
#    define REG8    r8d
67
#else
71
#else
68
typedef int x86_reg;
72
typedef int x86_reg;
69
#endif
73
#endif
70
74
75
#if ARCH_X86_64
76
#    define REG_ra "rax"
77
#    define REG_rb "rbx"
78
#    define REG_rc "rcx"
79
#    define REG_rd "rdx"
80
#    define REG_rD "rdi"
81
#    define REG_rS "rsi"
82
#    define REG_rBP "rbp"
83
typedef int64_t x86_native_reg;
84
#elif ARCH_X86_32
85
#    define REG_ra "eax"
86
#    define REG_rb "ebx"
87
#    define REG_rc "ecx"
88
#    define REG_rd "edx"
89
#    define REG_rD "edi"
90
#    define REG_rS "esi"
91
#    define REG_rBP "ebp"
92
typedef int32_t x86_native_reg;
93
#endif
94
71
#define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE))
95
#define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE))
72
#define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE))
96
#define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE))
73
97
(-)a/libavutil/x86/cpu.c (-2 / +2 lines)
Lines 40-48 Link Here
40
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
40
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
41
#define cpuid(index, eax, ebx, ecx, edx)                        \
41
#define cpuid(index, eax, ebx, ecx, edx)                        \
42
    __asm__ volatile (                                          \
42
    __asm__ volatile (                                          \
43
        "mov    %%"REG_b", %%"REG_S" \n\t"                      \
43
        "mov    %%"REG_rb", %%"REG_rS" \n\t"                    \
44
        "cpuid                       \n\t"                      \
44
        "cpuid                       \n\t"                      \
45
        "xchg   %%"REG_b", %%"REG_S                             \
45
        "xchg   %%"REG_rb", %%"REG_rS                           \
46
        : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)        \
46
        : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)        \
47
        : "0" (index))
47
        : "0" (index))
48
48
(-)a/libavutil/x86/x86inc.asm (-1 / +33 lines)
Lines 136-148 CPUNOP amdnop Link Here
136
136
137
; registers:
137
; registers:
138
; rN and rNq are the native-size register holding function argument N
138
; rN and rNq are the native-size register holding function argument N
139
; rNd, rNw, rNb are dword, word, and byte size
139
; rNp, rNd, rNw, rNb are pointer, dword, word, and byte size
140
; rNh is the high 8 bits of the word size
140
; rNh is the high 8 bits of the word size
141
; rNm is the original location of arg N (a register or on the stack), dword
141
; rNm is the original location of arg N (a register or on the stack), dword
142
; rNmp is native size
142
; rNmp is native size
143
143
144
%macro DECLARE_REG 2-3
144
%macro DECLARE_REG 2-3
145
    %define r%1q %2
145
    %define r%1q %2
146
    %if ARCH_X86_64_X64
147
        %define r%1p %2
148
    %else
149
        %define r%1p %2d
150
    %endif
146
    %define r%1d %2d
151
    %define r%1d %2d
147
    %define r%1w %2w
152
    %define r%1w %2w
148
    %define r%1b %2b
153
    %define r%1b %2b
Lines 164-169 CPUNOP amdnop Link Here
164
%macro DECLARE_REG_SIZE 3
169
%macro DECLARE_REG_SIZE 3
165
    %define r%1q r%1
170
    %define r%1q r%1
166
    %define e%1q r%1
171
    %define e%1q r%1
172
    %if ARCH_X86_64_X64
173
        %define r%1p r%1
174
    %else
175
        %define r%1p e%1
176
    %endif
177
    %define e%1p e%1
167
    %define r%1d e%1
178
    %define r%1d e%1
168
    %define e%1d e%1
179
    %define e%1d e%1
169
    %define r%1w %1
180
    %define r%1w %1
Lines 199-204 DECLARE_REG_SIZE bp, bpl, null Link Here
199
%macro DECLARE_REG_TMP_SIZE 0-*
210
%macro DECLARE_REG_TMP_SIZE 0-*
200
    %rep %0
211
    %rep %0
201
        %define t%1q t%1 %+ q
212
        %define t%1q t%1 %+ q
213
        %if ARCH_X86_64_X64
214
            %define t%1p t%1
215
        %else
216
            %define t%1p t%1 %+ d
217
        %endif
202
        %define t%1d t%1 %+ d
218
        %define t%1d t%1 %+ d
203
        %define t%1w t%1 %+ w
219
        %define t%1w t%1 %+ w
204
        %define t%1h t%1 %+ h
220
        %define t%1h t%1 %+ h
Lines 215-220 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 Link Here
215
    %define gprsize 4
231
    %define gprsize 4
216
%endif
232
%endif
217
233
234
%if ARCH_X86_64_X64
235
    %define ptrsize 8
236
    %define pword qword
237
    %define dp dq
238
    %define resp resq
239
    %define preg(x) x
240
%else
241
    %define ptrsize 4
242
    %define pword dword
243
    %define dp dd
244
    %define resp resd
245
    %define preg(x) x %+ d
246
%endif
247
218
%macro PUSH 1
248
%macro PUSH 1
219
    push %1
249
    push %1
220
    %ifidn rstk, rsp
250
    %ifidn rstk, rsp
Lines 293-298 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 Link Here
293
        %assign %%i 0
323
        %assign %%i 0
294
        %rep n_arg_names
324
        %rep n_arg_names
295
            CAT_UNDEF arg_name %+ %%i, q
325
            CAT_UNDEF arg_name %+ %%i, q
326
            CAT_UNDEF arg_name %+ %%i, p
296
            CAT_UNDEF arg_name %+ %%i, d
327
            CAT_UNDEF arg_name %+ %%i, d
297
            CAT_UNDEF arg_name %+ %%i, w
328
            CAT_UNDEF arg_name %+ %%i, w
298
            CAT_UNDEF arg_name %+ %%i, h
329
            CAT_UNDEF arg_name %+ %%i, h
Lines 309-314 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 Link Here
309
    %assign %%i 0
340
    %assign %%i 0
310
    %rep %0
341
    %rep %0
311
        %xdefine %1q r %+ %%i %+ q
342
        %xdefine %1q r %+ %%i %+ q
343
        %xdefine %1p r %+ %%i %+ p
312
        %xdefine %1d r %+ %%i %+ d
344
        %xdefine %1d r %+ %%i %+ d
313
        %xdefine %1w r %+ %%i %+ w
345
        %xdefine %1w r %+ %%i %+ w
314
        %xdefine %1h r %+ %%i %+ h
346
        %xdefine %1h r %+ %%i %+ h
(-)a/libswscale/x86/output.asm (-2 / +2 lines)
Lines 152-165 cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset Link Here
152
    movsx     cntr_reg,  fltsizem
152
    movsx     cntr_reg,  fltsizem
153
.filterloop_ %+ %%i:
153
.filterloop_ %+ %%i:
154
    ; input pixels
154
    ; input pixels
155
    mov             r6, [srcq+gprsize*cntr_reg-2*gprsize]
155
    mov            r6p, [srcq+ptrsize*cntr_reg-2*ptrsize]
156
%if %1 == 16
156
%if %1 == 16
157
    mova            m3, [r6+r5*4]
157
    mova            m3, [r6+r5*4]
158
    mova            m5, [r6+r5*4+mmsize]
158
    mova            m5, [r6+r5*4+mmsize]
159
%else ; %1 == 8/9/10
159
%else ; %1 == 8/9/10
160
    mova            m3, [r6+r5*2]
160
    mova            m3, [r6+r5*2]
161
%endif ; %1 == 8/9/10/16
161
%endif ; %1 == 8/9/10/16
162
    mov             r6, [srcq+gprsize*cntr_reg-gprsize]
162
    mov            r6p, [srcq+ptrsize*cntr_reg-ptrsize]
163
%if %1 == 16
163
%if %1 == 16
164
    mova            m4, [r6+r5*4]
164
    mova            m4, [r6+r5*4]
165
    mova            m6, [r6+r5*4+mmsize]
165
    mova            m6, [r6+r5*4+mmsize]
(-)a/libswscale/x86/swscale_template.c (-41 / +40 lines)
Lines 774-785 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], Link Here
774
        const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
774
        const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
775
#if ARCH_X86_64
775
#if ARCH_X86_64
776
        __asm__ volatile(
776
        __asm__ volatile(
777
            YSCALEYUV2RGB(%%r8, %5)
777
            YSCALEYUV2RGB(%%REG8, %5)
778
            YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
778
            YSCALEYUV2RGB_YA(%%REG8, %5, %6, %7)
779
            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
779
            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
780
            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
780
            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
781
            "packuswb            %%mm7, %%mm1       \n\t"
781
            "packuswb            %%mm7, %%mm1       \n\t"
782
            WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
782
            WRITEBGR32(%4, 8280(%5), %%REG8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
783
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
783
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
784
               "a" (&c->redDither),
784
               "a" (&c->redDither),
785
               "r" (abuf0), "r" (abuf1)
785
               "r" (abuf0), "r" (abuf1)
Lines 791-797 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], Link Here
791
        __asm__ volatile(
791
        __asm__ volatile(
792
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
792
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
793
            "mov        %4, %%"REG_b"               \n\t"
793
            "mov        %4, %%"REG_b"               \n\t"
794
            "push %%"REG_BP"                        \n\t"
794
            "push %%"REG_rBP"                       \n\t"
795
            YSCALEYUV2RGB(%%REGBP, %5)
795
            YSCALEYUV2RGB(%%REGBP, %5)
796
            "push                   %0              \n\t"
796
            "push                   %0              \n\t"
797
            "push                   %1              \n\t"
797
            "push                   %1              \n\t"
Lines 804-810 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], Link Here
804
            "pop                    %1              \n\t"
804
            "pop                    %1              \n\t"
805
            "pop                    %0              \n\t"
805
            "pop                    %0              \n\t"
806
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
806
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
807
            "pop %%"REG_BP"                         \n\t"
807
            "pop %%"REG_rBP"                        \n\t"
808
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
808
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
809
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
809
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
810
               "a" (&c->redDither)
810
               "a" (&c->redDither)
Lines 814-824 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], Link Here
814
        __asm__ volatile(
814
        __asm__ volatile(
815
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
815
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
816
            "mov        %4, %%"REG_b"               \n\t"
816
            "mov        %4, %%"REG_b"               \n\t"
817
            "push %%"REG_BP"                        \n\t"
817
            "push %%"REG_rBP"                       \n\t"
818
            YSCALEYUV2RGB(%%REGBP, %5)
818
            YSCALEYUV2RGB(%%REGBP, %5)
819
            "pcmpeqd %%mm7, %%mm7                   \n\t"
819
            "pcmpeqd %%mm7, %%mm7                   \n\t"
820
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
820
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
821
            "pop %%"REG_BP"                         \n\t"
821
            "pop %%"REG_rBP"                        \n\t"
822
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
822
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
823
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
823
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
824
               "a" (&c->redDither)
824
               "a" (&c->redDither)
Lines 838-848 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], Link Here
838
    __asm__ volatile(
838
    __asm__ volatile(
839
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
839
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
840
        "mov        %4, %%"REG_b"               \n\t"
840
        "mov        %4, %%"REG_b"               \n\t"
841
        "push %%"REG_BP"                        \n\t"
841
        "push %%"REG_rBP"                       \n\t"
842
        YSCALEYUV2RGB(%%REGBP, %5)
842
        YSCALEYUV2RGB(%%REGBP, %5)
843
        "pxor    %%mm7, %%mm7                   \n\t"
843
        "pxor    %%mm7, %%mm7                   \n\t"
844
        WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
844
        WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
845
        "pop %%"REG_BP"                         \n\t"
845
        "pop %%"REG_rBP"                        \n\t"
846
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
846
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
847
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
847
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
848
           "a" (&c->redDither)
848
           "a" (&c->redDither)
Lines 861-867 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], Link Here
861
    __asm__ volatile(
861
    __asm__ volatile(
862
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
862
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
863
        "mov        %4, %%"REG_b"               \n\t"
863
        "mov        %4, %%"REG_b"               \n\t"
864
        "push %%"REG_BP"                        \n\t"
864
        "push %%"REG_rBP"                       \n\t"
865
        YSCALEYUV2RGB(%%REGBP, %5)
865
        YSCALEYUV2RGB(%%REGBP, %5)
866
        "pxor    %%mm7, %%mm7                   \n\t"
866
        "pxor    %%mm7, %%mm7                   \n\t"
867
        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
867
        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
Lines 871-877 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], Link Here
871
        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
871
        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
872
#endif
872
#endif
873
        WRITERGB15(%%REGb, 8280(%5), %%REGBP)
873
        WRITERGB15(%%REGb, 8280(%5), %%REGBP)
874
        "pop %%"REG_BP"                         \n\t"
874
        "pop %%"REG_rBP"                        \n\t"
875
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
875
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
876
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
876
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
877
           "a" (&c->redDither)
877
           "a" (&c->redDither)
Lines 890-896 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], Link Here
890
    __asm__ volatile(
890
    __asm__ volatile(
891
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
891
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
892
        "mov        %4, %%"REG_b"               \n\t"
892
        "mov        %4, %%"REG_b"               \n\t"
893
        "push %%"REG_BP"                        \n\t"
893
        "push %%"REG_rBP"                       \n\t"
894
        YSCALEYUV2RGB(%%REGBP, %5)
894
        YSCALEYUV2RGB(%%REGBP, %5)
895
        "pxor    %%mm7, %%mm7                   \n\t"
895
        "pxor    %%mm7, %%mm7                   \n\t"
896
        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
896
        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
Lines 900-906 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], Link Here
900
        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
900
        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
901
#endif
901
#endif
902
        WRITERGB16(%%REGb, 8280(%5), %%REGBP)
902
        WRITERGB16(%%REGb, 8280(%5), %%REGBP)
903
        "pop %%"REG_BP"                         \n\t"
903
        "pop %%"REG_rBP"                        \n\t"
904
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
904
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
905
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
905
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
906
           "a" (&c->redDither)
906
           "a" (&c->redDither)
Lines 959-968 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], Link Here
959
    __asm__ volatile(
959
    __asm__ volatile(
960
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
960
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
961
        "mov %4, %%"REG_b"                        \n\t"
961
        "mov %4, %%"REG_b"                        \n\t"
962
        "push %%"REG_BP"                        \n\t"
962
        "push %%"REG_rBP"                       \n\t"
963
        YSCALEYUV2PACKED(%%REGBP, %5)
963
        YSCALEYUV2PACKED(%%REGBP, %5)
964
        WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
964
        WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
965
        "pop %%"REG_BP"                         \n\t"
965
        "pop %%"REG_rBP"                        \n\t"
966
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
966
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
967
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
967
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
968
           "a" (&c->redDither)
968
           "a" (&c->redDither)
Lines 1101-1111 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, Link Here
1101
            __asm__ volatile(
1101
            __asm__ volatile(
1102
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1102
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1103
                "mov        %4, %%"REG_b"               \n\t"
1103
                "mov        %4, %%"REG_b"               \n\t"
1104
                "push %%"REG_BP"                        \n\t"
1104
                "push %%"REG_rBP"                       \n\t"
1105
                YSCALEYUV2RGB1(%%REGBP, %5)
1105
                YSCALEYUV2RGB1(%%REGBP, %5)
1106
                YSCALEYUV2RGB1_ALPHA(%%REGBP)
1106
                YSCALEYUV2RGB1_ALPHA(%%REGBP)
1107
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1107
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1108
                "pop %%"REG_BP"                         \n\t"
1108
                "pop %%"REG_rBP"                        \n\t"
1109
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1109
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1110
                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1110
                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1111
                   "a" (&c->redDither)
1111
                   "a" (&c->redDither)
Lines 1114-1124 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, Link Here
1114
            __asm__ volatile(
1114
            __asm__ volatile(
1115
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1115
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1116
                "mov        %4, %%"REG_b"               \n\t"
1116
                "mov        %4, %%"REG_b"               \n\t"
1117
                "push %%"REG_BP"                        \n\t"
1117
                "push %%"REG_rBP"                       \n\t"
1118
                YSCALEYUV2RGB1(%%REGBP, %5)
1118
                YSCALEYUV2RGB1(%%REGBP, %5)
1119
                "pcmpeqd %%mm7, %%mm7                   \n\t"
1119
                "pcmpeqd %%mm7, %%mm7                   \n\t"
1120
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1120
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1121
                "pop %%"REG_BP"                         \n\t"
1121
                "pop %%"REG_rBP"                        \n\t"
1122
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1122
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1123
                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1123
                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1124
                   "a" (&c->redDither)
1124
                   "a" (&c->redDither)
Lines 1130-1140 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, Link Here
1130
            __asm__ volatile(
1130
            __asm__ volatile(
1131
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1131
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1132
                "mov        %4, %%"REG_b"               \n\t"
1132
                "mov        %4, %%"REG_b"               \n\t"
1133
                "push %%"REG_BP"                        \n\t"
1133
                "push %%"REG_rBP"                       \n\t"
1134
                YSCALEYUV2RGB1b(%%REGBP, %5)
1134
                YSCALEYUV2RGB1b(%%REGBP, %5)
1135
                YSCALEYUV2RGB1_ALPHA(%%REGBP)
1135
                YSCALEYUV2RGB1_ALPHA(%%REGBP)
1136
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1136
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1137
                "pop %%"REG_BP"                         \n\t"
1137
                "pop %%"REG_rBP"                        \n\t"
1138
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1138
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1139
                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1139
                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1140
                   "a" (&c->redDither)
1140
                   "a" (&c->redDither)
Lines 1143-1153 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, Link Here
1143
            __asm__ volatile(
1143
            __asm__ volatile(
1144
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1144
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1145
                "mov        %4, %%"REG_b"               \n\t"
1145
                "mov        %4, %%"REG_b"               \n\t"
1146
                "push %%"REG_BP"                        \n\t"
1146
                "push %%"REG_rBP"                       \n\t"
1147
                YSCALEYUV2RGB1b(%%REGBP, %5)
1147
                YSCALEYUV2RGB1b(%%REGBP, %5)
1148
                "pcmpeqd %%mm7, %%mm7                   \n\t"
1148
                "pcmpeqd %%mm7, %%mm7                   \n\t"
1149
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1149
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1150
                "pop %%"REG_BP"                         \n\t"
1150
                "pop %%"REG_rBP"                        \n\t"
1151
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1151
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1152
                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1152
                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1153
                   "a" (&c->redDither)
1153
                   "a" (&c->redDither)
Lines 1169-1179 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, Link Here
1169
        __asm__ volatile(
1169
        __asm__ volatile(
1170
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1170
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1171
            "mov        %4, %%"REG_b"               \n\t"
1171
            "mov        %4, %%"REG_b"               \n\t"
1172
            "push %%"REG_BP"                        \n\t"
1172
            "push %%"REG_rBP"                       \n\t"
1173
            YSCALEYUV2RGB1(%%REGBP, %5)
1173
            YSCALEYUV2RGB1(%%REGBP, %5)
1174
            "pxor    %%mm7, %%mm7                   \n\t"
1174
            "pxor    %%mm7, %%mm7                   \n\t"
1175
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1175
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1176
            "pop %%"REG_BP"                         \n\t"
1176
            "pop %%"REG_rBP"                        \n\t"
1177
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1177
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1178
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1178
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1179
               "a" (&c->redDither)
1179
               "a" (&c->redDither)
Lines 1183-1193 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, Link Here
1183
        __asm__ volatile(
1183
        __asm__ volatile(
1184
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1184
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1185
            "mov        %4, %%"REG_b"               \n\t"
1185
            "mov        %4, %%"REG_b"               \n\t"
1186
            "push %%"REG_BP"                        \n\t"
1186
            "push %%"REG_rBP"                       \n\t"
1187
            YSCALEYUV2RGB1b(%%REGBP, %5)
1187
            YSCALEYUV2RGB1b(%%REGBP, %5)
1188
            "pxor    %%mm7, %%mm7                   \n\t"
1188
            "pxor    %%mm7, %%mm7                   \n\t"
1189
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1189
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1190
            "pop %%"REG_BP"                         \n\t"
1190
            "pop %%"REG_rBP"                        \n\t"
1191
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1191
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1192
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1192
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1193
               "a" (&c->redDither)
1193
               "a" (&c->redDither)
Lines 1208-1214 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, Link Here
1208
        __asm__ volatile(
1208
        __asm__ volatile(
1209
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1209
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1210
            "mov        %4, %%"REG_b"               \n\t"
1210
            "mov        %4, %%"REG_b"               \n\t"
1211
            "push %%"REG_BP"                        \n\t"
1211
            "push %%"REG_rBP"                       \n\t"
1212
            YSCALEYUV2RGB1(%%REGBP, %5)
1212
            YSCALEYUV2RGB1(%%REGBP, %5)
1213
            "pxor    %%mm7, %%mm7                   \n\t"
1213
            "pxor    %%mm7, %%mm7                   \n\t"
1214
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1214
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
Lines 1218-1224 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, Link Here
1218
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1218
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1219
#endif
1219
#endif
1220
            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1220
            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1221
            "pop %%"REG_BP"                         \n\t"
1221
            "pop %%"REG_rBP"                        \n\t"
1222
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1222
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1223
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1223
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1224
               "a" (&c->redDither)
1224
               "a" (&c->redDither)
Lines 1228-1234 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, Link Here
1228
        __asm__ volatile(
1228
        __asm__ volatile(
1229
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1229
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1230
            "mov        %4, %%"REG_b"               \n\t"
1230
            "mov        %4, %%"REG_b"               \n\t"
1231
            "push %%"REG_BP"                        \n\t"
1231
            "push %%"REG_rBP"                       \n\t"
1232
            YSCALEYUV2RGB1b(%%REGBP, %5)
1232
            YSCALEYUV2RGB1b(%%REGBP, %5)
1233
            "pxor    %%mm7, %%mm7                   \n\t"
1233
            "pxor    %%mm7, %%mm7                   \n\t"
1234
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1234
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
Lines 1238-1244 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, Link Here
1238
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1238
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1239
#endif
1239
#endif
1240
            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1240
            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1241
            "pop %%"REG_BP"                         \n\t"
1241
            "pop %%"REG_rBP"                        \n\t"
1242
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1242
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1243
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1243
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1244
               "a" (&c->redDither)
1244
               "a" (&c->redDither)
Lines 1259-1265 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, Link Here
1259
        __asm__ volatile(
1259
        __asm__ volatile(
1260
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1260
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1261
            "mov        %4, %%"REG_b"               \n\t"
1261
            "mov        %4, %%"REG_b"               \n\t"
1262
            "push %%"REG_BP"                        \n\t"
1262
            "push %%"REG_rBP"                       \n\t"
1263
            YSCALEYUV2RGB1(%%REGBP, %5)
1263
            YSCALEYUV2RGB1(%%REGBP, %5)
1264
            "pxor    %%mm7, %%mm7                   \n\t"
1264
            "pxor    %%mm7, %%mm7                   \n\t"
1265
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1265
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
Lines 1269-1275 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, Link Here
1269
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1269
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1270
#endif
1270
#endif
1271
            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1271
            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1272
            "pop %%"REG_BP"                         \n\t"
1272
            "pop %%"REG_rBP"                        \n\t"
1273
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1273
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1274
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1274
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1275
               "a" (&c->redDither)
1275
               "a" (&c->redDither)
Lines 1279-1285 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, Link Here
1279
        __asm__ volatile(
1279
        __asm__ volatile(
1280
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1280
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1281
            "mov        %4, %%"REG_b"               \n\t"
1281
            "mov        %4, %%"REG_b"               \n\t"
1282
            "push %%"REG_BP"                        \n\t"
1282
            "push %%"REG_rBP"                       \n\t"
1283
            YSCALEYUV2RGB1b(%%REGBP, %5)
1283
            YSCALEYUV2RGB1b(%%REGBP, %5)
1284
            "pxor    %%mm7, %%mm7                   \n\t"
1284
            "pxor    %%mm7, %%mm7                   \n\t"
1285
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1285
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
Lines 1289-1295 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, Link Here
1289
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1289
            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1290
#endif
1290
#endif
1291
            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1291
            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1292
            "pop %%"REG_BP"                         \n\t"
1292
            "pop %%"REG_rBP"                        \n\t"
1293
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1293
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1294
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1294
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1295
               "a" (&c->redDither)
1295
               "a" (&c->redDither)
Lines 1347-1356 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, Link Here
1347
        __asm__ volatile(
1347
        __asm__ volatile(
1348
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1348
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1349
            "mov        %4, %%"REG_b"               \n\t"
1349
            "mov        %4, %%"REG_b"               \n\t"
1350
            "push %%"REG_BP"                        \n\t"
1350
            "push %%"REG_rBP"                       \n\t"
1351
            YSCALEYUV2PACKED1(%%REGBP, %5)
1351
            YSCALEYUV2PACKED1(%%REGBP, %5)
1352
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1352
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1353
            "pop %%"REG_BP"                         \n\t"
1353
            "pop %%"REG_rBP"                        \n\t"
1354
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1354
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1355
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1355
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1356
               "a" (&c->redDither)
1356
               "a" (&c->redDither)
Lines 1360-1369 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, Link Here
1360
        __asm__ volatile(
1360
        __asm__ volatile(
1361
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1361
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1362
            "mov        %4, %%"REG_b"               \n\t"
1362
            "mov        %4, %%"REG_b"               \n\t"
1363
            "push %%"REG_BP"                        \n\t"
1363
            "push %%"REG_rBP"                       \n\t"
1364
            YSCALEYUV2PACKED1b(%%REGBP, %5)
1364
            YSCALEYUV2PACKED1b(%%REGBP, %5)
1365
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1365
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1366
            "pop %%"REG_BP"                         \n\t"
1366
            "pop %%"REG_rBP"                        \n\t"
1367
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1367
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1368
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1368
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1369
               "a" (&c->redDither)
1369
               "a" (&c->redDither)
Lines 1378-1384 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, Link Here
1378
{
1378
{
1379
    int32_t *filterPos = c->hLumFilterPos;
1379
    int32_t *filterPos = c->hLumFilterPos;
1380
    int16_t *filter    = c->hLumFilter;
1380
    int16_t *filter    = c->hLumFilter;
1381
    void    *mmxextFilterCode = c->lumMmxextFilterCode;
1381
    x86_native_reg mmxextFilterCode = (uintptr_t)c->lumMmxextFilterCode;
1382
    int i;
1382
    int i;
1383
#if defined(PIC)
1383
#if defined(PIC)
1384
    uint64_t ebxsave;
1384
    uint64_t ebxsave;
1385
- 

Return to bug 452482