Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 142380 | Differences between
and this patch

Collapse All | Expand All

(-)ffmpeg-0.4.9-p20060530-static/libavcodec/i386/snowdsp_mmx.c (-27 / +33 lines)
Lines 463-471 Link Here
463
        "1:                                          \n\t"
463
        "1:                                          \n\t"
464
464
465
        "mov %6, %%"REG_a"                           \n\t"
465
        "mov %6, %%"REG_a"                           \n\t"
466
        "mov %4, %%"REG_b"                           \n\t"
466
        "mov %4, %%"REG_S"                           \n\t"
467
467
468
        snow_vertical_compose_sse2_load(REG_b,"xmm0","xmm2","xmm4","xmm6")
468
        snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6")
469
        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
469
        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
470
        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
470
        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
471
        snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\
471
        snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\
Lines 482-491 Link Here
482
        snow_vertical_compose_sse2_sub("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
482
        snow_vertical_compose_sse2_sub("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
483
        snow_vertical_compose_sse2_store(REG_a,"xmm1","xmm3","xmm5","xmm7")
483
        snow_vertical_compose_sse2_store(REG_a,"xmm1","xmm3","xmm5","xmm7")
484
        "mov %3, %%"REG_c"                           \n\t"
484
        "mov %3, %%"REG_c"                           \n\t"
485
        snow_vertical_compose_sse2_load(REG_b,"xmm0","xmm2","xmm4","xmm6")
485
        snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6")
486
        snow_vertical_compose_sse2_add(REG_c,"xmm1","xmm3","xmm5","xmm7")
486
        snow_vertical_compose_sse2_add(REG_c,"xmm1","xmm3","xmm5","xmm7")
487
        snow_vertical_compose_sse2_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
487
        snow_vertical_compose_sse2_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
488
        snow_vertical_compose_sse2_store(REG_b,"xmm0","xmm2","xmm4","xmm6")
488
        snow_vertical_compose_sse2_store(REG_S,"xmm0","xmm2","xmm4","xmm6")
489
        "mov %2, %%"REG_a"                           \n\t"
489
        "mov %2, %%"REG_a"                           \n\t"
490
        snow_vertical_compose_sse2_load(REG_c,"xmm1","xmm3","xmm5","xmm7")
490
        snow_vertical_compose_sse2_load(REG_c,"xmm1","xmm3","xmm5","xmm7")
491
        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
491
        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
Lines 495-507 Link Here
495
        "pcmpeqd %%xmm1, %%xmm1                      \n\t"
495
        "pcmpeqd %%xmm1, %%xmm1                      \n\t"
496
        "pslld $31, %%xmm1                           \n\t"
496
        "pslld $31, %%xmm1                           \n\t"
497
        "psrld $28, %%xmm1                           \n\t"
497
        "psrld $28, %%xmm1                           \n\t"
498
        "mov %1, %%"REG_b"                           \n\t"
498
        "mov %1, %%"REG_S"                           \n\t"
499
499
500
        snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6")
500
        snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6")
501
        snow_vertical_compose_sse2_sra("4","xmm0","xmm2","xmm4","xmm6")
501
        snow_vertical_compose_sse2_sra("4","xmm0","xmm2","xmm4","xmm6")
502
        snow_vertical_compose_sse2_add(REG_c,"xmm0","xmm2","xmm4","xmm6")
502
        snow_vertical_compose_sse2_add(REG_c,"xmm0","xmm2","xmm4","xmm6")
503
        snow_vertical_compose_sse2_store(REG_c,"xmm0","xmm2","xmm4","xmm6")
503
        snow_vertical_compose_sse2_store(REG_c,"xmm0","xmm2","xmm4","xmm6")
504
        snow_vertical_compose_sse2_add(REG_b,"xmm0","xmm2","xmm4","xmm6")
504
        snow_vertical_compose_sse2_add(REG_S,"xmm0","xmm2","xmm4","xmm6")
505
        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
505
        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
506
        snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\
506
        snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\
507
        snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
507
        snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
Lines 515-521 Link Here
515
        :"+d"(i)
515
        :"+d"(i)
516
        :
516
        :
517
        "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5):
517
        "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5):
518
        "%"REG_a"","%"REG_b"","%"REG_c"");
518
        "%"REG_a"","%"REG_S"","%"REG_c"");
519
}
519
}
520
520
521
#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
521
#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
Lines 570-578 Link Here
570
        "1:                                          \n\t"
570
        "1:                                          \n\t"
571
571
572
        "mov %6, %%"REG_a"                           \n\t"
572
        "mov %6, %%"REG_a"                           \n\t"
573
        "mov %4, %%"REG_b"                           \n\t"
573
        "mov %4, %%"REG_S"                           \n\t"
574
574
575
        snow_vertical_compose_mmx_load(REG_b,"mm0","mm2","mm4","mm6")
575
        snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6")
576
        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
576
        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
577
        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
577
        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
578
        snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6")
578
        snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6")
Lines 589-598 Link Here
589
        snow_vertical_compose_mmx_sub("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
589
        snow_vertical_compose_mmx_sub("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
590
        snow_vertical_compose_mmx_store(REG_a,"mm1","mm3","mm5","mm7")
590
        snow_vertical_compose_mmx_store(REG_a,"mm1","mm3","mm5","mm7")
591
        "mov %3, %%"REG_c"                           \n\t"
591
        "mov %3, %%"REG_c"                           \n\t"
592
        snow_vertical_compose_mmx_load(REG_b,"mm0","mm2","mm4","mm6")
592
        snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6")
593
        snow_vertical_compose_mmx_add(REG_c,"mm1","mm3","mm5","mm7")
593
        snow_vertical_compose_mmx_add(REG_c,"mm1","mm3","mm5","mm7")
594
        snow_vertical_compose_mmx_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
594
        snow_vertical_compose_mmx_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
595
        snow_vertical_compose_mmx_store(REG_b,"mm0","mm2","mm4","mm6")
595
        snow_vertical_compose_mmx_store(REG_S,"mm0","mm2","mm4","mm6")
596
        "mov %2, %%"REG_a"                           \n\t"
596
        "mov %2, %%"REG_a"                           \n\t"
597
        snow_vertical_compose_mmx_load(REG_c,"mm1","mm3","mm5","mm7")
597
        snow_vertical_compose_mmx_load(REG_c,"mm1","mm3","mm5","mm7")
598
        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
598
        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
Lines 602-614 Link Here
602
        "pcmpeqd %%mm1, %%mm1                        \n\t"
602
        "pcmpeqd %%mm1, %%mm1                        \n\t"
603
        "pslld $31, %%mm1                            \n\t"
603
        "pslld $31, %%mm1                            \n\t"
604
        "psrld $28, %%mm1                            \n\t"
604
        "psrld $28, %%mm1                            \n\t"
605
        "mov %1, %%"REG_b"                           \n\t"
605
        "mov %1, %%"REG_S"                           \n\t"
606
606
607
        snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6")
607
        snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6")
608
        snow_vertical_compose_mmx_sra("4","mm0","mm2","mm4","mm6")
608
        snow_vertical_compose_mmx_sra("4","mm0","mm2","mm4","mm6")
609
        snow_vertical_compose_mmx_add(REG_c,"mm0","mm2","mm4","mm6")
609
        snow_vertical_compose_mmx_add(REG_c,"mm0","mm2","mm4","mm6")
610
        snow_vertical_compose_mmx_store(REG_c,"mm0","mm2","mm4","mm6")
610
        snow_vertical_compose_mmx_store(REG_c,"mm0","mm2","mm4","mm6")
611
        snow_vertical_compose_mmx_add(REG_b,"mm0","mm2","mm4","mm6")
611
        snow_vertical_compose_mmx_add(REG_S,"mm0","mm2","mm4","mm6")
612
        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
612
        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
613
        snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6")
613
        snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6")
614
        snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
614
        snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
Lines 622-636 Link Here
622
        :"+d"(i)
622
        :"+d"(i)
623
        :
623
        :
624
        "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5):
624
        "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5):
625
        "%"REG_a"","%"REG_b"","%"REG_c"");
625
        "%"REG_a"","%"REG_S"","%"REG_c"");
626
}
626
}
627
627
628
#define snow_inner_add_yblock_sse2_header \
628
#define snow_inner_add_yblock_sse2_header \
629
    DWTELEM * * dst_array = sb->line + src_y;\
629
    DWTELEM * * dst_array = sb->line + src_y;\
630
    long pic_reg_b;\
630
    asm volatile(\
631
    asm volatile(\
631
             "mov  %6, %%"REG_c"             \n\t"\
632
             "mov  %%"REG_b", %2             \n\t"\
632
             "mov  %5, %%"REG_b"             \n\t"\
633
             "mov  %7, %%"REG_c"             \n\t"\
633
             "mov  %3, %%"REG_S"             \n\t"\
634
             "mov  %6, %%"REG_b"             \n\t"\
635
             "mov  %4, %%"REG_S"             \n\t"\
634
             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
636
             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
635
             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
637
             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
636
             "pslld $31, %%xmm3              \n\t"\
638
             "pslld $31, %%xmm3              \n\t"\
Lines 638-644 Link Here
638
             "1:                             \n\t"\
640
             "1:                             \n\t"\
639
             "mov %1, %%"REG_D"              \n\t"\
641
             "mov %1, %%"REG_D"              \n\t"\
640
             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
642
             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
641
             "add %2, %%"REG_D"              \n\t"
643
             "add %3, %%"REG_D"              \n\t"
642
644
643
#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
645
#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
644
             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
646
             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
Lines 686-695 Link Here
686
688
687
#define snow_inner_add_yblock_sse2_end_common2\
689
#define snow_inner_add_yblock_sse2_end_common2\
688
             "jnz 1b                         \n\t"\
690
             "jnz 1b                         \n\t"\
689
             :"+m"(dst8),"+m"(dst_array)\
691
             "mov  %2, %%"REG_b"             \n\t"\
692
             :"+m"(dst8),"+m"(dst_array),"+m"(pic_reg_b)\
690
             :\
693
             :\
691
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
694
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
692
             "%"REG_b"","%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
695
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
693
696
694
#define snow_inner_add_yblock_sse2_end_8\
697
#define snow_inner_add_yblock_sse2_end_8\
695
             "sal $1, %%"REG_c"              \n\t"\
698
             "sal $1, %%"REG_c"              \n\t"\
Lines 727-733 Link Here
727
730
728
             "mov %1, %%"REG_D"              \n\t"
731
             "mov %1, %%"REG_D"              \n\t"
729
             "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
732
             "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
730
             "add %2, %%"REG_D"              \n\t"
733
             "add %3, %%"REG_D"              \n\t"
731
734
732
             "movdqa (%%"REG_D"), %%xmm4     \n\t"
735
             "movdqa (%%"REG_D"), %%xmm4     \n\t"
733
             "movdqa %%xmm5, %%xmm6          \n\t"
736
             "movdqa %%xmm5, %%xmm6          \n\t"
Lines 792-801 Link Here
792
795
793
#define snow_inner_add_yblock_mmx_header \
796
#define snow_inner_add_yblock_mmx_header \
794
    DWTELEM * * dst_array = sb->line + src_y;\
797
    DWTELEM * * dst_array = sb->line + src_y;\
798
    long pic_reg_b;\
795
    asm volatile(\
799
    asm volatile(\
796
             "mov  %6, %%"REG_c"             \n\t"\
800
             "mov  %%"REG_b", %2             \n\t"\
797
             "mov  %5, %%"REG_b"             \n\t"\
801
             "mov  %7, %%"REG_c"             \n\t"\
798
             "mov  %3, %%"REG_S"             \n\t"\
802
             "mov  %6, %%"REG_b"             \n\t"\
803
             "mov  %4, %%"REG_S"             \n\t"\
799
             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
804
             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
800
             "pcmpeqd %%mm3, %%mm3           \n\t"\
805
             "pcmpeqd %%mm3, %%mm3           \n\t"\
801
             "pslld $31, %%mm3               \n\t"\
806
             "pslld $31, %%mm3               \n\t"\
Lines 803-809 Link Here
803
             "1:                             \n\t"\
808
             "1:                             \n\t"\
804
             "mov %1, %%"REG_D"              \n\t"\
809
             "mov %1, %%"REG_D"              \n\t"\
805
             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
810
             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
806
             "add %2, %%"REG_D"              \n\t"
811
             "add %3, %%"REG_D"              \n\t"
807
812
808
#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
813
#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
809
             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
814
             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
Lines 859-868 Link Here
859
             "add %%"REG_c", %0              \n\t"\
864
             "add %%"REG_c", %0              \n\t"\
860
             "dec %%"REG_b"                  \n\t"\
865
             "dec %%"REG_b"                  \n\t"\
861
             "jnz 1b                         \n\t"\
866
             "jnz 1b                         \n\t"\
862
             :"+m"(dst8),"+m"(dst_array)\
867
             "mov %2,%%"REG_b"               \n\t"\
868
             :"+m"(dst8),"+m"(dst_array),"+m"(pic_reg_b)\
863
             :\
869
             :\
864
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
870
             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
865
             "%"REG_b"","%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
871
             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
866
872
867
static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
873
static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
868
                      int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
874
                      int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){

Return to bug 142380