Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 290741 | Differences between
and this patch

Collapse All | Expand All

(-)libavcodec/x86/h264dsp_mmx.c.ori (-12 / +10 lines)
Lines 1138-1144 Link Here
1138
    int h=8;\
1138
    int h=8;\
1139
    __asm__ volatile(\
1139
    __asm__ volatile(\
1140
        "pxor %%mm7, %%mm7          \n\t"\
1140
        "pxor %%mm7, %%mm7          \n\t"\
1141
        "movq %5, %%mm6             \n\t"\
1141
        "movq "MANGLE(ff_pw_5) ", %%mm6\n\t"\
1142
        "1:                         \n\t"\
1142
        "1:                         \n\t"\
1143
        "movq    (%0), %%mm0        \n\t"\
1143
        "movq    (%0), %%mm0        \n\t"\
1144
        "movq   1(%0), %%mm2        \n\t"\
1144
        "movq   1(%0), %%mm2        \n\t"\
Lines 1172-1178 Link Here
1172
        "punpcklbw %%mm7, %%mm5     \n\t"\
1172
        "punpcklbw %%mm7, %%mm5     \n\t"\
1173
        "paddw %%mm3, %%mm2         \n\t"\
1173
        "paddw %%mm3, %%mm2         \n\t"\
1174
        "paddw %%mm5, %%mm4         \n\t"\
1174
        "paddw %%mm5, %%mm4         \n\t"\
1175
        "movq %6, %%mm5             \n\t"\
1175
        "movq "MANGLE(ff_pw_16) ", %%mm5\n\t"\
1176
        "paddw %%mm5, %%mm2         \n\t"\
1176
        "paddw %%mm5, %%mm2         \n\t"\
1177
        "paddw %%mm5, %%mm4         \n\t"\
1177
        "paddw %%mm5, %%mm4         \n\t"\
1178
        "paddw %%mm2, %%mm0         \n\t"\
1178
        "paddw %%mm2, %%mm0         \n\t"\
Lines 1186-1192 Link Here
1186
        "decl %2                    \n\t"\
1186
        "decl %2                    \n\t"\
1187
        " jnz 1b                    \n\t"\
1187
        " jnz 1b                    \n\t"\
1188
        : "+a"(src), "+c"(dst), "+g"(h)\
1188
        : "+a"(src), "+c"(dst), "+g"(h)\
1189
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
1189
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
1190
        : "memory"\
1190
        : "memory"\
1191
    );\
1191
    );\
1192
}\
1192
}\
Lines 1593-1600 Link Here
1593
    int h=8;\
1593
    int h=8;\
1594
    __asm__ volatile(\
1594
    __asm__ volatile(\
1595
        "pxor %%xmm7, %%xmm7        \n\t"\
1595
        "pxor %%xmm7, %%xmm7        \n\t"\
1596
        "movdqa %0, %%xmm6          \n\t"\
1596
        "movdqa "MANGLE(ff_pw_5) ", %%xmm6          \n\t"\
1597
        :: "m"(ff_pw_5)\
1597
        ::\
1598
    );\
1598
    );\
1599
    do{\
1599
    do{\
1600
    __asm__ volatile(\
1600
    __asm__ volatile(\
Lines 1617-1623 Link Here
1617
        "psllw   $2,     %%xmm2     \n\t"\
1617
        "psllw   $2,     %%xmm2     \n\t"\
1618
        "movq    (%2),   %%xmm3     \n\t"\
1618
        "movq    (%2),   %%xmm3     \n\t"\
1619
        "psubw   %%xmm1, %%xmm2     \n\t"\
1619
        "psubw   %%xmm1, %%xmm2     \n\t"\
1620
        "paddw   %5,     %%xmm0     \n\t"\
1620
        "paddw   "MANGLE(ff_pw_16)",     %%xmm0     \n\t"\
1621
        "pmullw  %%xmm6, %%xmm2     \n\t"\
1621
        "pmullw  %%xmm6, %%xmm2     \n\t"\
1622
        "paddw   %%xmm0, %%xmm2     \n\t"\
1622
        "paddw   %%xmm0, %%xmm2     \n\t"\
1623
        "psraw   $5,     %%xmm2     \n\t"\
1623
        "psraw   $5,     %%xmm2     \n\t"\
Lines 1628-1635 Link Here
1628
        "add %4, %1                 \n\t"\
1628
        "add %4, %1                 \n\t"\
1629
        "add %3, %2                 \n\t"\
1629
        "add %3, %2                 \n\t"\
1630
        : "+a"(src), "+c"(dst), "+d"(src2)\
1630
        : "+a"(src), "+c"(dst), "+d"(src2)\
1631
        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
1631
        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
1632
          "m"(ff_pw_16)\
1633
        : "memory"\
1632
        : "memory"\
1634
    );\
1633
    );\
1635
    }while(--h);\
1634
    }while(--h);\
Lines 1640-1646 Link Here
1640
    int h=8;\
1639
    int h=8;\
1641
    __asm__ volatile(\
1640
    __asm__ volatile(\
1642
        "pxor %%xmm7, %%xmm7        \n\t"\
1641
        "pxor %%xmm7, %%xmm7        \n\t"\
1643
        "movdqa %5, %%xmm6          \n\t"\
1642
        "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
1644
        "1:                         \n\t"\
1643
        "1:                         \n\t"\
1645
        "lddqu   -2(%0), %%xmm1     \n\t"\
1644
        "lddqu   -2(%0), %%xmm1     \n\t"\
1646
        "movdqa  %%xmm1, %%xmm0     \n\t"\
1645
        "movdqa  %%xmm1, %%xmm0     \n\t"\
Lines 1660-1666 Link Here
1660
        "paddw   %%xmm4, %%xmm1     \n\t"\
1659
        "paddw   %%xmm4, %%xmm1     \n\t"\
1661
        "psllw   $2,     %%xmm2     \n\t"\
1660
        "psllw   $2,     %%xmm2     \n\t"\
1662
        "psubw   %%xmm1, %%xmm2     \n\t"\
1661
        "psubw   %%xmm1, %%xmm2     \n\t"\
1663
        "paddw   %6,     %%xmm0     \n\t"\
1662
        "paddw   "MANGLE(ff_pw_16)",     %%xmm0     \n\t"\
1664
        "pmullw  %%xmm6, %%xmm2     \n\t"\
1663
        "pmullw  %%xmm6, %%xmm2     \n\t"\
1665
        "paddw   %%xmm0, %%xmm2     \n\t"\
1664
        "paddw   %%xmm0, %%xmm2     \n\t"\
1666
        "psraw   $5,     %%xmm2     \n\t"\
1665
        "psraw   $5,     %%xmm2     \n\t"\
Lines 1671-1678 Link Here
1671
        "decl %2                    \n\t"\
1670
        "decl %2                    \n\t"\
1672
        " jnz 1b                    \n\t"\
1671
        " jnz 1b                    \n\t"\
1673
        : "+a"(src), "+c"(dst), "+g"(h)\
1672
        : "+a"(src), "+c"(dst), "+g"(h)\
1674
        : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
1673
        : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
1675
          "m"(ff_pw_5), "m"(ff_pw_16)\
1676
        : "memory"\
1674
        : "memory"\
1677
    );\
1675
    );\
1678
}\
1676
}\
(-)libavcodec/x86/dsputil_mmx.c.ori (-12 / +18 lines)
Lines 724-734 Link Here
724
}
724
}
725
725
726
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
726
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
727
    uint32_t l1 = *(uint32_t*)(src + 0*src_stride);
728
    uint32_t l2 = *(uint32_t*)(src + 1*src_stride);
729
    uint32_t l3 = *(uint32_t*)(src + 2*src_stride);
730
    uint32_t l4 = *(uint32_t*)(src + 3*src_stride);
731
 
727
    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
732
    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
728
        "movd  %4, %%mm0                \n\t"
733
        "movd  %0, %%mm0                \n\t"
729
        "movd  %5, %%mm1                \n\t"
734
        "movd  %1, %%mm1                \n\t"
730
        "movd  %6, %%mm2                \n\t"
735
        "movd  %2, %%mm2                \n\t"
731
        "movd  %7, %%mm3                \n\t"
736
        "movd  %3, %%mm3                \n\t"
732
        "punpcklbw %%mm1, %%mm0         \n\t"
737
        "punpcklbw %%mm1, %%mm0         \n\t"
733
        "punpcklbw %%mm3, %%mm2         \n\t"
738
        "punpcklbw %%mm3, %%mm2         \n\t"
734
        "movq %%mm0, %%mm1              \n\t"
739
        "movq %%mm0, %%mm1              \n\t"
Lines 741-755 Link Here
741
        "punpckhdq %%mm1, %%mm1         \n\t"
746
        "punpckhdq %%mm1, %%mm1         \n\t"
742
        "movd  %%mm1, %3                \n\t"
747
        "movd  %%mm1, %3                \n\t"
743
748
744
        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
749
        : "+m" (l1),
745
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
750
          "+m" (l2),
746
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
751
          "+m" (l3),
747
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
752
          "+m" (l4)
748
        :  "m" (*(uint32_t*)(src + 0*src_stride)),
749
           "m" (*(uint32_t*)(src + 1*src_stride)),
750
           "m" (*(uint32_t*)(src + 2*src_stride)),
751
           "m" (*(uint32_t*)(src + 3*src_stride))
752
    );
753
    );
754
755
    *(uint32_t*)(dst + 0*dst_stride) = l1;
756
    *(uint32_t*)(dst + 1*dst_stride) = l2;
757
    *(uint32_t*)(dst + 2*dst_stride) = l3;
758
    *(uint32_t*)(dst + 3*dst_stride) = l4;
753
}
759
}
754
760
755
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
761
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){

Return to bug 290741