Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 290741 | Differences between
and this patch

Collapse All | Expand All

(-)libavcodec/x86/dsputil_mmx.c.ori (-12 / +18 lines)
Lines 724-734 Link Here
724
}
724
}
725
725
726
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
726
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
727
    uint32_t l1 = *(uint32_t*)(src + 0*src_stride);
728
    uint32_t l2 = *(uint32_t*)(src + 1*src_stride);
729
    uint32_t l3 = *(uint32_t*)(src + 2*src_stride);
730
    uint32_t l4 = *(uint32_t*)(src + 3*src_stride);
731
 
727
    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
732
    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
728
        "movd  %4, %%mm0                \n\t"
733
        "movd  %0, %%mm0                \n\t"
729
        "movd  %5, %%mm1                \n\t"
734
        "movd  %1, %%mm1                \n\t"
730
        "movd  %6, %%mm2                \n\t"
735
        "movd  %2, %%mm2                \n\t"
731
        "movd  %7, %%mm3                \n\t"
736
        "movd  %3, %%mm3                \n\t"
732
        "punpcklbw %%mm1, %%mm0         \n\t"
737
        "punpcklbw %%mm1, %%mm0         \n\t"
733
        "punpcklbw %%mm3, %%mm2         \n\t"
738
        "punpcklbw %%mm3, %%mm2         \n\t"
734
        "movq %%mm0, %%mm1              \n\t"
739
        "movq %%mm0, %%mm1              \n\t"
Lines 741-755 Link Here
741
        "punpckhdq %%mm1, %%mm1         \n\t"
746
        "punpckhdq %%mm1, %%mm1         \n\t"
742
        "movd  %%mm1, %3                \n\t"
747
        "movd  %%mm1, %3                \n\t"
743
748
744
        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
749
        : "+m" (l1),
745
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
750
          "+m" (l2),
746
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
751
          "+m" (l3),
747
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
752
          "+m" (l4)
748
        :  "m" (*(uint32_t*)(src + 0*src_stride)),
749
           "m" (*(uint32_t*)(src + 1*src_stride)),
750
           "m" (*(uint32_t*)(src + 2*src_stride)),
751
           "m" (*(uint32_t*)(src + 3*src_stride))
752
    );
753
    );
754
755
    *(uint32_t*)(dst + 0*dst_stride) = l1;
756
    *(uint32_t*)(dst + 1*dst_stride) = l2;
757
    *(uint32_t*)(dst + 2*dst_stride) = l3;
758
    *(uint32_t*)(dst + 3*dst_stride) = l4;
753
}
759
}
754
760
755
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
761
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
(-)libavcodec/x86/h264dsp_mmx.c.ori (-6 / +11 lines)
Lines 1136-1144 Link Here
1136
\
1136
\
1137
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1137
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1138
    int h=8;\
1138
    int h=8;\
1139
    const xmm_reg xmm_consts[2] __attribute__((aligned (16))) =  { ff_pw_5, ff_pw_16 }; \
1140
\
1139
    __asm__ volatile(\
1141
    __asm__ volatile(\
1140
        "pxor %%mm7, %%mm7          \n\t"\
1142
        "pxor %%mm7, %%mm7          \n\t"\
1141
        "movq %5, %%mm6             \n\t"\
1143
        "movq (%5), %%mm6           \n\t"\
1144
        "add $16, %5		    \n\t"\
1142
        "1:                         \n\t"\
1145
        "1:                         \n\t"\
1143
        "movq    (%0), %%mm0        \n\t"\
1146
        "movq    (%0), %%mm0        \n\t"\
1144
        "movq   1(%0), %%mm2        \n\t"\
1147
        "movq   1(%0), %%mm2        \n\t"\
Lines 1172-1178 Link Here
1172
        "punpcklbw %%mm7, %%mm5     \n\t"\
1175
        "punpcklbw %%mm7, %%mm5     \n\t"\
1173
        "paddw %%mm3, %%mm2         \n\t"\
1176
        "paddw %%mm3, %%mm2         \n\t"\
1174
        "paddw %%mm5, %%mm4         \n\t"\
1177
        "paddw %%mm5, %%mm4         \n\t"\
1175
        "movq %6, %%mm5             \n\t"\
1178
        "movq (%5), %%mm5           \n\t"\
1176
        "paddw %%mm5, %%mm2         \n\t"\
1179
        "paddw %%mm5, %%mm2         \n\t"\
1177
        "paddw %%mm5, %%mm4         \n\t"\
1180
        "paddw %%mm5, %%mm4         \n\t"\
1178
        "paddw %%mm2, %%mm0         \n\t"\
1181
        "paddw %%mm2, %%mm0         \n\t"\
Lines 1186-1192 Link Here
1186
        "decl %2                    \n\t"\
1189
        "decl %2                    \n\t"\
1187
        " jnz 1b                    \n\t"\
1190
        " jnz 1b                    \n\t"\
1188
        : "+a"(src), "+c"(dst), "+g"(h)\
1191
        : "+a"(src), "+c"(dst), "+g"(h)\
1189
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
1192
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "r"(xmm_consts)\
1190
        : "memory"\
1193
        : "memory"\
1191
    );\
1194
    );\
1192
}\
1195
}\
Lines 1638-1646 Link Here
1638
\
1641
\
1639
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1642
static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1640
    int h=8;\
1643
    int h=8;\
1644
    const xmm_reg xmm_consts[2] __attribute__((aligned (16))) =  { ff_pw_5, ff_pw_16 }; \
1641
    __asm__ volatile(\
1645
    __asm__ volatile(\
1642
        "pxor %%xmm7, %%xmm7        \n\t"\
1646
        "pxor %%xmm7, %%xmm7        \n\t"\
1643
        "movdqa %5, %%xmm6          \n\t"\
1647
        "movdqa (%5), %%xmm6        \n\t"\
1648
        "add $16, %5		    \n\t"\
1644
        "1:                         \n\t"\
1649
        "1:                         \n\t"\
1645
        "lddqu   -2(%0), %%xmm1     \n\t"\
1650
        "lddqu   -2(%0), %%xmm1     \n\t"\
1646
        "movdqa  %%xmm1, %%xmm0     \n\t"\
1651
        "movdqa  %%xmm1, %%xmm0     \n\t"\
Lines 1660-1666 Link Here
1660
        "paddw   %%xmm4, %%xmm1     \n\t"\
1665
        "paddw   %%xmm4, %%xmm1     \n\t"\
1661
        "psllw   $2,     %%xmm2     \n\t"\
1666
        "psllw   $2,     %%xmm2     \n\t"\
1662
        "psubw   %%xmm1, %%xmm2     \n\t"\
1667
        "psubw   %%xmm1, %%xmm2     \n\t"\
1663
        "paddw   %6,     %%xmm0     \n\t"\
1668
        "paddw   (%5),   %%xmm0     \n\t"\
1664
        "pmullw  %%xmm6, %%xmm2     \n\t"\
1669
        "pmullw  %%xmm6, %%xmm2     \n\t"\
1665
        "paddw   %%xmm0, %%xmm2     \n\t"\
1670
        "paddw   %%xmm0, %%xmm2     \n\t"\
1666
        "psraw   $5,     %%xmm2     \n\t"\
1671
        "psraw   $5,     %%xmm2     \n\t"\
Lines 1672-1678 Link Here
1672
        " jnz 1b                    \n\t"\
1677
        " jnz 1b                    \n\t"\
1673
        : "+a"(src), "+c"(dst), "+g"(h)\
1678
        : "+a"(src), "+c"(dst), "+g"(h)\
1674
        : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
1679
        : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
1675
          "m"(ff_pw_5), "m"(ff_pw_16)\
1680
          "r"(xmm_consts)\
1676
        : "memory"\
1681
        : "memory"\
1677
    );\
1682
    );\
1678
}\
1683
}\

Return to bug 290741