Rewrite the inline assembly of transpose4x4() in dsputil_mmx.c.

transpose4x4() loads four bytes from each of four source rows, interleaves
them with punpcklbw/punpcklwd/punpckhwd, and stores four transposed rows
to dst.  The old code passed every row as its own "m" operand (eight memory
operands in total).  This patch passes only the two base pointers and the
two strides in registers and addresses the rows as (base), (base,stride),
then advances base by 2*stride with lea for the remaining two rows.
Presumably this was done to reduce operand pressure on the compiler;
confirm against the originating bug report.

Notes for reviewers:
  * dst_reg/src_reg are modified by the lea instructions, so they are
    declared as early-clobber outputs tied to inputs "0" and "1".
  * Because no "m" operands remain, the asm needs a "memory" clobber so
    the compiler knows *src is read and *dst is written.
  * The strides are cast to long so the index register has the same width
    as the pointer base register (needed if this file is built for x86-64;
    a no-op on i386).
  * Line breaks and indentation in this copy were reconstructed; the exact
    whitespace of the target file is not known, so apply with
    "patch -p1 -l" (ignore whitespace).

diff -ur xine-lib-1.1.0-orig/src/libffmpeg/libavcodec/i386/dsputil_mmx.c xine-lib-1.1.0/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
--- xine-lib-1.1.0-orig/src/libffmpeg/libavcodec/i386/dsputil_mmx.c 2005-09-07 16:55:46.000000000 -0400
+++ xine-lib-1.1.0/src/libffmpeg/libavcodec/i386/dsputil_mmx.c 2005-09-07 16:56:31.000000000 -0400
@@ -615,31 +615,33 @@
 }
 
 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
+    void *dst_reg = dst, *src_reg = src;
+
     asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
-        "movd %4, %%mm0 \n\t"
-        "movd %5, %%mm1 \n\t"
-        "movd %6, %%mm2 \n\t"
-        "movd %7, %%mm3 \n\t"
+        "movd (%1), %%mm0 \n\t"
+        "movd (%1,%5), %%mm1 \n\t"
+        "lea (%1, %5, 2), %1 \n\t"
+        "movd (%1), %%mm2 \n\t"
+        "movd (%1,%5), %%mm3 \n\t"
         "punpcklbw %%mm1, %%mm0 \n\t"
         "punpcklbw %%mm3, %%mm2 \n\t"
         "movq %%mm0, %%mm1 \n\t"
         "punpcklwd %%mm2, %%mm0 \n\t"
         "punpckhwd %%mm2, %%mm1 \n\t"
-        "movd %%mm0, %0 \n\t"
+        "movd %%mm0, (%0) \n\t"
         "punpckhdq %%mm0, %%mm0 \n\t"
-        "movd %%mm0, %1 \n\t"
-        "movd %%mm1, %2 \n\t"
+        "movd %%mm0, (%0,%4) \n\t"
+        "lea (%0, %4, 2), %0 \n\t"
+        "movd %%mm1, (%0) \n\t"
         "punpckhdq %%mm1, %%mm1 \n\t"
-        "movd %%mm1, %3 \n\t"
-
-        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 3*dst_stride))
-        : "m" (*(uint32_t*)(src + 0*src_stride)),
-          "m" (*(uint32_t*)(src + 1*src_stride)),
-          "m" (*(uint32_t*)(src + 2*src_stride)),
-          "m" (*(uint32_t*)(src + 3*src_stride))
+        "movd %%mm1, (%0,%4) \n\t"
+        : "=&r" (dst_reg),
+          "=&r" (src_reg)
+        : "0" (dst_reg),
+          "1" (src_reg),
+          "r" ((long)dst_stride),
+          "r" ((long)src_stride)
+        : "memory" /* rows of src are read and rows of dst written via registers only */
     );
 }
 