2010-07-31  Magnus Granberg  #179872

        libswscale/rgb2rgb.c            add declare ff_bgr2YCoeff, ff_bgr2UCoeff, ff_bgr2VCoeff, ff_bgr2YOffset, ff_bgr2UVOffset, ff_w1111
        libswscale/rgb2rgb_template.c   fix TEXTREL in STORE_BGR24_MMX, rgb24tobgr24, rgb24toyv12
        libswscale/swscale.c            remove declare ff_bgr2YCoeff, ff_bgr2UCoeff, ff_bgr2VCoeff, ff_bgr2YOffset, ff_bgr2UVOffset, ff_w1111

--- a/libswscale/rgb2rgb.c	2010-06-03 22:16:42.000000000 +0200
+++ b/libswscale/rgb2rgb.c	2010-07-31 13:08:44.070629634 +0200
@@ -133,6 +133,18 @@
 DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
 DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
 DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
+#ifdef FAST_BGR2YV12
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
+#else
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
+#endif /* FAST_BGR2YV12 */
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_w1111) = 0x0001000100010001ULL;
 #endif /* ARCH_X86 */
 
 #define RGB2YUV_SHIFT 8
--- a/libswscale/rgb2rgb_template.c	2010-05-04 23:01:48.000000000 +0200
+++ b/libswscale/rgb2rgb_template.c	2010-07-31 13:11:28.049880298 +0200
@@ -127,14 +127,14 @@
     "psrlq $8, %%mm3 \n\t" \
     "psrlq $8, %%mm6 \n\t" \
     "psrlq $8, %%mm7 \n\t" \
-    "pand "MANGLE(mask24l)", %%mm0\n\t" \
-    "pand "MANGLE(mask24l)", %%mm1\n\t" \
-    "pand "MANGLE(mask24l)", %%mm4\n\t" \
-    "pand "MANGLE(mask24l)", %%mm5\n\t" \
-    "pand "MANGLE(mask24h)", %%mm2\n\t" \
-    "pand "MANGLE(mask24h)", %%mm3\n\t" \
-    "pand "MANGLE(mask24h)", %%mm6\n\t" \
-    "pand "MANGLE(mask24h)", %%mm7\n\t" \
+    "pand %2, %%mm0\n\t" \
+    "pand %2, %%mm1\n\t" \
+    "pand %2, %%mm4\n\t" \
+    "pand %2, %%mm5\n\t" \
+    "pand %3, %%mm2\n\t" \
+    "pand %3, %%mm3\n\t" \
+    "pand %3, %%mm6\n\t" \
+    "pand %3, %%mm7\n\t" \
     "por %%mm2, %%mm0 \n\t" \
     "por %%mm3, %%mm1 \n\t" \
     "por %%mm6, %%mm4 \n\t" \
@@ -144,14 +144,14 @@
     "movq %%mm4, %%mm3 \n\t" \
     "psllq $48, %%mm2 \n\t" \
     "psllq $32, %%mm3 \n\t" \
-    "pand "MANGLE(mask24hh)", %%mm2\n\t" \
-    "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
+    "pand %4, %%mm2\n\t" \
+    "pand %5, %%mm3\n\t" \
     "por %%mm2, %%mm0 \n\t" \
     "psrlq $16, %%mm1 \n\t" \
     "psrlq $32, %%mm4 \n\t" \
     "psllq $16, %%mm5 \n\t" \
     "por %%mm3, %%mm1 \n\t" \
-    "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
+    "pand %6, %%mm5\n\t" \
     "por %%mm5, %%mm4 \n\t" \
 \
     MOVNTQ" %%mm0, %0 \n\t" \
@@ -184,7 +184,7 @@
             "movq %%mm5, %%mm7 \n\t"
             STORE_BGR24_MMX
             :"=m"(*dest)
-            :"m"(*s)
+            :"m"(*s), "m"(mask24l), "m"(mask24h), "m"(mask24hh), "m"(mask24hhh), "m"(mask24hhhh)
             :"memory");
         dest += 24;
         s += 32;
@@ -977,7 +977,7 @@
 
             STORE_BGR24_MMX
             :"=m"(*d)
-            :"m"(*s)
+            :"m"(*s), "m"(mask24l), "m"(mask24h), "m"(mask24hh), "m"(mask24hhh), "m"(mask24hhhh)
             :"memory");
         d += 24;
         s += 8;
@@ -1083,7 +1083,7 @@
 
             STORE_BGR24_MMX
             :"=m"(*d)
-            :"m"(*s)
+            :"m"(*s), "m"(mask24l), "m"(mask24h), "m"(mask24hh), "m"(mask24hhh), "m"(mask24hhhh)
             :"memory");
         d += 24;
         s += 8;
@@ -1297,9 +1297,9 @@
     __asm__ volatile (
         "test %%"REG_a", %%"REG_a" \n\t"
        "jns 2f \n\t"
-        "movq "MANGLE(mask24r)", %%mm5 \n\t"
-        "movq "MANGLE(mask24g)", %%mm6 \n\t"
-        "movq "MANGLE(mask24b)", %%mm7 \n\t"
+        "movq %3, %%mm5 \n\t"
+        "movq %4, %%mm6 \n\t"
+        "movq %5, %%mm7 \n\t"
         ASMALIGN(4)
         "1: \n\t"
         PREFETCH" 32(%1, %%"REG_a") \n\t"
@@ -1335,7 +1335,7 @@
         " js 1b \n\t"
         "2: \n\t"
         : "+a" (mmx_size)
-        : "r" (src-mmx_size), "r"(dst-mmx_size)
+        : "r" (src-mmx_size), "r"(dst-mmx_size), "m"(mask24r), "m"(mask24g), "m"(mask24b)
     );
 
     __asm__ volatile(SFENCE:::"memory");
@@ -2007,8 +2007,8 @@
     for (i=0; i<2; i++) {
         __asm__ volatile(
             "mov %2, %%"REG_a" \n\t"
-            "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
-            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
+            "movq %3, %%mm6 \n\t"
+            "movq %4, %%mm5 \n\t"
             "pxor %%mm7, %%mm7 \n\t"
             "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
             ASMALIGN(4)
@@ -2066,12 +2066,13 @@
             "psraw $7, %%mm4 \n\t"
 
             "packuswb %%mm4, %%mm0 \n\t"
-            "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
+            "paddusb %5, %%mm0 \n\t"
 
             MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
             "add $8, %%"REG_a" \n\t"
             " js 1b \n\t"
-            : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
+            : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width),
+              "m"(ff_bgr2YCoeff), "m"(ff_w1111), "m"(ff_bgr2YOffset)
             : "%"REG_a, "%"REG_d
         );
         ydst += lumStride;
@@ -2080,8 +2081,8 @@
         src -= srcStride*2;
         __asm__ volatile(
             "mov %4, %%"REG_a" \n\t"
-            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
-            "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
+            "movq %5, %%mm5 \n\t"
+            "movq %6, %%mm6 \n\t"
             "pxor %%mm7, %%mm7 \n\t"
             "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
             "add %%"REG_d", %%"REG_d" \n\t"
@@ -2130,8 +2131,8 @@
             "psrlw $2, %%mm0 \n\t"
             "psrlw $2, %%mm2 \n\t"
 #endif
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+            "movq %7, %%mm1 \n\t"
+            "movq %7, %%mm3 \n\t"
 
             "pmaddwd %%mm0, %%mm1 \n\t"
             "pmaddwd %%mm2, %%mm3 \n\t"
@@ -2188,12 +2189,12 @@
             "paddw %%mm1, %%mm5 \n\t"
             "paddw %%mm3, %%mm2 \n\t"
             "paddw %%mm5, %%mm2 \n\t"
-            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
+            "movq %5, %%mm5 \n\t"
             "psrlw $2, %%mm4 \n\t"
             "psrlw $2, %%mm2 \n\t"
 #endif
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
-            "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+            "movq %7, %%mm1 \n\t"
+            "movq %7, %%mm3 \n\t"
 
             "pmaddwd %%mm4, %%mm1 \n\t"
             "pmaddwd %%mm2, %%mm3 \n\t"
@@ -2217,13 +2218,14 @@
             "punpckldq %%mm4, %%mm0 \n\t"
             "punpckhdq %%mm4, %%mm1 \n\t"
             "packsswb %%mm1, %%mm0 \n\t"
-            "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
+            "paddb %8, %%mm0 \n\t"
 
             "movd %%mm0, (%2, %%"REG_a") \n\t"
             "punpckhdq %%mm0, %%mm0 \n\t"
             "movd %%mm0, (%3, %%"REG_a") \n\t"
             "add $4, %%"REG_a" \n\t"
             " js 1b \n\t"
-            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth),
+              "m"(ff_w1111), "m"(ff_bgr2UCoeff), "m"(ff_bgr2VCoeff), "m"(ff_bgr2UVOffset)
             : "%"REG_a, "%"REG_d
         );
--- a/libswscale/swscale.c	2010-06-03 22:16:42.000000000 +0200
+++ b/libswscale/swscale.c	2010-07-31 13:06:56.708636269 +0200
@@ -156,19 +156,6 @@
 DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
 DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
 
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
-#else
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
-
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
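
Note on the pattern used in the hunks above: referencing an asm constant by symbol name through MANGLE() embeds an absolute address in the instruction, which shows up as a TEXTREL when libswscale is built as a shared object on x86. Each hunk instead hands the constant to the asm block as an extra "m" input operand and refers to it by operand number, letting the compiler pick a PIC-safe addressing mode. The stand-alone sketch below illustrates the same idea; it assumes GCC-style inline asm on x86 with MMX available, and the names example_mask and and_with_mask are made up for illustration only, they are not libswscale identifiers.

/*
 * Minimal sketch of the TEXTREL-avoiding pattern (illustrative only).
 * Instead of  "pand "MANGLE(example_mask)", %%mm0"  the mask is passed
 * in as an "m" operand and addressed through that operand.
 */
#include <stdint.h>
#include <stdio.h>

/* 8-byte aligned constant, analogous to the DECLARE_ASM_CONST masks. */
static const uint64_t example_mask __attribute__((aligned(8))) =
    0x00FF00FF00FF00FFULL;

static uint64_t and_with_mask(uint64_t x)
{
#if defined(__GNUC__) && defined(__MMX__)
    uint64_t result;
    __asm__ volatile(
        "movq %1, %%mm0     \n\t"   /* load the input operand             */
        "pand %2, %%mm0     \n\t"   /* %2 is the mask operand, no MANGLE  */
        "movq %%mm0, %0     \n\t"   /* store the result                   */
        "emms               \n\t"   /* leave MMX state                    */
        : "=m"(result)
        : "m"(x), "m"(example_mask) /* constant passed as an "m" operand  */
    );                              /* mm0 is used as scratch             */
    return result;
#else
    return x & example_mask;        /* plain C fallback                   */
#endif
}

int main(void)
{
    printf("%016llx\n", (unsigned long long)and_with_mask(0x1122334455667788ULL));
    return 0;
}

Built as position-independent code (for example gcc -O2 -fPIC), the asm contains no hard-coded address for the mask, since the compiler decides how to address the %N memory operands; the extra "m"(mask24l), "m"(ff_bgr2YCoeff), etc. inputs in the hunks above serve the same purpose.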