Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 51031 Details for
Bug 80685
x11-base/xorg-x11-6.8.2 fails to compile with GCC4
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
MMX GCC4 compile fix
gcc4-mmx.patch (text/plain), 51.66 KB, created by
Mark Loeser (RETIRED)
on 2005-02-11 16:22:39 UTC
(
hide
)
Description:
MMX GCC4 compile fix
Filename:
MIME Type:
Creator:
Mark Loeser (RETIRED)
Created:
2005-02-11 16:22:39 UTC
Size:
51.66 KB
patch
obsolete
>diff -ur xc-orig/programs/Xserver/fb/Imakefile xc/programs/Xserver/fb/Imakefile >--- xc-orig/programs/Xserver/fb/Imakefile 2005-02-11 04:00:50.004092510 -0500 >+++ xc/programs/Xserver/fb/Imakefile 2005-02-11 04:01:32.059345739 -0500 >@@ -3,13 +3,22 @@ > XCOMM > XCOMM Id: Imakefile,v 1.1 1999/11/02 03:54:44 keithp Exp $ > >-#if defined(i386Architecture) && defined(HasGcc34) && HasGcc34 >+#if defined(HasGcc34) && HasGcc34 > MMXOPTIONS= -mmmx -Winline --param inline-unit-growth=10000 \ >- --param large-function-growth=10000 -DUSE_GCC34_MMX >+ --param large-function-growth=10000 -DUSE_MMX >+SSEOPTIONS= $(MMXOPTIONS) -msse -DUSE_SSE > >+#if defined(i386Architecture) > SpecialCObjectRule(fbmmx,fbmmx.c,$(MMXOPTIONS)) >+#elif defined(AMD64Architecture) >+SpecialCObjectRule(fbmmx,fbmmx.c,$(SSEOPTIONS)) >+#endif >+ >+#if defined(i386Architecture) || defined(AMD64Architecture) > SpecialCObjectRule(fbpict,fbpict.c,$(MMXOPTIONS)) > SpecialCObjectRule(fbfill,fbfill.c,$(MMXOPTIONS)) >+SpecialCObjectRule(fbcopy,fbcopy.c,$(MMXOPTIONS)) >+#endif > > #endif > >diff -ur xc-orig/programs/Xserver/fb/fbcompose.c xc/programs/Xserver/fb/fbcompose.c >--- xc-orig/programs/Xserver/fb/fbcompose.c 2005-02-11 04:00:50.009092659 -0500 >+++ xc/programs/Xserver/fb/fbcompose.c 2005-02-11 04:01:32.067345977 -0500 >@@ -1,8 +1,8 @@ > /* >- * $XdotOrg: xc/programs/Xserver/fb/fbcompose.c,v 1.3 2004/05/12 01:49:46 anholt Exp $ >+ * $XdotOrg: xc/programs/Xserver/fb/fbcompose.c,v 1.5 2005/01/13 20:49:21 sandmann Exp $ > * $XFree86: xc/programs/Xserver/fb/fbcompose.c,v 1.17tsi Exp $ > * >- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. >+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. > * > * Permission to use, copy, modify, distribute, and sell this software and its > * documentation for any purpose is hereby granted without fee, provided that >@@ -2693,7 +2693,6 @@ > op->u.transform.y = y - op->u.transform.top_y; > } > >- > Bool > fbBuildCompositeOperand (PicturePtr pPict, > FbCompositeOperand op[4], >@@ -2710,7 +2709,6 @@ > > op->u.transform.top_y = pPict->pDrawable->y; > op->u.transform.left_x = pPict->pDrawable->x; >- > op->u.transform.start_x = x - op->u.transform.left_x; > op->u.transform.x = op->u.transform.start_x; > op->u.transform.y = y - op->u.transform.top_y; >@@ -2822,6 +2820,21 @@ > FbCombineFunc f; > int w; > >+#if 0 >+ ErrorF ("op: %d\n" >+ "src format: %lx\n" >+ "msk format %lx\n" >+ "dst format %lx\n" >+ "width: %d\n" >+ "height %d\n", >+ op, >+ pSrc? pSrc->format : 0, >+ pMask? pMask->format : 0, >+ pDst? pDst->format : 0, >+ width, height); >+ ErrorF ("PICT_x8r8g8b8: %lx\n", PICT_x8r8g8b8); >+#endif >+ > if (!fbBuildCompositeOperand (pSrc, src, xSrc, ySrc, TRUE, TRUE)) > return; > if (!fbBuildCompositeOperand (pDst, dst, xDst, yDst, FALSE, TRUE)) >diff -ur xc-orig/programs/Xserver/fb/fbcopy.c xc/programs/Xserver/fb/fbcopy.c >--- xc-orig/programs/Xserver/fb/fbcopy.c 2005-02-11 04:00:50.004092510 -0500 >+++ xc/programs/Xserver/fb/fbcopy.c 2005-02-11 04:01:32.068346007 -0500 >@@ -1,7 +1,7 @@ > /* > * Id: fbcopy.c,v 1.1 1999/11/02 03:54:45 keithp Exp $ > * >- * Copyright © 1998 Keith Packard >+ * Copyright © 1998 Keith Packard > * > * Permission to use, copy, modify, distribute, and sell this software and its > * documentation for any purpose is hereby granted without fee, provided that >@@ -27,6 +27,7 @@ > #ifdef IN_MODULE > #include "xf86_ansic.h" > #endif >+#include "fbmmx.h" > > void > fbCopyNtoN (DrawablePtr pSrcDrawable, >@@ -54,28 +55,51 @@ > > fbGetDrawable (pSrcDrawable, src, srcStride, srcBpp, srcXoff, srcYoff); > fbGetDrawable (pDstDrawable, dst, dstStride, dstBpp, dstXoff, dstYoff); >- >+ > while (nbox--) > { >+#ifdef USE_MMX >+ if (!reverse && !upsidedown && fbHaveMMX()) >+ { >+ if (!fbCopyAreammx (pSrcDrawable, >+ pDstDrawable, >+ >+ (pbox->x1 + dx + srcXoff), >+ (pbox->y1 + dy + srcYoff), >+ >+ (pbox->x1 + dstXoff), >+ (pbox->y1 + dstYoff), >+ >+ (pbox->x2 - pbox->x1), >+ (pbox->y2 - pbox->y1))) >+ goto fallback; >+ else >+ goto next; >+ } >+ fallback: >+#endif > fbBlt (src + (pbox->y1 + dy + srcYoff) * srcStride, > srcStride, > (pbox->x1 + dx + srcXoff) * srcBpp, >- >+ > dst + (pbox->y1 + dstYoff) * dstStride, > dstStride, > (pbox->x1 + dstXoff) * dstBpp, >- >+ > (pbox->x2 - pbox->x1) * dstBpp, > (pbox->y2 - pbox->y1), >- >+ > alu, > pm, > dstBpp, >- >+ > reverse, > upsidedown); >+#ifdef USE_MMX >+ next: >+#endif > pbox++; >- } >+ } > } > > void >@@ -594,7 +618,7 @@ > int yOut) > { > fbCopyProc copy; >- >+ > #ifdef FB_24_32BIT > if (pSrcDrawable->bitsPerPixel != pDstDrawable->bitsPerPixel) > copy = fb24_32CopyMtoN; >diff -ur xc-orig/programs/Xserver/fb/fbfill.c xc/programs/Xserver/fb/fbfill.c >--- xc-orig/programs/Xserver/fb/fbfill.c 2005-02-11 04:00:50.006092570 -0500 >+++ xc/programs/Xserver/fb/fbfill.c 2005-02-11 04:01:32.069346037 -0500 >@@ -1,7 +1,7 @@ > /* > * Id: fbfill.c,v 1.1 1999/11/02 03:54:45 keithp Exp $ > * >- * Copyright © 1998 Keith Packard >+ * Copyright © 1998 Keith Packard > * > * Permission to use, copy, modify, distribute, and sell this software and its > * documentation for any purpose is hereby granted without fee, provided that >@@ -44,7 +44,7 @@ > > switch (pGC->fillStyle) { > case FillSolid: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (!pPriv->and && fbHaveMMX()) > if (fbSolidFillmmx (pDrawable, x, y, width, height, pPriv->xor)) > return; > >diff -ur xc-orig/programs/Xserver/fb/fbmmx.c xc/programs/Xserver/fb/fbmmx.c >--- xc-orig/programs/Xserver/fb/fbmmx.c 2005-02-11 04:00:50.006092570 -0500 >+++ xc/programs/Xserver/fb/fbmmx.c 2005-02-11 04:01:32.072346126 -0500 >@@ -1,5 +1,6 @@ > /* >- * Copyright © 2004 Red Hat, Inc. >+ * Copyright © 2004 Red Hat, Inc. >+ * Copyright © 2004 Nicholas Miell > * > * Permission to use, copy, modify, distribute, and sell this software and its > * documentation for any purpose is hereby granted without fee, provided that >@@ -18,14 +19,23 @@ > * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN > * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > * >- * Author: Søren Sandmann (sandmann@redhat.com) >- * >+ * Author: Søren Sandmann (sandmann@redhat.com) >+ * Minor Improvements: Nicholas Miell (nmiell@gmail.com) >+ * > * Based on work by Owen Taylor > */ > >+ >+#ifdef USE_MMX >+ > #include "fb.h" >+#include "fbmmx.h" >+ >+#include <mmintrin.h> > >-#ifdef USE_GCC34_MMX >+#ifdef USE_SSE >+#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ >+#endif > > #ifdef RENDER > >@@ -33,11 +43,6 @@ > #include "mipict.h" > #include "fbpict.h" > >-typedef int Vector1x64 __attribute__ ((mode(DI))); >-typedef int Vector2x32 __attribute__ ((mode(V2SI))); >-typedef int Vector4x16 __attribute__ ((mode(V4HI))); >-typedef int Vector8x8 __attribute__ ((mode(V8QI))); >- > typedef unsigned long long ullong; > > #define noVERBOSE >@@ -50,7 +55,6 @@ > > typedef struct > { >- ullong mmx_zero; > ullong mmx_4x00ff; > ullong mmx_4x0080; > ullong mmx_565_rgb; >@@ -70,7 +74,6 @@ > > static const MMXData c = > { >- .mmx_zero = 0x0000000000000000ULL, > .mmx_4x00ff = 0x00ff00ff00ff00ffULL, > .mmx_4x0080 = 0x0080008000800080ULL, > .mmx_565_rgb = 0x000001f0003f001fULL, >@@ -88,121 +91,112 @@ > .mmx_000000000000ffff = 0x000000000000ffffULL, > }; > >-static __inline__ Vector1x64 >-shift (Vector1x64 v, int s) >+#define MC(x) ((__m64) c.mmx_##x) >+ >+static __inline__ __m64 >+shift (__m64 v, int s) > { > if (s > 0) >- return __builtin_ia32_psllq (v, s); >+ return _mm_slli_si64 (v, s); > else if (s < 0) >- return __builtin_ia32_psrlq (v, -s); >+ return _mm_srli_si64 (v, -s); > else > return v; > } > >-static __inline__ Vector4x16 >-negate (Vector4x16 mask) >+static __inline__ __m64 >+negate (__m64 mask) > { >- return (Vector4x16)__builtin_ia32_pxor ( >- (Vector1x64)mask, >- (Vector1x64)c.mmx_4x00ff); >+ return _mm_xor_si64 (mask, MC(4x00ff)); > } > >-static __inline__ Vector4x16 >-pix_multiply (Vector4x16 a, Vector4x16 b) >+static __inline__ __m64 >+pix_multiply (__m64 a, __m64 b) > { >- Vector4x16 res; >+ __m64 res; > >- res = __builtin_ia32_pmullw (a, b); >- res = __builtin_ia32_paddw (res, (Vector4x16)c.mmx_4x0080); >- res = __builtin_ia32_psrlw (res, 8); >+ res = _mm_mullo_pi16 (a, b); >+ res = _mm_add_pi16 (res, MC(4x0080)); >+ res = _mm_srli_pi16 (res, 8); > > return res; > } > >-#if 0 >+#ifdef USE_SSE > #define HAVE_PSHUFW > #endif > > #ifdef HAVE_PSHUFW > >-static __inline__ Vector4x16 >-expand_alpha (Vector4x16 pixel) >+static __inline__ __m64 >+expand_alpha (__m64 pixel) > { >- Vector4x16 result; >- __asm__ ("pshufw $0xFF, %1, %0\n\t" : "=y" (result) : "y" (pixel)); >- return result; >+ return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 3, 3, 3)); > } > >-static __inline__ Vector4x16 >-expand_alpha_rev (Vector4x16 pixel) >+static __inline__ __m64 >+expand_alpha_rev (__m64 pixel) > { >- Vector4x16 result; >- __asm__ ("pshufw $0x00, %1, %0\n\t" : "=y" (result) : "y" (pixel)); >- return result; >+ return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(0, 0, 0, 0)); > } > >-static __inline__ Vector4x16 >-invert_colors (Vector4x16 pixel) >+static __inline__ __m64 >+invert_colors (__m64 pixel) > { >- Vector4x16 result; >- >- /* 0xC6 = 11000110 */ >- /* 3 0 1 2 */ >- >- __asm__ ("pshufw $0xC6, %1, %0\n\t" : "=y" (result) : "y" (pixel)); >- >- return result; >+ return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 0, 1, 2)); > } > > #else > >-static __inline__ Vector4x16 >-expand_alpha (Vector4x16 pixel) >+static __inline__ __m64 >+expand_alpha (__m64 pixel) > { >- Vector1x64 t1, t2; >- >- t1 = shift ((Vector1x64)pixel, -48); >+ __m64 t1, t2; >+ >+ t1 = shift (pixel, -48); > t2 = shift (t1, 16); >- t1 = __builtin_ia32_por (t1, t2); >+ t1 = _mm_or_si64 (t1, t2); > t2 = shift (t1, 32); >- t1 = __builtin_ia32_por (t1, t2); >- >- return (Vector4x16)t1; >+ t1 = _mm_or_si64 (t1, t2); >+ >+ return t1; > } > >-static __inline__ Vector4x16 >-expand_alpha_rev (Vector4x16 pixel) >+static __inline__ __m64 >+expand_alpha_rev (__m64 pixel) > { >- Vector1x64 t1, t2; >- >- t1 = shift ((Vector1x64)pixel, 48); >+ __m64 t1, t2; >+ >+ /* move alpha to low 16 bits and zero the rest */ >+ t1 = shift (pixel, 48); > t1 = shift (t1, -48); >+ > t2 = shift (t1, 16); >- t1 = __builtin_ia32_por (t1, t2); >+ t1 = _mm_or_si64 (t1, t2); > t2 = shift (t1, 32); >- t1 = __builtin_ia32_por (t1, t2); >- >- return (Vector4x16)t1; >+ t1 = _mm_or_si64 (t1, t2); >+ >+ return t1; > } > >-static __inline__ Vector4x16 >-invert_colors (Vector4x16 pixel) >+static __inline__ __m64 >+invert_colors (__m64 pixel) > { >- Vector1x64 x, y, z; >- >- x = y = z = (Vector1x64)pixel; >- >- x = __builtin_ia32_pand (x, (Vector1x64)c.mmx_ffff0000ffff0000); >- y = __builtin_ia32_pand (y, (Vector1x64)c.mmx_000000000000ffff); >- z = __builtin_ia32_pand (z, (Vector1x64)c.mmx_0000ffff00000000); >- >+ __m64 x, y, z; >+ >+ x = y = z = pixel; >+ >+ x = _mm_and_si64 (x, MC(ffff0000ffff0000)); >+ y = _mm_and_si64 (y, MC(000000000000ffff)); >+ z = _mm_and_si64 (z, MC(0000ffff00000000)); >+ > y = shift (y, 32); > z = shift (z, -32); >- >- x = __builtin_ia32_por (x, y); >- x = __builtin_ia32_por (x, z); >- >- return (Vector4x16)x; >+ >+ x = _mm_or_si64 (x, y); >+ x = _mm_or_si64 (x, z); >+ >+ return x; > } > > #endif >@@ -210,147 +204,138 @@ > /* Notes about writing mmx code > * > * give memory operands as the second operand. If you give it as the >- * first, gcc will first load it into a register, then use that register >+ * first, gcc will first load it into a register, then use that >+ * register > * > * ie. use > * >- * __builtin_pmullw (x, mmx_constant[8]); >+ * _mm_mullo_pi16 (x, mmx_constant); > * > * not > * >- * __builtin_pmullw (mmx_constant[8], x); >+ * _mm_mullo_pi16 (mmx_constant, x); > * >- * Also try to minimize dependencies. Ie. when you need a value, try to calculate >- * it from a value that was calculated as early as possible. >+ * Also try to minimize dependencies. i.e. when you need a value, try >+ * to calculate it from a value that was calculated as early as >+ * possible. > */ > >-static __inline__ Vector4x16 >-over (Vector4x16 src, Vector4x16 srca, Vector4x16 dest) >+static __inline__ __m64 >+over (__m64 src, __m64 srca, __m64 dest) > { >- return (Vector4x16)__builtin_ia32_paddusb ((Vector8x8)src, (Vector8x8)pix_multiply(dest, negate(srca))); >+ return _mm_adds_pu8 (src, pix_multiply(dest, negate(srca))); > } > >-static __inline__ Vector4x16 >-over_rev_non_pre (Vector4x16 src, Vector4x16 dest) >+static __inline__ __m64 >+over_rev_non_pre (__m64 src, __m64 dest) > { >- Vector4x16 srca = expand_alpha (src); >- Vector4x16 srcfaaa = (Vector4x16)__builtin_ia32_por((Vector1x64)srca, (Vector1x64)c.mmx_full_alpha); >- >+ __m64 srca = expand_alpha (src); >+ __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha)); >+ > return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest); > } > >-static __inline__ Vector4x16 >-in (Vector4x16 src, >- Vector4x16 mask) >+static __inline__ __m64 >+in (__m64 src, >+ __m64 mask) > { > return pix_multiply (src, mask); > } > >-static __inline__ Vector4x16 >-in_over (Vector4x16 src, >- Vector4x16 srca, >- Vector4x16 mask, >- Vector4x16 dest) >+static __inline__ __m64 >+in_over (__m64 src, >+ __m64 srca, >+ __m64 mask, >+ __m64 dest) > { > return over(in(src, mask), pix_multiply(srca, mask), dest); > } > >-static __inline__ Vector8x8 >-cvt32to64 (CARD32 v) >-{ >- ullong r = v; >- return (Vector8x8)r; >-} >- >-static __inline__ Vector4x16 >+static __inline__ __m64 > load8888 (CARD32 v) > { >- return (Vector4x16)__builtin_ia32_punpcklbw (cvt32to64 (v), >- (Vector8x8)c.mmx_zero); >+ return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64()); > } > >-static __inline__ Vector8x8 >-pack8888 (Vector4x16 lo, Vector4x16 hi) >+static __inline__ __m64 >+pack8888 (__m64 lo, __m64 hi) > { >- Vector8x8 r; >- r = __builtin_ia32_packuswb ((Vector4x16)lo, (Vector4x16)hi); >+ __m64 r; >+ r = _mm_packs_pu16 (lo, hi); > return r; > } > >-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into 00RR00GG00BB >- >---- Expanding 565 in the low word --- >- >-m = (m << (32 - 3)) | (m << (16 - 5)) | m; >-m = m & (01f0003f001f); >-m = m * (008404100840); >-m = m >> 8; >- >-Note the trick here - the top word is shifted by another nibble to avoid >-it bumping into the middle word >-*/ >-static __inline__ Vector4x16 >-expand565 (Vector4x16 pixel, int pos) >+/* Expand 16 bits positioned at @pos (0-3) of a mmx register into >+ * >+ * 00RR00GG00BB >+ * >+ * --- Expanding 565 in the low word --- >+ * >+ * m = (m << (32 - 3)) | (m << (16 - 5)) | m; >+ * m = m & (01f0003f001f); >+ * m = m * (008404100840); >+ * m = m >> 8; >+ * >+ * Note the trick here - the top word is shifted by another nibble to >+ * avoid it bumping into the middle word >+ */ >+static __inline__ __m64 >+expand565 (__m64 pixel, int pos) > { >- Vector1x64 p = (Vector1x64)pixel; >+ __m64 p = pixel; >+ __m64 t1, t2; > > /* move pixel to low 16 bit and zero the rest */ > p = shift (shift (p, (3 - pos) * 16), -48); > >- Vector1x64 t1 = shift (p, 36 - 11); >- Vector1x64 t2 = shift (p, 16 - 5); >+ t1 = shift (p, 36 - 11); >+ t2 = shift (p, 16 - 5); > >- p = __builtin_ia32_por (t1, p); >- p = __builtin_ia32_por (t2, p); >- p = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_rgb); >+ p = _mm_or_si64 (t1, p); >+ p = _mm_or_si64 (t2, p); >+ p = _mm_and_si64 (p, MC(565_rgb)); > >- pixel = __builtin_ia32_pmullw ((Vector4x16)p, (Vector4x16)c.mmx_565_unpack_multiplier); >- return __builtin_ia32_psrlw (pixel, 8); >+ pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier)); >+ return _mm_srli_pi16 (pixel, 8); > } > >-static __inline__ Vector4x16 >-expand8888 (Vector4x16 in, int pos) >+static __inline__ __m64 >+expand8888 (__m64 in, int pos) > { > if (pos == 0) >- return (Vector4x16)__builtin_ia32_punpcklbw ((Vector8x8)in, (Vector8x8)c.mmx_zero); >+ return _mm_unpacklo_pi8 (in, _mm_setzero_si64()); > else >- return (Vector4x16)__builtin_ia32_punpckhbw ((Vector8x8)in, (Vector8x8)c.mmx_zero); >+ return _mm_unpackhi_pi8 (in, _mm_setzero_si64()); > } > >-static __inline__ Vector4x16 >-pack565 (Vector4x16 pixel, Vector4x16 target, int pos) >+static __inline__ __m64 >+pack565 (__m64 pixel, __m64 target, int pos) > { >- Vector1x64 p = (Vector1x64)pixel; >- Vector1x64 t = (Vector1x64)target; >- Vector1x64 r, g, b; >+ __m64 p = pixel; >+ __m64 t = target; >+ __m64 r, g, b; > >- r = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_r); >- g = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_g); >- b = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_b); >+ r = _mm_and_si64 (p, MC(565_r)); >+ g = _mm_and_si64 (p, MC(565_g)); >+ b = _mm_and_si64 (p, MC(565_b)); > > r = shift (r, - (32 - 8) + pos * 16); > g = shift (g, - (16 - 3) + pos * 16); > b = shift (b, - (0 + 3) + pos * 16); >- >+ > if (pos == 0) >- t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_0); >+ t = _mm_and_si64 (t, MC(mask_0)); > else if (pos == 1) >- t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_1); >+ t = _mm_and_si64 (t, MC(mask_1)); > else if (pos == 2) >- t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_2); >+ t = _mm_and_si64 (t, MC(mask_2)); > else if (pos == 3) >- t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_3); >+ t = _mm_and_si64 (t, MC(mask_3)); > >- p = __builtin_ia32_por (r, t); >- p = __builtin_ia32_por (g, p); >+ p = _mm_or_si64 (r, t); >+ p = _mm_or_si64 (g, p); > >- return (Vector4x16)__builtin_ia32_por (b, p); >-} >- >-static __inline__ void >-emms (void) >-{ >- __asm__ __volatile__ ("emms"); >+ return _mm_or_si64 (b, p); > } > > void >@@ -371,8 +356,8 @@ > CARD32 *dstLine, *dst; > CARD16 w; > FbStride dstStride; >- Vector4x16 vsrc, vsrca; >- >+ __m64 vsrc, vsrca; >+ > CHECKPOINT(); > > fbComposeGetSolid(pSrc, src, pDst->format); >@@ -384,51 +369,52 @@ > > vsrc = load8888 (src); > vsrca = expand_alpha (vsrc); >- >+ > while (height--) > { > dst = dstLine; > dstLine += dstStride; > w = width; >- >+ > CHECKPOINT(); > > while (w && (unsigned long)dst & 7) > { >- *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), (Vector4x16)c.mmx_zero); >+ *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), >+ _mm_setzero_si64()); > > w--; > dst++; > } >- >+ > while (w >= 2) > { >- Vector4x16 vdest; >- Vector4x16 dest0, dest1; >- >- vdest = *(Vector4x16 *)dst; >+ __m64 vdest; >+ __m64 dest0, dest1; >+ >+ vdest = *(__m64 *)dst; > > dest0 = over(vsrc, vsrca, expand8888(vdest, 0)); > dest1 = over(vsrc, vsrca, expand8888(vdest, 1)); > >- *(Vector8x8 *)dst = (Vector8x8)pack8888(dest0, dest1); >+ *(__m64 *)dst = pack8888(dest0, dest1); > > dst += 2; > w -= 2; > } >- >+ > CHECKPOINT(); > > while (w) > { >- *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), (Vector4x16)c.mmx_zero); >+ *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), _mm_setzero_si64()); > > w--; > dst++; > } > } > >- emms(); >+ _mm_empty(); > } > > void >@@ -449,8 +435,8 @@ > CARD16 *dstLine, *dst; > CARD16 w; > FbStride dstStride; >- Vector4x16 vsrc, vsrca; >- >+ __m64 vsrc, vsrca; >+ > CHECKPOINT(); > > fbComposeGetSolid(pSrc, src, pDst->format); >@@ -462,49 +448,49 @@ > > vsrc = load8888 (src); > vsrca = expand_alpha (vsrc); >- >+ > while (height--) > { > dst = dstLine; > dstLine += dstStride; > w = width; >- >+ > CHECKPOINT(); > > while (w && (unsigned long)dst & 7) > { > ullong d = *dst; >- Vector4x16 vdest = expand565 ((Vector4x16)d, 0); >+ __m64 vdest = expand565 ((__m64)d, 0); > vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0); > *dst = (ullong)vdest; > > w--; > dst++; > } >- >+ > while (w >= 4) > { >- Vector4x16 vdest; >- >- vdest = *(Vector4x16 *)dst; >+ __m64 vdest; >+ >+ vdest = *(__m64 *)dst; > > vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0); > vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1); > vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2); > vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3); > >- *(Vector8x8 *)dst = (Vector8x8)vdest; >+ *(__m64 *)dst = vdest; > > dst += 4; > w -= 4; > } >- >+ > CHECKPOINT(); > > while (w) > { > ullong d = *dst; >- Vector4x16 vdest = expand565 ((Vector4x16)d, 0); >+ __m64 vdest = expand565 ((__m64)d, 0); > vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0); > *dst = (ullong)vdest; > >@@ -513,7 +499,7 @@ > } > } > >- emms(); >+ _mm_empty(); > } > > void >@@ -534,8 +520,8 @@ > CARD32 *dstLine; > CARD32 *maskLine; > FbStride dstStride, maskStride; >- Vector4x16 vsrc, vsrca; >- >+ __m64 vsrc, vsrca; >+ > CHECKPOINT(); > > fbComposeGetSolid(pSrc, src, pDst->format); >@@ -562,9 +548,9 @@ > > if (m) > { >- Vector4x16 vdest = load8888(*q); >+ __m64 vdest = load8888(*q); > vdest = in_over(vsrc, vsrca, load8888(m), vdest); >- *q = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero); >+ *q = (ullong)pack8888(vdest, _mm_setzero_si64()); > } > > twidth--; >@@ -580,15 +566,15 @@ > > if (m0 | m1) > { >- Vector4x16 dest0, dest1; >- Vector4x16 vdest = *(Vector4x16 *)q; >+ __m64 dest0, dest1; >+ __m64 vdest = *(__m64 *)q; > > dest0 = in_over(vsrc, vsrca, load8888(m0), > expand8888 (vdest, 0)); > dest1 = in_over(vsrc, vsrca, load8888(m1), > expand8888 (vdest, 1)); > >- *(Vector8x8 *)q = (Vector8x8)pack8888(dest0, dest1); >+ *(__m64 *)q = pack8888(dest0, dest1); > } > > p += 2; >@@ -602,9 +588,9 @@ > > if (m) > { >- Vector4x16 vdest = load8888(*q); >+ __m64 vdest = load8888(*q); > vdest = in_over(vsrc, vsrca, load8888(m), vdest); >- *q = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero); >+ *q = (ullong)pack8888(vdest, _mm_setzero_si64()); > } > > twidth--; >@@ -616,7 +602,133 @@ > maskLine += maskStride; > } > >- emms(); >+ _mm_empty(); >+} >+ >+void >+fbCompositeSrc_8888x8x8888mmx (CARD8 op, >+ PicturePtr pSrc, >+ PicturePtr pMask, >+ PicturePtr pDst, >+ INT16 xSrc, >+ INT16 ySrc, >+ INT16 xMask, >+ INT16 yMask, >+ INT16 xDst, >+ INT16 yDst, >+ CARD16 width, >+ CARD16 height) >+{ >+ CARD32 *dstLine, *dst; >+ CARD32 *srcLine, *src; >+ CARD8 *maskLine; >+ CARD32 mask; >+ __m64 vmask; >+ FbStride dstStride, srcStride, maskStride; >+ CARD16 w; >+ __m64 srca; >+ >+ CHECKPOINT(); >+ >+ fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); >+ fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); >+ fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); >+ >+ mask = *maskLine << 24 | *maskLine << 16 | *maskLine << 8 | *maskLine; >+ vmask = load8888 (mask); >+ srca = MC(4x00ff); >+ >+ while (height--) >+ { >+ dst = dstLine; >+ dstLine += dstStride; >+ src = srcLine; >+ srcLine += srcStride; >+ w = width; >+ >+ while (w && (unsigned long)dst & 7) >+ { >+ __m64 s = load8888 (*src); >+ __m64 d = load8888 (*dst); >+ >+ *dst = (ullong)pack8888 (in_over (s, srca, vmask, d), (__m64)_mm_setzero_si64()); >+ >+ w--; >+ dst++; >+ src++; >+ } >+ >+ while (w >= 16) >+ { >+ __m64 vd0 = *(__m64 *)(dst + 0); >+ __m64 vd1 = *(__m64 *)(dst + 2); >+ __m64 vd2 = *(__m64 *)(dst + 4); >+ __m64 vd3 = *(__m64 *)(dst + 6); >+ __m64 vd4 = *(__m64 *)(dst + 8); >+ __m64 vd5 = *(__m64 *)(dst + 10); >+ __m64 vd6 = *(__m64 *)(dst + 12); >+ __m64 vd7 = *(__m64 *)(dst + 14); >+ >+ __m64 vs0 = *(__m64 *)(src + 0); >+ __m64 vs1 = *(__m64 *)(src + 2); >+ __m64 vs2 = *(__m64 *)(src + 4); >+ __m64 vs3 = *(__m64 *)(src + 6); >+ __m64 vs4 = *(__m64 *)(src + 8); >+ __m64 vs5 = *(__m64 *)(src + 10); >+ __m64 vs6 = *(__m64 *)(src + 12); >+ __m64 vs7 = *(__m64 *)(dst + 14); >+ >+ vd0 = (__m64)pack8888 ( >+ in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), >+ in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); >+ >+ vd1 = (__m64)pack8888 ( >+ in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), >+ in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); >+ >+ vd2 = (__m64)pack8888 ( >+ in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), >+ in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); >+ >+ vd3 = (__m64)pack8888 ( >+ in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), >+ in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); >+ >+ vd4 = (__m64)pack8888 ( >+ in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), >+ in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); >+ >+ vd5 = (__m64)pack8888 ( >+ in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), >+ in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); >+ >+ vd6 = (__m64)pack8888 ( >+ in_over (expand8888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), >+ in_over (expand8888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); >+ >+ vd7 = (__m64)pack8888 ( >+ in_over (expand8888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), >+ in_over (expand8888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); >+ >+ w -= 16; >+ dst += 16; >+ src += 16; >+ } >+ >+ while (w) >+ { >+ __m64 s = load8888 (*src); >+ __m64 d = load8888 (*dst); >+ >+ *dst = (ullong)pack8888 (in_over (s, srca, vmask, d), (__m64)_mm_setzero_si64()); >+ >+ w--; >+ dst++; >+ src++; >+ } >+ } >+ >+ _mm_empty(); > } > > void >@@ -638,7 +750,7 @@ > CARD8 *maskLine, *mask; > FbStride dstStride, maskStride; > CARD16 w; >- Vector4x16 vsrc, vsrca; >+ __m64 vsrc, vsrca; > ullong srcsrc; > > CHECKPOINT(); >@@ -648,7 +760,7 @@ > srca = src >> 24; > if (srca == 0) > return; >- >+ > srcsrc = (unsigned long long)src << 32 | src; > > fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); >@@ -664,7 +776,7 @@ > mask = maskLine; > maskLine += maskStride; > w = width; >- >+ > CHECKPOINT(); > > while (w && (unsigned long)dst & 7) >@@ -673,15 +785,15 @@ > > if (m) > { >- Vector4x16 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), load8888(*dst)); >- *dst = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero); >+ __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), load8888(*dst)); >+ *dst = (ullong)pack8888(vdest, _mm_setzero_si64()); > } > > w--; > mask++; > dst++; > } >- >+ > CHECKPOINT(); > > while (w >= 2) >@@ -689,29 +801,29 @@ > ullong m0, m1; > m0 = *mask; > m1 = *(mask + 1); >- >+ > if (srca == 0xff && (m0 & m1) == 0xff) > { > *(unsigned long long *)dst = srcsrc; > } > else if (m0 | m1) > { >- Vector4x16 vdest; >- Vector4x16 dest0, dest1; >- >- vdest = *(Vector4x16 *)dst; >+ __m64 vdest; >+ __m64 dest0, dest1; >+ >+ vdest = *(__m64 *)dst; > >- dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m0), expand8888(vdest, 0)); >- dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m1), expand8888(vdest, 1)); >+ dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0), expand8888(vdest, 0)); >+ dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1), expand8888(vdest, 1)); > >- *(Vector8x8 *)dst = (Vector8x8)pack8888(dest0, dest1); >+ *(__m64 *)dst = pack8888(dest0, dest1); > } > > mask += 2; > dst += 2; > w -= 2; > } >- >+ > CHECKPOINT(); > > while (w) >@@ -720,9 +832,9 @@ > > if (m) > { >- Vector4x16 vdest = load8888(*dst); >- vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), vdest); >- *dst = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero); >+ __m64 vdest = load8888(*dst); >+ vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest); >+ *dst = (ullong)pack8888(vdest, _mm_setzero_si64()); > } > > w--; >@@ -731,7 +843,7 @@ > } > } > >- emms(); >+ _mm_empty(); > } > > >@@ -754,7 +866,7 @@ > CARD8 *maskLine, *mask; > FbStride dstStride, maskStride; > CARD16 w; >- Vector4x16 vsrc, vsrca; >+ __m64 vsrc, vsrca; > unsigned long long srcsrcsrcsrc, src16; > > CHECKPOINT(); >@@ -770,9 +882,9 @@ > > vsrc = load8888 (src); > vsrca = expand_alpha (vsrc); >- >- src16 = (ullong)pack565(vsrc, (Vector4x16)c.mmx_zero, 0); >- >+ >+ src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0); >+ > srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 | > (ullong)src16 << 16 | (ullong)src16; > >@@ -783,7 +895,7 @@ > mask = maskLine; > maskLine += maskStride; > w = width; >- >+ > CHECKPOINT(); > > while (w && (unsigned long)dst & 7) >@@ -793,16 +905,16 @@ > if (m) > { > ullong d = *dst; >- Vector4x16 vd = (Vector4x16)d; >- Vector4x16 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), expand565(vd, 0)); >- *dst = (ullong)pack565(vdest, (Vector4x16)c.mmx_zero, 0); >+ __m64 vd = (__m64)d; >+ __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0)); >+ *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0); > } > > w--; > mask++; > dst++; > } >- >+ > CHECKPOINT(); > > while (w >= 4) >@@ -812,35 +924,35 @@ > m1 = *(mask + 1); > m2 = *(mask + 2); > m3 = *(mask + 3); >- >+ > if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) > { > *(unsigned long long *)dst = srcsrcsrcsrc; > } > else if (m0 | m1 | m2 | m3) > { >- Vector4x16 vdest; >- Vector4x16 vm0, vm1, vm2, vm3; >- >- vdest = *(Vector4x16 *)dst; >- >- vm0 = (Vector4x16)m0; >+ __m64 vdest; >+ __m64 vm0, vm1, vm2, vm3; >+ >+ vdest = *(__m64 *)dst; >+ >+ vm0 = (__m64)m0; > vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0); >- vm1 = (Vector4x16)m1; >+ vm1 = (__m64)m1; > vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1); >- vm2 = (Vector4x16)m2; >+ vm2 = (__m64)m2; > vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2); >- vm3 = (Vector4x16)m3; >+ vm3 = (__m64)m3; > vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3); > >- *(Vector4x16 *)dst = vdest; >+ *(__m64 *)dst = vdest; > } > > w -= 4; > mask += 4; > dst += 4; > } >- >+ > CHECKPOINT(); > > while (w) >@@ -850,9 +962,9 @@ > if (m) > { > ullong d = *dst; >- Vector4x16 vd = (Vector4x16)d; >- Vector4x16 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), expand565(vd, 0)); >- *dst = (ullong)pack565(vdest, (Vector4x16)c.mmx_zero, 0); >+ __m64 vd = (__m64)d; >+ __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0)); >+ *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0); > } > > w--; >@@ -861,7 +973,7 @@ > } > } > >- emms(); >+ _mm_empty(); > } > > void >@@ -887,9 +999,9 @@ > > fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); > fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); >- >+ > assert (pSrc->pDrawable == pMask->pDrawable); >- >+ > while (height--) > { > dst = dstLine; >@@ -897,14 +1009,14 @@ > src = srcLine; > srcLine += srcStride; > w = width; >- >+ > CHECKPOINT(); > > while (w && (unsigned long)dst & 7) > { >- Vector4x16 vsrc = load8888 (*src); >+ __m64 vsrc = load8888 (*src); > ullong d = *dst; >- Vector4x16 vdest = expand565 ((Vector4x16)d, 0); >+ __m64 vdest = expand565 ((__m64)d, 0); > > vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); > >@@ -914,19 +1026,19 @@ > dst++; > src++; > } >- >+ > CHECKPOINT(); > > while (w >= 4) > { > CARD32 s0, s1, s2, s3; > unsigned char a0, a1, a2, a3; >- >+ > s0 = *src; > s1 = *(src + 1); > s2 = *(src + 2); > s3 = *(src + 3); >- >+ > a0 = (s0 >> 24); > a1 = (s1 >> 24); > a2 = (s2 >> 24); >@@ -934,38 +1046,38 @@ > > if ((a0 & a1 & a2 & a3) == 0xFF) > { >- Vector4x16 vdest; >- vdest = pack565(invert_colors(load8888(s0)), (Vector4x16)c.mmx_zero, 0); >+ __m64 vdest; >+ vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0); > vdest = pack565(invert_colors(load8888(s1)), vdest, 1); > vdest = pack565(invert_colors(load8888(s2)), vdest, 2); > vdest = pack565(invert_colors(load8888(s3)), vdest, 3); >- >- *(Vector4x16 *)dst = vdest; >+ >+ *(__m64 *)dst = vdest; > } > else if (a0 | a1 | a2 | a3) > { >- Vector4x16 vdest = *(Vector4x16 *)dst; >- >+ __m64 vdest = *(__m64 *)dst; >+ > vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0); > vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1); > vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2); > vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3); >- >- *(Vector4x16 *)dst = vdest; >+ >+ *(__m64 *)dst = vdest; > } > > w -= 4; > dst += 4; > src += 4; > } >- >+ > CHECKPOINT(); > > while (w) > { >- Vector4x16 vsrc = load8888 (*src); >+ __m64 vsrc = load8888 (*src); > ullong d = *dst; >- Vector4x16 vdest = expand565 ((Vector4x16)d, 0); >+ __m64 vdest = expand565 ((__m64)d, 0); > > vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); > >@@ -976,11 +1088,11 @@ > src++; > } > } >- >- emms(); >+ >+ _mm_empty(); > } > >-/* "888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */ >+/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */ > > void > fbCompositeSrc_8888RevNPx8888mmx (CARD8 op, >@@ -1005,9 +1117,9 @@ > > fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); > fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); >- >+ > assert (pSrc->pDrawable == pMask->pDrawable); >- >+ > while (height--) > { > dst = dstLine; >@@ -1015,28 +1127,28 @@ > src = srcLine; > srcLine += srcStride; > w = width; >- >+ > while (w && (unsigned long)dst & 7) > { >- Vector4x16 s = load8888 (*src); >- Vector4x16 d = load8888 (*dst); >+ __m64 s = load8888 (*src); >+ __m64 d = load8888 (*dst); > >- *dst = (ullong)pack8888 (over_rev_non_pre (s, d), (Vector4x16)c.mmx_zero); >+ *dst = (ullong)pack8888 (over_rev_non_pre (s, d), _mm_setzero_si64()); > > w--; > dst++; > src++; > } >- >+ > while (w >= 2) > { > ullong s0, s1; > unsigned char a0, a1; >- Vector4x16 d0, d1; >- >+ __m64 d0, d1; >+ > s0 = *src; > s1 = *(src + 1); >- >+ > a0 = (s0 >> 24); > a1 = (s1 >> 24); > >@@ -1044,17 +1156,17 @@ > { > d0 = invert_colors(load8888(s0)); > d1 = invert_colors(load8888(s1)); >- >- *(Vector8x8 *)dst = pack8888 (d0, d1); >+ >+ *(__m64 *)dst = pack8888 (d0, d1); > } > else if (a0 | a1) > { >- Vector4x16 vdest = *(Vector4x16 *)dst; >- >+ __m64 vdest = *(__m64 *)dst; >+ > d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0)); > d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1)); >- >- *(Vector8x8 *)dst = pack8888 (d0, d1); >+ >+ *(__m64 *)dst = pack8888 (d0, d1); > } > > w -= 2; >@@ -1064,18 +1176,18 @@ > > while (w) > { >- Vector4x16 s = load8888 (*src); >- Vector4x16 d = load8888 (*dst); >+ __m64 s = load8888 (*src); >+ __m64 d = load8888 (*dst); > >- *dst = (ullong)pack8888 (over_rev_non_pre (s, d), (Vector4x16)c.mmx_zero); >+ *dst = (ullong)pack8888 (over_rev_non_pre (s, d), _mm_setzero_si64()); > > w--; > dst++; > src++; > } > } >- >- emms(); >+ >+ _mm_empty(); > } > > void >@@ -1096,7 +1208,7 @@ > CARD16 *dstLine; > CARD32 *maskLine; > FbStride dstStride, maskStride; >- Vector4x16 vsrc, vsrca; >+ __m64 vsrc, vsrca; > > CHECKPOINT(); > >@@ -1125,7 +1237,7 @@ > if (m) > { > ullong d = *q; >- Vector4x16 vdest = expand565 ((Vector4x16)d, 0); >+ __m64 vdest = expand565 ((__m64)d, 0); > vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); > *q = (ullong)vdest; > } >@@ -1146,14 +1258,14 @@ > > if ((m0 | m1 | m2 | m3)) > { >- Vector4x16 vdest = *(Vector4x16 *)q; >+ __m64 vdest = *(__m64 *)q; > > vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0); > vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1); > vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2); > vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3); > >- *(Vector4x16 *)q = vdest; >+ *(__m64 *)q = vdest; > } > twidth -= 4; > p += 4; >@@ -1168,7 +1280,7 @@ > if (m) > { > ullong d = *q; >- Vector4x16 vdest = expand565((Vector4x16)d, 0); >+ __m64 vdest = expand565((__m64)d, 0); > vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0); > *q = (ullong)vdest; > } >@@ -1182,7 +1294,7 @@ > dstLine += dstStride; > } > >- emms (); >+ _mm_empty (); > } > > void >@@ -1210,7 +1322,7 @@ > > fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1); > fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1); >- >+ > while (height--) > { > dst = dstLine; >@@ -1218,7 +1330,7 @@ > src = srcLine; > srcLine += srcStride; > w = width; >- >+ > while (w && (unsigned long)dst & 7) > { > s = *src; >@@ -1234,13 +1346,7 @@ > > while (w >= 8) > { >- __asm__ __volatile__ ( >- "movq (%0), %%mm2\n\t" >- "movq (%1), %%mm3\n\t" >- "paddusb %%mm2, %%mm3\n\t" >- "movq %%mm3, (%1)\n\t" >- : /* no output */ : "r" (src), "r" (dst)); >- >+ *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); > dst += 8; > src += 8; > w -= 8; >@@ -1259,8 +1365,8 @@ > w--; > } > } >- >- emms(); >+ >+ _mm_empty(); > } > > void >@@ -1297,13 +1403,8 @@ > > while (w && (unsigned long)dst & 7) > { >- __asm__ __volatile__ ( >- "movd %0, %%mm2\n\t" >- "movd %1, %%mm3\n\t" >- "paddusb %%mm2, %%mm3\n\t" >- "movd %%mm3, %1\n\t" >- : /* no output */ : "m" (*src), "m" (*dst)); >- >+ *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), >+ _mm_cvtsi32_si64(*dst))); > dst++; > src++; > w--; >@@ -1311,13 +1412,7 @@ > > while (w >= 2) > { >- __asm__ __volatile__ ( >- "movq (%0), %%mm2\n\t" >- "movq (%1), %%mm3\n\t" >- "paddusb %%mm2, %%mm3\n\t" >- "movq %%mm3, (%1)\n\t" >- : /* no output */ : "r" (src), "r" (dst)); >- >+ *(ullong*)dst = (ullong) _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); > dst += 2; > src += 2; > w -= 2; >@@ -1325,16 +1420,13 @@ > > if (w) > { >- __asm__ __volatile__ ( >- "movd %0, %%mm2\n\t" >- "movd %1, %%mm3\n\t" >- "paddusb %%mm2, %%mm3\n\t" >- "movd %%mm3, %1\n\t" >- : /* no output */ : "m" (*src), "m" (*dst)); >+ *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), >+ _mm_cvtsi32_si64(*dst))); >+ > } > } >- >- emms(); >+ >+ _mm_empty(); > } > > #define GetStart(drw,x,y,type,stride,line,bpp) {\ >@@ -1358,19 +1450,19 @@ > FbStride stride; > int bpp; > ullong fill; >- Vector8x8 vfill; >+ __m64 vfill; > CARD32 byte_width; > CARD8 *byte_line; > FbBits *bits; > int xoff, yoff; > > CHECKPOINT(); >- >+ > fbGetDrawable(pDraw, bits, stride, bpp, xoff, yoff); >- >+ > if (bpp == 16 && (xor >> 16 != (xor & 0xffff))) > return FALSE; >- >+ > if (bpp != 16 && bpp != 32) > return FALSE; > >@@ -1388,9 +1480,9 @@ > byte_width = 4 * width; > stride *= 4; > } >- >+ > fill = ((ullong)xor << 32) | xor; >- vfill = (Vector8x8)fill; >+ vfill = (__m64)fill; > > while (height--) > { >@@ -1398,7 +1490,7 @@ > CARD8 *d = byte_line; > byte_line += stride; > w = byte_width; >- >+ > while (w >= 2 && ((unsigned long)d & 3)) > { > *(CARD16 *)d = xor; >@@ -1406,35 +1498,32 @@ > d += 2; > } > >- while (w >= 4 && ((unsigned int)d & 7)) >+ while (w >= 4 && ((unsigned long)d & 7)) > { > *(CARD32 *)d = xor; >- >+ > w -= 4; > d += 4; > } > > while (w >= 64) > { >- __asm__ __volatile ( >- "movq %0, (%1)\n\t" >- "movq %0, 8(%1)\n\t" >- "movq %0, 16(%1)\n\t" >- "movq %0, 24(%1)\n\t" >- "movq %0, 32(%1)\n\t" >- "movq %0, 40(%1)\n\t" >- "movq %0, 48(%1)\n\t" >- "movq %0, 56(%1)\n\t" >- : /* no output */ >- : "y" (vfill), "r" (d) >- : "memory"); >+ *(__m64*) (d + 0) = vfill; >+ *(__m64*) (d + 8) = vfill; >+ *(__m64*) (d + 16) = vfill; >+ *(__m64*) (d + 24) = vfill; >+ *(__m64*) (d + 32) = vfill; >+ *(__m64*) (d + 40) = vfill; >+ *(__m64*) (d + 48) = vfill; >+ *(__m64*) (d + 56) = vfill; >+ > w -= 64; > d += 64; > } > while (w >= 4) > { > *(CARD32 *)d = xor; >- >+ > w -= 4; > d += 4; > } >@@ -1446,16 +1535,160 @@ > } > } > >- emms(); >+ _mm_empty(); >+ return TRUE; >+} >+ >+Bool >+fbCopyAreammx (DrawablePtr pSrc, >+ DrawablePtr pDst, >+ int src_x, >+ int src_y, >+ int dst_x, >+ int dst_y, >+ int width, >+ int height) >+{ >+ FbBits * src_bits; >+ FbStride src_stride; >+ int src_bpp; >+ int src_xoff; >+ int src_yoff; >+ >+ FbBits * dst_bits; >+ FbStride dst_stride; >+ int dst_bpp; >+ int dst_xoff; >+ int dst_yoff; >+ >+ CARD8 * src_bytes; >+ CARD8 * dst_bytes; >+ int byte_width; >+ >+ fbGetDrawable(pSrc, src_bits, src_stride, src_bpp, src_xoff, src_yoff); >+ fbGetDrawable(pDst, dst_bits, dst_stride, dst_bpp, dst_xoff, dst_yoff); >+ >+ if (src_bpp != 16 && src_bpp != 32) >+ return FALSE; >+ >+ if (dst_bpp != 16 && dst_bpp != 32) >+ return FALSE; >+ >+ if (src_bpp != dst_bpp) >+ { >+ return FALSE; >+ } >+ >+ if (src_bpp == 16) >+ { >+ src_stride = src_stride * sizeof (FbBits) / 2; >+ dst_stride = dst_stride * sizeof (FbBits) / 2; >+ src_bytes = (CARD8 *)(((CARD16 *)src_bits) + src_stride * (src_y - src_yoff) + (src_x - src_xoff)); >+ dst_bytes = (CARD8 *)(((CARD16 *)dst_bits) + dst_stride * (dst_y - dst_yoff) + (dst_x - dst_xoff)); >+ byte_width = 2 * width; >+ src_stride *= 2; >+ dst_stride *= 2; >+ } >+ else >+ { >+ src_stride = src_stride * sizeof (FbBits) / 4; >+ dst_stride = dst_stride * sizeof (FbBits) / 4; >+ src_bytes = (CARD8 *)(((CARD32 *)src_bits) + src_stride * (src_y - src_yoff) + (src_x - src_xoff)); >+ dst_bytes = (CARD8 *)(((CARD32 *)dst_bits) + dst_stride * (dst_y - dst_yoff) + (dst_x - dst_xoff)); >+ byte_width = 4 * width; >+ src_stride *= 4; >+ dst_stride *= 4; >+ } >+ >+ while (height--) >+ { >+ int w; >+ CARD8 *s = src_bytes; >+ CARD8 *d = dst_bytes; >+ src_bytes += src_stride; >+ dst_bytes += dst_stride; >+ w = byte_width; >+ >+ while (w >= 2 && ((unsigned long)d & 3)) >+ { >+ *(CARD16 *)d = *(CARD16 *)s; >+ w -= 2; >+ s += 2; >+ d += 2; >+ } >+ >+ while (w >= 4 && ((unsigned int)d & 7)) >+ { >+ *(CARD32 *)d = *(CARD32 *)s; >+ >+ w -= 4; >+ s += 4; >+ d += 4; >+ } >+ >+ while (w >= 64) >+ { >+ *(__m64 *)(d + 0) = *(__m64 *)(s + 0); >+ *(__m64 *)(d + 8) = *(__m64 *)(s + 8); >+ *(__m64 *)(d + 16) = *(__m64 *)(s + 16); >+ *(__m64 *)(d + 24) = *(__m64 *)(s + 24); >+ *(__m64 *)(d + 32) = *(__m64 *)(s + 32); >+ *(__m64 *)(d + 40) = *(__m64 *)(s + 40); >+ *(__m64 *)(d + 48) = *(__m64 *)(s + 48); >+ *(__m64 *)(d + 56) = *(__m64 *)(s + 56); >+ w -= 64; >+ s += 64; >+ d += 64; >+ } >+ while (w >= 4) >+ { >+ *(CARD32 *)d = *(CARD32 *)s; >+ >+ w -= 4; >+ s += 4; >+ d += 4; >+ } >+ if (w >= 2) >+ { >+ *(CARD16 *)d = *(CARD16 *)s; >+ w -= 2; >+ s += 2; >+ d += 2; >+ } >+ } >+ >+ _mm_empty(); > return TRUE; > } > >+void >+fbCompositeCopyAreammx (CARD8 op, >+ PicturePtr pSrc, >+ PicturePtr pMask, >+ PicturePtr pDst, >+ INT16 xSrc, >+ INT16 ySrc, >+ INT16 xMask, >+ INT16 yMask, >+ INT16 xDst, >+ INT16 yDst, >+ CARD16 width, >+ CARD16 height) >+{ >+ fbCopyAreammx (pSrc->pDrawable, >+ pDst->pDrawable, >+ xSrc, ySrc, >+ xDst, yDst, >+ width, height); >+} >+ >+#ifndef __amd64__ > Bool > fbHaveMMX (void) > { > static Bool initialized = FALSE; > static Bool mmx_present; >- >+ > if (!initialized) > { > int tmp; /* static variables are accessed through %ebx, >@@ -1466,7 +1699,7 @@ > > __asm__ __volatile__ ( > /* Check if bit 21 in flags word is writeable */ >- >+ > "pusha \n\t" > "pushfl \n\t" > "popl %%eax \n\t" >@@ -1502,13 +1735,14 @@ > : /* no input */); > > initialized = TRUE; >- >+ > mmx_present = tmp; > } > > return mmx_present; > } >+#endif /* __amd64__ */ > > > #endif /* RENDER */ >-#endif /* USE_GCC34_MMX */ >+#endif /* USE_MMX */ >diff -ur xc-orig/programs/Xserver/fb/fbmmx.h xc/programs/Xserver/fb/fbmmx.h >--- xc-orig/programs/Xserver/fb/fbmmx.h 2005-02-11 04:00:50.006092570 -0500 >+++ xc/programs/Xserver/fb/fbmmx.h 2005-02-11 04:01:32.072346126 -0500 >@@ -1,5 +1,5 @@ > /* >- * Copyright © 2004 Red Hat, Inc. >+ * Copyright © 2004 Red Hat, Inc. > * > * Permission to use, copy, modify, distribute, and sell this software and its > * documentation for any purpose is hereby granted without fee, provided that >@@ -18,17 +18,23 @@ > * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN > * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > * >- * Author: Søren Sandmann (sandmann@redhat.com) >+ * Author: Søren Sandmann (sandmann@redhat.com) > * > * Based on work by Owen Taylor > */ >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX >+ >+#ifndef __amd64__ > Bool fbHaveMMX(void); > #else >-#define fbHaveMMX FALSE >+#define fbHaveMMX() TRUE >+#endif >+ >+#else >+#define fbHaveMMX() FALSE > #endif > >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > > void fbCompositeSolidMask_nx8888x0565Cmmx (CARD8 op, > PicturePtr pSrc, >@@ -150,6 +156,38 @@ > INT16 yDst, > CARD16 width, > CARD16 height); >+void fbCompositeSrc_8888x8x8888mmx (CARD8 op, >+ PicturePtr pSrc, >+ PicturePtr pMask, >+ PicturePtr pDst, >+ INT16 xSrc, >+ INT16 ySrc, >+ INT16 xMask, >+ INT16 yMask, >+ INT16 xDst, >+ INT16 yDst, >+ CARD16 width, >+ CARD16 height); >+Bool fbCopyAreammx (DrawablePtr pSrc, >+ DrawablePtr pDst, >+ int src_x, >+ int src_y, >+ int dst_x, >+ int dst_y, >+ int width, >+ int height); >+void fbCompositeCopyAreammx (CARD8 op, >+ PicturePtr pSrc, >+ PicturePtr pMask, >+ PicturePtr pDst, >+ INT16 xSrc, >+ INT16 ySrc, >+ INT16 xMask, >+ INT16 yMask, >+ INT16 xDst, >+ INT16 yDst, >+ CARD16 width, >+ CARD16 height); > Bool fbSolidFillmmx (DrawablePtr pDraw, > int x, > int y, >@@ -157,4 +195,4 @@ > int height, > FbBits xor); > >-#endif /* USE_GCC34_MMX */ >+#endif /* USE_MMX */ > >diff -ur xc-orig/programs/Xserver/fb/fbpict.c xc/programs/Xserver/fb/fbpict.c >--- xc-orig/programs/Xserver/fb/fbpict.c 2005-02-11 04:00:50.007092600 -0500 >+++ xc/programs/Xserver/fb/fbpict.c 2005-02-11 04:01:32.075346216 -0500 >@@ -1,7 +1,7 @@ > /* > * $XFree86: xc/programs/Xserver/fb/fbpict.c,v 1.15 2002/09/26 02:56:48 keithp Exp $ > * >- * Copyright © 2000 SuSE, Inc. >+ * Copyright © 2000 SuSE, Inc. > * > * Permission to use, copy, modify, distribute, and sell this software and its > * documentation for any purpose is hereby granted without fee, provided that >@@ -863,6 +863,15 @@ > if (!pSrc->transform && !(pMask && pMask->transform)) > if (!maskAlphaMap && !srcAlphaMap && !dstAlphaMap) > switch (op) { >+ case PictOpSrc: >+#ifdef USE_MMX >+ if (!pMask && pSrc->format == pDst->format && >+ pSrc->pDrawable != pDst->pDrawable) >+ { >+ func = fbCompositeCopyAreammx; >+ } >+#endif >+ break; > case PictOpOver: > if (pMask) > { >@@ -877,7 +886,7 @@ > switch (pDst->format) { > case PICT_r5g6b5: > case PICT_b5g6r5: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSolidMask_nx8x0565mmx; > else >@@ -892,7 +901,7 @@ > case PICT_x8r8g8b8: > case PICT_a8b8g8r8: > case PICT_x8b8g8r8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSolidMask_nx8x8888mmx; > else >@@ -906,7 +915,7 @@ > switch (pDst->format) { > case PICT_a8r8g8b8: > case PICT_x8r8g8b8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSolidMask_nx8888x8888Cmmx; > else >@@ -914,7 +923,7 @@ > func = fbCompositeSolidMask_nx8888x8888C; > break; > case PICT_r5g6b5: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSolidMask_nx8888x0565Cmmx; > else >@@ -929,7 +938,7 @@ > switch (pDst->format) { > case PICT_a8b8g8r8: > case PICT_x8b8g8r8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSolidMask_nx8888x8888Cmmx; > else >@@ -937,7 +946,7 @@ > func = fbCompositeSolidMask_nx8888x8888C; > break; > case PICT_b5g6r5: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSolidMask_nx8888x0565Cmmx; > else >@@ -970,6 +979,7 @@ > xSrc == xMask && ySrc == yMask && > !pMask->componentAlpha) > { >+ /* source == mask: non-premultiplied data */ > switch (pSrc->format) { > case PICT_x8b8g8r8: > switch (pMask->format) { >@@ -978,13 +988,13 @@ > switch (pDst->format) { > case PICT_a8r8g8b8: > case PICT_x8r8g8b8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrc_8888RevNPx8888mmx; > #endif > break; > case PICT_r5g6b5: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrc_8888RevNPx0565mmx; > #endif >@@ -1000,13 +1010,13 @@ > switch (pDst->format) { > case PICT_a8b8g8r8: > case PICT_x8b8g8r8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrc_8888RevNPx8888mmx; > #endif > break; > case PICT_r5g6b5: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrc_8888RevNPx0565mmx; > #endif >@@ -1018,9 +1028,27 @@ > } > break; > } >+ else >+ { >+ /* non-repeating source, repeating mask => translucent window */ >+ if (maskRepeat && >+ pMask->pDrawable->width == 1 && >+ pMask->pDrawable->height == 1) >+ { >+ if (pSrc->format == PICT_x8r8g8b8 && >+ pDst->format == PICT_x8r8g8b8 && >+ pMask->format == PICT_a8) >+ { >+#ifdef USE_MMX >+ if (fbHaveMMX()) >+ func = fbCompositeSrc_8888x8x8888mmx; >+#endif >+ } >+ } >+ } > } > } >- else >+ else /* no mask */ > { > if (srcRepeat && > pSrc->pDrawable->width == 1 && >@@ -1032,7 +1060,7 @@ > switch (pDst->format) { > case PICT_a8r8g8b8: > case PICT_x8r8g8b8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > { > srcRepeat = FALSE; >@@ -1041,7 +1069,7 @@ > #endif > break; > case PICT_r5g6b5: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > { > srcRepeat = FALSE; >@@ -1070,6 +1098,27 @@ > break; > } > break; >+ case PICT_x8r8g8b8: >+ switch (pDst->format) { >+ case PICT_a8r8g8b8: >+ case PICT_x8r8g8b8: >+#ifdef USE_MMX >+ if (fbHaveMMX()) >+ func = fbCompositeCopyAreammx; >+#endif >+ break; >+ } >+ case PICT_x8b8g8r8: >+ switch (pDst->format) { >+ case PICT_a8b8g8r8: >+ case PICT_x8b8g8r8: >+#ifdef USE_MMX >+ if (fbHaveMMX()) >+ func = fbCompositeCopyAreammx; >+#endif >+ break; >+ } >+ break; > case PICT_a8b8g8r8: > switch (pDst->format) { > case PICT_a8b8g8r8: >@@ -1109,7 +1158,7 @@ > case PICT_a8r8g8b8: > switch (pDst->format) { > case PICT_a8r8g8b8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrcAdd_8888x8888mmx; > else >@@ -1121,7 +1170,7 @@ > case PICT_a8b8g8r8: > switch (pDst->format) { > case PICT_a8b8g8r8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrcAdd_8888x8888mmx; > else >@@ -1133,7 +1182,7 @@ > case PICT_a8: > switch (pDst->format) { > case PICT_a8: >-#ifdef USE_GCC34_MMX >+#ifdef USE_MMX > if (fbHaveMMX()) > func = fbCompositeSrcAdd_8000x8000mmx; > else
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 80685
:
50313
|
50407
|
50408
| 51031