Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 80685 | Differences between
and this patch

Collapse All | Expand All

(-)xc-orig/programs/Xserver/fb/Imakefile (-2 / +11 lines)
Lines 3-15 Link Here
3
XCOMM 
3
XCOMM 
4
XCOMM Id: Imakefile,v 1.1 1999/11/02 03:54:44 keithp Exp $
4
XCOMM Id: Imakefile,v 1.1 1999/11/02 03:54:44 keithp Exp $
5
5
6
#if defined(i386Architecture) && defined(HasGcc34) && HasGcc34
6
#if defined(HasGcc34) && HasGcc34
7
MMXOPTIONS= -mmmx -Winline --param inline-unit-growth=10000 \
7
MMXOPTIONS= -mmmx -Winline --param inline-unit-growth=10000 \
8
	--param large-function-growth=10000 -DUSE_GCC34_MMX
8
	--param large-function-growth=10000 -DUSE_MMX
9
SSEOPTIONS= $(MMXOPTIONS) -msse -DUSE_SSE
9
10
11
#if defined(i386Architecture)
10
SpecialCObjectRule(fbmmx,fbmmx.c,$(MMXOPTIONS))
12
SpecialCObjectRule(fbmmx,fbmmx.c,$(MMXOPTIONS))
13
#elif defined(AMD64Architecture)
14
SpecialCObjectRule(fbmmx,fbmmx.c,$(SSEOPTIONS))
15
#endif
16
17
#if defined(i386Architecture) || defined(AMD64Architecture)
11
SpecialCObjectRule(fbpict,fbpict.c,$(MMXOPTIONS))
18
SpecialCObjectRule(fbpict,fbpict.c,$(MMXOPTIONS))
12
SpecialCObjectRule(fbfill,fbfill.c,$(MMXOPTIONS))
19
SpecialCObjectRule(fbfill,fbfill.c,$(MMXOPTIONS))
20
SpecialCObjectRule(fbcopy,fbcopy.c,$(MMXOPTIONS))
21
#endif
13
22
14
#endif
23
#endif
15
24
(-)xc-orig/programs/Xserver/fb/fbcompose.c (-4 / +17 lines)
Lines 1-8 Link Here
1
/*
1
/*
2
 * $XdotOrg: xc/programs/Xserver/fb/fbcompose.c,v 1.3 2004/05/12 01:49:46 anholt Exp $
2
 * $XdotOrg: xc/programs/Xserver/fb/fbcompose.c,v 1.5 2005/01/13 20:49:21 sandmann Exp $
3
 * $XFree86: xc/programs/Xserver/fb/fbcompose.c,v 1.17tsi Exp $
3
 * $XFree86: xc/programs/Xserver/fb/fbcompose.c,v 1.17tsi Exp $
4
 *
4
 *
5
 * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
5
 * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
6
 *
6
 *
7
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * Permission to use, copy, modify, distribute, and sell this software and its
8
 * documentation for any purpose is hereby granted without fee, provided that
8
 * documentation for any purpose is hereby granted without fee, provided that
Lines 2693-2699 Link Here
2693
    op->u.transform.y = y - op->u.transform.top_y;
2693
    op->u.transform.y = y - op->u.transform.top_y;
2694
}
2694
}
2695
2695
2696
2697
Bool
2696
Bool
2698
fbBuildCompositeOperand (PicturePtr	    pPict,
2697
fbBuildCompositeOperand (PicturePtr	    pPict,
2699
			 FbCompositeOperand op[4],
2698
			 FbCompositeOperand op[4],
Lines 2710-2716 Link Here
2710
	
2709
	
2711
	op->u.transform.top_y = pPict->pDrawable->y;
2710
	op->u.transform.top_y = pPict->pDrawable->y;
2712
	op->u.transform.left_x = pPict->pDrawable->x;
2711
	op->u.transform.left_x = pPict->pDrawable->x;
2713
	
2714
	op->u.transform.start_x = x - op->u.transform.left_x;
2712
	op->u.transform.start_x = x - op->u.transform.left_x;
2715
	op->u.transform.x = op->u.transform.start_x;
2713
	op->u.transform.x = op->u.transform.start_x;
2716
	op->u.transform.y = y - op->u.transform.top_y;
2714
	op->u.transform.y = y - op->u.transform.top_y;
Lines 2822-2827 Link Here
2822
    FbCombineFunc	f;
2820
    FbCombineFunc	f;
2823
    int			w;
2821
    int			w;
2824
2822
2823
#if 0
2824
    ErrorF ("op:		%d\n"
2825
	    "src format:	%lx\n"
2826
	    "msk format		%lx\n"
2827
	    "dst format		%lx\n"
2828
	    "width:		%d\n"
2829
	    "height		%d\n",
2830
	    op,
2831
	    pSrc? pSrc->format : 0,
2832
	    pMask? pMask->format : 0,
2833
	    pDst? pDst->format : 0,
2834
	    width, height);
2835
    ErrorF ("PICT_x8r8g8b8: %lx\n", PICT_x8r8g8b8);
2836
#endif
2837
    
2825
    if (!fbBuildCompositeOperand (pSrc, src, xSrc, ySrc, TRUE, TRUE))
2838
    if (!fbBuildCompositeOperand (pSrc, src, xSrc, ySrc, TRUE, TRUE))
2826
	return;
2839
	return;
2827
    if (!fbBuildCompositeOperand (pDst, dst, xDst, yDst, FALSE, TRUE))
2840
    if (!fbBuildCompositeOperand (pDst, dst, xDst, yDst, FALSE, TRUE))
(-)xc-orig/programs/Xserver/fb/fbcopy.c (-8 / +32 lines)
Lines 1-7 Link Here
1
/*
1
/*
2
 * Id: fbcopy.c,v 1.1 1999/11/02 03:54:45 keithp Exp $
2
 * Id: fbcopy.c,v 1.1 1999/11/02 03:54:45 keithp Exp $
3
 *
3
 *
4
 * Copyright © 1998 Keith Packard
4
 * Copyright © 1998 Keith Packard
5
 *
5
 *
6
 * Permission to use, copy, modify, distribute, and sell this software and its
6
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * documentation for any purpose is hereby granted without fee, provided that
7
 * documentation for any purpose is hereby granted without fee, provided that
Lines 27-32 Link Here
27
#ifdef IN_MODULE
27
#ifdef IN_MODULE
28
#include "xf86_ansic.h"
28
#include "xf86_ansic.h"
29
#endif
29
#endif
30
#include "fbmmx.h"
30
31
31
void
32
void
32
fbCopyNtoN (DrawablePtr	pSrcDrawable,
33
fbCopyNtoN (DrawablePtr	pSrcDrawable,
Lines 54-81 Link Here
54
    
55
    
55
    fbGetDrawable (pSrcDrawable, src, srcStride, srcBpp, srcXoff, srcYoff);
56
    fbGetDrawable (pSrcDrawable, src, srcStride, srcBpp, srcXoff, srcYoff);
56
    fbGetDrawable (pDstDrawable, dst, dstStride, dstBpp, dstXoff, dstYoff);
57
    fbGetDrawable (pDstDrawable, dst, dstStride, dstBpp, dstXoff, dstYoff);
57
    
58
58
    while (nbox--)
59
    while (nbox--)
59
    {
60
    {
61
#ifdef USE_MMX
62
	if (!reverse && !upsidedown && fbHaveMMX())
63
	{
64
	    if (!fbCopyAreammx (pSrcDrawable,
65
				pDstDrawable,
66
				
67
				(pbox->x1 + dx + srcXoff),
68
				(pbox->y1 + dy + srcYoff),
69
				
70
				(pbox->x1 + dstXoff),
71
				(pbox->y1 + dstYoff),
72
				
73
				(pbox->x2 - pbox->x1),
74
				(pbox->y2 - pbox->y1)))
75
		goto fallback;
76
	    else
77
		goto next;
78
	}
79
    fallback:
80
#endif
60
	fbBlt (src + (pbox->y1 + dy + srcYoff) * srcStride,
81
	fbBlt (src + (pbox->y1 + dy + srcYoff) * srcStride,
61
	       srcStride,
82
	       srcStride,
62
	       (pbox->x1 + dx + srcXoff) * srcBpp,
83
	       (pbox->x1 + dx + srcXoff) * srcBpp,
63
    
84
	       
64
	       dst + (pbox->y1 + dstYoff) * dstStride,
85
	       dst + (pbox->y1 + dstYoff) * dstStride,
65
	       dstStride,
86
	       dstStride,
66
	       (pbox->x1 + dstXoff) * dstBpp,
87
	       (pbox->x1 + dstXoff) * dstBpp,
67
    
88
	       
68
	       (pbox->x2 - pbox->x1) * dstBpp,
89
	       (pbox->x2 - pbox->x1) * dstBpp,
69
	       (pbox->y2 - pbox->y1),
90
	       (pbox->y2 - pbox->y1),
70
    
91
	       
71
	       alu,
92
	       alu,
72
	       pm,
93
	       pm,
73
	       dstBpp,
94
	       dstBpp,
74
    
95
	       
75
	       reverse,
96
	       reverse,
76
	       upsidedown);
97
	       upsidedown);
98
#ifdef USE_MMX
99
    next:
100
#endif
77
	pbox++;
101
	pbox++;
78
    }
102
    }    
79
}
103
}
80
104
81
void
105
void
Lines 594-600 Link Here
594
	    int		yOut)
618
	    int		yOut)
595
{
619
{
596
    fbCopyProc	copy;
620
    fbCopyProc	copy;
597
    
621
598
#ifdef FB_24_32BIT
622
#ifdef FB_24_32BIT
599
    if (pSrcDrawable->bitsPerPixel != pDstDrawable->bitsPerPixel)
623
    if (pSrcDrawable->bitsPerPixel != pDstDrawable->bitsPerPixel)
600
	copy = fb24_32CopyMtoN;
624
	copy = fb24_32CopyMtoN;
(-)xc-orig/programs/Xserver/fb/fbfill.c (-2 / +2 lines)
Lines 1-7 Link Here
1
/*
1
/*
2
 * Id: fbfill.c,v 1.1 1999/11/02 03:54:45 keithp Exp $
2
 * Id: fbfill.c,v 1.1 1999/11/02 03:54:45 keithp Exp $
3
 *
3
 *
4
 * Copyright © 1998 Keith Packard
4
 * Copyright © 1998 Keith Packard
5
 *
5
 *
6
 * Permission to use, copy, modify, distribute, and sell this software and its
6
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * documentation for any purpose is hereby granted without fee, provided that
7
 * documentation for any purpose is hereby granted without fee, provided that
Lines 44-50 Link Here
44
44
45
    switch (pGC->fillStyle) {
45
    switch (pGC->fillStyle) {
46
    case FillSolid:
46
    case FillSolid:
47
#ifdef USE_GCC34_MMX
47
#ifdef USE_MMX
48
	if (!pPriv->and && fbHaveMMX())
48
	if (!pPriv->and && fbHaveMMX())
49
	    if (fbSolidFillmmx (pDrawable, x, y, width, height, pPriv->xor))
49
	    if (fbSolidFillmmx (pDrawable, x, y, width, height, pPriv->xor))
50
		return;
50
		return;
(-)xc-orig/programs/Xserver/fb/fbmmx.c (-353 / +587 lines)
Lines 1-5 Link Here
1
/*
1
/*
2
 * Copyright © 2004 Red Hat, Inc.
2
 * Copyright © 2004 Red Hat, Inc.
3
 * Copyright © 2004 Nicholas Miell
3
 *
4
 *
4
 * Permission to use, copy, modify, distribute, and sell this software and its
5
 * Permission to use, copy, modify, distribute, and sell this software and its
5
 * documentation for any purpose is hereby granted without fee, provided that
6
 * documentation for any purpose is hereby granted without fee, provided that
Lines 18-31 Link Here
18
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 
19
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 
19
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20
 *
21
 *
21
 * Author:  Søren Sandmann (sandmann@redhat.com)
22
 * Author:  Søren Sandmann (sandmann@redhat.com)
22
 * 
23
 * Minor Improvements: Nicholas Miell (nmiell@gmail.com)
24
 *
23
 * Based on work by Owen Taylor
25
 * Based on work by Owen Taylor
24
 */
26
 */
25
27
28
29
#ifdef USE_MMX
30
26
#include "fb.h"
31
#include "fb.h"
32
#include "fbmmx.h"
33
34
#include <mmintrin.h>
27
35
28
#ifdef USE_GCC34_MMX
36
#ifdef USE_SSE
37
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
38
#endif 
29
39
30
#ifdef RENDER
40
#ifdef RENDER
31
41
Lines 33-43 Link Here
33
#include "mipict.h"
43
#include "mipict.h"
34
#include "fbpict.h"
44
#include "fbpict.h"
35
45
36
typedef int Vector1x64 __attribute__ ((mode(DI)));
37
typedef int Vector2x32 __attribute__ ((mode(V2SI)));
38
typedef int Vector4x16 __attribute__ ((mode(V4HI)));
39
typedef int Vector8x8  __attribute__ ((mode(V8QI)));
40
41
typedef unsigned long long ullong;
46
typedef unsigned long long ullong;
42
47
43
#define noVERBOSE
48
#define noVERBOSE
Lines 50-56 Link Here
50
55
51
typedef struct
56
typedef struct
52
{
57
{
53
    ullong mmx_zero;
54
    ullong mmx_4x00ff;
58
    ullong mmx_4x00ff;
55
    ullong mmx_4x0080;
59
    ullong mmx_4x0080;
56
    ullong mmx_565_rgb;
60
    ullong mmx_565_rgb;
Lines 70-76 Link Here
70
74
71
static const MMXData c =
75
static const MMXData c =
72
{
76
{
73
    .mmx_zero =				0x0000000000000000ULL,
74
    .mmx_4x00ff =			0x00ff00ff00ff00ffULL,
77
    .mmx_4x00ff =			0x00ff00ff00ff00ffULL,
75
    .mmx_4x0080 =			0x0080008000800080ULL,
78
    .mmx_4x0080 =			0x0080008000800080ULL,
76
    .mmx_565_rgb =			0x000001f0003f001fULL,
79
    .mmx_565_rgb =			0x000001f0003f001fULL,
Lines 88-208 Link Here
88
    .mmx_000000000000ffff =		0x000000000000ffffULL,
91
    .mmx_000000000000ffff =		0x000000000000ffffULL,
89
};
92
};
90
93
91
static __inline__ Vector1x64
94
#define MC(x) ((__m64) c.mmx_##x)
92
shift (Vector1x64 v, int s)
95
96
static __inline__ __m64
97
shift (__m64 v, int s)
93
{
98
{
94
    if (s > 0)
99
    if (s > 0)
95
	return __builtin_ia32_psllq (v, s);
100
	return _mm_slli_si64 (v, s);
96
    else if (s < 0)
101
    else if (s < 0)
97
	return __builtin_ia32_psrlq (v, -s);
102
	return _mm_srli_si64 (v, -s);
98
    else
103
    else
99
	return v;
104
	return v;
100
}
105
}
101
106
102
static __inline__ Vector4x16
107
static __inline__ __m64
103
negate (Vector4x16 mask)
108
negate (__m64 mask)
104
{
109
{
105
    return (Vector4x16)__builtin_ia32_pxor (
110
    return _mm_xor_si64 (mask, MC(4x00ff));
106
	(Vector1x64)mask,
107
	(Vector1x64)c.mmx_4x00ff);
108
}
111
}
109
112
110
static __inline__ Vector4x16
113
static __inline__ __m64
111
pix_multiply (Vector4x16 a, Vector4x16 b)
114
pix_multiply (__m64 a, __m64 b)
112
{
115
{
113
    Vector4x16 res;
116
    __m64 res;
114
    
117
    
115
    res = __builtin_ia32_pmullw (a, b);
118
    res = _mm_mullo_pi16 (a, b);
116
    res = __builtin_ia32_paddw (res, (Vector4x16)c.mmx_4x0080);
119
    res = _mm_add_pi16 (res, MC(4x0080));
117
    res = __builtin_ia32_psrlw (res, 8);
120
    res = _mm_srli_pi16 (res, 8);
118
    
121
    
119
    return res;
122
    return res;
120
}
123
}
121
124
122
#if 0
125
#ifdef USE_SSE
123
#define HAVE_PSHUFW
126
#define HAVE_PSHUFW
124
#endif
127
#endif
125
128
126
#ifdef HAVE_PSHUFW
129
#ifdef HAVE_PSHUFW
127
130
128
static __inline__ Vector4x16
131
static __inline__ __m64
129
expand_alpha (Vector4x16 pixel)
132
expand_alpha (__m64 pixel)
130
{
133
{
131
    Vector4x16 result;
134
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 3, 3, 3));
132
    __asm__ ("pshufw $0xFF, %1, %0\n\t" : "=y" (result) : "y" (pixel));
133
    return result;
134
}
135
}
135
136
136
static __inline__ Vector4x16
137
static __inline__ __m64
137
expand_alpha_rev (Vector4x16 pixel)
138
expand_alpha_rev (__m64 pixel)
138
{
139
{
139
    Vector4x16 result;
140
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(0, 0, 0, 0));
140
    __asm__ ("pshufw $0x00, %1, %0\n\t" : "=y" (result) : "y" (pixel));
141
    return result;
142
}    
141
}    
143
142
144
static __inline__ Vector4x16
143
static __inline__ __m64
145
invert_colors (Vector4x16 pixel)
144
invert_colors (__m64 pixel)
146
{
145
{
147
    Vector4x16 result;
146
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 0, 1, 2));
148
149
    /* 0xC6 = 11000110 */
150
    /*         3 0 1 2 */
151
    
152
    __asm__ ("pshufw $0xC6, %1, %0\n\t" : "=y" (result) : "y" (pixel));
153
154
    return result;
155
}
147
}
156
148
157
#else
149
#else
158
150
159
static __inline__ Vector4x16
151
static __inline__ __m64
160
expand_alpha (Vector4x16 pixel)
152
expand_alpha (__m64 pixel)
161
{
153
{
162
    Vector1x64 t1, t2;
154
    __m64 t1, t2;
163
155
    
164
    t1 = shift ((Vector1x64)pixel, -48);
156
    t1 = shift (pixel, -48);
165
    t2 = shift (t1, 16);
157
    t2 = shift (t1, 16);
166
    t1 = __builtin_ia32_por (t1, t2);
158
    t1 = _mm_or_si64 (t1, t2);
167
    t2 = shift (t1, 32);
159
    t2 = shift (t1, 32);
168
    t1 = __builtin_ia32_por (t1, t2);
160
    t1 = _mm_or_si64 (t1, t2);
169
161
    
170
    return (Vector4x16)t1;
162
    return t1;
171
}
163
}
172
164
173
static __inline__ Vector4x16
165
static __inline__ __m64
174
expand_alpha_rev (Vector4x16 pixel)
166
expand_alpha_rev (__m64 pixel)
175
{
167
{
176
    Vector1x64 t1, t2;
168
    __m64 t1, t2;
177
169
    
178
    t1 = shift ((Vector1x64)pixel,  48);
170
    /* move alpha to low 16 bits and zero the rest */
171
    t1 = shift (pixel,  48);
179
    t1 = shift (t1, -48);
172
    t1 = shift (t1, -48);
173
    
180
    t2 = shift (t1, 16);
174
    t2 = shift (t1, 16);
181
    t1 = __builtin_ia32_por (t1, t2);
175
    t1 = _mm_or_si64 (t1, t2);
182
    t2 = shift (t1, 32);
176
    t2 = shift (t1, 32);
183
    t1 = __builtin_ia32_por (t1, t2);
177
    t1 = _mm_or_si64 (t1, t2);
184
178
    
185
    return (Vector4x16)t1;
179
    return t1;
186
}
180
}
187
181
188
static __inline__ Vector4x16
182
static __inline__ __m64
189
invert_colors (Vector4x16 pixel)
183
invert_colors (__m64 pixel)
190
{
184
{
191
    Vector1x64 x, y, z;
185
    __m64 x, y, z;
192
186
    
193
    x = y = z = (Vector1x64)pixel;
187
    x = y = z = pixel;
194
188
    
195
    x = __builtin_ia32_pand (x, (Vector1x64)c.mmx_ffff0000ffff0000);
189
    x = _mm_and_si64 (x, MC(ffff0000ffff0000));
196
    y = __builtin_ia32_pand (y, (Vector1x64)c.mmx_000000000000ffff);
190
    y = _mm_and_si64 (y, MC(000000000000ffff));
197
    z = __builtin_ia32_pand (z, (Vector1x64)c.mmx_0000ffff00000000);
191
    z = _mm_and_si64 (z, MC(0000ffff00000000));
198
192
    
199
    y = shift (y, 32);
193
    y = shift (y, 32);
200
    z = shift (z, -32);
194
    z = shift (z, -32);
201
195
    
202
    x = __builtin_ia32_por (x, y);
196
    x = _mm_or_si64 (x, y);
203
    x = __builtin_ia32_por (x, z);
197
    x = _mm_or_si64 (x, z);
204
198
    
205
    return (Vector4x16)x;
199
    return x;
206
}
200
}
207
201
208
#endif
202
#endif
Lines 210-356 Link Here
210
/* Notes about writing mmx code
204
/* Notes about writing mmx code
211
 *
205
 *
212
 * give memory operands as the second operand. If you give it as the
206
 * give memory operands as the second operand. If you give it as the
213
 * first, gcc will first load it into a register, then use that register
207
 * first, gcc will first load it into a register, then use that
208
 * register
214
 *
209
 *
215
 *   ie. use
210
 *   ie. use
216
 *
211
 *
217
 *         __builtin_pmullw (x, mmx_constant[8]);
212
 *         _mm_mullo_pi16 (x, mmx_constant);
218
 *
213
 *
219
 *   not
214
 *   not
220
 *
215
 *
221
 *         __builtin_pmullw (mmx_constant[8], x);
216
 *         _mm_mullo_pi16 (mmx_constant, x);
222
 *
217
 *
223
 * Also try to minimize dependencies. Ie. when you need a value, try to calculate
218
 * Also try to minimize dependencies. i.e. when you need a value, try
224
 * it from a value that was calculated as early as possible.
219
 * to calculate it from a value that was calculated as early as
220
 * possible.
225
 */
221
 */
226
222
227
static __inline__ Vector4x16
223
static __inline__ __m64
228
over (Vector4x16 src, Vector4x16 srca, Vector4x16 dest)
224
over (__m64 src, __m64 srca, __m64 dest)
229
{
225
{
230
    return (Vector4x16)__builtin_ia32_paddusb ((Vector8x8)src, (Vector8x8)pix_multiply(dest, negate(srca)));
226
    return  _mm_adds_pu8 (src, pix_multiply(dest, negate(srca)));
231
}
227
}
232
228
233
static __inline__ Vector4x16
229
static __inline__ __m64
234
over_rev_non_pre (Vector4x16 src, Vector4x16 dest)
230
over_rev_non_pre (__m64 src, __m64 dest)
235
{
231
{
236
    Vector4x16 srca = expand_alpha (src);
232
    __m64 srca = expand_alpha (src);
237
    Vector4x16 srcfaaa = (Vector4x16)__builtin_ia32_por((Vector1x64)srca, (Vector1x64)c.mmx_full_alpha);
233
    __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha));
238
234
    
239
    return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
235
    return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
240
}
236
}
241
237
242
static __inline__ Vector4x16
238
static __inline__ __m64
243
in (Vector4x16 src,
239
in (__m64 src,
244
    Vector4x16 mask)
240
    __m64 mask)
245
{
241
{
246
    return pix_multiply (src, mask);
242
    return pix_multiply (src, mask);
247
}
243
}
248
244
249
static __inline__ Vector4x16
245
static __inline__ __m64
250
in_over (Vector4x16 src,
246
in_over (__m64 src,
251
	 Vector4x16 srca,
247
	 __m64 srca,
252
	 Vector4x16 mask,
248
	 __m64 mask,
253
	 Vector4x16 dest)
249
	 __m64 dest)
254
{
250
{
255
    return over(in(src, mask), pix_multiply(srca, mask), dest);
251
    return over(in(src, mask), pix_multiply(srca, mask), dest);
256
}
252
}
257
253
258
static __inline__ Vector8x8
254
static __inline__ __m64
259
cvt32to64 (CARD32 v)
260
{
261
    ullong r = v;
262
    return (Vector8x8)r;
263
}
264
265
static __inline__ Vector4x16
266
load8888 (CARD32 v)
255
load8888 (CARD32 v)
267
{
256
{
268
    return (Vector4x16)__builtin_ia32_punpcklbw (cvt32to64 (v),
257
    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64());
269
						 (Vector8x8)c.mmx_zero);
270
}
258
}
271
259
272
static __inline__ Vector8x8
260
static __inline__ __m64
273
pack8888 (Vector4x16 lo, Vector4x16 hi)
261
pack8888 (__m64 lo, __m64 hi)
274
{
262
{
275
    Vector8x8 r;
263
    __m64 r;
276
    r = __builtin_ia32_packuswb ((Vector4x16)lo, (Vector4x16)hi);
264
    r = _mm_packs_pu16 (lo, hi);
277
    return r;
265
    return r;
278
}
266
}
279
267
280
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into 00RR00GG00BB
268
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
281
   
269
 *
282
--- Expanding 565 in the low word ---
270
 *    00RR00GG00BB
283
271
 * 
284
m = (m << (32 - 3)) | (m << (16 - 5)) | m;
272
 * --- Expanding 565 in the low word ---
285
m = m & (01f0003f001f);
273
 * 
286
m = m * (008404100840);
274
 * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
287
m = m >> 8;
275
 * m = m & (01f0003f001f);
288
276
 * m = m * (008404100840);
289
Note the trick here - the top word is shifted by another nibble to avoid
277
 * m = m >> 8;
290
it bumping into the middle word
278
 * 
291
*/
279
 * Note the trick here - the top word is shifted by another nibble to
292
static __inline__ Vector4x16
280
 * avoid it bumping into the middle word
293
expand565 (Vector4x16 pixel, int pos)
281
 */
282
static __inline__ __m64
283
expand565 (__m64 pixel, int pos)
294
{
284
{
295
    Vector1x64 p = (Vector1x64)pixel;
285
    __m64 p = pixel;
286
    __m64 t1, t2;
296
    
287
    
297
    /* move pixel to low 16 bit and zero the rest */
288
    /* move pixel to low 16 bit and zero the rest */
298
    p = shift (shift (p, (3 - pos) * 16), -48); 
289
    p = shift (shift (p, (3 - pos) * 16), -48); 
299
    
290
    
300
    Vector1x64 t1 = shift (p, 36 - 11);
291
    t1 = shift (p, 36 - 11);
301
    Vector1x64 t2 = shift (p, 16 - 5);
292
    t2 = shift (p, 16 - 5);
302
    
293
    
303
    p = __builtin_ia32_por (t1, p);
294
    p = _mm_or_si64 (t1, p);
304
    p = __builtin_ia32_por (t2, p);
295
    p = _mm_or_si64 (t2, p);
305
    p = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_rgb);
296
    p = _mm_and_si64 (p, MC(565_rgb));
306
    
297
    
307
    pixel = __builtin_ia32_pmullw ((Vector4x16)p, (Vector4x16)c.mmx_565_unpack_multiplier);
298
    pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier));
308
    return __builtin_ia32_psrlw (pixel, 8);
299
    return _mm_srli_pi16 (pixel, 8);
309
}
300
}
310
301
311
static __inline__ Vector4x16
302
static __inline__ __m64
312
expand8888 (Vector4x16 in, int pos)
303
expand8888 (__m64 in, int pos)
313
{
304
{
314
    if (pos == 0)
305
    if (pos == 0)
315
	return (Vector4x16)__builtin_ia32_punpcklbw ((Vector8x8)in, (Vector8x8)c.mmx_zero);
306
	return _mm_unpacklo_pi8 (in, _mm_setzero_si64());
316
    else
307
    else
317
	return (Vector4x16)__builtin_ia32_punpckhbw ((Vector8x8)in, (Vector8x8)c.mmx_zero);
308
	return _mm_unpackhi_pi8 (in, _mm_setzero_si64());
318
}
309
}
319
310
320
static __inline__ Vector4x16
311
static __inline__ __m64
321
pack565 (Vector4x16 pixel, Vector4x16 target, int pos)
312
pack565 (__m64 pixel, __m64 target, int pos)
322
{
313
{
323
    Vector1x64 p = (Vector1x64)pixel;
314
    __m64 p = pixel;
324
    Vector1x64 t = (Vector1x64)target;
315
    __m64 t = target;
325
    Vector1x64 r, g, b;
316
    __m64 r, g, b;
326
    
317
    
327
    r = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_r);
318
    r = _mm_and_si64 (p, MC(565_r));
328
    g = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_g);
319
    g = _mm_and_si64 (p, MC(565_g));
329
    b = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_b);
320
    b = _mm_and_si64 (p, MC(565_b));
330
    
321
    
331
    r = shift (r, - (32 - 8) + pos * 16);
322
    r = shift (r, - (32 - 8) + pos * 16);
332
    g = shift (g, - (16 - 3) + pos * 16);
323
    g = shift (g, - (16 - 3) + pos * 16);
333
    b = shift (b, - (0  + 3) + pos * 16);
324
    b = shift (b, - (0  + 3) + pos * 16);
334
325
    
335
    if (pos == 0)
326
    if (pos == 0)
336
	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_0);
327
	t = _mm_and_si64 (t, MC(mask_0));
337
    else if (pos == 1)
328
    else if (pos == 1)
338
	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_1);
329
	t = _mm_and_si64 (t, MC(mask_1));
339
    else if (pos == 2)
330
    else if (pos == 2)
340
	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_2);
331
	t = _mm_and_si64 (t, MC(mask_2));
341
    else if (pos == 3)
332
    else if (pos == 3)
342
	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_3);
333
	t = _mm_and_si64 (t, MC(mask_3));
343
    
334
    
344
    p = __builtin_ia32_por (r, t);
335
    p = _mm_or_si64 (r, t);
345
    p = __builtin_ia32_por (g, p);
336
    p = _mm_or_si64 (g, p);
346
    
337
    
347
    return (Vector4x16)__builtin_ia32_por (b, p);
338
    return _mm_or_si64 (b, p);
348
}
349
350
static __inline__ void
351
emms (void)
352
{
353
    __asm__ __volatile__ ("emms");
354
}
339
}
355
340
356
void
341
void
Lines 371-378 Link Here
371
    CARD32	*dstLine, *dst;
356
    CARD32	*dstLine, *dst;
372
    CARD16	w;
357
    CARD16	w;
373
    FbStride	dstStride;
358
    FbStride	dstStride;
374
    Vector4x16	vsrc, vsrca;
359
    __m64	vsrc, vsrca;
375
360
    
376
    CHECKPOINT();
361
    CHECKPOINT();
377
    
362
    
378
    fbComposeGetSolid(pSrc, src, pDst->format);
363
    fbComposeGetSolid(pSrc, src, pDst->format);
Lines 384-434 Link Here
384
    
369
    
385
    vsrc = load8888 (src);
370
    vsrc = load8888 (src);
386
    vsrca = expand_alpha (vsrc);
371
    vsrca = expand_alpha (vsrc);
387
372
    
388
    while (height--)
373
    while (height--)
389
    {
374
    {
390
	dst = dstLine;
375
	dst = dstLine;
391
	dstLine += dstStride;
376
	dstLine += dstStride;
392
	w = width;
377
	w = width;
393
378
	
394
	CHECKPOINT();
379
	CHECKPOINT();
395
	
380
	
396
	while (w && (unsigned long)dst & 7)
381
	while (w && (unsigned long)dst & 7)
397
	{
382
	{
398
	    *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), (Vector4x16)c.mmx_zero);
383
	    *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)),
384
				     _mm_setzero_si64());
399
	    
385
	    
400
	    w--;
386
	    w--;
401
	    dst++;
387
	    dst++;
402
	}
388
	}
403
389
	
404
	while (w >= 2)
390
	while (w >= 2)
405
	{
391
	{
406
	    Vector4x16 vdest;
392
	    __m64 vdest;
407
	    Vector4x16 dest0, dest1;
393
	    __m64 dest0, dest1;
408
394
	    
409
	    vdest = *(Vector4x16 *)dst;
395
	    vdest = *(__m64 *)dst;
410
	    
396
	    
411
	    dest0 = over(vsrc, vsrca, expand8888(vdest, 0));
397
	    dest0 = over(vsrc, vsrca, expand8888(vdest, 0));
412
	    dest1 = over(vsrc, vsrca, expand8888(vdest, 1));
398
	    dest1 = over(vsrc, vsrca, expand8888(vdest, 1));
413
	    
399
	    
414
	    *(Vector8x8 *)dst = (Vector8x8)pack8888(dest0, dest1);
400
	    *(__m64 *)dst = pack8888(dest0, dest1);
415
	    
401
	    
416
	    dst += 2;
402
	    dst += 2;
417
	    w -= 2;
403
	    w -= 2;
418
	}
404
	}
419
405
	
420
	CHECKPOINT();
406
	CHECKPOINT();
421
	
407
	
422
	while (w)
408
	while (w)
423
	{
409
	{
424
	    *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), (Vector4x16)c.mmx_zero);
410
	    *dst = (ullong) pack8888(over(vsrc, vsrca, load8888(*dst)), _mm_setzero_si64());
425
	    
411
	    
426
	    w--;
412
	    w--;
427
	    dst++;
413
	    dst++;
428
	}
414
	}
429
    }
415
    }
430
    
416
    
431
    emms();
417
    _mm_empty();
432
}
418
}
433
419
434
void
420
void
Lines 449-456 Link Here
449
    CARD16	*dstLine, *dst;
435
    CARD16	*dstLine, *dst;
450
    CARD16	w;
436
    CARD16	w;
451
    FbStride	dstStride;
437
    FbStride	dstStride;
452
    Vector4x16	vsrc, vsrca;
438
    __m64	vsrc, vsrca;
453
439
    
454
    CHECKPOINT();
440
    CHECKPOINT();
455
    
441
    
456
    fbComposeGetSolid(pSrc, src, pDst->format);
442
    fbComposeGetSolid(pSrc, src, pDst->format);
Lines 462-510 Link Here
462
    
448
    
463
    vsrc = load8888 (src);
449
    vsrc = load8888 (src);
464
    vsrca = expand_alpha (vsrc);
450
    vsrca = expand_alpha (vsrc);
465
451
    
466
    while (height--)
452
    while (height--)
467
    {
453
    {
468
	dst = dstLine;
454
	dst = dstLine;
469
	dstLine += dstStride;
455
	dstLine += dstStride;
470
	w = width;
456
	w = width;
471
457
	
472
	CHECKPOINT();
458
	CHECKPOINT();
473
	
459
	
474
	while (w && (unsigned long)dst & 7)
460
	while (w && (unsigned long)dst & 7)
475
	{
461
	{
476
	    ullong d = *dst;
462
	    ullong d = *dst;
477
	    Vector4x16 vdest = expand565 ((Vector4x16)d, 0);
463
	    __m64 vdest = expand565 ((__m64)d, 0);
478
	    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
464
	    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
479
	    *dst = (ullong)vdest;
465
	    *dst = (ullong)vdest;
480
	    
466
	    
481
	    w--;
467
	    w--;
482
	    dst++;
468
	    dst++;
483
	}
469
	}
484
470
	
485
	while (w >= 4)
471
	while (w >= 4)
486
	{
472
	{
487
	    Vector4x16 vdest;
473
	    __m64 vdest;
488
474
	    
489
	    vdest = *(Vector4x16 *)dst;
475
	    vdest = *(__m64 *)dst;
490
	    
476
	    
491
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0);
477
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0);
492
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1);
478
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1);
493
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2);
479
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2);
494
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3);
480
	    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3);
495
	    
481
	    
496
	    *(Vector8x8 *)dst = (Vector8x8)vdest;
482
	    *(__m64 *)dst = vdest;
497
	    
483
	    
498
	    dst += 4;
484
	    dst += 4;
499
	    w -= 4;
485
	    w -= 4;
500
	}
486
	}
501
487
	
502
	CHECKPOINT();
488
	CHECKPOINT();
503
	
489
	
504
	while (w)
490
	while (w)
505
	{
491
	{
506
	    ullong d = *dst;
492
	    ullong d = *dst;
507
	    Vector4x16 vdest = expand565 ((Vector4x16)d, 0);
493
	    __m64 vdest = expand565 ((__m64)d, 0);
508
	    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
494
	    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
509
	    *dst = (ullong)vdest;
495
	    *dst = (ullong)vdest;
510
	    
496
	    
Lines 513-519 Link Here
513
	}
499
	}
514
    }
500
    }
515
    
501
    
516
    emms();
502
    _mm_empty();
517
}
503
}
518
504
519
void
505
void
Lines 534-541 Link Here
534
    CARD32	*dstLine;
520
    CARD32	*dstLine;
535
    CARD32	*maskLine;
521
    CARD32	*maskLine;
536
    FbStride	dstStride, maskStride;
522
    FbStride	dstStride, maskStride;
537
    Vector4x16	vsrc, vsrca;
523
    __m64	vsrc, vsrca;
538
524
    
539
    CHECKPOINT();
525
    CHECKPOINT();
540
    
526
    
541
    fbComposeGetSolid(pSrc, src, pDst->format);
527
    fbComposeGetSolid(pSrc, src, pDst->format);
Lines 562-570 Link Here
562
	    
548
	    
563
	    if (m)
549
	    if (m)
564
	    {
550
	    {
565
		Vector4x16 vdest = load8888(*q);
551
		__m64 vdest = load8888(*q);
566
		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
552
		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
567
		*q = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero);
553
		*q = (ullong)pack8888(vdest, _mm_setzero_si64());
568
	    }
554
	    }
569
	    
555
	    
570
	    twidth--;
556
	    twidth--;
Lines 580-594 Link Here
580
	    
566
	    
581
	    if (m0 | m1)
567
	    if (m0 | m1)
582
	    {
568
	    {
583
		Vector4x16 dest0, dest1;
569
		__m64 dest0, dest1;
584
		Vector4x16 vdest = *(Vector4x16 *)q;
570
		__m64 vdest = *(__m64 *)q;
585
		
571
		
586
		dest0 = in_over(vsrc, vsrca, load8888(m0),
572
		dest0 = in_over(vsrc, vsrca, load8888(m0),
587
				expand8888 (vdest, 0));
573
				expand8888 (vdest, 0));
588
		dest1 = in_over(vsrc, vsrca, load8888(m1),
574
		dest1 = in_over(vsrc, vsrca, load8888(m1),
589
				expand8888 (vdest, 1));
575
				expand8888 (vdest, 1));
590
		
576
		
591
		*(Vector8x8 *)q = (Vector8x8)pack8888(dest0, dest1);
577
		*(__m64 *)q = pack8888(dest0, dest1);
592
	    }
578
	    }
593
	    
579
	    
594
	    p += 2;
580
	    p += 2;
Lines 602-610 Link Here
602
	    
588
	    
603
	    if (m)
589
	    if (m)
604
	    {
590
	    {
605
		Vector4x16 vdest = load8888(*q);
591
		__m64 vdest = load8888(*q);
606
		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
592
		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
607
		*q = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero);
593
		*q = (ullong)pack8888(vdest, _mm_setzero_si64());
608
	    }
594
	    }
609
	    
595
	    
610
	    twidth--;
596
	    twidth--;
Lines 616-622 Link Here
616
	maskLine += maskStride;
602
	maskLine += maskStride;
617
    }
603
    }
618
    
604
    
619
    emms();
605
    _mm_empty();
606
}
607
608
void
609
fbCompositeSrc_8888x8x8888mmx (CARD8	op,
610
			       PicturePtr pSrc,
611
			       PicturePtr pMask,
612
			       PicturePtr pDst,
613
			       INT16	xSrc,
614
			       INT16	ySrc,
615
			       INT16      xMask,
616
			       INT16      yMask,
617
			       INT16      xDst,
618
			       INT16      yDst,
619
			       CARD16     width,
620
			       CARD16     height)
621
{
622
    CARD32	*dstLine, *dst;
623
    CARD32	*srcLine, *src;
624
    CARD8	*maskLine;
625
    CARD32	mask;
626
    __m64	vmask;
627
    FbStride	dstStride, srcStride, maskStride;
628
    CARD16	w;
629
    __m64  srca;
630
    
631
    CHECKPOINT();
632
    
633
    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
634
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
635
    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
636
637
    mask = *maskLine << 24 | *maskLine << 16 | *maskLine << 8 | *maskLine;
638
    vmask = load8888 (mask);
639
    srca = MC(4x00ff);
640
    
641
    while (height--)
642
    {
643
	dst = dstLine;
644
	dstLine += dstStride;
645
	src = srcLine;
646
	srcLine += srcStride;
647
	w = width;
648
649
	while (w && (unsigned long)dst & 7)
650
	{
651
	    __m64 s = load8888 (*src);
652
	    __m64 d = load8888 (*dst);
653
	    
654
	    *dst = (ullong)pack8888 (in_over (s, srca, vmask, d), (__m64)_mm_setzero_si64());
655
	    
656
	    w--;
657
	    dst++;
658
	    src++;
659
	}
660
661
	while (w >= 16)
662
	{
663
	    __m64 vd0 = *(__m64 *)(dst + 0);
664
	    __m64 vd1 = *(__m64 *)(dst + 2);
665
	    __m64 vd2 = *(__m64 *)(dst + 4);
666
	    __m64 vd3 = *(__m64 *)(dst + 6);
667
	    __m64 vd4 = *(__m64 *)(dst + 8);
668
	    __m64 vd5 = *(__m64 *)(dst + 10);
669
	    __m64 vd6 = *(__m64 *)(dst + 12);
670
	    __m64 vd7 = *(__m64 *)(dst + 14);
671
672
	    __m64 vs0 = *(__m64 *)(src + 0);
673
	    __m64 vs1 = *(__m64 *)(src + 2);
674
	    __m64 vs2 = *(__m64 *)(src + 4);
675
	    __m64 vs3 = *(__m64 *)(src + 6);
676
	    __m64 vs4 = *(__m64 *)(src + 8);
677
	    __m64 vs5 = *(__m64 *)(src + 10);
678
	    __m64 vs6 = *(__m64 *)(src + 12);
679
	    __m64 vs7 = *(__m64 *)(dst + 14);
680
681
	    vd0 = (__m64)pack8888 (
682
		in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
683
		in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
684
	
685
	    vd1 = (__m64)pack8888 (
686
		in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
687
		in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
688
	
689
	    vd2 = (__m64)pack8888 (
690
		in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
691
		in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
692
	
693
	    vd3 = (__m64)pack8888 (
694
		in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
695
		in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
696
	
697
	    vd4 = (__m64)pack8888 (
698
		in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
699
		in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
700
	
701
	    vd5 = (__m64)pack8888 (
702
		in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
703
		in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
704
	
705
	    vd6 = (__m64)pack8888 (
706
		in_over (expand8888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
707
		in_over (expand8888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
708
	
709
	    vd7 = (__m64)pack8888 (
710
		in_over (expand8888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
711
		in_over (expand8888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
712
	
713
	    w -= 16;
714
	    dst += 16;
715
	    src += 16;
716
	}
717
	
718
	while (w)
719
	{
720
	    __m64 s = load8888 (*src);
721
	    __m64 d = load8888 (*dst);
722
	    
723
	    *dst = (ullong)pack8888 (in_over (s, srca, vmask, d), (__m64)_mm_setzero_si64());
724
	    
725
	    w--;
726
	    dst++;
727
	    src++;
728
	}
729
    }
730
731
    _mm_empty(); 
620
}
732
}
621
733
622
void
734
void
Lines 638-644 Link Here
638
    CARD8	*maskLine, *mask;
750
    CARD8	*maskLine, *mask;
639
    FbStride	dstStride, maskStride;
751
    FbStride	dstStride, maskStride;
640
    CARD16	w;
752
    CARD16	w;
641
    Vector4x16	vsrc, vsrca;
753
    __m64	vsrc, vsrca;
642
    ullong	srcsrc;
754
    ullong	srcsrc;
643
    
755
    
644
    CHECKPOINT();
756
    CHECKPOINT();
Lines 648-654 Link Here
648
    srca = src >> 24;
760
    srca = src >> 24;
649
    if (srca == 0)
761
    if (srca == 0)
650
	return;
762
	return;
651
763
    
652
    srcsrc = (unsigned long long)src << 32 | src;
764
    srcsrc = (unsigned long long)src << 32 | src;
653
    
765
    
654
    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
766
    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
Lines 664-670 Link Here
664
	mask = maskLine;
776
	mask = maskLine;
665
	maskLine += maskStride;
777
	maskLine += maskStride;
666
	w = width;
778
	w = width;
667
779
	
668
	CHECKPOINT();
780
	CHECKPOINT();
669
	
781
	
670
	while (w && (unsigned long)dst & 7)
782
	while (w && (unsigned long)dst & 7)
Lines 673-687 Link Here
673
	    
785
	    
674
	    if (m)
786
	    if (m)
675
	    {
787
	    {
676
		Vector4x16 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), load8888(*dst));
788
		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), load8888(*dst));
677
		*dst = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero);
789
		*dst = (ullong)pack8888(vdest, _mm_setzero_si64());
678
	    }
790
	    }
679
	    
791
	    
680
	    w--;
792
	    w--;
681
	    mask++;
793
	    mask++;
682
	    dst++;
794
	    dst++;
683
	}
795
	}
684
796
	
685
	CHECKPOINT();
797
	CHECKPOINT();
686
	
798
	
687
	while (w >= 2)
799
	while (w >= 2)
Lines 689-717 Link Here
689
	    ullong m0, m1;
801
	    ullong m0, m1;
690
	    m0 = *mask;
802
	    m0 = *mask;
691
	    m1 = *(mask + 1);
803
	    m1 = *(mask + 1);
692
804
	    
693
	    if (srca == 0xff && (m0 & m1) == 0xff)
805
	    if (srca == 0xff && (m0 & m1) == 0xff)
694
	    {
806
	    {
695
		*(unsigned long long *)dst = srcsrc;
807
		*(unsigned long long *)dst = srcsrc;
696
	    }
808
	    }
697
	    else if (m0 | m1)
809
	    else if (m0 | m1)
698
	    {
810
	    {
699
		Vector4x16 vdest;
811
		__m64 vdest;
700
		Vector4x16 dest0, dest1;
812
		__m64 dest0, dest1;
701
813
		
702
		vdest = *(Vector4x16 *)dst;
814
		vdest = *(__m64 *)dst;
703
		
815
		
704
		dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m0), expand8888(vdest, 0));
816
		dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0), expand8888(vdest, 0));
705
		dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m1), expand8888(vdest, 1));
817
		dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1), expand8888(vdest, 1));
706
		
818
		
707
		*(Vector8x8 *)dst = (Vector8x8)pack8888(dest0, dest1);
819
		*(__m64 *)dst = pack8888(dest0, dest1);
708
	    }
820
	    }
709
	    
821
	    
710
	    mask += 2;
822
	    mask += 2;
711
	    dst += 2;
823
	    dst += 2;
712
	    w -= 2;
824
	    w -= 2;
713
	}
825
	}
714
826
	
715
	CHECKPOINT();
827
	CHECKPOINT();
716
	
828
	
717
	while (w)
829
	while (w)
Lines 720-728 Link Here
720
	    
832
	    
721
	    if (m)
833
	    if (m)
722
	    {
834
	    {
723
		Vector4x16 vdest = load8888(*dst);
835
		__m64 vdest = load8888(*dst);
724
		vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), vdest);
836
		vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest);
725
		*dst = (ullong)pack8888(vdest, (Vector4x16)c.mmx_zero);
837
		*dst = (ullong)pack8888(vdest, _mm_setzero_si64());
726
	    }
838
	    }
727
	    
839
	    
728
	    w--;
840
	    w--;
Lines 731-737 Link Here
731
	}
843
	}
732
    }
844
    }
733
    
845
    
734
    emms();
846
    _mm_empty();
735
}
847
}
736
848
737
849
Lines 754-760 Link Here
754
    CARD8	*maskLine, *mask;
866
    CARD8	*maskLine, *mask;
755
    FbStride	dstStride, maskStride;
867
    FbStride	dstStride, maskStride;
756
    CARD16	w;
868
    CARD16	w;
757
    Vector4x16	vsrc, vsrca;
869
    __m64	vsrc, vsrca;
758
    unsigned long long srcsrcsrcsrc, src16;
870
    unsigned long long srcsrcsrcsrc, src16;
759
    
871
    
760
    CHECKPOINT();
872
    CHECKPOINT();
Lines 770-778 Link Here
770
    
882
    
771
    vsrc = load8888 (src);
883
    vsrc = load8888 (src);
772
    vsrca = expand_alpha (vsrc);
884
    vsrca = expand_alpha (vsrc);
773
885
    
774
    src16 = (ullong)pack565(vsrc, (Vector4x16)c.mmx_zero, 0);
886
    src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0);
775
887
    
776
    srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 |
888
    srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 |
777
	(ullong)src16 << 16 | (ullong)src16;
889
	(ullong)src16 << 16 | (ullong)src16;
778
    
890
    
Lines 783-789 Link Here
783
	mask = maskLine;
895
	mask = maskLine;
784
	maskLine += maskStride;
896
	maskLine += maskStride;
785
	w = width;
897
	w = width;
786
898
	
787
	CHECKPOINT();
899
	CHECKPOINT();
788
	
900
	
789
	while (w && (unsigned long)dst & 7)
901
	while (w && (unsigned long)dst & 7)
Lines 793-808 Link Here
793
	    if (m)
905
	    if (m)
794
	    {
906
	    {
795
		ullong d = *dst;
907
		ullong d = *dst;
796
		Vector4x16 vd = (Vector4x16)d;
908
		__m64 vd = (__m64)d;
797
		Vector4x16 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), expand565(vd, 0));
909
		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0));
798
		*dst = (ullong)pack565(vdest, (Vector4x16)c.mmx_zero, 0);
910
		*dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
799
	    }
911
	    }
800
	    
912
	    
801
	    w--;
913
	    w--;
802
	    mask++;
914
	    mask++;
803
	    dst++;
915
	    dst++;
804
	}
916
	}
805
917
	
806
	CHECKPOINT();
918
	CHECKPOINT();
807
	
919
	
808
	while (w >= 4)
920
	while (w >= 4)
Lines 812-846 Link Here
812
	    m1 = *(mask + 1);
924
	    m1 = *(mask + 1);
813
	    m2 = *(mask + 2);
925
	    m2 = *(mask + 2);
814
	    m3 = *(mask + 3);
926
	    m3 = *(mask + 3);
815
927
	    
816
	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
928
	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
817
	    {
929
	    {
818
		*(unsigned long long *)dst = srcsrcsrcsrc;
930
		*(unsigned long long *)dst = srcsrcsrcsrc;
819
	    }
931
	    }
820
	    else if (m0 | m1 | m2 | m3)
932
	    else if (m0 | m1 | m2 | m3)
821
	    {
933
	    {
822
		Vector4x16 vdest;
934
		__m64 vdest;
823
		Vector4x16 vm0, vm1, vm2, vm3;
935
		__m64 vm0, vm1, vm2, vm3;
824
936
		
825
		vdest = *(Vector4x16 *)dst;
937
		vdest = *(__m64 *)dst;
826
938
		
827
		vm0 = (Vector4x16)m0;
939
		vm0 = (__m64)m0;
828
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0);
940
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0);
829
		vm1 = (Vector4x16)m1;
941
		vm1 = (__m64)m1;
830
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1);
942
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1);
831
		vm2 = (Vector4x16)m2;
943
		vm2 = (__m64)m2;
832
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2);
944
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2);
833
		vm3 = (Vector4x16)m3;
945
		vm3 = (__m64)m3;
834
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3);
946
		vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3);
835
		
947
		
836
		*(Vector4x16 *)dst = vdest;
948
		*(__m64 *)dst = vdest;
837
	    }
949
	    }
838
	    
950
	    
839
	    w -= 4;
951
	    w -= 4;
840
	    mask += 4;
952
	    mask += 4;
841
	    dst += 4;
953
	    dst += 4;
842
	}
954
	}
843
955
	
844
	CHECKPOINT();
956
	CHECKPOINT();
845
	
957
	
846
	while (w)
958
	while (w)
Lines 850-858 Link Here
850
	    if (m)
962
	    if (m)
851
	    {
963
	    {
852
		ullong d = *dst;
964
		ullong d = *dst;
853
		Vector4x16 vd = (Vector4x16)d;
965
		__m64 vd = (__m64)d;
854
		Vector4x16 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((Vector4x16)m), expand565(vd, 0));
966
		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0));
855
		*dst = (ullong)pack565(vdest, (Vector4x16)c.mmx_zero, 0);
967
		*dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
856
	    }
968
	    }
857
	    
969
	    
858
	    w--;
970
	    w--;
Lines 861-867 Link Here
861
	}
973
	}
862
    }
974
    }
863
    
975
    
864
    emms();
976
    _mm_empty();
865
}
977
}
866
978
867
void
979
void
Lines 887-895 Link Here
887
    
999
    
888
    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
1000
    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
889
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
1001
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
890
1002
    
891
    assert (pSrc->pDrawable == pMask->pDrawable);
1003
    assert (pSrc->pDrawable == pMask->pDrawable);
892
1004
    
893
    while (height--)
1005
    while (height--)
894
    {
1006
    {
895
	dst = dstLine;
1007
	dst = dstLine;
Lines 897-910 Link Here
897
	src = srcLine;
1009
	src = srcLine;
898
	srcLine += srcStride;
1010
	srcLine += srcStride;
899
	w = width;
1011
	w = width;
900
1012
	
901
	CHECKPOINT();
1013
	CHECKPOINT();
902
	
1014
	
903
	while (w && (unsigned long)dst & 7)
1015
	while (w && (unsigned long)dst & 7)
904
	{
1016
	{
905
	    Vector4x16 vsrc = load8888 (*src);
1017
	    __m64 vsrc = load8888 (*src);
906
	    ullong d = *dst;
1018
	    ullong d = *dst;
907
	    Vector4x16 vdest = expand565 ((Vector4x16)d, 0);
1019
	    __m64 vdest = expand565 ((__m64)d, 0);
908
	    
1020
	    
909
	    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
1021
	    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
910
	    
1022
	    
Lines 914-932 Link Here
914
	    dst++;
1026
	    dst++;
915
	    src++;
1027
	    src++;
916
	}
1028
	}
917
1029
	
918
	CHECKPOINT();
1030
	CHECKPOINT();
919
	
1031
	
920
	while (w >= 4)
1032
	while (w >= 4)
921
	{
1033
	{
922
	    CARD32 s0, s1, s2, s3;
1034
	    CARD32 s0, s1, s2, s3;
923
	    unsigned char a0, a1, a2, a3;
1035
	    unsigned char a0, a1, a2, a3;
924
1036
	    
925
	    s0 = *src;
1037
	    s0 = *src;
926
	    s1 = *(src + 1);
1038
	    s1 = *(src + 1);
927
	    s2 = *(src + 2);
1039
	    s2 = *(src + 2);
928
	    s3 = *(src + 3);
1040
	    s3 = *(src + 3);
929
1041
	    
930
	    a0 = (s0 >> 24);
1042
	    a0 = (s0 >> 24);
931
	    a1 = (s1 >> 24);
1043
	    a1 = (s1 >> 24);
932
	    a2 = (s2 >> 24);
1044
	    a2 = (s2 >> 24);
Lines 934-971 Link Here
934
	    
1046
	    
935
	    if ((a0 & a1 & a2 & a3) == 0xFF)
1047
	    if ((a0 & a1 & a2 & a3) == 0xFF)
936
	    {
1048
	    {
937
		Vector4x16 vdest;
1049
		__m64 vdest;
938
		vdest = pack565(invert_colors(load8888(s0)), (Vector4x16)c.mmx_zero, 0);
1050
		vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0);
939
		vdest = pack565(invert_colors(load8888(s1)), vdest, 1);
1051
		vdest = pack565(invert_colors(load8888(s1)), vdest, 1);
940
		vdest = pack565(invert_colors(load8888(s2)), vdest, 2);
1052
		vdest = pack565(invert_colors(load8888(s2)), vdest, 2);
941
		vdest = pack565(invert_colors(load8888(s3)), vdest, 3);
1053
		vdest = pack565(invert_colors(load8888(s3)), vdest, 3);
942
1054
		
943
		*(Vector4x16 *)dst = vdest;
1055
		*(__m64 *)dst = vdest;
944
	    }
1056
	    }
945
	    else if (a0 | a1 | a2 | a3)
1057
	    else if (a0 | a1 | a2 | a3)
946
	    {
1058
	    {
947
		Vector4x16 vdest = *(Vector4x16 *)dst;
1059
		__m64 vdest = *(__m64 *)dst;
948
1060
		
949
		vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0);
1061
		vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0);
950
	        vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1);
1062
	        vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1);
951
		vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2);
1063
		vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2);
952
		vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3);
1064
		vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3);
953
1065
		
954
		*(Vector4x16 *)dst = vdest;
1066
		*(__m64 *)dst = vdest;
955
	    }
1067
	    }
956
	    
1068
	    
957
	    w -= 4;
1069
	    w -= 4;
958
	    dst += 4;
1070
	    dst += 4;
959
	    src += 4;
1071
	    src += 4;
960
	}
1072
	}
961
1073
	
962
	CHECKPOINT();
1074
	CHECKPOINT();
963
	
1075
	
964
	while (w)
1076
	while (w)
965
	{
1077
	{
966
	    Vector4x16 vsrc = load8888 (*src);
1078
	    __m64 vsrc = load8888 (*src);
967
	    ullong d = *dst;
1079
	    ullong d = *dst;
968
	    Vector4x16 vdest = expand565 ((Vector4x16)d, 0);
1080
	    __m64 vdest = expand565 ((__m64)d, 0);
969
	    
1081
	    
970
	    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
1082
	    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
971
	    
1083
	    
Lines 976-986 Link Here
976
	    src++;
1088
	    src++;
977
	}
1089
	}
978
    }
1090
    }
979
1091
    
980
    emms();
1092
    _mm_empty();
981
}
1093
}
982
1094
983
/* "888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */
1095
/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */
984
1096
985
void
1097
void
986
fbCompositeSrc_8888RevNPx8888mmx (CARD8      op,
1098
fbCompositeSrc_8888RevNPx8888mmx (CARD8      op,
Lines 1005-1013 Link Here
1005
    
1117
    
1006
    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
1118
    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
1007
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
1119
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);
1008
1120
    
1009
    assert (pSrc->pDrawable == pMask->pDrawable);
1121
    assert (pSrc->pDrawable == pMask->pDrawable);
1010
1122
    
1011
    while (height--)
1123
    while (height--)
1012
    {
1124
    {
1013
	dst = dstLine;
1125
	dst = dstLine;
Lines 1015-1042 Link Here
1015
	src = srcLine;
1127
	src = srcLine;
1016
	srcLine += srcStride;
1128
	srcLine += srcStride;
1017
	w = width;
1129
	w = width;
1018
1130
	
1019
	while (w && (unsigned long)dst & 7)
1131
	while (w && (unsigned long)dst & 7)
1020
	{
1132
	{
1021
	    Vector4x16 s = load8888 (*src);
1133
	    __m64 s = load8888 (*src);
1022
	    Vector4x16 d = load8888 (*dst);
1134
	    __m64 d = load8888 (*dst);
1023
	    
1135
	    
1024
	    *dst = (ullong)pack8888 (over_rev_non_pre (s, d), (Vector4x16)c.mmx_zero);
1136
	    *dst = (ullong)pack8888 (over_rev_non_pre (s, d), _mm_setzero_si64());
1025
	    
1137
	    
1026
	    w--;
1138
	    w--;
1027
	    dst++;
1139
	    dst++;
1028
	    src++;
1140
	    src++;
1029
	}
1141
	}
1030
1142
	
1031
	while (w >= 2)
1143
	while (w >= 2)
1032
	{
1144
	{
1033
	    ullong s0, s1;
1145
	    ullong s0, s1;
1034
	    unsigned char a0, a1;
1146
	    unsigned char a0, a1;
1035
	    Vector4x16 d0, d1;
1147
	    __m64 d0, d1;
1036
1148
	    
1037
	    s0 = *src;
1149
	    s0 = *src;
1038
	    s1 = *(src + 1);
1150
	    s1 = *(src + 1);
1039
1151
	    
1040
	    a0 = (s0 >> 24);
1152
	    a0 = (s0 >> 24);
1041
	    a1 = (s1 >> 24);
1153
	    a1 = (s1 >> 24);
1042
	    
1154
	    
Lines 1044-1060 Link Here
1044
	    {
1156
	    {
1045
		d0 = invert_colors(load8888(s0));
1157
		d0 = invert_colors(load8888(s0));
1046
		d1 = invert_colors(load8888(s1));
1158
		d1 = invert_colors(load8888(s1));
1047
1159
		
1048
		*(Vector8x8 *)dst = pack8888 (d0, d1);
1160
		*(__m64 *)dst = pack8888 (d0, d1);
1049
	    }
1161
	    }
1050
	    else if (a0 | a1)
1162
	    else if (a0 | a1)
1051
	    {
1163
	    {
1052
		Vector4x16 vdest = *(Vector4x16 *)dst;
1164
		__m64 vdest = *(__m64 *)dst;
1053
1165
		
1054
		d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0));
1166
		d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0));
1055
		d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1));
1167
		d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1));
1056
	    
1168
		
1057
		*(Vector8x8 *)dst = pack8888 (d0, d1);
1169
		*(__m64 *)dst = pack8888 (d0, d1);
1058
	    }
1170
	    }
1059
	    
1171
	    
1060
	    w -= 2;
1172
	    w -= 2;
Lines 1064-1081 Link Here
1064
	
1176
	
1065
	while (w)
1177
	while (w)
1066
	{
1178
	{
1067
	    Vector4x16 s = load8888 (*src);
1179
	    __m64 s = load8888 (*src);
1068
	    Vector4x16 d = load8888 (*dst);
1180
	    __m64 d = load8888 (*dst);
1069
	    
1181
	    
1070
	    *dst = (ullong)pack8888 (over_rev_non_pre (s, d), (Vector4x16)c.mmx_zero);
1182
	    *dst = (ullong)pack8888 (over_rev_non_pre (s, d), _mm_setzero_si64());
1071
	    
1183
	    
1072
	    w--;
1184
	    w--;
1073
	    dst++;
1185
	    dst++;
1074
	    src++;
1186
	    src++;
1075
	}
1187
	}
1076
    }
1188
    }
1077
1189
    
1078
    emms();
1190
    _mm_empty();
1079
}
1191
}
1080
1192
1081
void
1193
void
Lines 1096-1102 Link Here
1096
    CARD16	*dstLine;
1208
    CARD16	*dstLine;
1097
    CARD32	*maskLine;
1209
    CARD32	*maskLine;
1098
    FbStride	dstStride, maskStride;
1210
    FbStride	dstStride, maskStride;
1099
    Vector4x16  vsrc, vsrca;
1211
    __m64  vsrc, vsrca;
1100
    
1212
    
1101
    CHECKPOINT();
1213
    CHECKPOINT();
1102
    
1214
    
Lines 1125-1131 Link Here
1125
	    if (m)
1237
	    if (m)
1126
	    {
1238
	    {
1127
		ullong d = *q;
1239
		ullong d = *q;
1128
		Vector4x16 vdest = expand565 ((Vector4x16)d, 0);
1240
		__m64 vdest = expand565 ((__m64)d, 0);
1129
		vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
1241
		vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
1130
		*q = (ullong)vdest;
1242
		*q = (ullong)vdest;
1131
	    }
1243
	    }
Lines 1146-1159 Link Here
1146
	    
1258
	    
1147
	    if ((m0 | m1 | m2 | m3))
1259
	    if ((m0 | m1 | m2 | m3))
1148
	    {
1260
	    {
1149
		Vector4x16 vdest = *(Vector4x16 *)q;
1261
		__m64 vdest = *(__m64 *)q;
1150
		
1262
		
1151
		vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0);
1263
		vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0);
1152
		vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1);
1264
		vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1);
1153
		vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2);
1265
		vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2);
1154
		vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3);
1266
		vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3);
1155
		
1267
		
1156
		*(Vector4x16 *)q = vdest;
1268
		*(__m64 *)q = vdest;
1157
	    }
1269
	    }
1158
	    twidth -= 4;
1270
	    twidth -= 4;
1159
	    p += 4;
1271
	    p += 4;
Lines 1168-1174 Link Here
1168
	    if (m)
1280
	    if (m)
1169
	    {
1281
	    {
1170
		ullong d = *q;
1282
		ullong d = *q;
1171
		Vector4x16 vdest = expand565((Vector4x16)d, 0);
1283
		__m64 vdest = expand565((__m64)d, 0);
1172
		vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0);
1284
		vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0);
1173
		*q = (ullong)vdest;
1285
		*q = (ullong)vdest;
1174
	    }
1286
	    }
Lines 1182-1188 Link Here
1182
	dstLine += dstStride;
1294
	dstLine += dstStride;
1183
    }
1295
    }
1184
    
1296
    
1185
    emms ();
1297
    _mm_empty ();
1186
}
1298
}
1187
1299
1188
void
1300
void
Lines 1210-1216 Link Here
1210
    
1322
    
1211
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1);
1323
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1);
1212
    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
1324
    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
1213
1325
    
1214
    while (height--)
1326
    while (height--)
1215
    {
1327
    {
1216
	dst = dstLine;
1328
	dst = dstLine;
Lines 1218-1224 Link Here
1218
	src = srcLine;
1330
	src = srcLine;
1219
	srcLine += srcStride;
1331
	srcLine += srcStride;
1220
	w = width;
1332
	w = width;
1221
1333
	
1222
	while (w && (unsigned long)dst & 7)
1334
	while (w && (unsigned long)dst & 7)
1223
	{
1335
	{
1224
	    s = *src;
1336
	    s = *src;
Lines 1234-1246 Link Here
1234
	
1346
	
1235
	while (w >= 8)
1347
	while (w >= 8)
1236
	{
1348
	{
1237
	    __asm__ __volatile__ (
1349
	    *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
1238
		"movq (%0), %%mm2\n\t"
1239
		"movq (%1), %%mm3\n\t"
1240
		"paddusb %%mm2, %%mm3\n\t"
1241
		"movq %%mm3, (%1)\n\t"
1242
		: /* no output */ : "r" (src), "r" (dst));
1243
	    
1244
	    dst += 8;
1350
	    dst += 8;
1245
	    src += 8;
1351
	    src += 8;
1246
	    w -= 8;
1352
	    w -= 8;
Lines 1259-1266 Link Here
1259
	    w--;
1365
	    w--;
1260
	}
1366
	}
1261
    }
1367
    }
1262
1368
    
1263
    emms();
1369
    _mm_empty();
1264
}
1370
}
1265
1371
1266
void
1372
void
Lines 1297-1309 Link Here
1297
	
1403
	
1298
	while (w && (unsigned long)dst & 7)
1404
	while (w && (unsigned long)dst & 7)
1299
	{
1405
	{
1300
	    __asm__ __volatile__ (
1406
	    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
1301
		"movd %0, %%mm2\n\t"
1407
						 _mm_cvtsi32_si64(*dst)));
1302
		"movd %1, %%mm3\n\t"
1303
		"paddusb %%mm2, %%mm3\n\t"
1304
		"movd %%mm3, %1\n\t"
1305
		: /* no output */ : "m" (*src), "m" (*dst));
1306
	    
1307
	    dst++;
1408
	    dst++;
1308
	    src++;
1409
	    src++;
1309
	    w--;
1410
	    w--;
Lines 1311-1323 Link Here
1311
	
1412
	
1312
	while (w >= 2)
1413
	while (w >= 2)
1313
	{
1414
	{
1314
	    __asm__ __volatile__ (
1415
	    *(ullong*)dst = (ullong) _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
1315
		"movq (%0), %%mm2\n\t"
1316
		"movq (%1), %%mm3\n\t"
1317
		"paddusb %%mm2, %%mm3\n\t"
1318
		"movq %%mm3, (%1)\n\t"
1319
		: /* no output */ : "r" (src), "r" (dst));
1320
	    
1321
	    dst += 2;
1416
	    dst += 2;
1322
	    src += 2;
1417
	    src += 2;
1323
	    w -= 2;
1418
	    w -= 2;
Lines 1325-1340 Link Here
1325
	
1420
	
1326
	if (w)
1421
	if (w)
1327
	{
1422
	{
1328
	    __asm__ __volatile__ (
1423
	    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
1329
		"movd %0, %%mm2\n\t"
1424
						 _mm_cvtsi32_si64(*dst)));
1330
		"movd %1, %%mm3\n\t"
1425
	    
1331
		"paddusb %%mm2, %%mm3\n\t"
1332
		"movd %%mm3, %1\n\t"
1333
		: /* no output */ : "m" (*src), "m" (*dst));
1334
	}
1426
	}
1335
    }
1427
    }
1336
1428
    
1337
    emms();
1429
    _mm_empty();
1338
}
1430
}
1339
1431
1340
#define GetStart(drw,x,y,type,stride,line,bpp) {\
1432
#define GetStart(drw,x,y,type,stride,line,bpp) {\
Lines 1358-1376 Link Here
1358
    FbStride	stride;
1450
    FbStride	stride;
1359
    int		bpp;
1451
    int		bpp;
1360
    ullong	fill;
1452
    ullong	fill;
1361
    Vector8x8	vfill;
1453
    __m64	vfill;
1362
    CARD32	byte_width;
1454
    CARD32	byte_width;
1363
    CARD8	*byte_line;
1455
    CARD8	*byte_line;
1364
    FbBits      *bits;
1456
    FbBits      *bits;
1365
    int		xoff, yoff;
1457
    int		xoff, yoff;
1366
    
1458
    
1367
    CHECKPOINT();
1459
    CHECKPOINT();
1368
1460
    
1369
    fbGetDrawable(pDraw, bits, stride, bpp, xoff, yoff);
1461
    fbGetDrawable(pDraw, bits, stride, bpp, xoff, yoff);
1370
1462
    
1371
    if (bpp == 16 && (xor >> 16 != (xor & 0xffff)))
1463
    if (bpp == 16 && (xor >> 16 != (xor & 0xffff)))
1372
	return FALSE;
1464
	return FALSE;
1373
1465
    
1374
    if (bpp != 16 && bpp != 32)
1466
    if (bpp != 16 && bpp != 32)
1375
	return FALSE;
1467
	return FALSE;
1376
    
1468
    
Lines 1388-1396 Link Here
1388
	byte_width = 4 * width;
1480
	byte_width = 4 * width;
1389
	stride *= 4;
1481
	stride *= 4;
1390
    }
1482
    }
1391
1483
    
1392
    fill = ((ullong)xor << 32) | xor;
1484
    fill = ((ullong)xor << 32) | xor;
1393
    vfill = (Vector8x8)fill;
1485
    vfill = (__m64)fill;
1394
    
1486
    
1395
    while (height--)
1487
    while (height--)
1396
    {
1488
    {
Lines 1398-1404 Link Here
1398
	CARD8 *d = byte_line;
1490
	CARD8 *d = byte_line;
1399
	byte_line += stride;
1491
	byte_line += stride;
1400
	w = byte_width;
1492
	w = byte_width;
1401
1493
	
1402
	while (w >= 2 && ((unsigned long)d & 3))
1494
	while (w >= 2 && ((unsigned long)d & 3))
1403
	{
1495
	{
1404
	    *(CARD16 *)d = xor;
1496
	    *(CARD16 *)d = xor;
Lines 1406-1440 Link Here
1406
	    d += 2;
1498
	    d += 2;
1407
	}
1499
	}
1408
	
1500
	
1409
	while (w >= 4 && ((unsigned int)d & 7))
1501
	while (w >= 4 && ((unsigned long)d & 7))
1410
	{
1502
	{
1411
	    *(CARD32 *)d = xor;
1503
	    *(CARD32 *)d = xor;
1412
1504
	    
1413
	    w -= 4;
1505
	    w -= 4;
1414
	    d += 4;
1506
	    d += 4;
1415
	}
1507
	}
1416
	
1508
	
1417
	while (w >= 64)
1509
	while (w >= 64)
1418
	{
1510
	{
1419
	    __asm__ __volatile  (
1511
	    *(__m64*) (d +  0) = vfill;
1420
		"movq %0, (%1)\n\t"
1512
	    *(__m64*) (d +  8) = vfill;
1421
		"movq %0, 8(%1)\n\t"
1513
	    *(__m64*) (d + 16) = vfill;
1422
		"movq %0, 16(%1)\n\t"
1514
	    *(__m64*) (d + 24) = vfill;
1423
		"movq %0, 24(%1)\n\t"
1515
	    *(__m64*) (d + 32) = vfill;
1424
		"movq %0, 32(%1)\n\t"
1516
	    *(__m64*) (d + 40) = vfill;
1425
		"movq %0, 40(%1)\n\t"
1517
	    *(__m64*) (d + 48) = vfill;
1426
		"movq %0, 48(%1)\n\t"
1518
	    *(__m64*) (d + 56) = vfill;
1427
		"movq %0, 56(%1)\n\t"
1519
	    
1428
		: /* no output */
1429
		: "y" (vfill), "r" (d)
1430
		: "memory");
1431
	    w -= 64;
1520
	    w -= 64;
1432
	    d += 64;
1521
	    d += 64;
1433
	}
1522
	}
1434
	while (w >= 4)
1523
	while (w >= 4)
1435
	{
1524
	{
1436
	    *(CARD32 *)d = xor;
1525
	    *(CARD32 *)d = xor;
1437
1526
	    
1438
	    w -= 4;
1527
	    w -= 4;
1439
	    d += 4;
1528
	    d += 4;
1440
	}
1529
	}
Lines 1446-1461 Link Here
1446
	}
1535
	}
1447
    }
1536
    }
1448
    
1537
    
1449
    emms();
1538
    _mm_empty();
1539
    return TRUE;
1540
}
1541
1542
Bool
1543
fbCopyAreammx (DrawablePtr	pSrc,
1544
	       DrawablePtr	pDst,
1545
	       int		src_x,
1546
	       int		src_y,
1547
	       int		dst_x,
1548
	       int		dst_y,
1549
	       int		width,
1550
	       int		height)
1551
{
1552
    FbBits *	src_bits;
1553
    FbStride	src_stride;
1554
    int		src_bpp;
1555
    int		src_xoff;
1556
    int		src_yoff;
1557
1558
    FbBits *	dst_bits;
1559
    FbStride	dst_stride;
1560
    int		dst_bpp;
1561
    int		dst_xoff;
1562
    int		dst_yoff;
1563
1564
    CARD8 *	src_bytes;
1565
    CARD8 *	dst_bytes;
1566
    int		byte_width;
1567
    
1568
    fbGetDrawable(pSrc, src_bits, src_stride, src_bpp, src_xoff, src_yoff);
1569
    fbGetDrawable(pDst, dst_bits, dst_stride, dst_bpp, dst_xoff, dst_yoff);
1570
1571
    if (src_bpp != 16 && src_bpp != 32)
1572
	return FALSE;
1573
1574
    if (dst_bpp != 16 && dst_bpp != 32)
1575
	return FALSE;
1576
1577
    if (src_bpp != dst_bpp)
1578
    {
1579
	return FALSE;
1580
    }
1581
    
1582
    if (src_bpp == 16)
1583
    {
1584
	src_stride = src_stride * sizeof (FbBits) / 2;
1585
	dst_stride = dst_stride * sizeof (FbBits) / 2;
1586
	src_bytes = (CARD8 *)(((CARD16 *)src_bits) + src_stride * (src_y - src_yoff) + (src_x - src_xoff));
1587
	dst_bytes = (CARD8 *)(((CARD16 *)dst_bits) + dst_stride * (dst_y - dst_yoff) + (dst_x - dst_xoff));
1588
	byte_width = 2 * width;
1589
	src_stride *= 2;
1590
	dst_stride *= 2;
1591
    }
1592
    else
1593
    {
1594
	src_stride = src_stride * sizeof (FbBits) / 4;
1595
	dst_stride = dst_stride * sizeof (FbBits) / 4;
1596
	src_bytes = (CARD8 *)(((CARD32 *)src_bits) + src_stride * (src_y - src_yoff) + (src_x - src_xoff));
1597
	dst_bytes = (CARD8 *)(((CARD32 *)dst_bits) + dst_stride * (dst_y - dst_yoff) + (dst_x - dst_xoff));
1598
	byte_width = 4 * width;
1599
	src_stride *= 4;
1600
	dst_stride *= 4;
1601
    }
1602
1603
    while (height--)
1604
    {
1605
	int w;
1606
	CARD8 *s = src_bytes;
1607
	CARD8 *d = dst_bytes;
1608
	src_bytes += src_stride;
1609
	dst_bytes += dst_stride;
1610
	w = byte_width;
1611
	
1612
	while (w >= 2 && ((unsigned long)d & 3))
1613
	{
1614
	    *(CARD16 *)d = *(CARD16 *)s;
1615
	    w -= 2;
1616
	    s += 2;
1617
	    d += 2;
1618
	}
1619
	
1620
	while (w >= 4 && ((unsigned int)d & 7))
1621
	{
1622
	    *(CARD32 *)d = *(CARD32 *)s;
1623
	    
1624
	    w -= 4;
1625
	    s += 4;
1626
	    d += 4;
1627
	}
1628
	
1629
	while (w >= 64)
1630
	{
1631
	    *(__m64 *)(d + 0)  = *(__m64 *)(s + 0);
1632
	    *(__m64 *)(d + 8)  = *(__m64 *)(s + 8);
1633
	    *(__m64 *)(d + 16) = *(__m64 *)(s + 16);
1634
	    *(__m64 *)(d + 24) = *(__m64 *)(s + 24);
1635
	    *(__m64 *)(d + 32) = *(__m64 *)(s + 32);
1636
	    *(__m64 *)(d + 40) = *(__m64 *)(s + 40);
1637
	    *(__m64 *)(d + 48) = *(__m64 *)(s + 48);
1638
	    *(__m64 *)(d + 56) = *(__m64 *)(s + 56);
1639
	    w -= 64;
1640
	    s += 64;
1641
	    d += 64;
1642
	}
1643
	while (w >= 4)
1644
	{
1645
	    *(CARD32 *)d = *(CARD32 *)s;
1646
1647
	    w -= 4;
1648
	    s += 4;
1649
	    d += 4;
1650
	}
1651
	if (w >= 2)
1652
	{
1653
	    *(CARD16 *)d = *(CARD16 *)s;
1654
	    w -= 2;
1655
	    s += 2;
1656
	    d += 2;
1657
	}
1658
    }
1659
    
1660
    _mm_empty();
1450
    return TRUE;
1661
    return TRUE;
1451
}
1662
}
1452
1663
1664
void
1665
fbCompositeCopyAreammx (CARD8		op,
1666
			PicturePtr	pSrc,
1667
			PicturePtr	pMask,
1668
			PicturePtr	pDst,
1669
			INT16		xSrc,
1670
			INT16		ySrc,
1671
			INT16		xMask,
1672
			INT16		yMask,
1673
			INT16		xDst,
1674
			INT16		yDst,
1675
			CARD16		width,
1676
			CARD16		height)
1677
{
1678
    fbCopyAreammx (pSrc->pDrawable,
1679
		   pDst->pDrawable,
1680
		   xSrc, ySrc,
1681
		   xDst, yDst,
1682
		   width, height);
1683
}
1684
1685
#ifndef __amd64__
1453
Bool
1686
Bool
1454
fbHaveMMX (void)
1687
fbHaveMMX (void)
1455
{
1688
{
1456
    static Bool initialized = FALSE;
1689
    static Bool initialized = FALSE;
1457
    static Bool mmx_present;
1690
    static Bool mmx_present;
1458
1691
    
1459
    if (!initialized)
1692
    if (!initialized)
1460
    {
1693
    {
1461
	int tmp; /* static variables are accessed through %ebx,
1694
	int tmp; /* static variables are accessed through %ebx,
Lines 1466-1472 Link Here
1466
	
1699
	
1467
	__asm__ __volatile__ (
1700
	__asm__ __volatile__ (
1468
/* Check if bit 21 in flags word is writeable */
1701
/* Check if bit 21 in flags word is writeable */
1469
1702
	    
1470
	    "pusha			        \n\t"
1703
	    "pusha			        \n\t"
1471
	    "pushfl				\n\t"
1704
	    "pushfl				\n\t"
1472
	    "popl	%%eax			\n\t"
1705
	    "popl	%%eax			\n\t"
Lines 1502-1514 Link Here
1502
	    : /* no input */);
1735
	    : /* no input */);
1503
	
1736
	
1504
	initialized = TRUE;
1737
	initialized = TRUE;
1505
1738
	
1506
	mmx_present = tmp;
1739
	mmx_present = tmp;
1507
    }
1740
    }
1508
    
1741
    
1509
    return mmx_present;
1742
    return mmx_present;
1510
}
1743
}
1744
#endif /* __amd64__ */
1511
1745
1512
1746
1513
#endif /* RENDER */
1747
#endif /* RENDER */
1514
#endif /* USE_GCC34_MMX */
1748
#endif /* USE_MMX */
(-)xc-orig/programs/Xserver/fb/fbmmx.h (-6 / +44 lines)
Lines 1-5 Link Here
1
/*
1
/*
2
 * Copyright © 2004 Red Hat, Inc.
2
 * Copyright © 2004 Red Hat, Inc.
3
 *
3
 *
4
 * Permission to use, copy, modify, distribute, and sell this software and its
4
 * Permission to use, copy, modify, distribute, and sell this software and its
5
 * documentation for any purpose is hereby granted without fee, provided that
5
 * documentation for any purpose is hereby granted without fee, provided that
Lines 18-34 Link Here
18
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 
18
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 
19
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20
 *
20
 *
21
 * Author:  Søren Sandmann (sandmann@redhat.com)
21
 * Author:  Søren Sandmann (sandmann@redhat.com)
22
 * 
22
 * 
23
 * Based on work by Owen Taylor
23
 * Based on work by Owen Taylor
24
 */
24
 */
25
#ifdef USE_GCC34_MMX
25
#ifdef USE_MMX
26
27
#ifndef __amd64__
26
Bool fbHaveMMX(void);
28
Bool fbHaveMMX(void);
27
#else
29
#else
28
#define fbHaveMMX FALSE
30
#define fbHaveMMX() TRUE
31
#endif
32
33
#else
34
#define fbHaveMMX() FALSE
29
#endif
35
#endif
30
36
31
#ifdef USE_GCC34_MMX
37
#ifdef USE_MMX
32
38
33
void fbCompositeSolidMask_nx8888x0565Cmmx (CARD8      op,
39
void fbCompositeSolidMask_nx8888x0565Cmmx (CARD8      op,
34
					   PicturePtr pSrc,
40
					   PicturePtr pSrc,
Lines 150-155 Link Here
150
				       INT16      yDst,
156
				       INT16      yDst,
151
				       CARD16     width,
157
				       CARD16     width,
152
				       CARD16     height);
158
				       CARD16     height);
159
/*
 * MMX composite routine taking the standard composite argument list.
 * fbpict.c selects it for an x8r8g8b8 source and x8r8g8b8 destination
 * combined with a 1x1 repeating a8 mask (its "translucent window" case).
 * NOTE(review): the definition is not visible here; see fbmmx.c for the
 * exact blending performed.
 */
void fbCompositeSrc_8888x8x8888mmx (CARD8	op,
160
				    PicturePtr  pSrc,
161
				    PicturePtr  pMask,
162
				    PicturePtr  pDst,
163
				    INT16	xSrc,
164
				    INT16	ySrc,
165
				    INT16       xMask,
166
				    INT16       yMask,
167
				    INT16       xDst,
168
				    INT16       yDst,
169
				    CARD16      width,
170
				    CARD16      height);
171
/*
 * MMX copy of a width x height area from (src_x, src_y) in pSrc to
 * (dst_x, dst_y) in pDst.  The definition copies row by row using
 * 16-, 32- and 64-bit moves and returns TRUE when the copy was done.
 * NOTE(review): presumably returns FALSE for drawables it cannot handle;
 * confirm against the start of the definition in fbmmx.c.
 */
Bool fbCopyAreammx (DrawablePtr	pSrc,
172
		    DrawablePtr	pDst,
173
		    int		src_x,
174
		    int		src_y,
175
		    int		dst_x,
176
		    int		dst_y,
177
		    int		width,
178
		    int		height);
179
/*
 * Composite-signature wrapper around fbCopyAreammx(): copies the area
 * from pSrc's drawable to pDst's drawable.  op, pMask, xMask and yMask
 * are ignored, and the Bool result of fbCopyAreammx() is not returned.
 */
void fbCompositeCopyAreammx (CARD8	op,
180
			     PicturePtr	pSrc,
181
			     PicturePtr	pMask,
182
			     PicturePtr	pDst,
183
			     INT16	xSrc,
184
			     INT16      ySrc,
185
			     INT16      xMask,
186
			     INT16      yMask,
187
			     INT16      xDst,
188
			     INT16      yDst,
189
			     CARD16     width,
190
			     CARD16     height);
153
Bool fbSolidFillmmx (DrawablePtr	pDraw,
191
Bool fbSolidFillmmx (DrawablePtr	pDraw,
154
		     int		x,
192
		     int		x,
155
		     int		y,
193
		     int		y,
Lines 157-160 Link Here
157
		     int		height,
195
		     int		height,
158
		     FbBits		xor);
196
		     FbBits		xor);
159
197
160
#endif /* USE_GCC34_MMX */
198
#endif /* USE_MMX */
(-)xc-orig/programs/Xserver/fb/fbpict.c (-17 / +66 lines)
Lines 1-7 Link Here
1
/*
1
/*
2
 * $XFree86: xc/programs/Xserver/fb/fbpict.c,v 1.15 2002/09/26 02:56:48 keithp Exp $
2
 * $XFree86: xc/programs/Xserver/fb/fbpict.c,v 1.15 2002/09/26 02:56:48 keithp Exp $
3
 *
3
 *
4
 * Copyright © 2000 SuSE, Inc.
4
 * Copyright © 2000 SuSE, Inc.
5
 *
5
 *
6
 * Permission to use, copy, modify, distribute, and sell this software and its
6
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * documentation for any purpose is hereby granted without fee, provided that
7
 * documentation for any purpose is hereby granted without fee, provided that
Lines 863-868 Link Here
863
    if (!pSrc->transform && !(pMask && pMask->transform))
863
    if (!pSrc->transform && !(pMask && pMask->transform))
864
    if (!maskAlphaMap && !srcAlphaMap && !dstAlphaMap)
864
    if (!maskAlphaMap && !srcAlphaMap && !dstAlphaMap)
865
    switch (op) {
865
    switch (op) {
866
    case PictOpSrc:
867
#ifdef USE_MMX
868
	/*
	 * Plain copy fast path: no mask, identical formats, distinct
	 * drawables.  Gate it on fbHaveMMX() like every other MMX fast
	 * path in this function; USE_MMX only says the code was built
	 * with MMX support, not that the running CPU has it.
	 */
	if (fbHaveMMX() && !pMask && pSrc->format == pDst->format &&
869
	    pSrc->pDrawable != pDst->pDrawable)
870
	{
871
	    func = fbCompositeCopyAreammx;
872
	}
873
#endif
874
	break;
866
    case PictOpOver:
875
    case PictOpOver:
867
	if (pMask)
876
	if (pMask)
868
	{
877
	{
Lines 877-883 Link Here
877
			switch (pDst->format) {
886
			switch (pDst->format) {
878
			case PICT_r5g6b5:
887
			case PICT_r5g6b5:
879
			case PICT_b5g6r5:
888
			case PICT_b5g6r5:
880
#ifdef USE_GCC34_MMX
889
#ifdef USE_MMX
881
			    if (fbHaveMMX())
890
			    if (fbHaveMMX())
882
				func = fbCompositeSolidMask_nx8x0565mmx;
891
				func = fbCompositeSolidMask_nx8x0565mmx;
883
			    else
892
			    else
Lines 892-898 Link Here
892
			case PICT_x8r8g8b8:
901
			case PICT_x8r8g8b8:
893
			case PICT_a8b8g8r8:
902
			case PICT_a8b8g8r8:
894
			case PICT_x8b8g8r8:
903
			case PICT_x8b8g8r8:
895
#ifdef USE_GCC34_MMX
904
#ifdef USE_MMX
896
			    if (fbHaveMMX())
905
			    if (fbHaveMMX())
897
				func = fbCompositeSolidMask_nx8x8888mmx;
906
				func = fbCompositeSolidMask_nx8x8888mmx;
898
			    else
907
			    else
Lines 906-912 Link Here
906
			    switch (pDst->format) {
915
			    switch (pDst->format) {
907
			    case PICT_a8r8g8b8:
916
			    case PICT_a8r8g8b8:
908
			    case PICT_x8r8g8b8:
917
			    case PICT_x8r8g8b8:
909
#ifdef USE_GCC34_MMX
918
#ifdef USE_MMX
910
				if (fbHaveMMX())
919
				if (fbHaveMMX())
911
				    func = fbCompositeSolidMask_nx8888x8888Cmmx;
920
				    func = fbCompositeSolidMask_nx8888x8888Cmmx;
912
				else
921
				else
Lines 914-920 Link Here
914
				    func = fbCompositeSolidMask_nx8888x8888C;
923
				    func = fbCompositeSolidMask_nx8888x8888C;
915
				break;
924
				break;
916
			    case PICT_r5g6b5:
925
			    case PICT_r5g6b5:
917
#ifdef USE_GCC34_MMX
926
#ifdef USE_MMX
918
				if (fbHaveMMX())
927
				if (fbHaveMMX())
919
				    func = fbCompositeSolidMask_nx8888x0565Cmmx;
928
				    func = fbCompositeSolidMask_nx8888x0565Cmmx;
920
				else
929
				else
Lines 929-935 Link Here
929
			    switch (pDst->format) {
938
			    switch (pDst->format) {
930
			    case PICT_a8b8g8r8:
939
			    case PICT_a8b8g8r8:
931
			    case PICT_x8b8g8r8:
940
			    case PICT_x8b8g8r8:
932
#ifdef USE_GCC34_MMX
941
#ifdef USE_MMX
933
				if (fbHaveMMX())
942
				if (fbHaveMMX())
934
				    func = fbCompositeSolidMask_nx8888x8888Cmmx;
943
				    func = fbCompositeSolidMask_nx8888x8888Cmmx;
935
				else
944
				else
Lines 937-943 Link Here
937
				    func = fbCompositeSolidMask_nx8888x8888C;
946
				    func = fbCompositeSolidMask_nx8888x8888C;
938
				break;
947
				break;
939
			    case PICT_b5g6r5:
948
			    case PICT_b5g6r5:
940
#ifdef USE_GCC34_MMX
949
#ifdef USE_MMX
941
				if (fbHaveMMX())
950
				if (fbHaveMMX())
942
				    func = fbCompositeSolidMask_nx8888x0565Cmmx;
951
				    func = fbCompositeSolidMask_nx8888x0565Cmmx;
943
				else
952
				else
Lines 970-975 Link Here
970
		    xSrc == xMask && ySrc == yMask &&
979
		    xSrc == xMask && ySrc == yMask &&
971
		    !pMask->componentAlpha)
980
		    !pMask->componentAlpha)
972
		{
981
		{
982
		    /* source == mask: non-premultiplied data */
973
		    switch (pSrc->format) {
983
		    switch (pSrc->format) {
974
		    case PICT_x8b8g8r8:
984
		    case PICT_x8b8g8r8:
975
			switch (pMask->format) {
985
			switch (pMask->format) {
Lines 978-990 Link Here
978
			    switch (pDst->format) {
988
			    switch (pDst->format) {
979
			    case PICT_a8r8g8b8:
989
			    case PICT_a8r8g8b8:
980
			    case PICT_x8r8g8b8:
990
			    case PICT_x8r8g8b8:
981
#ifdef USE_GCC34_MMX
991
#ifdef USE_MMX
982
				if (fbHaveMMX())
992
				if (fbHaveMMX())
983
				    func = fbCompositeSrc_8888RevNPx8888mmx;
993
				    func = fbCompositeSrc_8888RevNPx8888mmx;
984
#endif
994
#endif
985
				break;
995
				break;
986
			    case PICT_r5g6b5:
996
			    case PICT_r5g6b5:
987
#ifdef USE_GCC34_MMX
997
#ifdef USE_MMX
988
				if (fbHaveMMX())
998
				if (fbHaveMMX())
989
				    func = fbCompositeSrc_8888RevNPx0565mmx;
999
				    func = fbCompositeSrc_8888RevNPx0565mmx;
990
#endif
1000
#endif
Lines 1000-1012 Link Here
1000
			    switch (pDst->format) {
1010
			    switch (pDst->format) {
1001
			    case PICT_a8b8g8r8:
1011
			    case PICT_a8b8g8r8:
1002
			    case PICT_x8b8g8r8:
1012
			    case PICT_x8b8g8r8:
1003
#ifdef USE_GCC34_MMX
1013
#ifdef USE_MMX
1004
				if (fbHaveMMX())
1014
				if (fbHaveMMX())
1005
				    func = fbCompositeSrc_8888RevNPx8888mmx;
1015
				    func = fbCompositeSrc_8888RevNPx8888mmx;
1006
#endif
1016
#endif
1007
				break;
1017
				break;
1008
			    case PICT_r5g6b5:
1018
			    case PICT_r5g6b5:
1009
#ifdef USE_GCC34_MMX
1019
#ifdef USE_MMX
1010
				if (fbHaveMMX())
1020
				if (fbHaveMMX())
1011
				    func = fbCompositeSrc_8888RevNPx0565mmx;
1021
				    func = fbCompositeSrc_8888RevNPx0565mmx;
1012
#endif
1022
#endif
Lines 1018-1026 Link Here
1018
		    }
1028
		    }
1019
		    break;
1029
		    break;
1020
		}
1030
		}
1031
		else 
1032
		{
1033
		    /* non-repeating source, repeating mask => translucent window */
1034
		    if (maskRepeat &&
1035
			pMask->pDrawable->width == 1 &&
1036
			pMask->pDrawable->height == 1)
1037
		    {
1038
			if (pSrc->format == PICT_x8r8g8b8 &&
1039
			    pDst->format == PICT_x8r8g8b8 &&
1040
			    pMask->format == PICT_a8)
1041
			{
1042
#ifdef USE_MMX
1043
			    if (fbHaveMMX())
1044
				func = fbCompositeSrc_8888x8x8888mmx;
1045
#endif			    
1046
			}
1047
		    }
1048
		}
1021
	    }
1049
	    }
1022
	}
1050
	}
1023
	else
1051
	else /* no mask */
1024
	{
1052
	{
1025
	    if (srcRepeat && 
1053
	    if (srcRepeat && 
1026
		pSrc->pDrawable->width == 1 &&
1054
		pSrc->pDrawable->width == 1 &&
Lines 1032-1038 Link Here
1032
		    switch (pDst->format) {
1060
		    switch (pDst->format) {
1033
		    case PICT_a8r8g8b8:
1061
		    case PICT_a8r8g8b8:
1034
		    case PICT_x8r8g8b8:
1062
		    case PICT_x8r8g8b8:
1035
#ifdef USE_GCC34_MMX
1063
#ifdef USE_MMX
1036
			if (fbHaveMMX())
1064
			if (fbHaveMMX())
1037
			{
1065
			{
1038
			    srcRepeat = FALSE;
1066
			    srcRepeat = FALSE;
Lines 1041-1047 Link Here
1041
#endif
1069
#endif
1042
			break;
1070
			break;
1043
		    case PICT_r5g6b5:
1071
		    case PICT_r5g6b5:
1044
#ifdef USE_GCC34_MMX
1072
#ifdef USE_MMX
1045
			if (fbHaveMMX())
1073
			if (fbHaveMMX())
1046
			{
1074
			{
1047
			    srcRepeat = FALSE;
1075
			    srcRepeat = FALSE;
Lines 1070-1075 Link Here
1070
			break;
1098
			break;
1071
		    }
1099
		    }
1072
		    break;
1100
		    break;
1101
		case PICT_x8r8g8b8:
1102
		    switch (pDst->format) {
1103
		    case PICT_a8r8g8b8:
1104
		    case PICT_x8r8g8b8:
1105
#ifdef USE_MMX
1106
			if (fbHaveMMX())
1107
			    func = fbCompositeCopyAreammx;
1108
#endif
1109
			break;
1110
		    }
1111
		case PICT_x8b8g8r8:
1112
		    switch (pDst->format) {
1113
		    case PICT_a8b8g8r8:
1114
		    case PICT_x8b8g8r8:
1115
#ifdef USE_MMX
1116
			if (fbHaveMMX())
1117
			    func = fbCompositeCopyAreammx;
1118
#endif
1119
			break;
1120
		    }
1121
		    break;
1073
		case PICT_a8b8g8r8:
1122
		case PICT_a8b8g8r8:
1074
		    switch (pDst->format) {
1123
		    switch (pDst->format) {
1075
		    case PICT_a8b8g8r8:
1124
		    case PICT_a8b8g8r8:
Lines 1109-1115 Link Here
1109
	    case PICT_a8r8g8b8:
1158
	    case PICT_a8r8g8b8:
1110
		switch (pDst->format) {
1159
		switch (pDst->format) {
1111
		case PICT_a8r8g8b8:
1160
		case PICT_a8r8g8b8:
1112
#ifdef USE_GCC34_MMX
1161
#ifdef USE_MMX
1113
		    if (fbHaveMMX())
1162
		    if (fbHaveMMX())
1114
			func = fbCompositeSrcAdd_8888x8888mmx;
1163
			func = fbCompositeSrcAdd_8888x8888mmx;
1115
		    else
1164
		    else
Lines 1121-1127 Link Here
1121
	    case PICT_a8b8g8r8:
1170
	    case PICT_a8b8g8r8:
1122
		switch (pDst->format) {
1171
		switch (pDst->format) {
1123
		case PICT_a8b8g8r8:
1172
		case PICT_a8b8g8r8:
1124
#ifdef USE_GCC34_MMX
1173
#ifdef USE_MMX
1125
		    if (fbHaveMMX())
1174
		    if (fbHaveMMX())
1126
			func = fbCompositeSrcAdd_8888x8888mmx;
1175
			func = fbCompositeSrcAdd_8888x8888mmx;
1127
		    else
1176
		    else
Lines 1133-1139 Link Here
1133
	    case PICT_a8:
1182
	    case PICT_a8:
1134
		switch (pDst->format) {
1183
		switch (pDst->format) {
1135
		case PICT_a8:
1184
		case PICT_a8:
1136
#ifdef USE_GCC34_MMX
1185
#ifdef USE_MMX
1137
		    if (fbHaveMMX())
1186
		    if (fbHaveMMX())
1138
			func = fbCompositeSrcAdd_8000x8000mmx;
1187
			func = fbCompositeSrcAdd_8000x8000mmx;
1139
		    else
1188
		    else

Return to bug 80685