Lines 29-34
Link Here
|
29 |
|
29 |
|
30 |
#ifdef RENDER |
30 |
#ifdef RENDER |
31 |
|
31 |
|
|
|
32 |
#include <mmintrin.h> |
33 |
|
32 |
#include "picturestr.h" |
34 |
#include "picturestr.h" |
33 |
#include "mipict.h" |
35 |
#include "mipict.h" |
34 |
#include "fbpict.h" |
36 |
#include "fbpict.h" |
Lines 48-53
Link Here
|
48 |
#define CHECKPOINT() |
50 |
#define CHECKPOINT() |
49 |
#endif |
51 |
#endif |
50 |
|
52 |
|
|
|
53 |
#define mmx_and(a,b) ((Vector1x64)_mm_and_si64((__m64)(a),(__m64)(b))) |
54 |
#define mmx_or(a,b) ((Vector1x64)_mm_or_si64((__m64)(a),(__m64)(b))) |
55 |
#define mmx_xor(a,b) ((Vector1x64)_mm_xor_si64((__m64)(a),(__m64)(b))) |
56 |
|
51 |
typedef struct |
57 |
typedef struct |
52 |
{ |
58 |
{ |
53 |
ullong mmx_zero; |
59 |
ullong mmx_zero; |
Lines 102-108
Link Here
|
102 |
static __inline__ Vector4x16 |
108 |
static __inline__ Vector4x16 |
103 |
negate (Vector4x16 mask) |
109 |
negate (Vector4x16 mask) |
104 |
{ |
110 |
{ |
105 |
return (Vector4x16)__builtin_ia32_pxor ( |
111 |
return (Vector4x16)mmx_xor ( |
106 |
(Vector1x64)mask, |
112 |
(Vector1x64)mask, |
107 |
(Vector1x64)c.mmx_4x00ff); |
113 |
(Vector1x64)c.mmx_4x00ff); |
108 |
} |
114 |
} |
Lines 163-171
Link Here
|
163 |
|
169 |
|
164 |
t1 = shift ((Vector1x64)pixel, -48); |
170 |
t1 = shift ((Vector1x64)pixel, -48); |
165 |
t2 = shift (t1, 16); |
171 |
t2 = shift (t1, 16); |
166 |
t1 = __builtin_ia32_por (t1, t2); |
172 |
t1 = mmx_or (t1, t2); |
167 |
t2 = shift (t1, 32); |
173 |
t2 = shift (t1, 32); |
168 |
t1 = __builtin_ia32_por (t1, t2); |
174 |
t1 = mmx_or (t1, t2); |
169 |
|
175 |
|
170 |
return (Vector4x16)t1; |
176 |
return (Vector4x16)t1; |
171 |
} |
177 |
} |
Lines 178-186
Link Here
|
178 |
t1 = shift ((Vector1x64)pixel, 48); |
184 |
t1 = shift ((Vector1x64)pixel, 48); |
179 |
t1 = shift (t1, -48); |
185 |
t1 = shift (t1, -48); |
180 |
t2 = shift (t1, 16); |
186 |
t2 = shift (t1, 16); |
181 |
t1 = __builtin_ia32_por (t1, t2); |
187 |
t1 = mmx_or (t1, t2); |
182 |
t2 = shift (t1, 32); |
188 |
t2 = shift (t1, 32); |
183 |
t1 = __builtin_ia32_por (t1, t2); |
189 |
t1 = mmx_or (t1, t2); |
184 |
|
190 |
|
185 |
return (Vector4x16)t1; |
191 |
return (Vector4x16)t1; |
186 |
} |
192 |
} |
Lines 192-206
Link Here
|
192 |
|
198 |
|
193 |
x = y = z = (Vector1x64)pixel; |
199 |
x = y = z = (Vector1x64)pixel; |
194 |
|
200 |
|
195 |
x = __builtin_ia32_pand (x, (Vector1x64)c.mmx_ffff0000ffff0000); |
201 |
x = mmx_and (x, (Vector1x64)c.mmx_ffff0000ffff0000); |
196 |
y = __builtin_ia32_pand (y, (Vector1x64)c.mmx_000000000000ffff); |
202 |
y = mmx_and (y, (Vector1x64)c.mmx_000000000000ffff); |
197 |
z = __builtin_ia32_pand (z, (Vector1x64)c.mmx_0000ffff00000000); |
203 |
z = mmx_and (z, (Vector1x64)c.mmx_0000ffff00000000); |
198 |
|
204 |
|
199 |
y = shift (y, 32); |
205 |
y = shift (y, 32); |
200 |
z = shift (z, -32); |
206 |
z = shift (z, -32); |
201 |
|
207 |
|
202 |
x = __builtin_ia32_por (x, y); |
208 |
x = mmx_or (x, y); |
203 |
x = __builtin_ia32_por (x, z); |
209 |
x = mmx_or (x, z); |
204 |
|
210 |
|
205 |
return (Vector4x16)x; |
211 |
return (Vector4x16)x; |
206 |
} |
212 |
} |
Lines 234-240
Link Here
|
234 |
over_rev_non_pre (Vector4x16 src, Vector4x16 dest) |
240 |
over_rev_non_pre (Vector4x16 src, Vector4x16 dest) |
235 |
{ |
241 |
{ |
236 |
Vector4x16 srca = expand_alpha (src); |
242 |
Vector4x16 srca = expand_alpha (src); |
237 |
Vector4x16 srcfaaa = (Vector4x16)__builtin_ia32_por((Vector1x64)srca, (Vector1x64)c.mmx_full_alpha); |
243 |
Vector4x16 srcfaaa = (Vector4x16)mmx_or((Vector1x64)srca, (Vector1x64)c.mmx_full_alpha); |
238 |
|
244 |
|
239 |
return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest); |
245 |
return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest); |
240 |
} |
246 |
} |
Lines 300-308
Link Here
|
300 |
Vector1x64 t1 = shift (p, 36 - 11); |
306 |
Vector1x64 t1 = shift (p, 36 - 11); |
301 |
Vector1x64 t2 = shift (p, 16 - 5); |
307 |
Vector1x64 t2 = shift (p, 16 - 5); |
302 |
|
308 |
|
303 |
p = __builtin_ia32_por (t1, p); |
309 |
p = mmx_or (t1, p); |
304 |
p = __builtin_ia32_por (t2, p); |
310 |
p = mmx_or (t2, p); |
305 |
p = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_rgb); |
311 |
p = mmx_and (p, (Vector1x64)c.mmx_565_rgb); |
306 |
|
312 |
|
307 |
pixel = __builtin_ia32_pmullw ((Vector4x16)p, (Vector4x16)c.mmx_565_unpack_multiplier); |
313 |
pixel = __builtin_ia32_pmullw ((Vector4x16)p, (Vector4x16)c.mmx_565_unpack_multiplier); |
308 |
return __builtin_ia32_psrlw (pixel, 8); |
314 |
return __builtin_ia32_psrlw (pixel, 8); |
Lines 324-350
Link Here
|
324 |
Vector1x64 t = (Vector1x64)target; |
330 |
Vector1x64 t = (Vector1x64)target; |
325 |
Vector1x64 r, g, b; |
331 |
Vector1x64 r, g, b; |
326 |
|
332 |
|
327 |
r = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_r); |
333 |
r = mmx_and (p, (Vector1x64)c.mmx_565_r); |
328 |
g = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_g); |
334 |
g = mmx_and (p, (Vector1x64)c.mmx_565_g); |
329 |
b = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_b); |
335 |
b = mmx_and (p, (Vector1x64)c.mmx_565_b); |
330 |
|
336 |
|
331 |
r = shift (r, - (32 - 8) + pos * 16); |
337 |
r = shift (r, - (32 - 8) + pos * 16); |
332 |
g = shift (g, - (16 - 3) + pos * 16); |
338 |
g = shift (g, - (16 - 3) + pos * 16); |
333 |
b = shift (b, - (0 + 3) + pos * 16); |
339 |
b = shift (b, - (0 + 3) + pos * 16); |
334 |
|
340 |
|
335 |
if (pos == 0) |
341 |
if (pos == 0) |
336 |
t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_0); |
342 |
t = mmx_and (t, (Vector1x64)c.mmx_mask_0); |
337 |
else if (pos == 1) |
343 |
else if (pos == 1) |
338 |
t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_1); |
344 |
t = mmx_and (t, (Vector1x64)c.mmx_mask_1); |
339 |
else if (pos == 2) |
345 |
else if (pos == 2) |
340 |
t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_2); |
346 |
t = mmx_and (t, (Vector1x64)c.mmx_mask_2); |
341 |
else if (pos == 3) |
347 |
else if (pos == 3) |
342 |
t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_3); |
348 |
t = mmx_and (t, (Vector1x64)c.mmx_mask_3); |
343 |
|
349 |
|
344 |
p = __builtin_ia32_por (r, t); |
350 |
p = mmx_or (r, t); |
345 |
p = __builtin_ia32_por (g, p); |
351 |
p = mmx_or (g, p); |
346 |
|
352 |
|
347 |
return (Vector4x16)__builtin_ia32_por (b, p); |
353 |
return (Vector4x16)mmx_or (b, p); |
348 |
} |
354 |
} |
349 |
|
355 |
|
350 |
static __inline__ void |
356 |
static __inline__ void |