Lines 22-28
Link Here
|
22 |
|
22 |
|
23 |
#ifdef SAVE_RCSID |
23 |
#ifdef SAVE_RCSID |
24 |
static char rcsid = |
24 |
static char rcsid = |
25 |
"@(#) $Id: SDL_blit_N.c,v 1.11 2005/04/20 05:57:39 icculus Exp $"; |
25 |
"@(#) $Id: SDL_blit_N.c,v 1.9 2004/01/04 16:49:21 slouken Exp $"; |
26 |
#endif |
26 |
#endif |
27 |
|
27 |
|
28 |
#include <stdio.h> |
28 |
#include <stdio.h> |
Lines 35-689
Link Here
|
35 |
|
35 |
|
36 |
/* Functions to blit from N-bit surfaces to other surfaces */ |
36 |
/* Functions to blit from N-bit surfaces to other surfaces */ |
37 |
|
37 |
|
38 |
#ifdef USE_ALTIVEC_BLITTERS |
38 |
#ifdef USE_ASMBLIT |
39 |
#include <assert.h> |
|
|
40 |
#ifdef MACOSX
#include <sys/sysctl.h>
#include <stdlib.h>
/*
 * Query the "hw.l3cachesize" sysctl.
 * Returns the value reported by sysctlbyname(), or 0 when the call fails.
 */
static size_t GetL3CacheSize( void )
{
    u_int64_t cache_bytes = 0;
    size_t value_len = sizeof( cache_bytes );

    if ( sysctlbyname( "hw.l3cachesize", &cache_bytes, &value_len, NULL, 0 ) != 0 ) {
        return 0;
    }
    return cache_bytes;
}
#else
/*
 * No sysctl available here: report a fixed 2 MiB.
 */
static size_t GetL3CacheSize( void )
{
    /* XXX: Just guess G4 */
    return 2 * 1024 * 1024;
}
#endif /* MACOSX */
62 |
|
63 |
/*
 * Low four bits of the address x: non-zero when x is not 16-byte aligned.
 * The argument is parenthesized so pointer expressions such as (p + 1)
 * are converted as a whole.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/* Permute vector that applies the byte order (a,b,c,d) to each 32-bit lane. */
#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
                               ( 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                 0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                 0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                 0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )

/*
 * Pack four channel values into one 32-bit pixel using the shifts and masks
 * of dstfmt.  Each channel is widened to Uint32 before shifting so that an
 * 8-bit value >= 0x80 shifted by 24 does not overflow a signed int, and all
 * arguments are parenthesized so expressions can be passed safely.
 */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( ((((Uint32)(r)) << (dstfmt)->Rshift) & (dstfmt)->Rmask) | \
      ((((Uint32)(g)) << (dstfmt)->Gshift) & (dstfmt)->Gmask) | \
      ((((Uint32)(b)) << (dstfmt)->Bshift) & (dstfmt)->Bmask) | \
      ((((Uint32)(a)) << (dstfmt)->Ashift) & (dstfmt)->Amask) )

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 *  Don't use this on a G5...however, the speed boost is very significant
 *   on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

/*
 * Permute control used with vec_perm() to combine the vector loaded at src
 * with the one loaded at src+15 into the 16 bytes that start at src.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, (src)) \
    : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
92 |
|
93 |
/* Calculate the permute vector used for 32->32 swizzling */ |
94 |
static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, |
95 |
const SDL_PixelFormat *dstfmt) |
96 |
{ |
97 |
/* |
98 |
* We have to assume that the bits that aren't used by other |
99 |
* colors is alpha, and it's one complete byte, since some formats |
100 |
* leave alpha with a zero mask, but we should still swizzle the bits. |
101 |
*/ |
102 |
/* ARGB */ |
103 |
const static struct SDL_PixelFormat default_pixel_format = { |
104 |
NULL, 0, 0, |
105 |
0, 0, 0, 0, |
106 |
16, 8, 0, 24, |
107 |
0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, |
108 |
0, 0}; |
109 |
if (!srcfmt) { |
110 |
srcfmt = &default_pixel_format; |
111 |
} |
112 |
if (!dstfmt) { |
113 |
dstfmt = &default_pixel_format; |
114 |
} |
115 |
vector unsigned char plus = (vector unsigned char)( 0x00, 0x00, 0x00, 0x00, |
116 |
0x04, 0x04, 0x04, 0x04, |
117 |
0x08, 0x08, 0x08, 0x08, |
118 |
0x0C, 0x0C, 0x0C, 0x0C ); |
119 |
vector unsigned char vswiz; |
120 |
vector unsigned int srcvec; |
121 |
#define RESHIFT(X) (3 - ((X) >> 3)) |
122 |
Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
123 |
Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
124 |
Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
125 |
Uint32 amask; |
126 |
/* Use zero for alpha if either surface doesn't have alpha */ |
127 |
if (dstfmt->Amask) { |
128 |
amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); |
129 |
} else { |
130 |
amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); |
131 |
} |
132 |
#undef RESHIFT |
133 |
((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask); |
134 |
vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); |
135 |
return(vswiz); |
136 |
} |
137 |
|
138 |
static void Blit_RGB888_RGB565(SDL_BlitInfo *info);

/*
 * AltiVec blit from a 32-bit source surface to a 16-bit 5-6-5 destination.
 *
 * Each row is handled in three phases: single pixels until dst is 16-byte
 * aligned, then eight pixels (32 source bytes -> 16 destination bytes) per
 * vector iteration, then the remaining (width % 8) pixels one at a time.
 *
 * info supplies the pixel pointers, row skips, dimensions and source format.
 */
static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    vector unsigned char valpha = vec_splat_u8(0);
    /* reorder every source pixel to the ARGB byte order first */
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    /* gathers byte 2 (green) of each of the eight ARGB pixels into halfwords */
    vector unsigned char vgmerge = (vector unsigned char)(
        0x00, 0x02, 0x00, 0x06,
        0x00, 0x0a, 0x00, 0x0e,
        0x00, 0x12, 0x00, 0x16,
        0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
     * Blue occupies only bits 4..0 of the packed 1-5-5-5 value.  A 0x003f
     * mask would also keep bit 5, which is a green bit, and OR it into the
     * green LSB of the 565 result; the scalar path does not do that.
     */
    vector unsigned short v1f = (vector unsigned short)(
        0x001f, 0x001f, 0x001f, 0x001f,
        0x001f, 0x001f, 0x001f, 0x001f);
    vector unsigned short vfc = (vector unsigned short)(
        0x00fc, 0x00fc, 0x00fc, 0x00fc,
        0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* splat 0xF8 into every byte, then shift left by 8: 0xF800 per halfword */
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

/* convert one pixel at a time while `condition` holds */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, pixel, \
                          sR, sG, sB, sA); \
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                ((sG << 3) & 0x000007E0) | \
                                ((sB >> 3) & 0x0000001F)); \
            dst += 2; \
            src += 4; \
            widthvar--; \
        }

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* do scalar until we can align... */
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* pixels left for the scalar tail */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            /* first four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            src += 16;
            /* next four source pixels */
            vsrc = voverflow;
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            /* 1555 */
            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
            /* green keeps six bits, so take it from the 8-bit channel */
            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            /* red: move from bits 14..10 to bits 15..11 */
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            vbpixel = vec_and(vpixel, v1f);
            vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
            /* 565 */
            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* finish the row with the pixels that did not fill a vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
#undef ONE_PIXEL_BLEND
}
240 |
|
241 |
/*
 * AltiVec blit from a 16-bit 5-6-5 source surface to a 32-bit destination.
 *
 * Each row is handled in three phases: single pixels until dst is 16-byte
 * aligned, then eight pixels (16 source bytes -> 32 destination bytes) per
 * vector iteration, then the remaining (width % 8) pixels one at a time.
 *
 * The vector path builds ARGB pixels and then swizzles them to dstfmt.  Red
 * and green are masked with 0xF800 / 0xFC00 so the expanded channels have the
 * same zero low bits the scalar path produces with 0xf8 / 0xfc; without the
 * masks a green bit leaks into red and two blue bits leak into green.
 */
static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    SDL_PixelFormat *dstfmt = info->dst;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned short vfc00;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        0x10 - 0x1f is the alpha
        0x00 - 0x0e evens are the red
        0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = (vector unsigned char)(
        0x10, 0x00, 0x01, 0x01,
        0x10, 0x02, 0x01, 0x01,
        0x10, 0x04, 0x01, 0x01,
        0x10, 0x06, 0x01, 0x01
    );
    /* same table for pixels 4..7: adds 8 to the red selector of each lane */
    vector unsigned char vredalpha2 = (vector unsigned char)(
        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    );
    /*
        0x00 - 0x0f is ARxx ARxx ARxx ARxx
        0x11 - 0x1f odds are blue
    */
    vector unsigned char vblue1 = (vector unsigned char)(
        0x00, 0x01, 0x02, 0x11,
        0x04, 0x05, 0x06, 0x13,
        0x08, 0x09, 0x0a, 0x15,
        0x0c, 0x0d, 0x0e, 0x17
    );
    /* pixels 4..7: adds 8 to the blue selector of each lane */
    vector unsigned char vblue2 = (vector unsigned char)(
        vec_add((vector unsigned int)vblue1, v8)
    );
    /*
        0x00 - 0x0f is ARxB ARxB ARxB ARxB
        0x10 - 0x1e evens are green
    */
    vector unsigned char vgreen1 = (vector unsigned char)(
        0x00, 0x01, 0x10, 0x03,
        0x04, 0x05, 0x12, 0x07,
        0x08, 0x09, 0x14, 0x0b,
        0x0c, 0x0d, 0x16, 0x0f
    );
    /* pixels 4..7: adds 8 to the green selector of each lane */
    vector unsigned char vgreen2 = (vector unsigned char)(
        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    );

    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /* splat 0xF8 / 0xFC into every byte, then shift left by 8 */
    vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    vfc00 = (vector unsigned short)vec_splat_u8(-4);
    vfc00 = vec_sl(vfc00, vec_splat_u16(8));

    if (dstfmt->Amask && srcfmt->alpha) {
        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    vpermute = calc_swizzle32(NULL, dstfmt);

/* convert one pixel at a time while `condition` holds */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short pixel = *((unsigned short *)src); \
            sR = (pixel >> 8) & 0xf8; \
            sG = (pixel >> 3) & 0xfc; \
            sB = (pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* do scalar until we can align... */
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* pixels left for the scalar tail */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red stays in the high byte; blue lands in the low byte */
            vR = vec_and((vector unsigned short)vsrc, vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            /* green: shift into the high byte and drop the blue bits below it */
            vG = vec_and(vec_sl(vB, v2), vfc00);

            /* pixels 0..3 */
            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4..7 */
            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* finish the row with the pixels that did not fill a vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
#undef ONE_PIXEL_BLEND
}
384 |
|
385 |
static void BlitNtoNKey(SDL_BlitInfo *info);
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);

/*
 * AltiVec colorkey blit between two 32-bit surfaces.
 *
 * Rows narrower than 16 pixels are handed to the scalar BlitNtoNKey /
 * BlitNtoNKeyCopyAlpha routines.  Otherwise each row is processed as: single
 * pixels until dstp is 16-byte aligned, four pixels per vector iteration,
 * then the remaining (width % 4) pixels one at a time.
 *
 * A source pixel is transparent when its R, G and B bits equal the colorkey;
 * bits outside the RGB masks are ignored in the scalar and vector paths alike.
 */
static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
{
    int height = info->d_height;
    Uint32 *srcp = (Uint32 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint32 *dstp = (Uint32 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst;
    int dstbpp = dstfmt->BytesPerPixel;
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    Uint32 ckey = info->src->colorkey;
    vector unsigned int valpha;
    vector unsigned char vpermute;
    vector unsigned char vzero;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;

    /* too narrow to be worth vectorizing: use the scalar blitters */
    if (info->d_width < 16) {
        if (copy_alpha) {
            BlitNtoNKeyCopyAlpha(info);
        } else {
            BlitNtoNKey(info);
        }
        return;
    }

    vpermute = calc_swizzle32(srcfmt, dstfmt);
    vzero = vec_splat_u8(0);
    if (alpha) {
        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
        valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
    } else {
        valpha = (vector unsigned int)vzero;
    }
    /* compare on the colour bits only */
    ckey &= rgbmask;
    ((unsigned int *)&vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *)&vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

/*
 * Blit one pixel at a time while `condition` holds.  The pointers are
 * advanced through a byte pointer instead of a cast-as-lvalue.
 */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        if (copy_alpha) { \
            while (condition) { \
                Uint32 pixel; \
                unsigned sR, sG, sB, sA; \
                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, pixel, \
                          sR, sG, sB, sA); \
                if ( (pixel & rgbmask) != ckey ) { \
                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                            sR, sG, sB, sA); \
                } \
                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
                widthvar--; \
            } \
        } else { \
            while (condition) { \
                Uint32 pixel; \
                unsigned sR, sG, sB; \
                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, pixel); \
                if ( (pixel & rgbmask) != ckey ) { \
                    RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \
                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                              sR, sG, sB, alpha); \
                } \
                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
                widthvar--; \
            } \
        }

    while (height--) {
        int width = info->d_width;

        /* do scalar until the destination is aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        assert(width > 0);
        if (width > 0) {
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned int vs = vec_ld(0, srcp);
            width -= extrawidth;
            assert(width >= 4);
            while (width) {
                vector unsigned char vsel;
                vector unsigned int vd;
                vector unsigned int voverflow = vec_ld(15, srcp);
                /* load the source vec */
                vs = vec_perm(vs, voverflow, valigner);
                /* vsel is set for items whose colour bits match the key */
                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
                vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey);
                /* permute the src vec to the dest format */
                vs = vec_perm(vs, valpha, vpermute);
                /* load the destination vec */
                vd = vec_ld(0, dstp);
                /* keep the destination where the key matched, else take the source */
                vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);

                vec_st(vd, 0, dstp);
                srcp += 4;
                width -= 4;
                dstp += 4;
                vs = voverflow;
            }
            /* finish the row with the pixels that did not fill a vector */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
        /* move to the next row; the skips are byte counts */
        srcp += srcskip >> 2;
        dstp += dstskip >> 2;
    }
#undef ONE_PIXEL_BLEND
}
497 |
|
498 |
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */ |
499 |
/* Use this on a G5 */ |
500 |
static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info) |
501 |
{ |
502 |
int height = info->d_height; |
503 |
Uint32 *src = (Uint32 *) info->s_pixels; |
504 |
int srcskip = info->s_skip; |
505 |
Uint32 *dst = (Uint32 *) info->d_pixels; |
506 |
int dstskip = info->d_skip; |
507 |
SDL_PixelFormat *srcfmt = info->src; |
508 |
int srcbpp = srcfmt->BytesPerPixel; |
509 |
SDL_PixelFormat *dstfmt = info->dst; |
510 |
int dstbpp = dstfmt->BytesPerPixel; |
511 |
vector unsigned int vzero = vec_splat_u32(0); |
512 |
vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt); |
513 |
if (dstfmt->Amask && !srcfmt->Amask) { |
514 |
if (srcfmt->alpha) { |
515 |
vector unsigned char valpha; |
516 |
((unsigned char *)&valpha)[0] = srcfmt->alpha; |
517 |
vzero = (vector unsigned int)vec_splat(valpha, 0); |
518 |
} |
519 |
} |
520 |
|
521 |
assert(srcbpp == 4); |
522 |
assert(dstbpp == 4); |
523 |
|
524 |
while (height--) { |
525 |
vector unsigned char valigner; |
526 |
vector unsigned int vbits; |
527 |
vector unsigned int voverflow; |
528 |
Uint32 bits; |
529 |
Uint8 r, g, b, a; |
530 |
|
531 |
int width = info->d_width; |
532 |
int extrawidth; |
533 |
|
534 |
/* do scalar until we can align... */ |
535 |
while ((UNALIGNED_PTR(dst)) && (width)) { |
536 |
bits = *(src++); |
537 |
RGBA_FROM_8888(bits, srcfmt, r, g, b, a); |
538 |
*(dst++) = MAKE8888(dstfmt, r, g, b, a); |
539 |
width--; |
540 |
} |
541 |
|
542 |
/* After all that work, here's the vector part! */ |
543 |
extrawidth = (width % 4); |
544 |
width -= extrawidth; |
545 |
valigner = VEC_ALIGNER(src); |
546 |
vbits = vec_ld(0, src); |
547 |
|
548 |
while (width) { |
549 |
voverflow = vec_ld(15, src); |
550 |
src += 4; |
551 |
width -= 4; |
552 |
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */ |
553 |
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */ |
554 |
vec_st(vbits, 0, dst); /* store it back out. */ |
555 |
dst += 4; |
556 |
vbits = voverflow; |
557 |
} |
558 |
|
559 |
assert(width == 0); |
560 |
|
561 |
/* cover pixels at the end of the row that didn't fit in 16 bytes. */ |
562 |
while (extrawidth) { |
563 |
bits = *(src++); /* max 7 pixels, don't bother with prefetch. */ |
564 |
RGBA_FROM_8888(bits, srcfmt, r, g, b, a); |
565 |
*(dst++) = MAKE8888(dstfmt, r, g, b, a); |
566 |
extrawidth--; |
567 |
} |
568 |
|
569 |
src += srcskip >> 2; /* move to next row, accounting for pitch. */ |
570 |
dst += dstskip >> 2; |
571 |
} |
572 |
|
573 |
} |
574 |
|
575 |
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */ |
576 |
/* Use this on a G4 */ |
577 |
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info) |
578 |
{ |
579 |
const int scalar_dst_lead = sizeof (Uint32) * 4; |
580 |
const int vector_dst_lead = sizeof (Uint32) * 16; |
581 |
|
39 |
|
582 |
int height = info->d_height; |
40 |
/* Heheheh, we coerce Hermes into using SDL blit information */ |
583 |
Uint32 *src = (Uint32 *) info->s_pixels; |
41 |
#define X86_ASSEMBLER |
584 |
int srcskip = info->s_skip; |
42 |
#define HermesConverterInterface SDL_BlitInfo |
585 |
Uint32 *dst = (Uint32 *) info->d_pixels; |
43 |
#define HermesClearInterface void |
586 |
int dstskip = info->d_skip; |
44 |
#define STACKCALL |
587 |
SDL_PixelFormat *srcfmt = info->src; |
|
|
588 |
int srcbpp = srcfmt->BytesPerPixel; |
589 |
SDL_PixelFormat *dstfmt = info->dst; |
590 |
int dstbpp = dstfmt->BytesPerPixel; |
591 |
vector unsigned int vzero = vec_splat_u32(0); |
592 |
vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt); |
593 |
if (dstfmt->Amask && !srcfmt->Amask) { |
594 |
if (srcfmt->alpha) { |
595 |
vector unsigned char valpha; |
596 |
((unsigned char *)&valpha)[0] = srcfmt->alpha; |
597 |
vzero = (vector unsigned int)vec_splat(valpha, 0); |
598 |
} |
599 |
} |
600 |
|
601 |
assert(srcbpp == 4); |
602 |
assert(dstbpp == 4); |
603 |
|
604 |
while (height--) { |
605 |
vector unsigned char valigner; |
606 |
vector unsigned int vbits; |
607 |
vector unsigned int voverflow; |
608 |
Uint32 bits; |
609 |
Uint8 r, g, b, a; |
610 |
|
611 |
int width = info->d_width; |
612 |
int extrawidth; |
613 |
|
614 |
/* do scalar until we can align... */ |
615 |
while ((UNALIGNED_PTR(dst)) && (width)) { |
616 |
vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC); |
617 |
vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST); |
618 |
bits = *(src++); |
619 |
RGBA_FROM_8888(bits, srcfmt, r, g, b, a); |
620 |
*(dst++) = MAKE8888(dstfmt, r, g, b, a); |
621 |
width--; |
622 |
} |
623 |
|
624 |
/* After all that work, here's the vector part! */ |
625 |
extrawidth = (width % 4); |
626 |
width -= extrawidth; |
627 |
valigner = VEC_ALIGNER(src); |
628 |
vbits = vec_ld(0, src); |
629 |
|
630 |
while (width) { |
631 |
vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC); |
632 |
vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST); |
633 |
voverflow = vec_ld(15, src); |
634 |
src += 4; |
635 |
width -= 4; |
636 |
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */ |
637 |
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */ |
638 |
vec_st(vbits, 0, dst); /* store it back out. */ |
639 |
dst += 4; |
640 |
vbits = voverflow; |
641 |
} |
642 |
|
643 |
assert(width == 0); |
644 |
|
645 |
/* cover pixels at the end of the row that didn't fit in 16 bytes. */ |
646 |
while (extrawidth) { |
647 |
bits = *(src++); /* max 7 pixels, don't bother with prefetch. */ |
648 |
RGBA_FROM_8888(bits, srcfmt, r, g, b, a); |
649 |
*(dst++) = MAKE8888(dstfmt, r, g, b, a); |
650 |
extrawidth--; |
651 |
} |
652 |
|
653 |
src += srcskip >> 2; /* move to next row, accounting for pitch. */ |
654 |
dst += dstskip >> 2; |
655 |
} |
656 |
|
45 |
|
657 |
vec_dss(DST_CHAN_SRC); |
46 |
#include "HeadMMX.h" |
658 |
vec_dss(DST_CHAN_DEST); |
47 |
#include "HeadX86.h" |
659 |
} |
|
|
660 |
|
48 |
|
661 |
/*
 * Return the bitmask of blit features, computed on the first call and cached.
 *
 * Bit 1: has-MMX, bit 2: has-AltiVec, bit 4: dont-use-prefetch (set when
 * GetL3CacheSize() reports 0).  When the SDL_ALTIVEC_BLIT_FEATURES
 * environment variable is set, its value, parsed as an unsigned decimal,
 * replaces the detected mask.
 */
static Uint32 GetBlitFeatures( void )
{
    /* 0xffffffff marks "not computed yet" */
    static Uint32 features = 0xffffffff;
    char *override;

    if (features != 0xffffffff) {
        return features;
    }

    /* Provide an override for testing .. */
    override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
    if (override) {
        features = 0;
        sscanf(override, "%u", &features);
        return features;
    }

    features = ( 0
        /* Feature 1 is has-MMX */
        | ((SDL_HasMMX()) ? 1 : 0)
        /* Feature 2 is has-AltiVec */
        | ((SDL_HasAltiVec()) ? 2 : 0)
        /* Feature 4 is dont-use-prefetch */
        | ((GetL3CacheSize() == 0) ? 4 : 0)
    );
    return features;
}
683 |
#else |
49 |
#else |
684 |
/* Only one feature bit exists in this configuration: 1 is has-MMX */
#define GetBlitFeatures() ((Uint32)((SDL_HasMMX()) ? 1 : 0))
686 |
#endif |
687 |
|
50 |
|
688 |
/* This is now endian dependent */ |
51 |
/* This is now endian dependent */ |
689 |
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN ) |
52 |
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN ) |
Lines 694-712
Link Here
|
694 |
#define LO 1 |
57 |
#define LO 1 |
695 |
#endif |
58 |
#endif |
696 |
|
59 |
|
697 |
#ifdef USE_ASMBLIT |
|
|
698 |
|
699 |
/* Heheheh, we coerce Hermes into using SDL blit information */ |
700 |
#define X86_ASSEMBLER |
701 |
#define HermesConverterInterface SDL_BlitInfo |
702 |
#define HermesClearInterface void |
703 |
#define STACKCALL |
704 |
|
705 |
#include "HeadMMX.h" |
706 |
#include "HeadX86.h" |
707 |
|
708 |
#else |
709 |
|
710 |
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */ |
60 |
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */ |
711 |
#define RGB888_RGB332(dst, src) { \ |
61 |
#define RGB888_RGB332(dst, src) { \ |
712 |
dst = (((src)&0x00E00000)>>16)| \ |
62 |
dst = (((src)&0x00E00000)>>16)| \ |
Lines 1056-1062
Link Here
|
1056 |
|
406 |
|
1057 |
|
407 |
|
1058 |
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */ |
408 |
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */ |
1059 |
/*
 * Look up the 32-bit value of one RGB565 pixel: the LO byte of src indexes
 * the even entries of map, the HI byte the odd entries, and the two entries
 * are added.  dst is unused.  src and map are parenthesized so pointer
 * expressions can be passed.
 */
#define RGB565_32(dst, src, map) ((map)[(src)[LO]*2] + (map)[(src)[HI]*2+1])
409 |
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN ) |
|
|
410 |
#define RGB565_32(dst, src, map) (map[src[0]*2] + map[src[1]*2+1]) |
411 |
#else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */ |
412 |
#define RGB565_32(dst, src, map) (map[src[1]*2] + map[src[0]*2+1]) |
413 |
#endif |
1060 |
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map) |
414 |
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map) |
1061 |
{ |
415 |
{ |
1062 |
#ifndef USE_DUFFS_LOOP |
416 |
#ifndef USE_DUFFS_LOOP |
Lines 2068-2077
Link Here
|
2068 |
Uint32 srcR, srcG, srcB; |
1422 |
Uint32 srcR, srcG, srcB; |
2069 |
int dstbpp; |
1423 |
int dstbpp; |
2070 |
Uint32 dstR, dstG, dstB; |
1424 |
Uint32 dstR, dstG, dstB; |
2071 |
Uint32 blit_features; |
1425 |
SDL_bool cpu_mmx; |
2072 |
void *aux_data; |
1426 |
void *aux_data; |
2073 |
SDL_loblit blitfunc; |
1427 |
SDL_loblit blitfunc; |
2074 |
enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha; |
1428 |
enum { NO_ALPHA, SET_ALPHA, COPY_ALPHA } alpha; |
2075 |
}; |
1429 |
}; |
2076 |
static const struct blit_table normal_blit_1[] = { |
1430 |
static const struct blit_table normal_blit_1[] = { |
2077 |
/* Default for 8-bit RGB source, an invalid combination */ |
1431 |
/* Default for 8-bit RGB source, an invalid combination */ |
Lines 2086-2096
Link Here
|
2086 |
{ 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00, |
1440 |
{ 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00, |
2087 |
0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA }, |
1441 |
0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA }, |
2088 |
#endif |
1442 |
#endif |
2089 |
#ifdef USE_ALTIVEC_BLITTERS |
|
|
2090 |
/* has-altivec */ |
2091 |
{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000, |
2092 |
2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, |
2093 |
#endif |
2094 |
{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF, |
1443 |
{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF, |
2095 |
0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA }, |
1444 |
0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA }, |
2096 |
{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000, |
1445 |
{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000, |
Lines 2136-2152
Link Here
|
2136 |
{ 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000, |
1485 |
{ 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000, |
2137 |
0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA }, |
1486 |
0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA }, |
2138 |
#else |
1487 |
#else |
2139 |
#ifdef USE_ALTIVEC_BLITTERS |
|
|
2140 |
/* has-altivec | dont-use-prefetch */ |
2141 |
{ 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000, |
2142 |
6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, |
2143 |
/* has-altivec */ |
2144 |
{ 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000, |
2145 |
2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, |
2146 |
/* has-altivec */ |
2147 |
{ 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F, |
2148 |
2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA }, |
2149 |
#endif |
2150 |
{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F, |
1488 |
{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F, |
2151 |
0, NULL, Blit_RGB888_RGB565, NO_ALPHA }, |
1489 |
0, NULL, Blit_RGB888_RGB565, NO_ALPHA }, |
2152 |
{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F, |
1490 |
{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F, |
Lines 2159-2167
Link Here
|
2159 |
normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4 |
1497 |
normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4 |
2160 |
}; |
1498 |
}; |
2161 |
|
1499 |
|
2162 |
/* True when mask x equals table entry y, or when y is zero */
#define MASKOK(x, y) (((x) == (y)) || !(y))
2164 |
|
2165 |
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index) |
1500 |
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index) |
2166 |
{ |
1501 |
{ |
2167 |
struct private_swaccel *sdata; |
1502 |
struct private_swaccel *sdata; |
Lines 2197-2208
Link Here
|
2197 |
else if(dstfmt->BytesPerPixel == 1) |
1532 |
else if(dstfmt->BytesPerPixel == 1) |
2198 |
return BlitNto1Key; |
1533 |
return BlitNto1Key; |
2199 |
else { |
1534 |
else { |
2200 |
#ifdef USE_ALTIVEC_BLITTERS |
|
|
2201 |
if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) { |
2202 |
return Blit32to32KeyAltivec; |
2203 |
} else |
2204 |
#endif |
2205 |
|
2206 |
if(srcfmt->Amask && dstfmt->Amask) |
1535 |
if(srcfmt->Amask && dstfmt->Amask) |
2207 |
return BlitNtoNKeyCopyAlpha; |
1536 |
return BlitNtoNKeyCopyAlpha; |
2208 |
else |
1537 |
else |
Lines 2232-2251
Link Here
|
2232 |
} |
1561 |
} |
2233 |
} else { |
1562 |
} else { |
2234 |
/* Now the meat, choose the blitter we want */ |
1563 |
/* Now the meat, choose the blitter we want */ |
2235 |
int a_need = 0; |
1564 |
int a_need = 0; |
2236 |
if(dstfmt->Amask) |
1565 |
if(dstfmt->Amask) |
2237 |
a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA; |
1566 |
a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA; |
2238 |
table = normal_blit[srcfmt->BytesPerPixel-1]; |
1567 |
table = normal_blit[srcfmt->BytesPerPixel-1]; |
2239 |
for ( which=0; table[which].dstbpp; ++which ) { |
1568 |
for ( which=0; table[which].srcR; ++which ) { |
2240 |
if ( MASKOK(srcfmt->Rmask, table[which].srcR) && |
1569 |
if ( srcfmt->Rmask == table[which].srcR && |
2241 |
MASKOK(srcfmt->Gmask, table[which].srcG) && |
1570 |
srcfmt->Gmask == table[which].srcG && |
2242 |
MASKOK(srcfmt->Bmask, table[which].srcB) && |
1571 |
srcfmt->Bmask == table[which].srcB && |
2243 |
MASKOK(dstfmt->Rmask, table[which].dstR) && |
1572 |
dstfmt->BytesPerPixel == table[which].dstbpp && |
2244 |
MASKOK(dstfmt->Gmask, table[which].dstG) && |
1573 |
dstfmt->Rmask == table[which].dstR && |
2245 |
MASKOK(dstfmt->Bmask, table[which].dstB) && |
1574 |
dstfmt->Gmask == table[which].dstG && |
2246 |
dstfmt->BytesPerPixel == table[which].dstbpp && |
1575 |
dstfmt->Bmask == table[which].dstB && |
2247 |
(a_need & table[which].alpha) == a_need && |
1576 |
(a_need & table[which].alpha) == a_need && |
2248 |
((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) ) |
1577 |
(table[which].cpu_mmx == SDL_HasMMX())) |
2249 |
break; |
1578 |
break; |
2250 |
} |
1579 |
} |
2251 |
sdata->aux_data = table[which].aux_data; |
1580 |
sdata->aux_data = table[which].aux_data; |