Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 104533 | Differences between
and this patch

Collapse All | Expand All

(-)SDL-1.2.9/src/video/SDL_blit_N.c (-698 / +27 lines)
Lines 22-28 Link Here
22
22
23
#ifdef SAVE_RCSID
23
#ifdef SAVE_RCSID
24
static char rcsid =
24
static char rcsid =
25
 "@(#) $Id: SDL_blit_N.c,v 1.11 2005/04/20 05:57:39 icculus Exp $";
25
 "@(#) $Id: SDL_blit_N.c,v 1.9 2004/01/04 16:49:21 slouken Exp $";
26
#endif
26
#endif
27
27
28
#include <stdio.h>
28
#include <stdio.h>
Lines 35-689 Link Here
35
35
36
/* Functions to blit from N-bit surfaces to other surfaces */
36
/* Functions to blit from N-bit surfaces to other surfaces */
37
37
38
#ifdef USE_ALTIVEC_BLITTERS
38
#ifdef USE_ASMBLIT
39
#include <assert.h>
40
#ifdef MACOSX
41
#include <sys/sysctl.h>
42
#include <stdlib.h>
43
static size_t GetL3CacheSize( void )
44
{
45
    const char key[] = "hw.l3cachesize";
46
    u_int64_t result = 0;
47
    size_t typeSize = sizeof( result );
48
49
50
    int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
51
    if( 0 != err ) return 0;
52
53
    return result;
54
}
55
#else
56
static size_t GetL3CacheSize( void )
57
{
58
    /* XXX: Just guess G4 */
59
    return 2097152;
60
}
61
#endif /* MACOSX */
62
63
#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
64
#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
65
                               ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
66
                                 0x04+a, 0x04+b, 0x04+c, 0x04+d, \
67
                                 0x08+a, 0x08+b, 0x08+c, 0x08+d, \
68
                                 0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
69
70
#define MAKE8888(dstfmt, r, g, b, a)  \
71
    ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
72
      ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
73
      ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
74
      ((a<<dstfmt->Ashift)&dstfmt->Amask) )
75
76
/*
77
 * Data Stream Touch...Altivec cache prefetching.
78
 *
79
 *  Don't use this on a G5...however, the speed boost is very significant
80
 *   on a G4.
81
 */
82
#define DST_CHAN_SRC 1
83
#define DST_CHAN_DEST 2
84
85
/* macro to set DST control word value... */
86
#define DST_CTRL(size, count, stride) \
87
    (((size) << 24) | ((count) << 16) | (stride))
88
89
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
90
    ? vec_lvsl(0, src) \
91
    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
92
93
/* Calculate the permute vector used for 32->32 swizzling */
94
static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
95
                                  const SDL_PixelFormat *dstfmt)
96
{
97
    /*
98
    * We have to assume that the bits that aren't used by other
99
     *  colors is alpha, and it's one complete byte, since some formats
100
     *  leave alpha with a zero mask, but we should still swizzle the bits.
101
     */
102
    /* ARGB */
103
    const static struct SDL_PixelFormat default_pixel_format = {
104
        NULL, 0, 0,
105
        0, 0, 0, 0,
106
        16, 8, 0, 24,
107
        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
108
        0, 0};
109
    if (!srcfmt) {
110
        srcfmt = &default_pixel_format;
111
    }
112
    if (!dstfmt) {
113
        dstfmt = &default_pixel_format;
114
    }
115
    vector unsigned char plus = (vector unsigned char)( 0x00, 0x00, 0x00, 0x00,
116
                                      0x04, 0x04, 0x04, 0x04,
117
                                      0x08, 0x08, 0x08, 0x08,
118
                                      0x0C, 0x0C, 0x0C, 0x0C );
119
    vector unsigned char vswiz;
120
    vector unsigned int srcvec;
121
#define RESHIFT(X) (3 - ((X) >> 3))
122
    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
123
    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
124
    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
125
    Uint32 amask;
126
    /* Use zero for alpha if either surface doesn't have alpha */
127
    if (dstfmt->Amask) {
128
        amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
129
    } else {    
130
        amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
131
    }           
132
#undef RESHIFT  
133
    ((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask);
134
    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
135
    return(vswiz);
136
}
137
138
static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
139
static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
140
    int height = info->d_height;
141
    Uint8 *src = (Uint8 *) info->s_pixels;
142
    int srcskip = info->s_skip;
143
    Uint8 *dst = (Uint8 *) info->d_pixels;
144
    int dstskip = info->d_skip;
145
    SDL_PixelFormat *srcfmt = info->src;
146
    vector unsigned char valpha = vec_splat_u8(0);
147
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
148
    vector unsigned char vgmerge = (vector unsigned char)(
149
        0x00, 0x02, 0x00, 0x06,
150
        0x00, 0x0a, 0x00, 0x0e,
151
        0x00, 0x12, 0x00, 0x16,
152
        0x00, 0x1a, 0x00, 0x1e);
153
    vector unsigned short v1 = vec_splat_u16(1);
154
    vector unsigned short v3 = vec_splat_u16(3);
155
    vector unsigned short v3f = (vector unsigned short)(
156
        0x003f, 0x003f, 0x003f, 0x003f,
157
        0x003f, 0x003f, 0x003f, 0x003f);
158
    vector unsigned short vfc = (vector unsigned short)(
159
        0x00fc, 0x00fc, 0x00fc, 0x00fc,
160
        0x00fc, 0x00fc, 0x00fc, 0x00fc);
161
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
162
    vf800 = vec_sl(vf800, vec_splat_u16(8));
163
164
    while (height--) {
165
        vector unsigned char valigner;
166
        vector unsigned char voverflow;
167
        vector unsigned char vsrc;
168
169
        int width = info->d_width;
170
        int extrawidth;
171
172
        /* do scalar until we can align... */
173
#define ONE_PIXEL_BLEND(condition, widthvar) \
174
        while (condition) { \
175
            Uint32 pixel; \
176
            unsigned sR, sG, sB, sA; \
177
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, pixel, \
178
                          sR, sG, sB, sA); \
179
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
180
                                ((sG << 3) & 0x000007E0) | \
181
                                ((sB >> 3) & 0x0000001F)); \
182
            dst += 2; \
183
            src += 4; \
184
            widthvar--; \
185
        }
186
187
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
188
189
        /* After all that work, here's the vector part! */
190
        extrawidth = (width % 8);  /* trailing unaligned stores */
191
        width -= extrawidth;
192
        vsrc = vec_ld(0, src);
193
        valigner = VEC_ALIGNER(src);
194
195
        while (width) {
196
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
197
            vector unsigned int vsrc1, vsrc2;
198
            vector unsigned char vdst;
199
200
            voverflow = vec_ld(15, src);
201
            vsrc = vec_perm(vsrc, voverflow, valigner);
202
            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
203
            src += 16;
204
            vsrc = voverflow;
205
            voverflow = vec_ld(15, src);
206
            vsrc = vec_perm(vsrc, voverflow, valigner);
207
            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
208
            /* 1555 */
209
            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
210
            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
211
            vgpixel = vec_and(vgpixel, vfc);
212
            vgpixel = vec_sl(vgpixel, v3);
213
            vrpixel = vec_sl(vpixel, v1);
214
            vrpixel = vec_and(vrpixel, vf800);
215
            vbpixel = vec_and(vpixel, v3f);
216
            vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
217
            /* 565 */
218
            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
219
            vec_st(vdst, 0, dst);
220
221
            width -= 8;
222
            src += 16;
223
            dst += 16;
224
            vsrc = voverflow;
225
        }
226
227
        assert(width == 0);
228
229
230
        /* do scalar until we can align... */
231
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
232
#undef ONE_PIXEL_BLEND
233
234
        src += srcskip;  /* move to next row, accounting for pitch. */
235
        dst += dstskip;
236
    }
237
238
239
}
240
241
static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
242
    int height = info->d_height;
243
    Uint8 *src = (Uint8 *) info->s_pixels;
244
    int srcskip = info->s_skip;
245
    Uint8 *dst = (Uint8 *) info->d_pixels;
246
    int dstskip = info->d_skip;
247
    SDL_PixelFormat *srcfmt = info->src;
248
    SDL_PixelFormat *dstfmt = info->dst;
249
    unsigned alpha;
250
    vector unsigned char valpha;
251
    vector unsigned char vpermute;
252
    vector unsigned short vf800;
253
    vector unsigned int v8 = vec_splat_u32(8);
254
    vector unsigned int v16 = vec_add(v8, v8);
255
    vector unsigned short v2 = vec_splat_u16(2);
256
    vector unsigned short v3 = vec_splat_u16(3);
257
    /* 
258
        0x10 - 0x1f is the alpha
259
        0x00 - 0x0e evens are the red
260
        0x01 - 0x0f odds are zero
261
    */
262
    vector unsigned char vredalpha1 = (vector unsigned char)(
263
        0x10, 0x00, 0x01, 0x01,
264
        0x10, 0x02, 0x01, 0x01,
265
        0x10, 0x04, 0x01, 0x01,
266
        0x10, 0x06, 0x01, 0x01
267
    );
268
    vector unsigned char vredalpha2 = (vector unsigned char)(
269
        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
270
    );
271
    /*
272
        0x00 - 0x0f is ARxx ARxx ARxx ARxx
273
        0x11 - 0x0f odds are blue
274
    */
275
    vector unsigned char vblue1 = (vector unsigned char)(
276
        0x00, 0x01, 0x02, 0x11,
277
        0x04, 0x05, 0x06, 0x13,
278
        0x08, 0x09, 0x0a, 0x15,
279
        0x0c, 0x0d, 0x0e, 0x17
280
    );
281
    vector unsigned char vblue2 = (vector unsigned char)(
282
        vec_add((vector unsigned int)vblue1, v8)
283
    );
284
    /*
285
        0x00 - 0x0f is ARxB ARxB ARxB ARxB
286
        0x10 - 0x0e evens are green
287
    */
288
    vector unsigned char vgreen1 = (vector unsigned char)(
289
        0x00, 0x01, 0x10, 0x03,
290
        0x04, 0x05, 0x12, 0x07,
291
        0x08, 0x09, 0x14, 0x0b,
292
        0x0c, 0x0d, 0x16, 0x0f
293
    );
294
    vector unsigned char vgreen2 = (vector unsigned char)(
295
        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
296
    );
297
    
298
299
    assert(srcfmt->BytesPerPixel == 2);
300
    assert(dstfmt->BytesPerPixel == 4);
301
302
    vf800 = (vector unsigned short)vec_splat_u8(-7);
303
    vf800 = vec_sl(vf800, vec_splat_u16(8));
304
305
    if (dstfmt->Amask && srcfmt->alpha) {
306
        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
307
        valpha = vec_splat(valpha, 0);
308
    } else {
309
        alpha = 0;
310
        valpha = vec_splat_u8(0);
311
    }
312
313
    vpermute = calc_swizzle32(NULL, dstfmt);
314
    while (height--) {
315
        vector unsigned char valigner;
316
        vector unsigned char voverflow;
317
        vector unsigned char vsrc;
318
319
        int width = info->d_width;
320
        int extrawidth;
321
322
        /* do scalar until we can align... */
323
#define ONE_PIXEL_BLEND(condition, widthvar) \
324
        while (condition) { \
325
            unsigned sR, sG, sB; \
326
            unsigned short pixel = *((unsigned short *)src); \
327
            sR = (pixel >> 8) & 0xf8; \
328
            sG = (pixel >> 3) & 0xfc; \
329
            sB = (pixel << 3) & 0xf8; \
330
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
331
            src += 2; \
332
            dst += 4; \
333
            widthvar--; \
334
        }
335
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
336
337
        /* After all that work, here's the vector part! */
338
        extrawidth = (width % 8);  /* trailing unaligned stores */
339
        width -= extrawidth;
340
        vsrc = vec_ld(0, src);
341
        valigner = VEC_ALIGNER(src);
342
343
        while (width) {
344
            vector unsigned short vR, vG, vB;
345
            vector unsigned char vdst1, vdst2;
346
347
            voverflow = vec_ld(15, src);
348
            vsrc = vec_perm(vsrc, voverflow, valigner);
349
350
            vR = vec_and((vector unsigned short)vsrc, vf800);
351
            vB = vec_sl((vector unsigned short)vsrc, v3);
352
            vG = vec_sl(vB, v2);
353
354
            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
355
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
356
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
357
            vdst1 = vec_perm(vdst1, valpha, vpermute);
358
            vec_st(vdst1, 0, dst);
359
360
            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
361
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
362
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
363
            vdst2 = vec_perm(vdst2, valpha, vpermute);
364
            vec_st(vdst2, 16, dst);
365
            
366
            width -= 8;
367
            dst += 32;
368
            src += 16;
369
            vsrc = voverflow;
370
        }
371
372
        assert(width == 0);
373
374
375
        /* do scalar until we can align... */
376
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
377
#undef ONE_PIXEL_BLEND
378
379
        src += srcskip;  /* move to next row, accounting for pitch. */
380
        dst += dstskip;
381
    }
382
383
}
384
385
static void BlitNtoNKey(SDL_BlitInfo *info);
386
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
387
static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
388
{
389
    int height = info->d_height;
390
    Uint32 *srcp = (Uint32 *) info->s_pixels;
391
    int srcskip = info->s_skip;
392
    Uint32 *dstp = (Uint32 *) info->d_pixels;
393
    int dstskip = info->d_skip;
394
    SDL_PixelFormat *srcfmt = info->src;
395
    int srcbpp = srcfmt->BytesPerPixel;
396
    SDL_PixelFormat *dstfmt = info->dst;
397
    int dstbpp = dstfmt->BytesPerPixel;
398
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
399
	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
400
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
401
	Uint32 ckey = info->src->colorkey;
402
    vector unsigned int valpha;
403
    vector unsigned char vpermute;
404
    vector unsigned char vzero;
405
    vector unsigned int vckey;
406
    vector unsigned int vrgbmask;
407
    vpermute = calc_swizzle32(srcfmt, dstfmt);
408
    if (info->d_width < 16) {
409
        if(copy_alpha) {
410
            return BlitNtoNKeyCopyAlpha(info);
411
        } else {
412
            return BlitNtoNKey(info);
413
        }
414
    }
415
    vzero = vec_splat_u8(0);
416
    if (alpha) {
417
        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
418
        valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
419
    } else {
420
        valpha = (vector unsigned int)vzero;
421
    }
422
    ckey &= rgbmask;
423
    ((unsigned int *)&vckey)[0] = ckey;
424
    vckey = vec_splat(vckey, 0);
425
    ((unsigned int *)&vrgbmask)[0] = rgbmask;
426
    vrgbmask = vec_splat(vrgbmask, 0);
427
428
    while (height--) {
429
#define ONE_PIXEL_BLEND(condition, widthvar) \
430
        if (copy_alpha) { \
431
            while (condition) { \
432
                Uint32 pixel; \
433
                unsigned sR, sG, sB, sA; \
434
                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, pixel, \
435
                          sR, sG, sB, sA); \
436
                if ( (pixel & rgbmask) != ckey ) { \
437
                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
438
                            sR, sG, sB, sA); \
439
                } \
440
                ((Uint8 *)dstp) += dstbpp; \
441
                ((Uint8 *)srcp) += srcbpp; \
442
                widthvar--; \
443
            } \
444
        } else { \
445
            while (condition) { \
446
                Uint32 pixel; \
447
                unsigned sR, sG, sB; \
448
                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, pixel); \
449
                if ( pixel != ckey ) { \
450
                    RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \
451
                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
452
                              sR, sG, sB, alpha); \
453
                } \
454
                ((Uint8 *)dstp) += dstbpp; \
455
                ((Uint8 *)srcp) += srcbpp; \
456
                widthvar--; \
457
            } \
458
        }
459
        int width = info->d_width;
460
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
461
        assert(width > 0);
462
        if (width > 0) {
463
            int extrawidth = (width % 4);
464
            vector unsigned char valigner = VEC_ALIGNER(srcp);
465
            vector unsigned int vs = vec_ld(0, srcp);
466
            width -= extrawidth;
467
            assert(width >= 4);
468
            while (width) {
469
                vector unsigned char vsel;
470
                vector unsigned int vd;
471
                vector unsigned int voverflow = vec_ld(15, srcp);
472
                /* load the source vec */
473
                vs = vec_perm(vs, voverflow, valigner);
474
                /* vsel is set for items that match the key */
475
                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
476
                vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
477
                /* permute the src vec to the dest format */
478
                vs = vec_perm(vs, valpha, vpermute);
479
                /* load the destination vec */
480
                vd = vec_ld(0, dstp);
481
                /* select the source and dest into vs */
482
                vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
483
                
484
                vec_st(vd, 0, dstp);
485
                srcp += 4;
486
                width -= 4;
487
                dstp += 4;
488
                vs = voverflow;
489
            }
490
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
491
#undef ONE_PIXEL_BLEND
492
            srcp += srcskip >> 2;
493
            dstp += dstskip >> 2;
494
        }
495
    }
496
}
497
498
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
499
/* Use this on a G5 */
500
static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
501
{
502
    int height = info->d_height;
503
    Uint32 *src = (Uint32 *) info->s_pixels;
504
    int srcskip = info->s_skip;
505
    Uint32 *dst = (Uint32 *) info->d_pixels;
506
    int dstskip = info->d_skip;
507
    SDL_PixelFormat *srcfmt = info->src;
508
    int srcbpp = srcfmt->BytesPerPixel;
509
    SDL_PixelFormat *dstfmt = info->dst;
510
    int dstbpp = dstfmt->BytesPerPixel;
511
    vector unsigned int vzero = vec_splat_u32(0);
512
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
513
    if (dstfmt->Amask && !srcfmt->Amask) {
514
        if (srcfmt->alpha) {
515
            vector unsigned char valpha;
516
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
517
            vzero = (vector unsigned int)vec_splat(valpha, 0);
518
        }
519
    }
520
521
    assert(srcbpp == 4);
522
    assert(dstbpp == 4);
523
524
    while (height--) {
525
        vector unsigned char valigner;
526
        vector unsigned int vbits;
527
        vector unsigned int voverflow;
528
        Uint32 bits;
529
        Uint8 r, g, b, a;
530
531
        int width = info->d_width;
532
        int extrawidth;
533
534
        /* do scalar until we can align... */
535
        while ((UNALIGNED_PTR(dst)) && (width)) {
536
            bits = *(src++);
537
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
538
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
539
            width--;
540
        }
541
542
        /* After all that work, here's the vector part! */
543
        extrawidth = (width % 4);
544
        width -= extrawidth;
545
        valigner = VEC_ALIGNER(src);
546
        vbits = vec_ld(0, src);
547
548
       while (width) {
549
            voverflow = vec_ld(15, src);
550
            src += 4;
551
            width -= 4;
552
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
553
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
554
            vec_st(vbits, 0, dst);  /* store it back out. */
555
            dst += 4;
556
            vbits = voverflow;
557
        }
558
559
        assert(width == 0);
560
561
        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
562
        while (extrawidth) {
563
            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
564
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
565
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
566
            extrawidth--;
567
        }
568
569
        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
570
        dst += dstskip >> 2;
571
    }
572
573
}
574
575
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
576
/* Use this on a G4 */
577
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
578
{
579
    const int scalar_dst_lead = sizeof (Uint32) * 4;
580
    const int vector_dst_lead = sizeof (Uint32) * 16;
581
39
582
    int height = info->d_height;
40
/* Heheheh, we coerce Hermes into using SDL blit information */
583
    Uint32 *src = (Uint32 *) info->s_pixels;
41
#define X86_ASSEMBLER
584
    int srcskip = info->s_skip;
42
#define HermesConverterInterface	SDL_BlitInfo
585
    Uint32 *dst = (Uint32 *) info->d_pixels;
43
#define HermesClearInterface		void
586
    int dstskip = info->d_skip;
44
#define STACKCALL
587
    SDL_PixelFormat *srcfmt = info->src;
588
    int srcbpp = srcfmt->BytesPerPixel;
589
    SDL_PixelFormat *dstfmt = info->dst;
590
    int dstbpp = dstfmt->BytesPerPixel;
591
    vector unsigned int vzero = vec_splat_u32(0);
592
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
593
    if (dstfmt->Amask && !srcfmt->Amask) {
594
        if (srcfmt->alpha) {
595
            vector unsigned char valpha;
596
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
597
            vzero = (vector unsigned int)vec_splat(valpha, 0);
598
        }
599
    }
600
601
    assert(srcbpp == 4);
602
    assert(dstbpp == 4);
603
604
    while (height--) {
605
        vector unsigned char valigner;
606
        vector unsigned int vbits;
607
        vector unsigned int voverflow;
608
        Uint32 bits;
609
        Uint8 r, g, b, a;
610
611
        int width = info->d_width;
612
        int extrawidth;
613
614
        /* do scalar until we can align... */
615
        while ((UNALIGNED_PTR(dst)) && (width)) {
616
            vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
617
            vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
618
            bits = *(src++);
619
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
620
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
621
            width--;
622
        }
623
624
        /* After all that work, here's the vector part! */
625
        extrawidth = (width % 4);
626
        width -= extrawidth;
627
        valigner = VEC_ALIGNER(src);
628
        vbits = vec_ld(0, src);
629
630
        while (width) {
631
            vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
632
            vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
633
            voverflow = vec_ld(15, src);
634
            src += 4;
635
            width -= 4;
636
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
637
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
638
            vec_st(vbits, 0, dst);  /* store it back out. */
639
            dst += 4;
640
            vbits = voverflow;
641
        }
642
        
643
        assert(width == 0);
644
645
        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
646
        while (extrawidth) {
647
            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
648
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
649
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
650
            extrawidth--;
651
        }
652
653
        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
654
        dst += dstskip >> 2;
655
    }
656
45
657
    vec_dss(DST_CHAN_SRC);
46
#include "HeadMMX.h"
658
    vec_dss(DST_CHAN_DEST);
47
#include "HeadX86.h"
659
}
660
48
661
static Uint32 GetBlitFeatures( void )
662
{
663
    static Uint32 features = 0xffffffff;
664
    if (features == 0xffffffff) {
665
        /* Provide an override for testing .. */
666
        char *override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
667
        if (override) {
668
            features = 0;
669
            sscanf(override, "%u", &features);
670
        } else {
671
            features = ( 0
672
                /* Feature 1 is has-MMX */
673
                | ((SDL_HasMMX()) ? 1 : 0)
674
                /* Feature 2 is has-AltiVec */
675
                | ((SDL_HasAltiVec()) ? 2 : 0)
676
                /* Feature 4 is dont-use-prefetch */
677
                | ((GetL3CacheSize() == 0) ? 4 : 0)
678
            );
679
        }
680
    }
681
    return features;
682
}
683
#else
49
#else
684
/* Feature 1 is has-MMX */
685
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
686
#endif
687
50
688
/* This is now endian dependent */
51
/* This is now endian dependent */
689
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
52
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
Lines 694-712 Link Here
694
#define LO	1
57
#define LO	1
695
#endif
58
#endif
696
59
697
#ifdef USE_ASMBLIT
698
699
/* Heheheh, we coerce Hermes into using SDL blit information */
700
#define X86_ASSEMBLER
701
#define HermesConverterInterface	SDL_BlitInfo
702
#define HermesClearInterface		void
703
#define STACKCALL
704
705
#include "HeadMMX.h"
706
#include "HeadX86.h"
707
708
#else
709
710
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
60
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
711
#define RGB888_RGB332(dst, src) { \
61
#define RGB888_RGB332(dst, src) { \
712
	dst = (((src)&0x00E00000)>>16)| \
62
	dst = (((src)&0x00E00000)>>16)| \
Lines 1056-1062 Link Here
1056
406
1057
407
1058
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
408
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
1059
#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
409
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
410
#define RGB565_32(dst, src, map) (map[src[0]*2] + map[src[1]*2+1])
411
#else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
412
#define RGB565_32(dst, src, map) (map[src[1]*2] + map[src[0]*2+1])
413
#endif
1060
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
414
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
1061
{
415
{
1062
#ifndef USE_DUFFS_LOOP
416
#ifndef USE_DUFFS_LOOP
Lines 2068-2077 Link Here
2068
	Uint32 srcR, srcG, srcB;
1422
	Uint32 srcR, srcG, srcB;
2069
	int dstbpp;
1423
	int dstbpp;
2070
	Uint32 dstR, dstG, dstB;
1424
	Uint32 dstR, dstG, dstB;
2071
	Uint32 blit_features;
1425
	SDL_bool cpu_mmx;
2072
	void *aux_data;
1426
	void *aux_data;
2073
	SDL_loblit blitfunc;
1427
	SDL_loblit blitfunc;
2074
	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
1428
        enum { NO_ALPHA, SET_ALPHA, COPY_ALPHA } alpha;
2075
};
1429
};
2076
static const struct blit_table normal_blit_1[] = {
1430
static const struct blit_table normal_blit_1[] = {
2077
	/* Default for 8-bit RGB source, an invalid combination */
1431
	/* Default for 8-bit RGB source, an invalid combination */
Lines 2086-2096 Link Here
2086
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
1440
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
2087
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
1441
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
2088
#endif
1442
#endif
2089
#ifdef USE_ALTIVEC_BLITTERS
2090
    /* has-altivec */
2091
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
2092
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2093
#endif
2094
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
1443
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
2095
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
1444
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
2096
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
1445
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
Lines 2136-2152 Link Here
2136
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
1485
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
2137
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
1486
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
2138
#else
1487
#else
2139
#ifdef USE_ALTIVEC_BLITTERS
2140
    /* has-altivec | dont-use-prefetch */
2141
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
2142
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2143
    /* has-altivec */
2144
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
2145
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2146
    /* has-altivec */
2147
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
2148
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
2149
#endif
2150
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
1488
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
2151
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
1489
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
2152
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
1490
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
Lines 2159-2167 Link Here
2159
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
1497
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
2160
};
1498
};
2161
1499
2162
/* Mask matches table, or table entry is zero */
2163
#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
2164
2165
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
1500
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
2166
{
1501
{
2167
	struct private_swaccel *sdata;
1502
	struct private_swaccel *sdata;
Lines 2197-2208 Link Here
2197
	    else if(dstfmt->BytesPerPixel == 1)
1532
	    else if(dstfmt->BytesPerPixel == 1)
2198
		return BlitNto1Key;
1533
		return BlitNto1Key;
2199
	    else {
1534
	    else {
2200
#ifdef USE_ALTIVEC_BLITTERS
2201
        if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
2202
            return Blit32to32KeyAltivec;
2203
        } else
2204
#endif
2205
2206
		if(srcfmt->Amask && dstfmt->Amask)
1535
		if(srcfmt->Amask && dstfmt->Amask)
2207
		    return BlitNtoNKeyCopyAlpha;
1536
		    return BlitNtoNKeyCopyAlpha;
2208
		else
1537
		else
Lines 2232-2251 Link Here
2232
		}
1561
		}
2233
	} else {
1562
	} else {
2234
		/* Now the meat, choose the blitter we want */
1563
		/* Now the meat, choose the blitter we want */
2235
		int a_need = 0;
1564
	        int a_need = 0;
2236
		if(dstfmt->Amask)
1565
		if(dstfmt->Amask)
2237
		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
1566
		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
2238
		table = normal_blit[srcfmt->BytesPerPixel-1];
1567
		table = normal_blit[srcfmt->BytesPerPixel-1];
2239
		for ( which=0; table[which].dstbpp; ++which ) {
1568
		for ( which=0; table[which].srcR; ++which ) {
2240
			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
1569
			if ( srcfmt->Rmask == table[which].srcR &&
2241
			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
1570
			     srcfmt->Gmask == table[which].srcG &&
2242
			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
1571
			     srcfmt->Bmask == table[which].srcB &&
2243
			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
1572
			     dstfmt->BytesPerPixel == table[which].dstbpp &&
2244
			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
1573
			     dstfmt->Rmask == table[which].dstR &&
2245
			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
1574
			     dstfmt->Gmask == table[which].dstG &&
2246
			    dstfmt->BytesPerPixel == table[which].dstbpp &&
1575
			     dstfmt->Bmask == table[which].dstB &&
2247
			    (a_need & table[which].alpha) == a_need &&
1576
			     (a_need & table[which].alpha) == a_need &&
2248
			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
1577
			     (table[which].cpu_mmx == SDL_HasMMX())) 
2249
				break;
1578
				break;
2250
		}
1579
		}
2251
		sdata->aux_data = table[which].aux_data;
1580
		sdata->aux_data = table[which].aux_data;

Return to bug 104533