Attachment 67787 Details for Bug 104533 – hack

[patch] hack

newdiff.patch (text/plain), 28.17 KB, created by Aaron on 2005-09-06 18:59:57 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Aaron

Created: 2005-09-06 18:59:57 UTC

Size: 28.17 KB

patch

obsolete

>--- SDL-1.2.9/src/video/SDL_blit_N.c	2005-04-19 22:57:39.000000000 -0700
>+++ SDL-1.2.8/src/video/SDL_blit_N.c	2004-02-18 10:22:04.000000000 -0700
>@@ -22,7 +22,7 @@
> 
> #ifdef SAVE_RCSID
> static char rcsid =
>- "@(#) $Id: SDL_blit_N.c,v 1.11 2005/04/20 05:57:39 icculus Exp $";
>+ "@(#) $Id: SDL_blit_N.c,v 1.9 2004/01/04 16:49:21 slouken Exp $";
> #endif
> 
> #include <stdio.h>
>@@ -35,655 +35,18 @@
> 
> /* Functions to blit from N-bit surfaces to other surfaces */
> 
>-#ifdef USE_ALTIVEC_BLITTERS
>-#include <assert.h>
>-#ifdef MACOSX
>-#include <sys/sysctl.h>
>-#include <stdlib.h>
>-static size_t GetL3CacheSize( void )
>-{
>-    const char key[] = "hw.l3cachesize";
>-    u_int64_t result = 0;
>-    size_t typeSize = sizeof( result );
>-
>-
>-    int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
>-    if( 0 != err ) return 0;
>-
>-    return result;
>-}
>-#else
>-static size_t GetL3CacheSize( void )
>-{
>-    /* XXX: Just guess G4 */
>-    return 2097152;
>-}
>-#endif /* MACOSX */
>-
>-#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
>-#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
>-                               ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
>-                                 0x04+a, 0x04+b, 0x04+c, 0x04+d, \
>-                                 0x08+a, 0x08+b, 0x08+c, 0x08+d, \
>-                                 0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
>-
>-#define MAKE8888(dstfmt, r, g, b, a)  \
>-    ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
>-      ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
>-      ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
>-      ((a<<dstfmt->Ashift)&dstfmt->Amask) )
>-
>-/*
>- * Data Stream Touch...Altivec cache prefetching.
>- *
>- *  Don't use this on a G5...however, the speed boost is very significant
>- *   on a G4.
>- */
>-#define DST_CHAN_SRC 1
>-#define DST_CHAN_DEST 2
>-
>-/* macro to set DST control word value... */
>-#define DST_CTRL(size, count, stride) \
>-    (((size) << 24) | ((count) << 16) | (stride))
>-
>-#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
>-    ? vec_lvsl(0, src) \
>-    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
>-
>-/* Calculate the permute vector used for 32->32 swizzling */
>-static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
>-                                  const SDL_PixelFormat *dstfmt)
>-{
>-    /*
>-    * We have to assume that the bits that aren't used by other
>-     *  colors is alpha, and it's one complete byte, since some formats
>-     *  leave alpha with a zero mask, but we should still swizzle the bits.
>-     */
>-    /* ARGB */
>-    const static struct SDL_PixelFormat default_pixel_format = {
>-        NULL, 0, 0,
>-        0, 0, 0, 0,
>-        16, 8, 0, 24,
>-        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
>-        0, 0};
>-    if (!srcfmt) {
>-        srcfmt = &default_pixel_format;
>-    }
>-    if (!dstfmt) {
>-        dstfmt = &default_pixel_format;
>-    }
>-    vector unsigned char plus = (vector unsigned char)( 0x00, 0x00, 0x00, 0x00,
>-                                      0x04, 0x04, 0x04, 0x04,
>-                                      0x08, 0x08, 0x08, 0x08,
>-                                      0x0C, 0x0C, 0x0C, 0x0C );
>-    vector unsigned char vswiz;
>-    vector unsigned int srcvec;
>-#define RESHIFT(X) (3 - ((X) >> 3))
>-    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
>-    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
>-    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
>-    Uint32 amask;
>-    /* Use zero for alpha if either surface doesn't have alpha */
>-    if (dstfmt->Amask) {
>-        amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
>-    } else {    
>-        amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
>-    }           
>-#undef RESHIFT  
>-    ((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask);
>-    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
>-    return(vswiz);
>-}
>-
>-static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
>-static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
>-    int height = info->d_height;
>-    Uint8 *src = (Uint8 *) info->s_pixels;
>-    int srcskip = info->s_skip;
>-    Uint8 *dst = (Uint8 *) info->d_pixels;
>-    int dstskip = info->d_skip;
>-    SDL_PixelFormat *srcfmt = info->src;
>-    vector unsigned char valpha = vec_splat_u8(0);
>-    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
>-    vector unsigned char vgmerge = (vector unsigned char)(
>-        0x00, 0x02, 0x00, 0x06,
>-        0x00, 0x0a, 0x00, 0x0e,
>-        0x00, 0x12, 0x00, 0x16,
>-        0x00, 0x1a, 0x00, 0x1e);
>-    vector unsigned short v1 = vec_splat_u16(1);
>-    vector unsigned short v3 = vec_splat_u16(3);
>-    vector unsigned short v3f = (vector unsigned short)(
>-        0x003f, 0x003f, 0x003f, 0x003f,
>-        0x003f, 0x003f, 0x003f, 0x003f);
>-    vector unsigned short vfc = (vector unsigned short)(
>-        0x00fc, 0x00fc, 0x00fc, 0x00fc,
>-        0x00fc, 0x00fc, 0x00fc, 0x00fc);
>-    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
>-    vf800 = vec_sl(vf800, vec_splat_u16(8));
>-
>-    while (height--) {
>-        vector unsigned char valigner;
>-        vector unsigned char voverflow;
>-        vector unsigned char vsrc;
>-
>-        int width = info->d_width;
>-        int extrawidth;
>-
>-        /* do scalar until we can align... */
>-#define ONE_PIXEL_BLEND(condition, widthvar) \
>-        while (condition) { \
>-            Uint32 pixel; \
>-            unsigned sR, sG, sB, sA; \
>-            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, pixel, \
>-                          sR, sG, sB, sA); \
>-            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
>-                                ((sG << 3) & 0x000007E0) | \
>-                                ((sB >> 3) & 0x0000001F)); \
>-            dst += 2; \
>-            src += 4; \
>-            widthvar--; \
>-        }
>-
>-        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
>-
>-        /* After all that work, here's the vector part! */
>-        extrawidth = (width % 8);  /* trailing unaligned stores */
>-        width -= extrawidth;
>-        vsrc = vec_ld(0, src);
>-        valigner = VEC_ALIGNER(src);
>-
>-        while (width) {
>-            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
>-            vector unsigned int vsrc1, vsrc2;
>-            vector unsigned char vdst;
>-
>-            voverflow = vec_ld(15, src);
>-            vsrc = vec_perm(vsrc, voverflow, valigner);
>-            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
>-            src += 16;
>-            vsrc = voverflow;
>-            voverflow = vec_ld(15, src);
>-            vsrc = vec_perm(vsrc, voverflow, valigner);
>-            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
>-            /* 1555 */
>-            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
>-            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
>-            vgpixel = vec_and(vgpixel, vfc);
>-            vgpixel = vec_sl(vgpixel, v3);
>-            vrpixel = vec_sl(vpixel, v1);
>-            vrpixel = vec_and(vrpixel, vf800);
>-            vbpixel = vec_and(vpixel, v3f);
>-            vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
>-            /* 565 */
>-            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
>-            vec_st(vdst, 0, dst);
>-
>-            width -= 8;
>-            src += 16;
>-            dst += 16;
>-            vsrc = voverflow;
>-        }
>-
>-        assert(width == 0);
>-
>-
>-        /* do scalar until we can align... */
>-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
>-#undef ONE_PIXEL_BLEND
>-
>-        src += srcskip;  /* move to next row, accounting for pitch. */
>-        dst += dstskip;
>-    }
>-
>-
>-}
>-
>-static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
>-    int height = info->d_height;
>-    Uint8 *src = (Uint8 *) info->s_pixels;
>-    int srcskip = info->s_skip;
>-    Uint8 *dst = (Uint8 *) info->d_pixels;
>-    int dstskip = info->d_skip;
>-    SDL_PixelFormat *srcfmt = info->src;
>-    SDL_PixelFormat *dstfmt = info->dst;
>-    unsigned alpha;
>-    vector unsigned char valpha;
>-    vector unsigned char vpermute;
>-    vector unsigned short vf800;
>-    vector unsigned int v8 = vec_splat_u32(8);
>-    vector unsigned int v16 = vec_add(v8, v8);
>-    vector unsigned short v2 = vec_splat_u16(2);
>-    vector unsigned short v3 = vec_splat_u16(3);
>-    /* 
>-        0x10 - 0x1f is the alpha
>-        0x00 - 0x0e evens are the red
>-        0x01 - 0x0f odds are zero
>-    */
>-    vector unsigned char vredalpha1 = (vector unsigned char)(
>-        0x10, 0x00, 0x01, 0x01,
>-        0x10, 0x02, 0x01, 0x01,
>-        0x10, 0x04, 0x01, 0x01,
>-        0x10, 0x06, 0x01, 0x01
>-    );
>-    vector unsigned char vredalpha2 = (vector unsigned char)(
>-        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
>-    );
>-    /*
>-        0x00 - 0x0f is ARxx ARxx ARxx ARxx
>-        0x11 - 0x0f odds are blue
>-    */
>-    vector unsigned char vblue1 = (vector unsigned char)(
>-        0x00, 0x01, 0x02, 0x11,
>-        0x04, 0x05, 0x06, 0x13,
>-        0x08, 0x09, 0x0a, 0x15,
>-        0x0c, 0x0d, 0x0e, 0x17
>-    );
>-    vector unsigned char vblue2 = (vector unsigned char)(
>-        vec_add((vector unsigned int)vblue1, v8)
>-    );
>-    /*
>-        0x00 - 0x0f is ARxB ARxB ARxB ARxB
>-        0x10 - 0x0e evens are green
>-    */
>-    vector unsigned char vgreen1 = (vector unsigned char)(
>-        0x00, 0x01, 0x10, 0x03,
>-        0x04, 0x05, 0x12, 0x07,
>-        0x08, 0x09, 0x14, 0x0b,
>-        0x0c, 0x0d, 0x16, 0x0f
>-    );
>-    vector unsigned char vgreen2 = (vector unsigned char)(
>-        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
>-    );
>-    
>-
>-    assert(srcfmt->BytesPerPixel == 2);
>-    assert(dstfmt->BytesPerPixel == 4);
>-
>-    vf800 = (vector unsigned short)vec_splat_u8(-7);
>-    vf800 = vec_sl(vf800, vec_splat_u16(8));
>-
>-    if (dstfmt->Amask && srcfmt->alpha) {
>-        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
>-        valpha = vec_splat(valpha, 0);
>-    } else {
>-        alpha = 0;
>-        valpha = vec_splat_u8(0);
>-    }
>-
>-    vpermute = calc_swizzle32(NULL, dstfmt);
>-    while (height--) {
>-        vector unsigned char valigner;
>-        vector unsigned char voverflow;
>-        vector unsigned char vsrc;
>-
>-        int width = info->d_width;
>-        int extrawidth;
>-
>-        /* do scalar until we can align... */
>-#define ONE_PIXEL_BLEND(condition, widthvar) \
>-        while (condition) { \
>-            unsigned sR, sG, sB; \
>-            unsigned short pixel = *((unsigned short *)src); \
>-            sR = (pixel >> 8) & 0xf8; \
>-            sG = (pixel >> 3) & 0xfc; \
>-            sB = (pixel << 3) & 0xf8; \
>-            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
>-            src += 2; \
>-            dst += 4; \
>-            widthvar--; \
>-        }
>-        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
>-
>-        /* After all that work, here's the vector part! */
>-        extrawidth = (width % 8);  /* trailing unaligned stores */
>-        width -= extrawidth;
>-        vsrc = vec_ld(0, src);
>-        valigner = VEC_ALIGNER(src);
>-
>-        while (width) {
>-            vector unsigned short vR, vG, vB;
>-            vector unsigned char vdst1, vdst2;
>-
>-            voverflow = vec_ld(15, src);
>-            vsrc = vec_perm(vsrc, voverflow, valigner);
>-
>-            vR = vec_and((vector unsigned short)vsrc, vf800);
>-            vB = vec_sl((vector unsigned short)vsrc, v3);
>-            vG = vec_sl(vB, v2);
>-
>-            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
>-            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
>-            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
>-            vdst1 = vec_perm(vdst1, valpha, vpermute);
>-            vec_st(vdst1, 0, dst);
>-
>-            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
>-            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
>-            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
>-            vdst2 = vec_perm(vdst2, valpha, vpermute);
>-            vec_st(vdst2, 16, dst);
>-            
>-            width -= 8;
>-            dst += 32;
>-            src += 16;
>-            vsrc = voverflow;
>-        }
>-
>-        assert(width == 0);
>-
>-
>-        /* do scalar until we can align... */
>-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
>-#undef ONE_PIXEL_BLEND
>-
>-        src += srcskip;  /* move to next row, accounting for pitch. */
>-        dst += dstskip;
>-    }
>-
>-}
>-
>-static void BlitNtoNKey(SDL_BlitInfo *info);
>-static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
>-static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
>-{
>-    int height = info->d_height;
>-    Uint32 *srcp = (Uint32 *) info->s_pixels;
>-    int srcskip = info->s_skip;
>-    Uint32 *dstp = (Uint32 *) info->d_pixels;
>-    int dstskip = info->d_skip;
>-    SDL_PixelFormat *srcfmt = info->src;
>-    int srcbpp = srcfmt->BytesPerPixel;
>-    SDL_PixelFormat *dstfmt = info->dst;
>-    int dstbpp = dstfmt->BytesPerPixel;
>-    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
>-	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
>-    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
>-	Uint32 ckey = info->src->colorkey;
>-    vector unsigned int valpha;
>-    vector unsigned char vpermute;
>-    vector unsigned char vzero;
>-    vector unsigned int vckey;
>-    vector unsigned int vrgbmask;
>-    vpermute = calc_swizzle32(srcfmt, dstfmt);
>-    if (info->d_width < 16) {
>-        if(copy_alpha) {
>-            return BlitNtoNKeyCopyAlpha(info);
>-        } else {
>-            return BlitNtoNKey(info);
>-        }
>-    }
>-    vzero = vec_splat_u8(0);
>-    if (alpha) {
>-        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
>-        valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
>-    } else {
>-        valpha = (vector unsigned int)vzero;
>-    }
>-    ckey &= rgbmask;
>-    ((unsigned int *)&vckey)[0] = ckey;
>-    vckey = vec_splat(vckey, 0);
>-    ((unsigned int *)&vrgbmask)[0] = rgbmask;
>-    vrgbmask = vec_splat(vrgbmask, 0);
>-
>-    while (height--) {
>-#define ONE_PIXEL_BLEND(condition, widthvar) \
>-        if (copy_alpha) { \
>-            while (condition) { \
>-                Uint32 pixel; \
>-                unsigned sR, sG, sB, sA; \
>-                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, pixel, \
>-                          sR, sG, sB, sA); \
>-                if ( (pixel & rgbmask) != ckey ) { \
>-                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
>-                            sR, sG, sB, sA); \
>-                } \
>-                ((Uint8 *)dstp) += dstbpp; \
>-                ((Uint8 *)srcp) += srcbpp; \
>-                widthvar--; \
>-            } \
>-        } else { \
>-            while (condition) { \
>-                Uint32 pixel; \
>-                unsigned sR, sG, sB; \
>-                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, pixel); \
>-                if ( pixel != ckey ) { \
>-                    RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \
>-                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
>-                              sR, sG, sB, alpha); \
>-                } \
>-                ((Uint8 *)dstp) += dstbpp; \
>-                ((Uint8 *)srcp) += srcbpp; \
>-                widthvar--; \
>-            } \
>-        }
>-        int width = info->d_width;
>-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
>-        assert(width > 0);
>-        if (width > 0) {
>-            int extrawidth = (width % 4);
>-            vector unsigned char valigner = VEC_ALIGNER(srcp);
>-            vector unsigned int vs = vec_ld(0, srcp);
>-            width -= extrawidth;
>-            assert(width >= 4);
>-            while (width) {
>-                vector unsigned char vsel;
>-                vector unsigned int vd;
>-                vector unsigned int voverflow = vec_ld(15, srcp);
>-                /* load the source vec */
>-                vs = vec_perm(vs, voverflow, valigner);
>-                /* vsel is set for items that match the key */
>-                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
>-                vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
>-                /* permute the src vec to the dest format */
>-                vs = vec_perm(vs, valpha, vpermute);
>-                /* load the destination vec */
>-                vd = vec_ld(0, dstp);
>-                /* select the source and dest into vs */
>-                vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
>-                
>-                vec_st(vd, 0, dstp);
>-                srcp += 4;
>-                width -= 4;
>-                dstp += 4;
>-                vs = voverflow;
>-            }
>-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
>-#undef ONE_PIXEL_BLEND
>-            srcp += srcskip >> 2;
>-            dstp += dstskip >> 2;
>-        }
>-    }
>-}
>-
>-/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
>-/* Use this on a G5 */
>-static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
>-{
>-    int height = info->d_height;
>-    Uint32 *src = (Uint32 *) info->s_pixels;
>-    int srcskip = info->s_skip;
>-    Uint32 *dst = (Uint32 *) info->d_pixels;
>-    int dstskip = info->d_skip;
>-    SDL_PixelFormat *srcfmt = info->src;
>-    int srcbpp = srcfmt->BytesPerPixel;
>-    SDL_PixelFormat *dstfmt = info->dst;
>-    int dstbpp = dstfmt->BytesPerPixel;
>-    vector unsigned int vzero = vec_splat_u32(0);
>-    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
>-    if (dstfmt->Amask && !srcfmt->Amask) {
>-        if (srcfmt->alpha) {
>-            vector unsigned char valpha;
>-            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
>-            vzero = (vector unsigned int)vec_splat(valpha, 0);
>-        }
>-    }
>-
>-    assert(srcbpp == 4);
>-    assert(dstbpp == 4);
>-
>-    while (height--) {
>-        vector unsigned char valigner;
>-        vector unsigned int vbits;
>-        vector unsigned int voverflow;
>-        Uint32 bits;
>-        Uint8 r, g, b, a;
>-
>-        int width = info->d_width;
>-        int extrawidth;
>-
>-        /* do scalar until we can align... */
>-        while ((UNALIGNED_PTR(dst)) && (width)) {
>-            bits = *(src++);
>-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
>-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
>-            width--;
>-        }
>-
>-        /* After all that work, here's the vector part! */
>-        extrawidth = (width % 4);
>-        width -= extrawidth;
>-        valigner = VEC_ALIGNER(src);
>-        vbits = vec_ld(0, src);
>-
>-       while (width) {
>-            voverflow = vec_ld(15, src);
>-            src += 4;
>-            width -= 4;
>-            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
>-            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
>-            vec_st(vbits, 0, dst);  /* store it back out. */
>-            dst += 4;
>-            vbits = voverflow;
>-        }
>-
>-        assert(width == 0);
>-
>-        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
>-        while (extrawidth) {
>-            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
>-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
>-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
>-            extrawidth--;
>-        }
>-
>-        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
>-        dst += dstskip >> 2;
>-    }
>-
>-}
>-
>-/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
>-/* Use this on a G4 */
>-static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
>-{
>-    const int scalar_dst_lead = sizeof (Uint32) * 4;
>-    const int vector_dst_lead = sizeof (Uint32) * 16;
>+#ifdef USE_ASMBLIT
> 
>-    int height = info->d_height;
>-    Uint32 *src = (Uint32 *) info->s_pixels;
>-    int srcskip = info->s_skip;
>-    Uint32 *dst = (Uint32 *) info->d_pixels;
>-    int dstskip = info->d_skip;
>-    SDL_PixelFormat *srcfmt = info->src;
>-    int srcbpp = srcfmt->BytesPerPixel;
>-    SDL_PixelFormat *dstfmt = info->dst;
>-    int dstbpp = dstfmt->BytesPerPixel;
>-    vector unsigned int vzero = vec_splat_u32(0);
>-    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
>-    if (dstfmt->Amask && !srcfmt->Amask) {
>-        if (srcfmt->alpha) {
>-            vector unsigned char valpha;
>-            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
>-            vzero = (vector unsigned int)vec_splat(valpha, 0);
>-        }
>-    }
>-
>-    assert(srcbpp == 4);
>-    assert(dstbpp == 4);
>-
>-    while (height--) {
>-        vector unsigned char valigner;
>-        vector unsigned int vbits;
>-        vector unsigned int voverflow;
>-        Uint32 bits;
>-        Uint8 r, g, b, a;
>-
>-        int width = info->d_width;
>-        int extrawidth;
>-
>-        /* do scalar until we can align... */
>-        while ((UNALIGNED_PTR(dst)) && (width)) {
>-            vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
>-            vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
>-            bits = *(src++);
>-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
>-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
>-            width--;
>-        }
>-
>-        /* After all that work, here's the vector part! */
>-        extrawidth = (width % 4);
>-        width -= extrawidth;
>-        valigner = VEC_ALIGNER(src);
>-        vbits = vec_ld(0, src);
>-
>-        while (width) {
>-            vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
>-            vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
>-            voverflow = vec_ld(15, src);
>-            src += 4;
>-            width -= 4;
>-            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
>-            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
>-            vec_st(vbits, 0, dst);  /* store it back out. */
>-            dst += 4;
>-            vbits = voverflow;
>-        }
>-        
>-        assert(width == 0);
>-
>-        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
>-        while (extrawidth) {
>-            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
>-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
>-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
>-            extrawidth--;
>-        }
>-
>-        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
>-        dst += dstskip >> 2;
>-    }
>+/* Heheheh, we coerce Hermes into using SDL blit information */
>+#define X86_ASSEMBLER
>+#define HermesConverterInterface	SDL_BlitInfo
>+#define HermesClearInterface		void
>+#define STACKCALL
> 
>-    vec_dss(DST_CHAN_SRC);
>-    vec_dss(DST_CHAN_DEST);
>-}
>+#include "HeadMMX.h"
>+#include "HeadX86.h"
> 
>-static Uint32 GetBlitFeatures( void )
>-{
>-    static Uint32 features = 0xffffffff;
>-    if (features == 0xffffffff) {
>-        /* Provide an override for testing .. */
>-        char *override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
>-        if (override) {
>-            features = 0;
>-            sscanf(override, "%u", &features);
>-        } else {
>-            features = ( 0
>-                /* Feature 1 is has-MMX */
>-                | ((SDL_HasMMX()) ? 1 : 0)
>-                /* Feature 2 is has-AltiVec */
>-                | ((SDL_HasAltiVec()) ? 2 : 0)
>-                /* Feature 4 is dont-use-prefetch */
>-                | ((GetL3CacheSize() == 0) ? 4 : 0)
>-            );
>-        }
>-    }
>-    return features;
>-}
> #else
>-/* Feature 1 is has-MMX */
>-#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
>-#endif
> 
> /* This is now endian dependent */
> #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
>@@ -694,19 +57,6 @@
> #define LO	1
> #endif
> 
>-#ifdef USE_ASMBLIT
>-
>-/* Heheheh, we coerce Hermes into using SDL blit information */
>-#define X86_ASSEMBLER
>-#define HermesConverterInterface	SDL_BlitInfo
>-#define HermesClearInterface		void
>-#define STACKCALL
>-
>-#include "HeadMMX.h"
>-#include "HeadX86.h"
>-
>-#else
>-
> /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
> #define RGB888_RGB332(dst, src) { \
> 	dst = (((src)&0x00E00000)>>16)| \
>@@ -1056,7 +406,11 @@
> 
> 
> /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
>-#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
>+#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
>+#define RGB565_32(dst, src, map) (map[src[0]*2] + map[src[1]*2+1])
>+#else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
>+#define RGB565_32(dst, src, map) (map[src[1]*2] + map[src[0]*2+1])
>+#endif
> static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
> {
> #ifndef USE_DUFFS_LOOP
>@@ -2068,10 +1422,10 @@
> 	Uint32 srcR, srcG, srcB;
> 	int dstbpp;
> 	Uint32 dstR, dstG, dstB;
>-	Uint32 blit_features;
>+	SDL_bool cpu_mmx;
> 	void *aux_data;
> 	SDL_loblit blitfunc;
>-	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
>+        enum { NO_ALPHA, SET_ALPHA, COPY_ALPHA } alpha;
> };
> static const struct blit_table normal_blit_1[] = {
> 	/* Default for 8-bit RGB source, an invalid combination */
>@@ -2086,11 +1440,6 @@
>     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
>       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
> #endif
>-#ifdef USE_ALTIVEC_BLITTERS
>-    /* has-altivec */
>-    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
>-      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
>-#endif
>     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
>       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
>     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
>@@ -2136,17 +1485,6 @@
>     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
>       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
> #else
>-#ifdef USE_ALTIVEC_BLITTERS
>-    /* has-altivec | dont-use-prefetch */
>-    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
>-      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
>-    /* has-altivec */
>-    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
>-      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
>-    /* has-altivec */
>-    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
>-      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
>-#endif
>     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
>       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
>     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
>@@ -2159,9 +1497,6 @@
> 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
> };
> 
>-/* Mask matches table, or table entry is zero */
>-#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
>-
> SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
> {
> 	struct private_swaccel *sdata;
>@@ -2197,12 +1532,6 @@
> 	    else if(dstfmt->BytesPerPixel == 1)
> 		return BlitNto1Key;
> 	    else {
>-#ifdef USE_ALTIVEC_BLITTERS
>-        if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
>-            return Blit32to32KeyAltivec;
>-        } else
>-#endif
>-
> 		if(srcfmt->Amask && dstfmt->Amask)
> 		    return BlitNtoNKeyCopyAlpha;
> 		else
>@@ -2232,20 +1561,20 @@
> 		}
> 	} else {
> 		/* Now the meat, choose the blitter we want */
>-		int a_need = 0;
>+	        int a_need = 0;
> 		if(dstfmt->Amask)
> 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
> 		table = normal_blit[srcfmt->BytesPerPixel-1];
>-		for ( which=0; table[which].dstbpp; ++which ) {
>-			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
>-			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
>-			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
>-			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
>-			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
>-			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
>-			    dstfmt->BytesPerPixel == table[which].dstbpp &&
>-			    (a_need & table[which].alpha) == a_need &&
>-			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
>+		for ( which=0; table[which].srcR; ++which ) {
>+			if ( srcfmt->Rmask == table[which].srcR &&
>+			     srcfmt->Gmask == table[which].srcG &&
>+			     srcfmt->Bmask == table[which].srcB &&
>+			     dstfmt->BytesPerPixel == table[which].dstbpp &&
>+			     dstfmt->Rmask == table[which].dstR &&
>+			     dstfmt->Gmask == table[which].dstG &&
>+			     dstfmt->Bmask == table[which].dstB &&
>+			     (a_need & table[which].alpha) == a_need &&
>+			     (table[which].cpu_mmx == SDL_HasMMX())) 
> 				break;
> 		}
> 		sdata->aux_data = table[which].aux_data;

Actions: View | Diff

Attachments on bug 104533: 67787 | 67789 | 67790