Gentoo's Bugzilla – Attachment 71054 Details for Bug 109270: liboil-0.3.3 ppc build fails
Attachment 71054: [patch] patch for liboil on ppc

Description: patch for liboil on ppc
Filename:    liboil-gcc-altivec.patch
MIME Type:   text/plain
Creator:     Andrew de Quincey
Created:     2005-10-19 18:01:04 UTC
Size:        31.48 KB
Flags:       patch, obsolete
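The build failure this patch addresses comes from liboil/powerpc/recon8x8_altivec.c, which wraps its AltiVec code in asm { ... } blocks (CodeWarrior-style syntax that GCC does not accept). The patch below mechanically rewrites the three reconstruction routines into GCC's basic inline-asm form: the same instructions, each as a quoted string ending in \n, inside asm( ... );. A minimal sketch of the rewrite pattern (illustrative only, not an excerpt from the patch):

    /* Before: compiler-specific block syntax, rejected by GCC */
    asm {
        vspltish v1,7
    }

    /* After: GCC basic asm, as the patch does throughout */
    asm(
        "vspltish v1,7\n"
    );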
diff -Naur liboil-0.3.3/liboil/powerpc/recon8x8_altivec.c liboil-0.3.3.fix/liboil/powerpc/recon8x8_altivec.c
--- liboil-0.3.3/liboil/powerpc/recon8x8_altivec.c 2005-08-14 12:55:33.000000000 +0100
+++ liboil-0.3.3.fix/liboil/powerpc/recon8x8_altivec.c 2005-10-20 01:56:22.000000000 +0100
@@ -46,131 +46,131 @@
 /* r3, r4, r5 */
 recon8x8_intra_altivec (uint8_t *dest, int16_t *change, int ds)
 {
- asm {
+ asm(
 //trying cache hints
- lis r6,0x0108
- or r6,r6,r5
- dstst r3,r6,0
+ "lis r6,0x0108\n"
+ "or r6,r6,r5\n"
+ "dstst r3,r6,0\n"

- vspltish v1,7
+ "vspltish v1,7\n"

- vspltish v8,1
- xor r6,r6,r6
+ "vspltish v8,1\n\n"
+ "xor r6,r6,r6\n"

- lvx v0,r4,r6 //get 8 shorts
- vslh v8,v8,v1 //now have 128
- addi r6,r6,16
+ "lvx v0,r4,r6\n" //get 8 shorts
+ "vslh v8,v8,v1\n" //now have 128
+ "addi r6,r6,16\n"

- lvx v1,r4,r6 //get 8 shorts
- vaddshs v0,v0,v8 //+=128
- addi r6,r6,16
+ "lvx v1,r4,r6\n" //get 8 shorts
+ "vaddshs v0,v0,v8\n" //+=128
+ "addi r6,r6,16\n"

- lvx v2,r4,r6 //get 8 shorts
- vaddshs v1,v1,v8 //+=128
- addi r6,r6,16
- vpkshus v0,v0,v0 //convert to bytes
+ "lvx v2,r4,r6\n" //get 8 shorts
+ "vaddshs v1,v1,v8\n" //+=128
+ "addi r6,r6,16\n"
+ "vpkshus v0,v0,v0\n" //convert to bytes

- lvx v3,r4,r6 //get 8 shorts
- vaddshs v2,v2,v8 //+=128
- addi r6,r6,16
- vpkshus v1,v1,v1 //convert to bytes
+ "lvx v3,r4,r6\n" //get 8 shorts
+ "vaddshs v2,v2,v8\n" //+=128
+ "addi r6,r6,16\n"
+ "vpkshus v1,v1,v1\n" //convert to bytes

- lvx v4,r4,r6 //get 8 shorts
- vaddshs v3,v3,v8 //+=128
- addi r6,r6,16
- vpkshus v2,v2,v2 //convert to bytes
+ "lvx v4,r4,r6\n" //get 8 shorts
+ "vaddshs v3,v3,v8\n" //+=128
+ "addi r6,r6,16\n"
+ "vpkshus v2,v2,v2\n" //convert to bytes

- lvx v5,r4,r6 //get 8 shorts
- vaddshs v4,v4,v8 //+=128
- addi r6,r6,16
- vpkshus v3,v3,v3 //convert to bytes
+ "lvx v5,r4,r6\n" //get 8 shorts
+ "vaddshs v4,v4,v8\n" //+=128
+ "addi r6,r6,16\n"
+ "vpkshus v3,v3,v3\n" //convert to bytes

- lvx v6,r4,r6 //get 8 shorts
- vaddshs v5,v5,v8 //+=128
- addi r6,r6,16
- vpkshus v4,v4,v4 //convert to bytes
+ "lvx v6,r4,r6\n" //get 8 shorts
+ "vaddshs v5,v5,v8\n" //+=128
+ "addi r6,r6,16\n"
+ "vpkshus v4,v4,v4\n" //convert to bytes

- lvx v7,r4,r6 //get 8 shorts
- xor r6,r6,r6
- vaddshs v6,v6,v8 //+=128
- vpkshus v5,v5,v5 //convert to bytes
+ "lvx v7,r4,r6\n" //get 8 shorts
+ "xor r6,r6,r6\n"
+ "vaddshs v6,v6,v8\n" //+=128
+ "vpkshus v5,v5,v5\n" //convert to bytes

- lvsr v9,r3,r6 //load alignment vector for stores
- vaddshs v7,v7,v8 //+=128
- vpkshus v6,v6,v6 //convert to bytes
+ "lvsr v9,r3,r6\n" //load alignment vector for stores
+ "vaddshs v7,v7,v8\n" //+=128
+ "vpkshus v6,v6,v6\n" //convert to bytes

- vpkshus v7,v7,v7 //convert to bytes
+ "vpkshus v7,v7,v7\n" //convert to bytes

- li r7,4
- vperm v0,v0,v0,v9
+ "li r7,4\n"
+ "vperm v0,v0,v0,v9\n"

- stvewx v0,r3,r6
- add r6,r6,r5
+ "stvewx v0,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v0,r3,r7
- add r7,r7,r5
- vperm v1,v1,v1,v9
+ "stvewx v0,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v1,v1,v1,v9\n"

- stvewx v1,r3,r6
- add r6,r6,r5
+ "stvewx v1,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v1,r3,r7
- add r7,r7,r5
- vperm v2,v2,v2,v9
+ "stvewx v1,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v2,v2,v2,v9\n"

- stvewx v2,r3,r6
- add r6,r6,r5
+ "stvewx v2,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v2,r3,r7
- add r7,r7,r5
- vperm v3,v3,v3,v9
+ "stvewx v2,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v3,v3,v3,v9\n"

- stvewx v3,r3,r6
- add r6,r6,r5
+ "stvewx v3,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v3,r3,r7
- add r7,r7,r5
- vperm v4,v4,v4,v9
+ "stvewx v3,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v4,v4,v4,v9\n"

- stvewx v4,r3,r6
- add r6,r6,r5
+ "stvewx v4,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v4,r3,r7
- add r7,r7,r5
- vperm v5,v5,v5,v9
+ "stvewx v4,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v5,v5,v5,v9\n"

- stvewx v5,r3,r6
- add r6,r6,r5
+ "stvewx v5,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v5,r3,r7
- add r7,r7,r5
- vperm v6,v6,v6,v9
+ "stvewx v5,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v6,v6,v6,v9\n"

- stvewx v6,r3,r6
- add r6,r6,r5
+ "stvewx v6,r3,r6\n"
+ "add r6,r6,r5\n"

- lvsr v9,r3,r6 //load alignment vector for stores
+ "lvsr v9,r3,r6\n" //load alignment vector for stores

- stvewx v6,r3,r7
- add r7,r7,r5
- vperm v7,v7,v7,v9
+ "stvewx v6,r3,r7\n"
+ "add r7,r7,r5\n"
+ "vperm v7,v7,v7,v9\n"

- stvewx v7,r3,r6
+ "stvewx v7,r3,r6\n"

- stvewx v7,r3,r7
- }
+ "stvewx v7,r3,r7\n"
+ );
 }

 OIL_DEFINE_IMPL_FULL (recon8x8_intra_altivec, recon8x8_intra, OIL_IMPL_FLAG_ALTIVEC);
@@ -178,217 +178,216 @@
 static void /* r3, r4, r5, r6 */
 recon8x8_inter_altivec (uint8_t *dest, uint8_t *src, int16_t *change, int dss)
 {
- asm
- {
+ asm(
 //trying cache hints
- lis r7,0x0108
- or r7,r7,r6
- dstst r3,r7,0
+ "lis r7,0x0108\n"
+ "or r7,r7,r6\n"
+ "dstst r3,r7,0\n"

- xor r7,r7,r7
- li r8,16
+ "xor r7,r7,r7\n"
+ "li r8,16\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- vxor v9,v9,v9
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "vxor v9,v9,v9\n"

- lvx v10,r4,r7 //get 8 refs
- add r7,r7,r6
+ "lvx v10,r4,r7\n" //get 8 refs
+ "add r7,r7,r6\n"

- lvx v0,r4,r8 //need another 16 bytes for misaligned data -- 0
- add r8,r8,r6
+ "lvx v0,r4,r8\n" //need another 16 bytes for misaligned data -- 0
+ "add r8,r8,r6\n"

- lvx v11,r4,r7 //get 8 refs
- vperm v10,v10,v0,v8
+ "lvx v11,r4,r7\n" //get 8 refs
+ "vperm v10,v10,v0,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- add r7,r7,r6
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "add r7,r7,r6\n"

- lvx v1,r4,r8 //need another 16 bytes for misaligned data -- 1
- add r8,r8,r6
+ "lvx v1,r4,r8\n" //need another 16 bytes for misaligned data -- 1
+ "add r8,r8,r6\n"

- lvx v12,r4,r7 //get 8 refs
- vperm v11,v11,v1,v8
+ "lvx v12,r4,r7\n" //get 8 refs
+ "vperm v11,v11,v1,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- add r7,r7,r6
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "add r7,r7,r6\n"

- lvx v2,r4,r8 //need another 16 bytes for misaligned data -- 2
- add r8,r8,r6
+ "lvx v2,r4,r8\n" //need another 16 bytes for misaligned data -- 2
+ "add r8,r8,r6\n"

- lvx v13,r4,r7 //get 8 refs
- vperm v12,v12,v2,v8
+ "lvx v13,r4,r7\n" //get 8 refs
+ "vperm v12,v12,v2,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- add r7,r7,r6
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "add r7,r7,r6\n"

- lvx v3,r4,r8 //need another 16 bytes for misaligned data -- 3
- add r8,r8,r6
+ "lvx v3,r4,r8\n" //need another 16 bytes for misaligned data -- 3
+ "add r8,r8,r6\n"

- lvx v14,r4,r7 //get 8 refs
- vperm v13,v13,v3,v8
+ "lvx v14,r4,r7\n" //get 8 refs
+ "vperm v13,v13,v3,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- add r7,r7,r6
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "add r7,r7,r6\n"

- lvx v4,r4,r8 //need another 16 bytes for misaligned data -- 4
- add r8,r8,r6
+ "lvx v4,r4,r8\n" //need another 16 bytes for misaligned data -- 4
+ "add r8,r8,r6\n"

- lvx v15,r4,r7 //get 8 refs
- vperm v14,v14,v4,v8
+ "lvx v15,r4,r7\n" //get 8 refs
+ "vperm v14,v14,v4,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- add r7,r7,r6
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "add r7,r7,r6\n"

- lvx v5,r4,r8 //need another 16 bytes for misaligned data -- 5
- add r8,r8,r6
+ "lvx v5,r4,r8\n" //need another 16 bytes for misaligned data -- 5
+ "add r8,r8,r6\n"

- lvx v16,r4,r7 //get 8 refs
- vperm v15,v15,v5,v8
+ "lvx v16,r4,r7\n" //get 8 refs
+ "vperm v15,v15,v5,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- add r7,r7,r6
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "add r7,r7,r6\n"

- lvx v6,r4,r8 //need another 16 bytes for misaligned data -- 6
- add r8,r8,r6
+ "lvx v6,r4,r8\n" //need another 16 bytes for misaligned data -- 6
+ "add r8,r8,r6\n"

- lvx v17,r4,r7 //get 8 refs
- vperm v16,v16,v6,v8
+ "lvx v17,r4,r7\n" //get 8 refs
+ "vperm v16,v16,v6,v8\n"

- lvsl v8,r4,r7 //load alignment vector for refs
- xor r7,r7,r7
+ "lvsl v8,r4,r7\n" //load alignment vector for refs
+ "xor r7,r7,r7\n"

- lvx v7,r4,r8 //need another 16 bytes for misaligned data -- 7
- add r8,r8,r6
+ "lvx v7,r4,r8\n" //need another 16 bytes for misaligned data -- 7
+ "add r8,r8,r6\n"

- lvx v0,r5,r7 //get 8 shorts
- vperm v17,v17,v7,v8
- addi r7,r7,16
+ "lvx v0,r5,r7\n" //get 8 shorts
+ "vperm v17,v17,v7,v8\n"
+ "addi r7,r7,16\n"

- lvx v1,r5,r7 //get 8 shorts
- vmrghb v10,v9,v10 //unsigned byte -> unsigned half
- addi r7,r7,16
+ "lvx v1,r5,r7\n" //get 8 shorts
+ "vmrghb v10,v9,v10\n" //unsigned byte -> unsigned half
+ "addi r7,r7,16\n"

- lvx v2,r5,r7 //get 8 shorts
- vmrghb v11,v9,v11 //unsigned byte -> unsigned half
- vaddshs v0,v0,v10
- addi r7,r7,16
+ "lvx v2,r5,r7\n" //get 8 shorts
+ "vmrghb v11,v9,v11\n" //unsigned byte -> unsigned half
+ "vaddshs v0,v0,v10\n"
+ "addi r7,r7,16\n"

- lvx v3,r5,r7 //get 8 shorts
- vmrghb v12,v9,v12 //unsigned byte -> unsigned half
- vaddshs v1,v1,v11
- addi r7,r7,16
+ "lvx v3,r5,r7\n" //get 8 shorts
+ "vmrghb v12,v9,v12\n" //unsigned byte -> unsigned half
+ "vaddshs v1,v1,v11\n"
+ "addi r7,r7,16\n"

- lvx v4,r5,r7 //get 8 shorts
- vmrghb v13,v9,v13 //unsigned byte -> unsigned half
- vaddshs v2,v2,v12
- addi r7,r7,16
+ "lvx v4,r5,r7\n" //get 8 shorts
+ "vmrghb v13,v9,v13\n" //unsigned byte -> unsigned half
+ "vaddshs v2,v2,v12\n"
+ "addi r7,r7,16\n"

- lvx v5,r5,r7 //get 8 shorts
- vmrghb v14,v9,v14 //unsigned byte -> unsigned half
- vaddshs v3,v3,v13
- addi r7,r7,16
+ "lvx v5,r5,r7\n" //get 8 shorts
+ "vmrghb v14,v9,v14\n" //unsigned byte -> unsigned half
+ "vaddshs v3,v3,v13\n"
+ "addi r7,r7,16\n"

- lvx v6,r5,r7 //get 8 shorts
- vmrghb v15,v9,v15 //unsigned byte -> unsigned half
- vaddshs v4,v4,v14
- addi r7,r7,16
+ "lvx v6,r5,r7\n" //get 8 shorts
+ "vmrghb v15,v9,v15\n" //unsigned byte -> unsigned half
+ "vaddshs v4,v4,v14\n"
+ "addi r7,r7,16\n"

- lvx v7,r5,r7 //get 8 shorts
- vmrghb v16,v9,v16 //unsigned byte -> unsigned half
- vaddshs v5,v5,v15
+ "lvx v7,r5,r7\n" //get 8 shorts
+ "vmrghb v16,v9,v16\n" //unsigned byte -> unsigned half
+ "vaddshs v5,v5,v15\n"

- vmrghb v17,v9,v17 //unsigned byte -> unsigned half
- vaddshs v6,v6,v16
+ "vmrghb v17,v9,v17\n" //unsigned byte -> unsigned half
+ "vaddshs v6,v6,v16\n"

- vpkshus v0,v0,v0
- vaddshs v7,v7,v17
+ "vpkshus v0,v0,v0\n"
+ "vaddshs v7,v7,v17\n"

- vpkshus v1,v1,v1
- xor r7,r7,r7
+ "vpkshus v1,v1,v1\n"
+ "xor r7,r7,r7\n"

- vpkshus v2,v2,v2
+ "vpkshus v2,v2,v2\n"

- vpkshus v3,v3,v3
+ "vpkshus v3,v3,v3\n"

- vpkshus v4,v4,v4
+ "vpkshus v4,v4,v4\n"

- vpkshus v5,v5,v5
+ "vpkshus v5,v5,v5\n"

- vpkshus v6,v6,v6
+ "vpkshus v6,v6,v6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
- vpkshus v7,v7,v7
+ "lvsr v9,r3,r7\n" //load alignment vector for stores
+ "vpkshus v7,v7,v7\n"

- li r8,4
- vperm v0,v0,v0,v9 //adjust for writes
+ "li r8,4\n"
+ "vperm v0,v0,v0,v9\n" //adjust for writes

- stvewx v0,r3,r7
- add r7,r7,r6
+ "stvewx v0,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v0,r3,r8
- add r8,r8,r6
- vperm v1,v1,v1,v9
+ "stvewx v0,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v1,v1,v1,v9\n"

- stvewx v1,r3,r7
- add r7,r7,r6
+ "stvewx v1,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v1,r3,r8
- add r8,r8,r6
- vperm v2,v2,v2,v9
+ "stvewx v1,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v2,v2,v2,v9\n"

- stvewx v2,r3,r7
- add r7,r7,r6
+ "stvewx v2,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v2,r3,r8
- add r8,r8,r6
- vperm v3,v3,v3,v9
+ "stvewx v2,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v3,v3,v3,v9\n"

- stvewx v3,r3,r7
- add r7,r7,r6
+ "stvewx v3,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v3,r3,r8
- add r8,r8,r6
- vperm v4,v4,v4,v9
+ "stvewx v3,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v4,v4,v4,v9\n"

- stvewx v4,r3,r7
- add r7,r7,r6
+ "stvewx v4,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v4,r3,r8
- add r8,r8,r6
- vperm v5,v5,v5,v9
+ "stvewx v4,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v5,v5,v5,v9\n"

- stvewx v5,r3,r7
- add r7,r7,r6
+ "stvewx v5,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v5,r3,r8
- add r8,r8,r6
- vperm v6,v6,v6,v9
+ "stvewx v5,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v6,v6,v6,v9\n"

- stvewx v6,r3,r7
- add r7,r7,r6
+ "stvewx v6,r3,r7\n"
+ "add r7,r7,r6\n"

- lvsr v9,r3,r7 //load alignment vector for stores
+ "lvsr v9,r3,r7\n" //load alignment vector for stores

- stvewx v6,r3,r8
- add r8,r8,r6
- vperm v7,v7,v7,v9
+ "stvewx v6,r3,r8\n"
+ "add r8,r8,r6\n"
+ "vperm v7,v7,v7,v9\n"

- stvewx v7,r3,r7
+ "stvewx v7,r3,r7\n"

- stvewx v7,r3,r8
- }
+ "stvewx v7,r3,r8\n"
+ );
 }

 OIL_DEFINE_IMPL_FULL (recon8x8_inter_altivec, recon8x8_inter, OIL_IMPL_FLAG_ALTIVEC);
@@ -396,321 +395,320 @@
 static void /* r3, r4, r5, r6, r7 */
 recon8x8_inter2_altivec (uint8_t *dest, uint8_t *s1, uint8_t *s2, int16_t *change, int dsss)
 {
- asm
- {
+ asm(
 //trying cache hints
- lis r8,0x0108
- or r8,r8,r7
- dstst r3,r8,0
+ "lis r8,0x0108\n"
+ "or r8,r8,r7\n"
+ "dstst r3,r8,0\n"

- xor r8,r8,r8
- li r9,16
+ "xor r8,r8,r8\n"
+ "li r9,16\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- vxor v9,v9,v9
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "vxor v9,v9,v9\n"

- lvx v10,r4,r8 //get 8 RefPtr1 -- 0
- add r8,r8,r7
+ "lvx v10,r4,r8\n" //get 8 RefPtr1 -- 0
+ "add r8,r8,r7\n"

- lvx v0,r4,r9 //need another 16 bytes for misaligned data -- 0
- add r9,r9,r7
+ "lvx v0,r4,r9\n" //need another 16 bytes for misaligned data -- 0
+ "add r9,r9,r7\n"

- lvx v11,r4,r8 //get 8 RefPtr1 -- 1
- vperm v10,v10,v0,v8
+ "lvx v11,r4,r8\n" //get 8 RefPtr1 -- 1
+ "vperm v10,v10,v0,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v1,r4,r9 //need another 16 bytes for misaligned data -- 1
- vmrghb v10,v9,v10 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v1,r4,r9\n" //need another 16 bytes for misaligned data -- 1
+ "vmrghb v10,v9,v10\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v12,r4,r8 //get 8 RefPtr1 -- 2
- vperm v11,v11,v1,v8
+ "lvx v12,r4,r8\n" //get 8 RefPtr1 -- 2
+ "vperm v11,v11,v1,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v2,r4,r9 //need another 16 bytes for misaligned data -- 2
- vmrghb v11,v9,v11 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v2,r4,r9\n" //need another 16 bytes for misaligned data -- 2
+ "vmrghb v11,v9,v11\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v13,r4,r8 //get 8 RefPtr1 -- 3
- vperm v12,v12,v2,v8
+ "lvx v13,r4,r8\n" //get 8 RefPtr1 -- 3
+ "vperm v12,v12,v2,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v3,r4,r9 //need another 16 bytes for misaligned data -- 3
- vmrghb v12,v9,v12 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v3,r4,r9\n" //need another 16 bytes for misaligned data -- 3
+ "vmrghb v12,v9,v12\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v14,r4,r8 //get 8 RefPtr1 -- 4
- vperm v13,v13,v3,v8
+ "lvx v14,r4,r8\n" //get 8 RefPtr1 -- 4
+ "vperm v13,v13,v3,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v4,r4,r9 //need another 16 bytes for misaligned data -- 4
- vmrghb v13,v9,v13 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v4,r4,r9\n" //need another 16 bytes for misaligned data -- 4
+ "vmrghb v13,v9,v13\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v15,r4,r8 //get 8 RefPtr1 -- 5
- vperm v14,v14,v4,v8
+ "lvx v15,r4,r8\n" //get 8 RefPtr1 -- 5
+ "vperm v14,v14,v4,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v5,r4,r9 //need another 16 bytes for misaligned data -- 5
- vmrghb v14,v9,v14 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v5,r4,r9\n" //need another 16 bytes for misaligned data -- 5
+ "vmrghb v14,v9,v14\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v16,r4,r8 //get 8 RefPtr1 -- 6
- vperm v15,v15,v5,v8
+ "lvx v16,r4,r8\n" //get 8 RefPtr1 -- 6
+ "vperm v15,v15,v5,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v6,r4,r9 //need another 16 bytes for misaligned data -- 6
- vmrghb v15,v9,v15 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v6,r4,r9\n" //need another 16 bytes for misaligned data -- 6
+ "vmrghb v15,v9,v15\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v17,r4,r8 //get 8 RefPtr1 -- 7
- vperm v16,v16,v6,v8
+ "lvx v17,r4,r8\n" //get 8 RefPtr1 -- 7
+ "vperm v16,v16,v6,v8\n"

- lvsl v8,r4,r8 //load alignment vector for RefPtr1
- add r8,r8,r7
+ "lvsl v8,r4,r8\n" //load alignment vector for RefPtr1
+ "add r8,r8,r7\n"

- lvx v7,r4,r9 //need another 16 bytes for misaligned data -- 7
- vmrghb v16,v9,v16 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v7,r4,r9\n" //need another 16 bytes for misaligned data -- 7
+ "vmrghb v16,v9,v16\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"
 //--------
- vperm v17,v17,v7,v8
- xor r8,r8,r8
- li r9,16
+ "vperm v17,v17,v7,v8\n"
+ "xor r8,r8,r8\n"
+ "li r9,16\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- vmrghb v17,v9,v17 //unsigned byte -> unsigned half
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "vmrghb v17,v9,v17\n" //unsigned byte -> unsigned half

- lvx v20,r5,r8 //get 8 RefPtr2 -- 0
- add r8,r8,r7
+ "lvx v20,r5,r8\n" //get 8 RefPtr2 -- 0
+ "add r8,r8,r7\n"

- lvx v0,r5,r9 //need another 16 bytes for misaligned data -- 0
- add r9,r9,r7
+ "lvx v0,r5,r9\n" //need another 16 bytes for misaligned data -- 0
+ "add r9,r9,r7\n"

- lvx v21,r5,r8 //get 8 RefPtr2 -- 1
- vperm v20,v20,v0,v18
+ "lvx v21,r5,r8\n" //get 8 RefPtr2 -- 1
+ "vperm v20,v20,v0,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v1,r5,r9 //need another 16 bytes for misaligned data -- 1
- vmrghb v20,v9,v20 //unsigned byte -> unsigned half
- add r9,r9,r7
+ "lvx v1,r5,r9\n" //need another 16 bytes for misaligned data -- 1
+ "vmrghb v20,v9,v20\n" //unsigned byte -> unsigned half
+ "add r9,r9,r7\n"

- lvx v22,r5,r8 //get 8 RefPtr2 -- 2
- vperm v21,v21,v1,v18
+ "lvx v22,r5,r8\n" //get 8 RefPtr2 -- 2
+ "vperm v21,v21,v1,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v2,r5,r9 //need another 16 bytes for misaligned data -- 2
- vmrghb v21,v9,v21 //unsigned byte -> unsigned half
- vadduhm v10,v10,v20
- add r9,r9,r7
+ "lvx v2,r5,r9\n" //need another 16 bytes for misaligned data -- 2
+ "vmrghb v21,v9,v21\n" //unsigned byte -> unsigned half
+ "vadduhm v10,v10,v20\n"
+ "add r9,r9,r7\n"

- lvx v23,r5,r8 //get 8 RefPtr2 -- 3
- vperm v22,v22,v2,v18
+ "lvx v23,r5,r8\n" //get 8 RefPtr2 -- 3
+ "vperm v22,v22,v2,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v3,r5,r9 //need another 16 bytes for misaligned data -- 3
- vmrghb v22,v9,v22 //unsigned byte -> unsigned half
- vadduhm v11,v11,v21
- add r9,r9,r7
+ "lvx v3,r5,r9\n" //need another 16 bytes for misaligned data -- 3
+ "vmrghb v22,v9,v22\n" //unsigned byte -> unsigned half
+ "vadduhm v11,v11,v21\n"
+ "add r9,r9,r7\n"

- lvx v24,r5,r8 //get 8 RefPtr2 -- 4
- vperm v23,v23,v3,v18
+ "lvx v24,r5,r8\n" //get 8 RefPtr2 -- 4
+ "vperm v23,v23,v3,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v4,r5,r9 //need another 16 bytes for misaligned data -- 4
- vmrghb v23,v9,v23 //unsigned byte -> unsigned half
- vadduhm v12,v12,v22
- add r9,r9,r7
+ "lvx v4,r5,r9\n" //need another 16 bytes for misaligned data -- 4
+ "vmrghb v23,v9,v23\n" //unsigned byte -> unsigned half
+ "vadduhm v12,v12,v22\n"
+ "add r9,r9,r7\n"

- lvx v25,r5,r8 //get 8 RefPtr2 -- 5
- vperm v24,v24,v4,v18
+ "lvx v25,r5,r8\n" //get 8 RefPtr2 -- 5
+ "vperm v24,v24,v4,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v5,r5,r9 //need another 16 bytes for misaligned data -- 5
- vmrghb v24,v9,v24 //unsigned byte -> unsigned half
- vadduhm v13,v13,v23
- add r9,r9,r7
+ "lvx v5,r5,r9\n" //need another 16 bytes for misaligned data -- 5
+ "vmrghb v24,v9,v24\n" //unsigned byte -> unsigned half
+ "vadduhm v13,v13,v23\n"
+ "add r9,r9,r7\n"

- lvx v26,r5,r8 //get 8 RefPtr2 -- 6
- vperm v25,v25,v5,v18
+ "lvx v26,r5,r8\n" //get 8 RefPtr2 -- 6
+ "vperm v25,v25,v5,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v6,r5,r9 //need another 16 bytes for misaligned data -- 6
- vmrghb v25,v9,v25 //unsigned byte -> unsigned half
- vadduhm v14,v14,v24
- add r9,r9,r7
+ "lvx v6,r5,r9\n" //need another 16 bytes for misaligned data -- 6
+ "vmrghb v25,v9,v25\n" //unsigned byte -> unsigned half
+ "vadduhm v14,v14,v24\n"
+ "add r9,r9,r7\n"

- lvx v27,r5,r8 //get 8 RefPtr2 -- 7
- vperm v26,v26,v6,v18
+ "lvx v27,r5,r8\n" //get 8 RefPtr2 -- 7
+ "vperm v26,v26,v6,v18\n"

- lvsl v18,r5,r8 //load alignment vector for RefPtr2
- add r8,r8,r7
+ "lvsl v18,r5,r8\n" //load alignment vector for RefPtr2
+ "add r8,r8,r7\n"

- lvx v7,r5,r9 //need another 16 bytes for misaligned data -- 7
- vmrghb v26,v9,v26 //unsigned byte -> unsigned half
- vadduhm v15,v15,v25
- add r9,r9,r7
+ "lvx v7,r5,r9\n" //need another 16 bytes for misaligned data -- 7
+ "vmrghb v26,v9,v26\n" //unsigned byte -> unsigned half
+ "vadduhm v15,v15,v25\n"
+ "add r9,r9,r7\n"

- vperm v27,v27,v7,v18
- xor r8,r8,r8
+ "vperm v27,v27,v7,v18\n"
+ "xor r8,r8,r8\n"

- vmrghb v27,v9,v27 //unsigned byte -> unsigned half
- vadduhm v16,v16,v26
+ "vmrghb v27,v9,v27\n" //unsigned byte -> unsigned half
+ "vadduhm v16,v16,v26\n"

- vadduhm v17,v17,v27
- vspltish v8,1
+ "vadduhm v17,v17,v27\n"
+ "vspltish v8,1\n"
 //--------
- lvx v0,r6,r8 //get 8 shorts
- vsrh v10,v10,v8
- addi r8,r8,16
-
- lvx v1,r6,r8 //get 8 shorts
- vsrh v11,v11,v8
- addi r8,r8,16
-
- lvx v2,r6,r8 //get 8 shorts
- vsrh v12,v12,v8
- addi r8,r8,16
-
- lvx v3,r6,r8 //get 8 shorts
- vsrh v13,v13,v8
- addi r8,r8,16
-
- lvx v4,r6,r8 //get 8 shorts
- vsrh v14,v14,v8
- addi r8,r8,16
-
- lvx v5,r6,r8 //get 8 shorts
- vsrh v15,v15,v8
- addi r8,r8,16
-
- lvx v6,r6,r8 //get 8 shorts
- vsrh v16,v16,v8
- addi r8,r8,16
-
- lvx v7,r6,r8 //get 8 shorts
- vsrh v17,v17,v8
- xor r8,r8,r8
+ "lvx v0,r6,r8\n" //get 8 shorts
+ "vsrh v10,v10,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v1,r6,r8\n" //get 8 shorts
+ "vsrh v11,v11,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v2,r6,r8\n" //get 8 shorts
+ "vsrh v12,v12,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v3,r6,r8\n" //get 8 shorts
+ "vsrh v13,v13,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v4,r6,r8\n" //get 8 shorts
+ "vsrh v14,v14,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v5,r6,r8\n" //get 8 shorts
+ "vsrh v15,v15,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v6,r6,r8\n" //get 8 shorts
+ "vsrh v16,v16,v8\n"
+ "addi r8,r8,16\n"
+
+ "lvx v7,r6,r8\n" //get 8 shorts
+ "vsrh v17,v17,v8\n"
+ "xor r8,r8,r8\n"
 //--------
- lvsr v9,r3,r8 //load alignment vector for stores
- vaddshs v0,v0,v10
+ "lvsr v9,r3,r8\n" //load alignment vector for stores
+ "vaddshs v0,v0,v10\n"

- vaddshs v1,v1,v11
- vpkshus v0,v0,v0
+ "vaddshs v1,v1,v11\n"
+ "vpkshus v0,v0,v0\n"

- vaddshs v2,v2,v12
- vpkshus v1,v1,v1
+ "vaddshs v2,v2,v12\n"
+ "vpkshus v1,v1,v1\n"

- vaddshs v3,v3,v13
- vpkshus v2,v2,v2
+ "vaddshs v3,v3,v13\n"
+ "vpkshus v2,v2,v2\n"

- vaddshs v4,v4,v14
- vpkshus v3,v3,v3
+ "vaddshs v4,v4,v14\n"
+ "vpkshus v3,v3,v3\n"

- vaddshs v5,v5,v15
- vpkshus v4,v4,v4
+ "vaddshs v5,v5,v15\n"
+ "vpkshus v4,v4,v4\n"

- vaddshs v6,v6,v16
- vpkshus v5,v5,v5
+ "vaddshs v6,v6,v16\n"
+ "vpkshus v5,v5,v5\n"

- vaddshs v7,v7,v17
- vpkshus v6,v6,v6
+ "vaddshs v7,v7,v17\n"
+ "vpkshus v6,v6,v6\n"

- vpkshus v7,v7,v7
+ "vpkshus v7,v7,v7\n"

- li r9,4
- vperm v0,v0,v0,v9 //adjust for writes
+ "li r9,4\n"
+ "vperm v0,v0,v0,v9\n" //adjust for writes

- stvewx v0,r3,r8
- add r8,r8,r7
+ "stvewx v0,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v0,r3,r9
- add r9,r9,r7
- vperm v1,v1,v1,v9
+ "stvewx v0,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v1,v1,v1,v9\n"

- stvewx v1,r3,r8
- add r8,r8,r7
+ "stvewx v1,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v1,r3,r9
- add r9,r9,r7
- vperm v2,v2,v2,v9
+ "stvewx v1,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v2,v2,v2,v9\n"

- stvewx v2,r3,r8
- add r8,r8,r7
+ "stvewx v2,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v2,r3,r9
- add r9,r9,r7
- vperm v3,v3,v3,v9
+ "stvewx v2,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v3,v3,v3,v9\n"

- stvewx v3,r3,r8
- add r8,r8,r7
+ "stvewx v3,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v3,r3,r9
- add r9,r9,r7
- vperm v4,v4,v4,v9
+ "stvewx v3,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v4,v4,v4,v9\n"

- stvewx v4,r3,r8
- add r8,r8,r7
+ "stvewx v4,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v4,r3,r9
- add r9,r9,r7
- vperm v5,v5,v5,v9
+ "stvewx v4,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v5,v5,v5,v9\n"

- stvewx v5,r3,r8
- add r8,r8,r7
+ "stvewx v5,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v5,r3,r9
- add r9,r9,r7
- vperm v6,v6,v6,v9
+ "stvewx v5,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v6,v6,v6,v9\n"

- stvewx v6,r3,r8
- add r8,r8,r7
+ "stvewx v6,r3,r8\n"
+ "add r8,r8,r7\n"

- lvsr v9,r3,r8 //load alignment vector for stores
+ "lvsr v9,r3,r8\n" //load alignment vector for stores

- stvewx v6,r3,r9
- add r9,r9,r7
- vperm v7,v7,v7,v9
+ "stvewx v6,r3,r9\n"
+ "add r9,r9,r7\n"
+ "vperm v7,v7,v7,v9\n"

- stvewx v7,r3,r8
+ "stvewx v7,r3,r8\n"

- stvewx v7,r3,r9
- }
+ "stvewx v7,r3,r9\n"
+ );
 }

 OIL_DEFINE_IMPL_FULL (recon8x8_inter2_altivec, recon8x8_inter2, OIL_IMPL_FLAG_ALTIVEC);
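Note that the converted blocks use GCC's basic asm form, which, like the original code, hard-codes its registers (r3..r9, v0..v27) and declares no operands or clobbers to the compiler. For comparison only (a hypothetical snippet, not part of the patch), GCC's extended asm form lets the compiler allocate the registers and know what the block touches:

    /* Hypothetical example: increment an int with PowerPC addi.
       %0 is replaced by the register GCC allocates for x;
       the "+r" constraint marks that register as read and written. */
    static inline int add_one (int x)
    {
      asm ("addi %0,%0,1" : "+r" (x));
      return x;
    }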
Attachments on bug 109270: 71054 | 71085