Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 190738 Details for
Bug 269103
[patch] media-video/mplayer-1.0_rc2_p28450. Can't compile with pie enabled gcc (hardened)
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
works for me
mplayer-1.0_rc2_p28450-liba52-pie.patch (text/plain), 30.40 KB, created by
Ralph Sennhauser (RETIRED)
on 2009-05-09 09:12:45 UTC
(
hide
)
Description:
works for me
Filename:
MIME Type:
Creator:
Ralph Sennhauser (RETIRED)
Created:
2009-05-09 09:12:45 UTC
Size:
30.40 KB
patch
obsolete
>--- liba52/imdct.c 2009-02-03 06:50:53.000000000 +0100 >+++ liba52/imdct.c 2009-05-08 15:19:21.000000000 +0200 >@@ -366,701 +366,8 @@ > } > } > >-#if HAVE_ALTIVEC >- >-#ifdef HAVE_ALTIVEC_H >-#include <altivec.h> >-#endif >- >-// used to build registers permutation vectors (vcprm) >-// the 's' are for words in the _s_econd vector >-#define WORD_0 0x00,0x01,0x02,0x03 >-#define WORD_1 0x04,0x05,0x06,0x07 >-#define WORD_2 0x08,0x09,0x0a,0x0b >-#define WORD_3 0x0c,0x0d,0x0e,0x0f >-#define WORD_s0 0x10,0x11,0x12,0x13 >-#define WORD_s1 0x14,0x15,0x16,0x17 >-#define WORD_s2 0x18,0x19,0x1a,0x1b >-#define WORD_s3 0x1c,0x1d,0x1e,0x1f >- >-#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} >-#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} >- >-#define FOUROF(a) {a,a,a,a} >- >-// vcprmle is used to keep the same index as in the SSE version. >-// it's the same as vcprm, with the index inversed >-// ('le' is Little Endian) >-#define vcprmle(a,b,c,d) vcprm(d,c,b,a) >- >-// used to build inverse/identity vectors (vcii) >-// n is _n_egative, p is _p_ositive >-#define FLOAT_n -1. >-#define FLOAT_p 1. >- >- >-void >-imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) >-{ >- int i; >- int k; >- int p,q; >- int m; >- long two_m; >- long two_m_plus_one; >- >- sample_t tmp_b_i; >- sample_t tmp_b_r; >- sample_t tmp_a_i; >- sample_t tmp_a_r; >- >- sample_t *data_ptr; >- sample_t *delay_ptr; >- sample_t *window_ptr; >- >- /* 512 IMDCT with source and dest data in 'data' */ >- >- /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ >- for( i=0; i < 128; i++) { >- /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ >- int j= bit_reverse_512[i]; >- buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); >- buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); >- } >- >- /* 1. iteration */ >- for(i = 0; i < 128; i += 2) { >-#if 0 >- tmp_a_r = buf[i].real; >- tmp_a_i = buf[i].imag; >- tmp_b_r = buf[i+1].real; >- tmp_b_i = buf[i+1].imag; >- buf[i].real = tmp_a_r + tmp_b_r; >- buf[i].imag = tmp_a_i + tmp_b_i; >- buf[i+1].real = tmp_a_r - tmp_b_r; >- buf[i+1].imag = tmp_a_i - tmp_b_i; >-#else >- vector float temp, bufv; >- >- bufv = vec_ld(i << 3, (float*)buf); >- temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); >- bufv = vec_madd(bufv, vcii(p,p,n,n), temp); >- vec_st(bufv, i << 3, (float*)buf); >-#endif >- } >- >- /* 2. iteration */ >- // Note w[1]={{1,0}, {0,-1}} >- for(i = 0; i < 128; i += 4) { >-#if 0 >- tmp_a_r = buf[i].real; >- tmp_a_i = buf[i].imag; >- tmp_b_r = buf[i+2].real; >- tmp_b_i = buf[i+2].imag; >- buf[i].real = tmp_a_r + tmp_b_r; >- buf[i].imag = tmp_a_i + tmp_b_i; >- buf[i+2].real = tmp_a_r - tmp_b_r; >- buf[i+2].imag = tmp_a_i - tmp_b_i; >- tmp_a_r = buf[i+1].real; >- tmp_a_i = buf[i+1].imag; >- /* WARNING: im <-> re here ! */ >- tmp_b_r = buf[i+3].imag; >- tmp_b_i = buf[i+3].real; >- buf[i+1].real = tmp_a_r + tmp_b_r; >- buf[i+1].imag = tmp_a_i - tmp_b_i; >- buf[i+3].real = tmp_a_r - tmp_b_r; >- buf[i+3].imag = tmp_a_i + tmp_b_i; >-#else >- vector float buf01, buf23, temp1, temp2; >- >- buf01 = vec_ld((i + 0) << 3, (float*)buf); >- buf23 = vec_ld((i + 2) << 3, (float*)buf); >- buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); >- >- temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01); >- temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01); >- >- vec_st(temp1, (i + 0) << 3, (float*)buf); >- vec_st(temp2, (i + 2) << 3, (float*)buf); >-#endif >- } >- >- /* 3. iteration */ >- for(i = 0; i < 128; i += 8) { >-#if 0 >- tmp_a_r = buf[i].real; >- tmp_a_i = buf[i].imag; >- tmp_b_r = buf[i+4].real; >- tmp_b_i = buf[i+4].imag; >- buf[i].real = tmp_a_r + tmp_b_r; >- buf[i].imag = tmp_a_i + tmp_b_i; >- buf[i+4].real = tmp_a_r - tmp_b_r; >- buf[i+4].imag = tmp_a_i - tmp_b_i; >- tmp_a_r = buf[1+i].real; >- tmp_a_i = buf[1+i].imag; >- tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; >- tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; >- buf[1+i].real = tmp_a_r + tmp_b_r; >- buf[1+i].imag = tmp_a_i + tmp_b_i; >- buf[i+5].real = tmp_a_r - tmp_b_r; >- buf[i+5].imag = tmp_a_i - tmp_b_i; >- tmp_a_r = buf[i+2].real; >- tmp_a_i = buf[i+2].imag; >- /* WARNING re <-> im & sign */ >- tmp_b_r = buf[i+6].imag; >- tmp_b_i = - buf[i+6].real; >- buf[i+2].real = tmp_a_r + tmp_b_r; >- buf[i+2].imag = tmp_a_i + tmp_b_i; >- buf[i+6].real = tmp_a_r - tmp_b_r; >- buf[i+6].imag = tmp_a_i - tmp_b_i; >- tmp_a_r = buf[i+3].real; >- tmp_a_i = buf[i+3].imag; >- tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; >- tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; >- buf[i+3].real = tmp_a_r + tmp_b_r; >- buf[i+3].imag = tmp_a_i + tmp_b_i; >- buf[i+7].real = tmp_a_r - tmp_b_r; >- buf[i+7].imag = tmp_a_i - tmp_b_i; >-#else >- vector float buf01, buf23, buf45, buf67; >- >- buf01 = vec_ld((i + 0) << 3, (float*)buf); >- buf23 = vec_ld((i + 2) << 3, (float*)buf); >- >- tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; >- tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; >- buf[i+5].real = tmp_b_r; >- buf[i+5].imag = tmp_b_i; >- tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; >- tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; >- buf[i+7].real = tmp_b_r; >- buf[i+7].imag = tmp_b_i; >- >- buf23 = vec_ld((i + 2) << 3, (float*)buf); >- buf45 = vec_ld((i + 4) << 3, (float*)buf); >- buf67 = vec_ld((i + 6) << 3, (float*)buf); >- buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); >- >- vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); >- vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); >- vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); >- vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); >-#endif >- } >- >- /* 4-7. iterations */ >- for (m=3; m < 7; m++) { >- two_m = (1 << m); >- >- two_m_plus_one = two_m<<1; >- >- for(i = 0; i < 128; i += two_m_plus_one) { >- for(k = 0; k < two_m; k+=2) { >-#if 0 >- int p = k + i; >- int q = p + two_m; >- tmp_a_r = buf[p].real; >- tmp_a_i = buf[p].imag; >- tmp_b_r = >- buf[q].real * w[m][k].real - >- buf[q].imag * w[m][k].imag; >- tmp_b_i = >- buf[q].imag * w[m][k].real + >- buf[q].real * w[m][k].imag; >- buf[p].real = tmp_a_r + tmp_b_r; >- buf[p].imag = tmp_a_i + tmp_b_i; >- buf[q].real = tmp_a_r - tmp_b_r; >- buf[q].imag = tmp_a_i - tmp_b_i; >- >- tmp_a_r = buf[(p + 1)].real; >- tmp_a_i = buf[(p + 1)].imag; >- tmp_b_r = >- buf[(q + 1)].real * w[m][(k + 1)].real - >- buf[(q + 1)].imag * w[m][(k + 1)].imag; >- tmp_b_i = >- buf[(q + 1)].imag * w[m][(k + 1)].real + >- buf[(q + 1)].real * w[m][(k + 1)].imag; >- buf[(p + 1)].real = tmp_a_r + tmp_b_r; >- buf[(p + 1)].imag = tmp_a_i + tmp_b_i; >- buf[(q + 1)].real = tmp_a_r - tmp_b_r; >- buf[(q + 1)].imag = tmp_a_i - tmp_b_i; >-#else >- int p = k + i; >- int q = p + two_m; >- vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4; >- const vector float vczero = (const vector float)FOUROF(0.); >- // first compute buf[q] and buf[q+1] >- vecq = vec_ld(q << 3, (float*)buf); >- vecw = vec_ld(0, (float*)&(w[m][k])); >- temp1 = vec_madd(vecq, vecw, vczero); >- temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2)); >- temp2 = vec_madd(temp2, vecw, vczero); >- temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2)); >- temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3)); >- vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); >- // then butterfly with buf[p] and buf[p+1] >- vecp = vec_ld(p << 3, (float*)buf); >- >- temp1 = vec_add(vecp, vecq); >- temp2 = vec_sub(vecp, vecq); >- >- vec_st(temp1, p << 3, (float*)buf); >- vec_st(temp2, q << 3, (float*)buf); >-#endif >- } >- } >- } >- >- /* Post IFFT complex multiply plus IFFT complex conjugate*/ >- for( i=0; i < 128; i+=4) { >- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ >-#if 0 >- tmp_a_r = buf[(i + 0)].real; >- tmp_a_i = -1.0 * buf[(i + 0)].imag; >- buf[(i + 0)].real = >- (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]); >- buf[(i + 0)].imag = >- (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]); >- >- tmp_a_r = buf[(i + 1)].real; >- tmp_a_i = -1.0 * buf[(i + 1)].imag; >- buf[(i + 1)].real = >- (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]); >- buf[(i + 1)].imag = >- (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]); >- >- tmp_a_r = buf[(i + 2)].real; >- tmp_a_i = -1.0 * buf[(i + 2)].imag; >- buf[(i + 2)].real = >- (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]); >- buf[(i + 2)].imag = >- (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]); >- >- tmp_a_r = buf[(i + 3)].real; >- tmp_a_i = -1.0 * buf[(i + 3)].imag; >- buf[(i + 3)].real = >- (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]); >- buf[(i + 3)].imag = >- (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]); >-#else >- vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2; >- vector float temp0022, temp1133, tempCS01; >- const vector float vczero = (const vector float)FOUROF(0.); >- >- bufv_0 = vec_ld((i + 0) << 3, (float*)buf); >- bufv_2 = vec_ld((i + 2) << 3, (float*)buf); >- >- cosv = vec_ld(i << 2, xcos1); >- sinv = vec_ld(i << 2, xsin1); >- >- temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2)); >- temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3)); >- tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1)); >- temp1 = vec_madd(temp0022, tempCS01, vczero); >- tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); >- temp2 = vec_madd(temp1133, tempCS01, vczero); >- bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); >- >- vec_st(bufv_0, (i + 0) << 3, (float*)buf); >- >- /* idem with bufv_2 and high-order cosv/sinv */ >- >- temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2)); >- temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3)); >- tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3)); >- temp1 = vec_madd(temp0022, tempCS01, vczero); >- tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3)); >- temp2 = vec_madd(temp1133, tempCS01, vczero); >- bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); >- >- vec_st(bufv_2, (i + 2) << 3, (float*)buf); >- >-#endif >- } >- >- data_ptr = data; >- delay_ptr = delay; >- window_ptr = a52_imdct_window; >- >- /* Window and convert to real valued signal */ >- for(i=0; i< 64; i++) { >- *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; >- *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; >- } >- >- for(i=0; i< 64; i++) { >- *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; >- *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; >- } >- >- /* The trailing edge of the window goes into the delay line */ >- delay_ptr = delay; >- >- for(i=0; i< 64; i++) { >- *delay_ptr++ = -buf[64+i].real * *--window_ptr; >- *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; >- } >- >- for(i=0; i<64; i++) { >- *delay_ptr++ = buf[i].imag * *--window_ptr; >- *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; >- } >-} >-#endif >- >- > // Stuff below this line is borrowed from libac3 > #include "srfftp.h" >-#if ARCH_X86 || ARCH_X86_64 >-#undef HAVE_AMD3DNOW >-#define HAVE_AMD3DNOW 1 >-#include "srfftp_3dnow.h" >- >-const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; >-const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; >-const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; >- >-#undef HAVE_AMD3DNOWEXT >-#define HAVE_AMD3DNOWEXT 0 >-#include "imdct_3dnow.h" >-#undef HAVE_AMD3DNOWEXT >-#define HAVE_AMD3DNOWEXT 1 >-#include "imdct_3dnow.h" >- >-void >-imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) >-{ >-/* int i,k; >- int p,q;*/ >- int m; >- long two_m; >- long two_m_plus_one; >- long two_m_plus_one_shl3; >- complex_t *buf_offset; >- >-/* sample_t tmp_a_i; >- sample_t tmp_a_r; >- sample_t tmp_b_i; >- sample_t tmp_b_r;*/ >- >- sample_t *data_ptr; >- sample_t *delay_ptr; >- sample_t *window_ptr; >- >- /* 512 IMDCT with source and dest data in 'data' */ >- /* see the c version (dct_do_512()), its allmost identical, just in C */ >- >- /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ >- /* Bit reversed shuffling */ >- __asm__ volatile( >- "xor %%"REG_S", %%"REG_S" \n\t" >- "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" >- "mov $1008, %%"REG_D" \n\t" >- "push %%"REG_BP" \n\t" //use ebp without telling gcc >- ASMALIGN(4) >- "1: \n\t" >- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI >- "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI >- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi >- "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi >- "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR >- "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t" >- "mulps %%xmm0, %%xmm2 \n\t" >- "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI >- "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" >- "subps %%xmm0, %%xmm2 \n\t" >- "movzb (%%"REG_a"), %%"REG_d" \n\t" >- "movzb 1(%%"REG_a"), %%"REG_BP" \n\t" >- "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" >- "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" >- "add $16, %%"REG_S" \n\t" >- "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap >- "sub $16, %%"REG_D" \n\t" >- "jnc 1b \n\t" >- "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* >- :: "b" (data), "c" (buf) >- : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d >- ); >- >- >- /* FFT Merge */ >-/* unoptimized variant >- for (m=1; m < 7; m++) { >- if(m) >- two_m = (1 << m); >- else >- two_m = 1; >- >- two_m_plus_one = (1 << (m+1)); >- >- for(i = 0; i < 128; i += two_m_plus_one) { >- for(k = 0; k < two_m; k++) { >- p = k + i; >- q = p + two_m; >- tmp_a_r = buf[p].real; >- tmp_a_i = buf[p].imag; >- tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; >- tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; >- buf[p].real = tmp_a_r + tmp_b_r; >- buf[p].imag = tmp_a_i + tmp_b_i; >- buf[q].real = tmp_a_r - tmp_b_r; >- buf[q].imag = tmp_a_i - tmp_b_i; >- } >- } >- } >-*/ >- >- /* 1. iteration */ >- // Note w[0][0]={1,0} >- __asm__ volatile( >- "xorps %%xmm1, %%xmm1 \n\t" >- "xorps %%xmm2, %%xmm2 \n\t" >- "mov %0, %%"REG_S" \n\t" >- ASMALIGN(4) >- "1: \n\t" >- "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] >- "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] >- "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] >- "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] >- "addps %%xmm1, %%xmm0 \n\t" >- "subps %%xmm2, %%xmm0 \n\t" >- "movaps %%xmm0, (%%"REG_S")\n\t" >- "add $16, %%"REG_S" \n\t" >- "cmp %1, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "g" (buf), "r" (buf + 128) >- : "%"REG_S >- ); >- >- /* 2. iteration */ >- // Note w[1]={{1,0}, {0,-1}} >- __asm__ volatile( >- "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 >- "mov %0, %%"REG_S" \n\t" >- ASMALIGN(4) >- "1: \n\t" >- "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 >- "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 >- "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 >- "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 >- "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1 >- "addps %%xmm2, %%xmm0 \n\t" >- "subps %%xmm2, %%xmm1 \n\t" >- "movaps %%xmm0, (%%"REG_S") \n\t" >- "movaps %%xmm1, 16(%%"REG_S") \n\t" >- "add $32, %%"REG_S" \n\t" >- "cmp %1, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "g" (buf), "r" (buf + 128) >- : "%"REG_S >- ); >- >- /* 3. iteration */ >-/* >- Note sseW2+0={1,1,sqrt(2),sqrt(2)) >- Note sseW2+16={0,0,sqrt(2),-sqrt(2)) >- Note sseW2+32={0,0,-sqrt(2),-sqrt(2)) >- Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) >-*/ >- __asm__ volatile( >- "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" >- "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" >- "xorps %%xmm5, %%xmm5 \n\t" >- "xorps %%xmm2, %%xmm2 \n\t" >- "mov %0, %%"REG_S" \n\t" >- ASMALIGN(4) >- "1: \n\t" >- "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 >- "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 >- "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 >- "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 >- "mulps %%xmm2, %%xmm4 \n\t" >- "mulps %%xmm3, %%xmm5 \n\t" >- "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 >- "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 >- "mulps %%xmm6, %%xmm3 \n\t" >- "mulps %%xmm7, %%xmm2 \n\t" >- "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 >- "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3 >- "addps %%xmm4, %%xmm2 \n\t" >- "addps %%xmm5, %%xmm3 \n\t" >- "movaps %%xmm2, %%xmm4 \n\t" >- "movaps %%xmm3, %%xmm5 \n\t" >- "addps %%xmm0, %%xmm2 \n\t" >- "addps %%xmm1, %%xmm3 \n\t" >- "subps %%xmm4, %%xmm0 \n\t" >- "subps %%xmm5, %%xmm1 \n\t" >- "movaps %%xmm2, (%%"REG_S") \n\t" >- "movaps %%xmm3, 16(%%"REG_S") \n\t" >- "movaps %%xmm0, 32(%%"REG_S") \n\t" >- "movaps %%xmm1, 48(%%"REG_S") \n\t" >- "add $64, %%"REG_S" \n\t" >- "cmp %1, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "g" (buf), "r" (buf + 128) >- : "%"REG_S >- ); >- >- /* 4-7. iterations */ >- for (m=3; m < 7; m++) { >- two_m = (1 << m); >- two_m_plus_one = two_m<<1; >- two_m_plus_one_shl3 = (two_m_plus_one<<3); >- buf_offset = buf+128; >- __asm__ volatile( >- "mov %0, %%"REG_S" \n\t" >- ASMALIGN(4) >- "1: \n\t" >- "xor %%"REG_D", %%"REG_D" \n\t" // k >- "lea (%%"REG_S", %3), %%"REG_d" \n\t" >- "2: \n\t" >- "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" >- "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t" >- "mulps %%xmm1, %%xmm2 \n\t" >- "shufps $0xB1, %%xmm1, %%xmm1 \n\t" >- "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t" >- "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t" >- "addps %%xmm2, %%xmm1 \n\t" >- "movaps %%xmm1, %%xmm2 \n\t" >- "addps %%xmm0, %%xmm1 \n\t" >- "subps %%xmm2, %%xmm0 \n\t" >- "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" >- "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" >- "add $16, %%"REG_D" \n\t" >- "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 >- "jb 2b \n\t" >- "add %2, %%"REG_S" \n\t" >- "cmp %1, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), >- "r" (sseW[m]) >- : "%"REG_S, "%"REG_D, "%"REG_d >- ); >- } >- >- /* Post IFFT complex multiply plus IFFT complex conjugate*/ >- __asm__ volatile( >- "mov $-1024, %%"REG_S" \n\t" >- ASMALIGN(4) >- "1: \n\t" >- "movaps (%0, %%"REG_S"), %%xmm0 \n\t" >- "movaps (%0, %%"REG_S"), %%xmm1 \n\t" >- "shufps $0xB1, %%xmm0, %%xmm0 \n\t" >- "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" >- "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" >- "addps %%xmm1, %%xmm0 \n\t" >- "movaps %%xmm0, (%0, %%"REG_S") \n\t" >- "add $16, %%"REG_S" \n\t" >- " jnz 1b \n\t" >- :: "r" (buf+128) >- : "%"REG_S >- ); >- >- >- data_ptr = data; >- delay_ptr = delay; >- window_ptr = a52_imdct_window; >- >- /* Window and convert to real valued signal */ >- __asm__ volatile( >- "xor %%"REG_D", %%"REG_D" \n\t" // 0 >- "xor %%"REG_S", %%"REG_S" \n\t" // 0 >- "movss %3, %%xmm2 \n\t" // bias >- "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... >- ASMALIGN(4) >- "1: \n\t" >- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? >- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? >- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? >- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? >- "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A >- "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" >- "addps (%2, %%"REG_S"), %%xmm0 \n\t" >- "addps %%xmm2, %%xmm0 \n\t" >- "movaps %%xmm0, (%1, %%"REG_S") \n\t" >- "add $16, %%"REG_S" \n\t" >- "sub $16, %%"REG_D" \n\t" >- "cmp $512, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) >- : "%"REG_S, "%"REG_D >- ); >- data_ptr+=128; >- delay_ptr+=128; >-// window_ptr+=128; >- >- __asm__ volatile( >- "mov $1024, %%"REG_D" \n\t" // 512 >- "xor %%"REG_S", %%"REG_S" \n\t" // 0 >- "movss %3, %%xmm2 \n\t" // bias >- "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... >- ASMALIGN(4) >- "1: \n\t" >- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A >- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C >- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C >- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A >- "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A >- "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" >- "addps (%2, %%"REG_S"), %%xmm0 \n\t" >- "addps %%xmm2, %%xmm0 \n\t" >- "movaps %%xmm0, (%1, %%"REG_S") \n\t" >- "add $16, %%"REG_S" \n\t" >- "sub $16, %%"REG_D" \n\t" >- "cmp $512, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) >- : "%"REG_S, "%"REG_D >- ); >- data_ptr+=128; >-// window_ptr+=128; >- >- /* The trailing edge of the window goes into the delay line */ >- delay_ptr = delay; >- >- __asm__ volatile( >- "xor %%"REG_D", %%"REG_D" \n\t" // 0 >- "xor %%"REG_S", %%"REG_S" \n\t" // 0 >- ASMALIGN(4) >- "1: \n\t" >- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A >- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C >- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C >- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A >- "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A >- "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" >- "movaps %%xmm0, (%1, %%"REG_S") \n\t" >- "add $16, %%"REG_S" \n\t" >- "sub $16, %%"REG_D" \n\t" >- "cmp $512, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "r" (buf+64), "r" (delay_ptr) >- : "%"REG_S, "%"REG_D >- ); >- delay_ptr+=128; >-// window_ptr-=128; >- >- __asm__ volatile( >- "mov $1024, %%"REG_D" \n\t" // 1024 >- "xor %%"REG_S", %%"REG_S" \n\t" // 0 >- ASMALIGN(4) >- "1: \n\t" >- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? >- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? >- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? >- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? >- "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A >- "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" >- "movaps %%xmm0, (%1, %%"REG_S") \n\t" >- "add $16, %%"REG_S" \n\t" >- "sub $16, %%"REG_D" \n\t" >- "cmp $512, %%"REG_S" \n\t" >- " jb 1b \n\t" >- :: "r" (buf), "r" (delay_ptr) >- : "%"REG_S, "%"REG_D >- ); >-} >-#endif // ARCH_X86 || ARCH_X86_64 > > void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) > { >@@ -1258,35 +565,6 @@ > ifft128 = ifft128_c; > ifft64 = ifft64_c; > >-#if ARCH_X86 || ARCH_X86_64 >- if(mm_accel & MM_ACCEL_X86_SSE) >- { >- fprintf (stderr, "Using SSE optimized IMDCT transform\n"); >- a52_imdct_512 = imdct_do_512_sse; >- } >- else >- if(mm_accel & MM_ACCEL_X86_3DNOWEXT) >- { >- fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); >- a52_imdct_512 = imdct_do_512_3dnowex; >- } >- else >- if(mm_accel & MM_ACCEL_X86_3DNOW) >- { >- fprintf (stderr, "Using 3DNow optimized IMDCT transform\n"); >- a52_imdct_512 = imdct_do_512_3dnow; >- } >- else >-#endif // ARCH_X86 || ARCH_X86_64 >-#if HAVE_ALTIVEC >- if (mm_accel & MM_ACCEL_PPC_ALTIVEC) >- { >- fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); >- a52_imdct_512 = imdct_do_512_altivec; >- } >- else >-#endif >- > #ifdef LIBA52_DJBFFT > if (mm_accel & MM_ACCEL_DJBFFT) { > fprintf (stderr, "Using djbfft for IMDCT transform\n"); >--- liba52/resample.c 2009-02-03 06:50:53.000000000 +0100 >+++ liba52/resample.c 2009-05-08 14:03:24.000000000 +0200 >@@ -38,38 +38,8 @@ > > #include "resample_c.c" > >-#if ARCH_X86 || ARCH_X86_64 >-#include "resample_mmx.c" >-#endif >- >-#if HAVE_ALTIVEC >-#include "resample_altivec.c" >-#endif >- > void* a52_resample_init(uint32_t mm_accel,int flags,int chans){ >-void* tmp; >- >-#if ARCH_X86 || ARCH_X86_64 >- if(mm_accel&MM_ACCEL_X86_MMX){ >- tmp=a52_resample_MMX(flags,chans); >- if(tmp){ >- if(a52_resample==NULL) fprintf(stderr, "Using MMX optimized resampler\n"); >- a52_resample=tmp; >- return tmp; >- } >- } >-#endif >-#if HAVE_ALTIVEC >- if(mm_accel&MM_ACCEL_PPC_ALTIVEC){ >- tmp=a52_resample_altivec(flags,chans); >- if(tmp){ >- if(a52_resample==NULL) fprintf(stderr, "Using AltiVec optimized resampler\n"); >- a52_resample=tmp; >- return tmp; >- } >- } >-#endif >- >+ void* tmp; > tmp=a52_resample_C(flags,chans); > if(tmp){ > if(a52_resample==NULL) fprintf(stderr, "No accelerated resampler found\n"); >--- libmpcodecs/vf_fspp.c 2009-02-03 06:50:55.000000000 +0100 >+++ libmpcodecs/vf_fspp.c 2009-05-08 22:33:45.000000000 +0200 >@@ -61,6 +61,10 @@ > #undef free > #undef malloc > >+#ifdef HAVE_MMX >+#undef HAVE_MMX >+#define restore_have_mmx >+#endif > //===========================================================================// > #define BLOCKSZ 12 > >@@ -2114,3 +2118,8 @@ > } > > #endif // HAVE_MMX >+ >+#ifdef restore_have_mmx >+#undef restore_have_mmx >+#define HAVE_MMX >+#endif >--- mp3lib/decode_mmx.c 2009-02-03 06:50:58.000000000 +0100 >+++ mp3lib/decode_mmx.c 2009-05-08 21:55:45.000000000 +0200 >@@ -186,186 +186,3 @@ > 9834, -1490, 1379, -500, 129, -55, 7, 0, > }; > >-int synth_1to1_MMX(real *bandPtr, int channel, short *samples) >-{ >- static short buffs[2][2][0x110] __attribute__((aligned(8))); >- static int bo = 1; >- short *b0, (*buf)[0x110], *a, *b; >- short* window; >- int bo1, i = 8; >- >- if (channel == 0) { >- bo = (bo - 1) & 0xf; >- buf = buffs[1]; >- } else { >- samples++; >- buf = buffs[0]; >- } >- >- if (bo & 1) { >- b0 = buf[1]; >- bo1 = bo + 1; >- a = buf[0] + bo; >- b = buf[1] + ((bo + 1) & 0xf); >- } else { >- b0 = buf[0]; >- bo1 = bo; >- b = buf[0] + bo; >- a = buf[1] + ((bo + 1) & 0xf); >- } >- >- dct64_MMX_func(a, b, bandPtr); >- window = mp3lib_decwins + 16 - bo1; >- //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1); >-__asm__ volatile( >-ASMALIGN(4) >-".L03:\n\t" >- "movq (%1),%%mm0\n\t" >- "movq 64(%1),%%mm4\n\t" >- "pmaddwd (%2),%%mm0\n\t" >- "pmaddwd 32(%2),%%mm4\n\t" >- "movq 8(%1),%%mm1\n\t" >- "movq 72(%1),%%mm5\n\t" >- "pmaddwd 8(%2),%%mm1\n\t" >- "pmaddwd 40(%2),%%mm5\n\t" >- "movq 16(%1),%%mm2\n\t" >- "movq 80(%1),%%mm6\n\t" >- "pmaddwd 16(%2),%%mm2\n\t" >- "pmaddwd 48(%2),%%mm6\n\t" >- "movq 24(%1),%%mm3\n\t" >- "movq 88(%1),%%mm7\n\t" >- "pmaddwd 24(%2),%%mm3\n\t" >- "pmaddwd 56(%2),%%mm7\n\t" >- "paddd %%mm1,%%mm0\n\t" >- "paddd %%mm5,%%mm4\n\t" >- "paddd %%mm2,%%mm0\n\t" >- "paddd %%mm6,%%mm4\n\t" >- "paddd %%mm3,%%mm0\n\t" >- "paddd %%mm7,%%mm4\n\t" >- "movq %%mm0,%%mm1\n\t" >- "movq %%mm4,%%mm5\n\t" >- "psrlq $32,%%mm1\n\t" >- "psrlq $32,%%mm5\n\t" >- "paddd %%mm1,%%mm0\n\t" >- "paddd %%mm5,%%mm4\n\t" >- "psrad $13,%%mm0\n\t" >- "psrad $13,%%mm4\n\t" >- "packssdw %%mm0,%%mm0\n\t" >- "packssdw %%mm4,%%mm4\n\t" >- >- "movq (%3), %%mm1\n\t" >- "punpckldq %%mm4, %%mm0\n\t" >- "pand "MANGLE(one_null)", %%mm1\n\t" >- "pand "MANGLE(null_one)", %%mm0\n\t" >- "por %%mm0, %%mm1\n\t" >- "movq %%mm1,(%3)\n\t" >- >- "add $64,%2\n\t" >- "add $128,%1\n\t" >- "add $8,%3\n\t" >- >- "decl %0\n\t" >- "jnz .L03\n\t" >- >- "movq (%1),%%mm0\n\t" >- "pmaddwd (%2),%%mm0\n\t" >- "movq 8(%1),%%mm1\n\t" >- "pmaddwd 8(%2),%%mm1\n\t" >- "movq 16(%1),%%mm2\n\t" >- "pmaddwd 16(%2),%%mm2\n\t" >- "movq 24(%1),%%mm3\n\t" >- "pmaddwd 24(%2),%%mm3\n\t" >- "paddd %%mm1,%%mm0\n\t" >- "paddd %%mm2,%%mm0\n\t" >- "paddd %%mm3,%%mm0\n\t" >- "movq %%mm0,%%mm1\n\t" >- "psrlq $32,%%mm1\n\t" >- "paddd %%mm1,%%mm0\n\t" >- "psrad $13,%%mm0\n\t" >- "packssdw %%mm0,%%mm0\n\t" >- "movd %%mm0,%%eax\n\t" >- "movw %%ax, (%3)\n\t" >- "sub $32,%2\n\t" >- "add $64,%1\n\t" >- "add $4,%3\n\t" >- >- "movl $7,%0\n\t" >-ASMALIGN(4) >-".L04:\n\t" >- "movq (%1),%%mm0\n\t" >- "movq 64(%1),%%mm4\n\t" >- "pmaddwd (%2),%%mm0\n\t" >- "pmaddwd -32(%2),%%mm4\n\t" >- "movq 8(%1),%%mm1\n\t" >- "movq 72(%1),%%mm5\n\t" >- "pmaddwd 8(%2),%%mm1\n\t" >- "pmaddwd -24(%2),%%mm5\n\t" >- "movq 16(%1),%%mm2\n\t" >- "movq 80(%1),%%mm6\n\t" >- "pmaddwd 16(%2),%%mm2\n\t" >- "pmaddwd -16(%2),%%mm6\n\t" >- "movq 24(%1),%%mm3\n\t" >- "movq 88(%1),%%mm7\n\t" >- "pmaddwd 24(%2),%%mm3\n\t" >- "pmaddwd -8(%2),%%mm7\n\t" >- "paddd %%mm1,%%mm0\n\t" >- "paddd %%mm5,%%mm4\n\t" >- "paddd %%mm2,%%mm0\n\t" >- "paddd %%mm6,%%mm4\n\t" >- "paddd %%mm3,%%mm0\n\t" >- "paddd %%mm7,%%mm4\n\t" >- "movq %%mm0,%%mm1\n\t" >- "movq %%mm4,%%mm5\n\t" >- "psrlq $32,%%mm1\n\t" >- "psrlq $32,%%mm5\n\t" >- "paddd %%mm0,%%mm1\n\t" >- "paddd %%mm4,%%mm5\n\t" >- "psrad $13,%%mm1\n\t" >- "psrad $13,%%mm5\n\t" >- "packssdw %%mm1,%%mm1\n\t" >- "packssdw %%mm5,%%mm5\n\t" >- "psubd %%mm0,%%mm0\n\t" >- "psubd %%mm4,%%mm4\n\t" >- "psubsw %%mm1,%%mm0\n\t" >- "psubsw %%mm5,%%mm4\n\t" >- >- "movq (%3), %%mm1\n\t" >- "punpckldq %%mm4, %%mm0\n\t" >- "pand "MANGLE(one_null)", %%mm1\n\t" >- "pand "MANGLE(null_one)", %%mm0\n\t" >- "por %%mm0, %%mm1\n\t" >- "movq %%mm1,(%3)\n\t" >- >- "sub $64,%2\n\t" >- "add $128,%1\n\t" >- "add $8,%3\n\t" >- "decl %0\n\t" >- "jnz .L04\n\t" >- >- "movq (%1),%%mm0\n\t" >- "pmaddwd (%2),%%mm0\n\t" >- "movq 8(%1),%%mm1\n\t" >- "pmaddwd 8(%2),%%mm1\n\t" >- "movq 16(%1),%%mm2\n\t" >- "pmaddwd 16(%2),%%mm2\n\t" >- "movq 24(%1),%%mm3\n\t" >- "pmaddwd 24(%2),%%mm3\n\t" >- "paddd %%mm1,%%mm0\n\t" >- "paddd %%mm2,%%mm0\n\t" >- "paddd %%mm3,%%mm0\n\t" >- "movq %%mm0,%%mm1\n\t" >- "psrlq $32,%%mm1\n\t" >- "paddd %%mm0,%%mm1\n\t" >- "psrad $13,%%mm1\n\t" >- "packssdw %%mm1,%%mm1\n\t" >- "psubd %%mm0,%%mm0\n\t" >- "psubsw %%mm1,%%mm0\n\t" >- "movd %%mm0,%%eax\n\t" >- "movw %%ax,(%3)\n\t" >- "emms\n\t" >- :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples) >- : >- :"memory", "%eax"); >- return 0; >-} >- >--- mp3lib/sr1.c 2009-02-03 06:50:58.000000000 +0100 >+++ mp3lib/sr1.c 2009-05-08 21:40:43.000000000 +0200 >@@ -419,14 +419,6 @@ > > make_decode_tables(outscale); > >-#if HAVE_MMX >- if (gCpuCaps.hasMMX) >- { >- _has_mmx = 1; >- synth_func = synth_1to1_MMX; >- } >-#endif >- > #if HAVE_AMD3DNOWEXT > if (gCpuCaps.has3DNowExt) > {
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 269103
: 190738