Lines 39-45
Link Here
|
39 |
#define rounder(bias) {round (bias), round (bias)} |
39 |
#define rounder(bias) {round (bias), round (bias)} |
40 |
#define rounder_sse2(bias) {round (bias), round (bias), round (bias), round (bias)} |
40 |
#define rounder_sse2(bias) {round (bias), round (bias), round (bias), round (bias)} |
41 |
|
41 |
|
42 |
|
|
|
43 |
#if 0 |
42 |
#if 0 |
44 |
/* C row IDCT - it is just here to document the MMXEXT and MMX versions */ |
43 |
/* C row IDCT - it is just here to document the MMXEXT and MMX versions */ |
45 |
static inline void idct_row (int16_t * row, int offset, |
44 |
static inline void idct_row (int16_t * row, int offset, |
Lines 464-473
static inline void sse2_idct_col (int16_
Link Here
|
464 |
/* Almost identical to mmxext version: */ |
463 |
/* Almost identical to mmxext version: */ |
465 |
/* just do both 4x8 columns in parallel */ |
464 |
/* just do both 4x8 columns in parallel */ |
466 |
|
465 |
|
467 |
static const short t1_vector[] ATTR_ALIGN(16) = {T1,T1,T1,T1,T1,T1,T1,T1}; |
466 |
static/* const*/ short t1_vector[] ATTR_ALIGN(16) = {T1,T1,T1,T1,T1,T1,T1,T1}; |
468 |
static const short t2_vector[] ATTR_ALIGN(16) = {T2,T2,T2,T2,T2,T2,T2,T2}; |
467 |
static/* const*/ short t2_vector[] ATTR_ALIGN(16) = {T2,T2,T2,T2,T2,T2,T2,T2}; |
469 |
static const short t3_vector[] ATTR_ALIGN(16) = {T3,T3,T3,T3,T3,T3,T3,T3}; |
468 |
static/* const*/ short t3_vector[] ATTR_ALIGN(16) = {T3,T3,T3,T3,T3,T3,T3,T3}; |
470 |
static const short c4_vector[] ATTR_ALIGN(16) = {C4,C4,C4,C4,C4,C4,C4,C4}; |
469 |
static/* const*/ short c4_vector[] ATTR_ALIGN(16) = {C4,C4,C4,C4,C4,C4,C4,C4}; |
471 |
|
470 |
|
472 |
#if defined(__x86_64__) |
471 |
#if defined(__x86_64__) |
473 |
|
472 |
|
Lines 710-719
static inline void sse2_idct_col (int16_
Link Here
|
710 |
/* MMX column IDCT */ |
709 |
/* MMX column IDCT */ |
711 |
static inline void idct_col (int16_t * const col, const int offset) |
710 |
static inline void idct_col (int16_t * const col, const int offset) |
712 |
{ |
711 |
{ |
713 |
static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; |
712 |
static/* const*/ short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; |
714 |
static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; |
713 |
static/* const*/ short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; |
715 |
static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; |
714 |
static/* const*/ short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; |
716 |
static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; |
715 |
static/* const*/ short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; |
717 |
|
716 |
|
718 |
/* column code adapted from peter gubanov */ |
717 |
/* column code adapted from peter gubanov */ |
719 |
/* http://www.elecard.com/peter/idct.shtml */ |
718 |
/* http://www.elecard.com/peter/idct.shtml */ |
Lines 847-879
static inline void idct_col (int16_t * c
Link Here
|
847 |
} |
846 |
} |
848 |
|
847 |
|
849 |
|
848 |
|
850 |
static const int32_t rounder0[] ATTR_ALIGN(8) = |
849 |
static/* const*/ int32_t rounder0[] ATTR_ALIGN(8) = |
851 |
rounder ((1 << (COL_SHIFT - 1)) - 0.5); |
850 |
rounder ((1 << (COL_SHIFT - 1)) - 0.5); |
852 |
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); |
851 |
static/* const*/ int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); |
853 |
static const int32_t rounder1[] ATTR_ALIGN(8) = |
852 |
static/* const*/ int32_t rounder1[] ATTR_ALIGN(8) = |
854 |
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ |
853 |
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ |
855 |
static const int32_t rounder7[] ATTR_ALIGN(8) = |
854 |
static/* const*/ int32_t rounder7[] ATTR_ALIGN(8) = |
856 |
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ |
855 |
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ |
857 |
static const int32_t rounder2[] ATTR_ALIGN(8) = |
856 |
static/* const*/ int32_t rounder2[] ATTR_ALIGN(8) = |
858 |
rounder (0.60355339059); /* C2 * (C6+C2)/2 */ |
857 |
rounder (0.60355339059); /* C2 * (C6+C2)/2 */ |
859 |
static const int32_t rounder6[] ATTR_ALIGN(8) = |
858 |
static/* const*/ int32_t rounder6[] ATTR_ALIGN(8) = |
860 |
rounder (-0.25); /* C2 * (C6-C2)/2 */ |
859 |
rounder (-0.25); /* C2 * (C6-C2)/2 */ |
861 |
static const int32_t rounder3[] ATTR_ALIGN(8) = |
860 |
static/* const*/ int32_t rounder3[] ATTR_ALIGN(8) = |
862 |
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ |
861 |
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ |
863 |
static const int32_t rounder5[] ATTR_ALIGN(8) = |
862 |
static/* const*/ int32_t rounder5[] ATTR_ALIGN(8) = |
864 |
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ |
863 |
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ |
865 |
|
864 |
|
866 |
|
865 |
|
867 |
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ |
866 |
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ |
868 |
static inline void idct (int16_t * const block) \ |
867 |
static inline void idct (int16_t * const block) \ |
869 |
{ \ |
868 |
{ \ |
870 |
static const int16_t table04[] ATTR_ALIGN(16) = \ |
869 |
static/* const*/ int16_t table04[] ATTR_ALIGN(16) = \ |
871 |
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ |
870 |
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ |
872 |
static const int16_t table17[] ATTR_ALIGN(16) = \ |
871 |
static/* const*/ int16_t table17[] ATTR_ALIGN(16) = \ |
873 |
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ |
872 |
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ |
874 |
static const int16_t table26[] ATTR_ALIGN(16) = \ |
873 |
static/* const*/ int16_t table26[] ATTR_ALIGN(16) = \ |
875 |
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ |
874 |
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ |
876 |
static const int16_t table35[] ATTR_ALIGN(16) = \ |
875 |
static/* const*/ int16_t table35[] ATTR_ALIGN(16) = \ |
877 |
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ |
876 |
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ |
878 |
\ |
877 |
\ |
879 |
idct_row_head (block, 0*8, table04); \ |
878 |
idct_row_head (block, 0*8, table04); \ |
Lines 900-928
static inline void idct (int16_t * const
Link Here
|
900 |
|
899 |
|
901 |
static inline void sse2_idct (int16_t * const block) |
900 |
static inline void sse2_idct (int16_t * const block) |
902 |
{ |
901 |
{ |
903 |
static const int16_t table04[] ATTR_ALIGN(16) = |
902 |
static/* const*/ int16_t table04[] ATTR_ALIGN(16) = |
904 |
sse2_table (22725, 21407, 19266, 16384, 12873, 8867, 4520); |
903 |
sse2_table (22725, 21407, 19266, 16384, 12873, 8867, 4520); |
905 |
static const int16_t table17[] ATTR_ALIGN(16) = |
904 |
static/* const*/ int16_t table17[] ATTR_ALIGN(16) = |
906 |
sse2_table (31521, 29692, 26722, 22725, 17855, 12299, 6270); |
905 |
sse2_table (31521, 29692, 26722, 22725, 17855, 12299, 6270); |
907 |
static const int16_t table26[] ATTR_ALIGN(16) = |
906 |
static/* const*/ int16_t table26[] ATTR_ALIGN(16) = |
908 |
sse2_table (29692, 27969, 25172, 21407, 16819, 11585, 5906); |
907 |
sse2_table (29692, 27969, 25172, 21407, 16819, 11585, 5906); |
909 |
static const int16_t table35[] ATTR_ALIGN(16) = |
908 |
static/* const*/ int16_t table35[] ATTR_ALIGN(16) = |
910 |
sse2_table (26722, 25172, 22654, 19266, 15137, 10426, 5315); |
909 |
sse2_table (26722, 25172, 22654, 19266, 15137, 10426, 5315); |
911 |
|
910 |
|
912 |
static const int32_t rounder0_128[] ATTR_ALIGN(16) = |
911 |
static/* const*/ int32_t rounder0_128[] ATTR_ALIGN(16) = |
913 |
rounder_sse2 ((1 << (COL_SHIFT - 1)) - 0.5); |
912 |
rounder_sse2 ((1 << (COL_SHIFT - 1)) - 0.5); |
914 |
static const int32_t rounder4_128[] ATTR_ALIGN(16) = rounder_sse2 (0); |
913 |
static/* const*/ int32_t rounder4_128[] ATTR_ALIGN(16) = rounder_sse2 (0); |
915 |
static const int32_t rounder1_128[] ATTR_ALIGN(16) = |
914 |
static/* const*/ int32_t rounder1_128[] ATTR_ALIGN(16) = |
916 |
rounder_sse2 (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ |
915 |
rounder_sse2 (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ |
917 |
static const int32_t rounder7_128[] ATTR_ALIGN(16) = |
916 |
static/* const*/ int32_t rounder7_128[] ATTR_ALIGN(16) = |
918 |
rounder_sse2 (-0.25); /* C1*(C7/C4+C7-C1)/2 */ |
917 |
rounder_sse2 (-0.25); /* C1*(C7/C4+C7-C1)/2 */ |
919 |
static const int32_t rounder2_128[] ATTR_ALIGN(16) = |
918 |
static/* const*/ int32_t rounder2_128[] ATTR_ALIGN(16) = |
920 |
rounder_sse2 (0.60355339059); /* C2 * (C6+C2)/2 */ |
919 |
rounder_sse2 (0.60355339059); /* C2 * (C6+C2)/2 */ |
921 |
static const int32_t rounder6_128[] ATTR_ALIGN(16) = |
920 |
static/* const*/ int32_t rounder6_128[] ATTR_ALIGN(16) = |
922 |
rounder_sse2 (-0.25); /* C2 * (C6-C2)/2 */ |
921 |
rounder_sse2 (-0.25); /* C2 * (C6-C2)/2 */ |
923 |
static const int32_t rounder3_128[] ATTR_ALIGN(16) = |
922 |
static/* const*/ int32_t rounder3_128[] ATTR_ALIGN(16) = |
924 |
rounder_sse2 (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ |
923 |
rounder_sse2 (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ |
925 |
static const int32_t rounder5_128[] ATTR_ALIGN(16) = |
924 |
static/* const*/ int32_t rounder5_128[] ATTR_ALIGN(16) = |
926 |
rounder_sse2 (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ |
925 |
rounder_sse2 (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ |
927 |
|
926 |
|
928 |
#if defined(__x86_64__) |
927 |
#if defined(__x86_64__) |