Line
Link Here
|
0 |
-- libdv-0.104-old/libdv/asm_common.S |
0 |
++ libdv-0.104/libdv/asm_common.S |
Line 0
Link Here
|
0 |
-- libdv-0.104-old/libdv/dct_block_mmx.S |
1 |
/* public domain, do what you want */ |
|
|
2 |
|
3 |
#ifdef __PIC__ |
4 |
# define MUNG(sym) sym##@GOTOFF(%ebp) |
5 |
# define MUNG_ARR(sym, args...) sym##@GOTOFF(%ebp,##args) |
6 |
#else |
7 |
# define MUNG(sym) sym |
8 |
# define MUNG_ARR(sym, args...) sym(,##args) |
9 |
#endif |
10 |
|
11 |
#ifdef __PIC__ |
12 |
# undef __i686 /* gcc define gets in our way */ |
13 |
# define LOAD_PIC_REG(reg) \ |
14 |
.ifndef __i686.get_pc_thunk.reg; \ |
15 |
.section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \ |
16 |
.global __i686.get_pc_thunk.reg; \ |
17 |
.hidden __i686.get_pc_thunk.reg; \ |
18 |
.type __i686.get_pc_thunk.reg,@function; \ |
19 |
__i686.get_pc_thunk.reg: \ |
20 |
movl (%esp), %e##reg; \ |
21 |
ret; \ |
22 |
.size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \ |
23 |
.previous; \ |
24 |
.endif; \ |
25 |
call __i686.get_pc_thunk.reg; \ |
26 |
addl $_GLOBAL_OFFSET_TABLE_, %e##reg |
27 |
#else |
28 |
# define LOAD_PIC_REG(reg) |
29 |
#endif |
|
|
30 |
++ libdv-0.104/libdv/dct_block_mmx.S |
Lines 53-71
scratch2: .quad 0
Link Here
|
53 |
|
53 |
|
54 |
.section .note.GNU-stack, "", @progbits |
54 |
.section .note.GNU-stack, "", @progbits |
55 |
|
55 |
|
|
|
56 |
#include "asm_common.S" |
57 |
|
56 |
.text |
58 |
.text |
57 |
|
59 |
|
58 |
.align 8 |
60 |
.align 8 |
59 |
.global _dv_dct_88_block_mmx |
61 |
.global _dv_dct_88_block_mmx |
60 |
.hidden _dv_dct_88_block_mmx |
62 |
.hidden _dv_dct_88_block_mmx |
61 |
.type _dv_dct_88_block_mmx,@function |
63 |
.type _dv_dct_88_block_mmx,@function |
62 |
_dv_dct_88_block_mmx: |
64 |
_dv_dct_88_block_mmx: |
63 |
|
65 |
|
64 |
pushl %ebp |
66 |
pushl %ebp |
65 |
movl %esp, %ebp |
|
|
66 |
pushl %esi |
67 |
pushl %esi |
67 |
|
68 |
|
68 |
movl 8(%ebp), %esi # source |
69 |
LOAD_PIC_REG(bp) |
|
|
70 |
|
71 |
movl 12(%esp), %esi # source |
69 |
|
72 |
|
70 |
# column 0 |
73 |
# column 0 |
71 |
movq 16*0(%esi), %mm0 # v0 |
74 |
movq 16*0(%esi), %mm0 # v0 |
Lines 86-107
_dv_dct_88_block_mmx:
Link Here
|
86 |
|
91 |
|
87 |
movq 16*3(%esi), %mm5 # v3 |
92 |
movq 16*3(%esi), %mm5 # v3 |
88 |
movq 16*4(%esi), %mm7 # v4 |
93 |
movq 16*4(%esi), %mm7 # v4 |
89 |
movq %mm7, scratch1 # scratch1: v4 ; |
94 |
movq %mm7, MUNG(scratch1) # scratch1: v4 ; |
90 |
movq %mm5, %mm7 # duplicate v3 |
95 |
movq %mm5, %mm7 # duplicate v3 |
91 |
paddw scratch1, %mm5 # v03: v3+v4 |
96 |
paddw MUNG(scratch1), %mm5 # v03: v3+v4 |
92 |
psubw scratch1, %mm7 # v04: v3-v4 |
97 |
psubw MUNG(scratch1), %mm7 # v04: v3-v4 |
93 |
movq %mm5, scratch2 # scratch2: v03 |
98 |
movq %mm5, MUNG(scratch2) # scratch2: v03 |
94 |
movq %mm0, %mm5 # mm5: v00 |
99 |
movq %mm0, %mm5 # mm5: v00 |
95 |
|
100 |
|
96 |
paddw scratch2, %mm0 # v10: v00+v03 |
101 |
paddw MUNG(scratch2), %mm0 # v10: v00+v03 |
97 |
psubw scratch2, %mm5 # v13: v00-v03 |
102 |
psubw MUNG(scratch2), %mm5 # v13: v00-v03 |
98 |
movq %mm3, scratch3 # scratch3: v02 |
103 |
movq %mm3, MUNG(scratch3) # scratch3: v02 |
99 |
movq %mm1, %mm3 # duplicate v01 |
104 |
movq %mm1, %mm3 # duplicate v01 |
100 |
|
105 |
|
101 |
paddw scratch3, %mm1 # v11: v01+v02 |
106 |
paddw MUNG(scratch3), %mm1 # v11: v01+v02 |
102 |
psubw scratch3, %mm3 # v12: v01-v02 |
107 |
psubw MUNG(scratch3), %mm3 # v12: v01-v02 |
103 |
|
108 |
|
104 |
movq %mm6, scratch4 # scratch4: v05 |
109 |
movq %mm6, MUNG(scratch4) # scratch4: v05 |
105 |
movq %mm0, %mm6 # duplicate v10 |
110 |
movq %mm0, %mm6 # duplicate v10 |
106 |
|
111 |
|
107 |
paddw %mm1, %mm0 # v10+v11 |
112 |
paddw %mm1, %mm0 # v10+v11 |
Lines 111-120
_dv_dct_88_block_mmx:
Link Here
|
111 |
movq %mm6, 16*4(%esi) # out4: v10-v11 |
116 |
movq %mm6, 16*4(%esi) # out4: v10-v11 |
112 |
|
117 |
|
113 |
movq %mm4, %mm0 # mm0: v06 |
118 |
movq %mm4, %mm0 # mm0: v06 |
114 |
paddw scratch4, %mm4 # v15: v05+v06 |
119 |
paddw MUNG(scratch4), %mm4 # v15: v05+v06 |
115 |
paddw %mm2, %mm0 # v16: v07+v06 |
120 |
paddw %mm2, %mm0 # v16: v07+v06 |
116 |
|
121 |
|
117 |
pmulhw WA3, %mm4 # v35~: WA3*v15 |
122 |
pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 |
118 |
psllw $1, %mm4 # v35: compensate the coeefient scale |
123 |
psllw $1, %mm4 # v35: compensate the coeefient scale |
119 |
|
124 |
|
120 |
movq %mm4, %mm6 # duplicate v35 |
125 |
movq %mm4, %mm6 # duplicate v35 |
Lines 123-129
_dv_dct_88_block_mmx:
Link Here
|
123 |
|
128 |
|
124 |
paddw %mm5, %mm3 # v22: v12+v13 |
129 |
paddw %mm5, %mm3 # v22: v12+v13 |
125 |
|
130 |
|
126 |
pmulhw WA1, %mm3 # v32~: WA1*v22 |
131 |
pmulhw MUNG(WA1), %mm3 # v32~: WA1*v22 |
127 |
psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale |
132 |
psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale |
128 |
movq %mm5, %mm6 # duplicate v13 |
133 |
movq %mm5, %mm6 # duplicate v13 |
129 |
|
134 |
|
Lines 134-146
_dv_dct_88_block_mmx:
Link Here
|
134 |
movq %mm6, 16*6(%esi) # out6: v13-v32 |
139 |
movq %mm6, 16*6(%esi) # out6: v13-v32 |
135 |
|
140 |
|
136 |
|
141 |
|
137 |
paddw scratch4, %mm7 # v14n: v04+v05 |
142 |
paddw MUNG(scratch4), %mm7 # v14n: v04+v05 |
138 |
movq %mm0, %mm5 # duplicate v16 |
143 |
movq %mm0, %mm5 # duplicate v16 |
139 |
|
144 |
|
140 |
psubw %mm7, %mm0 # va1: v16-v14n |
145 |
psubw %mm7, %mm0 # va1: v16-v14n |
141 |
pmulhw WA5, %mm0 # va0~: va1*WA5 |
146 |
pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 |
142 |
pmulhw WA4, %mm5 # v36~~: v16*WA4 |
147 |
pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 |
143 |
pmulhw WA2, %mm7 # v34~~: v14n*WA2 |
148 |
pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 |
144 |
psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale |
149 |
psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeefient scale |
145 |
psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale |
150 |
psllw $16-NSHIFT, %mm7 # v34: compensate the coeefient scale |
146 |
|
151 |
|
Lines 188-209
_dv_dct_88_block_mmx:
Link Here
|
188 |
|
193 |
|
189 |
movq 16*3(%esi), %mm5 # v3 |
194 |
movq 16*3(%esi), %mm5 # v3 |
190 |
movq 16*4(%esi), %mm7 # v4 |
195 |
movq 16*4(%esi), %mm7 # v4 |
191 |
movq %mm7, scratch1 # scratch1: v4 ; |
196 |
movq %mm7, MUNG(scratch1) # scratch1: v4 ; |
192 |
movq %mm5, %mm7 # duplicate v3 |
197 |
movq %mm5, %mm7 # duplicate v3 |
193 |
paddw scratch1, %mm5 # v03: v3+v4 |
198 |
paddw MUNG(scratch1), %mm5 # v03: v3+v4 |
194 |
psubw scratch1, %mm7 # v04: v3-v4 |
199 |
psubw MUNG(scratch1), %mm7 # v04: v3-v4 |
195 |
movq %mm5, scratch2 # scratch2: v03 |
200 |
movq %mm5, MUNG(scratch2) # scratch2: v03 |
196 |
movq %mm0, %mm5 # mm5: v00 |
201 |
movq %mm0, %mm5 # mm5: v00 |
197 |
|
202 |
|
198 |
paddw scratch2, %mm0 # v10: v00+v03 |
203 |
paddw MUNG(scratch2), %mm0 # v10: v00+v03 |
199 |
psubw scratch2, %mm5 # v13: v00-v03 |
204 |
psubw MUNG(scratch2), %mm5 # v13: v00-v03 |
200 |
movq %mm3, scratch3 # scratc3: v02 |
205 |
movq %mm3, MUNG(scratch3) # scratc3: v02 |
201 |
movq %mm1, %mm3 # duplicate v01 |
206 |
movq %mm1, %mm3 # duplicate v01 |
202 |
|
207 |
|
203 |
paddw scratch3, %mm1 # v11: v01+v02 |
208 |
paddw MUNG(scratch3), %mm1 # v11: v01+v02 |
204 |
psubw scratch3, %mm3 # v12: v01-v02 |
209 |
psubw MUNG(scratch3), %mm3 # v12: v01-v02 |
205 |
|
210 |
|
206 |
movq %mm6, scratch4 # scratc4: v05 |
211 |
movq %mm6, MUNG(scratch4) # scratc4: v05 |
207 |
movq %mm0, %mm6 # duplicate v10 |
212 |
movq %mm0, %mm6 # duplicate v10 |
208 |
|
213 |
|
209 |
paddw %mm1, %mm0 # v10+v11 |
214 |
paddw %mm1, %mm0 # v10+v11 |
Lines 213-222
_dv_dct_88_block_mmx:
Link Here
|
213 |
movq %mm6, 16*4(%esi) # out4: v10-v11 |
218 |
movq %mm6, 16*4(%esi) # out4: v10-v11 |
214 |
|
219 |
|
215 |
movq %mm4, %mm0 # mm0: v06 |
220 |
movq %mm4, %mm0 # mm0: v06 |
216 |
paddw scratch4, %mm4 # v15: v05+v06 |
221 |
paddw MUNG(scratch4), %mm4 # v15: v05+v06 |
217 |
paddw %mm2, %mm0 # v16: v07+v06 |
222 |
paddw %mm2, %mm0 # v16: v07+v06 |
218 |
|
223 |
|
219 |
pmulhw WA3, %mm4 # v35~: WA3*v15 |
224 |
pmulhw MUNG(WA3), %mm4 # v35~: WA3*v15 |
220 |
psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale |
225 |
psllw $16-NSHIFT, %mm4 # v35: compensate the coeefient scale |
221 |
|
226 |
|
222 |
movq %mm4, %mm6 # duplicate v35 |
227 |
movq %mm4, %mm6 # duplicate v35 |
Lines 225-231
_dv_dct_88_block_mmx:
Link Here
|
225 |
|
230 |
|
226 |
paddw %mm5, %mm3 # v22: v12+v13 |
231 |
paddw %mm5, %mm3 # v22: v12+v13 |
227 |
|
232 |
|
228 |
pmulhw WA1, %mm3 # v32~: WA3*v15 |
233 |
pmulhw MUNG(WA1), %mm3 # v32~: WA3*v15 |
229 |
psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale |
234 |
psllw $16-NSHIFT, %mm3 # v32: compensate the coeefient scale |
230 |
movq %mm5, %mm6 # duplicate v13 |
235 |
movq %mm5, %mm6 # duplicate v13 |
231 |
|
236 |
|
Lines 235-247
_dv_dct_88_block_mmx:
Link Here
|
235 |
movq %mm5, 16*2(%esi) # out2: v13+v32 |
240 |
movq %mm5, 16*2(%esi) # out2: v13+v32 |
236 |
movq %mm6, 16*6(%esi) # out6: v13-v32 |
241 |
movq %mm6, 16*6(%esi) # out6: v13-v32 |
237 |
|
242 |
|
238 |
paddw scratch4, %mm7 # v14n: v04+v05 |
243 |
paddw MUNG(scratch4), %mm7 # v14n: v04+v05 |
239 |
movq %mm0, %mm5 # duplicate v16 |
244 |
movq %mm0, %mm5 # duplicate v16 |
240 |
|
245 |
|
241 |
psubw %mm7, %mm0 # va1: v16-v14n |
246 |
psubw %mm7, %mm0 # va1: v16-v14n |
242 |
pmulhw WA2, %mm7 # v34~~: v14n*WA2 |
247 |
pmulhw MUNG(WA2), %mm7 # v34~~: v14n*WA2 |
243 |
pmulhw WA5, %mm0 # va0~: va1*WA5 |
248 |
pmulhw MUNG(WA5), %mm0 # va0~: va1*WA5 |
244 |
pmulhw WA4, %mm5 # v36~~: v16*WA4 |
249 |
pmulhw MUNG(WA4), %mm5 # v36~~: v16*WA4 |
245 |
psllw $16-NSHIFT, %mm7 |
250 |
psllw $16-NSHIFT, %mm7 |
246 |
psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient |
251 |
psllw $16-WA4_SHIFT, %mm5 # v36: compensate the coeffient |
247 |
# scale note that WA4 is shifted 1 bit less than the others |
252 |
# scale note that WA4 is shifted 1 bit less than the others |
Lines 748-758
_dv_dct_block_mmx_postscale_88:
Link Here
|
748 |
_dv_dct_248_block_mmx: |
755 |
_dv_dct_248_block_mmx: |
749 |
|
756 |
|
750 |
pushl %ebp |
757 |
pushl %ebp |
751 |
movl %esp, %ebp |
|
|
752 |
pushl %esi |
758 |
pushl %esi |
753 |
pushl %edi |
759 |
pushl %edi |
754 |
|
760 |
|
755 |
movl 8(%ebp), %esi # source |
761 |
LOAD_PIC_REG(bp) |
|
|
762 |
|
763 |
movl 16(%esp), %esi # source |
756 |
|
764 |
|
757 |
# column 0 |
765 |
# column 0 |
758 |
|
766 |
|
Lines 779-785
_dv_dct_248_block_mmx:
Link Here
|
779 |
paddw %mm1, %mm0 # v20: v10+v11 |
789 |
paddw %mm1, %mm0 # v20: v10+v11 |
780 |
psubw %mm1, %mm3 # v21: v10-v11 |
790 |
psubw %mm1, %mm3 # v21: v10-v11 |
781 |
|
791 |
|
782 |
pmulhw WA1, %mm5 # v32~: WA1*v22 |
792 |
pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 |
783 |
movq %mm4, %mm2 |
793 |
movq %mm4, %mm2 |
784 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
794 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
785 |
|
795 |
|
Lines 818-824
_dv_dct_248_block_mmx:
Link Here
|
818 |
paddw %mm1, %mm0 # v20: v10+v11 |
828 |
paddw %mm1, %mm0 # v20: v10+v11 |
819 |
psubw %mm1, %mm3 # v21: v10-v11 |
829 |
psubw %mm1, %mm3 # v21: v10-v11 |
820 |
|
830 |
|
821 |
pmulhw WA1, %mm5 # v32~: WA1*v22 |
831 |
pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 |
822 |
movq %mm4, %mm2 |
832 |
movq %mm4, %mm2 |
823 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
833 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
824 |
|
834 |
|
Lines 855-861
_dv_dct_248_block_mmx:
Link Here
|
855 |
paddw %mm1, %mm0 # v20: v10+v11 |
865 |
paddw %mm1, %mm0 # v20: v10+v11 |
856 |
psubw %mm1, %mm3 # v21: v10-v11 |
866 |
psubw %mm1, %mm3 # v21: v10-v11 |
857 |
|
867 |
|
858 |
pmulhw WA1, %mm5 # v32~: WA1*v22 |
868 |
pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 |
859 |
movq %mm4, %mm2 |
869 |
movq %mm4, %mm2 |
860 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
870 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
861 |
|
871 |
|
Lines 892-898
_dv_dct_248_block_mmx:
Link Here
|
892 |
paddw %mm1, %mm0 # v20: v10+v11 |
902 |
paddw %mm1, %mm0 # v20: v10+v11 |
893 |
psubw %mm1, %mm3 # v21: v10-v11 |
903 |
psubw %mm1, %mm3 # v21: v10-v11 |
894 |
|
904 |
|
895 |
pmulhw WA1, %mm5 # v32~: WA1*v22 |
905 |
pmulhw MUNG(WA1), %mm5 # v32~: WA1*v22 |
896 |
movq %mm4, %mm2 |
906 |
movq %mm4, %mm2 |
897 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
907 |
psllw $16-NSHIFT, %mm5 # v32: compensate the coeffient scale |
898 |
|
908 |
|
899 |
-- libdv-0.104-old/libdv/dv.c |
909 |
++ libdv-0.104/libdv/dv.c |
Lines 205-210
dv_reconfigure(int clamp_luma, int clamp
Link Here
|
205 |
} /* dv_reconfigure */ |
205 |
} /* dv_reconfigure */ |
206 |
|
206 |
|
207 |
|
207 |
|
|
|
208 |
extern uint8_t dv_quant_offset[4]; |
209 |
extern uint8_t dv_quant_shifts[22][4]; |
210 |
|
208 |
static inline void |
211 |
static inline void |
209 |
dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { |
212 |
dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) { |
210 |
int i; |
213 |
int i; |
Lines 218-224
dv_decode_macroblock(dv_decoder_t *dv, d
Link Here
|
218 |
dv_idct_248 (co248, mb->b[i].coeffs); |
221 |
dv_idct_248 (co248, mb->b[i].coeffs); |
219 |
} else { |
222 |
} else { |
220 |
#if ARCH_X86 |
223 |
#if ARCH_X86 |
221 |
_dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); |
224 |
_dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts); |
222 |
_dv_idct_88(mb->b[i].coeffs); |
225 |
_dv_idct_88(mb->b[i].coeffs); |
223 |
#elif ARCH_X86_64 |
226 |
#elif ARCH_X86_64 |
224 |
_dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); |
227 |
_dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no); |
Lines 250-256
dv_decode_video_segment(dv_decoder_t *dv
Link Here
|
250 |
dv_idct_248 (co248, mb->b[b].coeffs); |
253 |
dv_idct_248 (co248, mb->b[b].coeffs); |
251 |
} else { |
254 |
} else { |
252 |
#if ARCH_X86 |
255 |
#if ARCH_X86 |
253 |
_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no); |
256 |
_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts); |
254 |
_dv_weight_88_inverse(bl->coeffs); |
257 |
_dv_weight_88_inverse(bl->coeffs); |
255 |
_dv_idct_88(bl->coeffs); |
258 |
_dv_idct_88(bl->coeffs); |
256 |
#elif ARCH_X86_64 |
259 |
#elif ARCH_X86_64 |
257 |
-- libdv-0.104-old/libdv/encode.c |
260 |
++ libdv-0.104/libdv/encode.c |
Lines 521-527
static void reorder_block(dv_block_t *bl
Link Here
|
521 |
} |
521 |
} |
522 |
|
522 |
|
523 |
extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, |
523 |
extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs, |
524 |
dv_vlc_entry_t ** out); |
524 |
dv_vlc_entry_t ** out, |
|
|
525 |
dv_vlc_entry_t * lookup); |
525 |
|
526 |
|
526 |
extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, |
527 |
extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs, |
527 |
dv_vlc_entry_t ** out); |
528 |
dv_vlc_entry_t ** out); |
Lines 558-564
static unsigned long vlc_encode_block(dv
Link Here
|
558 |
#elif ARCH_X86 |
559 |
#elif ARCH_X86 |
559 |
int num_bits; |
560 |
int num_bits; |
560 |
|
561 |
|
561 |
num_bits = _dv_vlc_encode_block_mmx(coeffs, &o); |
562 |
num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup); |
562 |
emms(); |
563 |
emms(); |
563 |
#else |
564 |
#else |
564 |
int num_bits; |
565 |
int num_bits; |
Lines 574-580
static unsigned long vlc_encode_block(dv
Link Here
|
574 |
return num_bits; |
575 |
return num_bits; |
575 |
} |
576 |
} |
576 |
|
577 |
|
577 |
extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs); |
578 |
extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup); |
578 |
extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); |
579 |
extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); |
579 |
|
580 |
|
580 |
extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) |
581 |
extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs) |
Lines 600-606
extern unsigned long _dv_vlc_num_bits_bl
Link Here
|
600 |
#elif ARCH_X86_64 |
601 |
#elif ARCH_X86_64 |
601 |
return _dv_vlc_num_bits_block_x86_64(coeffs); |
602 |
return _dv_vlc_num_bits_block_x86_64(coeffs); |
602 |
#else |
603 |
#else |
603 |
return _dv_vlc_num_bits_block_x86(coeffs); |
604 |
return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup); |
604 |
#endif |
605 |
#endif |
605 |
} |
606 |
} |
606 |
|
607 |
|
607 |
-- libdv-0.104-old/libdv/encode_x86.S |
608 |
++ libdv-0.104/libdv/encode_x86.S |
Lines 23-31
Link Here
|
23 |
* The libdv homepage is http://libdv.sourceforge.net/. |
23 |
* The libdv homepage is http://libdv.sourceforge.net/. |
24 |
*/ |
24 |
*/ |
25 |
|
25 |
|
26 |
.data |
|
|
27 |
ALLONE: .word 1,1,1,1 |
28 |
VLCADDMASK: .byte 255,0,0,0,255,0,0,0 |
29 |
|
26 |
|
30 |
|
27 |
|
31 |
.section .note.GNU-stack, "", @progbits |
28 |
.section .note.GNU-stack, "", @progbits |
Lines 45-55
_dv_vlc_encode_block_mmx:
Link Here
|
45 |
|
43 |
|
46 |
movl $63, %ecx |
44 |
movl $63, %ecx |
47 |
|
45 |
|
48 |
movl vlc_encode_lookup, %esi |
46 |
movl 4+4*4+8(%esp), %esi # vlc_encode_lookup |
49 |
|
47 |
|
50 |
pxor %mm0, %mm0 |
48 |
pxor %mm0, %mm0 |
51 |
pxor %mm2, %mm2 |
49 |
pxor %mm2, %mm2 |
52 |
movq VLCADDMASK, %mm1 |
50 |
pushl $0x000000FF # these four lines |
|
|
51 |
pushl $0x000000FF # load VLCADDMASK |
52 |
movq (%esp), %mm1 # into %mm1 off the stack |
53 |
addl $8, %esp # --> no TEXTRELs |
53 |
xorl %ebp, %ebp |
54 |
xorl %ebp, %ebp |
54 |
subl $8, %edx |
55 |
subl $8, %edx |
55 |
vlc_encode_block_mmx_loop: |
56 |
vlc_encode_block_mmx_loop: |
Lines 121-127
_dv_vlc_num_bits_block_x86:
Link Here
|
121 |
addl $2, %edi |
124 |
addl $2, %edi |
122 |
|
125 |
|
123 |
movl $63, %ecx |
126 |
movl $63, %ecx |
124 |
movl vlc_num_bits_lookup, %esi |
127 |
movl 4+4*4+4(%esp), %esi # vlc_num_bits_lookup |
125 |
|
128 |
|
126 |
vlc_num_bits_block_x86_loop: |
129 |
vlc_num_bits_block_x86_loop: |
127 |
movw (%edi), %ax |
130 |
movw (%edi), %ax |
Lines 579-586
_dv_need_dct_248_mmx_rows:
Link Here
|
579 |
paddw %mm5, %mm1 |
590 |
paddw %mm5, %mm1 |
580 |
|
591 |
|
581 |
paddw %mm1, %mm0 |
592 |
paddw %mm1, %mm0 |
582 |
|
593 |
|
583 |
pmaddwd ALLONE, %mm0 |
594 |
pushl $0x00010001 # these four lines |
|
|
595 |
pushl $0x00010001 # load ALLONE |
596 |
pmaddwd (%esp), %mm0 # into %mm0 off the stack |
597 |
addl $8, %esp # --> no TEXTRELs |
584 |
movq %mm0, %mm1 |
598 |
movq %mm0, %mm1 |
585 |
psrlq $32, %mm1 |
599 |
psrlq $32, %mm1 |
586 |
paddd %mm1, %mm0 |
600 |
paddd %mm1, %mm0 |
587 |
-- libdv-0.104-old/libdv/idct_block_mmx.S |
601 |
++ libdv-0.104/libdv/idct_block_mmx.S |
Lines 8-24
Link Here
|
8 |
|
8 |
|
9 |
.section .note.GNU-stack, "", @progbits |
9 |
.section .note.GNU-stack, "", @progbits |
10 |
|
10 |
|
|
|
11 |
#include "asm_common.S" |
12 |
|
11 |
.text |
13 |
.text |
12 |
.align 4 |
14 |
.align 4 |
13 |
.global _dv_idct_block_mmx |
15 |
.global _dv_idct_block_mmx |
14 |
.hidden _dv_idct_block_mmx |
16 |
.hidden _dv_idct_block_mmx |
15 |
.type _dv_idct_block_mmx,@function |
17 |
.type _dv_idct_block_mmx,@function |
16 |
_dv_idct_block_mmx: |
18 |
_dv_idct_block_mmx: |
17 |
pushl %ebp |
19 |
pushl %ebp |
18 |
movl %esp,%ebp |
|
|
19 |
pushl %esi |
20 |
pushl %esi |
20 |
leal preSC, %ecx |
21 |
|
21 |
movl 8(%ebp),%esi /* source matrix */ |
22 |
LOAD_PIC_REG(bp) |
|
|
23 |
|
24 |
leal MUNG(preSC), %ecx |
25 |
movl 12(%esp),%esi /* source matrix */ |
22 |
|
26 |
|
23 |
/* |
27 |
/* |
24 |
* column 0: even part |
28 |
* column 0: even part |
Lines 35-41
_dv_idct_block_mmx:
Link Here
|
35 |
movq %mm1, %mm2 /* added 11/1/96 */ |
41 |
movq %mm1, %mm2 /* added 11/1/96 */ |
36 |
pmulhw 8*8(%esi),%mm5 /* V8 */ |
42 |
pmulhw 8*8(%esi),%mm5 /* V8 */ |
37 |
psubsw %mm0, %mm1 /* V16 */ |
43 |
psubsw %mm0, %mm1 /* V16 */ |
38 |
pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ |
44 |
pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */ |
39 |
paddsw %mm0, %mm2 /* V17 */ |
45 |
paddsw %mm0, %mm2 /* V17 */ |
40 |
movq %mm2, %mm0 /* duplicate V17 */ |
46 |
movq %mm2, %mm0 /* duplicate V17 */ |
41 |
psraw $1, %mm2 /* t75=t82 */ |
47 |
psraw $1, %mm2 /* t75=t82 */ |
Lines 76-82
_dv_idct_block_mmx:
Link Here
|
76 |
paddsw %mm0, %mm3 /* V29 ; free mm0 */ |
82 |
paddsw %mm0, %mm3 /* V29 ; free mm0 */ |
77 |
movq %mm7, %mm1 /* duplicate V26 */ |
83 |
movq %mm7, %mm1 /* duplicate V26 */ |
78 |
psraw $1, %mm3 /* t91=t94 */ |
84 |
psraw $1, %mm3 /* t91=t94 */ |
79 |
pmulhw x539f539f539f539f,%mm7 /* V33 */ |
85 |
pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */ |
80 |
psraw $1, %mm1 /* t96 */ |
86 |
psraw $1, %mm1 /* t96 */ |
81 |
movq %mm5, %mm0 /* duplicate V2 */ |
87 |
movq %mm5, %mm0 /* duplicate V2 */ |
82 |
psraw $2, %mm4 /* t85=t87 */ |
88 |
psraw $2, %mm4 /* t85=t87 */ |
Lines 84-98
_dv_idct_block_mmx:
Link Here
|
84 |
psubsw %mm4, %mm0 /* V28 ; free mm4 */ |
90 |
psubsw %mm4, %mm0 /* V28 ; free mm4 */ |
85 |
movq %mm0, %mm2 /* duplicate V28 */ |
91 |
movq %mm0, %mm2 /* duplicate V28 */ |
86 |
psraw $1, %mm5 /* t90=t93 */ |
92 |
psraw $1, %mm5 /* t90=t93 */ |
87 |
pmulhw x4546454645464546,%mm0 /* V35 */ |
93 |
pmulhw MUNG(x4546454645464546),%mm0 /* V35 */ |
88 |
psraw $1, %mm2 /* t97 */ |
94 |
psraw $1, %mm2 /* t97 */ |
89 |
movq %mm5, %mm4 /* duplicate t90=t93 */ |
95 |
movq %mm5, %mm4 /* duplicate t90=t93 */ |
90 |
psubsw %mm2, %mm1 /* V32 ; free mm2 */ |
96 |
psubsw %mm2, %mm1 /* V32 ; free mm2 */ |
91 |
pmulhw x61f861f861f861f8,%mm1 /* V36 */ |
97 |
pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */ |
92 |
psllw $1, %mm7 /* t107 */ |
98 |
psllw $1, %mm7 /* t107 */ |
93 |
paddsw %mm3, %mm5 /* V31 */ |
99 |
paddsw %mm3, %mm5 /* V31 */ |
94 |
psubsw %mm3, %mm4 /* V30 ; free mm3 */ |
100 |
psubsw %mm3, %mm4 /* V30 ; free mm3 */ |
95 |
pmulhw x5a825a825a825a82,%mm4 /* V34 */ |
101 |
pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */ |
96 |
nop |
102 |
nop |
97 |
psubsw %mm1, %mm0 /* V38 */ |
103 |
psubsw %mm1, %mm0 /* V38 */ |
98 |
psubsw %mm7, %mm1 /* V37 ; free mm7 */ |
104 |
psubsw %mm7, %mm1 /* V37 ; free mm7 */ |
Lines 159-165
_dv_idct_block_mmx:
Link Here
|
159 |
psubsw %mm7, %mm1 /* V50 */ |
165 |
psubsw %mm7, %mm1 /* V50 */ |
160 |
pmulhw 8*9(%esi), %mm5 /* V9 */ |
166 |
pmulhw 8*9(%esi), %mm5 /* V9 */ |
161 |
paddsw %mm7, %mm2 /* V51 */ |
167 |
paddsw %mm7, %mm2 /* V51 */ |
162 |
pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ |
168 |
pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */ |
163 |
movq %mm2, %mm6 /* duplicate V51 */ |
169 |
movq %mm2, %mm6 /* duplicate V51 */ |
164 |
psraw $1, %mm2 /* t138=t144 */ |
170 |
psraw $1, %mm2 /* t138=t144 */ |
165 |
movq %mm3, %mm4 /* duplicate V1 */ |
171 |
movq %mm3, %mm4 /* duplicate V1 */ |
Lines 200-210
_dv_idct_block_mmx:
Link Here
|
200 |
* even more by doing the correction step in a later stage when the number |
206 |
* even more by doing the correction step in a later stage when the number |
201 |
* is actually multiplied by 16 |
207 |
* is actually multiplied by 16 |
202 |
*/ |
208 |
*/ |
203 |
paddw x0005000200010001, %mm4 |
209 |
paddw MUNG(x0005000200010001), %mm4 |
204 |
psubsw %mm6, %mm3 /* V60 ; free mm6 */ |
210 |
psubsw %mm6, %mm3 /* V60 ; free mm6 */ |
205 |
psraw $1, %mm0 /* t154=t156 */ |
211 |
psraw $1, %mm0 /* t154=t156 */ |
206 |
movq %mm3, %mm1 /* duplicate V60 */ |
212 |
movq %mm3, %mm1 /* duplicate V60 */ |
207 |
pmulhw x539f539f539f539f, %mm1 /* V67 */ |
213 |
pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */ |
208 |
movq %mm5, %mm6 /* duplicate V3 */ |
214 |
movq %mm5, %mm6 /* duplicate V3 */ |
209 |
psraw $2, %mm4 /* t148=t150 */ |
215 |
psraw $2, %mm4 /* t148=t150 */ |
210 |
paddsw %mm4, %mm5 /* V61 */ |
216 |
paddsw %mm4, %mm5 /* V61 */ |
Lines 213-225
_dv_idct_block_mmx:
Link Here
|
213 |
psllw $1, %mm1 /* t169 */ |
219 |
psllw $1, %mm1 /* t169 */ |
214 |
paddsw %mm0, %mm5 /* V65 -> result */ |
220 |
paddsw %mm0, %mm5 /* V65 -> result */ |
215 |
psubsw %mm0, %mm4 /* V64 ; free mm0 */ |
221 |
psubsw %mm0, %mm4 /* V64 ; free mm0 */ |
216 |
pmulhw x5a825a825a825a82, %mm4 /* V68 */ |
222 |
pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */ |
217 |
psraw $1, %mm3 /* t158 */ |
223 |
psraw $1, %mm3 /* t158 */ |
218 |
psubsw %mm6, %mm3 /* V66 */ |
224 |
psubsw %mm6, %mm3 /* V66 */ |
219 |
movq %mm5, %mm2 /* duplicate V65 */ |
225 |
movq %mm5, %mm2 /* duplicate V65 */ |
220 |
pmulhw x61f861f861f861f8, %mm3 /* V70 */ |
226 |
pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */ |
221 |
psllw $1, %mm6 /* t165 */ |
227 |
psllw $1, %mm6 /* t165 */ |
222 |
pmulhw x4546454645464546, %mm6 /* V69 */ |
228 |
pmulhw MUNG(x4546454645464546), %mm6 /* V69 */ |
223 |
psraw $1, %mm2 /* t172 */ |
229 |
psraw $1, %mm2 /* t172 */ |
224 |
/* moved from next block */ |
230 |
/* moved from next block */ |
225 |
movq 8*5(%esi), %mm0 /* V56 */ |
231 |
movq 8*5(%esi), %mm0 /* V56 */ |
Lines 344-350
_dv_idct_block_mmx:
Link Here
|
344 |
* movq 8*13(%esi), %mm4 tmt13 |
350 |
* movq 8*13(%esi), %mm4 tmt13 |
345 |
*/ |
351 |
*/ |
346 |
psubsw %mm4, %mm3 /* V134 */ |
352 |
psubsw %mm4, %mm3 /* V134 */ |
347 |
pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ |
353 |
pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */ |
348 |
movq 8*9(%esi), %mm6 /* tmt9 */ |
354 |
movq 8*9(%esi), %mm6 /* tmt9 */ |
349 |
paddsw %mm4, %mm5 /* V135 ; mm4 free */ |
355 |
paddsw %mm4, %mm5 /* V135 ; mm4 free */ |
350 |
movq %mm0, %mm4 /* duplicate tmt1 */ |
356 |
movq %mm0, %mm4 /* duplicate tmt1 */ |
Lines 373-389
_dv_idct_block_mmx:
Link Here
|
373 |
psubsw %mm7, %mm0 /* V144 */ |
379 |
psubsw %mm7, %mm0 /* V144 */ |
374 |
movq %mm0, %mm3 /* duplicate V144 */ |
380 |
movq %mm0, %mm3 /* duplicate V144 */ |
375 |
paddsw %mm7, %mm2 /* V147 ; free mm7 */ |
381 |
paddsw %mm7, %mm2 /* V147 ; free mm7 */ |
376 |
pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ |
382 |
pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */ |
377 |
movq %mm1, %mm7 /* duplicate tmt3 */ |
383 |
movq %mm1, %mm7 /* duplicate tmt3 */ |
378 |
paddsw %mm5, %mm7 /* V145 */ |
384 |
paddsw %mm5, %mm7 /* V145 */ |
379 |
psubsw %mm5, %mm1 /* V146 ; free mm5 */ |
385 |
psubsw %mm5, %mm1 /* V146 ; free mm5 */ |
380 |
psubsw %mm1, %mm3 /* V150 */ |
386 |
psubsw %mm1, %mm3 /* V150 */ |
381 |
movq %mm7, %mm5 /* duplicate V145 */ |
387 |
movq %mm7, %mm5 /* duplicate V145 */ |
382 |
pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ |
388 |
pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */ |
383 |
psubsw %mm2, %mm5 /* V148 */ |
389 |
psubsw %mm2, %mm5 /* V148 */ |
384 |
pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ |
390 |
pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */ |
385 |
psllw $2, %mm0 /* t311 */ |
391 |
psllw $2, %mm0 /* t311 */ |
386 |
pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ |
392 |
pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */ |
387 |
paddsw %mm2, %mm7 /* V149 ; free mm2 */ |
393 |
paddsw %mm2, %mm7 /* V149 ; free mm2 */ |
388 |
psllw $1, %mm1 /* t313 */ |
394 |
psllw $1, %mm1 /* t313 */ |
389 |
nop /* without the nop - freeze here for one clock */ |
395 |
nop /* without the nop - freeze here for one clock */ |
Lines 409-415
_dv_idct_block_mmx:
Link Here
|
409 |
paddsw %mm3, %mm6 /* V164 ; free mm3 */ |
415 |
paddsw %mm3, %mm6 /* V164 ; free mm3 */ |
410 |
movq %mm4, %mm3 /* duplicate V142 */ |
416 |
movq %mm4, %mm3 /* duplicate V142 */ |
411 |
psubsw %mm5, %mm4 /* V165 ; free mm5 */ |
417 |
psubsw %mm5, %mm4 /* V165 ; free mm5 */ |
412 |
movq %mm2, scratch7 /* out7 */ |
418 |
movq %mm2, MUNG(scratch7) /* out7 */ |
413 |
psraw $4, %mm6 |
419 |
psraw $4, %mm6 |
414 |
psraw $4, %mm4 |
420 |
psraw $4, %mm4 |
415 |
paddsw %mm5, %mm3 /* V162 */ |
421 |
paddsw %mm5, %mm3 /* V162 */ |
Lines 420-430
_dv_idct_block_mmx:
Link Here
|
420 |
*/ |
426 |
*/ |
421 |
movq %mm6, 8*9(%esi) /* out9 */ |
427 |
movq %mm6, 8*9(%esi) /* out9 */ |
422 |
paddsw %mm1, %mm0 /* V161 */ |
428 |
paddsw %mm1, %mm0 /* V161 */ |
423 |
movq %mm3, scratch5 /* out5 */ |
429 |
movq %mm3, MUNG(scratch5) /* out5 */ |
424 |
psubsw %mm1, %mm5 /* V166 ; free mm1 */ |
430 |
psubsw %mm1, %mm5 /* V166 ; free mm1 */ |
425 |
movq %mm4, 8*11(%esi) /* out11 */ |
431 |
movq %mm4, 8*11(%esi) /* out11 */ |
426 |
psraw $4, %mm5 |
432 |
psraw $4, %mm5 |
427 |
movq %mm0, scratch3 /* out3 */ |
433 |
movq %mm0, MUNG(scratch3) /* out3 */ |
428 |
movq %mm2, %mm4 /* duplicate V140 */ |
434 |
movq %mm2, %mm4 /* duplicate V140 */ |
429 |
movq %mm5, 8*13(%esi) /* out13 */ |
435 |
movq %mm5, 8*13(%esi) /* out13 */ |
430 |
paddsw %mm7, %mm2 /* V160 */ |
436 |
paddsw %mm7, %mm2 /* V160 */ |
Lines 434-440
_dv_idct_block_mmx:
Link Here
|
434 |
/* moved from the next block */ |
440 |
/* moved from the next block */ |
435 |
movq 8*3(%esi), %mm7 |
441 |
movq 8*3(%esi), %mm7 |
436 |
psraw $4, %mm4 |
442 |
psraw $4, %mm4 |
437 |
movq %mm2, scratch1 /* out1 */ |
443 |
movq %mm2, MUNG(scratch1) /* out1 */ |
438 |
/* moved from the next block */ |
444 |
/* moved from the next block */ |
439 |
movq %mm0, %mm1 |
445 |
movq %mm0, %mm1 |
440 |
movq %mm4, 8*15(%esi) /* out15 */ |
446 |
movq %mm4, 8*15(%esi) /* out15 */ |
Lines 491-505
_dv_idct_block_mmx:
Link Here
|
491 |
paddsw %mm4, %mm3 /* V113 ; free mm4 */ |
497 |
paddsw %mm4, %mm3 /* V113 ; free mm4 */ |
492 |
movq %mm0, %mm4 /* duplicate V110 */ |
498 |
movq %mm0, %mm4 /* duplicate V110 */ |
493 |
paddsw %mm1, %mm2 /* V111 */ |
499 |
paddsw %mm1, %mm2 /* V111 */ |
494 |
pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ |
500 |
pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */ |
495 |
psubsw %mm1, %mm5 /* V112 ; free mm1 */ |
501 |
psubsw %mm1, %mm5 /* V112 ; free mm1 */ |
496 |
psubsw %mm5, %mm4 /* V116 */ |
502 |
psubsw %mm5, %mm4 /* V116 */ |
497 |
movq %mm2, %mm1 /* duplicate V111 */ |
503 |
movq %mm2, %mm1 /* duplicate V111 */ |
498 |
pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ |
504 |
pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */ |
499 |
psubsw %mm3, %mm2 /* V114 */ |
505 |
psubsw %mm3, %mm2 /* V114 */ |
500 |
pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ |
506 |
pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */ |
501 |
paddsw %mm3, %mm1 /* V115 ; free mm3 */ |
507 |
paddsw %mm3, %mm1 /* V115 ; free mm3 */ |
502 |
pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ |
508 |
pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */ |
503 |
psllw $2, %mm0 /* t266 */ |
509 |
psllw $2, %mm0 /* t266 */ |
504 |
movq %mm1, (%esi) /* save V115 */ |
510 |
movq %mm1, (%esi) /* save V115 */ |
505 |
psllw $1, %mm5 /* t268 */ |
511 |
psllw $1, %mm5 /* t268 */ |
Lines 517-523
_dv_idct_block_mmx:
Link Here
|
517 |
movq %mm6, %mm3 /* duplicate tmt4 */ |
523 |
movq %mm6, %mm3 /* duplicate tmt4 */ |
518 |
psubsw %mm0, %mm6 /* V100 */ |
524 |
psubsw %mm0, %mm6 /* V100 */ |
519 |
paddsw %mm0, %mm3 /* V101 ; free mm0 */ |
525 |
paddsw %mm0, %mm3 /* V101 ; free mm0 */ |
520 |
pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ |
526 |
pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */ |
521 |
movq %mm7, %mm5 /* duplicate tmt0 */ |
527 |
movq %mm7, %mm5 /* duplicate tmt0 */ |
522 |
movq 8*8(%esi), %mm1 /* tmt8 */ |
528 |
movq 8*8(%esi), %mm1 /* tmt8 */ |
523 |
paddsw %mm1, %mm7 /* V103 */ |
529 |
paddsw %mm1, %mm7 /* V103 */ |
Lines 551-560
_dv_idct_block_mmx:
Link Here
|
551 |
movq 8*2(%esi), %mm3 /* V123 */ |
557 |
movq 8*2(%esi), %mm3 /* V123 */ |
552 |
paddsw %mm4, %mm7 /* out0 */ |
558 |
paddsw %mm4, %mm7 /* out0 */ |
553 |
/* moved up from next block */ |
559 |
/* moved up from next block */ |
554 |
movq scratch3, %mm0 |
560 |
movq MUNG(scratch3), %mm0 |
555 |
psraw $4, %mm7 |
561 |
psraw $4, %mm7 |
556 |
/* moved up from next block */ |
562 |
/* moved up from next block */ |
557 |
movq scratch5, %mm6 |
563 |
movq MUNG(scratch5), %mm6 |
558 |
psubsw %mm4, %mm1 /* out14 ; free mm4 */ |
564 |
psubsw %mm4, %mm1 /* out14 ; free mm4 */ |
559 |
paddsw %mm3, %mm5 /* out2 */ |
565 |
paddsw %mm3, %mm5 /* out2 */ |
560 |
psraw $4, %mm1 |
566 |
psraw $4, %mm1 |
Lines 565-571
_dv_idct_block_mmx:
Link Here
|
565 |
movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ |
571 |
movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ |
566 |
psraw $4, %mm2 |
572 |
psraw $4, %mm2 |
567 |
/* moved up to the prev block */ |
573 |
/* moved up to the prev block */ |
568 |
movq scratch7, %mm4 |
574 |
movq MUNG(scratch7), %mm4 |
569 |
/* moved up to the prev block */ |
575 |
/* moved up to the prev block */ |
570 |
psraw $4, %mm0 |
576 |
psraw $4, %mm0 |
571 |
movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ |
577 |
movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ |
Lines 579-585
_dv_idct_block_mmx:
Link Here
|
579 |
* psraw $4, %mm0 |
585 |
* psraw $4, %mm0 |
580 |
* psraw $4, %mm6 |
586 |
* psraw $4, %mm6 |
581 |
*/ |
587 |
*/ |
582 |
movq scratch1, %mm1 |
588 |
movq MUNG(scratch1), %mm1 |
583 |
psraw $4, %mm4 |
589 |
psraw $4, %mm4 |
584 |
movq %mm0, 8*3(%esi) /* out3 */ |
590 |
movq %mm0, 8*3(%esi) /* out3 */ |
585 |
psraw $4, %mm1 |
591 |
psraw $4, %mm1 |
586 |
-- libdv-0.104-old/libdv/parse.c |
592 |
++ libdv-0.104/libdv/parse.c |
Lines 477-482
dv_parse_ac_coeffs(dv_videosegment_t *se
Link Here
|
477 |
exit(0); |
477 |
exit(0); |
478 |
#endif |
478 |
#endif |
479 |
} /* dv_parse_ac_coeffs */ |
479 |
} /* dv_parse_ac_coeffs */ |
|
|
480 |
#if defined __GNUC__ && __ELF__ |
481 |
# define dv_strong_hidden_alias(name, aliasname) \ |
482 |
extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden"))) |
483 |
dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs); |
484 |
#else |
485 |
int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); } |
486 |
#endif |
480 |
|
487 |
|
481 |
/* --------------------------------------------------------------------------- |
488 |
/* --------------------------------------------------------------------------- |
482 |
*/ |
489 |
*/ |
483 |
-- libdv-0.104-old/libdv/quant.c |
490 |
++ libdv-0.104/libdv/quant.c |
Lines 144-150
uint8_t dv_quant_offset[4] = { 6,3,0,1
Link Here
|
144 |
uint32_t dv_quant_248_mul_tab [2] [22] [64]; |
144 |
uint32_t dv_quant_248_mul_tab [2] [22] [64]; |
145 |
uint32_t dv_quant_88_mul_tab [2] [22] [64]; |
145 |
uint32_t dv_quant_88_mul_tab [2] [22] [64]; |
146 |
|
146 |
|
147 |
extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass); |
147 |
extern void _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts); |
148 |
extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); |
148 |
extern void _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass); |
149 |
static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); |
149 |
static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); |
150 |
static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); |
150 |
static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co); |
Lines 210-216
void _dv_quant(dv_coeff_t *block,int qno
Link Here
|
210 |
_dv_quant_x86_64(block, qno, klass); |
210 |
_dv_quant_x86_64(block, qno, klass); |
211 |
emms(); |
211 |
emms(); |
212 |
#else |
212 |
#else |
213 |
_dv_quant_x86(block, qno, klass); |
213 |
_dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts); |
214 |
emms(); |
214 |
emms(); |
215 |
#endif |
215 |
#endif |
216 |
} |
216 |
} |
217 |
-- libdv-0.104-old/libdv/quant.h |
217 |
++ libdv-0.104/libdv/quant.h |
Lines 27-33
extern void _dv_quant(dv_coeff_t *block,
Link Here
|
27 |
extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass); |
27 |
extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass); |
28 |
extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, |
28 |
extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass, |
29 |
dv_248_coeff_t *co); |
29 |
dv_248_coeff_t *co); |
30 |
extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass); |
30 |
extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts); |
31 |
extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); |
31 |
extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass); |
32 |
extern void dv_quant_init (void); |
32 |
extern void dv_quant_init (void); |
33 |
#ifdef __cplusplus |
33 |
#ifdef __cplusplus |
34 |
-- libdv-0.104-old/libdv/quant_x86.S |
34 |
++ libdv-0.104/libdv/quant_x86.S |
Lines 71-80
_dv_quant_88_inverse_x86:
Link Here
|
71 |
|
73 |
|
72 |
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ |
74 |
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ |
73 |
movl ARGn(1),%eax /* qno */ |
75 |
movl ARGn(1),%eax /* qno */ |
|
|
76 |
movl ARGn(3),%ebx /* dv_quant_offset */ |
77 |
addl ARGn(2),%ebx /* class */ |
78 |
movzbl (%ebx),%ecx |
74 |
movl ARGn(2),%ebx /* class */ |
79 |
movl ARGn(2),%ebx /* class */ |
75 |
movzbl dv_quant_offset(%ebx),%ecx |
|
|
76 |
addl %ecx,%eax |
80 |
addl %ecx,%eax |
77 |
leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ |
81 |
movl ARGn(4),%edx /* dv_quant_shifts */ |
|
|
82 |
leal (%edx,%eax,4),%edx /* edx is pq */ |
78 |
|
83 |
|
79 |
/* extra = (class == 3); */ |
84 |
/* extra = (class == 3); */ |
80 |
/* 0 1 2 3 */ |
85 |
/* 0 1 2 3 */ |
Lines 212-222
_dv_quant_x86:
Link Here
|
212 |
|
219 |
|
213 |
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ |
220 |
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */ |
214 |
movl ARGn(1),%eax /* qno */ |
221 |
movl ARGn(1),%eax /* qno */ |
|
|
222 |
movl ARGn(3),%ebx /* offset */ |
223 |
addl ARGn(2),%ebx /* class */ |
224 |
movzbl (%ebx),%ecx |
215 |
movl ARGn(2),%ebx /* class */ |
225 |
movl ARGn(2),%ebx /* class */ |
216 |
|
226 |
movl ARGn(4),%edx /* shifts */ |
217 |
movzbl dv_quant_offset(%ebx),%ecx |
|
|
218 |
addl %ecx,%eax |
227 |
addl %ecx,%eax |
219 |
leal dv_quant_shifts(,%eax,4),%edx /* edx is pq */ |
228 |
leal (%edx,%eax,4),%edx /* edx is pq */ |
220 |
|
229 |
|
221 |
/* extra = (class == 3); */ |
230 |
/* extra = (class == 3); */ |
222 |
/* 0 1 2 3 */ |
231 |
/* 0 1 2 3 */ |
223 |
-- libdv-0.104-old/libdv/rgbtoyuv.S |
232 |
++ libdv-0.104/libdv/rgbtoyuv.S |
Lines 41-49
Link Here
|
41 |
#define DV_WIDTH_SHORT_HALF 720 |
41 |
#define DV_WIDTH_SHORT_HALF 720 |
42 |
#define DV_WIDTH_BYTE_HALF 360 |
42 |
#define DV_WIDTH_BYTE_HALF 360 |
43 |
|
43 |
|
44 |
.global _dv_rgbtoycb_mmx |
|
|
45 |
# .global yuvtoycb_mmx |
46 |
|
47 |
.data |
44 |
.data |
48 |
|
45 |
|
49 |
.align 8 |
46 |
.align 8 |
Lines 110-134
VR0GR: .long 0,0
Link Here
|
110 |
VBG0B: .long 0,0 |
107 |
VBG0B: .long 0,0 |
111 |
|
108 |
|
112 |
#endif |
109 |
#endif |
113 |
|
110 |
|
|
|
111 |
#include "asm_common.S" |
112 |
|
114 |
.section .note.GNU-stack, "", @progbits |
113 |
.section .note.GNU-stack, "", @progbits |
115 |
|
114 |
|
116 |
.text |
115 |
.text |
117 |
|
116 |
|
118 |
#define _inPtr 8 |
117 |
#define _inPtr 24+8 |
119 |
#define _rows 12 |
118 |
#define _rows 24+12 |
120 |
#define _columns 16 |
119 |
#define _columns 24+16 |
121 |
#define _outyPtr 20 |
120 |
#define _outyPtr 24+20 |
122 |
#define _outuPtr 24 |
121 |
#define _outuPtr 24+24 |
123 |
#define _outvPtr 28 |
122 |
#define _outvPtr 24+28 |
124 |
|
123 |
|
125 |
.global _dv_rgbtoycb_mmx |
124 |
.global _dv_rgbtoycb_mmx |
126 |
.hidden _dv_rgbtoycb_mmx |
125 |
.hidden _dv_rgbtoycb_mmx |
127 |
.type _dv_rgbtoycb_mmx,@function |
126 |
.type _dv_rgbtoycb_mmx,@function |
128 |
_dv_rgbtoycb_mmx: |
127 |
_dv_rgbtoycb_mmx: |
129 |
|
128 |
|
130 |
pushl %ebp |
129 |
pushl %ebp |
131 |
movl %esp, %ebp |
|
|
132 |
pushl %eax |
130 |
pushl %eax |
133 |
pushl %ebx |
131 |
pushl %ebx |
134 |
pushl %ecx |
132 |
pushl %ecx |
Lines 131-176
_dv_rgbtoycb_mmx:
Link Here
|
131 |
pushl %esi |
132 |
pushl %esi |
132 |
pushl %edi |
133 |
pushl %edi |
133 |
|
134 |
|
134 |
leal ZEROSX, %eax #This section gets around a bug |
135 |
LOAD_PIC_REG(bp) |
|
|
136 |
|
137 |
leal MUNG(ZEROSX), %eax #This section gets around a bug |
135 |
movq (%eax), %mm0 #unlikely to persist |
138 |
movq (%eax), %mm0 #unlikely to persist |
136 |
movq %mm0, ZEROS |
139 |
movq %mm0, MUNG(ZEROS) |
137 |
leal OFFSETDX, %eax |
140 |
leal MUNG(OFFSETDX), %eax |
138 |
movq (%eax), %mm0 |
141 |
movq (%eax), %mm0 |
139 |
movq %mm0, OFFSETD |
142 |
movq %mm0, MUNG(OFFSETD) |
140 |
leal OFFSETWX, %eax |
143 |
leal MUNG(OFFSETWX), %eax |
141 |
movq (%eax), %mm0 |
144 |
movq (%eax), %mm0 |
142 |
movq %mm0, OFFSETW |
145 |
movq %mm0, MUNG(OFFSETW) |
143 |
leal OFFSETBX, %eax |
146 |
leal MUNG(OFFSETBX), %eax |
144 |
movq (%eax), %mm0 |
147 |
movq (%eax), %mm0 |
145 |
movq %mm0, OFFSETB |
148 |
movq %mm0, MUNG(OFFSETB) |
146 |
leal YR0GRX, %eax |
149 |
leal MUNG(YR0GRX), %eax |
147 |
movq (%eax), %mm0 |
150 |
movq (%eax), %mm0 |
148 |
movq %mm0, YR0GR |
151 |
movq %mm0, MUNG(YR0GR) |
149 |
leal YBG0BX, %eax |
152 |
leal MUNG(YBG0BX), %eax |
150 |
movq (%eax), %mm0 |
153 |
movq (%eax), %mm0 |
151 |
movq %mm0, YBG0B |
154 |
movq %mm0, MUNG(YBG0B) |
152 |
leal UR0GRX, %eax |
155 |
leal MUNG(UR0GRX), %eax |
153 |
movq (%eax), %mm0 |
156 |
movq (%eax), %mm0 |
154 |
movq %mm0, UR0GR |
157 |
movq %mm0, MUNG(UR0GR) |
155 |
leal UBG0BX, %eax |
158 |
leal MUNG(UBG0BX), %eax |
156 |
movq (%eax), %mm0 |
159 |
movq (%eax), %mm0 |
157 |
movq %mm0, UBG0B |
160 |
movq %mm0, MUNG(UBG0B) |
158 |
leal VR0GRX, %eax |
161 |
leal MUNG(VR0GRX), %eax |
159 |
movq (%eax), %mm0 |
162 |
movq (%eax), %mm0 |
160 |
movq %mm0, VR0GR |
163 |
movq %mm0, MUNG(VR0GR) |
161 |
leal VBG0BX, %eax |
164 |
leal MUNG(VBG0BX), %eax |
162 |
movq (%eax), %mm0 |
165 |
movq (%eax), %mm0 |
163 |
movq %mm0, VBG0B |
166 |
movq %mm0, MUNG(VBG0B) |
164 |
|
167 |
movl _rows(%esp), %eax |
165 |
movl _rows(%ebp), %eax |
168 |
movl _columns(%esp), %ebx |
166 |
movl _columns(%ebp), %ebx |
|
|
167 |
mull %ebx #number pixels |
169 |
mull %ebx #number pixels |
168 |
shrl $3, %eax #number of loops |
170 |
shrl $3, %eax #number of loops |
169 |
movl %eax, %edi #loop counter in edi |
171 |
movl %eax, %edi #loop counter in edi |
170 |
movl _inPtr(%ebp), %eax |
172 |
movl _inPtr(%esp), %eax |
171 |
movl _outyPtr(%ebp), %ebx |
173 |
movl _outyPtr(%esp), %ebx |
172 |
movl _outuPtr(%ebp), %ecx |
174 |
movl _outuPtr(%esp), %ecx |
173 |
movl _outvPtr(%ebp), %edx |
175 |
movl _outvPtr(%esp), %edx |
174 |
rgbtoycb_mmx_loop: |
176 |
rgbtoycb_mmx_loop: |
175 |
movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 |
177 |
movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 |
176 |
pxor %mm6, %mm6 #0 -> mm6 |
178 |
pxor %mm6, %mm6 #0 -> mm6 |
Lines 184-212
rgbtoycb_mmx_loop:
Link Here
|
184 |
punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 |
186 |
punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 |
185 |
movq %mm0, %mm2 #R1B0G0R0 -> mm2 |
187 |
movq %mm0, %mm2 #R1B0G0R0 -> mm2 |
186 |
|
188 |
|
187 |
pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 |
189 |
pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0 |
188 |
movq %mm1, %mm3 #B1G1R1B0 -> mm3 |
190 |
movq %mm1, %mm3 #B1G1R1B0 -> mm3 |
189 |
|
191 |
|
190 |
pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 |
192 |
pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1 |
191 |
movq %mm2, %mm4 #R1B0G0R0 -> mm4 |
193 |
movq %mm2, %mm4 #R1B0G0R0 -> mm4 |
192 |
|
194 |
|
193 |
pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 |
195 |
pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2 |
194 |
movq %mm3, %mm5 #B1G1R1B0 -> mm5 |
196 |
movq %mm3, %mm5 #B1G1R1B0 -> mm5 |
195 |
|
197 |
|
196 |
pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 |
198 |
pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3 |
197 |
punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 |
199 |
punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 |
198 |
|
200 |
|
199 |
pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 |
201 |
pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4 |
200 |
paddd %mm1, %mm0 #Y1Y0 -> mm0 |
202 |
paddd %mm1, %mm0 #Y1Y0 -> mm0 |
201 |
|
203 |
|
202 |
pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 |
204 |
pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5 |
203 |
|
205 |
|
204 |
movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 |
206 |
movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 |
205 |
paddd %mm3, %mm2 #U1U0 -> mm2 |
207 |
paddd %mm3, %mm2 #U1U0 -> mm2 |
206 |
|
208 |
|
207 |
movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 |
209 |
movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 |
208 |
|
210 |
|
209 |
punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 |
211 |
punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1 |
210 |
paddd %mm5, %mm4 #V1V0 -> mm4 |
212 |
paddd %mm5, %mm4 #V1V0 -> mm4 |
211 |
|
213 |
|
212 |
movq %mm1, %mm5 #B3G3R3B2 -> mm5 |
214 |
movq %mm1, %mm5 #B3G3R3B2 -> mm5 |
Lines 214-242
rgbtoycb_mmx_loop:
Link Here
|
214 |
|
216 |
|
215 |
paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 |
217 |
paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 |
216 |
|
218 |
|
217 |
punpckhbw ZEROS, %mm6 #R5B4G4R3 -> mm6 |
219 |
punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R3 -> mm6 |
218 |
movq %mm1, %mm3 #R3B2G2R2 -> mm3 |
220 |
movq %mm1, %mm3 #R3B2G2R2 -> mm3 |
219 |
|
221 |
|
220 |
pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 |
222 |
pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1 |
221 |
movq %mm5, %mm7 #B3G3R3B2 -> mm7 |
223 |
movq %mm5, %mm7 #B3G3R3B2 -> mm7 |
222 |
|
224 |
|
223 |
pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 |
225 |
pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5 |
224 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 |
226 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 |
225 |
|
227 |
|
226 |
movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 |
228 |
movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0 |
227 |
movq %mm3, %mm6 #R3B2G2R2 -> mm6 |
229 |
movq %mm3, %mm6 #R3B2G2R2 -> mm6 |
228 |
pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 |
230 |
pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6 |
229 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 |
231 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 |
230 |
|
232 |
|
231 |
paddd %mm5, %mm1 #Y3Y2 -> mm1 |
233 |
paddd %mm5, %mm1 #Y3Y2 -> mm1 |
232 |
movq %mm7, %mm5 #B3G3R3B2 -> mm5 |
234 |
movq %mm7, %mm5 #B3G3R3B2 -> mm5 |
233 |
pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 |
235 |
pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2 |
234 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 |
236 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 |
235 |
|
237 |
|
236 |
pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 |
238 |
pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2 |
237 |
packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 |
239 |
packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 |
238 |
|
240 |
|
239 |
pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 |
241 |
pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5 |
240 |
psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 |
242 |
psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 |
241 |
|
243 |
|
242 |
movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 |
244 |
movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm7 |
Lines 251-308
rgbtoycb_mmx_loop:
Link Here
|
251 |
movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 |
253 |
movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 |
252 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 |
254 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 |
253 |
|
255 |
|
254 |
paddw OFFSETY, %mm0 |
256 |
paddw MUNG(OFFSETY), %mm0 |
255 |
movq %mm0, (%ebx) #store Y3Y2Y1Y0 |
257 |
movq %mm0, (%ebx) #store Y3Y2Y1Y0 |
256 |
packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 |
258 |
packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 |
257 |
|
259 |
|
258 |
movq TEMP0, %mm0 #R5B4G4R4 -> mm0 |
260 |
movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0 |
259 |
addl $8, %ebx |
261 |
addl $8, %ebx |
260 |
|
262 |
|
261 |
punpcklbw ZEROS, %mm7 #B5G500 -> mm7 |
263 |
punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7 |
262 |
movq %mm0, %mm6 #R5B4G4R4 -> mm6 |
264 |
movq %mm0, %mm6 #R5B4G4R4 -> mm6 |
263 |
|
265 |
|
264 |
movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU |
266 |
movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU |
265 |
psrlq $32, %mm0 #00R5B4 -> mm0 |
267 |
psrlq $32, %mm0 #00R5B4 -> mm0 |
266 |
|
268 |
|
267 |
paddw %mm0, %mm7 #B5G5R5B4 -> mm7 |
269 |
paddw %mm0, %mm7 #B5G5R5B4 -> mm7 |
268 |
movq %mm6, %mm2 #B5B4G4R4 -> mm2 |
270 |
movq %mm6, %mm2 #B5B4G4R4 -> mm2 |
269 |
|
271 |
|
270 |
pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 |
272 |
pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2 |
271 |
movq %mm7, %mm0 #B5G5R5B4 -> mm0 |
273 |
movq %mm7, %mm0 #B5G5R5B4 -> mm0 |
272 |
|
274 |
|
273 |
pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 |
275 |
pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7 |
274 |
packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 |
276 |
packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 |
275 |
|
277 |
|
276 |
addl $24, %eax #increment RGB count |
278 |
addl $24, %eax #increment RGB count |
277 |
|
279 |
|
278 |
movq %mm4, TEMPV #(V3V2V1V0)/256 -> mm4 |
280 |
movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> mm4 |
279 |
movq %mm6, %mm4 #B5B4G4R4 -> mm4 |
281 |
movq %mm6, %mm4 #B5B4G4R4 -> mm4 |
280 |
|
282 |
|
281 |
pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 |
283 |
pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4 |
282 |
movq %mm0, %mm3 #B5G5R5B4 -> mm0 |
284 |
movq %mm0, %mm3 #B5G5R5B4 -> mm0 |
283 |
|
285 |
|
284 |
pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 |
286 |
pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4 |
285 |
paddd %mm7, %mm2 #Y5Y4 -> mm2 |
287 |
paddd %mm7, %mm2 #Y5Y4 -> mm2 |
286 |
|
288 |
|
287 |
pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 |
289 |
pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4 |
288 |
pxor %mm7, %mm7 #0 -> mm7 |
290 |
pxor %mm7, %mm7 #0 -> mm7 |
289 |
|
291 |
|
290 |
pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 |
292 |
pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3 |
291 |
punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 |
293 |
punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 |
292 |
|
294 |
|
293 |
paddd %mm6, %mm0 #U5U4 -> mm0 |
295 |
paddd %mm6, %mm0 #U5U4 -> mm0 |
294 |
movq %mm1, %mm6 #B7G7R7B6 -> mm6 |
296 |
movq %mm1, %mm6 #B7G7R7B6 -> mm6 |
295 |
|
297 |
|
296 |
pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 |
298 |
pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6 |
297 |
punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 |
299 |
punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 |
298 |
|
300 |
|
299 |
movq %mm5, %mm7 #R7B6G6R6 -> mm7 |
301 |
movq %mm5, %mm7 #R7B6G6R6 -> mm7 |
300 |
paddd %mm4, %mm3 #V5V4 -> mm3 |
302 |
paddd %mm4, %mm3 #V5V4 -> mm3 |
301 |
|
303 |
|
302 |
pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 |
304 |
pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5 |
303 |
movq %mm1, %mm4 #B7G7R7B6 -> mm4 |
305 |
movq %mm1, %mm4 #B7G7R7B6 -> mm4 |
304 |
|
306 |
|
305 |
pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 |
307 |
pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4 |
306 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 |
308 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 |
307 |
|
309 |
|
308 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 |
310 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 |
Lines 310-334
rgbtoycb_mmx_loop:
Link Here
|
310 |
paddd %mm5, %mm6 #Y7Y6 -> mm6 |
312 |
paddd %mm5, %mm6 #Y7Y6 -> mm6 |
311 |
movq %mm7, %mm5 #R7B6G6R6 -> mm5 |
313 |
movq %mm7, %mm5 #R7B6G6R6 -> mm5 |
312 |
|
314 |
|
313 |
pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 |
315 |
pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7 |
314 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 |
316 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 |
315 |
|
317 |
|
316 |
pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 |
318 |
pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1 |
317 |
psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 |
319 |
psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 |
318 |
|
320 |
|
319 |
packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 |
321 |
packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 |
320 |
|
322 |
|
321 |
pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 |
323 |
pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5 |
322 |
paddd %mm4, %mm7 #U7U6 -> mm7 |
324 |
paddd %mm4, %mm7 #U7U6 -> mm7 |
323 |
|
325 |
|
324 |
psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 |
326 |
psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 |
325 |
paddw OFFSETY, %mm2 |
327 |
paddw MUNG(OFFSETY), %mm2 |
326 |
movq %mm2, (%ebx) #store Y7Y6Y5Y4 |
328 |
movq %mm2, (%ebx) #store Y7Y6Y5Y4 |
327 |
|
329 |
|
328 |
movq ALLONE, %mm6 |
330 |
movq MUNG(ALLONE), %mm6 |
329 |
packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 |
331 |
packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 |
330 |
|
332 |
|
331 |
movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 |
333 |
movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4 |
332 |
pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 |
334 |
pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 |
333 |
|
335 |
|
334 |
pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 |
336 |
pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 |
Lines 338-345
rgbtoycb_mmx_loop:
Link Here
|
338 |
|
340 |
|
339 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 |
341 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 |
340 |
psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 |
342 |
psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 |
341 |
|
343 |
|
342 |
movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 |
344 |
movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5 |
343 |
|
345 |
|
344 |
movq %mm4, (%ecx) # store U |
346 |
movq %mm4, (%ecx) # store U |
345 |
|
347 |
|
Lines 422-435
_dv_ppm_copy_y_block_mmx:
Link Here
|
422 |
_dv_pgm_copy_y_block_mmx: |
426 |
_dv_pgm_copy_y_block_mmx: |
423 |
|
427 |
|
424 |
pushl %ebp |
428 |
pushl %ebp |
425 |
movl %esp, %ebp |
|
|
426 |
pushl %esi |
429 |
pushl %esi |
427 |
pushl %edi |
430 |
pushl %edi |
428 |
|
|
|
429 |
movl 8(%ebp), %edi # dest |
430 |
movl 12(%ebp), %esi # src |
431 |
|
431 |
|
432 |
movq OFFSETY, %mm7 |
432 |
LOAD_PIC_REG(bp) |
|
|
433 |
|
434 |
movl 16(%esp), %edi # dest |
435 |
movl 20(%esp), %esi # src |
436 |
|
437 |
movq MUNG(OFFSETY), %mm7 |
433 |
pxor %mm6, %mm6 |
438 |
pxor %mm6, %mm6 |
434 |
|
439 |
|
435 |
movq (%esi), %mm0 |
440 |
movq (%esi), %mm0 |
Lines 564-577
_dv_pgm_copy_y_block_mmx:
Link Here
|
564 |
_dv_video_copy_y_block_mmx: |
571 |
_dv_video_copy_y_block_mmx: |
565 |
|
572 |
|
566 |
pushl %ebp |
573 |
pushl %ebp |
567 |
movl %esp, %ebp |
|
|
568 |
pushl %esi |
574 |
pushl %esi |
569 |
pushl %edi |
575 |
pushl %edi |
570 |
|
|
|
571 |
movl 8(%ebp), %edi # dest |
572 |
movl 12(%ebp), %esi # src |
573 |
|
576 |
|
574 |
movq OFFSETBX, %mm7 |
577 |
LOAD_PIC_REG(bp) |
|
|
578 |
|
579 |
movl 16(%esp), %edi # dest |
580 |
movl 20(%esp), %esi # src |
581 |
|
582 |
movq MUNG(OFFSETBX), %mm7 |
575 |
pxor %mm6, %mm6 |
583 |
pxor %mm6, %mm6 |
576 |
|
584 |
|
577 |
movq (%esi), %mm0 |
585 |
movq (%esi), %mm0 |
Lines 852-867
_dv_ppm_copy_pal_c_block_mmx:
Link Here
|
852 |
_dv_pgm_copy_pal_c_block_mmx: |
864 |
_dv_pgm_copy_pal_c_block_mmx: |
853 |
|
865 |
|
854 |
pushl %ebp |
866 |
pushl %ebp |
855 |
movl %esp, %ebp |
|
|
856 |
pushl %esi |
867 |
pushl %esi |
857 |
pushl %edi |
868 |
pushl %edi |
858 |
pushl %ebx |
869 |
pushl %ebx |
859 |
|
|
|
860 |
movl 8(%ebp), %edi # dest |
861 |
movl 12(%ebp), %esi # src |
862 |
|
870 |
|
|
|
871 |
LOAD_PIC_REG(bp) |
872 |
|
873 |
movl 20(%esp), %edi # dest |
874 |
movl 24(%esp), %esi # src |
863 |
|
875 |
|
864 |
movq OFFSETBX, %mm7 |
876 |
movq MUNG(OFFSETBX), %mm7 |
865 |
pxor %mm6, %mm6 |
877 |
pxor %mm6, %mm6 |
866 |
|
878 |
|
867 |
|
879 |
|
Lines 1000-1014
_dv_pgm_copy_pal_c_block_mmx:
Link Here
|
1000 |
_dv_video_copy_pal_c_block_mmx: |
1014 |
_dv_video_copy_pal_c_block_mmx: |
1001 |
|
1015 |
|
1002 |
pushl %ebp |
1016 |
pushl %ebp |
1003 |
movl %esp, %ebp |
|
|
1004 |
pushl %esi |
1017 |
pushl %esi |
1005 |
pushl %edi |
1018 |
pushl %edi |
1006 |
pushl %ebx |
1019 |
pushl %ebx |
1007 |
|
|
|
1008 |
movl 8(%ebp), %edi # dest |
1009 |
movl 12(%ebp), %esi # src |
1010 |
|
1020 |
|
1011 |
movq OFFSETBX, %mm7 |
1021 |
LOAD_PIC_REG(bp) |
|
|
1022 |
|
1023 |
movl 20(%esp), %edi # dest |
1024 |
movl 24(%esp), %esi # src |
1025 |
|
1026 |
movq MUNG(OFFSETBX), %mm7 |
1012 |
paddw %mm7, %mm7 |
1027 |
paddw %mm7, %mm7 |
1013 |
pxor %mm6, %mm6 |
1028 |
pxor %mm6, %mm6 |
1014 |
|
1029 |
|
Lines 1095-1112
video_copy_pal_c_block_mmx_loop:
Link Here
|
1095 |
_dv_ppm_copy_ntsc_c_block_mmx: |
1112 |
_dv_ppm_copy_ntsc_c_block_mmx: |
1096 |
|
1113 |
|
1097 |
pushl %ebp |
1114 |
pushl %ebp |
1098 |
movl %esp, %ebp |
|
|
1099 |
pushl %esi |
1115 |
pushl %esi |
1100 |
pushl %edi |
1116 |
pushl %edi |
1101 |
pushl %ebx |
1117 |
pushl %ebx |
1102 |
|
1118 |
|
1103 |
movl 8(%ebp), %edi # dest |
1119 |
LOAD_PIC_REG(bp) |
1104 |
movl 12(%ebp), %esi # src |
1120 |
|
|
|
1121 |
movl 20(%esp), %edi # dest |
1122 |
movl 24(%esp), %esi # src |
1105 |
|
1123 |
|
1106 |
movl $4, %ebx |
1124 |
movl $4, %ebx |
1107 |
|
1125 |
|
1108 |
movq ALLONE, %mm6 |
1126 |
movq MUNG(ALLONE), %mm6 |
1109 |
|
|
|
1110 |
ppm_copy_ntsc_c_block_mmx_loop: |
1127 |
ppm_copy_ntsc_c_block_mmx_loop: |
1111 |
|
1128 |
|
1112 |
movq (%esi), %mm0 |
1129 |
movq (%esi), %mm0 |
Lines 1168-1181
ppm_copy_ntsc_c_block_mmx_loop:
Link Here
|
1168 |
_dv_pgm_copy_ntsc_c_block_mmx: |
1187 |
_dv_pgm_copy_ntsc_c_block_mmx: |
1169 |
|
1188 |
|
1170 |
pushl %ebp |
1189 |
pushl %ebp |
1171 |
movl %esp, %ebp |
|
|
1172 |
pushl %esi |
1190 |
pushl %esi |
1173 |
pushl %edi |
1191 |
pushl %edi |
1174 |
|
|
|
1175 |
movl 8(%ebp), %edi # dest |
1176 |
movl 12(%ebp), %esi # src |
1177 |
|
1192 |
|
1178 |
movq OFFSETBX, %mm7 |
1193 |
LOAD_PIC_REG(bp) |
|
|
1194 |
|
1195 |
movl 16(%esp), %edi # dest |
1196 |
movl 20(%esp), %esi # src |
1197 |
|
1198 |
movq MUNG(OFFSETBX), %mm7 |
1179 |
paddw %mm7, %mm7 |
1199 |
paddw %mm7, %mm7 |
1180 |
pxor %mm6, %mm6 |
1200 |
pxor %mm6, %mm6 |
1181 |
|
1201 |
|
Lines 1325-1339
_dv_pgm_copy_ntsc_c_block_mmx:
Link Here
|
1325 |
_dv_video_copy_ntsc_c_block_mmx: |
1347 |
_dv_video_copy_ntsc_c_block_mmx: |
1326 |
|
1348 |
|
1327 |
pushl %ebp |
1349 |
pushl %ebp |
1328 |
movl %esp, %ebp |
|
|
1329 |
pushl %esi |
1350 |
pushl %esi |
1330 |
pushl %edi |
1351 |
pushl %edi |
1331 |
pushl %ebx |
1352 |
pushl %ebx |
1332 |
|
|
|
1333 |
movl 8(%ebp), %edi # dest |
1334 |
movl 12(%ebp), %esi # src |
1335 |
|
1353 |
|
1336 |
movq OFFSETBX, %mm7 |
1354 |
LOAD_PIC_REG(bp) |
|
|
1355 |
|
1356 |
movl 20(%esp), %edi # dest |
1357 |
movl 24(%esp), %esi # src |
1358 |
|
1359 |
movq MUNG(OFFSETBX), %mm7 |
1337 |
paddw %mm7, %mm7 |
1360 |
paddw %mm7, %mm7 |
1338 |
pxor %mm6, %mm6 |
1361 |
pxor %mm6, %mm6 |
1339 |
|
1362 |
|
1340 |
-- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S |
1363 |
++ libdv-0.104/libdv/rgbtoyuv_x86_64.S |
Lines 41-49
Link Here
|
41 |
#define DV_WIDTH_SHORT_HALF 720 |
41 |
#define DV_WIDTH_SHORT_HALF 720 |
42 |
#define DV_WIDTH_BYTE_HALF 360 |
42 |
#define DV_WIDTH_BYTE_HALF 360 |
43 |
|
43 |
|
44 |
.global _dv_rgbtoycb_mmx_x86_64 |
|
|
45 |
# .global yuvtoycb_mmx_x86_64 |
46 |
|
47 |
.data |
44 |
.data |
48 |
|
45 |
|
49 |
.align 8 |
46 |
.align 8 |
50 |
-- libdv-0.104-old/libdv/vlc_x86.S |
47 |
++ libdv-0.104/libdv/vlc_x86.S |
Lines 1-31
Link Here
|
1 |
#include "asmoff.h" |
1 |
#include "asmoff.h" |
2 |
.section .note.GNU-stack, "", @progbits |
2 |
.section .note.GNU-stack, "", @progbits |
|
|
3 |
#include "asm_common.S" |
3 |
|
4 |
|
4 |
.text |
5 |
.text |
5 |
.align 4 |
6 |
.align 4 |
6 |
.globl dv_decode_vlc |
7 |
.globl dv_decode_vlc |
|
|
8 |
.globl asm_dv_decode_vlc |
9 |
.hidden asm_dv_decode_vlc |
10 |
asm_dv_decode_vlc = dv_decode_vlc |
11 |
|
7 |
.type dv_decode_vlc,@function |
12 |
.type dv_decode_vlc,@function |
8 |
dv_decode_vlc: |
13 |
dv_decode_vlc: |
9 |
pushl %ebx |
14 |
pushl %ebx |
|
|
15 |
pushl %ebp |
16 |
|
17 |
LOAD_PIC_REG(bp) |
10 |
|
18 |
|
11 |
/* Args are at 8(%esp). */ |
19 |
/* Args are at 12(%esp). */ |
12 |
movl 8(%esp),%eax /* %eax is bits */ |
20 |
movl 12(%esp),%eax /* %eax is bits */ |
13 |
movl 12(%esp),%ebx /* %ebx is maxbits */ |
21 |
movl 16(%esp),%ebx /* %ebx is maxbits */ |
14 |
andl $0x3f,%ebx /* limit index range STL*/ |
22 |
andl $0x3f,%ebx /* limit index range STL*/ |
15 |
|
23 |
|
16 |
movl dv_vlc_class_index_mask(,%ebx,4),%edx |
24 |
movl MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx |
17 |
andl %eax,%edx |
25 |
andl %eax,%edx |
18 |
movl dv_vlc_class_index_rshift(,%ebx,4),%ecx |
26 |
movl MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx |
19 |
sarl %cl,%edx |
27 |
sarl %cl,%edx |
20 |
movl dv_vlc_classes(,%ebx,4),%ecx |
28 |
movl MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx |
21 |
movsbl (%ecx,%edx,1),%edx /* %edx is class */ |
29 |
movsbl (%ecx,%edx,1),%edx /* %edx is class */ |
22 |
|
30 |
|
23 |
movl dv_vlc_index_mask(,%edx,4),%ebx |
31 |
movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx |
24 |
movl dv_vlc_index_rshift(,%edx,4),%ecx |
32 |
movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx |
25 |
andl %eax,%ebx |
33 |
andl %eax,%ebx |
26 |
sarl %cl,%ebx |
34 |
sarl %cl,%ebx |
27 |
|
35 |
|
28 |
movl dv_vlc_lookups(,%edx,4),%edx |
36 |
movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx |
29 |
movl (%edx,%ebx,4),%edx |
37 |
movl (%edx,%ebx,4),%edx |
30 |
|
38 |
|
31 |
/* Now %edx holds result, like this: |
39 |
/* Now %edx holds result, like this: |
Lines 42-48
dv_decode_vlc:
Link Here
|
42 |
movl %edx,%ecx |
51 |
movl %edx,%ecx |
43 |
sarl $8,%ecx |
52 |
sarl $8,%ecx |
44 |
andl $0xff,%ecx |
53 |
andl $0xff,%ecx |
45 |
movl sign_mask(,%ecx,4),%ebx |
54 |
movl MUNG_ARR(sign_mask,%ecx,4),%ebx |
46 |
andl %ebx,%eax |
55 |
andl %ebx,%eax |
47 |
negl %eax |
56 |
negl %eax |
48 |
sarl $31,%eax |
57 |
sarl $31,%eax |
Lines 63-76
dv_decode_vlc:
Link Here
|
63 |
*result = broken; |
72 |
*result = broken; |
64 |
Note that the 'broken' pattern is all ones (i.e. 0xffffffff) |
73 |
Note that the 'broken' pattern is all ones (i.e. 0xffffffff) |
65 |
*/ |
74 |
*/ |
66 |
movl 12(%esp),%ebx /* %ebx is maxbits */ |
75 |
movl 16(%esp),%ebx /* %ebx is maxbits */ |
67 |
subl %ecx,%ebx |
76 |
subl %ecx,%ebx |
68 |
sbbl %ebx,%ebx |
77 |
sbbl %ebx,%ebx |
69 |
orl %ebx,%edx |
78 |
orl %ebx,%edx |
70 |
|
79 |
|
71 |
movl 16(%esp),%eax |
80 |
movl 20(%esp),%eax |
72 |
movl %edx,(%eax) |
81 |
movl %edx,(%eax) |
73 |
|
82 |
popl %ebp |
74 |
popl %ebx |
83 |
popl %ebx |
75 |
ret |
84 |
ret |
76 |
|
85 |
|
Lines 80-100
dv_decode_vlc:
Link Here
|
80 |
.type __dv_decode_vlc,@function |
89 |
.type __dv_decode_vlc,@function |
81 |
__dv_decode_vlc: |
90 |
__dv_decode_vlc: |
82 |
pushl %ebx |
91 |
pushl %ebx |
|
|
92 |
pushl %ebp |
93 |
|
94 |
LOAD_PIC_REG(bp) |
83 |
|
95 |
|
84 |
/* Args are at 8(%esp). */ |
96 |
/* Args are at 12(%esp). */ |
85 |
movl 8(%esp),%eax /* %eax is bits */ |
97 |
movl 12(%esp),%eax /* %eax is bits */ |
86 |
|
98 |
|
87 |
movl %eax,%edx /* %edx is class */ |
99 |
movl %eax,%edx /* %edx is class */ |
88 |
andl $0xfe00,%edx |
100 |
andl $0xfe00,%edx |
89 |
sarl $9,%edx |
101 |
sarl $9,%edx |
|
|
102 |
#ifdef __PIC__ |
103 |
movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx |
104 |
#else |
90 |
movsbl dv_vlc_class_lookup5(%edx),%edx |
105 |
movsbl dv_vlc_class_lookup5(%edx),%edx |
91 |
|
106 |
#endif |
92 |
movl dv_vlc_index_mask(,%edx,4),%ebx |
107 |
|
93 |
movl dv_vlc_index_rshift(,%edx,4),%ecx |
108 |
movl MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx |
|
|
109 |
movl MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx |
94 |
andl %eax,%ebx |
110 |
andl %eax,%ebx |
95 |
sarl %cl,%ebx |
111 |
sarl %cl,%ebx |
96 |
|
112 |
|
97 |
movl dv_vlc_lookups(,%edx,4),%edx |
113 |
movl MUNG_ARR(dv_vlc_lookups,%edx,4),%edx |
98 |
movl (%edx,%ebx,4),%edx |
114 |
movl (%edx,%ebx,4),%edx |
99 |
|
115 |
|
100 |
/* Now %edx holds result, like this: |
116 |
/* Now %edx holds result, like this: |
Lines 112-118
__dv_decode_vlc:
Link Here
|
112 |
movl %edx,%ecx |
128 |
movl %edx,%ecx |
113 |
sarl $8,%ecx |
129 |
sarl $8,%ecx |
114 |
andl $0xff,%ecx |
130 |
andl $0xff,%ecx |
115 |
movl sign_mask(,%ecx,4),%ecx |
131 |
movl MUNG_ARR(sign_mask,%ecx,4),%ecx |
116 |
andl %ecx,%eax |
132 |
andl %ecx,%eax |
117 |
negl %eax |
133 |
negl %eax |
118 |
sarl $31,%eax |
134 |
sarl $31,%eax |
Lines 127-135
__dv_decode_vlc:
Link Here
|
127 |
xorl %eax,%edx |
143 |
xorl %eax,%edx |
128 |
subl %eax,%edx |
144 |
subl %eax,%edx |
129 |
|
145 |
|
130 |
movl 12(%esp),%eax |
146 |
movl 16(%esp),%eax |
131 |
movl %edx,(%eax) |
147 |
movl %edx,(%eax) |
132 |
|
148 |
popl %ebp |
133 |
popl %ebx |
149 |
popl %ebx |
134 |
ret |
150 |
ret |
135 |
|
151 |
|
Lines 140-153
void dv_parse_ac_coeffs_pass0(bitstream_
Link Here
|
140 |
*/ |
156 |
*/ |
141 |
.text |
157 |
.text |
142 |
.align 4 |
158 |
.align 4 |
|
|
159 |
.globl asm_dv_parse_ac_coeffs_pass0 |
160 |
.hidden asm_dv_parse_ac_coeffs_pass0 |
161 |
asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0 |
162 |
|
143 |
.globl dv_parse_ac_coeffs_pass0 |
163 |
.globl dv_parse_ac_coeffs_pass0 |
144 |
.type dv_parse_ac_coeffs_pass0,@function |
164 |
.type dv_parse_ac_coeffs_pass0,@function |
145 |
dv_parse_ac_coeffs_pass0: |
165 |
dv_parse_ac_coeffs_pass0: |
146 |
pushl %ebx |
166 |
pushl %ebx |
147 |
pushl %edi |
167 |
pushl %edi |
148 |
pushl %esi |
168 |
pushl %esi |
149 |
pushl %ebp |
169 |
pushl %ebp |
150 |
|
170 |
|
|
|
171 |
LOAD_PIC_REG(si) |
172 |
|
151 |
#define ARGn(N) (20+(4*(N)))(%esp) |
173 |
#define ARGn(N) (20+(4*(N)))(%esp) |
152 |
|
174 |
|
153 |
/* |
175 |
/* |
Lines 159-166
dv_parse_ac_coeffs_pass0:
Link Here
|
159 |
ebp bl |
182 |
ebp bl |
160 |
*/ |
183 |
*/ |
161 |
movl ARGn(2),%ebp |
184 |
movl ARGn(2),%ebp |
|
|
185 |
#ifndef __PIC__ |
162 |
movl ARGn(0),%esi |
186 |
movl ARGn(0),%esi |
163 |
movl bitstream_t_buf(%esi),%esi |
187 |
movl bitstream_t_buf(%esi),%esi |
|
|
188 |
#endif |
164 |
movl dv_block_t_offset(%ebp),%edi |
189 |
movl dv_block_t_offset(%ebp),%edi |
165 |
movl dv_block_t_reorder(%ebp),%ebx |
190 |
movl dv_block_t_reorder(%ebp),%ebx |
166 |
|
191 |
|
Lines 170-176
dv_parse_ac_coeffs_pass0:
Link Here
|
170 |
|
195 |
|
171 |
movq dv_block_t_coeffs(%ebp),%mm1 |
196 |
movq dv_block_t_coeffs(%ebp),%mm1 |
172 |
pxor %mm0,%mm0 |
197 |
pxor %mm0,%mm0 |
|
|
198 |
#ifdef __PIC__ |
199 |
pand const_f_0_0_0@GOTOFF(%esi),%mm1 |
200 |
#else |
173 |
pand const_f_0_0_0,%mm1 |
201 |
pand const_f_0_0_0,%mm1 |
|
|
202 |
#endif |
174 |
movq %mm1,dv_block_t_coeffs(%ebp) |
203 |
movq %mm1,dv_block_t_coeffs(%ebp) |
175 |
movq %mm0,(dv_block_t_coeffs + 8)(%ebp) |
204 |
movq %mm0,(dv_block_t_coeffs + 8)(%ebp) |
176 |
movq %mm0,(dv_block_t_coeffs + 16)(%ebp) |
205 |
movq %mm0,(dv_block_t_coeffs + 16)(%ebp) |
Lines 191-199
dv_parse_ac_coeffs_pass0:
Link Here
|
191 |
readloop: |
220 |
readloop: |
192 |
movl %edi,%ecx |
221 |
movl %edi,%ecx |
193 |
shrl $3,%ecx |
222 |
shrl $3,%ecx |
|
|
223 |
#ifdef __PIC__ |
224 |
movl ARGn(0),%eax |
225 |
addl bitstream_t_buf(%eax),%ecx |
226 |
movzbl (%ecx),%eax |
227 |
movzbl 1(%ecx),%edx |
228 |
movzbl 2(%ecx),%ecx |
229 |
#else |
194 |
movzbl (%esi,%ecx,1),%eax |
230 |
movzbl (%esi,%ecx,1),%eax |
195 |
movzbl 1(%esi,%ecx,1),%edx |
231 |
movzbl 1(%esi,%ecx,1),%edx |
196 |
movzbl 2(%esi,%ecx,1),%ecx |
232 |
movzbl 2(%esi,%ecx,1),%ecx |
|
|
233 |
#endif |
197 |
shll $16,%eax |
234 |
shll $16,%eax |
198 |
shll $8,%edx |
235 |
shll $8,%edx |
199 |
orl %ecx,%eax |
236 |
orl %ecx,%eax |
Lines 217-223
readloop:
Link Here
|
217 |
|
254 |
|
218 |
/* Attempt to use the shortcut first. If it hits, then |
255 |
/* Attempt to use the shortcut first. If it hits, then |
219 |
this vlc term has been decoded. */ |
256 |
this vlc term has been decoded. */ |
|
|
257 |
#ifdef __PIC__ |
258 |
movl dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx |
259 |
#else |
220 |
movl dv_vlc_class1_shortcut(,%ecx,4),%edx |
260 |
movl dv_vlc_class1_shortcut(,%ecx,4),%edx |
|
|
261 |
#endif |
221 |
test $0x80,%edx |
262 |
test $0x80,%edx |
222 |
je done_decode |
263 |
je done_decode |
223 |
|
264 |
|
Lines 228-239
readloop:
Link Here
|
228 |
movl %ebx,dv_block_t_reorder(%ebp) |
269 |
movl %ebx,dv_block_t_reorder(%ebp) |
229 |
|
270 |
|
230 |
/* %eax is bits */ |
271 |
/* %eax is bits */ |
231 |
|
272 |
#ifdef __PIC__ |
|
|
273 |
movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx |
274 |
|
275 |
movl dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx |
276 |
movl dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx |
277 |
movl dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx |
278 |
#else |
232 |
movsbl dv_vlc_class_lookup5(%ecx),%ecx |
279 |
movsbl dv_vlc_class_lookup5(%ecx),%ecx |
233 |
|
280 |
|
234 |
movl dv_vlc_index_mask(,%ecx,4),%ebx |
281 |
movl dv_vlc_index_mask(,%ecx,4),%ebx |
235 |
movl dv_vlc_lookups(,%ecx,4),%edx |
282 |
movl dv_vlc_lookups(,%ecx,4),%edx |
236 |
movl dv_vlc_index_rshift(,%ecx,4),%ecx |
283 |
movl dv_vlc_index_rshift(,%ecx,4),%ecx |
|
|
284 |
#endif |
237 |
andl %eax,%ebx |
285 |
andl %eax,%ebx |
238 |
sarl %cl,%ebx |
286 |
sarl %cl,%ebx |
239 |
|
287 |
|
Lines 256-262
readloop:
Link Here
|
256 |
movl %edx,%ecx |
304 |
movl %edx,%ecx |
257 |
sarl $8,%ecx |
305 |
sarl $8,%ecx |
258 |
andl $0xff,%ecx |
306 |
andl $0xff,%ecx |
|
|
307 |
#ifdef __PIC__ |
308 |
movl sign_mask@GOTOFF(%esi,%ecx,4),%ecx |
309 |
#else |
259 |
movl sign_mask(,%ecx,4),%ecx |
310 |
movl sign_mask(,%ecx,4),%ecx |
|
|
311 |
#endif |
260 |
andl %ecx,%eax |
312 |
andl %ecx,%eax |
261 |
negl %eax |
313 |
negl %eax |
262 |
sarl $31,%eax |
314 |
sarl $31,%eax |
Lines 326-335
alldone:
Link Here
|
326 |
|
378 |
|
327 |
slowpath: |
379 |
slowpath: |
328 |
/* slow path: use dv_decode_vlc */; |
380 |
/* slow path: use dv_decode_vlc */; |
|
|
381 |
#ifdef __PIC__ |
382 |
pushl %esi |
383 |
leal vlc@GOTOFF(%esi),%esi |
384 |
xchgl %esi,(%esp) /* last parameter is &vlc */ |
385 |
#else |
329 |
pushl $vlc /* last parameter is &vlc */ |
386 |
pushl $vlc /* last parameter is &vlc */ |
|
|
387 |
#endif |
330 |
pushl %edx /* bits_left */ |
388 |
pushl %edx /* bits_left */ |
331 |
pushl %eax /* bits */ |
389 |
pushl %eax /* bits */ |
332 |
call dv_decode_vlc |
390 |
call asm_dv_decode_vlc |
333 |
addl $12,%esp |
391 |
addl $12,%esp |
334 |
test $0x80,%edx /* If (vlc.run < 0) break */ |
392 |
test $0x80,%edx /* If (vlc.run < 0) break */ |
335 |
jne escape |
393 |
jne escape |
Lines 359-364
show16:
Link Here
|
359 |
pushl %esi |
417 |
pushl %esi |
360 |
pushl %ebp |
418 |
pushl %ebp |
361 |
|
419 |
|
|
|
420 |
LOAD_PIC_REG(si) |
421 |
|
362 |
#define ARGn(N) (20+(4*(N)))(%esp) |
422 |
#define ARGn(N) (20+(4*(N)))(%esp) |
363 |
|
423 |
|
364 |
movl ARGn(1),%eax /* quality */ |
424 |
movl ARGn(1),%eax /* quality */ |
Lines 373-379
dv_parse_video_segment:
Link Here
|
373 |
jz its_mono |
434 |
jz its_mono |
374 |
movl $6,%ebx |
435 |
movl $6,%ebx |
375 |
its_mono: |
436 |
its_mono: |
|
|
437 |
#ifdef __PIC__ |
438 |
movl %ebx,n_blocks@GOTOFF(%esi) |
439 |
#else |
376 |
movl %ebx,n_blocks |
440 |
movl %ebx,n_blocks |
|
|
441 |
#endif |
377 |
|
442 |
|
378 |
/* |
443 |
/* |
379 |
* ebx seg/b |
444 |
* ebx seg/b |
Lines 384-398
its_mono:
Link Here
|
384 |
* ebp bl |
449 |
* ebp bl |
385 |
*/ |
450 |
*/ |
386 |
movl ARGn(0),%ebx |
451 |
movl ARGn(0),%ebx |
|
|
452 |
#ifndef __PIC__ |
387 |
movl dv_videosegment_t_bs(%ebx),%esi |
453 |
movl dv_videosegment_t_bs(%ebx),%esi |
388 |
movl bitstream_t_buf(%esi),%esi |
454 |
movl bitstream_t_buf(%esi),%esi |
|
|
455 |
#endif |
389 |
leal dv_videosegment_t_mb(%ebx),%edi |
456 |
leal dv_videosegment_t_mb(%ebx),%edi |
390 |
|
457 |
|
391 |
movl $0,%eax |
458 |
movl $0,%eax |
392 |
movl $0,%ecx |
459 |
movl $0,%ecx |
393 |
macloop: |
460 |
macloop: |
|
|
461 |
#ifdef __PIC__ |
462 |
movl %eax,m@GOTOFF(%esi) |
463 |
movl %ecx,mb_start@GOTOFF(%esi) |
464 |
#else |
394 |
movl %eax,m |
465 |
movl %eax,m |
395 |
movl %ecx,mb_start |
466 |
movl %ecx,mb_start |
|
|
467 |
#endif |
396 |
|
468 |
|
397 |
movl ARGn(0),%ebx |
469 |
movl ARGn(0),%ebx |
398 |
|
470 |
|
Lines 400-406
macloop:
Link Here
|
400 |
/* mb->qno = bitstream_get(bs,4); */ |
472 |
/* mb->qno = bitstream_get(bs,4); */ |
401 |
movl %ecx,%edx |
473 |
movl %ecx,%edx |
402 |
shr $3,%edx |
474 |
shr $3,%edx |
|
|
475 |
#ifdef __PIC__ |
476 |
movl dv_videosegment_t_bs(%ebx),%ecx |
477 |
movl bitstream_t_buf(%ecx),%ecx |
478 |
movzbl 3(%ecx,%edx,1),%edx |
479 |
#else |
403 |
movzbl 3(%esi,%edx,1),%edx |
480 |
movzbl 3(%esi,%edx,1),%edx |
|
|
481 |
#endif |
404 |
andl $0xf,%edx |
482 |
andl $0xf,%edx |
405 |
movl %edx,dv_macroblock_t_qno(%edi) |
483 |
movl %edx,dv_macroblock_t_qno(%edi) |
406 |
|
484 |
|
Lines 411-417
macloop:
Link Here
|
411 |
movl %edx,dv_macroblock_t_eob_count(%edi) |
489 |
movl %edx,dv_macroblock_t_eob_count(%edi) |
412 |
|
490 |
|
413 |
/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ |
491 |
/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */ |
|
|
492 |
#ifdef __PIC__ |
493 |
movl dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx |
494 |
#else |
414 |
movl dv_super_map_vertical(,%eax,4),%edx |
495 |
movl dv_super_map_vertical(,%eax,4),%edx |
|
|
496 |
#endif |
415 |
movl dv_videosegment_t_i(%ebx),%ecx |
497 |
movl dv_videosegment_t_i(%ebx),%ecx |
416 |
addl %ecx,%edx |
498 |
addl %ecx,%edx |
417 |
|
499 |
|
Lines 422-432
skarly:
Link Here
|
422 |
andl $1,%ecx |
504 |
andl $1,%ecx |
423 |
shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */ |
505 |
shll $5,%ecx /* ecx = (isPAL ? 32 : 0) */ |
424 |
|
506 |
|
|
|
507 |
#ifdef __PIC__ |
508 |
leal mod_10@GOTOFF(%esi),%edx |
509 |
movzbl (%edx,%ecx,1),%edx /* uses mod_12 for PAL */ |
510 |
#else |
425 |
movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ |
511 |
movzbl mod_10(%edx,%ecx,1),%edx /* uses mod_12 for PAL */ |
|
|
512 |
#endif |
426 |
movl %edx,dv_macroblock_t_i(%edi) |
513 |
movl %edx,dv_macroblock_t_i(%edi) |
427 |
|
514 |
|
428 |
/* mb->j = dv_super_map_horizontal[m]; */ |
515 |
/* mb->j = dv_super_map_horizontal[m]; */ |
|
|
516 |
#ifdef __PIC__ |
517 |
movl dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx |
518 |
#else |
429 |
movl dv_super_map_horizontal(,%eax,4),%edx |
519 |
movl dv_super_map_horizontal(,%eax,4),%edx |
|
|
520 |
#endif |
430 |
movl %edx,dv_macroblock_t_j(%edi) |
521 |
movl %edx,dv_macroblock_t_j(%edi) |
431 |
|
522 |
|
432 |
/* mb->k = seg->k; */ |
523 |
/* mb->k = seg->k; */ |
Lines 445-456
blkloop:
Link Here
|
445 |
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ |
536 |
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ |
446 |
*/ |
537 |
*/ |
447 |
/* dc = bitstream_get(bs,9); */ |
538 |
/* dc = bitstream_get(bs,9); */ |
|
|
539 |
#ifdef __PIC__ |
540 |
movl mb_start@GOTOFF(%esi),%ecx |
541 |
#else |
448 |
movl mb_start,%ecx |
542 |
movl mb_start,%ecx |
|
|
543 |
#endif |
449 |
shr $3,%ecx |
544 |
shr $3,%ecx |
|
|
545 |
#ifdef __PIC__ |
546 |
movzbl blk_start@GOTOFF(%esi,%ebx),%edx |
547 |
#else |
450 |
movzbl blk_start(%ebx),%edx |
548 |
movzbl blk_start(%ebx),%edx |
|
|
549 |
#endif |
451 |
addl %ecx,%edx |
550 |
addl %ecx,%edx |
|
|
551 |
#ifdef __PIC__ |
552 |
movl ARGn(0),%ecx |
553 |
movl dv_videosegment_t_bs(%ecx),%ecx |
554 |
movl bitstream_t_buf(%ecx),%ecx |
555 |
movzbl (%ecx,%edx,1),%eax /* hi byte */ |
556 |
movzbl 1(%ecx,%edx,1),%ecx /* lo byte */ |
557 |
#else |
452 |
movzbl (%esi,%edx,1),%eax /* hi byte */ |
558 |
movzbl (%esi,%edx,1),%eax /* hi byte */ |
453 |
movzbl 1(%esi,%edx,1),%ecx /* lo byte */ |
559 |
movzbl 1(%esi,%edx,1),%ecx /* lo byte */ |
|
|
560 |
#endif |
454 |
shll $8,%eax |
561 |
shll $8,%eax |
455 |
orl %ecx,%eax |
562 |
orl %ecx,%eax |
456 |
|
563 |
|
Lines 477-483
blkloop:
Link Here
|
477 |
|
584 |
|
478 |
/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ |
585 |
/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */ |
479 |
shll $6,%eax |
586 |
shll $6,%eax |
|
|
587 |
#ifdef __PIC__ |
588 |
leal dv_reorder@GOTOFF+1(%esi,%eax),%eax |
589 |
#else |
480 |
addl $(dv_reorder+1),%eax |
590 |
addl $(dv_reorder+1),%eax |
|
|
591 |
#endif |
481 |
movl %eax,dv_block_t_reorder(%ebp) |
592 |
movl %eax,dv_block_t_reorder(%ebp) |
482 |
|
593 |
|
483 |
/* bl->reorder_sentinel = bl->reorder + 63; */ |
594 |
/* bl->reorder_sentinel = bl->reorder + 63; */ |
Lines 485-497
blkloop:
Link Here
|
485 |
movl %eax,dv_block_t_reorder_sentinel(%ebp) |
596 |
movl %eax,dv_block_t_reorder_sentinel(%ebp) |
486 |
|
597 |
|
487 |
/* bl->offset= mb_start + dv_parse_bit_start[b]; */ |
598 |
/* bl->offset= mb_start + dv_parse_bit_start[b]; */ |
|
|
599 |
#ifdef __PIC__ |
600 |
movl mb_start@GOTOFF(%esi),%ecx |
601 |
movl dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax |
602 |
#else |
488 |
movl mb_start,%ecx |
603 |
movl mb_start,%ecx |
489 |
movl dv_parse_bit_start(,%ebx,4),%eax |
604 |
movl dv_parse_bit_start(,%ebx,4),%eax |
|
|
605 |
#endif |
490 |
addl %ecx,%eax |
606 |
addl %ecx,%eax |
491 |
movl %eax,dv_block_t_offset(%ebp) |
607 |
movl %eax,dv_block_t_offset(%ebp) |
492 |
|
608 |
|
493 |
/* bl->end= mb_start + dv_parse_bit_end[b]; */ |
609 |
/* bl->end= mb_start + dv_parse_bit_end[b]; */ |
|
|
610 |
#ifdef __PIC__ |
611 |
movl dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax |
612 |
#else |
494 |
movl dv_parse_bit_end(,%ebx,4),%eax |
613 |
movl dv_parse_bit_end(,%ebx,4),%eax |
|
|
614 |
#endif |
495 |
addl %ecx,%eax |
615 |
addl %ecx,%eax |
496 |
movl %eax,dv_block_t_end(%ebp) |
616 |
movl %eax,dv_block_t_end(%ebp) |
497 |
|
617 |
|
Lines 503-509
blkloop:
Link Here
|
503 |
/* no AC pass. Just zero out the remaining coeffs */ |
623 |
/* no AC pass. Just zero out the remaining coeffs */ |
504 |
movq dv_block_t_coeffs(%ebp),%mm1 |
624 |
movq dv_block_t_coeffs(%ebp),%mm1 |
505 |
pxor %mm0,%mm0 |
625 |
pxor %mm0,%mm0 |
|
|
626 |
#ifdef __PIC__ |
627 |
pand const_f_0_0_0@GOTOFF(%esi),%mm1 |
628 |
#else |
506 |
pand const_f_0_0_0,%mm1 |
629 |
pand const_f_0_0_0,%mm1 |
|
|
630 |
#endif |
507 |
movq %mm1,dv_block_t_coeffs(%ebp) |
631 |
movq %mm1,dv_block_t_coeffs(%ebp) |
508 |
movq %mm0,(dv_block_t_coeffs + 8)(%ebp) |
632 |
movq %mm0,(dv_block_t_coeffs + 8)(%ebp) |
509 |
movq %mm0,(dv_block_t_coeffs + 16)(%ebp) |
633 |
movq %mm0,(dv_block_t_coeffs + 16)(%ebp) |
Lines 528-545
do_ac_pass:
Link Here
|
528 |
pushl %ebp |
652 |
pushl %ebp |
529 |
pushl %edi |
653 |
pushl %edi |
530 |
pushl %eax |
654 |
pushl %eax |
531 |
call dv_parse_ac_coeffs_pass0 |
655 |
call asm_dv_parse_ac_coeffs_pass0 |
532 |
addl $12,%esp |
656 |
addl $12,%esp |
533 |
done_ac: |
657 |
done_ac: |
534 |
|
658 |
|
|
|
659 |
#ifdef __PIC__ |
660 |
movl n_blocks@GOTOFF(%esi),%eax |
661 |
#else |
535 |
movl n_blocks,%eax |
662 |
movl n_blocks,%eax |
|
|
663 |
#endif |
536 |
addl $dv_block_t_size,%ebp |
664 |
addl $dv_block_t_size,%ebp |
537 |
incl %ebx |
665 |
incl %ebx |
538 |
cmpl %eax,%ebx |
666 |
cmpl %eax,%ebx |
539 |
jnz blkloop |
667 |
jnz blkloop |
540 |
|
668 |
|
|
|
669 |
#ifdef __PIC__ |
670 |
movl m@GOTOFF(%esi),%eax |
671 |
movl mb_start@GOTOFF(%esi),%ecx |
672 |
#else |
541 |
movl m,%eax |
673 |
movl m,%eax |
542 |
movl mb_start,%ecx |
674 |
movl mb_start,%ecx |
|
|
675 |
#endif |
543 |
addl $(8 * 80),%ecx |
676 |
addl $(8 * 80),%ecx |
544 |
addl $dv_macroblock_t_size,%edi |
677 |
addl $dv_macroblock_t_size,%edi |
545 |
incl %eax |
678 |
incl %eax |
Lines 557-563
done_ac:
Link Here
|
557 |
|
690 |
|
558 |
andl $DV_QUALITY_AC_MASK,%eax |
691 |
andl $DV_QUALITY_AC_MASK,%eax |
559 |
cmpl $DV_QUALITY_AC_2,%eax |
692 |
cmpl $DV_QUALITY_AC_2,%eax |
560 |
jz dv_parse_ac_coeffs |
693 |
jz asm_dv_parse_ac_coeffs |
561 |
movl $0,%eax |
694 |
movl $0,%eax |
562 |
ret |
695 |
ret |
563 |
|
696 |
|