Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 121871 | Differences between
and this patch

Collapse All | Expand All

(-)libdv-0.104-old/libdv/asm_common.S (+37 lines)
Line 0 Link Here
1
/* public domain, do what you want */
2
3
#ifdef __PIC__
4
# define MUNG(sym)                 sym##@GOTOFF(%ebp)
5
# define MUNG_ARR(sym, args...)    sym##@GOTOFF(%ebp,##args)
6
#else
7
# define MUNG(sym)                 sym
8
# define MUNG_ARR(sym, args...)    sym(,##args)
9
#endif
10
11
#ifdef __PIC__
12
# undef __i686 /* gcc define gets in our way */
13
	.section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits
14
.globl __i686.get_pc_thunk.bp
15
	.hidden  __i686.get_pc_thunk.bp
16
	.type    __i686.get_pc_thunk.bp,@function
17
__i686.get_pc_thunk.bp:
18
	movl (%esp), %ebp
19
	ret
20
# define LOAD_PIC_REG_BP() \
21
	call __i686.get_pc_thunk.bp ; \
22
	addl $_GLOBAL_OFFSET_TABLE_, %ebp
23
24
	.section .gnu.linkonce.t.__i686.get_pc_thunk.si,"ax",@progbits
25
.globl __i686.get_pc_thunk.si
26
	.hidden  __i686.get_pc_thunk.si
27
	.type    __i686.get_pc_thunk.si,@function
28
__i686.get_pc_thunk.si:
29
	movl (%esp), %esi
30
	ret
31
# define LOAD_PIC_REG_SI() \
32
	call __i686.get_pc_thunk.si ; \
33
	addl $_GLOBAL_OFFSET_TABLE_, %esi
34
#else
35
# define LOAD_PIC_REG_BP()
36
# define LOAD_PIC_REG_SI()
37
#endif
(-)libdv-0.104-old/libdv/dct_block_mmx.S (-42 / +56 lines)
Lines 53-69 scratch2: .quad 0 Link Here
53
scratch3:       .quad 0
53
scratch3:       .quad 0
54
scratch4:       .quad 0
54
scratch4:       .quad 0
55
55
56
#include "asm_common.S"
57
56
.text
58
.text
57
59
58
.align 8	
60
.align 8	
59
.global _dv_dct_88_block_mmx
61
.global _dv_dct_88_block_mmx
62
.hidden _dv_dct_88_block_mmx
63
.type	_dv_dct_88_block_mmx,@function
60
_dv_dct_88_block_mmx:
64
_dv_dct_88_block_mmx:
61
65
62
	pushl   %ebp
66
	pushl   %ebp
63
	movl    %esp, %ebp
64
	pushl   %esi
67
	pushl   %esi
65
68
66
	movl    8(%ebp), %esi          # source
69
	LOAD_PIC_REG_BP()
70
71
	movl    12(%esp), %esi          # source
67
72
68
# column 0
73
# column 0
69
	movq 16*0(%esi), %mm0          # v0
74
	movq 16*0(%esi), %mm0          # v0
Lines 86-107 _dv_dct_88_block_mmx: Link Here
86
91
87
	movq 16*3(%esi), %mm5          # v3
92
	movq 16*3(%esi), %mm5          # v3
88
	movq 16*4(%esi), %mm7          # v4
93
	movq 16*4(%esi), %mm7          # v4
89
	movq  %mm7, scratch1           # scratch1: v4   ; 
94
	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
90
	movq  %mm5, %mm7               # duplicate v3 
95
	movq  %mm5, %mm7               # duplicate v3 
91
	paddw scratch1, %mm5           # v03: v3+v4  
96
	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
92
	psubw scratch1, %mm7           # v04: v3-v4  
97
	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
93
	movq  %mm5, scratch2           # scratch2: v03
98
	movq  %mm5, MUNG(scratch2)     # scratch2: v03
94
	movq  %mm0, %mm5               # mm5: v00
99
	movq  %mm0, %mm5               # mm5: v00
95
100
96
	paddw scratch2, %mm0           # v10: v00+v03   
101
	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
97
	psubw scratch2, %mm5           # v13: v00-v03   
102
	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
98
	movq  %mm3, scratch3           # scratch3: v02
103
	movq  %mm3, MUNG(scratch3)     # scratch3: v02
99
	movq  %mm1, %mm3               # duplicate v01
104
	movq  %mm1, %mm3               # duplicate v01
100
105
101
	paddw scratch3, %mm1          # v11: v01+v02
106
	paddw MUNG(scratch3), %mm1    # v11: v01+v02
102
	psubw scratch3, %mm3          # v12: v01-v02
107
	psubw MUNG(scratch3), %mm3    # v12: v01-v02
103
108
104
	movq  %mm6, scratch4           # scratch4: v05
109
	movq  %mm6, MUNG(scratch4)     # scratch4: v05
105
	movq  %mm0, %mm6               # duplicate v10
110
	movq  %mm0, %mm6               # duplicate v10
106
111
107
	paddw %mm1, %mm0              # v10+v11
112
	paddw %mm1, %mm0              # v10+v11
Lines 111-120 _dv_dct_88_block_mmx: Link Here
111
	movq  %mm6, 16*4(%esi)         # out4: v10-v11 
116
	movq  %mm6, 16*4(%esi)         # out4: v10-v11 
112
117
113
	movq  %mm4, %mm0               # mm0: v06
118
	movq  %mm4, %mm0               # mm0: v06
114
	paddw scratch4, %mm4          # v15: v05+v06 
119
	paddw MUNG(scratch4), %mm4    # v15: v05+v06 
115
	paddw  %mm2, %mm0             # v16: v07+v06
120
	paddw  %mm2, %mm0             # v16: v07+v06
116
121
117
	pmulhw WA3, %mm4               # v35~: WA3*v15
122
	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
118
	psllw  $1, %mm4                # v35: compensate the coeefient scale
123
	psllw  $1, %mm4                # v35: compensate the coeefient scale
119
124
120
	movq   %mm4, %mm6              # duplicate v35
125
	movq   %mm4, %mm6              # duplicate v35
Lines 123-129 _dv_dct_88_block_mmx: Link Here
123
128
124
	paddw  %mm5, %mm3             # v22: v12+v13
129
	paddw  %mm5, %mm3             # v22: v12+v13
125
130
126
	pmulhw WA1, %mm3               # v32~: WA1*v22
131
	pmulhw MUNG(WA1), %mm3         # v32~: WA1*v22
127
	psllw  $16-NSHIFT, %mm3        # v32: compensate the coeefient scale
132
	psllw  $16-NSHIFT, %mm3        # v32: compensate the coeefient scale
128
	movq   %mm5, %mm6              # duplicate v13
133
	movq   %mm5, %mm6              # duplicate v13
129
134
Lines 134-146 _dv_dct_88_block_mmx: Link Here
134
	movq  %mm6, 16*6(%esi)         # out6: v13-v32 
139
	movq  %mm6, 16*6(%esi)         # out6: v13-v32 
135
140
136
141
137
	paddw  scratch4, %mm7         # v14n: v04+v05
142
	paddw  MUNG(scratch4), %mm7   # v14n: v04+v05
138
	movq   %mm0, %mm5              # duplicate v16
143
	movq   %mm0, %mm5              # duplicate v16
139
144
140
	psubw  %mm7, %mm0             # va1: v16-v14n
145
	psubw  %mm7, %mm0             # va1: v16-v14n
141
	pmulhw WA5, %mm0               # va0~:  va1*WA5
146
	pmulhw MUNG(WA5), %mm0         # va0~:  va1*WA5
142
	pmulhw WA4, %mm5               # v36~~: v16*WA4
147
	pmulhw MUNG(WA4), %mm5         # v36~~: v16*WA4
143
	pmulhw WA2, %mm7               # v34~~: v14n*WA2
148
	pmulhw MUNG(WA2), %mm7         # v34~~: v14n*WA2
144
	psllw  $16-WA4_SHIFT, %mm5     # v36: compensate the coeefient scale 
149
	psllw  $16-WA4_SHIFT, %mm5     # v36: compensate the coeefient scale 
145
	psllw  $16-NSHIFT, %mm7        # v34: compensate the coeefient scale
150
	psllw  $16-NSHIFT, %mm7        # v34: compensate the coeefient scale
146
151
Lines 188-209 _dv_dct_88_block_mmx: Link Here
188
193
189
	movq 16*3(%esi), %mm5              # v3
194
	movq 16*3(%esi), %mm5              # v3
190
	movq 16*4(%esi), %mm7              # v4
195
	movq 16*4(%esi), %mm7              # v4
191
	movq  %mm7, scratch1                    # scratch1: v4   ; 
196
	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
192
	movq  %mm5, %mm7               # duplicate v3 
197
	movq  %mm5, %mm7               # duplicate v3 
193
	paddw scratch1, %mm5           # v03: v3+v4  
198
	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
194
	psubw scratch1, %mm7           # v04: v3-v4  
199
	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
195
	movq  %mm5, scratch2        # scratch2: v03
200
	movq  %mm5, MUNG(scratch2)     # scratch2: v03
196
	movq  %mm0, %mm5               # mm5: v00
201
	movq  %mm0, %mm5               # mm5: v00
197
202
198
	paddw scratch2, %mm0           # v10: v00+v03   
203
	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
199
	psubw scratch2, %mm5           # v13: v00-v03   
204
	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
200
	movq  %mm3, scratch3         # scratc3: v02
205
	movq  %mm3, MUNG(scratch3)     # scratc3: v02
201
	movq  %mm1, %mm3               # duplicate v01
206
	movq  %mm1, %mm3               # duplicate v01
202
207
203
	paddw scratch3, %mm1           # v11: v01+v02
208
	paddw MUNG(scratch3), %mm1     # v11: v01+v02
204
	psubw scratch3, %mm3           # v12: v01-v02
209
	psubw MUNG(scratch3), %mm3     # v12: v01-v02
205
210
206
	movq  %mm6, scratch4         # scratc4: v05
211
	movq  %mm6, MUNG(scratch4)     # scratc4: v05
207
	movq  %mm0, %mm6               # duplicate v10
212
	movq  %mm0, %mm6               # duplicate v10
208
213
209
	paddw %mm1, %mm0                            # v10+v11
214
	paddw %mm1, %mm0                            # v10+v11
Lines 213-222 _dv_dct_88_block_mmx: Link Here
213
	movq  %mm6, 16*4(%esi)          # out4: v10-v11 
218
	movq  %mm6, 16*4(%esi)          # out4: v10-v11 
214
219
215
	movq  %mm4, %mm0             # mm0: v06
220
	movq  %mm4, %mm0             # mm0: v06
216
	paddw scratch4, %mm4         # v15: v05+v06 
221
	paddw MUNG(scratch4), %mm4     # v15: v05+v06 
217
	paddw  %mm2, %mm0                       # v16: v07+v06
222
	paddw  %mm2, %mm0                       # v16: v07+v06
218
223
219
	pmulhw WA3, %mm4           # v35~: WA3*v15
224
	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
220
	psllw  $16-NSHIFT, %mm4       # v35: compensate the coeefient scale
225
	psllw  $16-NSHIFT, %mm4       # v35: compensate the coeefient scale
221
226
222
	movq   %mm4, %mm6            # duplicate v35
227
	movq   %mm4, %mm6            # duplicate v35
Lines 225-231 _dv_dct_88_block_mmx: Link Here
225
230
226
	paddw  %mm5, %mm3            # v22: v12+v13
231
	paddw  %mm5, %mm3            # v22: v12+v13
227
232
228
	pmulhw WA1, %mm3           # v32~: WA3*v15
233
	pmulhw MUNG(WA1), %mm3         # v32~: WA3*v15
229
	psllw  $16-NSHIFT, %mm3       # v32: compensate the coeefient scale
234
	psllw  $16-NSHIFT, %mm3       # v32: compensate the coeefient scale
230
	movq   %mm5, %mm6            # duplicate v13
235
	movq   %mm5, %mm6            # duplicate v13
231
236
Lines 235-247 _dv_dct_88_block_mmx: Link Here
235
	movq  %mm5, 16*2(%esi)          # out2: v13+v32 
240
	movq  %mm5, 16*2(%esi)          # out2: v13+v32 
236
	movq  %mm6, 16*6(%esi)          # out6: v13-v32 
241
	movq  %mm6, 16*6(%esi)          # out6: v13-v32 
237
242
238
	paddw  scratch4, %mm7                           # v14n: v04+v05
243
	paddw  MUNG(scratch4), %mm7     # v14n: v04+v05
239
	movq   %mm0, %mm5                               # duplicate v16
244
	movq   %mm0, %mm5                               # duplicate v16
240
245
241
	psubw  %mm7, %mm0                               # va1: v16-v14n
246
	psubw  %mm7, %mm0                               # va1: v16-v14n
242
	pmulhw WA2, %mm7                # v34~~: v14n*WA2
247
	pmulhw MUNG(WA2), %mm7          # v34~~: v14n*WA2
243
	pmulhw WA5, %mm0                # va0~:  va1*WA5
248
	pmulhw MUNG(WA5), %mm0          # va0~:  va1*WA5
244
	pmulhw WA4, %mm5                        # v36~~: v16*WA4
249
	pmulhw MUNG(WA4), %mm5          # v36~~: v16*WA4
245
	psllw  $16-NSHIFT, %mm7
250
	psllw  $16-NSHIFT, %mm7
246
	psllw  $16-WA4_SHIFT, %mm5      # v36: compensate the coeffient 
251
	psllw  $16-WA4_SHIFT, %mm5      # v36: compensate the coeffient 
247
		# scale note that WA4 is shifted 1 bit less than the others
252
		# scale note that WA4 is shifted 1 bit less than the others
Lines 272-277 _dv_dct_88_block_mmx: Link Here
272
		
277
		
273
.align 8	
278
.align 8	
274
.global _dv_dct_block_mmx_postscale_88
279
.global _dv_dct_block_mmx_postscale_88
280
.hidden _dv_dct_block_mmx_postscale_88
281
.type	_dv_dct_block_mmx_postscale_88,@function
275
_dv_dct_block_mmx_postscale_88:
282
_dv_dct_block_mmx_postscale_88:
276
283
277
	pushl	 %ebp
284
	pushl	 %ebp
Lines 748-761 _dv_dct_block_mmx_postscale_88: Link Here
748
755
749
.align 8	
756
.align 8	
750
.global _dv_dct_248_block_mmx
757
.global _dv_dct_248_block_mmx
758
.hidden _dv_dct_248_block_mmx
759
.type	_dv_dct_248_block_mmx,@function
751
_dv_dct_248_block_mmx:
760
_dv_dct_248_block_mmx:
752
761
753
	pushl   %ebp
762
	pushl   %ebp
754
	movl    %esp, %ebp
755
	pushl   %esi
763
	pushl   %esi
756
	pushl   %edi
764
	pushl   %edi
757
765
758
	movl    8(%ebp), %esi          # source
766
	LOAD_PIC_REG_BP()
767
768
	movl    16(%esp), %esi          # source
759
769
760
# column 0
770
# column 0
761
771
Lines 779-785 _dv_dct_248_block_mmx: Link Here
779
	paddw %mm1, %mm0	       # v20: v10+v11
789
	paddw %mm1, %mm0	       # v20: v10+v11
780
	psubw %mm1, %mm3	       # v21: v10-v11
790
	psubw %mm1, %mm3	       # v21: v10-v11
781
791
782
	pmulhw WA1, %mm5               # v32~: WA1*v22
792
	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
783
	movq  %mm4, %mm2	
793
	movq  %mm4, %mm2	
784
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
794
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
785
795
Lines 818-824 _dv_dct_248_block_mmx: Link Here
818
	paddw %mm1, %mm0	       # v20: v10+v11
828
	paddw %mm1, %mm0	       # v20: v10+v11
819
	psubw %mm1, %mm3	       # v21: v10-v11
829
	psubw %mm1, %mm3	       # v21: v10-v11
820
830
821
	pmulhw WA1, %mm5               # v32~: WA1*v22
831
	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
822
	movq  %mm4, %mm2	
832
	movq  %mm4, %mm2	
823
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
833
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
824
834
Lines 855-861 _dv_dct_248_block_mmx: Link Here
855
	paddw %mm1, %mm0	       # v20: v10+v11
865
	paddw %mm1, %mm0	       # v20: v10+v11
856
	psubw %mm1, %mm3	       # v21: v10-v11
866
	psubw %mm1, %mm3	       # v21: v10-v11
857
867
858
	pmulhw WA1, %mm5               # v32~: WA1*v22
868
	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
859
	movq  %mm4, %mm2	
869
	movq  %mm4, %mm2	
860
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
870
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
861
871
Lines 892-898 _dv_dct_248_block_mmx: Link Here
892
	paddw %mm1, %mm0	       # v20: v10+v11
902
	paddw %mm1, %mm0	       # v20: v10+v11
893
	psubw %mm1, %mm3	       # v21: v10-v11
903
	psubw %mm1, %mm3	       # v21: v10-v11
894
904
895
	pmulhw WA1, %mm5               # v32~: WA1*v22
905
	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
896
	movq  %mm4, %mm2	
906
	movq  %mm4, %mm2	
897
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
907
	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
898
908
Lines 912-917 _dv_dct_248_block_mmx: Link Here
912
922
913
.align 8	
923
.align 8	
914
.global _dv_dct_248_block_mmx_post_sum
924
.global _dv_dct_248_block_mmx_post_sum
925
.hidden _dv_dct_248_block_mmx_post_sum
926
.type	_dv_dct_248_block_mmx_post_sum,@function
915
_dv_dct_248_block_mmx_post_sum:
927
_dv_dct_248_block_mmx_post_sum:
916
928
917
	pushl   %ebp
929
	pushl   %ebp
Lines 992-997 _dv_dct_248_block_mmx_post_sum: Link Here
992
1004
993
.align 8	
1005
.align 8	
994
.global _dv_dct_block_mmx_postscale_248
1006
.global _dv_dct_block_mmx_postscale_248
1007
.hidden _dv_dct_block_mmx_postscale_248
1008
.type	_dv_dct_block_mmx_postscale_248,@function
995
_dv_dct_block_mmx_postscale_248:
1009
_dv_dct_block_mmx_postscale_248:
996
1010
997
	pushl	 %ebp
1011
	pushl	 %ebp
(-)libdv-0.104-old/libdv/dct_block_mmx_x86_64.S (+4 lines)
Lines 57-62 scratch4: .quad 0 Link Here
57
57
58
.align 8	
58
.align 8	
59
.global _dv_dct_88_block_mmx_x86_64
59
.global _dv_dct_88_block_mmx_x86_64
60
.hidden _dv_dct_88_block_mmx_x86_64
61
.type	_dv_dct_88_block_mmx_x86_64,@function
60
_dv_dct_88_block_mmx_x86_64:
62
_dv_dct_88_block_mmx_x86_64:
61
63
62
/* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */
64
/* void _dv_dct_88_block_mmx_x86_64(int16_t* block); */
Lines 269-274 _dv_dct_88_block_mmx_x86_64: Link Here
269
		
271
		
270
.align 8	
272
.align 8	
271
.global _dv_dct_block_mmx_x86_64_postscale_88
273
.global _dv_dct_block_mmx_x86_64_postscale_88
274
.hidden _dv_dct_block_mmx_x86_64_postscale_88
275
.type	_dv_dct_block_mmx_x86_64_postscale_88,@function
272
_dv_dct_block_mmx_x86_64_postscale_88:
276
_dv_dct_block_mmx_x86_64_postscale_88:
273
277
274
/* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */
278
/* void _dv_dct_block_mmx_x86_64_postscale_88(int16_t* block, int16_t* postscale_matrix); */
(-)libdv-0.104-old/libdv/dv.c (-2 / +5 lines)
Lines 205-210 dv_reconfigure(int clamp_luma, int clamp Link Here
205
} /* dv_reconfigure */
205
} /* dv_reconfigure */
206
206
207
207
208
extern uint8_t dv_quant_offset[4];
209
extern uint8_t dv_quant_shifts[22][4];
210
208
static inline void 
211
static inline void 
209
dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
212
dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
210
  int i;
213
  int i;
Lines 218-224 dv_decode_macroblock(dv_decoder_t *dv, d Link Here
218
      dv_idct_248 (co248, mb->b[i].coeffs);
221
      dv_idct_248 (co248, mb->b[i].coeffs);
219
    } else {
222
    } else {
220
#if ARCH_X86
223
#if ARCH_X86
221
      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
224
      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
222
      _dv_idct_88(mb->b[i].coeffs);
225
      _dv_idct_88(mb->b[i].coeffs);
223
#elif ARCH_X86_64
226
#elif ARCH_X86_64
224
      _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
227
      _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
Lines 250-256 dv_decode_video_segment(dv_decoder_t *dv Link Here
250
	dv_idct_248 (co248, mb->b[b].coeffs);
253
	dv_idct_248 (co248, mb->b[b].coeffs);
251
      } else {
254
      } else {
252
#if ARCH_X86
255
#if ARCH_X86
253
	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
256
	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
254
	_dv_weight_88_inverse(bl->coeffs);
257
	_dv_weight_88_inverse(bl->coeffs);
255
	_dv_idct_88(bl->coeffs);
258
	_dv_idct_88(bl->coeffs);
256
#elif ARCH_X86_64
259
#elif ARCH_X86_64
(-)libdv-0.104-old/libdv/encode.c (-4 / +5 lines)
Lines 521-527 static void reorder_block(dv_block_t *bl Link Here
521
}
521
}
522
522
523
extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
523
extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
524
					  dv_vlc_entry_t ** out);
524
					  dv_vlc_entry_t ** out,
525
					  dv_vlc_entry_t * lookup);
525
526
526
extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
527
extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
527
					  dv_vlc_entry_t ** out);
528
					  dv_vlc_entry_t ** out);
Lines 558-564 static unsigned long vlc_encode_block(dv Link Here
558
#elif ARCH_X86
559
#elif ARCH_X86
559
	int num_bits;
560
	int num_bits;
560
561
561
	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
562
	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
562
	emms();
563
	emms();
563
#else
564
#else
564
	int num_bits;
565
	int num_bits;
Lines 574-580 static unsigned long vlc_encode_block(dv Link Here
574
	return num_bits;
575
	return num_bits;
575
}
576
}
576
577
577
extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
578
extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
578
extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
579
extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
579
580
580
extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
581
extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
Lines 600-606 extern unsigned long _dv_vlc_num_bits_bl Link Here
600
#elif ARCH_X86_64
601
#elif ARCH_X86_64
601
	return _dv_vlc_num_bits_block_x86_64(coeffs);
602
	return _dv_vlc_num_bits_block_x86_64(coeffs);
602
#else
603
#else
603
	return _dv_vlc_num_bits_block_x86(coeffs);
604
	return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
604
#endif
605
#endif
605
}
606
}
606
607
(-)libdv-0.104-old/libdv/encode_x86.S (-9 / +23 lines)
Lines 23-35 Link Here
23
 *  The libdv homepage is http://libdv.sourceforge.net/.  
23
 *  The libdv homepage is http://libdv.sourceforge.net/.  
24
 */
24
 */
25
25
26
.data
27
ALLONE:		.word 1,1,1,1
28
VLCADDMASK:	.byte 255,0,0,0,255,0,0,0
29
		
30
.text
26
.text
31
27
32
.global _dv_vlc_encode_block_mmx
28
.global _dv_vlc_encode_block_mmx
29
.hidden _dv_vlc_encode_block_mmx
30
.type _dv_vlc_encode_block_mmx,@function
33
_dv_vlc_encode_block_mmx:	
31
_dv_vlc_encode_block_mmx:	
34
	pushl	%ebx
32
	pushl	%ebx
35
	pushl	%esi
33
	pushl	%esi
Lines 45-55 _dv_vlc_encode_block_mmx: Link Here
45
43
46
	movl	$63, %ecx
44
	movl	$63, %ecx
47
45
48
	movl	vlc_encode_lookup, %esi
46
	movl	4+4*4+8(%esp), %esi              # vlc_encode_lookup
49
47
50
	pxor	%mm0, %mm0
48
	pxor	%mm0, %mm0
51
	pxor	%mm2, %mm2
49
	pxor	%mm2, %mm2
52
	movq	VLCADDMASK, %mm1
50
	pushl	$0x000000FF                      # these four lines
51
	pushl	$0x000000FF                      # load VLCADDMASK
52
	movq	(%esp), %mm1                     # into %mm1 off the stack
53
	addl	$8, %esp                         #  --> no TEXTRELs
53
	xorl	%ebp, %ebp
54
	xorl	%ebp, %ebp
54
	subl	$8, %edx
55
	subl	$8, %edx
55
vlc_encode_block_mmx_loop:
56
vlc_encode_block_mmx_loop:
Lines 106-111 vlc_encode_block_out: Link Here
106
	ret	
107
	ret	
107
108
108
.global _dv_vlc_num_bits_block_x86
109
.global _dv_vlc_num_bits_block_x86
110
.hidden _dv_vlc_num_bits_block_x86
111
.type	_dv_vlc_num_bits_block_x86,@function
109
_dv_vlc_num_bits_block_x86:	
112
_dv_vlc_num_bits_block_x86:	
110
	pushl	%ebx
113
	pushl	%ebx
111
	pushl	%esi
114
	pushl	%esi
Lines 121-127 _dv_vlc_num_bits_block_x86: Link Here
121
	addl	$2, %edi
124
	addl	$2, %edi
122
125
123
	movl	$63, %ecx
126
	movl	$63, %ecx
124
	movl	vlc_num_bits_lookup, %esi
127
	movl	4+4*4+4(%esp), %esi              # vlc_num_bits_lookup
125
	
128
	
126
vlc_num_bits_block_x86_loop:
129
vlc_num_bits_block_x86_loop:
127
	movw	(%edi), %ax
130
	movw	(%edi), %ax
Lines 164-169 vlc_num_bits_block_out: Link Here
164
	ret	
167
	ret	
165
168
166
.global _dv_vlc_encode_block_pass_1_x86	
169
.global _dv_vlc_encode_block_pass_1_x86	
170
.hidden _dv_vlc_encode_block_pass_1_x86
171
.type	_dv_vlc_encode_block_pass_1_x86,@function
167
_dv_vlc_encode_block_pass_1_x86:	
172
_dv_vlc_encode_block_pass_1_x86:	
168
	pushl	%ebx
173
	pushl	%ebx
169
	pushl	%esi
174
	pushl	%esi
Lines 240-245 vlc_encode_block_pass1_x86_out: Link Here
240
	ret		
245
	ret		
241
		
246
		
242
.global _dv_classify_mmx
247
.global _dv_classify_mmx
248
.hidden _dv_classify_mmx
249
.type	_dv_classify_mmx,@function
243
_dv_classify_mmx:
250
_dv_classify_mmx:
244
251
245
	pushl   %ebp
252
	pushl   %ebp
Lines 345-350 _dv_classify_mmx: Link Here
345
	 don't know why... */
352
	 don't know why... */
346
	
353
	
347
.global _dv_reorder_block_mmx
354
.global _dv_reorder_block_mmx
355
.hidden _dv_reorder_block_mmx
356
.type	_dv_reorder_block_mmx,@function
348
_dv_reorder_block_mmx:
357
_dv_reorder_block_mmx:
349
				
358
				
350
	pushl   %ebp
359
	pushl   %ebp
Lines 460-465 reorder_loop: Link Here
460
	ret
469
	ret
461
470
462
.global _dv_need_dct_248_mmx_rows
471
.global _dv_need_dct_248_mmx_rows
472
.hidden _dv_need_dct_248_mmx_rows
473
.type	_dv_need_dct_248_mmx_rows,@function
463
_dv_need_dct_248_mmx_rows:
474
_dv_need_dct_248_mmx_rows:
464
	
475
	
465
	pushl   %ebp
476
	pushl   %ebp
Lines 579-586 _dv_need_dct_248_mmx_rows: Link Here
579
	paddw	%mm5, %mm1
590
	paddw	%mm5, %mm1
580
591
581
	paddw	%mm1, %mm0
592
	paddw	%mm1, %mm0
582
	
593
583
	pmaddwd	ALLONE, %mm0	
594
	pushl	$0x00010001              # these four lines
595
	pushl	$0x00010001              # load ALLONE
596
	pmaddwd	(%esp), %mm0             # into %mm0 off the stack
597
	addl	$8, %esp                 #  --> no TEXTRELs
584
	movq	%mm0, %mm1
598
	movq	%mm0, %mm1
585
	psrlq	$32, %mm1
599
	psrlq	$32, %mm1
586
	paddd	%mm1, %mm0
600
	paddd	%mm1, %mm0
(-)libdv-0.104-old/libdv/encode_x86_64.S (+12 lines)
Lines 30-35 VLCADDMASK: .byte 255,0,0,0,255,0,0,0 Link Here
30
.text
30
.text
31
31
32
.global _dv_vlc_encode_block_mmx_x86_64
32
.global _dv_vlc_encode_block_mmx_x86_64
33
.hidden _dv_vlc_encode_block_mmx_x86_64
34
.type	_dv_vlc_encode_block_mmx_x86_64,@function
33
_dv_vlc_encode_block_mmx_x86_64:
35
_dv_vlc_encode_block_mmx_x86_64:
34
	
36
	
35
/* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
37
/* extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
Lines 113-118 vlc_encode_block_out: Link Here
113
	ret	
115
	ret	
114
116
115
.global _dv_vlc_num_bits_block_x86_64
117
.global _dv_vlc_num_bits_block_x86_64
118
.hidden _dv_vlc_num_bits_block_x86_64
119
.type	_dv_vlc_num_bits_block_x86_64,@function
116
_dv_vlc_num_bits_block_x86_64:
120
_dv_vlc_num_bits_block_x86_64:
117
	
121
	
118
	/* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */
122
	/* extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs); */
Lines 173-178 vlc_num_bits_block_out: Link Here
173
	ret	
177
	ret	
174
178
175
.global _dv_vlc_encode_block_pass_1_x86_64
179
.global _dv_vlc_encode_block_pass_1_x86_64
180
.hidden _dv_vlc_encode_block_pass_1_x86_64
181
.type	_dv_vlc_encode_block_pass_1_x86_64,@function
176
_dv_vlc_encode_block_pass_1_x86_64:
182
_dv_vlc_encode_block_pass_1_x86_64:
177
183
178
/*
184
/*
Lines 251-256 vlc_encode_block_pass1_x86_out: Link Here
251
	ret		
257
	ret		
252
		
258
		
253
.global _dv_classify_mmx_x86_64
259
.global _dv_classify_mmx_x86_64
260
.hidden _dv_classify_mmx_x86_64
261
.type	_dv_classify_mmx_x86_64,@function
254
_dv_classify_mmx_x86_64:
262
_dv_classify_mmx_x86_64:
255
263
256
	/* extern int _dv_classify_mmx_x86_64(dv_coeff_t *    a,          rdi
264
	/* extern int _dv_classify_mmx_x86_64(dv_coeff_t *    a,          rdi
Lines 355-360 _dv_classify_mmx_x86_64: Link Here
355
	 don't know why... */
363
	 don't know why... */
356
	
364
	
357
.global _dv_reorder_block_mmx_x86_64
365
.global _dv_reorder_block_mmx_x86_64
366
.hidden _dv_reorder_block_mmx_x86_64
367
.type	_dv_reorder_block_mmx_x86_64,@function
358
_dv_reorder_block_mmx_x86_64:
368
_dv_reorder_block_mmx_x86_64:
359
	
369
	
360
/*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t *          a,               rdi
370
/*extern int _dv_reorder_block_mmx_x86_64(dv_coeff_t *          a,               rdi
Lines 469-474 reorder_loop: Link Here
469
	ret
479
	ret
470
480
471
.global _dv_need_dct_248_mmx_x86_64_rows
481
.global _dv_need_dct_248_mmx_x86_64_rows
482
.hidden _dv_need_dct_248_mmx_x86_64_rows
483
.type	_dv_need_dct_248_mmx_x86_64_rows,@function
472
_dv_need_dct_248_mmx_x86_64_rows:
484
_dv_need_dct_248_mmx_x86_64_rows:
473
	
485
	
474
	/* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl);  rdi */
486
	/* extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl);  rdi */
(-)libdv-0.104-old/libdv/idct_block_mmx.S (-32 / +38 lines)
Lines 8-23 Link Here
8
8
9
9
10
10
11
#include "asm_common.S"
12
11
.text
13
.text
14
12
	.align 4
15
	.align 4
13
.globl _dv_idct_block_mmx 
16
.globl _dv_idct_block_mmx 
17
.hidden _dv_idct_block_mmx
14
	.type	 _dv_idct_block_mmx,@function
18
	.type	 _dv_idct_block_mmx,@function
15
_dv_idct_block_mmx:
19
_dv_idct_block_mmx:
16
	pushl	 %ebp
20
	pushl	 %ebp
17
	movl	 %esp,%ebp
18
	pushl	 %esi
21
	pushl	 %esi
19
	leal	 preSC, %ecx
22
20
	movl	 8(%ebp),%esi		/* source matrix */
23
	LOAD_PIC_REG_BP()
24
25
	leal	 MUNG(preSC), %ecx
26
	movl	 12(%esp),%esi		/* source matrix */
21
27
22
/* 
28
/* 
23
 *	column 0: even part
29
 *	column 0: even part
Lines 35-41 _dv_idct_block_mmx: Link Here
35
	movq %mm1, %mm2			/* added 11/1/96 */
41
	movq %mm1, %mm2			/* added 11/1/96 */
36
	pmulhw 8*8(%esi),%mm5		/* V8 */
42
	pmulhw 8*8(%esi),%mm5		/* V8 */
37
	psubsw %mm0, %mm1		/* V16 */
43
	psubsw %mm0, %mm1		/* V16 */
38
	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V18 */
44
	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V18 */
39
	paddsw %mm0, %mm2		/* V17 */
45
	paddsw %mm0, %mm2		/* V17 */
40
	movq %mm2, %mm0			/* duplicate V17 */
46
	movq %mm2, %mm0			/* duplicate V17 */
41
	psraw $1, %mm2			/* t75=t82 */
47
	psraw $1, %mm2			/* t75=t82 */
Lines 76-82 _dv_idct_block_mmx: Link Here
76
	paddsw %mm0, %mm3		/* V29 ; free mm0 */
82
	paddsw %mm0, %mm3		/* V29 ; free mm0 */
77
	movq %mm7, %mm1			/* duplicate V26 */
83
	movq %mm7, %mm1			/* duplicate V26 */
78
	psraw $1, %mm3			/* t91=t94 */
84
	psraw $1, %mm3			/* t91=t94 */
79
	pmulhw x539f539f539f539f,%mm7	/* V33 */
85
	pmulhw MUNG(x539f539f539f539f),%mm7	/* V33 */
80
	psraw $1, %mm1			/* t96 */
86
	psraw $1, %mm1			/* t96 */
81
	movq %mm5, %mm0			/* duplicate V2 */
87
	movq %mm5, %mm0			/* duplicate V2 */
82
	psraw $2, %mm4			/* t85=t87 */
88
	psraw $2, %mm4			/* t85=t87 */
Lines 84-98 _dv_idct_block_mmx: Link Here
84
	psubsw %mm4, %mm0		/* V28 ; free mm4 */
90
	psubsw %mm4, %mm0		/* V28 ; free mm4 */
85
	movq %mm0, %mm2			/* duplicate V28 */
91
	movq %mm0, %mm2			/* duplicate V28 */
86
	psraw $1, %mm5			/* t90=t93 */
92
	psraw $1, %mm5			/* t90=t93 */
87
	pmulhw x4546454645464546,%mm0	/* V35 */
93
	pmulhw MUNG(x4546454645464546),%mm0	/* V35 */
88
	psraw $1, %mm2			/* t97 */
94
	psraw $1, %mm2			/* t97 */
89
	movq %mm5, %mm4			/* duplicate t90=t93 */
95
	movq %mm5, %mm4			/* duplicate t90=t93 */
90
	psubsw %mm2, %mm1		/* V32 ; free mm2 */
96
	psubsw %mm2, %mm1		/* V32 ; free mm2 */
91
	pmulhw x61f861f861f861f8,%mm1	/* V36 */
97
	pmulhw MUNG(x61f861f861f861f8),%mm1	/* V36 */
92
	psllw $1, %mm7			/* t107 */
98
	psllw $1, %mm7			/* t107 */
93
	paddsw %mm3, %mm5		/* V31 */
99
	paddsw %mm3, %mm5		/* V31 */
94
	psubsw %mm3, %mm4		/* V30 ; free mm3 */
100
	psubsw %mm3, %mm4		/* V30 ; free mm3 */
95
	pmulhw x5a825a825a825a82,%mm4	/* V34 */
101
	pmulhw MUNG(x5a825a825a825a82),%mm4	/* V34 */
96
	nop
102
	nop
97
	psubsw %mm1, %mm0		/* V38 */
103
	psubsw %mm1, %mm0		/* V38 */
98
	psubsw %mm7, %mm1		/* V37 ; free mm7 */
104
	psubsw %mm7, %mm1		/* V37 ; free mm7 */
Lines 159-165 _dv_idct_block_mmx: Link Here
159
	psubsw %mm7, %mm1		/* V50 */
165
	psubsw %mm7, %mm1		/* V50 */
160
	pmulhw 8*9(%esi), %mm5		/* V9 */
166
	pmulhw 8*9(%esi), %mm5		/* V9 */
161
	paddsw %mm7, %mm2		/* V51 */
167
	paddsw %mm7, %mm2		/* V51 */
162
	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V52 */
168
	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V52 */
163
	movq %mm2, %mm6			/* duplicate V51 */
169
	movq %mm2, %mm6			/* duplicate V51 */
164
	psraw $1, %mm2			/* t138=t144 */
170
	psraw $1, %mm2			/* t138=t144 */
165
	movq %mm3, %mm4			/* duplicate V1 */
171
	movq %mm3, %mm4			/* duplicate V1 */
Lines 200-210 _dv_idct_block_mmx: Link Here
200
 * even more by doing the correction step in a later stage when the number
206
 * even more by doing the correction step in a later stage when the number
201
 * is actually multiplied by 16
207
 * is actually multiplied by 16
202
 */
208
 */
203
	paddw x0005000200010001, %mm4
209
	paddw MUNG(x0005000200010001), %mm4
204
	psubsw %mm6, %mm3		/* V60 ; free mm6 */
210
	psubsw %mm6, %mm3		/* V60 ; free mm6 */
205
	psraw $1, %mm0			/* t154=t156 */
211
	psraw $1, %mm0			/* t154=t156 */
206
	movq %mm3, %mm1			/* duplicate V60 */
212
	movq %mm3, %mm1			/* duplicate V60 */
207
	pmulhw x539f539f539f539f, %mm1	/* V67 */
213
	pmulhw MUNG(x539f539f539f539f), %mm1	/* V67 */
208
	movq %mm5, %mm6			/* duplicate V3 */
214
	movq %mm5, %mm6			/* duplicate V3 */
209
	psraw $2, %mm4			/* t148=t150 */
215
	psraw $2, %mm4			/* t148=t150 */
210
	paddsw %mm4, %mm5		/* V61 */
216
	paddsw %mm4, %mm5		/* V61 */
Lines 213-225 _dv_idct_block_mmx: Link Here
213
	psllw $1, %mm1			/* t169 */
219
	psllw $1, %mm1			/* t169 */
214
	paddsw %mm0, %mm5		/* V65 -> result */
220
	paddsw %mm0, %mm5		/* V65 -> result */
215
	psubsw %mm0, %mm4		/* V64 ; free mm0 */
221
	psubsw %mm0, %mm4		/* V64 ; free mm0 */
216
	pmulhw x5a825a825a825a82, %mm4	/* V68 */
222
	pmulhw MUNG(x5a825a825a825a82), %mm4	/* V68 */
217
	psraw $1, %mm3			/* t158 */
223
	psraw $1, %mm3			/* t158 */
218
	psubsw %mm6, %mm3		/* V66 */
224
	psubsw %mm6, %mm3		/* V66 */
219
	movq %mm5, %mm2			/* duplicate V65 */
225
	movq %mm5, %mm2			/* duplicate V65 */
220
	pmulhw x61f861f861f861f8, %mm3	/* V70 */
226
	pmulhw MUNG(x61f861f861f861f8), %mm3	/* V70 */
221
	psllw $1, %mm6			/* t165 */
227
	psllw $1, %mm6			/* t165 */
222
	pmulhw x4546454645464546, %mm6	/* V69 */
228
	pmulhw MUNG(x4546454645464546), %mm6	/* V69 */
223
	psraw $1, %mm2			/* t172 */
229
	psraw $1, %mm2			/* t172 */
224
/* moved from next block */
230
/* moved from next block */
225
	movq 8*5(%esi), %mm0		/* V56 */
231
	movq 8*5(%esi), %mm0		/* V56 */
Lines 344-350 _dv_idct_block_mmx: Link Here
344
*	movq 8*13(%esi), %mm4		tmt13
350
*	movq 8*13(%esi), %mm4		tmt13
345
*/
351
*/
346
	psubsw %mm4, %mm3		/* V134 */
352
	psubsw %mm4, %mm3		/* V134 */
347
	pmulhw x5a825a825a825a82, %mm3	/* 23170 ->V136 */
353
	pmulhw MUNG(x5a825a825a825a82), %mm3	/* 23170 ->V136 */
348
	movq 8*9(%esi), %mm6		/* tmt9 */
354
	movq 8*9(%esi), %mm6		/* tmt9 */
349
	paddsw %mm4, %mm5		/* V135 ; mm4 free */
355
	paddsw %mm4, %mm5		/* V135 ; mm4 free */
350
	movq %mm0, %mm4			/* duplicate tmt1 */
356
	movq %mm0, %mm4			/* duplicate tmt1 */
Lines 373-389 _dv_idct_block_mmx: Link Here
373
	psubsw %mm7, %mm0		/* V144 */
379
	psubsw %mm7, %mm0		/* V144 */
374
	movq %mm0, %mm3			/* duplicate V144 */
380
	movq %mm0, %mm3			/* duplicate V144 */
375
	paddsw %mm7, %mm2		/* V147 ; free mm7 */
381
	paddsw %mm7, %mm2		/* V147 ; free mm7 */
376
	pmulhw x539f539f539f539f, %mm0	/* 21407-> V151 */
382
	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V151 */
377
	movq %mm1, %mm7			/* duplicate tmt3 */
383
	movq %mm1, %mm7			/* duplicate tmt3 */
378
	paddsw %mm5, %mm7		/* V145 */
384
	paddsw %mm5, %mm7		/* V145 */
379
	psubsw %mm5, %mm1		/* V146 ; free mm5 */
385
	psubsw %mm5, %mm1		/* V146 ; free mm5 */
380
	psubsw %mm1, %mm3		/* V150 */
386
	psubsw %mm1, %mm3		/* V150 */
381
	movq %mm7, %mm5			/* duplicate V145 */
387
	movq %mm7, %mm5			/* duplicate V145 */
382
	pmulhw x4546454645464546, %mm1	/* 17734-> V153 */
388
	pmulhw MUNG(x4546454645464546), %mm1	/* 17734-> V153 */
383
	psubsw %mm2, %mm5		/* V148 */
389
	psubsw %mm2, %mm5		/* V148 */
384
	pmulhw x61f861f861f861f8, %mm3	/* 25080-> V154 */
390
	pmulhw MUNG(x61f861f861f861f8), %mm3	/* 25080-> V154 */
385
	psllw $2, %mm0			/* t311 */
391
	psllw $2, %mm0			/* t311 */
386
	pmulhw x5a825a825a825a82, %mm5	/* 23170-> V152 */
392
	pmulhw MUNG(x5a825a825a825a82), %mm5	/* 23170-> V152 */
387
	paddsw %mm2, %mm7		/* V149 ; free mm2 */
393
	paddsw %mm2, %mm7		/* V149 ; free mm2 */
388
	psllw $1, %mm1			/* t313 */
394
	psllw $1, %mm1			/* t313 */
389
	nop	/* without the nop - freeze here for one clock */
395
	nop	/* without the nop - freeze here for one clock */
Lines 409-415 _dv_idct_block_mmx: Link Here
409
	paddsw %mm3, %mm6		/* V164 ; free mm3 */
415
	paddsw %mm3, %mm6		/* V164 ; free mm3 */
410
	movq %mm4, %mm3			/* duplicate V142 */
416
	movq %mm4, %mm3			/* duplicate V142 */
411
	psubsw %mm5, %mm4		/* V165 ; free mm5 */
417
	psubsw %mm5, %mm4		/* V165 ; free mm5 */
412
	movq %mm2, scratch7		/* out7 */
418
	movq %mm2, MUNG(scratch7)		/* out7 */
413
	psraw $4, %mm6
419
	psraw $4, %mm6
414
	psraw $4, %mm4
420
	psraw $4, %mm4
415
	paddsw %mm5, %mm3		/* V162 */
421
	paddsw %mm5, %mm3		/* V162 */
Lines 420-430 _dv_idct_block_mmx: Link Here
420
 */
426
 */
421
	movq %mm6, 8*9(%esi)		/* out9 */
427
	movq %mm6, 8*9(%esi)		/* out9 */
422
	paddsw %mm1, %mm0		/* V161 */
428
	paddsw %mm1, %mm0		/* V161 */
423
	movq %mm3, scratch5		/* out5 */
429
	movq %mm3, MUNG(scratch5)		/* out5 */
424
	psubsw %mm1, %mm5		/* V166 ; free mm1 */
430
	psubsw %mm1, %mm5		/* V166 ; free mm1 */
425
	movq %mm4, 8*11(%esi)		/* out11 */
431
	movq %mm4, 8*11(%esi)		/* out11 */
426
	psraw $4, %mm5
432
	psraw $4, %mm5
427
	movq %mm0, scratch3		/* out3 */
433
	movq %mm0, MUNG(scratch3)		/* out3 */
428
	movq %mm2, %mm4			/* duplicate V140 */
434
	movq %mm2, %mm4			/* duplicate V140 */
429
	movq %mm5, 8*13(%esi)		/* out13 */
435
	movq %mm5, 8*13(%esi)		/* out13 */
430
	paddsw %mm7, %mm2		/* V160 */
436
	paddsw %mm7, %mm2		/* V160 */
Lines 434-440 _dv_idct_block_mmx: Link Here
434
/* moved from the next block */
440
/* moved from the next block */
435
	movq 8*3(%esi), %mm7
441
	movq 8*3(%esi), %mm7
436
	psraw $4, %mm4
442
	psraw $4, %mm4
437
	movq %mm2, scratch1		/* out1 */
443
	movq %mm2, MUNG(scratch1)		/* out1 */
438
/* moved from the next block */
444
/* moved from the next block */
439
	movq %mm0, %mm1
445
	movq %mm0, %mm1
440
	movq %mm4, 8*15(%esi)		/* out15 */
446
	movq %mm4, 8*15(%esi)		/* out15 */
Lines 491-505 _dv_idct_block_mmx: Link Here
491
	paddsw %mm4, %mm3		/* V113 ; free mm4 */
497
	paddsw %mm4, %mm3		/* V113 ; free mm4 */
492
	movq %mm0, %mm4			/* duplicate V110 */
498
	movq %mm0, %mm4			/* duplicate V110 */
493
	paddsw %mm1, %mm2		/* V111 */
499
	paddsw %mm1, %mm2		/* V111 */
494
	pmulhw x539f539f539f539f, %mm0	/* 21407-> V117 */
500
	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V117 */
495
	psubsw %mm1, %mm5		/* V112 ; free mm1 */
501
	psubsw %mm1, %mm5		/* V112 ; free mm1 */
496
	psubsw %mm5, %mm4		/* V116 */
502
	psubsw %mm5, %mm4		/* V116 */
497
	movq %mm2, %mm1			/* duplicate V111 */
503
	movq %mm2, %mm1			/* duplicate V111 */
498
	pmulhw x4546454645464546, %mm5	/* 17734-> V119 */
504
	pmulhw MUNG(x4546454645464546), %mm5	/* 17734-> V119 */
499
	psubsw %mm3, %mm2		/* V114 */
505
	psubsw %mm3, %mm2		/* V114 */
500
	pmulhw x61f861f861f861f8, %mm4	/* 25080-> V120 */
506
	pmulhw MUNG(x61f861f861f861f8), %mm4	/* 25080-> V120 */
501
	paddsw %mm3, %mm1		/* V115 ; free mm3 */
507
	paddsw %mm3, %mm1		/* V115 ; free mm3 */
502
	pmulhw x5a825a825a825a82, %mm2	/* 23170-> V118 */
508
	pmulhw MUNG(x5a825a825a825a82), %mm2	/* 23170-> V118 */
503
	psllw $2, %mm0			/* t266 */
509
	psllw $2, %mm0			/* t266 */
504
	movq %mm1, (%esi)		/* save V115 */
510
	movq %mm1, (%esi)		/* save V115 */
505
	psllw $1, %mm5			/* t268 */
511
	psllw $1, %mm5			/* t268 */
Lines 517-523 _dv_idct_block_mmx: Link Here
517
	movq %mm6, %mm3			/* duplicate tmt4 */
523
	movq %mm6, %mm3			/* duplicate tmt4 */
518
	psubsw %mm0, %mm6		/* V100 */
524
	psubsw %mm0, %mm6		/* V100 */
519
	paddsw %mm0, %mm3		/* V101 ; free mm0 */
525
	paddsw %mm0, %mm3		/* V101 ; free mm0 */
520
	pmulhw x5a825a825a825a82, %mm6	/* 23170 ->V102 */
526
	pmulhw MUNG(x5a825a825a825a82), %mm6	/* 23170 ->V102 */
521
	movq %mm7, %mm5			/* duplicate tmt0 */
527
	movq %mm7, %mm5			/* duplicate tmt0 */
522
	movq 8*8(%esi), %mm1		/* tmt8 */
528
	movq 8*8(%esi), %mm1		/* tmt8 */
523
	paddsw %mm1, %mm7		/* V103 */
529
	paddsw %mm1, %mm7		/* V103 */
Lines 551-560 _dv_idct_block_mmx: Link Here
551
	movq 8*2(%esi), %mm3		/* V123 */
557
	movq 8*2(%esi), %mm3		/* V123 */
552
	paddsw %mm4, %mm7		/* out0 */
558
	paddsw %mm4, %mm7		/* out0 */
553
/* moved up from next block */
559
/* moved up from next block */
554
	movq scratch3, %mm0
560
	movq MUNG(scratch3), %mm0
555
	psraw $4, %mm7
561
	psraw $4, %mm7
556
/* moved up from next block */
562
/* moved up from next block */
557
	movq scratch5, %mm6 
563
	movq MUNG(scratch5), %mm6 
558
	psubsw %mm4, %mm1		/* out14 ; free mm4 */
564
	psubsw %mm4, %mm1		/* out14 ; free mm4 */
559
	paddsw %mm3, %mm5		/* out2 */
565
	paddsw %mm3, %mm5		/* out2 */
560
	psraw $4, %mm1
566
	psraw $4, %mm1
Lines 565-571 _dv_idct_block_mmx: Link Here
565
	movq %mm5, 8*2(%esi)		/* out2 ; free mm5 */
571
	movq %mm5, 8*2(%esi)		/* out2 ; free mm5 */
566
	psraw $4, %mm2
572
	psraw $4, %mm2
567
/* moved up to the prev block */
573
/* moved up to the prev block */
568
	movq scratch7, %mm4
574
	movq MUNG(scratch7), %mm4
569
/* moved up to the prev block */
575
/* moved up to the prev block */
570
	psraw $4, %mm0
576
	psraw $4, %mm0
571
	movq %mm2, 8*12(%esi)		/* out12 ; free mm2 */
577
	movq %mm2, 8*12(%esi)		/* out12 ; free mm2 */
Lines 579-585 _dv_idct_block_mmx: Link Here
579
 *	psraw $4, %mm0
585
 *	psraw $4, %mm0
580
 *	psraw $4, %mm6
586
 *	psraw $4, %mm6
581
*/
587
*/
582
	movq scratch1, %mm1
588
	movq MUNG(scratch1), %mm1
583
	psraw $4, %mm4
589
	psraw $4, %mm4
584
	movq %mm0, 8*3(%esi)		/* out3 */
590
	movq %mm0, 8*3(%esi)		/* out3 */
585
	psraw $4, %mm1
591
	psraw $4, %mm1
(-)libdv-0.104-old/libdv/idct_block_mmx_x86_64.S (+1 lines)
Lines 17-22 Link Here
17
.text
17
.text
18
	.align 4
18
	.align 4
19
.globl _dv_idct_block_mmx_x86_64
19
.globl _dv_idct_block_mmx_x86_64
20
.hidden _dv_idct_block_mmx_x86_64
20
	.type	 _dv_idct_block_mmx_x86_64,@function
21
	.type	 _dv_idct_block_mmx_x86_64,@function
21
_dv_idct_block_mmx_x86_64:
22
_dv_idct_block_mmx_x86_64:
22
/* void _dv_idct_88(dv_coeff_t *block) */
23
/* void _dv_idct_88(dv_coeff_t *block) */
(-)libdv-0.104-old/libdv/parse.c (+7 lines)
Lines 477-482 dv_parse_ac_coeffs(dv_videosegment_t *se Link Here
477
  exit(0);
477
  exit(0);
478
#endif
478
#endif
479
} /* dv_parse_ac_coeffs */
479
} /* dv_parse_ac_coeffs */
480
#if defined __GNUC__ && __ELF__
481
# define dv_strong_hidden_alias(name, aliasname) \
482
    extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
483
dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
484
#else
485
int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
486
#endif
480
487
481
/* ---------------------------------------------------------------------------
488
/* ---------------------------------------------------------------------------
482
 */
489
 */
(-)libdv-0.104-old/libdv/quant.c (-2 / +2 lines)
Lines 144-150 uint8_t dv_quant_offset[4] = { 6,3,0,1 Link Here
144
uint32_t	dv_quant_248_mul_tab [2] [22] [64];
144
uint32_t	dv_quant_248_mul_tab [2] [22] [64];
145
uint32_t dv_quant_88_mul_tab [2] [22] [64];
145
uint32_t dv_quant_88_mul_tab [2] [22] [64];
146
146
147
extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
147
extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
148
extern void             _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
148
extern void             _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
149
static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
149
static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
150
static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
150
static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
Lines 210-216 void _dv_quant(dv_coeff_t *block,int qno Link Here
210
		_dv_quant_x86_64(block, qno, klass);
210
		_dv_quant_x86_64(block, qno, klass);
211
		emms();
211
		emms();
212
#else
212
#else
213
		_dv_quant_x86(block, qno, klass);
213
		_dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
214
		emms();
214
		emms();
215
#endif
215
#endif
216
	}
216
	}
(-)libdv-0.104-old/libdv/quant.h (-1 / +1 lines)
Lines 27-33 extern void _dv_quant(dv_coeff_t *block, Link Here
27
extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
27
extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
28
extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
28
extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
29
                                  dv_248_coeff_t *co);
29
                                  dv_248_coeff_t *co);
30
extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
30
extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
31
extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
31
extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
32
extern void dv_quant_init (void);
32
extern void dv_quant_init (void);
33
#ifdef __cplusplus
33
#ifdef __cplusplus
(-)libdv-0.104-old/libdv/quant_x86.S (-5 / +14 lines)
Lines 55-60 void _dv_quant_88_inverse(dv_coeff_t *bl Link Here
55
.text
55
.text
56
	.align	4
56
	.align	4
57
.globl	_dv_quant_88_inverse_x86
57
.globl	_dv_quant_88_inverse_x86
58
.hidden	_dv_quant_88_inverse_x86
59
.type	_dv_quant_88_inverse_x86,@function
58
_dv_quant_88_inverse_x86:	
60
_dv_quant_88_inverse_x86:	
59
	pushl	%ebx
61
	pushl	%ebx
60
	pushl	%esi
62
	pushl	%esi
Lines 71-80 _dv_quant_88_inverse_x86: Link Here
71
	
73
	
72
	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
74
	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
73
	movl	ARGn(1),%eax	/* qno */
75
	movl	ARGn(1),%eax	/* qno */
76
	movl	ARGn(3),%ebx	/* dv_quant_offset */
77
	addl	ARGn(2),%ebx	/* class */
78
	movzbl	(%ebx),%ecx
74
	movl	ARGn(2),%ebx	/* class */
79
	movl	ARGn(2),%ebx	/* class */
75
	movzbl	dv_quant_offset(%ebx),%ecx
76
	addl	%ecx,%eax
80
	addl	%ecx,%eax
77
	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
81
	movl	ARGn(4),%edx	/* dv_quant_shifts */
82
	leal	(%edx,%eax,4),%edx	/* edx is pq */
78
83
79
	/* extra = (class == 3); */
84
	/* extra = (class == 3); */
80
				/*  0   1   2   3 */
85
				/*  0   1   2   3 */
Lines 193-198 _dv_quant_88_inverse_x86: Link Here
193
198
194
	.align	4
199
	.align	4
195
.globl	_dv_quant_x86
200
.globl	_dv_quant_x86
201
.hidden	_dv_quant_x86
202
.type	_dv_quant_x86,@function
196
_dv_quant_x86:	
203
_dv_quant_x86:	
197
	pushl	%ebx
204
	pushl	%ebx
198
	pushl	%ecx
205
	pushl	%ecx
Lines 212-222 _dv_quant_x86: Link Here
212
	
219
	
213
	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
220
	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
214
	movl	ARGn(1),%eax	/* qno */
221
	movl	ARGn(1),%eax	/* qno */
222
	movl	ARGn(3),%ebx	/* offset */
223
	addl	ARGn(2),%ebx	/* class */
224
	movzbl	(%ebx),%ecx
215
	movl	ARGn(2),%ebx	/* class */
225
	movl	ARGn(2),%ebx	/* class */
216
226
	movl	ARGn(4),%edx	/* shifts */
217
	movzbl	dv_quant_offset(%ebx),%ecx
218
	addl	%ecx,%eax
227
	addl	%ecx,%eax
219
	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
228
	leal	(%edx,%eax,4),%edx	/* edx is pq */
220
229
221
	/* extra = (class == 3); */
230
	/* extra = (class == 3); */
222
				/*  0   1   2   3 */
231
				/*  0   1   2   3 */
(-)libdv-0.104-old/libdv/quant_x86_64.S (+4 lines)
Lines 55-60 void _dv_quant_88_inverse(dv_coeff_t *bl Link Here
55
.text
55
.text
56
	.align	4
56
	.align	4
57
.globl	_dv_quant_88_inverse_x86_64
57
.globl	_dv_quant_88_inverse_x86_64
58
.hidden	_dv_quant_88_inverse_x86_64
59
.type	_dv_quant_88_inverse_x86_64,@function
58
_dv_quant_88_inverse_x86_64:
60
_dv_quant_88_inverse_x86_64:
59
	
61
	
60
	/* Args are at block=rdi, qno=rsi, class=rdx */
62
	/* Args are at block=rdi, qno=rsi, class=rdx */
Lines 195-200 _dv_quant_88_inverse_x86_64: Link Here
195
197
196
	.align	4
198
	.align	4
197
.globl	_dv_quant_x86_64
199
.globl	_dv_quant_x86_64
200
.hidden	_dv_quant_x86_64
201
.type	_dv_quant_x86_64,@function
198
_dv_quant_x86_64:
202
_dv_quant_x86_64:
199
	
203
	
200
	/* Args are at block=rdi, qno=rsi, class=rdx */
204
	/* Args are at block=rdi, qno=rsi, class=rdx */
(-)libdv-0.104-old/libdv/rgbtoyuv.S (-112 / +137 lines)
Lines 41-49 Link Here
41
#define DV_WIDTH_SHORT_HALF 720
41
#define DV_WIDTH_SHORT_HALF 720
42
#define DV_WIDTH_BYTE_HALF  360	
42
#define DV_WIDTH_BYTE_HALF  360	
43
		
43
		
44
.global _dv_rgbtoycb_mmx
45
# .global yuvtoycb_mmx
46
47
.data
44
.data
48
45
49
.align 8
46
.align 8
Lines 110-129 VR0GR: .long 0,0 Link Here
110
VBG0B:  .long   0,0
107
VBG0B:  .long   0,0
111
	
108
	
112
#endif	
109
#endif	
113
	
110
111
#include "asm_common.S"
112
114
.text
113
.text
115
114
116
#define _inPtr     8
115
#define _inPtr     24+8
117
#define _rows      12
116
#define _rows      24+12
118
#define _columns   16
117
#define _columns   24+16
119
#define _outyPtr   20
118
#define _outyPtr   24+20
120
#define _outuPtr   24
119
#define _outuPtr   24+24
121
#define _outvPtr   28
120
#define _outvPtr   24+28
122
121
122
.global _dv_rgbtoycb_mmx
123
.hidden _dv_rgbtoycb_mmx
124
.type _dv_rgbtoycb_mmx,@function
123
_dv_rgbtoycb_mmx:
125
_dv_rgbtoycb_mmx:
124
126
125
	pushl   %ebp
127
	pushl   %ebp
126
	movl    %esp, %ebp
127
	pushl   %eax
128
	pushl   %eax
128
	pushl   %ebx
129
	pushl   %ebx
129
	pushl   %ecx
130
	pushl   %ecx
Lines 131-176 _dv_rgbtoycb_mmx: Link Here
131
	pushl   %esi
132
	pushl   %esi
132
	pushl   %edi
133
	pushl   %edi
133
134
134
	leal    ZEROSX, %eax    #This section gets around a bug
135
	LOAD_PIC_REG_BP()
136
137
	leal    MUNG(ZEROSX), %eax    #This section gets around a bug
135
	movq    (%eax), %mm0    #unlikely to persist
138
	movq    (%eax), %mm0    #unlikely to persist
136
	movq    %mm0, ZEROS
139
	movq    %mm0, MUNG(ZEROS)
137
	leal    OFFSETDX, %eax
140
	leal    MUNG(OFFSETDX), %eax
138
	movq    (%eax), %mm0
141
	movq    (%eax), %mm0
139
	movq    %mm0, OFFSETD
142
	movq    %mm0, MUNG(OFFSETD)
140
	leal    OFFSETWX, %eax
143
	leal    MUNG(OFFSETWX), %eax
141
	movq    (%eax), %mm0
144
	movq    (%eax), %mm0
142
	movq    %mm0, OFFSETW
145
	movq    %mm0, MUNG(OFFSETW)
143
	leal    OFFSETBX, %eax
146
	leal    MUNG(OFFSETBX), %eax
144
	movq    (%eax), %mm0
147
	movq    (%eax), %mm0
145
	movq    %mm0, OFFSETB
148
	movq    %mm0, MUNG(OFFSETB)
146
	leal    YR0GRX, %eax
149
	leal    MUNG(YR0GRX), %eax
147
	movq    (%eax), %mm0
150
	movq    (%eax), %mm0
148
	movq    %mm0, YR0GR
151
	movq    %mm0, MUNG(YR0GR)
149
	leal    YBG0BX, %eax
152
	leal    MUNG(YBG0BX), %eax
150
	movq    (%eax), %mm0
153
	movq    (%eax), %mm0
151
	movq    %mm0, YBG0B
154
	movq    %mm0, MUNG(YBG0B)
152
	leal    UR0GRX, %eax
155
	leal    MUNG(UR0GRX), %eax
153
	movq    (%eax), %mm0
156
	movq    (%eax), %mm0
154
	movq    %mm0, UR0GR
157
	movq    %mm0, MUNG(UR0GR)
155
	leal    UBG0BX, %eax
158
	leal    MUNG(UBG0BX), %eax
156
	movq    (%eax), %mm0
159
	movq    (%eax), %mm0
157
	movq    %mm0, UBG0B
160
	movq    %mm0, MUNG(UBG0B)
158
	leal    VR0GRX, %eax
161
	leal    MUNG(VR0GRX), %eax
159
	movq    (%eax), %mm0
162
	movq    (%eax), %mm0
160
	movq    %mm0, VR0GR
163
	movq    %mm0, MUNG(VR0GR)
161
	leal    VBG0BX, %eax
164
	leal    MUNG(VBG0BX), %eax
162
	movq    (%eax), %mm0
165
	movq    (%eax), %mm0
163
	movq    %mm0, VBG0B
166
	movq    %mm0, MUNG(VBG0B)
164
	
167
	movl    _rows(%esp), %eax
165
	movl    _rows(%ebp), %eax
168
	movl    _columns(%esp), %ebx
166
	movl    _columns(%ebp), %ebx
167
	mull    %ebx            #number pixels
169
	mull    %ebx            #number pixels
168
	shrl    $3, %eax        #number of loops
170
	shrl    $3, %eax        #number of loops
169
	movl    %eax, %edi      #loop counter in edi
171
	movl    %eax, %edi      #loop counter in edi
170
	movl    _inPtr(%ebp), %eax
172
	movl    _inPtr(%esp), %eax
171
	movl    _outyPtr(%ebp), %ebx
173
	movl    _outyPtr(%esp), %ebx
172
	movl    _outuPtr(%ebp), %ecx
174
	movl    _outuPtr(%esp), %ecx
173
	movl    _outvPtr(%ebp), %edx
175
	movl    _outvPtr(%esp), %edx
174
rgbtoycb_mmx_loop: 
176
rgbtoycb_mmx_loop: 
175
	movq    (%eax), %mm1    #load G2R2B1G1R1B0G0R0
177
	movq    (%eax), %mm1    #load G2R2B1G1R1B0G0R0
176
	pxor    %mm6, %mm6      #0 -> mm6
178
	pxor    %mm6, %mm6      #0 -> mm6
Lines 184-212 rgbtoycb_mmx_loop: Link Here
184
	punpcklbw %mm6, %mm1     #B1G1R1B0 -> mm1
186
	punpcklbw %mm6, %mm1     #B1G1R1B0 -> mm1
185
	movq    %mm0, %mm2      #R1B0G0R0 -> mm2
187
	movq    %mm0, %mm2      #R1B0G0R0 -> mm2
186
188
187
	pmaddwd YR0GR, %mm0     #yrR1,ygG0+yrR0 -> mm0
189
	pmaddwd MUNG(YR0GR), %mm0     #yrR1,ygG0+yrR0 -> mm0
188
	movq    %mm1, %mm3      #B1G1R1B0 -> mm3
190
	movq    %mm1, %mm3      #B1G1R1B0 -> mm3
189
191
190
	pmaddwd YBG0B, %mm1     #ybB1+ygG1,ybB0 -> mm1
192
	pmaddwd MUNG(YBG0B), %mm1     #ybB1+ygG1,ybB0 -> mm1
191
	movq    %mm2, %mm4      #R1B0G0R0 -> mm4
193
	movq    %mm2, %mm4      #R1B0G0R0 -> mm4
192
194
193
	pmaddwd UR0GR, %mm2     #urR1,ugG0+urR0 -> mm2
195
	pmaddwd MUNG(UR0GR), %mm2     #urR1,ugG0+urR0 -> mm2
194
	movq    %mm3, %mm5      #B1G1R1B0 -> mm5
196
	movq    %mm3, %mm5      #B1G1R1B0 -> mm5
195
197
196
	pmaddwd UBG0B, %mm3     #ubB1+ugG1,ubB0 -> mm3
198
	pmaddwd MUNG(UBG0B), %mm3     #ubB1+ugG1,ubB0 -> mm3
197
	punpckhbw       %mm6, %mm7 #    00G2R2 -> mm7
199
	punpckhbw       %mm6, %mm7 #    00G2R2 -> mm7
198
200
199
	pmaddwd VR0GR, %mm4     #vrR1,vgG0+vrR0 -> mm4
201
	pmaddwd MUNG(VR0GR), %mm4     #vrR1,vgG0+vrR0 -> mm4
200
	paddd   %mm1, %mm0      #Y1Y0 -> mm0
202
	paddd   %mm1, %mm0      #Y1Y0 -> mm0
201
203
202
	pmaddwd VBG0B, %mm5     #vbB1+vgG1,vbB0 -> mm5
204
	pmaddwd MUNG(VBG0B), %mm5     #vbB1+vgG1,vbB0 -> mm5
203
205
204
	movq    8(%eax), %mm1   #R5B4G4R4B3G3R3B2 -> mm1
206
	movq    8(%eax), %mm1   #R5B4G4R4B3G3R3B2 -> mm1
205
	paddd   %mm3, %mm2      #U1U0 -> mm2
207
	paddd   %mm3, %mm2      #U1U0 -> mm2
206
208
207
	movq    %mm1, %mm6      #R5B4G4R4B3G3R3B2 -> mm6
209
	movq    %mm1, %mm6      #R5B4G4R4B3G3R3B2 -> mm6
208
210
209
	punpcklbw       ZEROS, %mm1     #B3G3R3B2 -> mm1
211
	punpcklbw       MUNG(ZEROS), %mm1     #B3G3R3B2 -> mm1
210
	paddd   %mm5, %mm4      #V1V0 -> mm4
212
	paddd   %mm5, %mm4      #V1V0 -> mm4
211
213
212
	movq    %mm1, %mm5      #B3G3R3B2 -> mm5
214
	movq    %mm1, %mm5      #B3G3R3B2 -> mm5
Lines 214-242 rgbtoycb_mmx_loop: Link Here
214
216
215
	paddd   %mm7, %mm1      #R3B200+00G2R2=R3B2G2R2->mm1
217
	paddd   %mm7, %mm1      #R3B200+00G2R2=R3B2G2R2->mm1
216
218
217
	punpckhbw       ZEROS, %mm6     #R5B4G4R4 -> mm6
219
	punpckhbw       MUNG(ZEROS), %mm6     #R5B4G4R4 -> mm6
218
	movq    %mm1, %mm3      #R3B2G2R2 -> mm3
220
	movq    %mm1, %mm3      #R3B2G2R2 -> mm3
219
221
220
	pmaddwd YR0GR, %mm1     #yrR3,ygG2+yrR2 -> mm1
222
	pmaddwd MUNG(YR0GR), %mm1     #yrR3,ygG2+yrR2 -> mm1
221
	movq    %mm5, %mm7      #B3G3R3B2 -> mm7
223
	movq    %mm5, %mm7      #B3G3R3B2 -> mm7
222
224
223
	pmaddwd YBG0B, %mm5     #ybB3+ygG3,ybB2 -> mm5
225
	pmaddwd MUNG(YBG0B), %mm5     #ybB3+ygG3,ybB2 -> mm5
224
	psrad   $FIXPSHIFT, %mm0       #32-bit scaled Y1Y0 -> mm0
226
	psrad   $FIXPSHIFT, %mm0       #32-bit scaled Y1Y0 -> mm0
225
227
226
	movq    %mm6, TEMP0     #R5B4G4R4 -> TEMP0
228
	movq    %mm6, MUNG(TEMP0)     #R5B4G4R4 -> TEMP0
227
	movq    %mm3, %mm6      #R3B2G2R2 -> mm6
229
	movq    %mm3, %mm6      #R3B2G2R2 -> mm6
228
	pmaddwd UR0GR, %mm6     #urR3,ugG2+urR2 -> mm6
230
	pmaddwd MUNG(UR0GR), %mm6     #urR3,ugG2+urR2 -> mm6
229
	psrad   $FIXPSHIFT, %mm2       #32-bit scaled U1U0 -> mm2
231
	psrad   $FIXPSHIFT, %mm2       #32-bit scaled U1U0 -> mm2
230
232
231
	paddd   %mm5, %mm1      #Y3Y2 -> mm1
233
	paddd   %mm5, %mm1      #Y3Y2 -> mm1
232
	movq    %mm7, %mm5      #B3G3R3B2 -> mm5
234
	movq    %mm7, %mm5      #B3G3R3B2 -> mm5
233
	pmaddwd UBG0B, %mm7     #ubB3+ugG3,ubB2
235
	pmaddwd MUNG(UBG0B), %mm7     #ubB3+ugG3,ubB2
234
	psrad   $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
236
	psrad   $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
235
237
236
	pmaddwd VR0GR, %mm3     #vrR3,vgG2+vrR2
238
	pmaddwd MUNG(VR0GR), %mm3     #vrR3,vgG2+vrR2
237
	packssdw        %mm1, %mm0      #Y3Y2Y1Y0 -> mm0
239
	packssdw        %mm1, %mm0      #Y3Y2Y1Y0 -> mm0
238
240
239
	pmaddwd VBG0B, %mm5     #vbB3+vgG3,vbB2 -> mm5
241
	pmaddwd MUNG(VBG0B), %mm5     #vbB3+vgG3,vbB2 -> mm5
240
	psrad   $FIXPSHIFT, %mm4       #32-bit scaled V1V0 -> mm4
242
	psrad   $FIXPSHIFT, %mm4       #32-bit scaled V1V0 -> mm4
241
243
242
	movq    16(%eax), %mm1  #B7G7R7B6G6R6B5G5 -> mm1
244
	movq    16(%eax), %mm1  #B7G7R7B6G6R6B5G5 -> mm1
Lines 251-308 rgbtoycb_mmx_loop: Link Here
251
	movq    %mm7, %mm5      #R7B6G6R6B5G500 -> mm5
253
	movq    %mm7, %mm5      #R7B6G6R6B5G500 -> mm5
252
	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V3V2 -> mm3
254
	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V3V2 -> mm3
253
255
254
	paddw	OFFSETY, %mm0
256
	paddw	MUNG(OFFSETY), %mm0
255
	movq    %mm0, (%ebx)     #store Y3Y2Y1Y0 
257
	movq    %mm0, (%ebx)     #store Y3Y2Y1Y0 
256
	packssdw %mm6, %mm2      #32-bit scaled U3U2U1U0 -> mm2
258
	packssdw %mm6, %mm2      #32-bit scaled U3U2U1U0 -> mm2
257
259
258
	movq    TEMP0, %mm0     #R5B4G4R4 -> mm0
260
	movq    MUNG(TEMP0), %mm0     #R5B4G4R4 -> mm0
259
	addl	$8, %ebx
261
	addl	$8, %ebx
260
	
262
261
	punpcklbw       ZEROS, %mm7     #B5G500 -> mm7
263
	punpcklbw       MUNG(ZEROS), %mm7     #B5G500 -> mm7
262
	movq    %mm0, %mm6      #R5B4G4R4 -> mm6
264
	movq    %mm0, %mm6      #R5B4G4R4 -> mm6
263
265
264
	movq    %mm2, TEMPU     #32-bit scaled U3U2U1U0 -> TEMPU
266
	movq    %mm2, MUNG(TEMPU)     #32-bit scaled U3U2U1U0 -> TEMPU
265
	psrlq   $32, %mm0       #00R5B4 -> mm0
267
	psrlq   $32, %mm0       #00R5B4 -> mm0
266
268
267
	paddw   %mm0, %mm7      #B5G5R5B4 -> mm7
269
	paddw   %mm0, %mm7      #B5G5R5B4 -> mm7
268
	movq    %mm6, %mm2      #R5B4G4R4 -> mm2
270
	movq    %mm6, %mm2      #R5B4G4R4 -> mm2
269
271
270
	pmaddwd YR0GR, %mm2     #yrR5,ygG4+yrR4 -> mm2
272
	pmaddwd MUNG(YR0GR), %mm2     #yrR5,ygG4+yrR4 -> mm2
271
	movq    %mm7, %mm0      #B5G5R5B4 -> mm0
273
	movq    %mm7, %mm0      #B5G5R5B4 -> mm0
272
274
273
	pmaddwd YBG0B, %mm7     #ybB5+ygG5,ybB4 -> mm7
275
	pmaddwd MUNG(YBG0B), %mm7     #ybB5+ygG5,ybB4 -> mm7
274
	packssdw        %mm3, %mm4      #32-bit scaled V3V2V1V0 -> mm4
276
	packssdw        %mm3, %mm4      #32-bit scaled V3V2V1V0 -> mm4
275
277
276
	addl    $24, %eax       #increment RGB count
278
	addl    $24, %eax       #increment RGB count
277
279
278
	movq    %mm4, TEMPV     #(V3V2V1V0)/256 -> TEMPV
280
	movq    %mm4, MUNG(TEMPV)     #(V3V2V1V0)/256 -> TEMPV
279
	movq    %mm6, %mm4      #R5B4G4R4 -> mm4
281
	movq    %mm6, %mm4      #R5B4G4R4 -> mm4
280
282
281
	pmaddwd UR0GR, %mm6     #urR5,ugG4+urR4
283
	pmaddwd MUNG(UR0GR), %mm6     #urR5,ugG4+urR4
282
	movq    %mm0, %mm3      #B5G5R5B4 -> mm3
284
	movq    %mm0, %mm3      #B5G5R5B4 -> mm3
283
285
284
	pmaddwd UBG0B, %mm0     #ubB5+ugG5,ubB4
286
	pmaddwd MUNG(UBG0B), %mm0     #ubB5+ugG5,ubB4
285
	paddd   %mm7, %mm2      #Y5Y4 -> mm2
287
	paddd   %mm7, %mm2      #Y5Y4 -> mm2
286
288
287
	pmaddwd         VR0GR, %mm4     #vrR5,vgG4+vrR4 -> mm4
289
	pmaddwd         MUNG(VR0GR), %mm4     #vrR5,vgG4+vrR4 -> mm4
288
	pxor    %mm7, %mm7      #0 -> mm7
290
	pxor    %mm7, %mm7      #0 -> mm7
289
291
290
	pmaddwd VBG0B, %mm3     #vbB5+vgG5,vbB4 -> mm3
292
	pmaddwd MUNG(VBG0B), %mm3     #vbB5+vgG5,vbB4 -> mm3
291
	punpckhbw       %mm7, %mm1      #B7G7R7B6 -> mm1
293
	punpckhbw       %mm7, %mm1      #B7G7R7B6 -> mm1
292
294
293
	paddd   %mm6, %mm0      #U5U4 -> mm0
295
	paddd   %mm6, %mm0      #U5U4 -> mm0
294
	movq    %mm1, %mm6      #B7G7R7B6 -> mm6
296
	movq    %mm1, %mm6      #B7G7R7B6 -> mm6
295
297
296
	pmaddwd YBG0B, %mm6     #ybB7+ygG7,ybB6 -> mm6
298
	pmaddwd MUNG(YBG0B), %mm6     #ybB7+ygG7,ybB6 -> mm6
297
	punpckhbw       %mm7, %mm5      #R7B6G6R6 -> mm5
299
	punpckhbw       %mm7, %mm5      #R7B6G6R6 -> mm5
298
300
299
	movq    %mm5, %mm7      #R7B6G6R6 -> mm7
301
	movq    %mm5, %mm7      #R7B6G6R6 -> mm7
300
	paddd   %mm4, %mm3      #V5V4 -> mm3
302
	paddd   %mm4, %mm3      #V5V4 -> mm3
301
303
302
	pmaddwd YR0GR, %mm5     #yrR7,ygG6+yrR6 -> mm5
304
	pmaddwd MUNG(YR0GR), %mm5     #yrR7,ygG6+yrR6 -> mm5
303
	movq    %mm1, %mm4      #B7G7R7B6 -> mm4
305
	movq    %mm1, %mm4      #B7G7R7B6 -> mm4
304
306
305
	pmaddwd UBG0B, %mm4     #ubB7+ugG7,ubB6 -> mm4
307
	pmaddwd MUNG(UBG0B), %mm4     #ubB7+ugG7,ubB6 -> mm4
306
	psrad   $FIXPSHIFT, %mm0       #32-bit scaled U5U4 -> mm0
308
	psrad   $FIXPSHIFT, %mm0       #32-bit scaled U5U4 -> mm0
307
309
308
	psrad   $FIXPSHIFT, %mm2       #32-bit scaled Y5Y4 -> mm2
310
	psrad   $FIXPSHIFT, %mm2       #32-bit scaled Y5Y4 -> mm2
Lines 310-334 rgbtoycb_mmx_loop: Link Here
310
	paddd   %mm5, %mm6      #Y7Y6 -> mm6
312
	paddd   %mm5, %mm6      #Y7Y6 -> mm6
311
	movq    %mm7, %mm5      #R7B6G6R6 -> mm5
313
	movq    %mm7, %mm5      #R7B6G6R6 -> mm5
312
314
313
	pmaddwd UR0GR, %mm7     #urR7,ugG6+ugR6 -> mm7
315
	pmaddwd MUNG(UR0GR), %mm7     #urR7,ugG6+urR6 -> mm7
314
	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V5V4 -> mm3
316
	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V5V4 -> mm3
315
317
316
	pmaddwd VBG0B, %mm1     #vbB7+vgG7,vbB6 -> mm1
318
	pmaddwd MUNG(VBG0B), %mm1     #vbB7+vgG7,vbB6 -> mm1
317
	psrad   $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
319
	psrad   $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
318
320
319
	packssdw %mm6, %mm2     #Y7Y6Y5Y4 -> mm2
321
	packssdw %mm6, %mm2     #Y7Y6Y5Y4 -> mm2
320
322
321
	pmaddwd VR0GR, %mm5     #vrR7,vgG6+vrR6 -> mm5
323
	pmaddwd MUNG(VR0GR), %mm5     #vrR7,vgG6+vrR6 -> mm5
322
	paddd   %mm4, %mm7      #U7U6 -> mm7    
324
	paddd   %mm4, %mm7      #U7U6 -> mm7    
323
325
324
	psrad   $FIXPSHIFT, %mm7       #32-bit scaled U7U6 -> mm7
326
	psrad   $FIXPSHIFT, %mm7       #32-bit scaled U7U6 -> mm7
325
	paddw	OFFSETY, %mm2
327
	paddw	MUNG(OFFSETY), %mm2
326
	movq	%mm2, (%ebx)    #store Y7Y6Y5Y4 
328
	movq	%mm2, (%ebx)    #store Y7Y6Y5Y4 
327
329
328
	movq	ALLONE, %mm6
330
	movq	MUNG(ALLONE), %mm6
329
	packssdw %mm7, %mm0     #32-bit scaled U7U6U5U4 -> mm0
331
	packssdw %mm7, %mm0     #32-bit scaled U7U6U5U4 -> mm0
330
332
331
	movq    TEMPU, %mm4     #32-bit scaled U3U2U1U0 -> mm4
333
	movq    MUNG(TEMPU), %mm4     #32-bit scaled U3U2U1U0 -> mm4
332
	pmaddwd	%mm6, %mm0      #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
334
	pmaddwd	%mm6, %mm0      #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
333
	
335
	
334
	pmaddwd	%mm6, %mm4      #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
336
	pmaddwd	%mm6, %mm4      #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
Lines 338-345 rgbtoycb_mmx_loop: Link Here
338
340
339
	psrad   $FIXPSHIFT, %mm1       #32-bit scaled V7V6 -> mm1
341
	psrad   $FIXPSHIFT, %mm1       #32-bit scaled V7V6 -> mm1
340
	psraw	$1, %mm4 	#divide UU3 UU2 UU1 UU0 by 2 -> mm4
342
	psraw	$1, %mm4 	#divide UU3 UU2 UU1 UU0 by 2 -> mm4
341
		
343
342
	movq    TEMPV, %mm5     #32-bit scaled V3V2V1V0 -> mm5
344
	movq    MUNG(TEMPV), %mm5     #32-bit scaled V3V2V1V0 -> mm5
343
345
344
	movq	%mm4, (%ecx)    # store U	
346
	movq	%mm4, (%ecx)    # store U	
345
347
Lines 372-377 rgbtoycb_mmx_loop: Link Here
372
	ret     
374
	ret     
373
375
374
.global _dv_ppm_copy_y_block_mmx
376
.global _dv_ppm_copy_y_block_mmx
377
.hidden _dv_ppm_copy_y_block_mmx
378
.type	_dv_ppm_copy_y_block_mmx,@function
375
_dv_ppm_copy_y_block_mmx:
379
_dv_ppm_copy_y_block_mmx:
376
380
377
	pushl   %ebp
381
	pushl   %ebp
Lines 422-438 _dv_ppm_copy_y_block_mmx: Link Here
422
	ret
426
	ret
423
427
424
.global _dv_pgm_copy_y_block_mmx
428
.global _dv_pgm_copy_y_block_mmx
429
.hidden _dv_pgm_copy_y_block_mmx
430
.type	_dv_pgm_copy_y_block_mmx,@function
425
_dv_pgm_copy_y_block_mmx:
431
_dv_pgm_copy_y_block_mmx:
426
432
427
	pushl   %ebp
433
	pushl   %ebp
428
	movl    %esp, %ebp
429
	pushl   %esi
434
	pushl   %esi
430
	pushl	%edi
435
	pushl	%edi
431
	
432
	movl    8(%ebp), %edi          # dest
433
	movl    12(%ebp), %esi         # src
434
436
435
	movq	OFFSETY, %mm7
437
	LOAD_PIC_REG_BP()
438
439
	movl    16(%esp), %edi          # dest
440
	movl    20(%esp), %esi         # src
441
442
	movq	MUNG(OFFSETY), %mm7
436
	pxor	%mm6, %mm6
443
	pxor	%mm6, %mm6
437
	
444
	
438
	movq	(%esi), %mm0
445
	movq	(%esi), %mm0
Lines 564-580 _dv_pgm_copy_y_block_mmx: Link Here
564
	ret
571
	ret
565
572
566
.global _dv_video_copy_y_block_mmx
573
.global _dv_video_copy_y_block_mmx
574
.hidden _dv_video_copy_y_block_mmx
575
.type	_dv_video_copy_y_block_mmx,@function
567
_dv_video_copy_y_block_mmx:
576
_dv_video_copy_y_block_mmx:
568
577
569
	pushl   %ebp
578
	pushl   %ebp
570
	movl    %esp, %ebp
571
	pushl   %esi
579
	pushl   %esi
572
	pushl	%edi
580
	pushl	%edi
573
	
574
	movl    8(%ebp), %edi          # dest
575
	movl    12(%ebp), %esi         # src
576
581
577
	movq	OFFSETBX, %mm7
582
	LOAD_PIC_REG_BP()
583
584
	movl    16(%esp), %edi          # dest
585
	movl    20(%esp), %esi         # src
586
587
	movq	MUNG(OFFSETBX), %mm7
578
	pxor	%mm6, %mm6
588
	pxor	%mm6, %mm6
579
	
589
	
580
	movq	(%esi), %mm0
590
	movq	(%esi), %mm0
Lines 709-714 _dv_video_copy_y_block_mmx: Link Here
709
	
719
	
710
		
720
		
711
.global _dv_ppm_copy_pal_c_block_mmx
721
.global _dv_ppm_copy_pal_c_block_mmx
722
.hidden _dv_ppm_copy_pal_c_block_mmx
723
.type	_dv_ppm_copy_pal_c_block_mmx,@function
712
_dv_ppm_copy_pal_c_block_mmx:
724
_dv_ppm_copy_pal_c_block_mmx:
713
				
725
				
714
	pushl   %ebp
726
	pushl   %ebp
Lines 852-870 _dv_ppm_copy_pal_c_block_mmx: Link Here
852
	ret
864
	ret
853
865
854
.global _dv_pgm_copy_pal_c_block_mmx
866
.global _dv_pgm_copy_pal_c_block_mmx
867
.hidden _dv_pgm_copy_pal_c_block_mmx
868
.type	_dv_pgm_copy_pal_c_block_mmx,@function
855
_dv_pgm_copy_pal_c_block_mmx:
869
_dv_pgm_copy_pal_c_block_mmx:
856
				
870
				
857
	pushl   %ebp
871
	pushl   %ebp
858
	movl    %esp, %ebp
859
	pushl   %esi
872
	pushl   %esi
860
	pushl	%edi
873
	pushl	%edi
861
	pushl	%ebx
874
	pushl	%ebx
862
	
863
	movl    8(%ebp), %edi          # dest
864
	movl    12(%ebp), %esi         # src
865
875
876
	LOAD_PIC_REG_BP()
877
878
	movl    20(%esp), %edi          # dest
879
	movl    24(%esp), %esi         # src
866
880
867
	movq	OFFSETBX, %mm7
881
	movq	MUNG(OFFSETBX), %mm7
868
	pxor	%mm6, %mm6
882
	pxor	%mm6, %mm6
869
883
870
	
884
	
Lines 1000-1017 _dv_pgm_copy_pal_c_block_mmx: Link Here
1000
	ret
1014
	ret
1001
1015
1002
.global _dv_video_copy_pal_c_block_mmx
1016
.global _dv_video_copy_pal_c_block_mmx
1017
.hidden _dv_video_copy_pal_c_block_mmx
1018
.type	_dv_video_copy_pal_c_block_mmx,@function
1003
_dv_video_copy_pal_c_block_mmx:
1019
_dv_video_copy_pal_c_block_mmx:
1004
				
1020
				
1005
	pushl   %ebp
1021
	pushl   %ebp
1006
	movl    %esp, %ebp
1007
	pushl   %esi
1022
	pushl   %esi
1008
	pushl	%edi
1023
	pushl	%edi
1009
	pushl	%ebx
1024
	pushl	%ebx
1010
	
1011
	movl    8(%ebp), %edi          # dest
1012
	movl    12(%ebp), %esi         # src
1013
1025
1014
	movq	OFFSETBX, %mm7
1026
	LOAD_PIC_REG_BP()
1027
1028
	movl    20(%esp), %edi          # dest
1029
	movl    24(%esp), %esi         # src
1030
1031
	movq	MUNG(OFFSETBX), %mm7
1015
	paddw	%mm7, %mm7
1032
	paddw	%mm7, %mm7
1016
	pxor	%mm6, %mm6
1033
	pxor	%mm6, %mm6
1017
1034
Lines 1095-1115 video_copy_pal_c_block_mmx_loop: Link Here
1095
	ret
1112
	ret
1096
	
1113
	
1097
.global _dv_ppm_copy_ntsc_c_block_mmx
1114
.global _dv_ppm_copy_ntsc_c_block_mmx
1115
.hidden _dv_ppm_copy_ntsc_c_block_mmx
1116
.type	_dv_ppm_copy_ntsc_c_block_mmx,@function
1098
_dv_ppm_copy_ntsc_c_block_mmx:
1117
_dv_ppm_copy_ntsc_c_block_mmx:
1099
				
1118
				
1100
	pushl   %ebp
1119
	pushl   %ebp
1101
	movl    %esp, %ebp
1102
	pushl   %esi
1120
	pushl   %esi
1103
	pushl	%edi
1121
	pushl	%edi
1104
	pushl	%ebx
1122
	pushl	%ebx
1105
	
1123
1106
	movl    8(%ebp), %edi          # dest
1124
	LOAD_PIC_REG_BP()
1107
	movl    12(%ebp), %esi         # src
1125
1126
	movl    20(%esp), %edi          # dest
1127
	movl    24(%esp), %esi         # src
1108
1128
1109
	movl	$4, %ebx	
1129
	movl	$4, %ebx	
1110
1130
1111
	movq	ALLONE, %mm6
1131
	movq	MUNG(ALLONE), %mm6
1112
	
1113
ppm_copy_ntsc_c_block_mmx_loop:	
1132
ppm_copy_ntsc_c_block_mmx_loop:	
1114
	
1133
	
1115
	movq	(%esi), %mm0
1134
	movq	(%esi), %mm0
Lines 1168-1184 ppm_copy_ntsc_c_block_mmx_loop: Link Here
1168
	ret
1187
	ret
1169
1188
1170
.global _dv_pgm_copy_ntsc_c_block_mmx
1189
.global _dv_pgm_copy_ntsc_c_block_mmx
1190
.hidden _dv_pgm_copy_ntsc_c_block_mmx
1191
.type	_dv_pgm_copy_ntsc_c_block_mmx,@function
1171
_dv_pgm_copy_ntsc_c_block_mmx:
1192
_dv_pgm_copy_ntsc_c_block_mmx:
1172
				
1193
				
1173
	pushl   %ebp
1194
	pushl   %ebp
1174
	movl    %esp, %ebp
1175
	pushl   %esi
1195
	pushl   %esi
1176
	pushl	%edi
1196
	pushl	%edi
1177
	
1178
	movl    8(%ebp), %edi          # dest
1179
	movl    12(%ebp), %esi         # src
1180
1197
1181
	movq	OFFSETBX, %mm7
1198
	LOAD_PIC_REG_BP()
1199
1200
	movl    16(%esp), %edi          # dest
1201
	movl    20(%esp), %esi         # src
1202
1203
	movq	MUNG(OFFSETBX), %mm7
1182
	paddw	%mm7, %mm7
1204
	paddw	%mm7, %mm7
1183
	pxor	%mm6, %mm6
1205
	pxor	%mm6, %mm6
1184
1206
Lines 1325-1342 _dv_pgm_copy_ntsc_c_block_mmx: Link Here
1325
	ret
1347
	ret
1326
1348
1327
.global _dv_video_copy_ntsc_c_block_mmx
1349
.global _dv_video_copy_ntsc_c_block_mmx
1350
.hidden _dv_video_copy_ntsc_c_block_mmx
1351
.type	_dv_video_copy_ntsc_c_block_mmx,@function
1328
_dv_video_copy_ntsc_c_block_mmx:
1352
_dv_video_copy_ntsc_c_block_mmx:
1329
				
1353
				
1330
	pushl   %ebp
1354
	pushl   %ebp
1331
	movl    %esp, %ebp
1332
	pushl   %esi
1355
	pushl   %esi
1333
	pushl	%edi
1356
	pushl	%edi
1334
	pushl	%ebx
1357
	pushl	%ebx
1335
	
1336
	movl    8(%ebp), %edi          # dest
1337
	movl    12(%ebp), %esi         # src
1338
1358
1339
	movq	OFFSETBX, %mm7
1359
	LOAD_PIC_REG_BP()
1360
1361
	movl    20(%esp), %edi          # dest
1362
	movl    24(%esp), %esi         # src
1363
1364
	movq	MUNG(OFFSETBX), %mm7
1340
	paddw	%mm7, %mm7
1365
	paddw	%mm7, %mm7
1341
	pxor	%mm6, %mm6
1366
	pxor	%mm6, %mm6
1342
1367
(-)libdv-0.104-old/libdv/rgbtoyuv_x86_64.S (-3 lines)
Lines 41-49 Link Here
41
#define DV_WIDTH_SHORT_HALF 720
41
#define DV_WIDTH_SHORT_HALF 720
42
#define DV_WIDTH_BYTE_HALF  360	
42
#define DV_WIDTH_BYTE_HALF  360	
43
		
43
		
44
.global _dv_rgbtoycb_mmx_x86_64
45
# .global yuvtoycb_mmx_x86_64
46
47
.data
44
.data
48
45
49
.align 8
46
.align 8
(-)libdv-0.104-old/libdv/transpose_x86.S (+2 lines)
Lines 1-5 Link Here
1
.text
1
.text
2
.global _dv_transpose_mmx
2
.global _dv_transpose_mmx
3
.hidden _dv_transpose_mmx
4
.type _dv_transpose_mmx,@function
3
	
5
	
4
_dv_transpose_mmx:
6
_dv_transpose_mmx:
5
	pushl   %ebp
7
	pushl   %ebp
(-)libdv-0.104-old/libdv/transpose_x86_64.S (+2 lines)
Lines 1-5 Link Here
1
.text
1
.text
2
.global _dv_transpose_mmx_x86_64
2
.global _dv_transpose_mmx_x86_64
3
.hidden _dv_transpose_mmx_x86_64
4
.type _dv_transpose_mmx_x86_64,@function
3
	
5
	
4
_dv_transpose_mmx_x86_64:
6
_dv_transpose_mmx_x86_64:
5
	
7
	
(-)libdv-0.104-old/libdv/vlc_x86.S (-26 / +159 lines)
Lines 1-29 Link Here
1
	#include "asmoff.h"
1
	#include "asmoff.h"
2
	#include "asm_common.S"
3
2
.text
4
.text
3
	.align 4
5
	.align 4
4
.globl dv_decode_vlc 
6
.globl dv_decode_vlc 
7
.globl asm_dv_decode_vlc 
8
.hidden asm_dv_decode_vlc
9
asm_dv_decode_vlc = dv_decode_vlc
10
5
	.type	 dv_decode_vlc,@function
11
	.type	 dv_decode_vlc,@function
6
dv_decode_vlc:
12
dv_decode_vlc:
7
	pushl %ebx
13
	pushl %ebx
14
	pushl %ebp
15
16
	LOAD_PIC_REG_BP()
8
17
9
	/* Args are at 8(%esp). */
18
	/* Args are at 12(%esp). */
10
	movl  8(%esp),%eax		/* %eax is bits */
19
	movl  12(%esp),%eax		/* %eax is bits */
11
	movl  12(%esp),%ebx		/* %ebx is maxbits */
20
	movl  16(%esp),%ebx		/* %ebx is maxbits */
12
	andl  $0x3f,%ebx		/* limit index range STL*/
21
	andl  $0x3f,%ebx		/* limit index range STL*/
13
22
14
	movl  dv_vlc_class_index_mask(,%ebx,4),%edx
23
	movl  MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
15
	andl  %eax,%edx
24
	andl  %eax,%edx
16
	movl  dv_vlc_class_index_rshift(,%ebx,4),%ecx
25
	movl  MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
17
	sarl  %cl,%edx
26
	sarl  %cl,%edx
18
	movl  dv_vlc_classes(,%ebx,4),%ecx
27
	movl  MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
19
	movsbl  (%ecx,%edx,1),%edx	/* %edx is class */
28
	movsbl  (%ecx,%edx,1),%edx	/* %edx is class */
20
			
29
			
21
	movl  dv_vlc_index_mask(,%edx,4),%ebx
30
	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
22
	movl  dv_vlc_index_rshift(,%edx,4),%ecx
31
	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
23
	andl  %eax,%ebx
32
	andl  %eax,%ebx
24
	sarl  %cl,%ebx
33
	sarl  %cl,%ebx
25
34
26
	movl  dv_vlc_lookups(,%edx,4),%edx
35
	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
27
	movl  (%edx,%ebx,4),%edx
36
	movl  (%edx,%ebx,4),%edx
28
37
29
	/* Now %edx holds result, like this:
38
	/* Now %edx holds result, like this:
Lines 42-48 dv_decode_vlc: Link Here
42
	movl  %edx,%ecx
51
	movl  %edx,%ecx
43
	sarl  $8,%ecx
52
	sarl  $8,%ecx
44
	andl  $0xff,%ecx
53
	andl  $0xff,%ecx
45
	movl  sign_mask(,%ecx,4),%ebx
54
	movl  MUNG_ARR(sign_mask,%ecx,4),%ebx
46
	andl  %ebx,%eax
55
	andl  %ebx,%eax
47
	negl  %eax
56
	negl  %eax
48
	sarl  $31,%eax
57
	sarl  $31,%eax
Lines 63-76 dv_decode_vlc: Link Here
63
	    *result = broken;
72
	    *result = broken;
64
	Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
73
	Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
65
	*/
74
	*/
66
	movl  12(%esp),%ebx		/* %ebx is maxbits */
75
	movl  16(%esp),%ebx		/* %ebx is maxbits */
67
	subl  %ecx,%ebx
76
	subl  %ecx,%ebx
68
	sbbl  %ebx,%ebx
77
	sbbl  %ebx,%ebx
69
	orl   %ebx,%edx
78
	orl   %ebx,%edx
70
79
71
	movl  16(%esp),%eax
80
	movl  20(%esp),%eax
72
	movl  %edx,(%eax)
81
	movl  %edx,(%eax)
73
	
82
	popl  %ebp
74
	popl  %ebx
83
	popl  %ebx
75
	ret
84
	ret
76
	
85
	
Lines 80-100 dv_decode_vlc: Link Here
80
	.type	 __dv_decode_vlc,@function
89
	.type	 __dv_decode_vlc,@function
81
__dv_decode_vlc:
90
__dv_decode_vlc:
82
	pushl %ebx
91
	pushl %ebx
92
	pushl %ebp
93
94
	LOAD_PIC_REG_BP()
83
95
84
	/* Args are at 8(%esp). */
96
	/* Args are at 12(%esp). */
85
	movl  8(%esp),%eax		/* %eax is bits */
97
	movl  12(%esp),%eax		/* %eax is bits */
86
	
98
	
87
	movl  %eax,%edx			/* %edx is class */
99
	movl  %eax,%edx			/* %edx is class */
88
	andl  $0xfe00,%edx
100
	andl  $0xfe00,%edx
89
	sarl  $9,%edx
101
	sarl  $9,%edx
102
#ifdef __PIC__
103
	movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
104
#else
90
	movsbl dv_vlc_class_lookup5(%edx),%edx
105
	movsbl dv_vlc_class_lookup5(%edx),%edx
91
	
106
#endif
92
	movl  dv_vlc_index_mask(,%edx,4),%ebx
107
93
	movl  dv_vlc_index_rshift(,%edx,4),%ecx
108
	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
109
	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
94
	andl  %eax,%ebx
110
	andl  %eax,%ebx
95
	sarl  %cl,%ebx
111
	sarl  %cl,%ebx
96
112
97
	movl  dv_vlc_lookups(,%edx,4),%edx
113
	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
98
	movl  (%edx,%ebx,4),%edx
114
	movl  (%edx,%ebx,4),%edx
99
115
100
	/* Now %edx holds result, like this:
116
	/* Now %edx holds result, like this:
Lines 112-118 __dv_decode_vlc: Link Here
112
	movl  %edx,%ecx
128
	movl  %edx,%ecx
113
	sarl  $8,%ecx
129
	sarl  $8,%ecx
114
	andl  $0xff,%ecx
130
	andl  $0xff,%ecx
115
	movl  sign_mask(,%ecx,4),%ecx
131
	movl  MUNG_ARR(sign_mask,%ecx,4),%ecx
116
	andl  %ecx,%eax
132
	andl  %ecx,%eax
117
	negl  %eax
133
	negl  %eax
118
	sarl  $31,%eax
134
	sarl  $31,%eax
Lines 127-135 __dv_decode_vlc: Link Here
127
	xorl  %eax,%edx
143
	xorl  %eax,%edx
128
	subl  %eax,%edx
144
	subl  %eax,%edx
129
145
130
	movl  12(%esp),%eax
146
	movl  16(%esp),%eax
131
	movl  %edx,(%eax)
147
	movl  %edx,(%eax)
132
	
148
	popl  %ebp
133
	popl  %ebx
149
	popl  %ebx
134
	ret
150
	ret
135
151
Lines 140-152 void dv_parse_ac_coeffs_pass0(bitstream_ Link Here
140
*/
156
*/
141
.text
157
.text
142
	.align	4
158
	.align	4
159
.globl asm_dv_parse_ac_coeffs_pass0
160
.hidden asm_dv_parse_ac_coeffs_pass0
161
	asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
162
143
.globl	dv_parse_ac_coeffs_pass0
163
.globl	dv_parse_ac_coeffs_pass0
164
.type	dv_parse_ac_coeffs_pass0,@function
144
dv_parse_ac_coeffs_pass0:
165
dv_parse_ac_coeffs_pass0:
145
	pushl	%ebx
166
	pushl	%ebx
146
	pushl	%edi
167
	pushl	%edi
147
	pushl	%esi
168
	pushl	%esi
148
	pushl	%ebp
169
	pushl	%ebp
149
170
171
	LOAD_PIC_REG_SI()
172
150
#define ARGn(N)  (20+(4*(N)))(%esp)
173
#define ARGn(N)  (20+(4*(N)))(%esp)
151
174
152
	/*
175
	/*
Lines 159-166 dv_parse_ac_coeffs_pass0: Link Here
159
	ebp	bl
182
	ebp	bl
160
	*/
183
	*/
161
	movl    ARGn(2),%ebp
184
	movl    ARGn(2),%ebp
185
#ifndef __PIC__
162
	movl	ARGn(0),%esi
186
	movl	ARGn(0),%esi
163
	movl	bitstream_t_buf(%esi),%esi
187
	movl	bitstream_t_buf(%esi),%esi
188
#endif
164
	movl	dv_block_t_offset(%ebp),%edi
189
	movl	dv_block_t_offset(%ebp),%edi
165
	movl	dv_block_t_reorder(%ebp),%ebx
190
	movl	dv_block_t_reorder(%ebp),%ebx
166
191
Lines 170-176 dv_parse_ac_coeffs_pass0: Link Here
170
	
195
	
171
	movq    dv_block_t_coeffs(%ebp),%mm1
196
	movq    dv_block_t_coeffs(%ebp),%mm1
172
	pxor    %mm0,%mm0
197
	pxor    %mm0,%mm0
198
#ifdef __PIC__
199
	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
200
#else
173
	pand    const_f_0_0_0,%mm1
201
	pand    const_f_0_0_0,%mm1
202
#endif
174
	movq    %mm1,dv_block_t_coeffs(%ebp)
203
	movq    %mm1,dv_block_t_coeffs(%ebp)
175
	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
204
	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
176
	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
205
	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
Lines 191-199 dv_parse_ac_coeffs_pass0: Link Here
191
readloop:
220
readloop:
192
	movl	%edi,%ecx
221
	movl	%edi,%ecx
193
	shrl	$3,%ecx
222
	shrl	$3,%ecx
223
#ifdef __PIC__
224
	movl    ARGn(0),%eax
225
	addl    bitstream_t_buf(%eax),%ecx
226
	movzbl  (%ecx),%eax
227
	movzbl  1(%ecx),%edx
228
	movzbl  2(%ecx),%ecx
229
#else
194
	movzbl  (%esi,%ecx,1),%eax
230
	movzbl  (%esi,%ecx,1),%eax
195
	movzbl  1(%esi,%ecx,1),%edx
231
	movzbl  1(%esi,%ecx,1),%edx
196
	movzbl  2(%esi,%ecx,1),%ecx
232
	movzbl  2(%esi,%ecx,1),%ecx
233
#endif
197
	shll	$16,%eax
234
	shll	$16,%eax
198
	shll	$8,%edx
235
	shll	$8,%edx
199
	orl	%ecx,%eax
236
	orl	%ecx,%eax
Lines 217-223 readloop: Link Here
217
254
218
	/* Attempt to use the shortcut first.  If it hits, then
255
	/* Attempt to use the shortcut first.  If it hits, then
219
	   this vlc term has been decoded. */
256
	   this vlc term has been decoded. */
257
#ifdef __PIC__
258
	movl	dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
259
#else
220
	movl	dv_vlc_class1_shortcut(,%ecx,4),%edx
260
	movl	dv_vlc_class1_shortcut(,%ecx,4),%edx
261
#endif
221
	test	$0x80,%edx
262
	test	$0x80,%edx
222
	je	done_decode
263
	je	done_decode
223
264
Lines 228-239 readloop: Link Here
228
	movl	%ebx,dv_block_t_reorder(%ebp)
269
	movl	%ebx,dv_block_t_reorder(%ebp)
229
270
230
	/* %eax is bits */
271
	/* %eax is bits */
231
	
272
#ifdef __PIC__
273
	movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
274
275
	movl  dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
276
	movl  dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
277
	movl  dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
278
#else
232
	movsbl dv_vlc_class_lookup5(%ecx),%ecx
279
	movsbl dv_vlc_class_lookup5(%ecx),%ecx
233
280
234
	movl  dv_vlc_index_mask(,%ecx,4),%ebx
281
	movl  dv_vlc_index_mask(,%ecx,4),%ebx
235
	movl  dv_vlc_lookups(,%ecx,4),%edx
282
	movl  dv_vlc_lookups(,%ecx,4),%edx
236
	movl  dv_vlc_index_rshift(,%ecx,4),%ecx
283
	movl  dv_vlc_index_rshift(,%ecx,4),%ecx
284
#endif
237
	andl  %eax,%ebx
285
	andl  %eax,%ebx
238
	sarl  %cl,%ebx
286
	sarl  %cl,%ebx
239
287
Lines 256-262 readloop: Link Here
256
	movl  %edx,%ecx
304
	movl  %edx,%ecx
257
	sarl  $8,%ecx
305
	sarl  $8,%ecx
258
	andl  $0xff,%ecx
306
	andl  $0xff,%ecx
307
#ifdef __PIC__
308
	movl  sign_mask@GOTOFF(%esi,%ecx,4),%ecx
309
#else
259
	movl  sign_mask(,%ecx,4),%ecx
310
	movl  sign_mask(,%ecx,4),%ecx
311
#endif
260
	andl  %ecx,%eax
312
	andl  %ecx,%eax
261
	negl  %eax
313
	negl  %eax
262
	sarl  $31,%eax
314
	sarl  $31,%eax
Lines 326-335 alldone: Link Here
326
378
327
slowpath:
379
slowpath:
328
	/* slow path:	 use dv_decode_vlc */;
380
	/* slow path:	 use dv_decode_vlc */;
381
#ifdef __PIC__
382
	pushl	%esi
383
	leal	vlc@GOTOFF(%esi),%esi
384
	xchgl	%esi,(%esp)	/* last parameter is &vlc */
385
#else
329
	pushl	$vlc		/* last parameter is &vlc */
386
	pushl	$vlc		/* last parameter is &vlc */
387
#endif
330
	pushl	%edx		/* bits_left */
388
	pushl	%edx		/* bits_left */
331
	pushl	%eax		/* bits */
389
	pushl	%eax		/* bits */
332
	call	dv_decode_vlc
390
	call	asm_dv_decode_vlc
333
	addl	$12,%esp
391
	addl	$12,%esp
334
	test	$0x80,%edx	/* If (vlc.run < 0) break */
392
	test	$0x80,%edx	/* If (vlc.run < 0) break */
335
	jne	escape
393
	jne	escape
Lines 359-370 show16: Link Here
359
	gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
417
	gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
360
	*/
418
	*/
361
	.globl dv_parse_video_segment
419
	.globl dv_parse_video_segment
420
	.type	dv_parse_video_segment,@function
362
dv_parse_video_segment:
421
dv_parse_video_segment:
363
	pushl	%ebx
422
	pushl	%ebx
364
	pushl	%edi
423
	pushl	%edi
365
	pushl	%esi
424
	pushl	%esi
366
	pushl	%ebp
425
	pushl	%ebp
367
426
427
	LOAD_PIC_REG_SI()
428
368
#define ARGn(N)  (20+(4*(N)))(%esp)
429
#define ARGn(N)  (20+(4*(N)))(%esp)
369
430
370
	movl	ARGn(1),%eax			/* quality */
431
	movl	ARGn(1),%eax			/* quality */
Lines 373-379 dv_parse_video_segment: Link Here
373
	jz	its_mono
434
	jz	its_mono
374
	movl	$6,%ebx
435
	movl	$6,%ebx
375
its_mono:
436
its_mono:
437
#ifdef __PIC__
438
	movl	%ebx,n_blocks@GOTOFF(%esi)
439
#else
376
	movl	%ebx,n_blocks
440
	movl	%ebx,n_blocks
441
#endif
377
	
442
	
378
	/*
443
	/*
379
	 *	ebx	seg/b
444
	 *	ebx	seg/b
Lines 384-398 its_mono: Link Here
384
	 *	ebp	bl
449
	 *	ebp	bl
385
	 */
450
	 */
386
	movl	ARGn(0),%ebx
451
	movl	ARGn(0),%ebx
452
#ifndef __PIC__
387
	movl	dv_videosegment_t_bs(%ebx),%esi
453
	movl	dv_videosegment_t_bs(%ebx),%esi
388
	movl	bitstream_t_buf(%esi),%esi
454
	movl	bitstream_t_buf(%esi),%esi
455
#endif
389
	leal	dv_videosegment_t_mb(%ebx),%edi
456
	leal	dv_videosegment_t_mb(%ebx),%edi
390
457
391
	movl	$0,%eax
458
	movl	$0,%eax
392
	movl	$0,%ecx
459
	movl	$0,%ecx
393
macloop:
460
macloop:
461
#ifdef __PIC__
462
	movl	%eax,m@GOTOFF(%esi)
463
	movl	%ecx,mb_start@GOTOFF(%esi)
464
#else
394
	movl	%eax,m
465
	movl	%eax,m
395
	movl	%ecx,mb_start
466
	movl	%ecx,mb_start
467
#endif
396
468
397
	movl	ARGn(0),%ebx
469
	movl	ARGn(0),%ebx
398
	
470
	
Lines 400-406 macloop: Link Here
400
	/* mb->qno = bitstream_get(bs,4); */
472
	/* mb->qno = bitstream_get(bs,4); */
401
	movl	%ecx,%edx
473
	movl	%ecx,%edx
402
	shr	$3,%edx
474
	shr	$3,%edx
475
#ifdef __PIC__
476
	movl	dv_videosegment_t_bs(%ebx),%ecx
477
	movl	bitstream_t_buf(%ecx),%ecx
478
	movzbl	3(%ecx,%edx,1),%edx
479
#else
403
	movzbl	3(%esi,%edx,1),%edx
480
	movzbl	3(%esi,%edx,1),%edx
481
#endif
404
	andl	$0xf,%edx
482
	andl	$0xf,%edx
405
	movl	%edx,dv_macroblock_t_qno(%edi)
483
	movl	%edx,dv_macroblock_t_qno(%edi)
406
484
Lines 411-417 macloop: Link Here
411
	movl	%edx,dv_macroblock_t_eob_count(%edi)
489
	movl	%edx,dv_macroblock_t_eob_count(%edi)
412
490
413
	/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
491
	/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
492
#ifdef __PIC__
493
	movl	dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
494
#else
414
	movl	dv_super_map_vertical(,%eax,4),%edx
495
	movl	dv_super_map_vertical(,%eax,4),%edx
496
#endif
415
	movl	dv_videosegment_t_i(%ebx),%ecx
497
	movl	dv_videosegment_t_i(%ebx),%ecx
416
	addl	%ecx,%edx
498
	addl	%ecx,%edx
417
499
Lines 422-432 skarly: Link Here
422
	andl	$1,%ecx
504
	andl	$1,%ecx
423
	shll	$5,%ecx		/* ecx = (isPAL ? 32 : 0) */
505
	shll	$5,%ecx		/* ecx = (isPAL ? 32 : 0) */
424
506
507
#ifdef __PIC__
508
	leal	mod_10@GOTOFF(%esi),%edx
509
	movzbl	(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
510
#else
425
	movzbl	mod_10(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
511
	movzbl	mod_10(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
512
#endif
426
	movl	%edx,dv_macroblock_t_i(%edi)
513
	movl	%edx,dv_macroblock_t_i(%edi)
427
514
428
	/*  mb->j = dv_super_map_horizontal[m]; */	
515
	/*  mb->j = dv_super_map_horizontal[m]; */	
516
#ifdef __PIC__
517
	movl	dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
518
#else
429
	movl	dv_super_map_horizontal(,%eax,4),%edx
519
	movl	dv_super_map_horizontal(,%eax,4),%edx
520
#endif
430
	movl	%edx,dv_macroblock_t_j(%edi)
521
	movl	%edx,dv_macroblock_t_j(%edi)
431
522
432
	/* mb->k = seg->k; */
523
	/* mb->k = seg->k; */
Lines 445-456 blkloop: Link Here
445
	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
536
	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
446
	*/
537
	*/
447
	/* dc = bitstream_get(bs,9); */
538
	/* dc = bitstream_get(bs,9); */
539
#ifdef __PIC__
540
	movl	mb_start@GOTOFF(%esi),%ecx
541
#else
448
	movl	mb_start,%ecx
542
	movl	mb_start,%ecx
543
#endif
449
	shr	$3,%ecx
544
	shr	$3,%ecx
545
#ifdef __PIC__
546
	movzbl	blk_start@GOTOFF(%esi,%ebx),%edx
547
#else
450
	movzbl	blk_start(%ebx),%edx
548
	movzbl	blk_start(%ebx),%edx
549
#endif
451
	addl	%ecx,%edx
550
	addl	%ecx,%edx
551
#ifdef __PIC__
552
	movl	ARGn(0),%ecx
553
	movl	dv_videosegment_t_bs(%ecx),%ecx
554
	movl	bitstream_t_buf(%ecx),%ecx
555
	movzbl	(%ecx,%edx,1),%eax	/* hi byte */
556
	movzbl	1(%ecx,%edx,1),%ecx	/* lo byte */
557
#else
452
	movzbl	(%esi,%edx,1),%eax	/* hi byte */
558
	movzbl	(%esi,%edx,1),%eax	/* hi byte */
453
	movzbl	1(%esi,%edx,1),%ecx	/* lo byte */
559
	movzbl	1(%esi,%edx,1),%ecx	/* lo byte */
560
#endif
454
	shll	$8,%eax
561
	shll	$8,%eax
455
	orl	%ecx,%eax
562
	orl	%ecx,%eax
456
563
Lines 477-483 blkloop: Link Here
477
584
478
	/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
585
	/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
479
	shll	$6,%eax
586
	shll	$6,%eax
587
#ifdef __PIC__
588
	leal	dv_reorder@GOTOFF+1(%esi,%eax),%eax	/* keep dct_mode*64 from the shll above */
589
#else
480
	addl	$(dv_reorder+1),%eax
590
	addl	$(dv_reorder+1),%eax
591
#endif
481
	movl	%eax,dv_block_t_reorder(%ebp)
592
	movl	%eax,dv_block_t_reorder(%ebp)
482
593
483
	/* bl->reorder_sentinel = bl->reorder + 63; */
594
	/* bl->reorder_sentinel = bl->reorder + 63; */
Lines 485-497 blkloop: Link Here
485
	movl	%eax,dv_block_t_reorder_sentinel(%ebp)
596
	movl	%eax,dv_block_t_reorder_sentinel(%ebp)
486
597
487
	/* bl->offset= mb_start + dv_parse_bit_start[b]; */
598
	/* bl->offset= mb_start + dv_parse_bit_start[b]; */
599
#ifdef __PIC__
600
	movl	mb_start@GOTOFF(%esi),%ecx
601
	movl	dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
602
#else
488
	movl	mb_start,%ecx
603
	movl	mb_start,%ecx
489
	movl	dv_parse_bit_start(,%ebx,4),%eax
604
	movl	dv_parse_bit_start(,%ebx,4),%eax
605
#endif
490
	addl	%ecx,%eax
606
	addl	%ecx,%eax
491
	movl	%eax,dv_block_t_offset(%ebp)
607
	movl	%eax,dv_block_t_offset(%ebp)
492
608
493
	/* bl->end= mb_start + dv_parse_bit_end[b]; */
609
	/* bl->end= mb_start + dv_parse_bit_end[b]; */
610
#ifdef __PIC__
611
	movl	dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
612
#else
494
	movl	dv_parse_bit_end(,%ebx,4),%eax
613
	movl	dv_parse_bit_end(,%ebx,4),%eax
614
#endif
495
	addl	%ecx,%eax
615
	addl	%ecx,%eax
496
	movl	%eax,dv_block_t_end(%ebp)
616
	movl	%eax,dv_block_t_end(%ebp)
497
617
Lines 503-509 blkloop: Link Here
503
	/* no AC pass.  Just zero out the remaining coeffs */
623
	/* no AC pass.  Just zero out the remaining coeffs */
504
	movq    dv_block_t_coeffs(%ebp),%mm1
624
	movq    dv_block_t_coeffs(%ebp),%mm1
505
	pxor    %mm0,%mm0
625
	pxor    %mm0,%mm0
626
#ifdef __PIC__
627
	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
628
#else
506
	pand    const_f_0_0_0,%mm1
629
	pand    const_f_0_0_0,%mm1
630
#endif
507
	movq    %mm1,dv_block_t_coeffs(%ebp)
631
	movq    %mm1,dv_block_t_coeffs(%ebp)
508
	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
632
	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
509
	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
633
	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
Lines 528-545 do_ac_pass: Link Here
528
	pushl	%ebp
652
	pushl	%ebp
529
	pushl	%edi
653
	pushl	%edi
530
	pushl	%eax
654
	pushl	%eax
531
	call	dv_parse_ac_coeffs_pass0
655
	call	asm_dv_parse_ac_coeffs_pass0
532
	addl	$12,%esp
656
	addl	$12,%esp
533
done_ac:
657
done_ac:
534
658
659
#ifdef __PIC__
660
	movl	n_blocks@GOTOFF(%esi),%eax
661
#else
535
	movl	n_blocks,%eax
662
	movl	n_blocks,%eax
663
#endif
536
	addl	$dv_block_t_size,%ebp
664
	addl	$dv_block_t_size,%ebp
537
	incl	%ebx
665
	incl	%ebx
538
	cmpl	%eax,%ebx
666
	cmpl	%eax,%ebx
539
	jnz	blkloop
667
	jnz	blkloop
540
668
669
#ifdef __PIC__
670
	movl	m@GOTOFF(%esi),%eax
671
	movl	mb_start@GOTOFF(%esi),%ecx
672
#else
541
	movl	m,%eax
673
	movl	m,%eax
542
	movl	mb_start,%ecx
674
	movl	mb_start,%ecx
675
#endif
543
	addl	$(8 * 80),%ecx
676
	addl	$(8 * 80),%ecx
544
	addl	$dv_macroblock_t_size,%edi
677
	addl	$dv_macroblock_t_size,%edi
545
	incl	%eax
678
	incl	%eax
Lines 557-563 done_ac: Link Here
557
690
558
	andl	$DV_QUALITY_AC_MASK,%eax
691
	andl	$DV_QUALITY_AC_MASK,%eax
559
	cmpl	$DV_QUALITY_AC_2,%eax
692
	cmpl	$DV_QUALITY_AC_2,%eax
560
	jz	dv_parse_ac_coeffs
693
	jz	asm_dv_parse_ac_coeffs
561
	movl	$0,%eax
694
	movl	$0,%eax
562
	ret
695
	ret
563
696
(-)libdv-0.104-old/libdv/vlc_x86_64.S (-1 / +3 lines)
Lines 169-175 void dv_parse_ac_coeffs_pass0(bitstream_ Link Here
169
.text
169
.text
170
	.align	4
170
	.align	4
171
.globl	dv_parse_ac_coeffs_pass0
171
.globl	dv_parse_ac_coeffs_pass0
172
	
172
.type	dv_parse_ac_coeffs_pass0,@function
173
173
dv_parse_ac_coeffs_pass0:
174
dv_parse_ac_coeffs_pass0:
174
	
175
	
175
	/* Args are at rdi=bs, rsi=mb, rdx=bl */
176
	/* Args are at rdi=bs, rsi=mb, rdx=bl */
Lines 422-427 show16: /* not u Link Here
422
gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
423
gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality) {
423
*/
424
*/
424
	.globl dv_parse_video_segment
425
	.globl dv_parse_video_segment
426
	.type	dv_parse_video_segment,@function
425
dv_parse_video_segment:
427
dv_parse_video_segment:
426
	
428
	
427
	/* Args are at rdi=seg, rsi=quality */
429
	/* Args are at rdi=seg, rsi=quality */

Return to bug 121871