Lines 41-49
Link Here
|
41 |
#define DV_WIDTH_SHORT_HALF 720 |
41 |
#define DV_WIDTH_SHORT_HALF 720 |
42 |
#define DV_WIDTH_BYTE_HALF 360 |
42 |
#define DV_WIDTH_BYTE_HALF 360 |
43 |
|
43 |
|
44 |
.global _dv_rgbtoycb_mmx |
|
|
45 |
# .global yuvtoycb_mmx |
46 |
|
47 |
.data |
44 |
.data |
48 |
|
45 |
|
49 |
.align 8 |
46 |
.align 8 |
Lines 110-129
VR0GR: .long 0,0
Link Here
|
110 |
VBG0B: .long 0,0 |
107 |
VBG0B: .long 0,0 |
111 |
|
108 |
|
112 |
#endif |
109 |
#endif |
113 |
|
110 |
|
|
|
111 |
#include "asm_common.S" |
112 |
|
114 |
.text |
113 |
.text |
115 |
|
114 |
|
116 |
#define _inPtr 8 |
115 |
#define _inPtr 24+8 |
117 |
#define _rows 12 |
116 |
#define _rows 24+12 |
118 |
#define _columns 16 |
117 |
#define _columns 24+16 |
119 |
#define _outyPtr 20 |
118 |
#define _outyPtr 24+20 |
120 |
#define _outuPtr 24 |
119 |
#define _outuPtr 24+24 |
121 |
#define _outvPtr 28 |
120 |
#define _outvPtr 24+28 |
122 |
|
121 |
|
|
|
122 |
.global _dv_rgbtoycb_mmx |
123 |
.hidden _dv_rgbtoycb_mmx |
124 |
.type _dv_rgbtoycb_mmx,@function |
123 |
_dv_rgbtoycb_mmx: |
125 |
_dv_rgbtoycb_mmx: |
124 |
|
126 |
|
125 |
pushl %ebp |
127 |
pushl %ebp |
126 |
movl %esp, %ebp |
|
|
127 |
pushl %eax |
128 |
pushl %eax |
128 |
pushl %ebx |
129 |
pushl %ebx |
129 |
pushl %ecx |
130 |
pushl %ecx |
Lines 131-176
_dv_rgbtoycb_mmx:
Link Here
|
131 |
pushl %esi |
132 |
pushl %esi |
132 |
pushl %edi |
133 |
pushl %edi |
133 |
|
134 |
|
134 |
leal ZEROSX, %eax #This section gets around a bug |
135 |
LOAD_PIC_REG_BP() |
|
|
136 |
|
137 |
leal MUNG(ZEROSX), %eax #This section gets around a bug |
135 |
movq (%eax), %mm0 #unlikely to persist |
138 |
movq (%eax), %mm0 #unlikely to persist |
136 |
movq %mm0, ZEROS |
139 |
movq %mm0, MUNG(ZEROS) |
137 |
leal OFFSETDX, %eax |
140 |
leal MUNG(OFFSETDX), %eax |
138 |
movq (%eax), %mm0 |
141 |
movq (%eax), %mm0 |
139 |
movq %mm0, OFFSETD |
142 |
movq %mm0, MUNG(OFFSETD) |
140 |
leal OFFSETWX, %eax |
143 |
leal MUNG(OFFSETWX), %eax |
141 |
movq (%eax), %mm0 |
144 |
movq (%eax), %mm0 |
142 |
movq %mm0, OFFSETW |
145 |
movq %mm0, MUNG(OFFSETW) |
143 |
leal OFFSETBX, %eax |
146 |
leal MUNG(OFFSETBX), %eax |
144 |
movq (%eax), %mm0 |
147 |
movq (%eax), %mm0 |
145 |
movq %mm0, OFFSETB |
148 |
movq %mm0, MUNG(OFFSETB) |
146 |
leal YR0GRX, %eax |
149 |
leal MUNG(YR0GRX), %eax |
147 |
movq (%eax), %mm0 |
150 |
movq (%eax), %mm0 |
148 |
movq %mm0, YR0GR |
151 |
movq %mm0, MUNG(YR0GR) |
149 |
leal YBG0BX, %eax |
152 |
leal MUNG(YBG0BX), %eax |
150 |
movq (%eax), %mm0 |
153 |
movq (%eax), %mm0 |
151 |
movq %mm0, YBG0B |
154 |
movq %mm0, MUNG(YBG0B) |
152 |
leal UR0GRX, %eax |
155 |
leal MUNG(UR0GRX), %eax |
153 |
movq (%eax), %mm0 |
156 |
movq (%eax), %mm0 |
154 |
movq %mm0, UR0GR |
157 |
movq %mm0, MUNG(UR0GR) |
155 |
leal UBG0BX, %eax |
158 |
leal MUNG(UBG0BX), %eax |
156 |
movq (%eax), %mm0 |
159 |
movq (%eax), %mm0 |
157 |
movq %mm0, UBG0B |
160 |
movq %mm0, MUNG(UBG0B) |
158 |
leal VR0GRX, %eax |
161 |
leal MUNG(VR0GRX), %eax |
159 |
movq (%eax), %mm0 |
162 |
movq (%eax), %mm0 |
160 |
movq %mm0, VR0GR |
163 |
movq %mm0, MUNG(VR0GR) |
161 |
leal VBG0BX, %eax |
164 |
leal MUNG(VBG0BX), %eax |
162 |
movq (%eax), %mm0 |
165 |
movq (%eax), %mm0 |
163 |
movq %mm0, VBG0B |
166 |
movq %mm0, MUNG(VBG0B) |
164 |
|
167 |
movl _rows(%esp), %eax |
165 |
movl _rows(%ebp), %eax |
168 |
movl _columns(%esp), %ebx |
166 |
movl _columns(%ebp), %ebx |
|
|
167 |
mull %ebx #number pixels |
169 |
mull %ebx #number pixels |
168 |
shrl $3, %eax #number of loops |
170 |
shrl $3, %eax #number of loops |
169 |
movl %eax, %edi #loop counter in edi |
171 |
movl %eax, %edi #loop counter in edi |
170 |
movl _inPtr(%ebp), %eax |
172 |
movl _inPtr(%esp), %eax |
171 |
movl _outyPtr(%ebp), %ebx |
173 |
movl _outyPtr(%esp), %ebx |
172 |
movl _outuPtr(%ebp), %ecx |
174 |
movl _outuPtr(%esp), %ecx |
173 |
movl _outvPtr(%ebp), %edx |
175 |
movl _outvPtr(%esp), %edx |
174 |
rgbtoycb_mmx_loop: |
176 |
rgbtoycb_mmx_loop: |
175 |
movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 |
177 |
movq (%eax), %mm1 #load G2R2B1G1R1B0G0R0 |
176 |
pxor %mm6, %mm6 #0 -> mm6 |
178 |
pxor %mm6, %mm6 #0 -> mm6 |
Lines 184-212
rgbtoycb_mmx_loop:
Link Here
|
184 |
punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 |
186 |
punpcklbw %mm6, %mm1 #B1G1R1B0 -> mm1 |
185 |
movq %mm0, %mm2 #R1B0G0R0 -> mm2 |
187 |
movq %mm0, %mm2 #R1B0G0R0 -> mm2 |
186 |
|
188 |
|
187 |
pmaddwd YR0GR, %mm0 #yrR1,ygG0+yrR0 -> mm0 |
189 |
pmaddwd MUNG(YR0GR), %mm0 #yrR1,ygG0+yrR0 -> mm0 |
188 |
movq %mm1, %mm3 #B1G1R1B0 -> mm3 |
190 |
movq %mm1, %mm3 #B1G1R1B0 -> mm3 |
189 |
|
191 |
|
190 |
pmaddwd YBG0B, %mm1 #ybB1+ygG1,ybB0 -> mm1 |
192 |
pmaddwd MUNG(YBG0B), %mm1 #ybB1+ygG1,ybB0 -> mm1 |
191 |
movq %mm2, %mm4 #R1B0G0R0 -> mm4 |
193 |
movq %mm2, %mm4 #R1B0G0R0 -> mm4 |
192 |
|
194 |
|
193 |
pmaddwd UR0GR, %mm2 #urR1,ugG0+urR0 -> mm2 |
195 |
pmaddwd MUNG(UR0GR), %mm2 #urR1,ugG0+urR0 -> mm2 |
194 |
movq %mm3, %mm5 #B1G1R1B0 -> mm5 |
196 |
movq %mm3, %mm5 #B1G1R1B0 -> mm5 |
195 |
|
197 |
|
196 |
pmaddwd UBG0B, %mm3 #ubB1+ugG1,ubB0 -> mm3 |
198 |
pmaddwd MUNG(UBG0B), %mm3 #ubB1+ugG1,ubB0 -> mm3 |
197 |
punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 |
199 |
punpckhbw %mm6, %mm7 # 00G2R2 -> mm7 |
198 |
|
200 |
|
199 |
pmaddwd VR0GR, %mm4 #vrR1,vgG0+vrR0 -> mm4 |
201 |
pmaddwd MUNG(VR0GR), %mm4 #vrR1,vgG0+vrR0 -> mm4 |
200 |
paddd %mm1, %mm0 #Y1Y0 -> mm0 |
202 |
paddd %mm1, %mm0 #Y1Y0 -> mm0 |
201 |
|
203 |
|
202 |
pmaddwd VBG0B, %mm5 #vbB1+vgG1,vbB0 -> mm5 |
204 |
pmaddwd MUNG(VBG0B), %mm5 #vbB1+vgG1,vbB0 -> mm5 |
203 |
|
205 |
|
204 |
movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 |
206 |
movq 8(%eax), %mm1 #R5B4G4R4B3G3R3B2 -> mm1 |
205 |
paddd %mm3, %mm2 #U1U0 -> mm2 |
207 |
paddd %mm3, %mm2 #U1U0 -> mm2 |
206 |
|
208 |
|
207 |
movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 |
209 |
movq %mm1, %mm6 #R5B4G4R4B3G3R3B2 -> mm6 |
208 |
|
210 |
|
209 |
punpcklbw ZEROS, %mm1 #B3G3R3B2 -> mm1 |
211 |
punpcklbw MUNG(ZEROS), %mm1 #B3G3R3B2 -> mm1 |
210 |
paddd %mm5, %mm4 #V1V0 -> mm4 |
212 |
paddd %mm5, %mm4 #V1V0 -> mm4 |
211 |
|
213 |
|
212 |
movq %mm1, %mm5 #B3G3R3B2 -> mm5 |
214 |
movq %mm1, %mm5 #B3G3R3B2 -> mm5 |
Lines 214-242
rgbtoycb_mmx_loop:
Link Here
|
214 |
|
216 |
|
215 |
paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 |
217 |
paddd %mm7, %mm1 #R3B200+00G2R2=R3B2G2R2->mm1 |
216 |
|
218 |
|
217 |
punpckhbw ZEROS, %mm6 #R5B4G4R4 -> mm6 |
219 |
punpckhbw MUNG(ZEROS), %mm6 #R5B4G4R4 -> mm6 |
218 |
movq %mm1, %mm3 #R3B2G2R2 -> mm3 |
220 |
movq %mm1, %mm3 #R3B2G2R2 -> mm3 |
219 |
|
221 |
|
220 |
pmaddwd YR0GR, %mm1 #yrR3,ygG2+yrR2 -> mm1 |
222 |
pmaddwd MUNG(YR0GR), %mm1 #yrR3,ygG2+yrR2 -> mm1 |
221 |
movq %mm5, %mm7 #B3G3R3B2 -> mm7 |
223 |
movq %mm5, %mm7 #B3G3R3B2 -> mm7 |
222 |
|
224 |
|
223 |
pmaddwd YBG0B, %mm5 #ybB3+ygG3,ybB2 -> mm5 |
225 |
pmaddwd MUNG(YBG0B), %mm5 #ybB3+ygG3,ybB2 -> mm5 |
224 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 |
226 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0 |
225 |
|
227 |
|
226 |
movq %mm6, TEMP0 #R5B4G4R4 -> TEMP0 |
228 |
movq %mm6, MUNG(TEMP0) #R5B4G4R4 -> TEMP0 |
227 |
movq %mm3, %mm6 #R3B2G2R2 -> mm6 |
229 |
movq %mm3, %mm6 #R3B2G2R2 -> mm6 |
228 |
pmaddwd UR0GR, %mm6 #urR3,ugG2+urR2 -> mm6 |
230 |
pmaddwd MUNG(UR0GR), %mm6 #urR3,ugG2+urR2 -> mm6 |
229 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 |
231 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2 |
230 |
|
232 |
|
231 |
paddd %mm5, %mm1 #Y3Y2 -> mm1 |
233 |
paddd %mm5, %mm1 #Y3Y2 -> mm1 |
232 |
movq %mm7, %mm5 #B3G3R3B2 -> mm5 |
234 |
movq %mm7, %mm5 #B3G3R3B2 -> mm5 |
233 |
pmaddwd UBG0B, %mm7 #ubB3+ugG3,ubB2 |
235 |
pmaddwd MUNG(UBG0B), %mm7 #ubB3+ugG3,ubB2 |
234 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 |
236 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1 |
235 |
|
237 |
|
236 |
pmaddwd VR0GR, %mm3 #vrR3,vgG2+vgR2 |
238 |
pmaddwd MUNG(VR0GR), %mm3 #vrR3,vgG2+vgR2 |
237 |
packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 |
239 |
packssdw %mm1, %mm0 #Y3Y2Y1Y0 -> mm0 |
238 |
|
240 |
|
239 |
pmaddwd VBG0B, %mm5 #vbB3+vgG3,vbB2 -> mm5 |
241 |
pmaddwd MUNG(VBG0B), %mm5 #vbB3+vgG3,vbB2 -> mm5 |
240 |
psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 |
242 |
psrad $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4 |
241 |
|
243 |
|
242 |
movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm1 |
244 |
movq 16(%eax), %mm1 #B7G7R7B6G6R6B5G5 -> mm1 |
Lines 251-308
rgbtoycb_mmx_loop:
Link Here
|
251 |
movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 |
253 |
movq %mm7, %mm5 #R7B6G6R6B5G500 -> mm5 |
252 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 |
254 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3 |
253 |
|
255 |
|
254 |
paddw OFFSETY, %mm0 |
256 |
paddw MUNG(OFFSETY), %mm0 |
255 |
movq %mm0, (%ebx) #store Y3Y2Y1Y0 |
257 |
movq %mm0, (%ebx) #store Y3Y2Y1Y0 |
256 |
packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 |
258 |
packssdw %mm6, %mm2 #32-bit scaled U3U2U1U0 -> mm2 |
257 |
|
259 |
|
258 |
movq TEMP0, %mm0 #R5B4G4R4 -> mm0 |
260 |
movq MUNG(TEMP0), %mm0 #R5B4G4R4 -> mm0 |
259 |
addl $8, %ebx |
261 |
addl $8, %ebx |
260 |
|
262 |
|
261 |
punpcklbw ZEROS, %mm7 #B5G500 -> mm7 |
263 |
punpcklbw MUNG(ZEROS), %mm7 #B5G500 -> mm7 |
262 |
movq %mm0, %mm6 #R5B4G4R4 -> mm6 |
264 |
movq %mm0, %mm6 #R5B4G4R4 -> mm6 |
263 |
|
265 |
|
264 |
movq %mm2, TEMPU #32-bit scaled U3U2U1U0 -> TEMPU |
266 |
movq %mm2, MUNG(TEMPU) #32-bit scaled U3U2U1U0 -> TEMPU |
265 |
psrlq $32, %mm0 #00R5B4 -> mm0 |
267 |
psrlq $32, %mm0 #00R5B4 -> mm0 |
266 |
|
268 |
|
267 |
paddw %mm0, %mm7 #B5G5R5B4 -> mm7 |
269 |
paddw %mm0, %mm7 #B5G5R5B4 -> mm7 |
268 |
movq %mm6, %mm2 #R5B4G4R4 -> mm2 |
270 |
movq %mm6, %mm2 #R5B4G4R4 -> mm2 |
269 |
|
271 |
|
270 |
pmaddwd YR0GR, %mm2 #yrR5,ygG4+yrR4 -> mm2 |
272 |
pmaddwd MUNG(YR0GR), %mm2 #yrR5,ygG4+yrR4 -> mm2 |
271 |
movq %mm7, %mm0 #B5G5R5B4 -> mm0 |
273 |
movq %mm7, %mm0 #B5G5R5B4 -> mm0 |
272 |
|
274 |
|
273 |
pmaddwd YBG0B, %mm7 #ybB5+ygG5,ybB4 -> mm7 |
275 |
pmaddwd MUNG(YBG0B), %mm7 #ybB5+ygG5,ybB4 -> mm7 |
274 |
packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 |
276 |
packssdw %mm3, %mm4 #32-bit scaled V3V2V1V0 -> mm4 |
275 |
|
277 |
|
276 |
addl $24, %eax #increment RGB count |
278 |
addl $24, %eax #increment RGB count |
277 |
|
279 |
|
278 |
movq %mm4, TEMPV #(V3V2V1V0)/256 -> TEMPV |
280 |
movq %mm4, MUNG(TEMPV) #(V3V2V1V0)/256 -> TEMPV |
279 |
movq %mm6, %mm4 #R5B4G4R4 -> mm4 |
281 |
movq %mm6, %mm4 #R5B4G4R4 -> mm4 |
280 |
|
282 |
|
281 |
pmaddwd UR0GR, %mm6 #urR5,ugG4+urR4 |
283 |
pmaddwd MUNG(UR0GR), %mm6 #urR5,ugG4+urR4 |
282 |
movq %mm0, %mm3 #B5G5R5B4 -> mm3 |
284 |
movq %mm0, %mm3 #B5G5R5B4 -> mm3 |
283 |
|
285 |
|
284 |
pmaddwd UBG0B, %mm0 #ubB5+ugG5,ubB4 |
286 |
pmaddwd MUNG(UBG0B), %mm0 #ubB5+ugG5,ubB4 |
285 |
paddd %mm7, %mm2 #Y5Y4 -> mm2 |
287 |
paddd %mm7, %mm2 #Y5Y4 -> mm2 |
286 |
|
288 |
|
287 |
pmaddwd VR0GR, %mm4 #vrR5,vgG4+vrR4 -> mm4 |
289 |
pmaddwd MUNG(VR0GR), %mm4 #vrR5,vgG4+vrR4 -> mm4 |
288 |
pxor %mm7, %mm7 #0 -> mm7 |
290 |
pxor %mm7, %mm7 #0 -> mm7 |
289 |
|
291 |
|
290 |
pmaddwd VBG0B, %mm3 #vbB5+vgG5,vbB4 -> mm3 |
292 |
pmaddwd MUNG(VBG0B), %mm3 #vbB5+vgG5,vbB4 -> mm3 |
291 |
punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 |
293 |
punpckhbw %mm7, %mm1 #B7G7R7B6 -> mm1 |
292 |
|
294 |
|
293 |
paddd %mm6, %mm0 #U5U4 -> mm0 |
295 |
paddd %mm6, %mm0 #U5U4 -> mm0 |
294 |
movq %mm1, %mm6 #B7G7R7B6 -> mm6 |
296 |
movq %mm1, %mm6 #B7G7R7B6 -> mm6 |
295 |
|
297 |
|
296 |
pmaddwd YBG0B, %mm6 #ybB7+ygG7,ybB6 -> mm6 |
298 |
pmaddwd MUNG(YBG0B), %mm6 #ybB7+ygG7,ybB6 -> mm6 |
297 |
punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 |
299 |
punpckhbw %mm7, %mm5 #R7B6G6R6 -> mm5 |
298 |
|
300 |
|
299 |
movq %mm5, %mm7 #R7B6G6R6 -> mm7 |
301 |
movq %mm5, %mm7 #R7B6G6R6 -> mm7 |
300 |
paddd %mm4, %mm3 #V5V4 -> mm3 |
302 |
paddd %mm4, %mm3 #V5V4 -> mm3 |
301 |
|
303 |
|
302 |
pmaddwd YR0GR, %mm5 #yrR7,ygG6+yrR6 -> mm5 |
304 |
pmaddwd MUNG(YR0GR), %mm5 #yrR7,ygG6+yrR6 -> mm5 |
303 |
movq %mm1, %mm4 #B7G7R7B6 -> mm4 |
305 |
movq %mm1, %mm4 #B7G7R7B6 -> mm4 |
304 |
|
306 |
|
305 |
pmaddwd UBG0B, %mm4 #ubB7+ugG7,ubB6 -> mm4 |
307 |
pmaddwd MUNG(UBG0B), %mm4 #ubB7+ugG7,ubB6 -> mm4 |
306 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 |
308 |
psrad $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0 |
307 |
|
309 |
|
308 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 |
310 |
psrad $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2 |
Lines 310-334
rgbtoycb_mmx_loop:
Link Here
|
310 |
paddd %mm5, %mm6 #Y7Y6 -> mm6 |
312 |
paddd %mm5, %mm6 #Y7Y6 -> mm6 |
311 |
movq %mm7, %mm5 #R7B6G6R6 -> mm5 |
313 |
movq %mm7, %mm5 #R7B6G6R6 -> mm5 |
312 |
|
314 |
|
313 |
pmaddwd UR0GR, %mm7 #urR7,ugG6+ugR6 -> mm7 |
315 |
pmaddwd MUNG(UR0GR), %mm7 #urR7,ugG6+ugR6 -> mm7 |
314 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 |
316 |
psrad $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3 |
315 |
|
317 |
|
316 |
pmaddwd VBG0B, %mm1 #vbB7+vgG7,vbB6 -> mm1 |
318 |
pmaddwd MUNG(VBG0B), %mm1 #vbB7+vgG7,vbB6 -> mm1 |
317 |
psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 |
319 |
psrad $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6 |
318 |
|
320 |
|
319 |
packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 |
321 |
packssdw %mm6, %mm2 #Y7Y6Y5Y4 -> mm2 |
320 |
|
322 |
|
321 |
pmaddwd VR0GR, %mm5 #vrR7,vgG6+vrR6 -> mm5 |
323 |
pmaddwd MUNG(VR0GR), %mm5 #vrR7,vgG6+vrR6 -> mm5 |
322 |
paddd %mm4, %mm7 #U7U6 -> mm7 |
324 |
paddd %mm4, %mm7 #U7U6 -> mm7 |
323 |
|
325 |
|
324 |
psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 |
326 |
psrad $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7 |
325 |
paddw OFFSETY, %mm2 |
327 |
paddw MUNG(OFFSETY), %mm2 |
326 |
movq %mm2, (%ebx) #store Y7Y6Y5Y4 |
328 |
movq %mm2, (%ebx) #store Y7Y6Y5Y4 |
327 |
|
329 |
|
328 |
movq ALLONE, %mm6 |
330 |
movq MUNG(ALLONE), %mm6 |
329 |
packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 |
331 |
packssdw %mm7, %mm0 #32-bit scaled U7U6U5U4 -> mm0 |
330 |
|
332 |
|
331 |
movq TEMPU, %mm4 #32-bit scaled U3U2U1U0 -> mm4 |
333 |
movq MUNG(TEMPU), %mm4 #32-bit scaled U3U2U1U0 -> mm4 |
332 |
pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 |
334 |
pmaddwd %mm6, %mm0 #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0 |
333 |
|
335 |
|
334 |
pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 |
336 |
pmaddwd %mm6, %mm4 #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4 |
Lines 338-345
rgbtoycb_mmx_loop:
Link Here
|
338 |
|
340 |
|
339 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 |
341 |
psrad $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1 |
340 |
psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 |
342 |
psraw $1, %mm4 #divide UU3 UU2 UU1 UU0 by 2 -> mm4 |
341 |
|
343 |
|
342 |
movq TEMPV, %mm5 #32-bit scaled V3V2V1V0 -> mm5 |
344 |
movq MUNG(TEMPV), %mm5 #32-bit scaled V3V2V1V0 -> mm5 |
343 |
|
345 |
|
344 |
movq %mm4, (%ecx) # store U |
346 |
movq %mm4, (%ecx) # store U |
345 |
|
347 |
|
Lines 372-377
rgbtoycb_mmx_loop:
Link Here
|
372 |
ret |
374 |
ret |
373 |
|
375 |
|
374 |
.global _dv_ppm_copy_y_block_mmx |
376 |
.global _dv_ppm_copy_y_block_mmx |
|
|
377 |
.hidden _dv_ppm_copy_y_block_mmx |
378 |
.type _dv_ppm_copy_y_block_mmx,@function |
375 |
_dv_ppm_copy_y_block_mmx: |
379 |
_dv_ppm_copy_y_block_mmx: |
376 |
|
380 |
|
377 |
pushl %ebp |
381 |
pushl %ebp |
Lines 422-438
_dv_ppm_copy_y_block_mmx:
Link Here
|
422 |
ret |
426 |
ret |
423 |
|
427 |
|
424 |
.global _dv_pgm_copy_y_block_mmx |
428 |
.global _dv_pgm_copy_y_block_mmx |
|
|
429 |
.hidden _dv_pgm_copy_y_block_mmx |
430 |
.type _dv_pgm_copy_y_block_mmx,@function # type the pgm entry point whose label follows |
425 |
_dv_pgm_copy_y_block_mmx: |
431 |
_dv_pgm_copy_y_block_mmx: |
426 |
|
432 |
|
427 |
pushl %ebp |
433 |
pushl %ebp |
428 |
movl %esp, %ebp |
|
|
429 |
pushl %esi |
434 |
pushl %esi |
430 |
pushl %edi |
435 |
pushl %edi |
431 |
|
|
|
432 |
movl 8(%ebp), %edi # dest |
433 |
movl 12(%ebp), %esi # src |
434 |
|
436 |
|
435 |
movq OFFSETY, %mm7 |
437 |
LOAD_PIC_REG_BP() |
|
|
438 |
|
439 |
movl 16(%esp), %edi # dest |
440 |
movl 20(%esp), %esi # src |
441 |
|
442 |
movq MUNG(OFFSETY), %mm7 |
436 |
pxor %mm6, %mm6 |
443 |
pxor %mm6, %mm6 |
437 |
|
444 |
|
438 |
movq (%esi), %mm0 |
445 |
movq (%esi), %mm0 |
Lines 564-580
_dv_pgm_copy_y_block_mmx:
Link Here
|
564 |
ret |
571 |
ret |
565 |
|
572 |
|
566 |
.global _dv_video_copy_y_block_mmx |
573 |
.global _dv_video_copy_y_block_mmx |
|
|
574 |
.hidden _dv_video_copy_y_block_mmx |
575 |
.type _dv_video_copy_y_block_mmx,@function |
567 |
_dv_video_copy_y_block_mmx: |
576 |
_dv_video_copy_y_block_mmx: |
568 |
|
577 |
|
569 |
pushl %ebp |
578 |
pushl %ebp |
570 |
movl %esp, %ebp |
|
|
571 |
pushl %esi |
579 |
pushl %esi |
572 |
pushl %edi |
580 |
pushl %edi |
573 |
|
|
|
574 |
movl 8(%ebp), %edi # dest |
575 |
movl 12(%ebp), %esi # src |
576 |
|
581 |
|
577 |
movq OFFSETBX, %mm7 |
582 |
LOAD_PIC_REG_BP() |
|
|
583 |
|
584 |
movl 16(%esp), %edi # dest |
585 |
movl 20(%esp), %esi # src |
586 |
|
587 |
movq MUNG(OFFSETBX), %mm7 |
578 |
pxor %mm6, %mm6 |
588 |
pxor %mm6, %mm6 |
579 |
|
589 |
|
580 |
movq (%esi), %mm0 |
590 |
movq (%esi), %mm0 |
Lines 709-714
_dv_video_copy_y_block_mmx:
Link Here
|
709 |
|
719 |
|
710 |
|
720 |
|
711 |
.global _dv_ppm_copy_pal_c_block_mmx |
721 |
.global _dv_ppm_copy_pal_c_block_mmx |
|
|
722 |
.hidden _dv_ppm_copy_pal_c_block_mmx |
723 |
.type _dv_ppm_copy_pal_c_block_mmx,@function |
712 |
_dv_ppm_copy_pal_c_block_mmx: |
724 |
_dv_ppm_copy_pal_c_block_mmx: |
713 |
|
725 |
|
714 |
pushl %ebp |
726 |
pushl %ebp |
Lines 852-870
_dv_ppm_copy_pal_c_block_mmx:
Link Here
|
852 |
ret |
864 |
ret |
853 |
|
865 |
|
854 |
.global _dv_pgm_copy_pal_c_block_mmx |
866 |
.global _dv_pgm_copy_pal_c_block_mmx |
|
|
867 |
.hidden _dv_pgm_copy_pal_c_block_mmx # hide the pgm entry point declared .global just above |
868 |
.type _dv_pgm_copy_pal_c_block_mmx,@function |
855 |
_dv_pgm_copy_pal_c_block_mmx: |
869 |
_dv_pgm_copy_pal_c_block_mmx: |
856 |
|
870 |
|
857 |
pushl %ebp |
871 |
pushl %ebp |
858 |
movl %esp, %ebp |
|
|
859 |
pushl %esi |
872 |
pushl %esi |
860 |
pushl %edi |
873 |
pushl %edi |
861 |
pushl %ebx |
874 |
pushl %ebx |
862 |
|
|
|
863 |
movl 8(%ebp), %edi # dest |
864 |
movl 12(%ebp), %esi # src |
865 |
|
875 |
|
|
|
876 |
LOAD_PIC_REG_BP() |
877 |
|
878 |
movl 20(%esp), %edi # dest |
879 |
movl 24(%esp), %esi # src |
866 |
|
880 |
|
867 |
movq OFFSETBX, %mm7 |
881 |
movq MUNG(OFFSETBX), %mm7 |
868 |
pxor %mm6, %mm6 |
882 |
pxor %mm6, %mm6 |
869 |
|
883 |
|
870 |
|
884 |
|
Lines 1000-1017
_dv_pgm_copy_pal_c_block_mmx:
Link Here
|
1000 |
ret |
1014 |
ret |
1001 |
|
1015 |
|
1002 |
.global _dv_video_copy_pal_c_block_mmx |
1016 |
.global _dv_video_copy_pal_c_block_mmx |
|
|
1017 |
.hidden _dv_video_copy_pal_c_block_mmx |
1018 |
.type _dv_video_copy_pal_c_block_mmx,@function |
1003 |
_dv_video_copy_pal_c_block_mmx: |
1019 |
_dv_video_copy_pal_c_block_mmx: |
1004 |
|
1020 |
|
1005 |
pushl %ebp |
1021 |
pushl %ebp |
1006 |
movl %esp, %ebp |
|
|
1007 |
pushl %esi |
1022 |
pushl %esi |
1008 |
pushl %edi |
1023 |
pushl %edi |
1009 |
pushl %ebx |
1024 |
pushl %ebx |
1010 |
|
|
|
1011 |
movl 8(%ebp), %edi # dest |
1012 |
movl 12(%ebp), %esi # src |
1013 |
|
1025 |
|
1014 |
movq OFFSETBX, %mm7 |
1026 |
LOAD_PIC_REG_BP() |
|
|
1027 |
|
1028 |
movl 20(%esp), %edi # dest |
1029 |
movl 24(%esp), %esi # src |
1030 |
|
1031 |
movq MUNG(OFFSETBX), %mm7 |
1015 |
paddw %mm7, %mm7 |
1032 |
paddw %mm7, %mm7 |
1016 |
pxor %mm6, %mm6 |
1033 |
pxor %mm6, %mm6 |
1017 |
|
1034 |
|
Lines 1095-1115
video_copy_pal_c_block_mmx_loop:
Link Here
|
1095 |
ret |
1112 |
ret |
1096 |
|
1113 |
|
1097 |
.global _dv_ppm_copy_ntsc_c_block_mmx |
1114 |
.global _dv_ppm_copy_ntsc_c_block_mmx |
|
|
1115 |
.hidden _dv_ppm_copy_ntsc_c_block_mmx |
1116 |
.type _dv_ppm_copy_ntsc_c_block_mmx,@function |
1098 |
_dv_ppm_copy_ntsc_c_block_mmx: |
1117 |
_dv_ppm_copy_ntsc_c_block_mmx: |
1099 |
|
1118 |
|
1100 |
pushl %ebp |
1119 |
pushl %ebp |
1101 |
movl %esp, %ebp |
|
|
1102 |
pushl %esi |
1120 |
pushl %esi |
1103 |
pushl %edi |
1121 |
pushl %edi |
1104 |
pushl %ebx |
1122 |
pushl %ebx |
1105 |
|
1123 |
|
1106 |
movl 8(%ebp), %edi # dest |
1124 |
LOAD_PIC_REG_BP() |
1107 |
movl 12(%ebp), %esi # src |
1125 |
|
|
|
1126 |
movl 20(%esp), %edi # dest |
1127 |
movl 24(%esp), %esi # src |
1108 |
|
1128 |
|
1109 |
movl $4, %ebx |
1129 |
movl $4, %ebx |
1110 |
|
1130 |
|
1111 |
movq ALLONE, %mm6 |
1131 |
movq MUNG(ALLONE), %mm6 |
1112 |
|
|
|
1113 |
ppm_copy_ntsc_c_block_mmx_loop: |
1132 |
ppm_copy_ntsc_c_block_mmx_loop: |
1114 |
|
1133 |
|
1115 |
movq (%esi), %mm0 |
1134 |
movq (%esi), %mm0 |
Lines 1168-1184
ppm_copy_ntsc_c_block_mmx_loop:
Link Here
|
1168 |
ret |
1187 |
ret |
1169 |
|
1188 |
|
1170 |
.global _dv_pgm_copy_ntsc_c_block_mmx |
1189 |
.global _dv_pgm_copy_ntsc_c_block_mmx |
|
|
1190 |
.hidden _dv_pgm_copy_ntsc_c_block_mmx |
1191 |
.type _dv_pgm_copy_ntsc_c_block_mmx,@function |
1171 |
_dv_pgm_copy_ntsc_c_block_mmx: |
1192 |
_dv_pgm_copy_ntsc_c_block_mmx: |
1172 |
|
1193 |
|
1173 |
pushl %ebp |
1194 |
pushl %ebp |
1174 |
movl %esp, %ebp |
|
|
1175 |
pushl %esi |
1195 |
pushl %esi |
1176 |
pushl %edi |
1196 |
pushl %edi |
1177 |
|
|
|
1178 |
movl 8(%ebp), %edi # dest |
1179 |
movl 12(%ebp), %esi # src |
1180 |
|
1197 |
|
1181 |
movq OFFSETBX, %mm7 |
1198 |
LOAD_PIC_REG_BP() |
|
|
1199 |
|
1200 |
movl 16(%esp), %edi # dest |
1201 |
movl 20(%esp), %esi # src |
1202 |
|
1203 |
movq MUNG(OFFSETBX), %mm7 |
1182 |
paddw %mm7, %mm7 |
1204 |
paddw %mm7, %mm7 |
1183 |
pxor %mm6, %mm6 |
1205 |
pxor %mm6, %mm6 |
1184 |
|
1206 |
|
Lines 1325-1342
_dv_pgm_copy_ntsc_c_block_mmx:
Link Here
|
1325 |
ret |
1347 |
ret |
1326 |
|
1348 |
|
1327 |
.global _dv_video_copy_ntsc_c_block_mmx |
1349 |
.global _dv_video_copy_ntsc_c_block_mmx |
|
|
1350 |
.hidden _dv_video_copy_ntsc_c_block_mmx |
1351 |
.type _dv_video_copy_ntsc_c_block_mmx,@function |
1328 |
_dv_video_copy_ntsc_c_block_mmx: |
1352 |
_dv_video_copy_ntsc_c_block_mmx: |
1329 |
|
1353 |
|
1330 |
pushl %ebp |
1354 |
pushl %ebp |
1331 |
movl %esp, %ebp |
|
|
1332 |
pushl %esi |
1355 |
pushl %esi |
1333 |
pushl %edi |
1356 |
pushl %edi |
1334 |
pushl %ebx |
1357 |
pushl %ebx |
1335 |
|
|
|
1336 |
movl 8(%ebp), %edi # dest |
1337 |
movl 12(%ebp), %esi # src |
1338 |
|
1358 |
|
1339 |
movq OFFSETBX, %mm7 |
1359 |
LOAD_PIC_REG_BP() |
|
|
1360 |
|
1361 |
movl 20(%esp), %edi # dest |
1362 |
movl 24(%esp), %esi # src |
1363 |
|
1364 |
movq MUNG(OFFSETBX), %mm7 |
1340 |
paddw %mm7, %mm7 |
1365 |
paddw %mm7, %mm7 |
1341 |
pxor %mm6, %mm6 |
1366 |
pxor %mm6, %mm6 |
1342 |
|
1367 |
|