Lines 63-74
x0:
Link Here
|
63 |
|
63 |
|
64 |
.align 8 |
64 |
.align 8 |
65 |
.text |
65 |
.text |
|
|
66 |
|
67 |
# undef __i686 /* gcc define gets in our way */ |
68 |
.section .gnu.linkonce.t.__i686.get_pc_thunk.bp,"ax",@progbits /* link-once section: duplicate copies are discarded by the linker */ |
69 |
.globl __i686.get_pc_thunk.bp |
70 |
.hidden __i686.get_pc_thunk.bp |
71 |
.type __i686.get_pc_thunk.bp,@function |
72 |
__i686.get_pc_thunk.bp: /* PIC helper: hands the caller its own return address in %ebp */ |
73 |
movl (%esp), %ebp /* top of stack = address of the instruction following the call */ |
74 |
ret |
75 |
.text /* switch back: otherwise IDCT_mmx below is assembled into the link-once thunk section */ |
66 |
.align 4 |
76 |
.align 4 |
67 |
.globl IDCT_mmx |
77 |
.globl IDCT_mmx |
68 |
.type IDCT_mmx, @function |
78 |
.type IDCT_mmx, @function |
69 |
IDCT_mmx: |
79 |
IDCT_mmx: |
70 |
pushl %ebp |
80 |
pushl %ebp |
71 |
movl %esp, %ebp |
81 |
|
|
|
82 |
call __i686.get_pc_thunk.bp |
83 |
addl $_GLOBAL_OFFSET_TABLE_, %ebp |
84 |
|
72 |
pushl %ebx |
85 |
pushl %ebx |
73 |
pushl %ecx |
86 |
pushl %ecx |
74 |
pushl %edx |
87 |
pushl %edx |
Lines 84-91
IDCT_mmx:
Link Here
|
84 |
pushl $0 |
97 |
pushl $0 |
85 |
pushl $0 |
98 |
pushl $0 |
86 |
|
99 |
|
87 |
movl 8(%ebp), %esi /* source matrix */ |
100 |
movl 8+13*4(%esp), %esi /* source matrix */ |
88 |
leal preSC, %ecx |
101 |
leal preSC@GOTOFF(%ebp), %ecx |
89 |
/* column 0: even part |
102 |
/* column 0: even part |
90 |
* use V4, V12, V0, V8 to produce V22..V25 |
103 |
* use V4, V12, V0, V8 to produce V22..V25 |
91 |
*/ |
104 |
*/ |
Lines 101-107
IDCT_mmx:
Link Here
|
101 |
movq %mm1, %mm2 /* added 11/1/96 */ |
114 |
movq %mm1, %mm2 /* added 11/1/96 */ |
102 |
pmulhw 8*8(%esi),%mm5 /* V8 */ |
115 |
pmulhw 8*8(%esi),%mm5 /* V8 */ |
103 |
psubsw %mm0, %mm1 /* V16 */ |
116 |
psubsw %mm0, %mm1 /* V16 */ |
104 |
pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ |
117 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V18 */ |
105 |
paddsw %mm0, %mm2 /* V17 */ |
118 |
paddsw %mm0, %mm2 /* V17 */ |
106 |
movq %mm2, %mm0 /* duplicate V17 */ |
119 |
movq %mm2, %mm0 /* duplicate V17 */ |
107 |
psraw $1, %mm2 /* t75=t82 */ |
120 |
psraw $1, %mm2 /* t75=t82 */ |
Lines 142-148
IDCT_mmx:
Link Here
|
142 |
paddsw %mm0, %mm3 /* V29 ; free mm0 */ |
155 |
paddsw %mm0, %mm3 /* V29 ; free mm0 */ |
143 |
movq %mm7, %mm1 /* duplicate V26 */ |
156 |
movq %mm7, %mm1 /* duplicate V26 */ |
144 |
psraw $1, %mm3 /* t91=t94 */ |
157 |
psraw $1, %mm3 /* t91=t94 */ |
145 |
pmulhw x539f539f539f539f,%mm7 /* V33 */ |
158 |
pmulhw x539f539f539f539f@GOTOFF(%ebp),%mm7 /* V33 */ |
146 |
psraw $1, %mm1 /* t96 */ |
159 |
psraw $1, %mm1 /* t96 */ |
147 |
movq %mm5, %mm0 /* duplicate V2 */ |
160 |
movq %mm5, %mm0 /* duplicate V2 */ |
148 |
psraw $2, %mm4 /* t85=t87 */ |
161 |
psraw $2, %mm4 /* t85=t87 */ |
Lines 150-164
IDCT_mmx:
Link Here
|
150 |
psubsw %mm4, %mm0 /* V28 ; free mm4 */ |
163 |
psubsw %mm4, %mm0 /* V28 ; free mm4 */ |
151 |
movq %mm0, %mm2 /* duplicate V28 */ |
164 |
movq %mm0, %mm2 /* duplicate V28 */ |
152 |
psraw $1, %mm5 /* t90=t93 */ |
165 |
psraw $1, %mm5 /* t90=t93 */ |
153 |
pmulhw x4546454645464546,%mm0 /* V35 */ |
166 |
pmulhw x4546454645464546@GOTOFF(%ebp),%mm0 /* V35 */ |
154 |
psraw $1, %mm2 /* t97 */ |
167 |
psraw $1, %mm2 /* t97 */ |
155 |
movq %mm5, %mm4 /* duplicate t90=t93 */ |
168 |
movq %mm5, %mm4 /* duplicate t90=t93 */ |
156 |
psubsw %mm2, %mm1 /* V32 ; free mm2 */ |
169 |
psubsw %mm2, %mm1 /* V32 ; free mm2 */ |
157 |
pmulhw x61f861f861f861f8,%mm1 /* V36 */ |
170 |
pmulhw x61f861f861f861f8@GOTOFF(%ebp),%mm1 /* V36 */ |
158 |
psllw $1, %mm7 /* t107 */ |
171 |
psllw $1, %mm7 /* t107 */ |
159 |
paddsw %mm3, %mm5 /* V31 */ |
172 |
paddsw %mm3, %mm5 /* V31 */ |
160 |
psubsw %mm3, %mm4 /* V30 ; free mm3 */ |
173 |
psubsw %mm3, %mm4 /* V30 ; free mm3 */ |
161 |
pmulhw x5a825a825a825a82,%mm4 /* V34 */ |
174 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp),%mm4 /* V34 */ |
162 |
nop |
175 |
nop |
163 |
psubsw %mm1, %mm0 /* V38 */ |
176 |
psubsw %mm1, %mm0 /* V38 */ |
164 |
psubsw %mm7, %mm1 /* V37 ; free mm7 */ |
177 |
psubsw %mm7, %mm1 /* V37 ; free mm7 */ |
Lines 225-231
IDCT_mmx:
Link Here
|
225 |
psubsw %mm7, %mm1 /* V50 */ |
238 |
psubsw %mm7, %mm1 /* V50 */ |
226 |
pmulhw 8*9(%esi), %mm5 /* V9 */ |
239 |
pmulhw 8*9(%esi), %mm5 /* V9 */ |
227 |
paddsw %mm7, %mm2 /* V51 */ |
240 |
paddsw %mm7, %mm2 /* V51 */ |
228 |
pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ |
241 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm1 /* 23170 ->V52 */ |
229 |
movq %mm2, %mm6 /* duplicate V51 */ |
242 |
movq %mm2, %mm6 /* duplicate V51 */ |
230 |
psraw $1, %mm2 /* t138=t144 */ |
243 |
psraw $1, %mm2 /* t138=t144 */ |
231 |
movq %mm3, %mm4 /* duplicate V1 */ |
244 |
movq %mm3, %mm4 /* duplicate V1 */ |
Lines 266-276
IDCT_mmx:
Link Here
|
266 |
* even more by doing the correction step in a later stage when the number |
279 |
* even more by doing the correction step in a later stage when the number |
267 |
* is actually multiplied by 16 |
280 |
* is actually multiplied by 16 |
268 |
*/ |
281 |
*/ |
269 |
paddw x0005000200010001, %mm4 |
282 |
paddw x0005000200010001@GOTOFF(%ebp), %mm4 |
270 |
psubsw %mm6, %mm3 /* V60 ; free mm6 */ |
283 |
psubsw %mm6, %mm3 /* V60 ; free mm6 */ |
271 |
psraw $1, %mm0 /* t154=t156 */ |
284 |
psraw $1, %mm0 /* t154=t156 */ |
272 |
movq %mm3, %mm1 /* duplicate V60 */ |
285 |
movq %mm3, %mm1 /* duplicate V60 */ |
273 |
pmulhw x539f539f539f539f, %mm1 /* V67 */ |
286 |
pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm1 /* V67 */ |
274 |
movq %mm5, %mm6 /* duplicate V3 */ |
287 |
movq %mm5, %mm6 /* duplicate V3 */ |
275 |
psraw $2, %mm4 /* t148=t150 */ |
288 |
psraw $2, %mm4 /* t148=t150 */ |
276 |
paddsw %mm4, %mm5 /* V61 */ |
289 |
paddsw %mm4, %mm5 /* V61 */ |
Lines 279-291
IDCT_mmx:
Link Here
|
279 |
psllw $1, %mm1 /* t169 */ |
292 |
psllw $1, %mm1 /* t169 */ |
280 |
paddsw %mm0, %mm5 /* V65 -> result */ |
293 |
paddsw %mm0, %mm5 /* V65 -> result */ |
281 |
psubsw %mm0, %mm4 /* V64 ; free mm0 */ |
294 |
psubsw %mm0, %mm4 /* V64 ; free mm0 */ |
282 |
pmulhw x5a825a825a825a82, %mm4 /* V68 */ |
295 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm4 /* V68 */ |
283 |
psraw $1, %mm3 /* t158 */ |
296 |
psraw $1, %mm3 /* t158 */ |
284 |
psubsw %mm6, %mm3 /* V66 */ |
297 |
psubsw %mm6, %mm3 /* V66 */ |
285 |
movq %mm5, %mm2 /* duplicate V65 */ |
298 |
movq %mm5, %mm2 /* duplicate V65 */ |
286 |
pmulhw x61f861f861f861f8, %mm3 /* V70 */ |
299 |
pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* V70 */ |
287 |
psllw $1, %mm6 /* t165 */ |
300 |
psllw $1, %mm6 /* t165 */ |
288 |
pmulhw x4546454645464546, %mm6 /* V69 */ |
301 |
pmulhw x4546454645464546@GOTOFF(%ebp), %mm6 /* V69 */ |
289 |
psraw $1, %mm2 /* t172 */ |
302 |
psraw $1, %mm2 /* t172 */ |
290 |
/* moved from next block */ |
303 |
/* moved from next block */ |
291 |
movq 8*5(%esi), %mm0 /* V56 */ |
304 |
movq 8*5(%esi), %mm0 /* V56 */ |
Lines 410-416
IDCT_mmx:
Link Here
|
410 |
* movq 8*13(%esi), %mm4 tmt13 |
423 |
* movq 8*13(%esi), %mm4 tmt13 |
411 |
*/ |
424 |
*/ |
412 |
psubsw %mm4, %mm3 /* V134 */ |
425 |
psubsw %mm4, %mm3 /* V134 */ |
413 |
pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ |
426 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm3 /* 23170 ->V136 */ |
414 |
movq 8*9(%esi), %mm6 /* tmt9 */ |
427 |
movq 8*9(%esi), %mm6 /* tmt9 */ |
415 |
paddsw %mm4, %mm5 /* V135 ; mm4 free */ |
428 |
paddsw %mm4, %mm5 /* V135 ; mm4 free */ |
416 |
movq %mm0, %mm4 /* duplicate tmt1 */ |
429 |
movq %mm0, %mm4 /* duplicate tmt1 */ |
Lines 439-455
IDCT_mmx:
Link Here
|
439 |
psubsw %mm7, %mm0 /* V144 */ |
452 |
psubsw %mm7, %mm0 /* V144 */ |
440 |
movq %mm0, %mm3 /* duplicate V144 */ |
453 |
movq %mm0, %mm3 /* duplicate V144 */ |
441 |
paddsw %mm7, %mm2 /* V147 ; free mm7 */ |
454 |
paddsw %mm7, %mm2 /* V147 ; free mm7 */ |
442 |
pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ |
455 |
pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V151 */ |
443 |
movq %mm1, %mm7 /* duplicate tmt3 */ |
456 |
movq %mm1, %mm7 /* duplicate tmt3 */ |
444 |
paddsw %mm5, %mm7 /* V145 */ |
457 |
paddsw %mm5, %mm7 /* V145 */ |
445 |
psubsw %mm5, %mm1 /* V146 ; free mm5 */ |
458 |
psubsw %mm5, %mm1 /* V146 ; free mm5 */ |
446 |
psubsw %mm1, %mm3 /* V150 */ |
459 |
psubsw %mm1, %mm3 /* V150 */ |
447 |
movq %mm7, %mm5 /* duplicate V145 */ |
460 |
movq %mm7, %mm5 /* duplicate V145 */ |
448 |
pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ |
461 |
pmulhw x4546454645464546@GOTOFF(%ebp), %mm1 /* 17734-> V153 */ |
449 |
psubsw %mm2, %mm5 /* V148 */ |
462 |
psubsw %mm2, %mm5 /* V148 */ |
450 |
pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ |
463 |
pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm3 /* 25080-> V154 */ |
451 |
psllw $2, %mm0 /* t311 */ |
464 |
psllw $2, %mm0 /* t311 */ |
452 |
pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ |
465 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm5 /* 23170-> V152 */ |
453 |
paddsw %mm2, %mm7 /* V149 ; free mm2 */ |
466 |
paddsw %mm2, %mm7 /* V149 ; free mm2 */ |
454 |
psllw $1, %mm1 /* t313 */ |
467 |
psllw $1, %mm1 /* t313 */ |
455 |
nop /* without the nop - freeze here for one clock */ |
468 |
nop /* without the nop - freeze here for one clock */ |
Lines 557-571
IDCT_mmx:
Link Here
|
557 |
paddsw %mm4, %mm3 /* V113 ; free mm4 */ |
570 |
paddsw %mm4, %mm3 /* V113 ; free mm4 */ |
558 |
movq %mm0, %mm4 /* duplicate V110 */ |
571 |
movq %mm0, %mm4 /* duplicate V110 */ |
559 |
paddsw %mm1, %mm2 /* V111 */ |
572 |
paddsw %mm1, %mm2 /* V111 */ |
560 |
pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ |
573 |
pmulhw x539f539f539f539f@GOTOFF(%ebp), %mm0 /* 21407-> V117 */ |
561 |
psubsw %mm1, %mm5 /* V112 ; free mm1 */ |
574 |
psubsw %mm1, %mm5 /* V112 ; free mm1 */ |
562 |
psubsw %mm5, %mm4 /* V116 */ |
575 |
psubsw %mm5, %mm4 /* V116 */ |
563 |
movq %mm2, %mm1 /* duplicate V111 */ |
576 |
movq %mm2, %mm1 /* duplicate V111 */ |
564 |
pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ |
577 |
pmulhw x4546454645464546@GOTOFF(%ebp), %mm5 /* 17734-> V119 */ |
565 |
psubsw %mm3, %mm2 /* V114 */ |
578 |
psubsw %mm3, %mm2 /* V114 */ |
566 |
pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ |
579 |
pmulhw x61f861f861f861f8@GOTOFF(%ebp), %mm4 /* 25080-> V120 */ |
567 |
paddsw %mm3, %mm1 /* V115 ; free mm3 */ |
580 |
paddsw %mm3, %mm1 /* V115 ; free mm3 */ |
568 |
pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ |
581 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm2 /* 23170-> V118 */ |
569 |
psllw $2, %mm0 /* t266 */ |
582 |
psllw $2, %mm0 /* t266 */ |
570 |
movq %mm1, (%esi) /* save V115 */ |
583 |
movq %mm1, (%esi) /* save V115 */ |
571 |
psllw $1, %mm5 /* t268 */ |
584 |
psllw $1, %mm5 /* t268 */ |
Lines 583-589
IDCT_mmx:
Link Here
|
583 |
movq %mm6, %mm3 /* duplicate tmt4 */ |
596 |
movq %mm6, %mm3 /* duplicate tmt4 */ |
584 |
psubsw %mm0, %mm6 /* V100 */ |
597 |
psubsw %mm0, %mm6 /* V100 */ |
585 |
paddsw %mm0, %mm3 /* V101 ; free mm0 */ |
598 |
paddsw %mm0, %mm3 /* V101 ; free mm0 */ |
586 |
pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ |
599 |
pmulhw x5a825a825a825a82@GOTOFF(%ebp), %mm6 /* 23170 ->V102 */ |
587 |
movq %mm7, %mm5 /* duplicate tmt0 */ |
600 |
movq %mm7, %mm5 /* duplicate tmt0 */ |
588 |
movq 8*8(%esi), %mm1 /* tmt8 */ |
601 |
movq 8*8(%esi), %mm1 /* tmt8 */ |
589 |
paddsw %mm1, %mm7 /* V103 */ |
602 |
paddsw %mm1, %mm7 /* V103 */ |
Lines 667-675
IDCT_mmx:
Link Here
|
667 |
popl %edx |
680 |
popl %edx |
668 |
popl %ecx |
681 |
popl %ecx |
669 |
popl %ebx |
682 |
popl %ebx |
670 |
movl %ebp, %esp |
|
|
671 |
popl %ebp |
683 |
popl %ebp |
672 |
|
684 |
|
673 |
ret |
685 |
ret |
674 |
.Lfe1: |
686 |
.Lfe1: |
675 |
.size IDCT_mmx,.Lfe1-IDCT_mmx |
687 |
.size IDCT_mmx,.Lfe1-IDCT_mmx |