Lines 24-30
Link Here
|
24 |
{ |
24 |
{ |
25 |
volatile short out[4]; |
25 |
volatile short out[4]; |
26 |
asm ( |
26 |
asm ( |
27 |
"movl $8, %%ecx \n\t" |
27 |
"push %%ebx \n\t" |
|
|
28 |
"mov $8, %%ebx \n\t" |
28 |
"pxor %%mm4, %%mm4 \n\t" |
29 |
"pxor %%mm4, %%mm4 \n\t" |
29 |
"pxor %%mm7, %%mm7 \n\t" |
30 |
"pxor %%mm7, %%mm7 \n\t" |
30 |
|
31 |
|
Lines 35-41
Link Here
|
35 |
"movq (%%"REG_S"), %%mm2 \n\t" |
36 |
"movq (%%"REG_S"), %%mm2 \n\t" |
36 |
"add %%"REG_a", %%"REG_S" \n\t" |
37 |
"add %%"REG_a", %%"REG_S" \n\t" |
37 |
"movq (%%"REG_D"), %%mm1 \n\t" |
38 |
"movq (%%"REG_D"), %%mm1 \n\t" |
38 |
"add %%"REG_b", %%"REG_D" \n\t" |
39 |
"add %%"REG_c", %%"REG_D" \n\t" |
39 |
"psubusb %%mm1, %%mm2 \n\t" |
40 |
"psubusb %%mm1, %%mm2 \n\t" |
40 |
"psubusb %%mm0, %%mm1 \n\t" |
41 |
"psubusb %%mm0, %%mm1 \n\t" |
41 |
"movq %%mm2, %%mm0 \n\t" |
42 |
"movq %%mm2, %%mm0 \n\t" |
Lines 49-60
Link Here
|
49 |
"paddw %%mm2, %%mm4 \n\t" |
50 |
"paddw %%mm2, %%mm4 \n\t" |
50 |
"paddw %%mm3, %%mm4 \n\t" |
51 |
"paddw %%mm3, %%mm4 \n\t" |
51 |
|
52 |
|
52 |
"decl %%ecx \n\t" |
53 |
"decl %%ebx \n\t" |
53 |
"jnz 1b \n\t" |
54 |
"jnz 1b \n\t" |
54 |
"movq %%mm4, (%%"REG_d") \n\t" |
55 |
"movq %%mm4, (%%"REG_d") \n\t" |
55 |
"emms \n\t" |
56 |
"emms \n\t" |
|
|
57 |
"pop %%ebx \n\t" |
56 |
: |
58 |
: |
57 |
: "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out) |
59 |
: "S" (old), "D" (new), "a" ((long)os), "c" ((long)ns), "d" (out) |
58 |
: "memory" |
60 |
: "memory" |
59 |
); |
61 |
); |
60 |
return out[0]+out[1]+out[2]+out[3]; |
62 |
return out[0]+out[1]+out[2]+out[3]; |