__asm__ __volatile__ ( \
"std\n\t" \
"rep ; movsl\n\t" \
"cld\n\t" \
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
: "0" (n >> 2), \
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \