/* Initialise zero block flags */
/* Set up SSE rounding mode */
/*
 * Load the SSE control/status register (MXCSR) from trunc_mxcsr.
 * NOTE(review): the name suggests this selects round-toward-zero
 * (truncation) -- confirm against the definition of trunc_mxcsr.
 *
 * ldmxcsr only accepts a 32-bit memory operand, so the constraint must
 * be "m".  The "X" constraint allows any operand whatsoever and may let
 * the compiler substitute a register or an immediate, which will not
 * assemble.  A single load is sufficient.
 */
__asm__ ( "ldmxcsr %0\n" : : "m" (trunc_mxcsr) );
for (i=0; i < 64 ; i+=4)
{