--- SDL_gfx-2.0.16/SDL_imageFilter.c.orig 2008-04-28 19:10:08.000000000 +0200 +++ SDL_gfx-2.0.16/SDL_imageFilter.c 2008-04-28 21:29:23.000000000 +0200 @@ -79,17 +79,17 @@ "mov %1, %%ebx \n\t" /* load Src2 address into ebx */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1010: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "paddusb (%%ebx), %%mm1 \n\t" /* mm1=Src1+Src2 (add 8 bytes with saturation) */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1010 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -156,11 +156,11 @@ "mov %1, %%ebx \n\t" /* load Src2 address into ebx */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L21011: \n\t" + "1: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "movq (%%ebx), %%mm2 \n\t" /* load 8 bytes from Src2 into mm2 */ /* --- Byte shift via Word shift --- */ "psrlw $1, %%mm1 \n\t" /* shift 4 WORDS of mm1 1 bit to the right */ "psrlw $1, %%mm2 \n\t" /* shift 4 WORDS of mm2 1 bit to the right */ @@ -172,11 +172,11 @@ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L21011 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length), /* %3 */ @@ -239,17 +239,17 @@ "mov %1, %%ebx \n\t" /* load Src2 address into ebx */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1012: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "psubusb (%%ebx), %%mm1 \n\t" /* mm1=Src1-Src2 (sub 8 bytes with saturation) */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1012 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -311,20 +311,20 @@ "mov %1, %%ebx \n\t" /* load Src2 address into ebx */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1013: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "movq (%%ebx), %%mm2 \n\t" /* load 8 bytes from Src2 into mm2 */ "psubusb (%%ebx), %%mm1 \n\t" /* mm1=Src1-Src2 (sub 8 bytes with saturation) */ "psubusb (%%eax), %%mm2 \n\t" /* mm2=Src2-Src1 (sub 8 bytes with saturation) */ "por %%mm2, %%mm1 \n\t" /* combine both mm2 and mm1 results */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1013 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -386,11 +386,11 @@ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ "pxor %%mm0, %%mm0 \n\t" /* zero mm0 register */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1014: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "movq (%%ebx), %%mm3 \n\t" /* load 8 bytes from Src2 into mm3 */ "movq %%mm1, %%mm2 \n\t" /* copy mm1 into mm2 */ "movq %%mm3, %%mm4 \n\t" /* copy mm3 into mm4 */ "punpcklbw %%mm0, %%mm1 \n\t" /* unpack low bytes of Src1 into words */ "punpckhbw %%mm0, %%mm2 \n\t" /* unpack high bytes of Src1 into words */ @@ -410,11 +410,11 @@ "packuswb %%mm2, %%mm1 \n\t" /* pack words back into bytes with saturation */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1014 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -479,17 +479,17 @@ ("pusha \n\t" "mov %2, %%edx \n\t" /* load Src1 address into edx */ "mov %1, %%esi \n\t" /* load Src2 address into esi */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10141: \n\t" "mov (%%edx), %%al \n\t" /* load a byte from Src1 */ + "1:mov (%%edx), %%al \n\t" /* load a byte from Src1 */ "mulb (%%esi) \n\t" /* mul with a byte from Src2 */ - ".L10142: \n\t" "mov %%al, (%%edi) \n\t" /* move a byte result to Dest */ + "mov %%al, (%%edi) \n\t" /* move a byte result to Dest */ "inc %%edx \n\t" /* increment Src1, Src2, Dest */ "inc %%esi \n\t" /* pointer registers by one */ "inc %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10141 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ ); @@ -555,11 +555,11 @@ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ "pxor %%mm0, %%mm0 \n\t" /* zero mm0 register */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1015: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "movq (%%ebx), %%mm3 \n\t" /* load 8 bytes from Src2 into mm3 */ "movq %%mm1, %%mm2 \n\t" /* copy mm1 into mm2 */ "movq %%mm3, %%mm4 \n\t" /* copy mm3 into mm4 */ "punpcklbw %%mm0, %%mm1 \n\t" /* unpack low bytes of Src1 into words */ "punpckhbw %%mm0, %%mm2 \n\t" /* unpack high bytes of Src1 into words */ @@ -572,11 +572,11 @@ "packuswb %%mm2, %%mm1 \n\t" /* pack words back into bytes with saturation */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1015 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -640,11 +640,11 @@ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ "pxor %%mm0, %%mm0 \n\t" /* zero mm0 register */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1016: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "movq (%%ebx), %%mm3 \n\t" /* load 8 bytes from Src2 into mm3 */ "movq %%mm1, %%mm2 \n\t" /* copy mm1 into mm2 */ "movq %%mm3, %%mm4 \n\t" /* copy mm3 into mm4 */ "punpcklbw %%mm0, %%mm1 \n\t" /* unpack low bytes of Src1 into words */ "punpckhbw %%mm0, %%mm2 \n\t" /* unpack high bytes of Src1 into words */ @@ -659,11 +659,11 @@ "packuswb %%mm2, %%mm1 \n\t" /* pack words back into bytes with saturation */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1016 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -726,17 +726,17 @@ "mov %1, %%ebx \n\t" /* load Src2 address into ebx */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1017: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "pand (%%ebx), %%mm1 \n\t" /* mm1=Src1&Src2 */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1017 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -798,17 +798,17 @@ "mov %1, %%ebx \n\t" /* load Src2 address into ebx */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L91017: \n\t" "movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm1 \n\t" /* load 8 bytes from Src1 into mm1 */ "por (%%ebx), %%mm1 \n\t" /* mm1=Src1|Src2 */ "movq %%mm1, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%ebx \n\t" /* register pointers by 8 */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L91017 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ @@ -866,21 +866,21 @@ ("pusha \n\t" "mov %2, %%edx \n\t" /* load Src1 address into edx */ "mov %1, %%esi \n\t" /* load Src2 address into esi */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %3, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10191: \n\t" "mov (%%esi), %%bl \n\t" /* load a byte from Src2 */ + "1: mov (%%esi), %%bl \n\t" /* load a byte from Src2 */ "cmp $0, %%bl \n\t" /* check if it zero */ - "jnz .L10192 \n\t" "movb $255, (%%edi) \n\t" /* division by zero = 255 !!! */ - "jmp .L10193 \n\t" ".L10192: \n\t" "xor %%ah, %%ah \n\t" /* prepare AX, zero AH register */ + "jnz 2f \n\t" "movb $255, (%%edi) \n\t" /* division by zero = 255 !!! */ + "jmp 3f \n\t" "2: \n\t" "xor %%ah, %%ah \n\t" /* prepare AX, zero AH register */ "mov (%%edx), %%al \n\t" /* load a byte from Src1 into AL */ "div %%bl \n\t" /* divide AL by BL */ "mov %%al, (%%edi) \n\t" /* move a byte result to Dest */ - ".L10193: \n\t" "inc %%edx \n\t" /* increment Src1, Src2, Dest */ + "3: inc %%edx \n\t" /* increment Src1, Src2, Dest */ "inc %%esi \n\t" /* pointer registers by one */ "inc %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10191 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src2), /* %1 */ "m"(Src1), /* %2 */ "m"(length) /* %3 */ ); @@ -937,16 +937,16 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L91117: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from Src1 into mm1 */ + "1: movq (%%eax), %%mm0 \n\t" /* load 8 bytes from Src1 into mm1 */ "pxor %%mm1, %%mm0 \n\t" /* negate mm0 by xoring with mm1 */ "movq %%mm0, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1, Src2 and Dest */ "add $8, %%edi \n\t" "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L91117 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length) /* %2 */ ); @@ -1010,18 +1010,18 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1021: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from Src1 into MM0 */ "paddusb %%mm1, %%mm0 \n\t" /* MM0=SrcDest+C (add 8 bytes with saturation) */ "movq %%mm0, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Dest register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1021 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(C) /* %3 */ @@ -1089,18 +1089,18 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L11023: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "paddusb %%mm1, %%mm0 \n\t" /* MM0=SrcDest+C (add 8 bytes with saturation) */ "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L11023 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(C), /* %3 */ @@ -1184,21 +1184,21 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1022: \n\t" + "1: \n\t" "movq (%%eax), %%mm2 \n\t" /* load 8 bytes from Src1 into MM2 */ "psrlw $1, %%mm2 \n\t" /* shift 4 WORDS of MM2 1 bit to the right */ /* "pand %%mm0, %%mm2 \n\t" // apply Mask to 8 BYTES of MM2 */ ".byte 0x0f, 0xdb, 0xd0 \n\t" "paddusb %%mm1, %%mm2 \n\t" /* MM2=SrcDest+C (add 8 bytes with saturation) */ "movq %%mm2, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1022 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(C), /* %3 */ @@ -1273,17 +1273,17 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1023: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ + "1: movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "psubusb %%mm1, %%mm0 \n\t" /* MM0=SrcDest-C (sub 8 bytes with saturation) */ "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1023 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(C) /* %3 */ @@ -1352,17 +1352,17 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L11024: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ + "1: movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "psubusb %%mm1, %%mm0 \n\t" /* MM0=SrcDest-C (sub 8 bytes with saturation) */ "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L11024 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(C), /* %3 */ @@ -1435,32 +1435,32 @@ "movq (%%edx), %%mm0 \n\t" /* load Mask into mm0 */ "xor %%ecx, %%ecx \n\t" /* zero ECX */ "mov %3, %%cl \n\t" /* load loop counter (N) into CL */ "movd %%ecx, %%mm3 \n\t" /* copy (N) into MM3 */ "pcmpeqb %%mm1, %%mm1 \n\t" /* generate all 1's in mm1 */ - ".L10240: \n\t" /* ** Prepare proper bit-Mask in MM1 ** */ + "1: \n\t" /* ** Prepare proper bit-Mask in MM1 ** */ "psrlw $1, %%mm1 \n\t" /* shift 4 WORDS of MM1 1 bit to the right */ /* "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1 */ ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" /* decrease loop counter */ - "jnz .L10240 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ /* ** Shift all bytes of the image ** */ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10241: \n\t" + "2: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "psrlw %%mm3, %%mm0 \n\t" /* shift 4 WORDS of MM0 (N) bits to the right */ /* "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0 */ ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10241 \n\t" /* check loop termination, proceed if required */ + "jnz 2b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(N), /* %3 */ @@ -1525,17 +1525,17 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L13023: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ + "1: movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "psrld %3, %%mm0 \n\t" "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L13023 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(N) /* %3 */ @@ -1611,25 +1611,25 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ "cmp $128, %%al \n\t" /* if (C <= 128) execute more efficient code */ - "jg .L10251 \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10250: \n\t" "movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ + "jg 2f \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ + "1: movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ "movq %%mm3, %%mm4 \n\t" /* copy MM3 into MM4 */ "punpcklbw %%mm0, %%mm3 \n\t" /* unpack low bytes of SrcDest into words */ "punpckhbw %%mm0, %%mm4 \n\t" /* unpack high bytes of SrcDest into words */ "pmullw %%mm1, %%mm3 \n\t" /* mul low bytes of SrcDest and MM1 */ "pmullw %%mm1, %%mm4 \n\t" /* mul high bytes of SrcDest and MM1 */ "packuswb %%mm4, %%mm3 \n\t" /* pack words back into bytes with saturation */ "movq %%mm3, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10250 \n\t" /* check loop termination, proceed if required */ - "jmp .L10252 \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10251: \n\t" "movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ + "jmp 3f \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ + "2: movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ "movq %%mm3, %%mm4 \n\t" /* copy MM3 into MM4 */ "punpcklbw %%mm0, %%mm3 \n\t" /* unpack low bytes of SrcDest into words */ "punpckhbw %%mm0, %%mm4 \n\t" /* unpack high bytes of SrcDest into words */ "pmullw %%mm1, %%mm3 \n\t" /* mul low bytes of SrcDest and MM1 */ "pmullw %%mm1, %%mm4 \n\t" /* mul high bytes of SrcDest and MM1 */ @@ -1645,12 +1645,12 @@ "packuswb %%mm4, %%mm3 \n\t" /* pack words back into bytes with saturation */ "movq %%mm3, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10251 \n\t" /* check loop termination, proceed if required */ - ".L10252: \n\t" "emms \n\t" /* exit MMX state */ + "jnz 2b \n\t" /* check loop termination, proceed if required */ + "3: emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(C) /* %3 */ ); @@ -1725,12 +1725,12 @@ "pxor %%mm0, %%mm0 \n\t" /* zero MM0 register */ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ - ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1026: \n\t" "movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ + ".align 16 \n\t" /* 16 byte allignment of the loop entry */ + "1: movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ "movq %%mm3, %%mm4 \n\t" /* copy MM3 into MM4 */ "punpcklbw %%mm0, %%mm3 \n\t" /* unpack low bytes of SrcDest into words */ "punpckhbw %%mm0, %%mm4 \n\t" /* unpack high bytes of SrcDest into words */ "psrlw %%mm7, %%mm3 \n\t" /* shift 4 WORDS of MM3 (N) bits to the right */ "psrlw %%mm7, %%mm4 \n\t" /* shift 4 WORDS of MM4 (N) bits to the right */ @@ -1739,11 +1739,11 @@ "packuswb %%mm4, %%mm3 \n\t" /* pack words back into bytes with saturation */ "movq %%mm3, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1026 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(N), /* %3 */ @@ -1814,29 +1814,29 @@ "movq (%%edx), %%mm0 \n\t" /* load Mask into mm0 */ "xor %%ecx, %%ecx \n\t" /* zero ECX */ "mov %3, %%cl \n\t" /* load loop counter (N) into CL */ "movd %%ecx, %%mm3 \n\t" /* copy (N) into MM3 */ "pcmpeqb %%mm1, %%mm1 \n\t" /* generate all 1's in mm1 */ - ".L10270: \n\t" /* ** Prepare proper bit-Mask in MM1 ** */ + "1: \n\t" /* ** Prepare proper bit-Mask in MM1 ** */ "psllw $1, %%mm1 \n\t" /* shift 4 WORDS of MM1 1 bit to the left */ /* "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1 */ ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" /* decrease loop counter */ - "jnz .L10270 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ /* ** Shift all bytes of the image ** */ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load SrcDest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10271: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from Src1 into MM0 */ + "2: movq (%%eax), %%mm0 \n\t" /* load 8 bytes from Src1 into MM0 */ "psllw %%mm3, %%mm0 \n\t" /* shift 4 WORDS of MM0 (N) bits to the left */ /* "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0 */ ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10271 \n\t" /* check loop termination, proceed if required */ + "jnz 2b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(N), /* %3 */ @@ -1900,17 +1900,17 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L12023: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ + "1: movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "pslld %3, %%mm0 \n\t" /* MM0=SrcDest+C (add 8 bytes with saturation) */ "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L12023 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(N) /* %3 */ @@ -1979,25 +1979,25 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ "cmp $7, %%al \n\t" /* if (N <= 7) execute more efficient code */ - "jg .L10281 \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10280: \n\t" "movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ + "jg 2f \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ + "1: movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ "movq %%mm3, %%mm4 \n\t" /* copy MM3 into MM4 */ "punpcklbw %%mm0, %%mm3 \n\t" /* unpack low bytes of SrcDest into words */ "punpckhbw %%mm0, %%mm4 \n\t" /* unpack high bytes of SrcDest into words */ "psllw %%mm7, %%mm3 \n\t" /* shift 4 WORDS of MM3 (N) bits to the right */ "psllw %%mm7, %%mm4 \n\t" /* shift 4 WORDS of MM4 (N) bits to the right */ "packuswb %%mm4, %%mm3 \n\t" /* pack words back into bytes with saturation */ "movq %%mm3, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10280 \n\t" /* check loop termination, proceed if required */ - "jmp .L10282 \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L10281: \n\t" "movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ + "jmp 3f \n\t" ".align 16 \n\t" /* 16 byte allignment of the loop entry */ + "2: movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ "movq %%mm3, %%mm4 \n\t" /* copy MM3 into MM4 */ "punpcklbw %%mm0, %%mm3 \n\t" /* unpack low bytes of SrcDest into words */ "punpckhbw %%mm0, %%mm4 \n\t" /* unpack high bytes of SrcDest into words */ "psllw %%mm7, %%mm3 \n\t" /* shift 4 WORDS of MM3 (N) bits to the right */ "psllw %%mm7, %%mm4 \n\t" /* shift 4 WORDS of MM4 (N) bits to the right */ @@ -2013,12 +2013,12 @@ "packuswb %%mm4, %%mm3 \n\t" /* pack words back into bytes with saturation */ "movq %%mm3, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L10281 \n\t" /* check loop termination, proceed if required */ - ".L10282: \n\t" "emms \n\t" /* exit MMX state */ + "jnz 2b \n\t" /* check loop termination, proceed if required */ + "3: emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(N) /* %3 */ ); @@ -2093,19 +2093,19 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte alignment of the loop entry */ - ".L1029: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from SrcDest into MM0 */ "paddusb %%mm2, %%mm0 \n\t" /* MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) */ "pcmpeqb %%mm1, %%mm0 \n\t" /* binarize 255:0, comparing to 255 */ "movq %%mm0, (%%edi) \n\t" /* store result in SrcDest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1029 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(T) /* %3 */ @@ -2184,20 +2184,20 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1030: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" /* load 8 bytes from Src1 into MM0 */ "paddusb %%mm1, %%mm0 \n\t" /* MM0=SrcDest+(0xFF-Tmax) */ "psubusb %%mm7, %%mm0 \n\t" /* MM0=MM0-(0xFF-Tmax+Tmin) */ "paddusb %%mm5, %%mm0 \n\t" /* MM0=MM0+Tmin */ "movq %%mm0, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1030 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(Tmin), /* %3 */ @@ -2261,15 +2261,15 @@ asm volatile ("pusha \n\t" "mov %6, %%ax \n\t" /* load Nmax in AX */ "mov %4, %%bx \n\t" /* load Cmax in BX */ "sub %5, %%ax \n\t" /* AX = Nmax - Nmin */ "sub %3, %%bx \n\t" /* BX = Cmax - Cmin */ - "jz .L10311 \n\t" /* check division by zero */ + "jz 1f \n\t" /* check division by zero */ "xor %%dx, %%dx \n\t" /* prepare for division, zero DX */ "div %%bx \n\t" /* AX = AX/BX */ - "jmp .L10312 \n\t" ".L10311: \n\t" "mov $255, %%ax \n\t" /* if div by zero, assume result max. byte value */ - ".L10312: \n\t" /* ** Duplicate AX in 4 words of MM0 ** */ + "jmp 2f \n\t" "1: \n\t" "mov $255, %%ax \n\t" /* if div by zero, assume result max. byte value */ + "2: \n\t" /* ** Duplicate AX in 4 words of MM0 ** */ "mov %%ax, %%bx \n\t" /* copy AX into BX */ "shl $16, %%eax \n\t" /* shift 2 bytes of EAX left */ "mov %%bx, %%ax \n\t" /* copy BX into AX */ "movd %%eax, %%mm0 \n\t" /* copy EAX into MM0 */ "movd %%eax, %%mm1 \n\t" /* copy EAX into MM1 */ @@ -2294,11 +2294,11 @@ "mov %1, %%eax \n\t" /* load Src1 address into eax */ "mov %0, %%edi \n\t" /* load Dest address into edi */ "mov %2, %%ecx \n\t" /* load loop counter (SIZE) into ecx */ "shr $3, %%ecx \n\t" /* counter/8 (MMX loads 8 bytes at a time) */ ".align 16 \n\t" /* 16 byte allignment of the loop entry */ - ".L1031: \n\t" + "1: \n\t" "movq (%%eax), %%mm3 \n\t" /* load 8 bytes from Src1 into MM3 */ "movq %%mm3, %%mm4 \n\t" /* copy MM3 into MM4 */ "punpcklbw %%mm7, %%mm3 \n\t" /* unpack low bytes of SrcDest into words */ "punpckhbw %%mm7, %%mm4 \n\t" /* unpack high bytes of SrcDest into words */ "psubusb %%mm1, %%mm3 \n\t" /* S-Cmin, low bytes */ @@ -2319,11 +2319,11 @@ "packuswb %%mm4, %%mm3 \n\t" /* pack words back into bytes with saturation */ "movq %%mm3, (%%edi) \n\t" /* store result in Dest */ "add $8, %%eax \n\t" /* increase Src1 register pointer by 8 */ "add $8, %%edi \n\t" /* increase Dest register pointer by 8 */ "dec %%ecx \n\t" /* decrease loop counter */ - "jnz .L1031 \n\t" /* check loop termination, proceed if required */ + "jnz 1b \n\t" /* check loop termination, proceed if required */ "emms \n\t" /* exit MMX state */ "popa \n\t":"=m" (Dest) /* %0 */ :"m"(Src1), /* %1 */ "m"(length), /* %2 */ "m"(Cmin), /* %3 */