--- SDL_gfx-2.0.13.orig/SDL_imageFilter.c 2004-11-29 20:53:35.000000000 +0100 +++ SDL_gfx-2.0.13/SDL_imageFilter.c 2008-04-22 18:11:27.000000000 +0200 @@ -81,13 +81,13 @@ "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1010: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "paddusb (%%ebx), %%mm1 \n\t" // mm1=Src1+Src2 (add 8 bytes with saturation) "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1010 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -158,7 +158,7 @@ "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L21011: \n\t" + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2 // --- Byte shift via Word shift --- @@ -174,7 +174,7 @@ "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L21011 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -241,13 +241,13 @@ "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1012: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation) "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1012 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -313,7 +313,7 @@ "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1013: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2 "psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation) "psubusb (%%eax), %%mm2 \n\t" // mm2=Src2-Src1 (sub 8 bytes with saturation) @@ -322,7 +322,7 @@ "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1013 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -388,7 +388,7 @@ "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "pxor %%mm0, %%mm0 \n\t" // zero mm0 register ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1014: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4 @@ -412,7 +412,7 @@ "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1014 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -481,13 +481,13 @@ "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10141: \n\t" "mov (%%edx), %%al \n\t" // load a byte from Src1 + "1:mov (%%edx), %%al \n\t" // load a byte from Src1 "mulb (%%esi) \n\t" // mul with a byte from Src2 - ".L10142: \n\t" "mov %%al, (%%edi) \n\t" // move a byte result to Dest + "mov %%al, (%%edi) \n\t" // move a byte result to Dest "inc %%edx \n\t" // increment Src1, Src2, Dest "inc %%esi \n\t" // pointer registers by one "inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L10141 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 @@ -549,7 +549,7 @@ "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "pxor %%mm0, %%mm0 \n\t" // zero mm0 register ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1015: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4 @@ -566,7 +566,7 @@ "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1015 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -634,7 +634,7 @@ "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "pxor %%mm0, %%mm0 \n\t" // zero mm0 register ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1016: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4 @@ -653,7 +653,7 @@ "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1016 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -720,13 +720,13 @@ "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "pand (%%ebx), %%mm1 \n\t" // mm1=Src1&Src2 "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L1017 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -792,13 +792,13 @@ "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L91017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "por (%%ebx), %%mm1 \n\t" // mm1=Src1|Src2 "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L91017 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 @@ -860,17 +860,17 @@ "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10191: \n\t" "mov (%%esi), %%bl \n\t" // load a byte from Src2 + "1: mov (%%esi), %%bl \n\t" // load a byte from Src2 "cmp $0, %%bl \n\t" // check if it zero - "jnz .L10192 \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!! - "jmp .L10193 \n\t" ".L10192: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register + "jnz 2f \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!! + "jmp 3f \n\t" "2: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register "mov (%%edx), %%al \n\t" // load a byte from Src1 into AL "div %%bl \n\t" // divide AL by BL "mov %%al, (%%edi) \n\t" // move a byte result to Dest - ".L10193: \n\t" "inc %%edx \n\t" // increment Src1, Src2, Dest + "3: inc %%edx \n\t" // increment Src1, Src2, Dest "inc %%esi \n\t" // pointer registers by one "inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L10191 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 @@ -907,12 +907,12 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L91117: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1 + "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1 "pxor %%mm1, %%mm0 \n\t" // negate mm0 by xoring with mm1 "movq %%mm0, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter - "jnz .L91117 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -980,14 +980,14 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1021: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Dest register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1021 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1059,14 +1059,14 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L11023: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L11023 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1154,7 +1154,7 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1022: \n\t" + "1: \n\t" "movq (%%eax), %%mm2 \n\t" // load 8 bytes from Src1 into MM2 "psrlw $1, %%mm2 \n\t" // shift 4 WORDS of MM2 1 bit to the right // "pand %%mm0, %%mm2 \n\t" // apply Mask to 8 BYTES of MM2 @@ -1164,7 +1164,7 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1022 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1243,13 +1243,13 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 + "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1023 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1322,13 +1322,13 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L11024: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 + "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L11024 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1405,19 +1405,19 @@ "mov %3, %%cl \n\t" // load loop counter (N) into CL "movd %%ecx, %%mm3 \n\t" // copy (N) into MM3 "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1 - ".L10240: \n\t" // ** Prepare proper bit-Mask in MM1 ** + "1: \n\t" // ** Prepare proper bit-Mask in MM1 ** "psrlw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the right // "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1 ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter - "jnz .L10240 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required // ** Shift all bytes of the image ** "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10241: \n\t" + "2: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "psrlw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the right // "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0 @@ -1426,7 +1426,7 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L10241 \n\t" // check loop termination, proceed if required + "jnz 2b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1495,13 +1495,13 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L13023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 + "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "psrld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L13023 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1581,8 +1581,8 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "cmp $128, %%al \n\t" // if (C <= 128) execute more efficient code - "jg .L10251 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10250: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 + "jg 2f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry + "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words @@ -1593,9 +1593,9 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L10250 \n\t" // check loop termination, proceed if required - "jmp .L10252 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10251: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 + "jnz 1b \n\t" // check loop termination, proceed if required + "jmp 3f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry + "2: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words @@ -1615,8 +1615,8 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L10251 \n\t" // check loop termination, proceed if required - ".L10252: \n\t" "emms \n\t" // exit MMX state + "jnz 2b \n\t" // check loop termination, proceed if required + "3: emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 @@ -1695,8 +1695,8 @@ "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) - ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1026: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 + ".align 16 \n\t" // 16 byte allignment of the loop entry + "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words @@ -1709,7 +1709,7 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1026 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1784,25 +1784,25 @@ "mov %3, %%cl \n\t" // load loop counter (N) into CL "movd %%ecx, %%mm3 \n\t" // copy (N) into MM3 "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1 - ".L10270: \n\t" // ** Prepare proper bit-Mask in MM1 ** + "1: \n\t" // ** Prepare proper bit-Mask in MM1 ** "psllw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the left // "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1 ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter - "jnz .L10270 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required // ** Shift all bytes of the image ** "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load SrcDest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10271: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0 + "2: movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0 "psllw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the left // "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0 ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L10271 \n\t" // check loop termination, proceed if required + "jnz 2b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1870,13 +1870,13 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L12023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 + "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "pslld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L12023 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -1949,8 +1949,8 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "cmp $7, %%al \n\t" // if (N <= 7) execute more efficient code - "jg .L10281 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10280: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 + "jg 2f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry + "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words @@ -1961,9 +1961,9 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L10280 \n\t" // check loop termination, proceed if required - "jmp .L10282 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L10281: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 + "jnz 1b \n\t" // check loop termination, proceed if required + "jmp 3f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry + "2: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words @@ -1983,8 +1983,8 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L10281 \n\t" // check loop termination, proceed if required - ".L10282: \n\t" "emms \n\t" // exit MMX state + "jnz 2b \n\t" // check loop termination, proceed if required + "3: emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 @@ -2063,7 +2063,7 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte alignment of the loop entry - ".L1029: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "paddusb %%mm2, %%mm0 \n\t" // MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) "pcmpeqb %%mm1, %%mm0 \n\t" // binarize 255:0, comparing to 255 @@ -2071,7 +2071,7 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1029 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -2154,7 +2154,7 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1030: \n\t" + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+(0xFF-Tmax) "psubusb %%mm7, %%mm0 \n\t" // MM0=MM0-(0xFF-Tmax+Tmin) @@ -2163,7 +2163,7 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1030 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 @@ -2231,11 +2231,11 @@ "mov %4, %%bx \n\t" // load Cmax in BX "sub %5, %%ax \n\t" // AX = Nmax - Nmin "sub %3, %%bx \n\t" // BX = Cmax - Cmin - "jz .L10311 \n\t" // check division by zero + "jz 1f \n\t" // check division by zero "xor %%dx, %%dx \n\t" // prepare for division, zero DX "div %%bx \n\t" // AX = AX/BX - "jmp .L10312 \n\t" ".L10311: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value - ".L10312: \n\t" // ** Duplicate AX in 4 words of MM0 ** + "jmp 2f \n\t" "1: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value + "2: \n\t" // ** Duplicate AX in 4 words of MM0 ** "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX @@ -2264,7 +2264,7 @@ "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry - ".L1031: \n\t" + "1: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm7, %%mm3 \n\t" // unpack low bytes of SrcDest into words @@ -2289,7 +2289,7 @@ "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter - "jnz .L1031 \n\t" // check loop termination, proceed if required + "jnz 1b \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1