diff -Nur qemu-1/qemu/cpu-all.h qemu-2/qemu/cpu-all.h --- qemu-1/qemu/cpu-all.h 2008-01-01 19:57:19.000000000 +0300 +++ qemu-2/qemu/cpu-all.h 2008-01-20 00:30:28.000000000 +0300 @@ -369,7 +369,13 @@ static inline void stq_le_p(void *ptr, uint64_t v) { +#if defined(__i386__) && __GNUC__ >= 4 + const union { uint64_t v; uint32_t p[2]; } x = { .v = v }; + ((uint32_t *)ptr)[0] = x.p[0]; + ((uint32_t *)ptr)[1] = x.p[1]; +#else *(uint64_t *)ptr = v; +#endif } /* float access */ diff -Nur qemu-1/qemu/dyngen.c qemu-2/qemu/dyngen.c --- qemu-1/qemu/dyngen.c 2007-11-19 00:22:10.000000000 +0300 +++ qemu-2/qemu/dyngen.c 2008-01-20 00:40:31.000000000 +0300 @@ -32,6 +32,8 @@ #include "config-host.h" +//#define DEBUG_OP + /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross compilation */ #if defined(CONFIG_WIN32) @@ -1429,6 +1431,677 @@ #endif +#if defined(HOST_I386) || defined(HOST_X86_64) + +/* This byte is the first byte of an instruction. */ +#define FLAG_INSN (1 << 0) +/* This byte has been processed as part of an instruction. */ +#define FLAG_SCANNED (1 << 1) +/* This instruction is a return instruction. Gcc cometimes generates prefix + bytes, so may be more than one byte long. */ +#define FLAG_RET (1 << 2) +/* This is either the target of a jump, or the preceeding instruction uses + a pc-relative offset. */ +#define FLAG_TARGET (1 << 3) +/* This is a magic instruction that needs fixing up. */ +#define FLAG_EXIT (1 << 4) +/* This instruction clobbers the stack pointer. */ +/* XXX only supports push, pop, add/sub $imm,%esp */ +#define FLAG_STACK (1 << 5) +#define MAX_EXITS 5 + +static void +bad_opcode(const char *name, uint32_t op) +{ + error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name); +} + +/* Mark len bytes as scanned, Returns insn_size + len. Reports an error + if these bytes have already been scanned. */ +static int +eat_bytes(const char *name, char *flags, int insn, int insn_size, int len) +{ + while (len > 0) { + /* This should never occur in sane code. */ + if (flags[insn + insn_size] & FLAG_SCANNED) + error ("Overlapping instructions in %s", name); + flags[insn + insn_size] |= FLAG_SCANNED; + insn_size++; + len--; + } + return insn_size; +} + +static void +trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn, + int len) +{ + uint8_t *ptr; + uint8_t op; + int modrm; + int is_prefix; + int op_size; + int addr_size; + int insn_size; + int is_ret; + int is_condjmp; + int is_jmp; + int is_exit; + int is_pcrel; + int is_stack; + int immed; + int seen_rexw; + int32_t disp; + + ptr = start_p + insn; + /* nonzero if this insn has a ModR/M byte. */ + modrm = 1; + /* The size of the immediate value in this instruction. */ + immed = 0; + /* The operand size. */ + op_size = 4; + /* The address size */ + addr_size = 4; + /* The total length of this instruction. */ + insn_size = 0; + is_prefix = 1; + is_ret = 0; + is_condjmp = 0; + is_jmp = 0; + is_exit = 0; + seen_rexw = 0; + is_pcrel = 0; + is_stack = 0; + + while (is_prefix) { + op = ptr[insn_size]; + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + is_prefix = 0; + switch (op >> 4) { + case 0: + case 1: + case 2: + case 3: + if (op == 0x0f) { + /* two-byte opcode. */ + op = ptr[insn_size]; + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + switch (op >> 4) { + case 0: + if ((op & 0xf) > 3) + modrm = 0; + break; + case 1: /* vector move or prefetch */ + case 2: /* various moves and vector compares. */ + case 4: /* cmov */ + case 5: /* vector instructions */ + case 6: + case 13: + case 14: + case 15: + break; + case 7: /* mmx */ + if (op & 0x77) /* emms */ + modrm = 0; + break; + case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */ + modrm = 0; + break; + case 8: /* long conditional jump */ + is_condjmp = 1; + immed = op_size; + modrm = 0; + break; + case 9: /* setcc */ + break; + case 10: + switch (op & 0x7) { + case 0: /* push fs/gs */ + case 1: /* pop fs/gs */ + is_stack = 1; + case 2: /* cpuid/rsm */ + modrm = 0; + break; + case 4: /* shld/shrd immediate */ + immed = 1; + break; + default: /* Normal instructions with a ModR/M byte. */ + break; + } + break; + case 11: + switch (op & 0xf) { + case 10: /* bt, bts, btr, btc */ + immed = 1; + break; + default: + /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr + undefined, and movsx */ + break; + } + break; + case 12: + if (op & 8) { + /* bswap */ + modrm = 0; + } else { + switch (op & 0x7) { + case 2: + case 4: + case 5: + case 6: + immed = 1; + break; + default: + break; + } + } + break; + } + } else if ((op & 0x07) <= 0x3) { + /* General arithmentic ax. */ + } else if ((op & 0x07) <= 0x5) { + /* General arithmetic ax, immediate. */ + if (op & 0x01) + immed = op_size; + else + immed = 1; + modrm = 0; + } else if ((op & 0x23) == 0x22) { + /* Segment prefix. */ + is_prefix = 1; + } else { + /* Segment register push/pop or DAA/AAA/DAS/AAS. */ + modrm = 0; + } + break; + +#if defined(HOST_X86_64) + case 4: /* rex prefix. */ + is_prefix = 1; + /* The address/operand size is actually 64-bit, but the immediate + values in the instruction are still 32-bit. */ + op_size = 4; + addr_size = 4; + if (op & 8) + seen_rexw = 1; + break; +#else + case 4: /* inc/dec register. */ +#endif + case 5: /* push/pop general register. */ + modrm = 0; + is_stack = 1; + break; + + case 6: + switch (op & 0x0f) { + case 0: /* pusha */ + case 1: /* popa */ + modrm = 0; + is_stack = 1; + break; + case 2: /* bound */ + case 3: /* arpl */ + break; + case 4: /* FS */ + case 5: /* GS */ + is_prefix = 1; + break; + case 6: /* opcode size prefix. */ + op_size = 2; + is_prefix = 1; + break; + case 7: /* Address size prefix. */ + addr_size = 2; + is_prefix = 1; + break; + case 8: /* push immediate */ + immed = op_size; + modrm = 0; + is_stack = 1; + break; + case 10: /* push 8-bit immediate */ + immed = 1; + modrm = 0; + is_stack = 1; + break; + case 9: /* imul immediate */ + immed = op_size; + break; + case 11: /* imul 8-bit immediate */ + immed = 1; + break; + case 12: /* insb */ + case 13: /* insw */ + case 14: /* outsb */ + case 15: /* outsw */ + modrm = 0; + break; + } + break; + + case 7: /* Short conditional jump. */ + is_condjmp = 1; + immed = 1; + modrm = 0; + break; + + case 8: + if ((op & 0xf) <= 3) { + /* arithmetic immediate. */ + if ((op & 3) == 1) + immed = op_size; + else + immed = 1; + if (op == 0x81 || op == 0x83) { + /* add, sub */ + op = ptr[insn_size]; + switch ((op >> 3) & 7) { + case 0: + case 5: + is_stack = (op & 7) == 4; + break; + } + } + } + else if ((op & 0xf) == 0xf) { + /* pop general. */ + is_stack = 1; + } + /* else test, xchg, mov, lea. */ + break; + + case 9: + /* Various single-byte opcodes with no modrm byte. */ + modrm = 0; + if (op == 10) { + /* Call */ + immed = 4; + } + break; + + case 10: + switch ((op & 0xe) >> 1) { + case 0: /* mov absoliute immediate. */ + case 1: + if (seen_rexw) + immed = 8; + else + immed = addr_size; + break; + case 4: /* test immediate. */ + if (op & 1) + immed = op_size; + else + immed = 1; + break; + default: /* Various string ops. */ + break; + } + modrm = 0; + break; + + case 11: /* move immediate to register */ + if (op & 8) { + if (seen_rexw) + immed = 8; + else + immed = op_size; + } else { + immed = 1; + } + modrm = 0; + break; + + case 12: + switch (op & 0xf) { + case 0: /* shift immediate */ + case 1: + immed = 1; + break; + case 2: /* ret immediate */ + immed = 2; + modrm = 0; + bad_opcode(name, op); + break; + case 3: /* ret */ + modrm = 0; + is_ret = 1; + case 4: /* les */ + case 5: /* lds */ + break; + case 6: /* mov immediate byte */ + immed = 1; + break; + case 7: /* mov immediate */ + immed = op_size; + break; + case 8: /* enter */ + /* TODO: Is this right? */ + immed = 3; + modrm = 0; + break; + case 10: /* retf immediate */ + immed = 2; + modrm = 0; + bad_opcode(name, op); + break; + case 13: /* int */ + immed = 1; + modrm = 0; + break; + case 11: /* retf */ + case 15: /* iret */ + modrm = 0; + bad_opcode(name, op); + break; + default: /* leave, int3 or into */ + modrm = 0; + break; + } + break; + + case 13: + if ((op & 0xf) >= 8) { + /* Coprocessor escape. For our purposes this is just a normal + instruction with a ModR/M byte. */ + } else if ((op & 0xf) >= 4) { + /* AAM, AAD or XLAT */ + modrm = 0; + } + /* else shift instruction */ + break; + + case 14: + switch ((op & 0xc) >> 2) { + case 0: /* loop or jcxz */ + is_condjmp = 1; + immed = 1; + break; + case 1: /* in/out immed */ + immed = 1; + break; + case 2: /* call or jmp */ + switch (op & 3) { + case 0: /* call */ + immed = op_size; + break; + case 1: /* long jump */ + immed = 4; + is_jmp = 1; + break; + case 2: /* far jmp */ + bad_opcode(name, op); + break; + case 3: /* short jmp */ + immed = 1; + is_jmp = 1; + break; + } + break; + case 3: /* in/out register */ + break; + } + modrm = 0; + break; + + case 15: + switch ((op & 0xe) >> 1) { + case 0: + case 1: + is_prefix = 1; + break; + case 2: + case 4: + case 5: + case 6: + modrm = 0; + /* Some privileged insns are used as markers. */ + switch (op) { + case 0xf4: /* hlt: Exit translation block. */ + is_exit = 1; + break; + case 0xfa: /* cli: Jump to label. */ + is_exit = 1; + immed = 4; + break; + case 0xfb: /* sti: TB patch jump. */ + /* Mark the insn for patching, but continue sscanning. */ + flags[insn] |= FLAG_EXIT; + immed = 4; + break; + } + break; + case 3: /* unary grp3 */ + if ((ptr[insn_size] & 0x38) == 0) { + if (op == 0xf7) + immed = op_size; + else + immed = 1; /* test immediate */ + } + break; + case 7: /* inc/dec grp4/5 */ + /* TODO: This includes indirect jumps. We should fail if we + encounter one of these. */ + break; + } + break; + } + } + + if (modrm) { + if (addr_size != 4) + error("16-bit addressing mode used in %s", name); + + disp = 0; + modrm = ptr[insn_size]; + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + modrm &= 0xc7; + switch ((modrm & 0xc0) >> 6) { + case 0: + if (modrm == 5) + disp = 4; + break; + case 1: + disp = 1; + break; + case 2: + disp = 4; + break; + } + if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) { + /* SIB byte */ + if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) { + disp = 4; + is_pcrel = 1; + } + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + } + insn_size = eat_bytes(name, flags, insn, insn_size, disp); + } + insn_size = eat_bytes(name, flags, insn, insn_size, immed); + if (is_condjmp || is_jmp) { + if (immed == 1) { + disp = (int8_t)*(ptr + insn_size - 1); + } else { + disp = (((int32_t)*(ptr + insn_size - 1)) << 24) + | (((int32_t)*(ptr + insn_size - 2)) << 16) + | (((int32_t)*(ptr + insn_size - 3)) << 8) + | *(ptr + insn_size - 4); + } + disp += insn_size; + /* Jumps to external symbols point to the address of the offset + before relocation. */ + /* ??? These are probably a tailcall. We could fix them up by + replacing them with jmp to EOB + call, but it's easier to just + prevent the compiler generating them. */ + if (disp == 1) + error("Unconditional jump (sibcall?) in %s", name); + disp += insn; + if (disp < 0 || disp > len) + error("Jump outside instruction in %s", name); + + if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED) + error("Overlapping instructions in %s", name); + + flags[disp] |= (FLAG_INSN | FLAG_TARGET); + is_pcrel = 1; + } + if (is_pcrel) { + /* Mark the following insn as a jump target. This will stop + this instruction being moved. */ + flags[insn + insn_size] |= FLAG_TARGET; + } + if (is_ret) + flags[insn] |= FLAG_RET; + + if (is_exit) + flags[insn] |= FLAG_EXIT; + + if (is_stack) + flags[insn] |= FLAG_STACK; + + if (!(is_jmp || is_ret || is_exit)) + flags[insn + insn_size] |= FLAG_INSN; +} + +/* Scan a function body. Returns the position of the return sequence. + Sets *patch_bytes to the number of bytes that need to be copied from that + location. If no patching is required (ie. the return is the last insn) + *patch_bytes will be set to -1. *plen is the number of code bytes to copy. + */ +static int trace_i386_op(const char * name, uint8_t *start_p, int *plen, + int *patch_bytes, int *exit_addrs) +{ + char *flags; + int more; + int insn; + int retpos; + int bytes; + int num_exits; + int len; + int last_insn; + int stack_clobbered; + + len = *plen; + flags = malloc(len + 1); + memset(flags, 0, len + 1); + flags[0] |= FLAG_INSN; + more = 1; + while (more) { + more = 0; + for (insn = 0; insn < len; insn++) { + if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) { + trace_i386_insn(name, start_p, flags, insn, len); + more = 1; + } + } + } + + /* Strip any unused code at the end of the function. */ + while (len > 0 && flags[len - 1] == 0) + len--; + + retpos = -1; + num_exits = 0; + last_insn = 0; + stack_clobbered = 0; + for (insn = 0; insn < len; insn++) { + if (flags[insn] & FLAG_RET) { + /* ??? In theory it should be possible to handle multiple return + points. In practice it's not worth the effort. */ + if (retpos != -1) + error("Multiple return instructions in %s", name); + retpos = insn; + } + if (flags[insn] & FLAG_EXIT) { + if (stack_clobbered) + error("Stack clobbered in %s", name); + if (num_exits == MAX_EXITS) + error("Too many block exits in %s", name); + exit_addrs[num_exits] = insn; + num_exits++; + } + if (flags[insn] & FLAG_INSN) + last_insn = insn; + if (flags[insn] & FLAG_STACK) + stack_clobbered = 1; + } + + exit_addrs[num_exits] = -1; + if (retpos == -1) { + if (num_exits == 0) { + error ("No return instruction found in %s", name); + } else { + retpos = len; + last_insn = len; + } + } + + /* If the return instruction is the last instruction we can just + remove it. */ + if (retpos == last_insn) + *patch_bytes = -1; + else + *patch_bytes = 0; + + /* Back up over any nop instructions. */ + while (retpos > 0 + && (flags[retpos] & FLAG_TARGET) == 0 + && (flags[retpos - 1] & FLAG_INSN) != 0 + && start_p[retpos - 1] == 0x90) { + retpos--; + } + + if (*patch_bytes == -1) { + *plen = retpos; + free (flags); + return retpos; + } + *plen = len; + + /* The ret is in the middle of the function. Find four more bytes that + so the ret can be replaced by a jmp. */ + /* ??? Use a short jump where possible. */ + bytes = 4; + insn = retpos + 1; + /* We can clobber everything up to the next jump target. */ + while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) { + insn++; + bytes--; + } + if (bytes > 0) { + /* ???: Strip out nop blocks. */ + /* We can't do the replacement without clobbering anything important. + Copy preceeding instructions(s) to give us some space. */ + while (retpos > 0) { + /* If this byte is the target of a jmp we can't move it. */ + if (flags[retpos] & FLAG_TARGET) + break; + + (*patch_bytes)++; + bytes--; + retpos--; + + /* Break out of the loop if we have enough space and this is either + the first byte of an instruction or a pad byte. */ + if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED + && bytes <= 0) { + break; + } + } + } + + if (bytes > 0) + error("Unable to replace ret with jmp in %s\n", name); + + free(flags); + return retpos; +} + +#endif + #define MAX_ARGS 3 /* generate op code */ @@ -1442,6 +2115,11 @@ uint8_t args_present[MAX_ARGS]; const char *sym_name, *p; EXE_RELOC *rel; +#if defined(HOST_I386) || defined(HOST_X86_64) + int patch_bytes; + int retpos; + int exit_addrs[MAX_EXITS]; +#endif /* Compute exact size excluding prologue and epilogue instructions. * Increment start_offset to skip epilogue instructions, then compute @@ -1452,33 +2130,12 @@ p_end = p_start + size; start_offset = offset; #if defined(HOST_I386) || defined(HOST_X86_64) -#ifdef CONFIG_FORMAT_COFF - { - uint8_t *p; - p = p_end - 1; - if (p == p_start) - error("empty code for %s", name); - while (*p != 0xc3) { - p--; - if (p <= p_start) - error("ret or jmp expected at the end of %s", name); - } - copy_size = p - p_start; - } -#else { int len; len = p_end - p_start; - if (len == 0) - error("empty code for %s", name); - if (p_end[-1] == 0xc3) { - len--; - } else { - error("ret or jmp expected at the end of %s", name); - } + retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs); copy_size = len; } -#endif #elif defined(HOST_PPC) { uint8_t *p; @@ -1710,6 +2367,13 @@ } if (gen_switch == 2) { +#if defined(HOST_I386) || defined(HOST_X86_64) + if (patch_bytes != -1) + copy_size += patch_bytes; +#ifdef DEBUG_OP + copy_size += 2; +#endif +#endif fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size); } else if (gen_switch == 1) { @@ -1915,7 +2579,43 @@ #error unsupport object format #endif } + } + /* Replace the marker instructions with the actual opcodes. */ + for (i = 0; exit_addrs[i] != -1; i++) { + int op; + switch (p_start[exit_addrs[i]]) + { + case 0xf4: op = 0xc3; break; /* hlt -> ret */ + case 0xfa: op = 0xe9; break; /* cli -> jmp */ + case 0xfb: op = 0xe9; break; /* sti -> jmp */ + default: error("Internal error"); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n", + exit_addrs[i], op); } + /* Fix up the return instruction. */ + if (patch_bytes != -1) { + if (patch_bytes) { + fprintf(outfile, " memcpy(gen_code_ptr + %d," + "gen_code_ptr + %d, %d);\n", + copy_size, retpos, patch_bytes); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n", + retpos); + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n", + retpos + 1, copy_size - (retpos + 5)); + + copy_size += patch_bytes; + } +#ifdef DEBUG_OP + fprintf(outfile, + " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n", + copy_size); + copy_size += 2; +#endif } #elif defined(HOST_X86_64) { @@ -1949,6 +2649,42 @@ } } } + /* Replace the marker instructions with the actual opcodes. */ + for (i = 0; exit_addrs[i] != -1; i++) { + int op; + switch (p_start[exit_addrs[i]]) + { + case 0xf4: op = 0xc3; break; /* hlt -> ret */ + case 0xfa: op = 0xe9; break; /* cli -> jmp */ + case 0xfb: op = 0xe9; break; /* sti -> jmp */ + default: error("Internal error"); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n", + exit_addrs[i], op); + } + /* Fix up the return instruction. */ + if (patch_bytes != -1) { + if (patch_bytes) { + fprintf(outfile, " memcpy(gen_code_ptr + %d," + "gen_code_ptr + %d, %d);\n", + copy_size, retpos, patch_bytes); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n", + retpos); + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n", + retpos + 1, copy_size - (retpos + 5)); + + copy_size += patch_bytes; + } +#ifdef DEBUG_OP + fprintf(outfile, + " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n", + copy_size); + copy_size += 2; +#endif } #elif defined(HOST_PPC) { diff -Nur qemu-1/qemu/dyngen-exec.h qemu-2/qemu/dyngen-exec.h --- qemu-1/qemu/dyngen-exec.h 2007-12-25 03:26:36.000000000 +0300 +++ qemu-2/qemu/dyngen-exec.h 2008-01-20 00:34:12.000000000 +0300 @@ -194,7 +194,12 @@ #endif /* force GCC to generate only one epilog at the end of the function */ +#if defined(__i386__) || defined(__x86_64__) +/* Also add 4 bytes of padding so that we can replace the ret with a jmp. */ +#define FORCE_RET() __asm__ __volatile__ ("nop;nop;nop;nop" : : : "memory"); +#else #define FORCE_RET() __asm__ __volatile__("" : : : "memory"); +#endif #ifndef OPPROTO #define OPPROTO @@ -251,11 +256,11 @@ #endif #if defined(__i386__) -#define EXIT_TB() asm volatile ("ret") -#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n) +#define EXIT_TB() asm volatile ("hlt") +#define GOTO_LABEL_PARAM(n) asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:") #elif defined(__x86_64__) -#define EXIT_TB() asm volatile ("ret") -#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n) +#define EXIT_TB() asm volatile ("hlt") +#define GOTO_LABEL_PARAM(n) asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:") #elif defined(__powerpc__) #define EXIT_TB() asm volatile ("blr") #define GOTO_LABEL_PARAM(n) asm volatile ("b " ASM_NAME(__op_gen_label) #n) diff -Nur qemu-1/qemu/exec-all.h qemu-2/qemu/exec-all.h --- qemu-1/qemu/exec-all.h 2007-12-11 22:35:45.000000000 +0300 +++ qemu-2/qemu/exec-all.h 2008-01-20 00:30:28.000000000 +0300 @@ -309,14 +309,15 @@ #elif defined(__i386__) && defined(USE_DIRECT_JUMP) -/* we patch the jump instruction directly */ +/* we patch the jump instruction directly. Use sti in place of the actual + jmp instruction so that dyngen can patch in the correct result. */ #define GOTO_TB(opname, tbparam, n)\ do {\ asm volatile (".section .data\n"\ ASM_OP_LABEL_NAME(n, opname) ":\n"\ ".long 1f\n"\ ASM_PREVIOUS_SECTION \ - "jmp " ASM_NAME(__op_jmp) #n "\n"\ + "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\ "1:\n");\ } while (0) diff -Nur qemu-1/qemu/Makefile.target qemu-2/qemu/Makefile.target --- qemu-1/qemu/Makefile.target 2008-01-15 01:09:11.000000000 +0300 +++ qemu-2/qemu/Makefile.target 2008-01-20 00:30:28.000000000 +0300 @@ -82,6 +82,10 @@ else QEMU_SYSTEM=qemu-fast endif +ifeq ($(TARGET_ARCH), x86_64) +# XXX globally save %ebx, %esi, %edi on entry to generated function +OP_CFLAGS+= -fcall-used-ebx -fcall-used-esi -fcall-used-edi +endif ifdef CONFIG_USER_ONLY PROGS=$(QEMU_USER) diff -Nur qemu-1/qemu/softmmu_header.h qemu-2/qemu/softmmu_header.h --- qemu-1/qemu/softmmu_header.h 2008-01-21 18:07:18.000000000 +0300 +++ qemu-2/qemu/softmmu_header.h 2008-01-20 00:30:28.000000000 +0300 @@ -117,7 +117,7 @@ "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MMU_INDEX][0].addr_read)), "i" (CPU_MMU_INDEX), "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX)) - : "%eax", "%ecx", "%edx", "memory", "cc"); + : "%eax", "%edx", "memory", "cc"); return res; } @@ -164,13 +164,14 @@ "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MMU_INDEX][0].addr_read)), "i" (CPU_MMU_INDEX), "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX)) - : "%eax", "%ecx", "%edx", "memory", "cc"); + : "%eax", "%edx", "memory", "cc"); return res; } #endif -static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v) +static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE val) { + RES_TYPE v = val; asm volatile ("movl %0, %%edx\n" "movl %0, %%eax\n" "shrl %3, %%edx\n" @@ -207,11 +208,9 @@ "2:\n" : : "r" (ptr), -#if DATA_SIZE == 1 - "q" (v), -#else +/* NOTE: 'q' would be needed as constraint, but we could not use it + with T1 ! */ "r" (v), -#endif "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)), diff -Nur qemu-1/qemu/target-arm/op_iwmmxt.c qemu-2/qemu/target-arm/op_iwmmxt.c --- qemu-1/qemu/target-arm/op_iwmmxt.c 2007-09-17 01:08:02.000000000 +0400 +++ qemu-2/qemu/target-arm/op_iwmmxt.c 2008-01-20 00:50:41.000000000 +0300 @@ -170,9 +170,16 @@ M0 += M1; } +#if defined(__i386__) && __GNUC__ >= 4 +#define RegCopy(d, s) __builtin_memcpy(&(d), &(s), sizeof(d)) +#endif +#ifndef RegCopy +#define RegCopy(d, s) d = s +#endif + void OPPROTO op_iwmmxt_movq_wRn_M0(void) { - M1 = M0; + RegCopy (M1, M0); } void OPPROTO op_iwmmxt_movl_wCx_T0(void) diff -Nur qemu-1/qemu/target-arm/op_iwmmxt.d qemu-2/qemu/target-arm/op_iwmmxt.d --- qemu-1/qemu/target-arm/op_iwmmxt.d 1970-01-01 03:00:00.000000000 +0300 +++ qemu-2/qemu/target-arm/op_iwmmxt.d 2008-01-20 00:28:11.000000000 +0300 @@ -0,0 +1 @@ +target-arm/op_iwmmxt.o: target-arm/op_iwmmxt.c diff -Nur qemu-1/qemu/target-i386/helper.c qemu-2/qemu/target-i386/helper.c --- qemu-1/qemu/target-i386/helper.c 2007-12-24 16:36:00.000000000 +0300 +++ qemu-2/qemu/target-i386/helper.c 2008-01-20 00:30:28.000000000 +0300 @@ -3607,8 +3607,15 @@ nb_xmm_regs = 8 << data64; addr = ptr + 0xa0; for(i = 0; i < nb_xmm_regs; i++) { +#if defined(__i386__) && __GNUC__ >= 4 + env->xmm_regs[i].XMM_L(0) = ldl(addr); + env->xmm_regs[i].XMM_L(1) = ldl(addr + 4); + env->xmm_regs[i].XMM_L(2) = ldl(addr + 8); + env->xmm_regs[i].XMM_L(3) = ldl(addr + 12); +#else env->xmm_regs[i].XMM_Q(0) = ldq(addr); env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8); +#endif addr += 16; } } diff -Nur qemu-1/qemu/target-i386/ops_sse.h qemu-2/qemu/target-i386/ops_sse.h --- qemu-1/qemu/target-i386/ops_sse.h 2007-09-17 12:09:52.000000000 +0400 +++ qemu-2/qemu/target-i386/ops_sse.h 2008-01-20 00:30:28.000000000 +0300 @@ -34,6 +34,12 @@ #define Q(n) XMM_Q(n) #define SUFFIX _xmm #endif +#if defined(__i386__) && __GNUC__ >= 4 +#define RegCopy(d, s) __builtin_memcpy(&(d), &(s), sizeof(d)) +#endif +#ifndef RegCopy +#define RegCopy(d, s) d = s +#endif void OPPROTO glue(op_psrlw, SUFFIX)(void) { @@ -589,7 +595,7 @@ r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); - *d = r; + RegCopy(*d, r); } #else void OPPROTO op_shufps(void)