diff --git a/TODO b/TODO index caeb64d56b..5602097b8e 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,5 @@ -- daa/das -- optimize translated cache chaining (DLL PLT like system) - segment ops (minimal LDT/GDT support for wine) +- optimize translated cache chaining (DLL PLT like system) - improved 16 bit support - optimize inverse flags propagation (easy by generating intermediate micro operation array). diff --git a/cpu-i386.h b/cpu-i386.h index 40542f283e..fc68a91d92 100644 --- a/cpu-i386.h +++ b/cpu-i386.h @@ -123,6 +123,20 @@ typedef long double CPU86_LDouble; typedef double CPU86_LDouble; #endif +typedef struct SegmentCache { + uint8_t *base; + unsigned long limit; + uint8_t seg_32bit; +} SegmentCache; + +typedef struct SegmentDescriptorTable { + uint8_t *base; + unsigned long limit; + /* this is the returned base when reading the register, just to + avoid that the emulated program modifies it */ + unsigned long emu_base; +} SegmentDescriptorTable; + typedef struct CPUX86State { /* standard registers */ uint32_t regs[8]; @@ -135,9 +149,6 @@ typedef struct CPUX86State { uint32_t cc_op; int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */ - /* segments */ - uint8_t *segs_base[6]; - /* FPU state */ unsigned int fpstt; /* top of stack index */ unsigned int fpus; @@ -145,12 +156,19 @@ typedef struct CPUX86State { uint8_t fptags[8]; /* 0 = valid, 1 = empty */ CPU86_LDouble fpregs[8]; - /* segments */ - uint32_t segs[6]; - /* emulator internal variables */ CPU86_LDouble ft0; + /* segments */ + uint32_t segs[6]; /* selector values */ + SegmentCache seg_cache[6]; /* info taken from LDT/GDT */ + SegmentDescriptorTable gdt; + SegmentDescriptorTable ldt; + SegmentDescriptorTable idt; + + /* various CPU modes */ + int vm86; + /* exception handling */ jmp_buf jmp_env; int exception_index; @@ -241,9 +259,17 @@ CPUX86State *cpu_x86_init(void); int cpu_x86_exec(CPUX86State *s); void cpu_x86_close(CPUX86State *s); +/* needed to load some predefinied segment registers */ +void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector); + /* internal functions */ + +#define GEN_FLAG_CODE32_SHIFT 0 +#define GEN_FLAG_ADDSEG_SHIFT 1 +#define GEN_FLAG_ST_SHIFT 2 int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, - int *gen_code_size_ptr, uint8_t *pc_start); + int *gen_code_size_ptr, uint8_t *pc_start, + int flags); void cpu_x86_tblocks_init(void); #endif /* CPU_I386_H */ diff --git a/exec-i386.c b/exec-i386.c index c067685095..8144add7ba 100644 --- a/exec-i386.c +++ b/exec-i386.c @@ -36,8 +36,10 @@ #define CODE_GEN_MAX_BLOCKS (CODE_GEN_BUFFER_SIZE / 64) #define CODE_GEN_HASH_BITS 15 #define CODE_GEN_HASH_SIZE (1 << CODE_GEN_HASH_BITS) + typedef struct TranslationBlock { unsigned long pc; /* simulated PC corresponding to this block */ + unsigned int flags; /* flags defining in which context the code was generated */ uint8_t *tc_ptr; /* pointer to the translated code */ struct TranslationBlock *hash_next; /* next matching block */ } TranslationBlock; @@ -137,7 +139,8 @@ static void tb_flush(void) /* find a translation block in the translation cache. If not found, allocate a new one */ -static inline TranslationBlock *tb_find_and_alloc(unsigned long pc) +static inline TranslationBlock *tb_find_and_alloc(unsigned long pc, + unsigned int flags) { TranslationBlock **ptb, *tb; unsigned int h; @@ -148,7 +151,7 @@ static inline TranslationBlock *tb_find_and_alloc(unsigned long pc) tb = *ptb; if (!tb) break; - if (tb->pc == pc) + if (tb->pc == pc && tb->flags == flags) return tb; ptb = &tb->hash_next; } @@ -158,6 +161,7 @@ static inline TranslationBlock *tb_find_and_alloc(unsigned long pc) tb = &tbs[nb_tbs++]; *ptb = tb; tb->pc = pc; + tb->flags = flags; tb->tc_ptr = NULL; tb->hash_next = NULL; return tb; @@ -171,7 +175,8 @@ int cpu_x86_exec(CPUX86State *env1) void (*gen_func)(void); TranslationBlock *tb; uint8_t *tc_ptr; - + unsigned int flags; + /* first we save global registers */ saved_T0 = T0; saved_T1 = T1; @@ -187,13 +192,20 @@ int cpu_x86_exec(CPUX86State *env1) cpu_x86_dump_state(); } #endif - tb = tb_find_and_alloc((unsigned long)env->pc); + /* we compute the CPU state. We assume it will not + change during the whole generated block. */ + flags = env->seg_cache[R_CS].seg_32bit << GEN_FLAG_CODE32_SHIFT; + flags |= (((unsigned long)env->seg_cache[R_DS].base | + (unsigned long)env->seg_cache[R_ES].base | + (unsigned long)env->seg_cache[R_SS].base) != 0) << + GEN_FLAG_ADDSEG_SHIFT; + tb = tb_find_and_alloc((unsigned long)env->pc, flags); tc_ptr = tb->tc_ptr; if (!tb->tc_ptr) { /* if no translated code available, then translate it now */ tc_ptr = code_gen_ptr; cpu_x86_gen_code(code_gen_ptr, CODE_GEN_MAX_SIZE, - &code_gen_size, (uint8_t *)env->pc); + &code_gen_size, (uint8_t *)env->pc, flags); tb->tc_ptr = tc_ptr; code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); } @@ -211,3 +223,13 @@ int cpu_x86_exec(CPUX86State *env1) env = saved_env; return ret; } + +void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector) +{ + CPUX86State *saved_env; + + saved_env = env; + env = s; + load_seg(seg_reg, selector); + env = saved_env; +} diff --git a/exec-i386.h b/exec-i386.h index 62f681bc1a..0e0cae2756 100644 --- a/exec-i386.h +++ b/exec-i386.h @@ -27,6 +27,7 @@ typedef struct FILE FILE; extern FILE *logfile; extern int loglevel; extern int fprintf(FILE *, const char *, ...); +extern int printf(const char *, ...); #ifdef __i386__ register unsigned int T0 asm("ebx"); @@ -103,3 +104,5 @@ typedef struct CCTable { } CCTable; extern CCTable cc_table[]; + +void load_seg(int seg_reg, int selector); diff --git a/linux-user/main.c b/linux-user/main.c index 6aefe3afb6..b59c85d9c5 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -1,5 +1,5 @@ /* - * emu main + * gemu main * * Copyright (c) 2003 Fabrice Bellard * @@ -80,10 +80,28 @@ int cpu_x86_inl(int addr) return 0; } +/* default linux values for the selectors */ +#define __USER_CS (0x23) +#define __USER_DS (0x2B) -/* XXX: currently we use LDT entries */ -#define __USER_CS (0x23|4) -#define __USER_DS (0x2B|4) +void write_dt(void *ptr, unsigned long addr, unsigned long limit, + int seg32_bit) +{ + unsigned int e1, e2, limit_in_pages; + limit_in_pages = 0; + if (limit > 0xffff) { + limit = limit >> 12; + limit_in_pages = 1; + } + e1 = (addr << 16) | (limit & 0xffff); + e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); + e2 |= limit_in_pages << 23; /* byte granularity */ + e2 |= seg32_bit << 22; /* 32 bit segment */ + stl((uint8_t *)ptr, e1); + stl((uint8_t *)ptr + 4, e2); +} + +uint64_t gdt_table[6]; void usage(void) { @@ -94,6 +112,8 @@ void usage(void) exit(1); } + + int main(int argc, char **argv) { const char *filename; @@ -149,6 +169,7 @@ int main(int argc, char **argv) env = cpu_x86_init(); + /* linux register setup */ env->regs[R_EAX] = regs->eax; env->regs[R_EBX] = regs->ebx; env->regs[R_ECX] = regs->ecx; @@ -157,23 +178,19 @@ int main(int argc, char **argv) env->regs[R_EDI] = regs->edi; env->regs[R_EBP] = regs->ebp; env->regs[R_ESP] = regs->esp; - env->segs[R_CS] = __USER_CS; - env->segs[R_DS] = __USER_DS; - env->segs[R_ES] = __USER_DS; - env->segs[R_SS] = __USER_DS; - env->segs[R_FS] = __USER_DS; - env->segs[R_GS] = __USER_DS; env->pc = regs->eip; -#if 0 - LDT[__USER_CS >> 3].w86Flags = DF_PRESENT | DF_PAGES | DF_32; - LDT[__USER_CS >> 3].dwSelLimit = 0xfffff; - LDT[__USER_CS >> 3].lpSelBase = NULL; - - LDT[__USER_DS >> 3].w86Flags = DF_PRESENT | DF_PAGES | DF_32; - LDT[__USER_DS >> 3].dwSelLimit = 0xfffff; - LDT[__USER_DS >> 3].lpSelBase = NULL; -#endif + /* linux segment setup */ + env->gdt.base = (void *)gdt_table; + env->gdt.limit = sizeof(gdt_table) - 1; + write_dt(&gdt_table[__USER_CS >> 3], 0, 0xffffffff, 1); + write_dt(&gdt_table[__USER_DS >> 3], 0, 0xffffffff, 1); + cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_DS, __USER_DS); + cpu_x86_load_seg(env, R_ES, __USER_DS); + cpu_x86_load_seg(env, R_SS, __USER_DS); + cpu_x86_load_seg(env, R_FS, __USER_DS); + cpu_x86_load_seg(env, R_GS, __USER_DS); for(;;) { int err; @@ -186,7 +203,8 @@ int main(int argc, char **argv) if (pc[0] == 0xcd && pc[1] == 0x80) { /* syscall */ env->pc += 2; - env->regs[R_EAX] = do_syscall(env->regs[R_EAX], + env->regs[R_EAX] = do_syscall(env, + env->regs[R_EAX], env->regs[R_EBX], env->regs[R_ECX], env->regs[R_EDX], diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 0b9de6b3fe..4f09e6fde2 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -48,7 +48,7 @@ int elf_exec(const char * filename, char ** argv, char ** envp, void target_set_brk(char *new_brk); void syscall_init(void); -long do_syscall(int num, long arg1, long arg2, long arg3, +long do_syscall(void *cpu_env, int num, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6); void gemu_log(const char *fmt, ...) __attribute__((format(printf,1,2))); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index ac40cf19ef..9ed8daa0f8 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -69,6 +69,7 @@ struct dirent { #include "syscall_defs.h" #ifdef TARGET_I386 +#include "cpu-i386.h" #include "syscall-i386.h" #endif @@ -607,6 +608,124 @@ StructEntry struct_termios_def = { .align = { __alignof__(struct target_termios), __alignof__(struct host_termios) }, }; +#ifdef TARGET_I386 + +/* NOTE: there is really one LDT for all the threads */ +uint8_t *ldt_table; + +static int read_ldt(void *ptr, unsigned long bytecount) +{ + int size; + + if (!ldt_table) + return 0; + size = TARGET_LDT_ENTRIES * TARGET_LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + memcpy(ptr, ldt_table, size); + return size; +} + +/* XXX: add locking support */ +static int write_ldt(CPUX86State *env, + void *ptr, unsigned long bytecount, int oldmode) +{ + struct target_modify_ldt_ldt_s ldt_info; + int seg_32bit, contents, read_exec_only, limit_in_pages; + int seg_not_present, useable; + uint32_t *lp, entry_1, entry_2; + + if (bytecount != sizeof(ldt_info)) + return -EINVAL; + memcpy(&ldt_info, ptr, sizeof(ldt_info)); + tswap32s(&ldt_info.entry_number); + tswapls((long *)&ldt_info.base_addr); + tswap32s(&ldt_info.limit); + tswap32s(&ldt_info.flags); + + if (ldt_info.entry_number >= TARGET_LDT_ENTRIES) + return -EINVAL; + seg_32bit = ldt_info.flags & 1; + contents = (ldt_info.flags >> 1) & 3; + read_exec_only = (ldt_info.flags >> 3) & 1; + limit_in_pages = (ldt_info.flags >> 4) & 1; + seg_not_present = (ldt_info.flags >> 5) & 1; + useable = (ldt_info.flags >> 6) & 1; + + if (contents == 3) { + if (oldmode) + return -EINVAL; + if (seg_not_present == 0) + return -EINVAL; + } + /* allocate the LDT */ + if (!ldt_table) { + ldt_table = malloc(TARGET_LDT_ENTRIES * TARGET_LDT_ENTRY_SIZE); + if (!ldt_table) + return -ENOMEM; + memset(ldt_table, 0, TARGET_LDT_ENTRIES * TARGET_LDT_ENTRY_SIZE); + env->ldt.base = ldt_table; + env->ldt.limit = 0xffff; + } + + /* NOTE: same code as Linux kernel */ + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || + (contents == 0 && + read_exec_only == 1 && + seg_32bit == 0 && + limit_in_pages == 0 && + seg_not_present == 1 && + useable == 0 )) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | + (ldt_info.limit & 0x0ffff); + entry_2 = (ldt_info.base_addr & 0xff000000) | + ((ldt_info.base_addr & 0x00ff0000) >> 16) | + (ldt_info.limit & 0xf0000) | + ((read_exec_only ^ 1) << 9) | + (contents << 10) | + ((seg_not_present ^ 1) << 15) | + (seg_32bit << 22) | + (limit_in_pages << 23) | + 0x7000; + if (!oldmode) + entry_2 |= (useable << 20); + + /* Install the new entry ... */ +install: + lp = (uint32_t *)(ldt_table + (ldt_info.entry_number << 3)); + lp[0] = tswap32(entry_1); + lp[1] = tswap32(entry_2); + return 0; +} + +/* specific and weird i386 syscalls */ +int gemu_modify_ldt(CPUX86State *env, int func, void *ptr, unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(env, ptr, bytecount, 1); + break; + case 0x11: + ret = write_ldt(env, ptr, bytecount, 0); + break; + } + return ret; +} +#endif + void syscall_init(void) { #define STRUCT(name, list...) thunk_register_struct(STRUCT_ ## name, #name, struct_ ## name ## _def); @@ -616,7 +735,7 @@ void syscall_init(void) #undef STRUCT_SPECIAL } -long do_syscall(int num, long arg1, long arg2, long arg3, +long do_syscall(void *cpu_env, int num, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6) { long ret; @@ -1095,8 +1214,11 @@ long do_syscall(int num, long arg1, long arg2, long arg3, /* no need to transcode because we use the linux syscall */ ret = get_errno(sys_uname((struct new_utsname *)arg1)); break; +#ifdef TARGET_I386 case TARGET_NR_modify_ldt: - goto unimplemented; + ret = get_errno(gemu_modify_ldt(cpu_env, arg1, (void *)arg2, arg3)); + break; +#endif case TARGET_NR_adjtimex: goto unimplemented; case TARGET_NR_mprotect: diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h index 63852d3af0..540114430b 100644 --- a/linux-user/syscall_types.h +++ b/linux-user/syscall_types.h @@ -61,4 +61,3 @@ STRUCT(cdrom_read_audio, STRUCT(hd_geometry, TYPE_CHAR, TYPE_CHAR, TYPE_SHORT, TYPE_ULONG) - diff --git a/op-i386.c b/op-i386.c index 6d695ff90d..503fb88ede 100644 --- a/op-i386.c +++ b/op-i386.c @@ -858,6 +858,60 @@ void OPPROTO op_das(void) CC_SRC = eflags; } +/* segment handling */ + +void load_seg(int seg_reg, int selector) +{ + SegmentCache *sc; + SegmentDescriptorTable *dt; + int index; + uint32_t e1, e2; + uint8_t *ptr; + + env->segs[seg_reg] = selector; + sc = &env->seg_cache[seg_reg]; + if (env->vm86) { + sc->base = (void *)(selector << 4); + sc->limit = 0xffff; + sc->seg_32bit = 0; + } else { + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + if ((index + 7) > dt->limit) + raise_exception(EXCP0D_GPF); + ptr = dt->base + index; + e1 = ldl(ptr); + e2 = ldl(ptr + 4); + sc->base = (void *)((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000)); + sc->limit = (e1 & 0xffff) | (e2 & 0x000f0000); + if (e2 & (1 << 23)) + sc->limit = (sc->limit << 12) | 0xfff; + sc->seg_32bit = (e2 >> 22) & 1; +#if 0 + fprintf(logfile, "load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx seg_32bit=%d\n", + selector, (unsigned long)sc->base, sc->limit, sc->seg_32bit); +#endif + } +} + +void OPPROTO op_movl_seg_T0(void) +{ + load_seg(PARAM1, T0 & 0xffff); +} + +void OPPROTO op_movl_T0_seg(void) +{ + T0 = env->segs[PARAM1]; +} + +void OPPROTO op_addl_A0_seg(void) +{ + A0 += *(unsigned long *)((char *)env + PARAM1); +} + /* flags handling */ /* slow jumps cases (compute x86 flags) */ diff --git a/syscall-i386.h b/syscall-i386.h index 312edc6849..a54064a054 100644 --- a/syscall-i386.h +++ b/syscall-i386.h @@ -758,3 +758,14 @@ struct target_termios { #define TARGET_SOUND_MIXER_WRITE_ENHANCE 0xc0044d1f #define TARGET_SOUND_MIXER_WRITE_LOUD 0xc0044d1f #define TARGET_SOUND_MIXER_WRITE_RECSRC 0xc0044dff + +#define TARGET_LDT_ENTRIES 8192 +#define TARGET_LDT_ENTRY_SIZE 8 + +struct target_modify_ldt_ldt_s { + unsigned int entry_number; + target_ulong base_addr; + unsigned int limit; + unsigned int flags; +}; + diff --git a/tests/test-i386.c b/tests/test-i386.c index b43ca59d98..95ec5d1509 100644 --- a/tests/test-i386.c +++ b/tests/test-i386.c @@ -1,5 +1,6 @@ #include #include +#include #include #define xglue(x, y) x ## y @@ -612,6 +613,81 @@ void test_bcd(void) TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)); } +/**********************************************/ +/* segmentation tests */ + +#include +#include + +_syscall3(int, modify_ldt, int, func, void *, ptr, unsigned long, bytecount) + +uint8_t seg_data1[4096]; +uint8_t seg_data2[4096]; + +#define MK_SEL(n) (((n) << 3) | 4) + +/* NOTE: we use Linux modify_ldt syscall */ +void test_segs(void) +{ + struct modify_ldt_ldt_s ldt; + long long ldt_table[3]; + int i, res, res2; + char tmp; + + ldt.entry_number = 1; + ldt.base_addr = (unsigned long)&seg_data1; + ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12; + ldt.seg_32bit = 1; + ldt.contents = MODIFY_LDT_CONTENTS_DATA; + ldt.read_exec_only = 0; + ldt.limit_in_pages = 1; + ldt.seg_not_present = 0; + ldt.useable = 1; + modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ + + ldt.entry_number = 2; + ldt.base_addr = (unsigned long)&seg_data2; + ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12; + ldt.seg_32bit = 1; + ldt.contents = MODIFY_LDT_CONTENTS_DATA; + ldt.read_exec_only = 0; + ldt.limit_in_pages = 1; + ldt.seg_not_present = 0; + ldt.useable = 1; + modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ + + modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */ + for(i=0;i<3;i++) + printf("%d: %016Lx\n", i, ldt_table[i]); + + /* do some tests with fs or gs */ + asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1))); + asm volatile ("movl %0, %%gs" : : "r" (MK_SEL(2))); + + seg_data1[1] = 0xaa; + seg_data2[1] = 0x55; + + asm volatile ("fs movzbl 0x1, %0" : "=r" (res)); + printf("FS[1] = %02x\n", res); + + asm volatile ("gs movzbl 0x1, %0" : "=r" (res)); + printf("GS[1] = %02x\n", res); + + /* tests with ds/ss (implicit segment case) */ + tmp = 0xa5; + asm volatile ("pushl %%ebp\n\t" + "pushl %%ds\n\t" + "movl %2, %%ds\n\t" + "movl %3, %%ebp\n\t" + "movzbl 0x1, %0\n\t" + "movzbl (%%ebp), %1\n\t" + "popl %%ds\n\t" + "popl %%ebp\n\t" + : "=r" (res), "=r" (res2) + : "r" (MK_SEL(1)), "r" (&tmp)); + printf("DS[1] = %02x\n", res); + printf("SS[tmp] = %02x\n", res2); +} static void *call_end __init_call = NULL; @@ -628,8 +704,9 @@ int main(int argc, char **argv) test_bsx(); test_mul(); test_jcc(); - test_lea(); test_floats(); test_bcd(); + test_lea(); + test_segs(); return 0; } diff --git a/translate-i386.c b/translate-i386.c index 0dbaa99d92..5146242c6c 100644 --- a/translate-i386.c +++ b/translate-i386.c @@ -34,6 +34,10 @@ #include "dis-asm.h" #endif +#ifndef offsetof +#define offsetof(type, field) ((size_t) &((type *)0)->field) +#endif + static uint8_t *gen_code_ptr; int __op_param1, __op_param2, __op_param3; @@ -71,8 +75,13 @@ typedef struct DisasContext { int prefix; int aflag, dflag; uint8_t *pc; /* current pc */ - int cc_op; /* current CC operation */ - int f_st; + int is_jmp; /* 1 = means jump (stop translation), 2 means CPU + static state change (stop translation) */ + /* current block context */ + int code32; /* 32 bit code segment */ + int cc_op; /* current CC operation */ + int addseg; /* non zero if either DS/ES/SS have a non zero base */ + int f_st; /* currently unused */ } DisasContext; /* i386 arith/logic operations */ @@ -763,12 +772,32 @@ static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c) static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr) { int havesib; - int havebase; int base, disp; - int index = 0; - int scale = 0; - int reg1, reg2, opreg; - int mod, rm, code; + int index; + int scale; + int opreg; + int mod, rm, code, override, must_add_seg; + + /* XXX: add a generation time variable to tell if base == 0 in DS/ES/SS */ + /* XXX: fix lea case */ + override = -1; + must_add_seg = s->addseg; + if (s->prefix & (PREFIX_CS | PREFIX_SS | PREFIX_DS | + PREFIX_ES | PREFIX_FS | PREFIX_GS)) { + if (s->prefix & PREFIX_ES) + override = R_ES; + else if (s->prefix & PREFIX_CS) + override = R_CS; + else if (s->prefix & PREFIX_SS) + override = R_SS; + else if (s->prefix & PREFIX_DS) + override = R_DS; + else if (s->prefix & PREFIX_FS) + override = R_FS; + else + override = R_GS; + must_add_seg = 1; + } mod = (modrm >> 6) & 3; rm = modrm & 7; @@ -776,8 +805,9 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ if (s->aflag) { havesib = 0; - havebase = 1; base = rm; + index = 0; + scale = 0; if (base == 4) { havesib = 1; @@ -790,7 +820,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ switch (mod) { case 0: if (base == 5) { - havebase = 0; + base = -1; disp = ldl(s->pc); s->pc += 4; } else { @@ -806,40 +836,25 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ s->pc += 4; break; } - - reg1 = OR_ZERO; - reg2 = OR_ZERO; - - if (havebase || (havesib && (index != 4 || scale != 0))) { - if (havebase) - reg1 = OR_EAX + base; - if (havesib && index != 4) { - if (havebase) - reg2 = index + OR_EAX; - else - reg1 = index + OR_EAX; - } - } - /* XXX: disp only ? */ - if (reg2 == OR_ZERO) { - /* op: disp + (reg1 << scale) */ - if (reg1 == OR_ZERO) { - gen_op_movl_A0_im(disp); - } else if (scale == 0 && disp == 0) { - gen_op_movl_A0_reg[reg1](); - } else { - gen_op_movl_A0_im(disp); - gen_op_addl_A0_reg_sN[scale][reg1](); - } + + if (base >= 0) { + gen_op_movl_A0_reg[base](); + if (disp != 0) + gen_op_addl_A0_im(disp); } else { - /* op: disp + reg1 + (reg2 << scale) */ - if (disp != 0) { - gen_op_movl_A0_im(disp); - gen_op_addl_A0_reg_sN[0][reg1](); - } else { - gen_op_movl_A0_reg[reg1](); + gen_op_movl_A0_im(disp); + } + if (havesib && (index != 4 || scale != 0)) { + gen_op_addl_A0_reg_sN[scale][index](); + } + if (must_add_seg) { + if (override < 0) { + if (base == R_EBP || base == R_ESP) + override = R_SS; + else + override = R_DS; } - gen_op_addl_A0_reg_sN[scale][reg2](); + gen_op_addl_A0_seg(offsetof(CPUX86State,seg_cache[override].base)); } } else { switch (mod) { @@ -848,6 +863,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ disp = lduw(s->pc); s->pc += 2; gen_op_movl_A0_im(disp); + rm = 0; /* avoid SS override */ goto no_rm; } else { disp = 0; @@ -896,8 +912,18 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ if (disp != 0) gen_op_addl_A0_im(disp); gen_op_andl_A0_ffff(); - no_rm: ; + no_rm: + if (must_add_seg) { + if (override < 0) { + if (rm == 2 || rm == 3 || rm == 6) + override = R_SS; + else + override = R_DS; + } + gen_op_addl_A0_seg(offsetof(CPUX86State,seg_cache[override].base)); + } } + opreg = OR_A0; disp = 0; *reg_ptr = opreg; @@ -1082,10 +1108,19 @@ static void gen_setcc(DisasContext *s, int b) } } +/* move T0 to seg_reg and compute if the CPU state may change */ +void gen_movl_seg_T0(DisasContext *s, int seg_reg) +{ + gen_op_movl_seg_T0(seg_reg); + if (!s->addseg && seg_reg < R_FS) + s->is_jmp = 2; /* abort translation because the register may + have a non zero base */ +} + /* return the next pc address. Return -1 if no insn found. *is_jmp_ptr is set to true if the instruction sets the PC (last instruction of a basic block) */ -long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) +long disas_insn(DisasContext *s, uint8_t *pc_start) { int b, prefixes, aflag, dflag; int shift, ot; @@ -1093,8 +1128,8 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) s->pc = pc_start; prefixes = 0; - aflag = 1; - dflag = 1; + aflag = s->code32; + dflag = s->code32; // cur_pc = s->pc; /* for insn generation */ next_byte: b = ldub(s->pc); @@ -1416,11 +1451,11 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_movl_T1_im((long)s->pc); gen_op_pushl_T1(); gen_op_jmp_T0(); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 4: /* jmp Ev */ gen_op_jmp_T0(); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 6: /* push Ev */ gen_op_pushl_T0(); @@ -1555,6 +1590,30 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_popl_T0(); gen_op_mov_reg_T0[OT_LONG][R_EBP](); break; + case 0x06: /* push es */ + case 0x0e: /* push cs */ + case 0x16: /* push ss */ + case 0x1e: /* push ds */ + gen_op_movl_T0_seg(b >> 3); + gen_op_pushl_T0(); + break; + case 0x1a0: /* push fs */ + case 0x1a8: /* push gs */ + gen_op_movl_T0_seg(((b >> 3) & 7) + R_FS); + gen_op_pushl_T0(); + break; + case 0x07: /* pop es */ + case 0x17: /* pop ss */ + case 0x1f: /* pop ds */ + gen_op_popl_T0(); + gen_movl_seg_T0(s, b >> 3); + break; + case 0x1a1: /* pop fs */ + case 0x1a9: /* pop gs */ + gen_op_popl_T0(); + gen_movl_seg_T0(s, ((b >> 3) & 7) + R_FS); + break; + /**************************/ /* mov */ case 0x88: @@ -1598,6 +1657,24 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); gen_op_mov_reg_T0[ot][reg](); break; + case 0x8e: /* mov seg, Gv */ + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub(s->pc++); + reg = (modrm >> 3) & 7; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + if (reg >= 6) + goto illegal_op; + gen_movl_seg_T0(s, reg); + break; + case 0x8c: /* mov Gv, seg */ + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub(s->pc++); + reg = (modrm >> 3) & 7; + if (reg >= 6) + goto illegal_op; + gen_op_movl_T0_seg(reg); + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; case 0x1b6: /* movzbS Gv, Eb */ case 0x1b7: /* movzwS Gv, Eb */ @@ -1648,8 +1725,13 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) ot = dflag ? OT_LONG : OT_WORD; modrm = ldub(s->pc++); reg = (modrm >> 3) & 7; - + /* we must ensure that no segment is added */ + s->prefix &= ~(PREFIX_CS | PREFIX_SS | PREFIX_DS | + PREFIX_ES | PREFIX_FS | PREFIX_GS); + val = s->addseg; + s->addseg = 0; gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + s->addseg = val; gen_op_mov_reg_A0[ot - OT_WORD][reg](); break; @@ -1711,6 +1793,35 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_st_T0_A0[ot](); gen_op_mov_reg_T1[ot][reg](); break; + case 0xc4: /* les Gv */ + op = R_ES; + goto do_lxx; + case 0xc5: /* lds Gv */ + op = R_DS; + goto do_lxx; + case 0x1b2: /* lss Gv */ + op = R_SS; + goto do_lxx; + case 0x1b4: /* lfs Gv */ + op = R_FS; + goto do_lxx; + case 0x1b5: /* lgs Gv */ + op = R_GS; + do_lxx: + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + gen_op_ld_T1_A0[ot](); + op_addl_A0_im(1 << (ot - OT_WORD + 1)); + /* load the segment first to handle exceptions properly */ + gen_op_lduw_T0_A0(); + gen_movl_seg_T0(s, op); + /* then put the data */ + gen_op_mov_reg_T1[ot][reg](); + break; /************************/ /* shifts */ @@ -2327,12 +2438,12 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_popl_T0(); gen_op_addl_ESP_im(val); gen_op_jmp_T0(); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0xc3: /* ret */ gen_op_popl_T0(); gen_op_jmp_T0(); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0xe8: /* call */ val = insn_get(s, OT_LONG); @@ -2340,19 +2451,19 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_movl_T1_im((long)s->pc); gen_op_pushl_T1(); gen_op_jmp_im(val); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0xe9: /* jmp */ val = insn_get(s, OT_LONG); val += (long)s->pc; gen_op_jmp_im(val); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0xeb: /* jmp Jb */ val = (int8_t)insn_get(s, OT_BYTE); val += (long)s->pc; gen_op_jmp_im(val); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0x70 ... 0x7f: /* jcc Jb */ val = (int8_t)insn_get(s, OT_BYTE); @@ -2367,7 +2478,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) val += (long)s->pc; /* XXX: fix 16 bit wrap */ do_jcc: gen_jcc(s, b, val); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0x190 ... 0x19f: @@ -2548,19 +2659,19 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) break; case 0xcc: /* int3 */ gen_op_int3((long)pc_start); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0xcd: /* int N */ val = ldub(s->pc++); /* XXX: currently we ignore the interrupt number */ gen_op_int_im((long)pc_start); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0xce: /* into */ if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); gen_op_into((long)pc_start, (long)s->pc); - *is_jmp_ptr = 1; + s->is_jmp = 1; break; case 0x1c8 ... 0x1cf: /* bswap reg */ reg = b & 7; @@ -2586,38 +2697,43 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) return -1; } return (long)s->pc; + illegal_op: + error("illegal opcode pc=0x%08Lx", (long)pc_start); + return -1; } /* return the next pc */ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size, - int *gen_code_size_ptr, uint8_t *pc_start) + int *gen_code_size_ptr, uint8_t *pc_start, + int flags) { DisasContext dc1, *dc = &dc1; uint8_t *gen_code_end, *pc_ptr; - int is_jmp; long ret; #ifdef DEBUG_DISAS struct disassemble_info disasm_info; #endif - + dc->code32 = (flags >> GEN_FLAG_CODE32_SHIFT) & 1; + dc->addseg = (flags >> GEN_FLAG_ADDSEG_SHIFT) & 1; + dc->f_st = (flags >> GEN_FLAG_ST_SHIFT) & 7; dc->cc_op = CC_OP_DYNAMIC; gen_code_ptr = gen_code_buf; gen_code_end = gen_code_buf + max_code_size - 4096; gen_start(); - is_jmp = 0; + dc->is_jmp = 0; pc_ptr = pc_start; do { - ret = disas_insn(dc, pc_ptr, &is_jmp); + ret = disas_insn(dc, pc_ptr); if (ret == -1) error("unknown instruction at PC=0x%x B=%02x %02x", pc_ptr, pc_ptr[0], pc_ptr[1]); pc_ptr = (void *)ret; - } while (!is_jmp && gen_code_ptr < gen_code_end); + } while (!dc->is_jmp && gen_code_ptr < gen_code_end); /* we must store the eflags state if it is not already done */ if (dc->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(dc->cc_op); - if (!is_jmp) { + if (dc->is_jmp != 1) { /* we add an additionnal jmp to update the simulated PC */ gen_op_jmp_im(ret); }