diff --git a/cpu-i386.h b/cpu-i386.h index 9add4175d3..e528f61981 100644 --- a/cpu-i386.h +++ b/cpu-i386.h @@ -78,19 +78,27 @@ enum { CC_OP_ADDW, CC_OP_ADDL, + CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_ADCW, + CC_OP_ADCL, + CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ CC_OP_SUBW, CC_OP_SUBL, + CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_SBBW, + CC_OP_SBBL, + CC_OP_LOGICB, /* modify all flags, CC_DST = res */ CC_OP_LOGICW, CC_OP_LOGICL, - CC_OP_INCB, /* modify all flags except, CC_DST = res */ + CC_OP_INCB, /* modify all flags except, CC_DST = res, CC_SRC = C */ CC_OP_INCW, CC_OP_INCL, - CC_OP_DECB, /* modify all flags except, CC_DST = res */ + CC_OP_DECB, /* modify all flags except, CC_DST = res, CC_SRC = C */ CC_OP_DECW, CC_OP_DECL, @@ -98,6 +106,10 @@ enum { CC_OP_SHLW, CC_OP_SHLL, + CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */ + CC_OP_SARW, + CC_OP_SARL, + CC_OP_NB, }; diff --git a/dyngen.c b/dyngen.c index ce38dcaec4..f6b102fefa 100644 --- a/dyngen.c +++ b/dyngen.c @@ -198,14 +198,10 @@ void gen_code(const char *name, unsigned long offset, unsigned long size, { uint8_t *p; p = p_end - 1; - /* find ret */ - while (p > p_start && *p != 0xc3) - p--; - /* skip double ret */ - if (p > p_start && p[-1] == 0xc3) - p--; if (p == p_start) error("empty code for %s", name); + if (p[0] != 0xc3) + error("ret expected at the end of %s", name); copy_size = p - p_start; } break; diff --git a/linux-user/main.c b/linux-user/main.c index cdd118f369..356d980f04 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -128,21 +128,21 @@ int main(int argc, char **argv) /* Zero out image_info */ memset(info, 0, sizeof(struct image_info)); - if(elf_exec(filename, argv+1, environ, regs, info) != 0) { + if(elf_exec(filename, argv+optind, environ, regs, info) != 0) { printf("Error loading %s\n", filename); exit(1); } -#if 0 - printf("start_brk 0x%08lx\n" , info->start_brk); - printf("end_code 0x%08lx\n" , info->end_code); - printf("start_code 0x%08lx\n" , info->start_code); - printf("end_data 0x%08lx\n" , info->end_data); - printf("start_stack 0x%08lx\n" , info->start_stack); - printf("brk 0x%08lx\n" , info->brk); - printf("esp 0x%08lx\n" , regs->esp); - printf("eip 0x%08lx\n" , regs->eip); -#endif + if (loglevel) { + fprintf(logfile, "start_brk 0x%08lx\n" , info->start_brk); + fprintf(logfile, "end_code 0x%08lx\n" , info->end_code); + fprintf(logfile, "start_code 0x%08lx\n" , info->start_code); + fprintf(logfile, "end_data 0x%08lx\n" , info->end_data); + fprintf(logfile, "start_stack 0x%08lx\n" , info->start_stack); + fprintf(logfile, "brk 0x%08lx\n" , info->brk); + fprintf(logfile, "esp 0x%08lx\n" , regs->esp); + fprintf(logfile, "eip 0x%08lx\n" , regs->eip); + } target_set_brk((char *)info->brk); syscall_init(); diff --git a/op-i386.c b/op-i386.c index 849e508e20..f7f1a9849e 100644 --- a/op-i386.c +++ b/op-i386.c @@ -10,7 +10,18 @@ typedef signed short int16_t; typedef signed int int32_t; typedef signed long long int64_t; +#define bswap32(x) \ +({ \ + uint32_t __x = (x); \ + ((uint32_t)( \ + (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \ + (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \ + (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \ + (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \ +}) + #define NULL 0 +#include typedef struct FILE FILE; extern FILE *logfile; @@ -18,41 +29,39 @@ extern int loglevel; extern int fprintf(FILE *, const char *, ...); #ifdef __i386__ -register int T0 asm("esi"); -register int T1 asm("ebx"); -register int A0 asm("edi"); +register unsigned int T0 asm("ebx"); +register unsigned int T1 asm("esi"); +register unsigned int A0 asm("edi"); register struct CPUX86State *env asm("ebp"); -#define FORCE_RET() asm volatile ("ret"); #endif #ifdef __powerpc__ -register int T0 asm("r24"); -register int T1 asm("r25"); -register int A0 asm("r26"); +register unsigned int T0 asm("r24"); +register unsigned int T1 asm("r25"); +register unsigned int A0 asm("r26"); register struct CPUX86State *env asm("r27"); -#define FORCE_RET() asm volatile ("blr"); #endif #ifdef __arm__ -register int T0 asm("r4"); -register int T1 asm("r5"); -register int A0 asm("r6"); +register unsigned int T0 asm("r4"); +register unsigned int T1 asm("r5"); +register unsigned int A0 asm("r6"); register struct CPUX86State *env asm("r7"); -#define FORCE_RET() asm volatile ("mov pc, lr"); #endif #ifdef __mips__ -register int T0 asm("s0"); -register int T1 asm("s1"); -register int A0 asm("s2"); +register unsigned int T0 asm("s0"); +register unsigned int T1 asm("s1"); +register unsigned int A0 asm("s2"); register struct CPUX86State *env asm("s3"); -#define FORCE_RET() asm volatile ("jr $31"); #endif #ifdef __sparc__ -register int T0 asm("l0"); -register int T1 asm("l1"); -register int A0 asm("l2"); +register unsigned int T0 asm("l0"); +register unsigned int T1 asm("l1"); +register unsigned int A0 asm("l2"); register struct CPUX86State *env asm("l3"); -#define FORCE_RET() asm volatile ("retl ; nop"); #endif +/* force GCC to generate only one epilog at the end of the function */ +#define FORCE_RET() asm volatile (""); + #ifndef OPPROTO #define OPPROTO #endif @@ -267,20 +276,6 @@ void OPPROTO op_orl_T0_T1_cc(void) CC_DST = T0; } -void OPPROTO op_adcl_T0_T1_cc(void) -{ - CC_SRC = T0; - T0 = T0 + T1 + cc_table[CC_OP].compute_c(); - CC_DST = T0; -} - -void OPPROTO op_sbbl_T0_T1_cc(void) -{ - CC_SRC = T0; - T0 = T0 - T1 - cc_table[CC_OP].compute_c(); - CC_DST = T0; -} - void OPPROTO op_andl_T0_T1_cc(void) { T0 &= T1; @@ -320,12 +315,14 @@ void OPPROTO op_negl_T0_cc(void) void OPPROTO op_incl_T0_cc(void) { + CC_SRC = cc_table[CC_OP].compute_c(); T0++; CC_DST = T0; } void OPPROTO op_decl_T0_cc(void) { + CC_SRC = cc_table[CC_OP].compute_c(); T0--; CC_DST = T0; } @@ -335,6 +332,11 @@ void OPPROTO op_testl_T0_T1_cc(void) CC_DST = T0 & T1; } +void OPPROTO op_bswapl_T0(void) +{ + T0 = bswap32(T0); +} + /* multiply/divide */ void OPPROTO op_mulb_AL_T0(void) { @@ -399,7 +401,7 @@ void OPPROTO op_imulw_T0_T1(void) void OPPROTO op_imull_T0_T1(void) { int64_t res; - res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T1); + res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); T0 = res; CC_SRC = (res != (int32_t)res); } @@ -468,10 +470,10 @@ void OPPROTO op_divl_EAX_T0(void) void OPPROTO op_idivl_EAX_T0(void) { int den, q, r; - int16_t num; + int64_t num; num = EAX | ((uint64_t)EDX << 32); - den = (int16_t)T0; + den = T0; q = (num / den); r = (num % den); EAX = q; @@ -495,6 +497,16 @@ void OPPROTO op_movl_A0_im(void) A0 = PARAM1; } +void OPPROTO op_addl_A0_im(void) +{ + A0 += PARAM1; +} + +void OPPROTO op_andl_A0_ffff(void) +{ + A0 = A0 & 0xffff; +} + /* memory access */ void OPPROTO op_ldub_T0_A0(void) @@ -562,7 +574,17 @@ void OPPROTO op_stl_T0_A0(void) stl((uint8_t *)A0, T0); } -/* jumps */ +/* used for bit operations */ + +void OPPROTO op_add_bitw_A0_T1(void) +{ + A0 += ((int32_t)T1 >> 4) << 1; +} + +void OPPROTO op_add_bitl_A0_T1(void) +{ + A0 += ((int32_t)T1 >> 5) << 2; +} /* indirect jump */ @@ -938,25 +960,37 @@ CCTable cc_table[CC_OP_NB] = { [CC_OP_ADDW] = { compute_all_addw, compute_c_addw }, [CC_OP_ADDL] = { compute_all_addl, compute_c_addl }, + [CC_OP_ADCB] = { compute_all_adcb, compute_c_adcb }, + [CC_OP_ADCW] = { compute_all_adcw, compute_c_adcw }, + [CC_OP_ADCL] = { compute_all_adcl, compute_c_adcl }, + [CC_OP_SUBB] = { compute_all_subb, compute_c_subb }, [CC_OP_SUBW] = { compute_all_subw, compute_c_subw }, [CC_OP_SUBL] = { compute_all_subl, compute_c_subl }, + [CC_OP_SBBB] = { compute_all_sbbb, compute_c_sbbb }, + [CC_OP_SBBW] = { compute_all_sbbw, compute_c_sbbw }, + [CC_OP_SBBL] = { compute_all_sbbl, compute_c_sbbl }, + [CC_OP_LOGICB] = { compute_all_logicb, compute_c_logicb }, [CC_OP_LOGICW] = { compute_all_logicw, compute_c_logicw }, [CC_OP_LOGICL] = { compute_all_logicl, compute_c_logicl }, - [CC_OP_INCB] = { compute_all_incb, compute_c_incb }, - [CC_OP_INCW] = { compute_all_incw, compute_c_incw }, + [CC_OP_INCB] = { compute_all_incb, compute_c_incl }, + [CC_OP_INCW] = { compute_all_incw, compute_c_incl }, [CC_OP_INCL] = { compute_all_incl, compute_c_incl }, - [CC_OP_DECB] = { compute_all_decb, compute_c_incb }, - [CC_OP_DECW] = { compute_all_decw, compute_c_incw }, + [CC_OP_DECB] = { compute_all_decb, compute_c_incl }, + [CC_OP_DECW] = { compute_all_decw, compute_c_incl }, [CC_OP_DECL] = { compute_all_decl, compute_c_incl }, - [CC_OP_SHLB] = { compute_all_shlb, compute_c_shlb }, - [CC_OP_SHLW] = { compute_all_shlw, compute_c_shlw }, + [CC_OP_SHLB] = { compute_all_shlb, compute_c_shll }, + [CC_OP_SHLW] = { compute_all_shlw, compute_c_shll }, [CC_OP_SHLL] = { compute_all_shll, compute_c_shll }, + + [CC_OP_SARB] = { compute_all_sarb, compute_c_shll }, + [CC_OP_SARW] = { compute_all_sarw, compute_c_shll }, + [CC_OP_SARL] = { compute_all_sarl, compute_c_shll }, }; /* floating point support */ @@ -1640,6 +1674,41 @@ void OPPROTO op_fcos(void) helper_fcos(); } +void OPPROTO op_fnstsw_A0(void) +{ + int fpus; + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + stw((void *)A0, fpus); +} + +void OPPROTO op_fnstcw_A0(void) +{ + stw((void *)A0, env->fpuc); +} + +void OPPROTO op_fldcw_A0(void) +{ + int rnd_type; + env->fpuc = lduw((void *)A0); + /* set rounding mode */ + switch(env->fpuc & RC_MASK) { + default: + case RC_NEAR: + rnd_type = FE_TONEAREST; + break; + case RC_DOWN: + rnd_type = FE_DOWNWARD; + break; + case RC_UP: + rnd_type = FE_UPWARD; + break; + case RC_CHOP: + rnd_type = FE_TOWARDZERO; + break; + } + fesetround(rnd_type); +} + /* main execution loop */ uint8_t code_gen_buffer[65536]; @@ -1651,9 +1720,15 @@ static const char *cc_op_str[] = { "ADDB", "ADDW", "ADDL", + "ADCB", + "ADCW", + "ADCL", "SUBB", "SUBW", "SUBL", + "SBBB", + "SBBW", + "SBBL", "LOGICB", "LOGICW", "LOGICL", @@ -1666,6 +1741,9 @@ static const char *cc_op_str[] = { "SHLB", "SHLW", "SHLL", + "SARB", + "SARW", + "SARL", }; #endif @@ -1688,13 +1766,24 @@ int cpu_x86_exec(CPUX86State *env1) for(;;) { #ifdef DEBUG_EXEC if (loglevel) { + int eflags; + eflags = cc_table[CC_OP].compute_all(); + eflags |= (DF & DIRECTION_FLAG); fprintf(logfile, "EAX=%08x EBX=%08X ECX=%08x EDX=%08x\n" - "ESI=%08x ESI=%08X EBP=%08x ESP=%08x\n" - "CCS=%08x CCD=%08x CCOP=%s\n", + "ESI=%08x EDI=%08X EBP=%08x ESP=%08x\n" + "CCS=%08x CCD=%08x CCO=%-8s EFL=%c%c%c%c%c%c%c\n", env->regs[R_EAX], env->regs[R_EBX], env->regs[R_ECX], env->regs[R_EDX], env->regs[R_ESI], env->regs[R_EDI], env->regs[R_EBP], env->regs[R_ESP], - env->cc_src, env->cc_dst, cc_op_str[env->cc_op]); + env->cc_src, env->cc_dst, cc_op_str[env->cc_op], + eflags & DIRECTION_FLAG ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 'C' : '-' + ); } #endif cpu_x86_gen_code(code_gen_buffer, &code_gen_size, (uint8_t *)env->pc); diff --git a/ops_template.h b/ops_template.h index c67fe0fd41..e7317eae6a 100644 --- a/ops_template.h +++ b/ops_template.h @@ -33,7 +33,7 @@ static int glue(compute_all_add, SUFFIX)(void) cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; pf = parity_table[(uint8_t)CC_DST]; af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST != 0) << 6; + zf = ((DATA_TYPE)CC_DST == 0) << 6; sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; @@ -47,6 +47,29 @@ static int glue(compute_c_add, SUFFIX)(void) return cf; } +static int glue(compute_all_adc, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + int src1, src2; + src1 = CC_SRC; + src2 = CC_DST - CC_SRC - 1; + cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_adc, SUFFIX)(void) +{ + int src1, cf; + src1 = CC_SRC; + cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; + return cf; +} + static int glue(compute_all_sub, SUFFIX)(void) { int cf, pf, af, zf, sf, of; @@ -56,9 +79,9 @@ static int glue(compute_all_sub, SUFFIX)(void) cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; pf = parity_table[(uint8_t)CC_DST]; af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST != 0) << 6; + zf = ((DATA_TYPE)CC_DST == 0) << 6; sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } @@ -67,7 +90,31 @@ static int glue(compute_c_sub, SUFFIX)(void) int src1, src2, cf; src1 = CC_SRC; src2 = CC_SRC - CC_DST; - cf = (DATA_TYPE)src1 < (DATA_TYPE)src1; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; + return cf; +} + +static int glue(compute_all_sbb, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST - 1; + cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_sbb, SUFFIX)(void) +{ + int src1, src2, cf; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST - 1; + cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; return cf; } @@ -77,7 +124,7 @@ static int glue(compute_all_logic, SUFFIX)(void) cf = 0; pf = parity_table[(uint8_t)CC_DST]; af = 0; - zf = ((DATA_TYPE)CC_DST != 0) << 6; + zf = ((DATA_TYPE)CC_DST == 0) << 6; sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; of = 0; return cf | pf | af | zf | sf | of; @@ -97,16 +144,18 @@ static int glue(compute_all_inc, SUFFIX)(void) cf = CC_SRC; pf = parity_table[(uint8_t)CC_DST]; af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST != 0) << 6; + zf = ((DATA_TYPE)CC_DST == 0) << 6; sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11; return cf | pf | af | zf | sf | of; } +#if DATA_BITS == 32 static int glue(compute_c_inc, SUFFIX)(void) { return CC_SRC; } +#endif static int glue(compute_all_dec, SUFFIX)(void) { @@ -117,9 +166,9 @@ static int glue(compute_all_dec, SUFFIX)(void) cf = CC_SRC; pf = parity_table[(uint8_t)CC_DST]; af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST != 0) << 6; + zf = ((DATA_TYPE)CC_DST == 0) << 6; sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + of = ((CC_DST & DATA_MASK) == ((uint32_t)SIGN_MASK - 1)) << 11; return cf | pf | af | zf | sf | of; } @@ -129,16 +178,30 @@ static int glue(compute_all_shl, SUFFIX)(void) cf = CC_SRC & 1; pf = parity_table[(uint8_t)CC_DST]; af = 0; /* undefined */ - zf = ((DATA_TYPE)CC_DST != 0) << 6; + zf = ((DATA_TYPE)CC_DST == 0) << 6; sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = sf << 4; /* only meaniful for shr with count == 1 */ + of = lshift(CC_SRC, 12 - DATA_BITS) & CC_O; /* only meaniful for shr with count == 1 */ return cf | pf | af | zf | sf | of; } +#if DATA_BITS == 32 static int glue(compute_c_shl, SUFFIX)(void) { return CC_SRC & 1; } +#endif + +static int glue(compute_all_sar, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = CC_SRC & 1; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = 0; /* only meaniful for shr with count == 1 */ + return cf | pf | af | zf | sf | of; +} /* various optimized jumps cases */ @@ -157,7 +220,7 @@ void OPPROTO glue(op_jb_sub, SUFFIX)(void) void OPPROTO glue(op_jz_sub, SUFFIX)(void) { - if ((DATA_TYPE)CC_DST != 0) + if ((DATA_TYPE)CC_DST == 0) PC = PARAM1; else PC = PARAM2; @@ -225,7 +288,7 @@ void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void) void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void) { - T0 = ((DATA_TYPE)CC_DST != 0); + T0 = ((DATA_TYPE)CC_DST == 0); } void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void) @@ -275,6 +338,7 @@ void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1_cc)(void) (T0 & CC_C); CC_OP = CC_OP_EFLAGS; } + FORCE_RET(); } void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void) @@ -290,6 +354,7 @@ void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void) ((T0 >> (DATA_BITS - 1)) & CC_C); CC_OP = CC_OP_EFLAGS; } + FORCE_RET(); } void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void) @@ -305,6 +370,7 @@ void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void) #endif if (count) { eflags = cc_table[CC_OP].compute_all(); + T0 &= DATA_MASK; src = T0; res = (T0 << count) | ((eflags & CC_C) << (count - 1)); if (count > 1) @@ -315,6 +381,7 @@ void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void) ((src >> (DATA_BITS - count)) & CC_C); CC_OP = CC_OP_EFLAGS; } + FORCE_RET(); } void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void) @@ -330,6 +397,7 @@ void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void) #endif if (count) { eflags = cc_table[CC_OP].compute_all(); + T0 &= DATA_MASK; src = T0; res = (T0 >> count) | ((eflags & CC_C) << (DATA_BITS - count)); if (count > 1) @@ -340,6 +408,7 @@ void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void) ((src >> (count - 1)) & CC_C); CC_OP = CC_OP_EFLAGS; } + FORCE_RET(); } void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void) @@ -352,11 +421,12 @@ void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void) CC_DST = T0; CC_OP = CC_OP_ADDB + SHIFT; } else if (count) { - CC_SRC = T0 >> (DATA_BITS - count); + CC_SRC = (DATA_TYPE)T0 >> (DATA_BITS - count); T0 = T0 << count; CC_DST = T0; CC_OP = CC_OP_SHLB + SHIFT; } + FORCE_RET(); } void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void) @@ -370,6 +440,7 @@ void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void) CC_DST = T0; CC_OP = CC_OP_SHLB + SHIFT; } + FORCE_RET(); } void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void) @@ -381,10 +452,69 @@ void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void) CC_SRC = src >> (count - 1); T0 = src >> count; CC_DST = T0; - CC_OP = CC_OP_SHLB + SHIFT; + CC_OP = CC_OP_SARB + SHIFT; } + FORCE_RET(); } +/* carry add/sub (we only need to set CC_OP differently) */ + +void OPPROTO glue(glue(op_adc, SUFFIX), _T0_T1_cc)(void) +{ + int cf; + cf = cc_table[CC_OP].compute_c(); + CC_SRC = T0; + T0 = T0 + T1 + cf; + CC_DST = T0; + CC_OP = CC_OP_ADDB + SHIFT + cf * 3; +} + +void OPPROTO glue(glue(op_sbb, SUFFIX), _T0_T1_cc)(void) +{ + int cf; + cf = cc_table[CC_OP].compute_c(); + CC_SRC = T0; + T0 = T0 - T1 - cf; + CC_DST = T0; + CC_OP = CC_OP_SUBB + SHIFT + cf * 3; +} + +/* bit operations */ +#if DATA_BITS >= 16 + +void OPPROTO glue(glue(op_bt, SUFFIX), _T0_T1_cc)(void) +{ + int count; + count = T1 & SHIFT_MASK; + CC_SRC = T0 >> count; +} + +void OPPROTO glue(glue(op_bts, SUFFIX), _T0_T1_cc)(void) +{ + int count; + count = T1 & SHIFT_MASK; + CC_SRC = T0 >> count; + T0 |= (1 << count); +} + +void OPPROTO glue(glue(op_btr, SUFFIX), _T0_T1_cc)(void) +{ + int count; + count = T1 & SHIFT_MASK; + CC_SRC = T0 >> count; + T0 &= ~(1 << count); +} + +void OPPROTO glue(glue(op_btc, SUFFIX), _T0_T1_cc)(void) +{ + int count; + count = T1 & SHIFT_MASK; + CC_SRC = T0 >> count; + T0 ^= (1 << count); +} + +#endif + /* string operations */ /* XXX: maybe use lower level instructions to ease exception handling */ @@ -464,8 +594,8 @@ void OPPROTO glue(op_scas, SUFFIX)(void) { int v; - v = glue(ldu, SUFFIX)((void *)ESI); - ESI += (DF << SHIFT); + v = glue(ldu, SUFFIX)((void *)EDI); + EDI += (DF << SHIFT); CC_SRC = EAX; CC_DST = EAX - v; } @@ -476,20 +606,14 @@ void OPPROTO glue(op_repz_scas, SUFFIX)(void) if (ECX != 0) { /* NOTE: the flags are not modified if ECX == 0 */ -#if SHIFT == 0 - v1 = EAX & 0xff; -#elif SHIFT == 1 - v1 = EAX & 0xffff; -#else - v1 = EAX; -#endif + v1 = EAX & DATA_MASK; inc = (DF << SHIFT); do { - v2 = glue(ldu, SUFFIX)((void *)ESI); + v2 = glue(ldu, SUFFIX)((void *)EDI); + EDI += inc; + ECX--; if (v1 != v2) break; - ESI += inc; - ECX--; } while (ECX != 0); CC_SRC = v1; CC_DST = v1 - v2; @@ -503,20 +627,14 @@ void OPPROTO glue(op_repnz_scas, SUFFIX)(void) if (ECX != 0) { /* NOTE: the flags are not modified if ECX == 0 */ -#if SHIFT == 0 - v1 = EAX & 0xff; -#elif SHIFT == 1 - v1 = EAX & 0xffff; -#else - v1 = EAX; -#endif + v1 = EAX & DATA_MASK; inc = (DF << SHIFT); do { - v2 = glue(ldu, SUFFIX)((void *)ESI); + v2 = glue(ldu, SUFFIX)((void *)EDI); + EDI += inc; + ECX--; if (v1 == v2) break; - ESI += inc; - ECX--; } while (ECX != 0); CC_SRC = v1; CC_DST = v1 - v2; @@ -543,11 +661,11 @@ void OPPROTO glue(op_repz_cmps, SUFFIX)(void) do { v1 = glue(ldu, SUFFIX)((void *)ESI); v2 = glue(ldu, SUFFIX)((void *)EDI); - if (v1 != v2) - break; ESI += inc; EDI += inc; ECX--; + if (v1 != v2) + break; } while (ECX != 0); CC_SRC = v1; CC_DST = v1 - v2; @@ -563,11 +681,11 @@ void OPPROTO glue(op_repnz_cmps, SUFFIX)(void) do { v1 = glue(ldu, SUFFIX)((void *)ESI); v2 = glue(ldu, SUFFIX)((void *)EDI); - if (v1 == v2) - break; ESI += inc; EDI += inc; ECX--; + if (v1 == v2) + break; } while (ECX != 0); CC_SRC = v1; CC_DST = v1 - v2; diff --git a/tests/Makefile b/tests/Makefile index 2c2b059df4..489e6b557f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -20,7 +20,7 @@ test2: test2.c # i386 emulation test (dump various opcodes) */ test-i386: test-i386.c test-i386.h test-i386-shift.h - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ $< test: test-i386 ./test-i386 > test-i386.ref diff --git a/tests/test-i386.c b/tests/test-i386.c index 5fb9c5cd05..55dd9eb2c7 100644 --- a/tests/test-i386.c +++ b/tests/test-i386.c @@ -14,13 +14,12 @@ #define CC_S 0x0080 #define CC_O 0x0800 -/* XXX: currently no A flag */ -#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) - #define __init_call __attribute__ ((unused,__section__ (".initcall.init"))) static void *call_start __init_call = NULL; +#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) + #define OP add #include "test-i386.h" @@ -67,6 +66,9 @@ static void *call_start __init_call = NULL; #define OP1 #include "test-i386.h" +#undef CC_MASK +#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) + #define OP shl #include "test-i386-shift.h" @@ -268,18 +270,148 @@ void test_jcc(void) TEST_JCC("jns", 0, 0); } +#undef CC_MASK +#define CC_MASK (CC_O | CC_C) + +#define OP mul +#include "test-i386-muldiv.h" + +#define OP imul +#include "test-i386-muldiv.h" + +#undef CC_MASK +#define CC_MASK (0) + +#define OP div +#include "test-i386-muldiv.h" + +#define OP idiv +#include "test-i386-muldiv.h" + +void test_imulw2(int op0, int op1) +{ + int res, s1, s0, flags; + s0 = op0; + s1 = op1; + res = s0; + flags = 0; + asm ("push %4\n\t" + "popf\n\t" + "imulw %w2, %w0\n\t" + "pushf\n\t" + "popl %1\n\t" + : "=q" (res), "=g" (flags) + : "q" (s1), "0" (res), "1" (flags)); + printf("%-10s A=%08x B=%08x R=%08x CC=%04x\n", + "imulw", s0, s1, res, flags & CC_MASK); +} + +void test_imull2(int op0, int op1) +{ + int res, s1, s0, flags; + s0 = op0; + s1 = op1; + res = s0; + flags = 0; + asm ("push %4\n\t" + "popf\n\t" + "imull %2, %0\n\t" + "pushf\n\t" + "popl %1\n\t" + : "=q" (res), "=g" (flags) + : "q" (s1), "0" (res), "1" (flags)); + printf("%-10s A=%08x B=%08x R=%08x CC=%04x\n", + "imull", s0, s1, res, flags & CC_MASK); +} + +void test_mul(void) +{ + test_imulb(0x1234561d, 4); + test_imulb(3, -4); + test_imulb(0x80, 0x80); + test_imulb(0x10, 0x10); + + test_imulw(0, 0x1234001d, 45); + test_imulw(0, 23, -45); + test_imulw(0, 0x8000, 0x8000); + test_imulw(0, 0x100, 0x100); + + test_imull(0, 0x1234001d, 45); + test_imull(0, 23, -45); + test_imull(0, 0x80000000, 0x80000000); + test_imull(0, 0x10000, 0x10000); + + test_mulb(0x1234561d, 4); + test_mulb(3, -4); + test_mulb(0x80, 0x80); + test_mulb(0x10, 0x10); + + test_mulw(0, 0x1234001d, 45); + test_mulw(0, 23, -45); + test_mulw(0, 0x8000, 0x8000); + test_mulw(0, 0x100, 0x100); + + test_mull(0, 0x1234001d, 45); + test_mull(0, 23, -45); + test_mull(0, 0x80000000, 0x80000000); + test_mull(0, 0x10000, 0x10000); + + test_imulw2(0x1234001d, 45); + test_imulw2(23, -45); + test_imulw2(0x8000, 0x8000); + test_imulw2(0x100, 0x100); + + test_imull2(0x1234001d, 45); + test_imull2(23, -45); + test_imull2(0x80000000, 0x80000000); + test_imull2(0x10000, 0x10000); + + test_idivb(0x12341678, 0x127e); + test_idivb(0x43210123, -5); + test_idivb(0x12340004, -1); + + test_idivw(0, 0x12345678, 12347); + test_idivw(0, -23223, -45); + test_idivw(0, 0x12348000, -1); + test_idivw(0x12343, 0x12345678, 0x81238567); + + test_idivl(0, 0x12345678, 12347); + test_idivl(0, -233223, -45); + test_idivl(0, 0x80000000, -1); + test_idivl(0x12343, 0x12345678, 0x81234567); + + test_divb(0x12341678, 0x127e); + test_divb(0x43210123, -5); + test_divb(0x12340004, -1); + + test_divw(0, 0x12345678, 12347); + test_divw(0, -23223, -45); + test_divw(0, 0x12348000, -1); + test_divw(0x12343, 0x12345678, 0x81238567); + + test_divl(0, 0x12345678, 12347); + test_divl(0, -233223, -45); + test_divl(0, 0x80000000, -1); + test_divl(0x12343, 0x12345678, 0x81234567); +} + + static void *call_end __init_call = NULL; int main(int argc, char **argv) { void **ptr; void (*func)(void); + + test_mul(); +#if 0 ptr = &call_start + 1; while (*ptr != NULL) { func = *ptr++; func(); } - test_lea(); test_jcc(); + test_lea(); +#endif return 0; } diff --git a/translate-i386.c b/translate-i386.c index 20a8039048..f145a54068 100644 --- a/translate-i386.c +++ b/translate-i386.c @@ -27,7 +27,9 @@ static void error(const char *fmt, ...) va_list ap; va_start(ap, fmt); + fprintf(stderr, "\n"); vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); va_end(ap); exit(1); } @@ -98,42 +100,13 @@ enum { OR_EBP, OR_ESI, OR_EDI, - - /* I386 float registers */ - OR_ST0, - OR_ST1, - OR_ST2, - OR_ST3, - OR_ST4, - OR_ST5, - OR_ST6, - OR_ST7, OR_TMP0, /* temporary operand register */ OR_TMP1, OR_A0, /* temporary register used when doing address evaluation */ - OR_EFLAGS, /* cpu flags */ - OR_ITMP0, /* used for byte/word insertion */ - OR_ITMP1, /* used for byte/word insertion */ - OR_ITMP2, /* used for byte/word insertion */ - OR_FTMP0, /* float temporary */ - OR_DF, /* D flag, for string ops */ OR_ZERO, /* fixed zero register */ - OR_IM, /* dummy immediate value register */ NB_OREGS, }; -#if 0 -static const double tab_const[7] = { - 1.0, - 3.32192809488736234789, /* log2(10) */ - M_LOG2E, - M_PI, - 0.30102999566398119521, /* log10(2) */ - M_LN2, - 0.0 -}; -#endif - typedef void (GenOpFunc)(void); typedef void (GenOpFunc1)(long); typedef void (GenOpFunc2)(long, long); @@ -354,14 +327,29 @@ static GenOpFunc *gen_op_addl_A0_reg_sN[4][8] = { static GenOpFunc *gen_op_arith_T0_T1_cc[8] = { gen_op_addl_T0_T1_cc, gen_op_orl_T0_T1_cc, - gen_op_adcl_T0_T1_cc, - gen_op_sbbl_T0_T1_cc, + NULL, + NULL, gen_op_andl_T0_T1_cc, gen_op_subl_T0_T1_cc, gen_op_xorl_T0_T1_cc, gen_op_cmpl_T0_T1_cc, }; +static GenOpFunc *gen_op_arithc_T0_T1_cc[3][2] = { + [OT_BYTE] = { + gen_op_adcb_T0_T1_cc, + gen_op_sbbb_T0_T1_cc, + }, + [OT_WORD] = { + gen_op_adcw_T0_T1_cc, + gen_op_sbbw_T0_T1_cc, + }, + [OT_LONG] = { + gen_op_adcl_T0_T1_cc, + gen_op_sbbl_T0_T1_cc, + }, +}; + static const int cc_op_arithb[8] = { CC_OP_ADDB, CC_OP_LOGICB, @@ -406,6 +394,21 @@ static GenOpFunc *gen_op_shift_T0_T1_cc[3][8] = { }, }; +static GenOpFunc *gen_op_btx_T0_T1_cc[2][4] = { + [0] = { + gen_op_btw_T0_T1_cc, + gen_op_btsw_T0_T1_cc, + gen_op_btrw_T0_T1_cc, + gen_op_btcw_T0_T1_cc, + }, + [1] = { + gen_op_btl_T0_T1_cc, + gen_op_btsl_T0_T1_cc, + gen_op_btrl_T0_T1_cc, + gen_op_btcl_T0_T1_cc, + }, +}; + static GenOpFunc *gen_op_lds_T0_A0[3] = { gen_op_ldsb_T0_A0, gen_op_ldsw_T0_A0, @@ -644,18 +647,23 @@ static void gen_op(DisasContext *s1, int op, int ot, int d, int s) gen_op_mov_TN_reg[ot][0][d](); if (s != OR_TMP1) gen_op_mov_TN_reg[ot][1][s](); - if ((op == OP_ADCL || op == OP_SBBL) && s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_op_arith_T0_T1_cc[op](); + if (op == OP_ADCL || op == OP_SBBL) { + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_op_arithc_T0_T1_cc[ot][op - OP_ADCL](); + s1->cc_op = CC_OP_DYNAMIC; + } else { + gen_op_arith_T0_T1_cc[op](); + s1->cc_op = cc_op_arithb[op] + ot; + } if (d != OR_TMP0 && op != OP_CMPL) gen_op_mov_reg_T0[ot][d](); - s1->cc_op = cc_op_arithb[op] + ot; } static void gen_opi(DisasContext *s1, int op, int ot, int d, int c) { gen_op_movl_T1_im(c); - gen_op(s1, op, ot, d, OR_TMP0); + gen_op(s1, op, ot, d, OR_TMP1); } static void gen_inc(DisasContext *s1, int ot, int d, int c) @@ -664,10 +672,13 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_mov_TN_reg[ot][0][d](); if (s1->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s1->cc_op); - if (c > 0) + if (c > 0) { gen_op_incl_T0_cc(); - else + s1->cc_op = CC_OP_INCB + ot; + } else { gen_op_decl_T0_cc(); + s1->cc_op = CC_OP_DECB + ot; + } if (d != OR_TMP0) gen_op_mov_reg_T0[ot][d](); } @@ -678,20 +689,12 @@ static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) gen_op_mov_TN_reg[ot][0][d](); if (s != OR_TMP1) gen_op_mov_TN_reg[ot][1][s](); - switch(op) { - case OP_ROL: - case OP_ROR: - case OP_RCL: - case OP_RCR: - /* only C and O are modified, so we must update flags dynamically */ - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_op_shift_T0_T1_cc[ot][op](); - break; - default: - gen_op_shift_T0_T1_cc[ot][op](); - break; - } + /* for zero counts, flags are not updated, so must do it dynamically */ + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + + gen_op_shift_T0_T1_cc[ot][op](); + if (d != OR_TMP0) gen_op_mov_reg_T0[ot][d](); s1->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ @@ -785,12 +788,65 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ } gen_op_addl_A0_reg_sN[scale][reg2](); } - opreg = OR_A0; } else { - fprintf(stderr, "16 bit addressing not supported\n"); - disp = 0; - opreg = 0; + switch (mod) { + case 0: + if (rm == 6) { + disp = lduw(s->pc); + s->pc += 2; + gen_op_movl_A0_im(disp); + goto no_rm; + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)ldub(s->pc++); + break; + default: + case 2: + disp = lduw(s->pc); + s->pc += 2; + break; + } + switch(rm) { + case 0: + gen_op_movl_A0_reg[R_EBX](); + gen_op_addl_A0_reg_sN[0][R_ESI](); + break; + case 1: + gen_op_movl_A0_reg[R_EBX](); + gen_op_addl_A0_reg_sN[0][R_EDI](); + break; + case 2: + gen_op_movl_A0_reg[R_EBP](); + gen_op_addl_A0_reg_sN[0][R_ESI](); + break; + case 3: + gen_op_movl_A0_reg[R_EBP](); + gen_op_addl_A0_reg_sN[0][R_EDI](); + break; + case 4: + gen_op_movl_A0_reg[R_ESI](); + break; + case 5: + gen_op_movl_A0_reg[R_EDI](); + break; + case 6: + gen_op_movl_A0_reg[R_EBP](); + break; + default: + case 7: + gen_op_movl_A0_reg[R_EBX](); + break; + } + if (disp != 0) + gen_op_addl_A0_im(disp); + gen_op_andl_A0_ffff(); + no_rm: ; } + opreg = OR_A0; + disp = 0; *reg_ptr = opreg; *offset_ptr = disp; } @@ -870,6 +926,12 @@ static void gen_jcc(DisasContext *s, int b, int val) case CC_OP_ADDB: case CC_OP_ADDW: case CC_OP_ADDL: + case CC_OP_ADCB: + case CC_OP_ADCW: + case CC_OP_ADCL: + case CC_OP_SBBB: + case CC_OP_SBBW: + case CC_OP_SBBL: case CC_OP_LOGICB: case CC_OP_LOGICW: case CC_OP_LOGICL: @@ -882,6 +944,9 @@ static void gen_jcc(DisasContext *s, int b, int val) case CC_OP_SHLB: case CC_OP_SHLW: case CC_OP_SHLL: + case CC_OP_SARB: + case CC_OP_SARW: + case CC_OP_SARL: switch(jcc_op) { case JCC_Z: func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op]; @@ -1284,11 +1349,15 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_inc(s, ot, OR_TMP0, 1); if (mod != 3) gen_op_st_T0_A0[ot](); + else + gen_op_mov_reg_T0[ot][rm](); break; case 1: /* dec Ev */ gen_inc(s, ot, OR_TMP0, -1); if (mod != 3) gen_op_st_T0_A0[ot](); + else + gen_op_mov_reg_T0[ot][rm](); break; case 2: /* call Ev */ gen_op_movl_T1_im((long)s->pc); @@ -1359,7 +1428,6 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) ot = dflag ? OT_LONG : OT_WORD; modrm = ldub(s->pc++); reg = ((modrm >> 3) & 7) + OR_EAX; - gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); if (b == 0x69) { val = insn_get(s, ot); @@ -1372,9 +1440,9 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) } if (ot == OT_LONG) { - op_imull_T0_T1(); + gen_op_imull_T0_T1(); } else { - op_imulw_T0_T1(); + gen_op_imulw_T0_T1(); } gen_op_mov_reg_T0[ot][reg](); s->cc_op = CC_OP_MUL; @@ -1522,7 +1590,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) offset_addr = insn_get(s, OT_LONG); else offset_addr = insn_get(s, OT_WORD); - + gen_op_movl_A0_im(offset_addr); if ((b & 2) == 0) { gen_op_ld_T0_A0[ot](); gen_op_mov_reg_T0[ot][R_EAX](); @@ -1717,17 +1785,19 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) break; } break; -#if 0 - case 0x2f: /* fnstsw mem */ - gen_insn3(OP_FNSTS, OR_TMP0, OR_ZERO, OR_ZERO); - gen_st(OP_STW, OR_TMP0, reg_addr, offset_addr); + case 0x0d: /* fldcw mem */ + gen_op_fldcw_A0(); + break; + case 0x0f: /* fnstcw mem */ + gen_op_fnstcw_A0(); + break; + case 0x2f: /* fnstsw mem */ + gen_op_fnstsw_A0(); break; - case 0x3c: /* fbld */ case 0x3e: /* fbstp */ error("float BCD not hanlded"); return -1; -#endif case 0x3d: /* fildll */ gen_op_fpush(); gen_op_fildll_ST0_A0(); @@ -1737,7 +1807,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_fpop(); break; default: - error("unhandled memory FP\n"); + error("unhandled memory FP [op=0x%02x]\n", op); return -1; } } else { @@ -1987,11 +2057,18 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) else ot = dflag ? OT_LONG : OT_WORD; if (prefixes & PREFIX_REPNZ) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); gen_op_scas[6 + ot](); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ } else if (prefixes & PREFIX_REPZ) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); gen_op_scas[3 + ot](); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ } else { gen_op_scas[ot](); + s->cc_op = CC_OP_SUBB + ot; } break; @@ -2002,11 +2079,18 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) else ot = dflag ? OT_LONG : OT_WORD; if (prefixes & PREFIX_REPNZ) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); gen_op_cmps[6 + ot](); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ } else if (prefixes & PREFIX_REPZ) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); gen_op_cmps[3 + ot](); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ } else { gen_op_cmps[ot](); + s->cc_op = CC_OP_SUBB + ot; } break; @@ -2186,6 +2270,74 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_std(); break; + /************************/ + /* bit operations */ + case 0x1ba: /* bt/bts/btr/btc Gv, im */ + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub(s->pc++); + op = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T0_A0[ot](); + } else { + gen_op_mov_TN_reg[ot][0][rm](); + } + /* load shift */ + val = ldub(s->pc++); + gen_op_movl_T1_im(val); + if (op < 4) + return -1; + op -= 4; + gen_op_btx_T0_T1_cc[ot - OT_WORD][op](); + s->cc_op = CC_OP_SHLB + ot; + if (op != 0) { + if (mod != 3) + gen_op_st_T0_A0[ot](); + else + gen_op_mov_reg_T0[ot][rm](); + } + break; + case 0x1a3: /* bt Gv, Ev */ + op = 0; + goto do_btx; + case 0x1ab: /* bts */ + op = 1; + goto do_btx; + case 0x1b3: /* btr */ + op = 2; + goto do_btx; + case 0x1bb: /* btc */ + op = 3; + do_btx: + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + gen_op_mov_TN_reg[OT_LONG][1][reg](); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + /* specific case: we need to add a displacement */ + if (ot == OT_WORD) + gen_op_add_bitw_A0_T1(); + else + gen_op_add_bitl_A0_T1(); + gen_op_ld_T0_A0[ot](); + } else { + gen_op_mov_TN_reg[ot][0][rm](); + } + gen_op_btx_T0_T1_cc[ot - OT_WORD][op](); + s->cc_op = CC_OP_SHLB + ot; + if (op != 0) { + if (mod != 3) + gen_op_st_T0_A0[ot](); + else + gen_op_mov_reg_T0[ot][rm](); + } + break; + /************************/ /* misc */ case 0x90: /* nop */ @@ -2206,6 +2358,13 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_into((long)pc_start, (long)s->pc); *is_jmp_ptr = 1; break; + case 0x1c8 ... 0x1cf: /* bswap reg */ + reg = b & 7; + gen_op_mov_TN_reg[OT_LONG][0][reg](); + gen_op_bswapl_T0(); + gen_op_mov_reg_T0[OT_LONG][reg](); + break; + #if 0 case 0x1a2: /* cpuid */ gen_insn0(OP_ASM);