target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder

Because these are the only VEX instructions that QEMU supports, the
new decoder is entered on the first byte of a valid VEX prefix, and VEX
decoding only needs to be done in decode-new.c.inc.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2022-08-24 18:01:41 +02:00
parent 55a3328669
commit 1d0b926150
3 changed files with 321 additions and 289 deletions

View File

@ -135,11 +135,69 @@ static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
return s->modrm;
}
static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86GenFunc group17_gen[8] = {
NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
};
int op = (get_modrm(s, env) >> 3) & 7;
entry->gen = group17_gen[op];
}
static const X86OpEntry opcodes_0F38_00toEF[240] = {
};
/* five rows for no prefix, 66, F3, F2, 66+F2 */
static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
[0] = {
X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
{},
X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
},
[1] = {
X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
{},
X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
},
[2] = {
X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
{},
{},
{},
{},
},
[3] = {
X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
{},
{},
{},
{},
},
[5] = {
X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
{},
X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
{},
},
[6] = {
{},
X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
{},
},
[7] = {
X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
{},
},
};
static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
@ -161,6 +219,7 @@ static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
}
static const X86OpEntry opcodes_0F3A[256] = {
[0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};
static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)

View File

@ -215,3 +215,261 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
g_assert_not_reached();
}
}
static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
{
TCGv carry_in = NULL;
TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
TCGv zero;
if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
/* Re-use the carry-out from a previous round. */
carry_in = carry_out;
cc_op = s->cc_op;
} else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
/* Merge with the carry-out from the opposite instruction. */
cc_op = CC_OP_ADCOX;
}
/* If we don't have a carry-in, get it out of EFLAGS. */
if (!carry_in) {
if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
gen_compute_eflags(s);
}
carry_in = s->tmp0;
tcg_gen_extract_tl(carry_in, cpu_cc_src,
ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
}
switch (ot) {
#ifdef TARGET_X86_64
case MO_32:
/* If TL is 64-bit just do everything in 64-bit arithmetic. */
tcg_gen_add_i64(s->T0, s->T0, s->T1);
tcg_gen_add_i64(s->T0, s->T0, carry_in);
tcg_gen_shri_i64(carry_out, s->T0, 32);
break;
#endif
default:
zero = tcg_constant_tl(0);
tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero);
tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
break;
}
set_cc_op(s, cc_op);
}
static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
}
static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
}
static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
tcg_gen_andc_tl(s->T0, s->T1, s->T0);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_LOGICB + ot);
}
static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv bound, zero;
/*
* Extract START, and shift the operand.
* Shifts larger than operand size get zeros.
*/
tcg_gen_ext8u_tl(s->A0, s->T1);
tcg_gen_shr_tl(s->T0, s->T0, s->A0);
bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
zero = tcg_constant_tl(0);
tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
/*
* Extract the LEN into a mask. Lengths larger than
* operand size get all ones.
*/
tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
tcg_gen_movi_tl(s->T1, 1);
tcg_gen_shl_tl(s->T1, s->T1, s->A0);
tcg_gen_subi_tl(s->T1, s->T1, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_LOGICB + ot);
}
static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
tcg_gen_neg_tl(s->T1, s->T0);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
set_cc_op(s, CC_OP_BMILGB + ot);
}
static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_xor_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
set_cc_op(s, CC_OP_BMILGB + ot);
}
static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
set_cc_op(s, CC_OP_BMILGB + ot);
}
static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv bound;
tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
/*
* Note that since we're using BMILG (in order to get O
* cleared) we need to store the inverse into C.
*/
tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound);
tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1);
tcg_gen_movi_tl(s->A0, -1);
tcg_gen_shl_tl(s->A0, s->A0, s->T1);
tcg_gen_andc_tl(s->T0, s->T0, s->A0);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_BMILGB + ot);
}
static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
}
static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
/* M operand type does not load/store */
if (decode->e.op0 == X86_TYPE_M) {
tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
} else {
tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
}
}
static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
/* low part of result in VEX.vvvv, high in MODRM */
switch (ot) {
default:
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
s->tmp2_i32, s->tmp3_i32);
tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32);
break;
#ifdef TARGET_X86_64
case MO_64:
tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1);
break;
#endif
}
}
static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
if (ot < MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
gen_helper_pdep(s->T0, s->T0, s->T1);
}
static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
if (ot < MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
gen_helper_pext(s->T0, s->T0, s->T1);
}
static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int b = decode->immediate;
if (ot == MO_64) {
tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
} else {
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
}
}
static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask;
mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
if (ot != MO_64) {
tcg_gen_ext32s_tl(s->T0, s->T0);
}
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}
static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask;
mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
tcg_gen_shl_tl(s->T0, s->T0, s->T1);
}
static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask;
mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
if (ot != MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}

View File

@ -4210,151 +4210,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b)
s->mem_index, ot | MO_BE);
}
break;
case 0x0f2: /* andn Gy, By, Ey */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
gen_op_mov_reg_v(s, ot, reg, s->T0);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_LOGICB + ot);
break;
case 0x0f7: /* bextr Gy, Ey, By */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
{
TCGv bound, zero;
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
/* Extract START, and shift the operand.
Shifts larger than operand size get zeros. */
tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
tcg_gen_shr_tl(s->T0, s->T0, s->A0);
bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
zero = tcg_const_tl(0);
tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
s->T0, zero);
tcg_temp_free(zero);
/* Extract the LEN into a mask. Lengths larger than
operand size get all ones. */
tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
s->A0, bound);
tcg_temp_free(bound);
tcg_gen_movi_tl(s->T1, 1);
tcg_gen_shl_tl(s->T1, s->T1, s->A0);
tcg_gen_subi_tl(s->T1, s->T1, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
gen_op_mov_reg_v(s, ot, reg, s->T0);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_LOGICB + ot);
}
break;
case 0x0f5: /* bzhi Gy, Ey, By */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
{
TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
/* Note that since we're using BMILG (in order to get O
cleared) we need to store the inverse into C. */
tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
s->T1, bound);
tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
bound, bound, s->T1);
tcg_temp_free(bound);
}
tcg_gen_movi_tl(s->A0, -1);
tcg_gen_shl_tl(s->A0, s->A0, s->T1);
tcg_gen_andc_tl(s->T0, s->T0, s->A0);
gen_op_mov_reg_v(s, ot, reg, s->T0);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_BMILGB + ot);
break;
case 0x3f6: /* mulx By, Gy, rdx, Ey */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
switch (ot) {
default:
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
s->tmp2_i32, s->tmp3_i32);
tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
break;
#ifdef TARGET_X86_64
case MO_64:
tcg_gen_mulu2_i64(s->T0, s->T1,
s->T0, cpu_regs[R_EDX]);
tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
tcg_gen_mov_i64(cpu_regs[reg], s->T1);
break;
#endif
}
break;
case 0x3f5: /* pdep Gy, By, Ey */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
/* Note that by zero-extending the source operand, we
automatically handle zero-extending the result. */
if (ot == MO_64) {
tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
} else {
tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
}
gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
break;
case 0x2f5: /* pext Gy, By, Ey */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
/* Note that by zero-extending the source operand, we
automatically handle zero-extending the result. */
if (ot == MO_64) {
tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
} else {
tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
}
gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
break;
case 0x1f6: /* adcx Gy, Ey */
case 0x2f6: /* adox Gy, Ey */
CHECK_NO_VEX(s);
@ -4434,73 +4289,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b)
}
break;
case 0x1f7: /* shlx Gy, Ey, By */
case 0x2f7: /* sarx Gy, Ey, By */
case 0x3f7: /* shrx Gy, Ey, By */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
if (ot == MO_64) {
tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
} else {
tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
}
if (b == 0x1f7) {
tcg_gen_shl_tl(s->T0, s->T0, s->T1);
} else if (b == 0x2f7) {
if (ot != MO_64) {
tcg_gen_ext32s_tl(s->T0, s->T0);
}
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
} else {
if (ot != MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}
gen_op_mov_reg_v(s, ot, reg, s->T0);
break;
case 0x0f3:
case 0x1f3:
case 0x2f3:
case 0x3f3: /* Group 17 */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
tcg_gen_mov_tl(cpu_cc_src, s->T0);
switch (reg & 7) {
case 1: /* blsr By,Ey */
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
break;
case 2: /* blsmsk By,Ey */
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_xor_tl(s->T0, s->T0, s->T1);
break;
case 3: /* blsi By, Ey */
tcg_gen_neg_tl(s->T1, s->T0);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
break;
default:
goto unknown_op;
}
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
set_cc_op(s, CC_OP_BMILGB + ot);
break;
default:
goto unknown_op;
}
break;
@ -4716,37 +4504,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b)
}
break;
case 0x33a:
/* Various integer extensions at 0f 3a f[0-f]. */
b = modrm | (b1 << 8);
modrm = x86_ldub_code(env, s);
reg = ((modrm >> 3) & 7) | REX_R(s);
switch (b) {
case 0x3f0: /* rorx Gy,Ey, Ib */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
|| !(s->prefix & PREFIX_VEX)
|| s->vex_l != 0) {
goto illegal_op;
}
ot = mo_64_32(s->dflag);
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
b = x86_ldub_code(env, s);
if (ot == MO_64) {
tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
} else {
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
}
gen_op_mov_reg_v(s, ot, reg, s->T0);
break;
default:
goto unknown_op;
}
break;
default:
unknown_op:
gen_unknown_opcode(env, s);
@ -4992,59 +4749,17 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#endif
case 0xc5: /* 2-byte VEX */
case 0xc4: /* 3-byte VEX */
use_new = false;
/* VEX prefixes cannot be used except in 32-bit mode.
Otherwise the instruction is LES or LDS. */
if (CODE32(s) && !VM86(s)) {
static const int pp_prefix[4] = {
0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
};
int vex3, vex2 = x86_ldub_code(env, s);
int vex2 = x86_ldub_code(env, s);
s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
/* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
otherwise the instruction is LES or LDS. */
s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
break;
}
/* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
| PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
goto illegal_op;
}
#ifdef TARGET_X86_64
s->rex_r = (~vex2 >> 4) & 8;
#endif
if (b == 0xc5) {
/* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
vex3 = vex2;
b = x86_ldub_code(env, s) | 0x100;
} else {
/* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
vex3 = x86_ldub_code(env, s);
#ifdef TARGET_X86_64
s->rex_x = (~vex2 >> 3) & 8;
s->rex_b = (~vex2 >> 2) & 8;
#endif
s->vex_w = (vex3 >> 7) & 1;
switch (vex2 & 0x1f) {
case 0x01: /* Implied 0f leading opcode bytes. */
b = x86_ldub_code(env, s) | 0x100;
break;
case 0x02: /* Implied 0f 38 leading opcode bytes. */
b = 0x138;
break;
case 0x03: /* Implied 0f 3a leading opcode bytes. */
b = 0x13a;
break;
default: /* Reserved for future use. */
goto unknown_op;
}
}
s->vex_v = (~vex3 >> 3) & 0xf;
s->vex_l = (vex3 >> 2) & 1;
prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
disas_insn_new(s, cpu, b);
return s->pc;
}
break;
}