From abd41884c530aa025ada253bf1a5bd0c2b808219 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Aug 2023 18:28:33 +0200 Subject: [PATCH] target/i386: fix memory operand size for CVTPS2PD CVTPS2PD only loads a half-register for memory, unlike the other operations under 0x0F 0x5A. "Unpack" the group into separate emission functions instead of using gen_unary_fp_sse. Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 14 ++++++++++++-- target/i386/tcg/emit.c.inc | 30 +++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 43c39aad2a..0db19cda3b 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -805,10 +805,20 @@ static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entr case 0x51: entry->gen = gen_VSQRT; break; case 0x52: entry->gen = gen_VRSQRT; break; case 0x53: entry->gen = gen_VRCP; break; - case 0x5A: entry->gen = gen_VCVTfp2fp; break; } } +static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F5A[4] = { + X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */ + X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */ + X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */ + X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */ + }; + *entry = *decode_by_prefix(s, opcodes_0F5A); +} + static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) { static const X86OpEntry opcodes_0F5B[4] = { @@ -891,7 +901,7 @@ static const X86OpEntry opcodes_0F[256] = { [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* CVTPS2PD */ + [0x5a] = X86_OP_GROUP0(0F5A), [0x5b] = X86_OP_GROUP0(0F5B), [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 4fe8dec427..45a3e55cbf 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1914,12 +1914,22 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) set_cc_op(s, CC_OP_EFLAGS); } -static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - gen_unary_fp_sse(s, env, decode, - gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm, - gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm, - gen_helper_cvtsd2ss, gen_helper_cvtss2sd); + if (s->vex_l) { + gen_helper_cvtpd2ps_ymm(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtpd2ps_xmm(cpu_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (s->vex_l) { + gen_helper_cvtps2pd_ymm(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtps2pd_xmm(cpu_env, OP_PTR0, OP_PTR2); + } } static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1936,6 +1946,16 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec } } +static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_cvtsd2ss(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_cvtss2sd(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode);