x86: fix (dis)assembly of certain SSE2 insns in 16-bit mode

MOVNTI was wrongly assembled with a 66h prefix. Add IgnoreSize to
address this. It and the scalar to/from integer conversion insns were
also wrongly using Ev / Gv, leading to 16-bit register names being
printed when 32-bit ones were meant.

Clone the 32-bit SSE2 test to cover both assembler and disassembler.
This commit is contained in:
Jan Beulich 2019-06-25 09:25:26 +02:00 committed by Jan Beulich
parent b8364fa775
commit e1a1babdad
8 changed files with 198 additions and 9 deletions

View File

@ -1,3 +1,9 @@
2019-06-25 Jan Beulich <jbeulich@suse.com>
* testsuite/gas/i386/sse2-16bit.d,
testsuite/gas/i386/sse2-16bit.s: New.
* testsuite/gas/i386/i386.exp: Run new test.
2019-06-25 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (optimize_encoding): Also handle ANDQ with

View File

@ -73,6 +73,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]]
run_dump_test "relax-2"
run_dump_test "ssemmx2"
run_dump_test "sse2"
run_dump_test "sse2-16bit"
run_dump_test "sub"
run_dump_test "sse3"
run_dump_test "sib"

View File

@ -0,0 +1,167 @@
#as: -I${srcdir}/$subdir
#objdump: -dwMaddr16 -Mdata16
#name: i386 16-bit SSE2
.*: file format .*
Disassembly of section .text:
0+ <foo>:
[ ]*[a-f0-9]+: 67 0f c3 00 movnti %eax,\(%eax\)
[ ]*[a-f0-9]+: 0f ae f8 sfence
[ ]*[a-f0-9]+: 0f ae e8 lfence
[ ]*[a-f0-9]+: 0f ae f0 mfence
[ ]*[a-f0-9]+: 67 66 0f 58 01 addpd \(%ecx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 58 ca addpd %xmm2,%xmm1
[ ]*[a-f0-9]+: 67 f2 0f 58 13 addsd \(%ebx\),%xmm2
[ ]*[a-f0-9]+: f2 0f 58 dc addsd %xmm4,%xmm3
[ ]*[a-f0-9]+: 67 66 0f 55 65 00 andnpd 0x0\(%ebp\),%xmm4
[ ]*[a-f0-9]+: 66 0f 55 ee andnpd %xmm6,%xmm5
[ ]*[a-f0-9]+: 67 66 0f 54 37 andpd \(%edi\),%xmm6
[ ]*[a-f0-9]+: 66 0f 54 f8 andpd %xmm0,%xmm7
[ ]*[a-f0-9]+: 66 0f c2 c1 02 cmplepd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f c2 0a 03 cmpunordpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f c2 d2 04 cmpneqsd %xmm2,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f c2 1c 24 05 cmpnltsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 66 0f c2 e5 06 cmpnlepd %xmm5,%xmm4
[ ]*[a-f0-9]+: 67 66 0f c2 2e 07 cmpordpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: f2 0f c2 f7 00 cmpeqsd %xmm7,%xmm6
[ ]*[a-f0-9]+: 67 f2 0f c2 38 01 cmpltsd \(%eax\),%xmm7
[ ]*[a-f0-9]+: 66 0f c2 c1 00 cmpeqpd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f c2 0a 00 cmpeqpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f c2 d2 00 cmpeqsd %xmm2,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f c2 1c 24 00 cmpeqsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 66 0f c2 e5 01 cmpltpd %xmm5,%xmm4
[ ]*[a-f0-9]+: 67 66 0f c2 2e 01 cmpltpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: f2 0f c2 f7 01 cmpltsd %xmm7,%xmm6
[ ]*[a-f0-9]+: 67 f2 0f c2 38 01 cmpltsd \(%eax\),%xmm7
[ ]*[a-f0-9]+: 67 66 0f c2 01 02 cmplepd \(%ecx\),%xmm0
[ ]*[a-f0-9]+: 66 0f c2 ca 02 cmplepd %xmm2,%xmm1
[ ]*[a-f0-9]+: 67 f2 0f c2 13 02 cmplesd \(%ebx\),%xmm2
[ ]*[a-f0-9]+: f2 0f c2 dc 02 cmplesd %xmm4,%xmm3
[ ]*[a-f0-9]+: 67 66 0f c2 65 00 03 cmpunordpd 0x0\(%ebp\),%xmm4
[ ]*[a-f0-9]+: 66 0f c2 ee 03 cmpunordpd %xmm6,%xmm5
[ ]*[a-f0-9]+: 67 f2 0f c2 37 03 cmpunordsd \(%edi\),%xmm6
[ ]*[a-f0-9]+: f2 0f c2 f8 03 cmpunordsd %xmm0,%xmm7
[ ]*[a-f0-9]+: 66 0f c2 c1 04 cmpneqpd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f c2 0a 04 cmpneqpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f c2 d2 04 cmpneqsd %xmm2,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f c2 1c 24 04 cmpneqsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 66 0f c2 e5 05 cmpnltpd %xmm5,%xmm4
[ ]*[a-f0-9]+: 67 66 0f c2 2e 05 cmpnltpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: f2 0f c2 f7 05 cmpnltsd %xmm7,%xmm6
[ ]*[a-f0-9]+: 67 f2 0f c2 38 05 cmpnltsd \(%eax\),%xmm7
[ ]*[a-f0-9]+: 67 66 0f c2 01 06 cmpnlepd \(%ecx\),%xmm0
[ ]*[a-f0-9]+: 66 0f c2 ca 06 cmpnlepd %xmm2,%xmm1
[ ]*[a-f0-9]+: 67 f2 0f c2 13 06 cmpnlesd \(%ebx\),%xmm2
[ ]*[a-f0-9]+: f2 0f c2 dc 06 cmpnlesd %xmm4,%xmm3
[ ]*[a-f0-9]+: 67 66 0f c2 65 00 07 cmpordpd 0x0\(%ebp\),%xmm4
[ ]*[a-f0-9]+: 66 0f c2 ee 07 cmpordpd %xmm6,%xmm5
[ ]*[a-f0-9]+: 67 f2 0f c2 37 07 cmpordsd \(%edi\),%xmm6
[ ]*[a-f0-9]+: f2 0f c2 f8 07 cmpordsd %xmm0,%xmm7
[ ]*[a-f0-9]+: 66 0f 2f c1 comisd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f 2f 0a comisd \(%edx\),%xmm1
[ ]*[a-f0-9]+: 66 0f 2a d3 cvtpi2pd %mm3,%xmm2
[ ]*[a-f0-9]+: 67 66 0f 2a 1c 24 cvtpi2pd \(%esp\),%xmm3
[ ]*[a-f0-9]+: f2 0f 2a e5 cvtsi2sd %ebp,%xmm4
[ ]*[a-f0-9]+: 67 f2 0f 2a 2e cvtsi2sdl \(%esi\),%xmm5
[ ]*[a-f0-9]+: 66 0f 2d f7 cvtpd2pi %xmm7,%mm6
[ ]*[a-f0-9]+: 67 66 0f 2d 38 cvtpd2pi \(%eax\),%mm7
[ ]*[a-f0-9]+: 67 f2 0f 2d 01 cvtsd2si \(%ecx\),%eax
[ ]*[a-f0-9]+: f2 0f 2d ca cvtsd2si %xmm2,%ecx
[ ]*[a-f0-9]+: 67 66 0f 2c 13 cvttpd2pi \(%ebx\),%mm2
[ ]*[a-f0-9]+: 66 0f 2c dc cvttpd2pi %xmm4,%mm3
[ ]*[a-f0-9]+: 67 f2 0f 2c 65 00 cvttsd2si 0x0\(%ebp\),%esp
[ ]*[a-f0-9]+: f2 0f 2c ee cvttsd2si %xmm6,%ebp
[ ]*[a-f0-9]+: 66 0f 5e c1 divpd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f 5e 0a divpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f 5e d3 divsd %xmm3,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f 5e 1c 24 divsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 67 0f ae 55 00 ldmxcsr 0x0\(%ebp\)
[ ]*[a-f0-9]+: 67 0f ae 1e stmxcsr \(%esi\)
[ ]*[a-f0-9]+: 0f ae f8 sfence
[ ]*[a-f0-9]+: 66 0f 5f c1 maxpd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f 5f 0a maxpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f 5f d3 maxsd %xmm3,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f 5f 1c 24 maxsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 66 0f 5d e5 minpd %xmm5,%xmm4
[ ]*[a-f0-9]+: 67 66 0f 5d 2e minpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: f2 0f 5d f7 minsd %xmm7,%xmm6
[ ]*[a-f0-9]+: 67 f2 0f 5d 38 minsd \(%eax\),%xmm7
[ ]*[a-f0-9]+: 66 0f 28 c1 movapd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f 29 11 movapd %xmm2,\(%ecx\)
[ ]*[a-f0-9]+: 67 66 0f 28 12 movapd \(%edx\),%xmm2
[ ]*[a-f0-9]+: 67 66 0f 17 2c 24 movhpd %xmm5,\(%esp\)
[ ]*[a-f0-9]+: 67 66 0f 16 2e movhpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: 67 66 0f 13 07 movlpd %xmm0,\(%edi\)
[ ]*[a-f0-9]+: 67 66 0f 12 00 movlpd \(%eax\),%xmm0
[ ]*[a-f0-9]+: 66 0f 50 ca movmskpd %xmm2,%ecx
[ ]*[a-f0-9]+: 66 0f 10 d3 movupd %xmm3,%xmm2
[ ]*[a-f0-9]+: 67 66 0f 11 22 movupd %xmm4,\(%edx\)
[ ]*[a-f0-9]+: 67 66 0f 10 65 00 movupd 0x0\(%ebp\),%xmm4
[ ]*[a-f0-9]+: f2 0f 10 ee movsd %xmm6,%xmm5
[ ]*[a-f0-9]+: 67 f2 0f 11 3e movsd %xmm7,\(%esi\)
[ ]*[a-f0-9]+: 67 f2 0f 10 38 movsd \(%eax\),%xmm7
[ ]*[a-f0-9]+: 66 0f 59 c1 mulpd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f 59 0a mulpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f 59 d2 mulsd %xmm2,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f 59 1c 24 mulsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 66 0f 56 e5 orpd %xmm5,%xmm4
[ ]*[a-f0-9]+: 67 66 0f 56 2e orpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: 67 66 0f c6 37 02 shufpd \$0x2,\(%edi\),%xmm6
[ ]*[a-f0-9]+: 66 0f c6 f8 03 shufpd \$0x3,%xmm0,%xmm7
[ ]*[a-f0-9]+: 66 0f 51 c1 sqrtpd %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f 51 0a sqrtpd \(%edx\),%xmm1
[ ]*[a-f0-9]+: f2 0f 51 d2 sqrtsd %xmm2,%xmm2
[ ]*[a-f0-9]+: 67 f2 0f 51 1c 24 sqrtsd \(%esp\),%xmm3
[ ]*[a-f0-9]+: 66 0f 5c e5 subpd %xmm5,%xmm4
[ ]*[a-f0-9]+: 67 66 0f 5c 2e subpd \(%esi\),%xmm5
[ ]*[a-f0-9]+: f2 0f 5c f7 subsd %xmm7,%xmm6
[ ]*[a-f0-9]+: 67 f2 0f 5c 38 subsd \(%eax\),%xmm7
[ ]*[a-f0-9]+: 67 66 0f 2e 01 ucomisd \(%ecx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 2e ca ucomisd %xmm2,%xmm1
[ ]*[a-f0-9]+: 67 66 0f 15 13 unpckhpd \(%ebx\),%xmm2
[ ]*[a-f0-9]+: 66 0f 15 dc unpckhpd %xmm4,%xmm3
[ ]*[a-f0-9]+: 67 66 0f 14 65 00 unpcklpd 0x0\(%ebp\),%xmm4
[ ]*[a-f0-9]+: 66 0f 14 ee unpcklpd %xmm6,%xmm5
[ ]*[a-f0-9]+: 67 66 0f 57 37 xorpd \(%edi\),%xmm6
[ ]*[a-f0-9]+: 66 0f 57 f8 xorpd %xmm0,%xmm7
[ ]*[a-f0-9]+: 67 66 0f 2b 33 movntpd %xmm6,\(%ebx\)
[ ]*[a-f0-9]+: 66 0f 57 c8 xorpd %xmm0,%xmm1
[ ]*[a-f0-9]+: f3 0f e6 c8 cvtdq2pd %xmm0,%xmm1
[ ]*[a-f0-9]+: f2 0f e6 c8 cvtpd2dq %xmm0,%xmm1
[ ]*[a-f0-9]+: 0f 5b c8 cvtdq2ps %xmm0,%xmm1
[ ]*[a-f0-9]+: 66 0f 5a c8 cvtpd2ps %xmm0,%xmm1
[ ]*[a-f0-9]+: 0f 5a c8 cvtps2pd %xmm0,%xmm1
[ ]*[a-f0-9]+: 66 0f 5b c8 cvtps2dq %xmm0,%xmm1
[ ]*[a-f0-9]+: f2 0f 5a c8 cvtsd2ss %xmm0,%xmm1
[ ]*[a-f0-9]+: f3 0f 5a c8 cvtss2sd %xmm0,%xmm1
[ ]*[a-f0-9]+: 66 0f e6 c8 cvttpd2dq %xmm0,%xmm1
[ ]*[a-f0-9]+: f3 0f 5b c8 cvttps2dq %xmm0,%xmm1
[ ]*[a-f0-9]+: 66 0f f7 c8 maskmovdqu %xmm0,%xmm1
[ ]*[a-f0-9]+: 66 0f 6f c8 movdqa %xmm0,%xmm1
[ ]*[a-f0-9]+: 67 66 0f 7f 06 movdqa %xmm0,\(%esi\)
[ ]*[a-f0-9]+: f3 0f 6f c8 movdqu %xmm0,%xmm1
[ ]*[a-f0-9]+: 67 f3 0f 7f 06 movdqu %xmm0,\(%esi\)
[ ]*[a-f0-9]+: f2 0f d6 c8 movdq2q %xmm0,%mm1
[ ]*[a-f0-9]+: f3 0f d6 c8 movq2dq %mm0,%xmm1
[ ]*[a-f0-9]+: 0f f4 c8 pmuludq %mm0,%mm1
[ ]*[a-f0-9]+: 67 0f f4 08 pmuludq \(%eax\),%mm1
[ ]*[a-f0-9]+: 66 0f f4 c8 pmuludq %xmm0,%xmm1
[ ]*[a-f0-9]+: 67 66 0f f4 08 pmuludq \(%eax\),%xmm1
[ ]*[a-f0-9]+: 66 0f 70 c8 01 pshufd \$0x1,%xmm0,%xmm1
[ ]*[a-f0-9]+: f3 0f 70 c8 01 pshufhw \$0x1,%xmm0,%xmm1
[ ]*[a-f0-9]+: f2 0f 70 c8 01 pshuflw \$0x1,%xmm0,%xmm1
[ ]*[a-f0-9]+: 66 0f 73 f8 01 pslldq \$0x1,%xmm0
[ ]*[a-f0-9]+: 66 0f 73 d8 01 psrldq \$0x1,%xmm0
[ ]*[a-f0-9]+: 66 0f 6d c8 punpckhqdq %xmm0,%xmm1
[ ]*[a-f0-9]+: 0f d4 c1 paddq %mm1,%mm0
[ ]*[a-f0-9]+: 67 0f d4 00 paddq \(%eax\),%mm0
[ ]*[a-f0-9]+: 66 0f d4 c1 paddq %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f d4 00 paddq \(%eax\),%xmm0
[ ]*[a-f0-9]+: 0f fb c1 psubq %mm1,%mm0
[ ]*[a-f0-9]+: 67 0f fb 00 psubq \(%eax\),%mm0
[ ]*[a-f0-9]+: 66 0f fb c1 psubq %xmm1,%xmm0
[ ]*[a-f0-9]+: 67 66 0f fb 00 psubq \(%eax\),%xmm0
[ ]*[a-f0-9]+: 0f 58 2f addps \(%bx\),%xmm5
#pass

View File

@ -0,0 +1,7 @@
# Check SSE2 instructions in 16-bit mode
# Assemble everything that follows as 16-bit code.
.code16
# Pull in the instruction list from sse2.s instead of duplicating it here.
.include "sse2.s"
# NOTE(review): presumably sse2.s can leave a different syntax mode active;
# select AT&T syntax with %-prefixed registers for the line below -- confirm.
.att_syntax prefix
# Memory operand with a 16-bit base register; the other memory operands in
# the expected dump use 32-bit base registers.
addps (%bx),%xmm5

View File

@ -1,3 +1,11 @@
2019-06-25 Jan Beulich <jbeulich@suse.com>
* i386-dis.c (prefix_table): Use Edq for cvtsi2ss and cvtsi2sd.
Use Gdq for cvttss2si, cvttsd2si, cvtss2si, and cvtsd2si. Use Edq
and Gdq for movnti.
* i386-opc.tbl (movnti): Add IgnoreSize.
* i386-tbl.h: Re-generate.
2019-06-25 Jan Beulich <jbeulich@suse.com>
* i386-opc.tbl (and): Mark Imm8S form for optimization.

View File

@ -3728,9 +3728,9 @@ static const struct dis386 prefix_table[][4] = {
/* PREFIX_0F2A */
{
{ "cvtpi2ps", { XM, EMCq }, PREFIX_OPCODE },
{ "cvtsi2ss%LQ", { XM, Ev }, PREFIX_OPCODE },
{ "cvtsi2ss%LQ", { XM, Edq }, PREFIX_OPCODE },
{ "cvtpi2pd", { XM, EMCq }, PREFIX_OPCODE },
{ "cvtsi2sd%LQ", { XM, Ev }, 0 },
{ "cvtsi2sd%LQ", { XM, Edq }, 0 },
},
/* PREFIX_0F2B */
@ -3744,17 +3744,17 @@ static const struct dis386 prefix_table[][4] = {
/* PREFIX_0F2C */
{
{ "cvttps2pi", { MXC, EXq }, PREFIX_OPCODE },
{ "cvttss2si", { Gv, EXd }, PREFIX_OPCODE },
{ "cvttss2si", { Gdq, EXd }, PREFIX_OPCODE },
{ "cvttpd2pi", { MXC, EXx }, PREFIX_OPCODE },
{ "cvttsd2si", { Gv, EXq }, PREFIX_OPCODE },
{ "cvttsd2si", { Gdq, EXq }, PREFIX_OPCODE },
},
/* PREFIX_0F2D */
{
{ "cvtps2pi", { MXC, EXq }, PREFIX_OPCODE },
{ "cvtss2si", { Gv, EXd }, PREFIX_OPCODE },
{ "cvtss2si", { Gdq, EXd }, PREFIX_OPCODE },
{ "cvtpd2pi", { MXC, EXx }, PREFIX_OPCODE },
{ "cvtsd2si", { Gv, EXq }, PREFIX_OPCODE },
{ "cvtsd2si", { Gdq, EXq }, PREFIX_OPCODE },
},
/* PREFIX_0F2E */
@ -4063,7 +4063,7 @@ static const struct dis386 prefix_table[][4] = {
/* PREFIX_MOD_0_0FC3 */
{
{ "movntiS", { Ev, Gv }, PREFIX_OPCODE },
{ "movntiS", { Edq, Gdq }, PREFIX_OPCODE },
},
/* PREFIX_MOD_0_0FC7_REG_6 */

View File

@ -945,7 +945,7 @@ fucompi, 1, 0xdfe8, None, 2, Cpu687, ShortForm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|N
// Pentium4 extensions.
movnti, 2, 0xfc3, None, 2, CpuSSE2, Modrm|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoAVX, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
movnti, 2, 0xfc3, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoAVX, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
clflush, 1, 0xfae, 0x7, 2, CpuClflush, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Anysize|BaseIndex }
lfence, 0, 0xfae, 0xe8, 2, CpuSSE2, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|NoAVX, { 0 }
mfence, 0, 0xfae, 0xf0, 2, CpuSSE2, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|NoAVX, { 0 }

View File

@ -9736,7 +9736,7 @@ const insn_template i386_optab[] =
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,