From 1424ad867742286be44932bf29720539add19ae0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 31 Jul 2018 10:58:05 +0200 Subject: [PATCH] x86: also optimize KXOR{D,Q} and KANDN{D,Q} These can be converted to 2-byte VEX encoding when both source registers are the same, by using KXORW / KANDNW as replacement. --- gas/ChangeLog | 9 +++++++++ gas/config/tc-i386.c | 17 ++++++++++++++++- gas/testsuite/gas/i386/optimize-1.d | 4 ++++ gas/testsuite/gas/i386/optimize-1.s | 6 ++++++ gas/testsuite/gas/i386/optimize-4.d | 4 ++++ gas/testsuite/gas/i386/optimize-5.d | 4 ++++ opcodes/ChangeLog | 5 +++++ opcodes/i386-opc.tbl | 8 ++++---- opcodes/i386-tbl.h | 8 ++++---- 9 files changed, 56 insertions(+), 9 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index 4472821a2d..9682f66c72 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,12 @@ +2018-07-31 Jan Beulich + + * config/tc-i386.c (optimize_encoding): Also handle kandnd, + kandnq, kxord, and kxorq. + * testsuite/gas/i386/optimize-1.s: Add kandn and kxor tests. + * testsuite/gas/i386/optimize-1.d, + testsuite/gas/i386/optimize-4.d, + testsuite/gas/i386/optimize-5.d: Adjust expectations. + 2018-07-31 Jan Beulich * config/tc-i386.c (check_VecOperands): Convert masking handling diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 8761e9793d..e07056f725 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -3942,7 +3942,11 @@ optimize_encoding (void) || i.tm.base_opcode == 0x66f8 || i.tm.base_opcode == 0x66f9 || i.tm.base_opcode == 0x66fa - || i.tm.base_opcode == 0x66fb) + || i.tm.base_opcode == 0x66fb + || i.tm.base_opcode == 0x42 + || i.tm.base_opcode == 0x6642 + || i.tm.base_opcode == 0x47 + || i.tm.base_opcode == 0x6647) && i.tm.extension_opcode == None)) { /* Optimize: -O2: @@ -3973,6 +3977,12 @@ optimize_encoding (void) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16) -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + VOP, one of kxord and kxorq: + VEX VOP %kM, %kM, %kN + -> VEX kxorw %kM, %kM, %kN + VOP, one of kandnd and kandnq: + VEX VOP %kM, %kM, %kN + -> VEX kandnw %kM, %kM, %kN */ if (is_evex_encoding (&i.tm)) { @@ -3985,6 +3995,11 @@ optimize_encoding (void) i.tm.opcode_modifier.evex = 0; } } + else if (i.tm.operand_types[0].bitfield.regmask) + { + i.tm.base_opcode &= 0xff; + i.tm.opcode_modifier.vexw = VEXW0; + } else i.tm.opcode_modifier.vex = VEX128; diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index 33ce43c872..4358c19c21 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -58,4 +58,8 @@ Disassembly of section .text: +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5 #pass diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index 21b9594034..f61a176de8 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -66,3 +66,9 @@ _start: vpsubq %ymm1, %ymm1, %ymm5{z}{%k7} vpsubq %zmm1, %zmm1, %zmm5 vpsubq %ymm1, %ymm1, %ymm5 + + kxord %k1, %k1, %k5 + kxorq %k1, %k1, %k5 + + kandnd %k1, %k1, %k5 + kandnq %k1, %k1, %k5 diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d index c9f50f5daa..9f99dadf34 100644 --- a/gas/testsuite/gas/i386/optimize-4.d +++ b/gas/testsuite/gas/i386/optimize-4.d @@ -58,6 +58,10 @@ Disassembly of section .text: +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 #pass diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d index ba4601ef19..cfd0df04a4 100644 --- a/gas/testsuite/gas/i386/optimize-5.d +++ b/gas/testsuite/gas/i386/optimize-5.d @@ -58,6 +58,10 @@ Disassembly of section .text: +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5 + +[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5 + +[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 #pass diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index e5f9300208..f469810795 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,8 @@ +2018-07-31 Jan Beulich + + * i386-opc.tbl (kandnd, kandnq, kxord, kxorq): Add Optimize. + * i386-init.h, i386-tbl.h: Re-generate. + 2018-07-31 Jan Beulich * i386-opc.h (ZEROING_MASKING) Rename to ... diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index ed9513fe57..e65a9ac232 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -4211,7 +4211,7 @@ vpmovzxwq, 2, 0x6634, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|Ve kaddd, 3, 0x664A, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } kandd, 3, 0x6641, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } -kandnd, 3, 0x6642, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } +kandnd, 3, 0x6642, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask } kmovd, 2, 0x6690, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask|Dword|Unspecified|BaseIndex, RegMask } kmovd, 2, 0x6691, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, Dword|Unspecified|BaseIndex } knotd, 2, 0x6644, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask } @@ -4219,10 +4219,10 @@ kord, 3, 0x6645, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2| kortestd, 2, 0x6698, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask } ktestd, 2, 0x6699, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask } kxnord, 3, 0x6646, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } -kxord, 3, 0x6647, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } +kxord, 3, 0x6647, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask } kaddq, 3, 0x4A, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } -kandnq, 3, 0x42, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } +kandnq, 3, 0x42, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask } kandq, 3, 0x41, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } kmovq, 2, 0x90, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask|Qword|Unspecified|BaseIndex, RegMask } kmovq, 2, 0x91, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, Qword|Unspecified|BaseIndex } @@ -4233,7 +4233,7 @@ ktestq, 2, 0x99, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize kunpckdq, 3, 0x4B, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } kunpckwd, 3, 0x4B, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } kxnorq, 3, 0x46, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } -kxorq, 3, 0x47, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask } +kxorq, 3, 0x47, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask } kmovd, 2, 0xF292, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32, RegMask } kmovd, 2, 0xF293, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, Reg32 } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 7c77137d1a..1fc4b6e937 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -67739,7 +67739,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -67918,7 +67918,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -67958,7 +67958,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -68197,7 +68197,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,