x86: VCVTNEPS2BF16{X,Y} should permit broadcasting
Just like other VCVT*{X,Y} templates do, and to allow the programmer
flexibility (might be relevant in particular when heavily macro-izing
code), the two templates should also have Broadcast set, just like their
X/Y-suffix-less counterparts. This in turn requires them to also have
* Dword set on their memory operands, to cover the logic added to
i386gen by 4a1b91eabb
("x86: Expand Broadcast to 3 bits"),
* RegXMM/RegYMM set on their source operands, to satisfy broadcast
sizing logic in gas itself.
Otherwise ATTSyntax templates wouldn't need such operand size attributes.
While extending the test cases, also add Intel syntax broadcast forms
without explicit size specifiers.
This commit is contained in:
parent
53570fbcca
commit
c906a69a1f
@ -1,3 +1,12 @@
|
||||
2020-01-21 Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
* testsuite/gas/i386/avx512_bf16_vl.s,
|
||||
testsuite/gas/i386/x86-64-avx512_bf16_vl.s: Add broadcast forms
|
||||
of VCVTNEPS2BF16{X,Y}. Add operand-size less Intel syntax
|
||||
broadcast forms of VCVTNEPS2BF16.
|
||||
* testsuite/gas/i386/avx512_bf16_vl.d,
|
||||
testsuite/gas/i386/x86-64-avx512_bf16_vl.d: Adjust expectations.
|
||||
|
||||
2020-01-20 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* po/uk.po: Updated Ukranian translation.
|
||||
|
@ -23,9 +23,11 @@ Disassembly of section .text:
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 28 72 f5 vcvtneps2bf16 %ymm5,%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 0f 72 b4 f4 00 00 00 10 vcvtneps2bf16x 0x10000000\(%esp,%esi,8\),%xmm6\{%k7\}
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%ecx\),%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 9f 72 b2 00 f8 ff ff vcvtneps2bf16 -0x800\(%edx\)\{1to4\},%xmm6\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 28 72 71 7f vcvtneps2bf16y 0xfe0\(%ecx\),%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e bf 72 b2 00 f0 ff ff vcvtneps2bf16 -0x1000\(%edx\)\{1to8\},%xmm6\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 f2 56 28 52 f4 vdpbf16ps %ymm4,%ymm5,%ymm6
|
||||
@ -52,9 +54,11 @@ Disassembly of section .text:
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 28 72 f5 vcvtneps2bf16 %ymm5,%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 0f 72 b4 f4 00 00 00 10 vcvtneps2bf16x 0x10000000\(%esp,%esi,8\),%xmm6\{%k7\}
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%ecx\),%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 9f 72 b2 00 f8 ff ff vcvtneps2bf16 -0x800\(%edx\)\{1to4\},%xmm6\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 28 72 71 7f vcvtneps2bf16y 0xfe0\(%ecx\),%xmm6
|
||||
[ ]*[a-f0-9]+: 62 f2 7e bf 72 b2 00 f0 ff ff vcvtneps2bf16 -0x1000\(%edx\)\{1to8\},%xmm6\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 f2 56 28 52 f4 vdpbf16ps %ymm4,%ymm5,%ymm6
|
||||
|
@ -17,9 +17,11 @@ _start:
|
||||
vcvtneps2bf16 %ymm5, %xmm6 #AVX512{BF16,VL}
|
||||
vcvtneps2bf16x 0x10000000(%esp, %esi, 8), %xmm6{%k7} #AVX512{BF16,VL} MASK_ENABLING
|
||||
vcvtneps2bf16 (%ecx){1to4}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16x (%ecx){1to4}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16x 2032(%ecx), %xmm6 #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 -2048(%edx){1to4}, %xmm6{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
vcvtneps2bf16 (%ecx){1to8}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16y (%ecx){1to8}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16y 4064(%ecx), %xmm6 #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 -4096(%edx){1to8}, %xmm6{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
vdpbf16ps %ymm4, %ymm5, %ymm6 #AVX512{BF16,VL}
|
||||
@ -47,9 +49,11 @@ _start:
|
||||
vcvtneps2bf16 xmm6, xmm5 #AVX512{BF16,VL}
|
||||
vcvtneps2bf16 xmm6, ymm5 #AVX512{BF16,VL}
|
||||
vcvtneps2bf16 xmm6{k7}, XMMWORD PTR [esp+esi*8+0x10000000] #AVX512{BF16,VL} MASK_ENABLING
|
||||
vcvtneps2bf16 xmm6, [ecx]{1to4} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm6, DWORD PTR [ecx]{1to4} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm6, XMMWORD PTR [ecx+2032] #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 xmm6{k7}{z}, DWORD PTR [edx-2048]{1to4} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
vcvtneps2bf16 xmm6, [ecx]{1to8} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm6, DWORD PTR [ecx]{1to8} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm6, YMMWORD PTR [ecx+4064] #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 xmm6{k7}{z}, DWORD PTR [edx-4096]{1to8} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
|
@ -23,9 +23,11 @@ Disassembly of section .text:
|
||||
[ ]*[a-f0-9]+: 62 02 7e 28 72 f5 vcvtneps2bf16 %ymm29,%xmm30
|
||||
[ ]*[a-f0-9]+: 62 22 7e 0f 72 b4 f5 00 00 00 10 vcvtneps2bf16x 0x10000000\(%rbp,%r14,8\),%xmm30\{%k7\}
|
||||
[ ]*[a-f0-9]+: 62 c2 7e 18 72 29 vcvtneps2bf16 \(%r9\)\{1to4\},%xmm21
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 18 72 09 vcvtneps2bf16 \(%rcx\)\{1to4\},%xmm1
|
||||
[ ]*[a-f0-9]+: 62 62 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%rcx\),%xmm30
|
||||
[ ]*[a-f0-9]+: 62 62 7e 9f 72 aa 00 f8 ff ff vcvtneps2bf16 -0x800\(%rdx\)\{1to4\},%xmm29\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 c2 7e 38 72 31 vcvtneps2bf16 \(%r9\)\{1to8\},%xmm22
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 38 72 11 vcvtneps2bf16 \(%rcx\)\{1to8\},%xmm2
|
||||
[ ]*[a-f0-9]+: 62 e2 7e 28 72 79 7f vcvtneps2bf16y 0xfe0\(%rcx\),%xmm23
|
||||
[ ]*[a-f0-9]+: 62 62 7e bf 72 9a 00 f0 ff ff vcvtneps2bf16 -0x1000\(%rdx\)\{1to8\},%xmm27\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 02 16 20 52 f4 vdpbf16ps %ymm28,%ymm29,%ymm30
|
||||
@ -51,9 +53,11 @@ Disassembly of section .text:
|
||||
[ ]*[a-f0-9]+: 62 02 7e 08 72 f5 vcvtneps2bf16 %xmm29,%xmm30
|
||||
[ ]*[a-f0-9]+: 62 02 7e 28 72 f5 vcvtneps2bf16 %ymm29,%xmm30
|
||||
[ ]*[a-f0-9]+: 62 22 7e 0f 72 b4 f5 00 00 00 10 vcvtneps2bf16x 0x10000000\(%rbp,%r14,8\),%xmm30\{%k7\}
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 18 72 29 vcvtneps2bf16 \(%rcx\)\{1to4\},%xmm5
|
||||
[ ]*[a-f0-9]+: 62 42 7e 18 72 09 vcvtneps2bf16 \(%r9\)\{1to4\},%xmm25
|
||||
[ ]*[a-f0-9]+: 62 62 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%rcx\),%xmm30
|
||||
[ ]*[a-f0-9]+: 62 62 7e 9f 72 b2 00 f8 ff ff vcvtneps2bf16 -0x800\(%rdx\)\{1to4\},%xmm30\{%k7\}\{z\}
|
||||
[ ]*[a-f0-9]+: 62 f2 7e 38 72 21 vcvtneps2bf16 \(%rcx\)\{1to8\},%xmm4
|
||||
[ ]*[a-f0-9]+: 62 42 7e 38 72 01 vcvtneps2bf16 \(%r9\)\{1to8\},%xmm24
|
||||
[ ]*[a-f0-9]+: 62 62 7e 28 72 71 7f vcvtneps2bf16y 0xfe0\(%rcx\),%xmm30
|
||||
[ ]*[a-f0-9]+: 62 62 7e bf 72 b2 00 f0 ff ff vcvtneps2bf16 -0x1000\(%rdx\)\{1to8\},%xmm30\{%k7\}\{z\}
|
||||
|
@ -17,9 +17,11 @@ _start:
|
||||
vcvtneps2bf16 %ymm29, %xmm30 #AVX512{BF16,VL}
|
||||
vcvtneps2bf16x 0x10000000(%rbp, %r14, 8), %xmm30{%k7} #AVX512{BF16,VL} MASK_ENABLING
|
||||
vcvtneps2bf16 (%r9){1to4}, %xmm21 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16x (%rcx){1to4}, %xmm1 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16x 2032(%rcx), %xmm30 #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 -2048(%rdx){1to4}, %xmm29{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
vcvtneps2bf16 (%r9){1to8}, %xmm22 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16y (%rcx){1to8}, %xmm2 #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16y 4064(%rcx), %xmm23 #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 -4096(%rdx){1to8}, %xmm27{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
vdpbf16ps %ymm28, %ymm29, %ymm30 #AVX512{BF16,VL}
|
||||
@ -47,9 +49,11 @@ _start:
|
||||
vcvtneps2bf16 xmm30, xmm29 #AVX512{BF16,VL}
|
||||
vcvtneps2bf16 xmm30, ymm29 #AVX512{BF16,VL}
|
||||
vcvtneps2bf16 xmm30{k7}, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX512{BF16,VL} MASK_ENABLING
|
||||
vcvtneps2bf16 xmm5, [rcx]{1to4} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm25, DWORD PTR [r9]{1to4} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm30, XMMWORD PTR [rcx+2032] #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 xmm30{k7}{z}, DWORD PTR [rdx-2048]{1to4} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
vcvtneps2bf16 xmm4, [rcx]{1to8} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm24, DWORD PTR [r9]{1to8} #AVX512{BF16,VL} BROADCAST_EN
|
||||
vcvtneps2bf16 xmm30, YMMWORD PTR [rcx+4064] #AVX512{BF16,VL} Disp8
|
||||
vcvtneps2bf16 xmm30{k7}{z}, DWORD PTR [rdx-4096]{1to8} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
|
||||
|
@ -1,3 +1,10 @@
|
||||
2020-01-21 Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
* i386-opc.tbl (vcvtneps2bf16x): Add Broadcast, Xmmword, and
|
||||
Dword.
|
||||
(vcvtneps2bf16y): Add Broadcast, Ymmword, and Dword.
|
||||
* i386-tbl.h: Re-generate.
|
||||
|
||||
2020-01-20 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* po/de.po: Updated German translation.
|
||||
|
@ -4772,8 +4772,8 @@ vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|E
|
||||
vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Broadcast|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|BaseIndex, RegXMM }
|
||||
vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16, Modrm|VexOpcode|EVex512|Masking=3|VexW0|Broadcast|Disp8MemShift=6|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|Unspecified|BaseIndex, RegYMM }
|
||||
|
||||
vcvtneps2bf16x, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex128|Masking=3|VexW0|Disp8MemShift=4|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Unspecified|BaseIndex, RegXMM }
|
||||
vcvtneps2bf16y, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Unspecified|BaseIndex, RegXMM }
|
||||
vcvtneps2bf16x, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex128|Masking=3|VexW0|Broadcast|Disp8MemShift=4|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
|
||||
vcvtneps2bf16y, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Broadcast|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { RegYMM|Dword|Unspecified|BaseIndex, RegXMM }
|
||||
|
||||
vdpbf16ps, 3, 0xf352, None, 1, CpuAVX512_BF16, Modrm|VexOpcode|VexVVVV|Masking=3|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
|
||||
|
||||
|
@ -58323,9 +58323,9 @@ const insn_template i386_optab[] =
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
1, 0, 0, 0, 0, 2, 3, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0 } },
|
||||
1, 0, 0, 0, 0, 2, 3, 3, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ { { 7, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
|
||||
0, 1, 0, 0, 1, 0 } },
|
||||
{ { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0 } } } },
|
||||
{ "vcvtneps2bf16y", 0xf372, None, 1, 2,
|
||||
@ -58337,9 +58337,9 @@ const insn_template i386_optab[] =
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
1, 0, 0, 0, 0, 3, 3, 0, 0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 1, 0 } },
|
||||
1, 0, 0, 0, 0, 3, 3, 3, 0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ { { 7, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
|
||||
0, 0, 1, 0, 1, 0 } },
|
||||
{ { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0 } } } },
|
||||
{ "vdpbf16ps", 0xf352, None, 1, 3,
|
||||
|
Loading…
Reference in New Issue
Block a user