AVX512FP16: Fix wrong code for _mm_mask_f[c]madd.*sch [PR 104978]
For complex scalar intrinsic like _mm_mask_fcmadd_sch, the mask should be and by 1 to ensure the mask is bind to lowest byte. Use masked vmovss to perform same operation which omits higher bits of mask. gcc/ChangeLog: PR target/104978 * config/i386/sse.md (avx512fp16_fmaddcsh_v8hf_mask1<round_expand_name): Use avx512f_movsf_mask instead of vmovaps or vblend, and force_reg before lowpart_subreg. (avx512fp16_fcmaddcsh_v8hf_mask1<round_expand_name): Likewise. gcc/testsuite/ChangeLog: PR target/104978 * gcc.target/i386/avx512fp16-vfcmaddcsh-1a.c: Adjust asm scan. * gcc.target/i386/avx512fp16-vfmaddcsh-1a.c: Ditto. * gcc.target/i386/avx512fp16-vfcmaddcsh-1c.c: Removed. * gcc.target/i386/avx512fp16-vfmaddcsh-1c.c: Ditto. * gcc.target/i386/pr104978.c: New test.
This commit is contained in:
parent
d156bb8702
commit
7bce0be03b
@ -6576,7 +6576,7 @@
|
||||
(match_operand:QI 4 "register_operand")]
|
||||
"TARGET_AVX512FP16 && <round_mode512bit_condition>"
|
||||
{
|
||||
rtx op0, op1;
|
||||
rtx op0, op1, dest;
|
||||
|
||||
if (<round_embedded_complex>)
|
||||
emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask<round_expand_name> (
|
||||
@ -6586,26 +6586,15 @@
|
||||
emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask (operands[0],
|
||||
operands[1], operands[2], operands[3], operands[4]));
|
||||
|
||||
if (TARGET_AVX512VL)
|
||||
{
|
||||
op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode);
|
||||
op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
|
||||
emit_insn (gen_avx512vl_loadv4sf_mask (op0, op0, op1, operands[4]));
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx mask, tmp, vec_mask;
|
||||
mask = lowpart_subreg (SImode, operands[4], QImode),
|
||||
tmp = gen_reg_rtx (SImode);
|
||||
emit_insn (gen_ashlsi3 (tmp, mask, GEN_INT (31)));
|
||||
vec_mask = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_rtx_SET (vec_mask, CONST0_RTX (V4SImode)));
|
||||
emit_insn (gen_vec_setv4si_0 (vec_mask, vec_mask, tmp));
|
||||
vec_mask = lowpart_subreg (V4SFmode, vec_mask, V4SImode);
|
||||
op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode);
|
||||
op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
|
||||
emit_insn (gen_sse4_1_blendvps (op0, op1, op0, vec_mask));
|
||||
}
|
||||
op0 = lowpart_subreg (V4SFmode, force_reg (V8HFmode, operands[0]),
|
||||
V8HFmode);
|
||||
if (!MEM_P (operands[1]))
|
||||
operands[1] = force_reg (V8HFmode, operands[1]);
|
||||
op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
|
||||
dest = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_avx512f_movsf_mask (dest, op1, op0, op1, operands[4]));
|
||||
emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest,
|
||||
V4SFmode));
|
||||
DONE;
|
||||
})
|
||||
|
||||
@ -6631,7 +6620,7 @@
|
||||
(match_operand:QI 4 "register_operand")]
|
||||
"TARGET_AVX512FP16 && <round_mode512bit_condition>"
|
||||
{
|
||||
rtx op0, op1;
|
||||
rtx op0, op1, dest;
|
||||
|
||||
if (<round_embedded_complex>)
|
||||
emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask<round_expand_name> (
|
||||
@ -6641,26 +6630,15 @@
|
||||
emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask (operands[0],
|
||||
operands[1], operands[2], operands[3], operands[4]));
|
||||
|
||||
if (TARGET_AVX512VL)
|
||||
{
|
||||
op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode);
|
||||
op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
|
||||
emit_insn (gen_avx512vl_loadv4sf_mask (op0, op0, op1, operands[4]));
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx mask, tmp, vec_mask;
|
||||
mask = lowpart_subreg (SImode, operands[4], QImode),
|
||||
tmp = gen_reg_rtx (SImode);
|
||||
emit_insn (gen_ashlsi3 (tmp, mask, GEN_INT (31)));
|
||||
vec_mask = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_rtx_SET (vec_mask, CONST0_RTX (V4SImode)));
|
||||
emit_insn (gen_vec_setv4si_0 (vec_mask, vec_mask, tmp));
|
||||
vec_mask = lowpart_subreg (V4SFmode, vec_mask, V4SImode);
|
||||
op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode);
|
||||
op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
|
||||
emit_insn (gen_sse4_1_blendvps (op0, op1, op0, vec_mask));
|
||||
}
|
||||
op0 = lowpart_subreg (V4SFmode, force_reg (V8HFmode, operands[0]),
|
||||
V8HFmode);
|
||||
if (!MEM_P (operands[1]))
|
||||
operands[1] = force_reg (V8HFmode, operands[1]);
|
||||
op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
|
||||
dest = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_avx512f_movsf_mask (dest, op1, op0, op1, operands[4]));
|
||||
emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest,
|
||||
V4SFmode));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mno-avx512vl -O2" } */
|
||||
/* { dg-options "-mavx512fp16 -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vblendvps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
|
@ -1,13 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vblendmps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include "avx512fp16-vfcmaddcsh-1a.c"
|
||||
|
@ -1,13 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mno-avx512vl -O2" } */
|
||||
/* { dg-options "-mavx512fp16 -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vblendvps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
|
@ -1,13 +0,0 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vblendmps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include "avx512fp16-vfmaddcsh-1a.c"
|
||||
|
18
gcc/testsuite/gcc.target/i386/pr104978.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr104978.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* PR target/104978 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512fp16" } */
|
||||
/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}+(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include<immintrin.h>
|
||||
|
||||
__m128h
|
||||
foo (__m128h a, __m128h b, __m128h c, __mmask8 m)
|
||||
{
|
||||
return _mm_mask_fmadd_round_sch (a, m, b, c, 8);
|
||||
}
|
||||
|
||||
__m128h
|
||||
foo2 (__m128h a, __m128h b, __m128h c, __mmask8 m)
|
||||
{
|
||||
return _mm_mask_fcmadd_round_sch (a, m, b, c, 8);
|
||||
}
|
Loading…
Reference in New Issue
Block a user