Refine movhfcc.

With AVX512-FP16, the only HFmode compare instruction is vcmpsh, whose
destination is a mask register, so movhfcc is expanded to:

vcmpsh op2, op1, %k1
vmovsh op1, op2{%k1}
mov op2, dest

gcc/ChangeLog:

	PR target/102639
	* config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Handle
	HFmode.
	(ix86_use_mask_cmp_p): Ditto.
	(ix86_expand_sse_movcc): Ditto.
	* config/i386/i386.md (setcc_hf_mask): New define_insn.
	(movhf_mask): Ditto.
	(UNSPEC_MOVCC_MASK): New unspec.
	* config/i386/sse.md (UNSPEC_PCMP): Move to i386.md.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr102639.C: New test.
This commit is contained in:
liuhongt 2021-10-08 15:21:44 +08:00
parent ce6eec3926
commit 0d788c358b
4 changed files with 67 additions and 6 deletions

View File

@ -3613,6 +3613,10 @@ ix86_valid_mask_cmp_mode (machine_mode mode)
if (TARGET_XOP && !TARGET_AVX512F)
return false;
/* HFmode only supports vcmpsh whose dest is mask register. */
if (TARGET_AVX512FP16 && mode == HFmode)
return true;
/* AVX512F is needed for mask operation. */
if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
return false;
@ -3634,7 +3638,9 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
{
int vector_size = GET_MODE_SIZE (mode);
if (vector_size < 16)
if (cmp_mode == HFmode)
return true;
else if (vector_size < 16)
return false;
else if (vector_size == 64)
return true;
@ -3750,7 +3756,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
&& GET_MODE_CLASS (cmpmode) == MODE_INT)
{
gcc_assert (ix86_valid_mask_cmp_mode (mode));
/* Using vector move with mask register. */
/* Using scalar/vector move with mask register. */
cmp = force_reg (cmpmode, cmp);
/* Optimize for mask zero. */
op_true = (op_true != CONST0_RTX (mode)
@ -3769,8 +3775,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
std::swap (op_true, op_false);
}
rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
emit_insn (gen_rtx_SET (dest, vec_merge));
if (mode == HFmode)
emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp));
else
{
rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
emit_insn (gen_rtx_SET (dest, vec_merge));
}
return;
}
else if (vector_all_ones_operand (op_true, mode)

View File

@ -117,6 +117,7 @@
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
UNSPEC_MASKMOV
UNSPEC_MOVCC_MASK
UNSPEC_MOVMSK
UNSPEC_BLENDV
UNSPEC_PSHUFB
@ -125,8 +126,9 @@
UNSPEC_RSQRT
UNSPEC_PSADBW
;; For AVX512F support
;; For AVX/AVX512F support
UNSPEC_SCALEF
UNSPEC_PCMP
;; Generic math support
UNSPEC_IEEE_MIN ; not commutative
@ -13608,6 +13610,20 @@
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<MODE>")])
;; Scalar HFmode compare whose result is written to a mask register
;; (operand 0, constraint "k").  Operand 1 must be a register, operand 2
;; may be a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_31_operand that is emitted as the immediate operand of
;; vcmpsh.  The comparison is wrapped in UNSPEC_PCMP rather than being
;; expressed with an RTL comparison code.  Requires TARGET_AVX512FP16.
(define_insn "setcc_hf_mask"
[(set (match_operand:QI 0 "register_operand" "=k")
(unspec:QI
[(match_operand:HF 1 "register_operand" "v")
(match_operand:HF 2 "nonimmediate_operand" "vm")
(match_operand:SI 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP))]
"TARGET_AVX512FP16"
"vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
;; Basic conditional jump instructions.
@ -19841,6 +19857,22 @@
operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
})
;; Masked scalar HFmode move, emitted as vmovsh with operand 3 (a QImode
;; mask register) as the write mask.  Operand 1 is the value moved under
;; the mask.  Operand 2 is the fallback value: constraint "0C" lets it
;; either be tied to the destination or match the "C" constraint
;; (presumably constant zero, as the nonimm_or_0_operand predicate
;; suggests -- confirm).
;; NOTE(review): %N2 presumably prints the zero-masking suffix when
;; operand 2 is zero and nothing otherwise -- confirm against
;; ix86_print_operand.
;; The three alternatives are: memory -> register, register -> memory,
;; and register -> register (which prints operand 1 through the %d
;; modifier -- TODO confirm its exact effect for vmovsh).
;; Represented as UNSPEC_MOVCC_MASK.  Requires TARGET_AVX512FP16.
(define_insn "movhf_mask"
[(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
(unspec:HF
[(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
(match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
(match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
UNSPEC_MOVCC_MASK))]
"TARGET_AVX512FP16"
"@
vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
(define_expand "movhfcc"
[(set (match_operand:HF 0 "register_operand")
(if_then_else:HF

View File

@ -67,7 +67,6 @@
UNSPEC_PCLMUL
;; For AVX support
UNSPEC_PCMP
UNSPEC_VPERMIL
UNSPEC_VPERMIL2
UNSPEC_VPERMIL2F128

View File

@ -0,0 +1,19 @@
/* PR target/102639 */
/* { dg-do compile } */
/* { dg-options "-O2 -std=c++14 -mavx512fp16" } */
/* { dg-final { scan-assembler-times "vminsh" 1 } } */
/* Compile-only test: with -mavx512fp16 the generated assembly must
   contain exactly one match for "vminsh".  */
/* Despite the "v16hf" name, vector_size(2) makes this a 2-byte vector,
   i.e. it holds a single _Float16 element.  */
typedef _Float16 v16hf __attribute__((vector_size(2)));
v16hf vcond_v16hfv16hfge_b, vcond_v16hfv16hfge_c, vcond_v16hfv16hfge_d,
__attribute__vcond_v16hfv16hfge_a;
/* Conditional select: picks _c when _a >= _b and _d otherwise.  The
   selected values differ from the compared ones, so this is not a
   min/max shape.  */
v16hf __attribute__vcond_v16hfv16hfge() {
return __attribute__vcond_v16hfv16hfge_a >= vcond_v16hfv16hfge_b
? vcond_v16hfv16hfge_c
: vcond_v16hfv16hfge_d;
}
/* Computes a < b ? a : b, i.e. a minimum despite the "max" in the name.
   NOTE(review): presumably this is the function expected to produce the
   single vminsh counted by the dg-final directive -- confirm.  */
v16hf __attribute__vcond_v16hfv16hfmax() {
return __attribute__vcond_v16hfv16hfge_a < vcond_v16hfv16hfge_b
? __attribute__vcond_v16hfv16hfge_a
: vcond_v16hfv16hfge_b;
}