Add combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0

gcc/ChangeLog:

	PR target/100738
	* config/i386/sse.md
	(*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint):
	Add new define_insn_and_split.

gcc/testsuite/ChangeLog:

	PR target/100738
	* g++.target/i386/pr100738-1.C: New test.
This commit is contained in:
Haochen Jiang 2021-12-02 15:30:17 +08:00 committed by liuhongt
parent 0485ce9128
commit 691f05c219
2 changed files with 46 additions and 0 deletions

View File

@ -20767,6 +20767,33 @@
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<ssefltvecmode>")])
;; PR target/100738: fold an inverted blend selector into the blend itself,
;; so that vpcmpeqd + vpxor + vblendvps becomes a single vblendvps.
;;
;; Matches a <ssebytemode> UNSPEC_BLENDV whose selector is "(~op3) < 0",
;; with the comparison done element-wise in a VI48_AVX integer mode.  The
;; sign bit of ~x is the complement of the sign bit of x, so the split drops
;; the NOT, uses operand 3 directly as the selector, and exchanges the two
;; data operands to compensate.
(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
[(set (match_operand:<ssebytemode> 0 "register_operand")
(unspec:<ssebytemode>
[(match_operand:<ssebytemode> 1 "register_operand")
(match_operand:<ssebytemode> 2 "vector_operand")
(subreg:<ssebytemode>
(lt:VI48_AVX
(subreg:VI48_AVX
(not:<ssebytemode>
(match_operand:<ssebytemode> 3 "register_operand")) 0)
(match_operand:VI48_AVX 4 "const0_operand")) 0)]
UNSPEC_BLENDV))]
;; ix86_pre_reload_split () gates the match (per its name, to the pre-reload
;; phase; its definition is not visible here).  The "#" output template means
;; the insn is never emitted as-is, and "&& 1" makes the split unconditional
;; once the insn condition holds.
"TARGET_SSE4_1 && ix86_pre_reload_split ()"
"#"
"&& 1"
;; Replacement: the same blend in <ssefltvecmode>, with operands 1 and 2
;; exchanged and the un-negated operand 3 as the selector.
[(set (match_dup 0)
(unspec:<ssefltvecmode>
[(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
;; Preparation code: re-view every operand in <ssefltvecmode>.  Operand 2 was
;; matched as "vector_operand", so it is forced into a register before it
;; takes the first source slot -- presumably because the target blendv
;; pattern requires a register there; confirm against that pattern.
{
operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
operands[2] = force_reg (<ssefltvecmode>mode,
gen_lowpart (<ssefltvecmode>mode, operands[2]));
operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
})
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-Ofast -mavx2" } */
/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */
/* 16-byte GNU vector types: v4si holds int elements, v16qi holds char
   elements.  */
typedef int v4si __attribute__((vector_size(16)));
typedef char v16qi __attribute__((vector_size(16)));
/* Select per element between c and d using the bitwise complement of a,
   reinterpreted as v4si: c where (~a) < 0, d otherwise.  Parameter b is
   unused.  The dg-final directives in this file expect this to compile
   to vblendvps with no vpcmpeqd/vpxor.  */
v4si
foo_1 (v16qi a, v4si b, v4si c, v4si d)
{
return ((v4si)~a) < 0 ? c : d;
}
/* Same as foo_1 but with the opposite comparison: c where (~a) >= 0,
   d otherwise.  Parameter b is unused.  Together with foo_1 this accounts
   for the two vblendvps occurrences the dg-final directive counts.  */
v4si
foo_2 (v16qi a, v4si b, v4si c, v4si d)
{
return ((v4si)~a) >= 0 ? c : d;
}