i386: Optimize pmovmskb on zero_extend of subreg HI of pmovmskb result [PR98461]

The following patch adds a define_insn_and_split to optimize

       vpmovmskb       %xmm0, %eax
-       movzwl  %ax, %eax
        notl    %eax

and a combine splitter to optimize

        pmovmskb        %xmm0, %eax
-       notl    %eax
-       movzwl  %ax, %eax
+       xorl    $65535, %eax

gcc/ChangeLog
	PR target/98461
	* config/i386/sse.md (*sse2_pmovskb_zexthisi): New
	define_insn_and_split for zero_extend of subreg HI of pmovmskb
	result.
	(*sse2_pmovskb_zexthisi): Add new combine splitters for
	zero_extend of not of subreg HI of pmovmskb result.

gcc/testsuite/ChangeLog
	* gcc.target/i386/sse2-pr98461-2.c: New test.
This commit is contained in:
liuhongt 2021-01-04 11:24:30 +08:00
parent e8beba1cfc
commit bea984814c
2 changed files with 53 additions and 0 deletions

View File

@ -16099,6 +16099,34 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
;; Match a zero_extend to SImode of the HImode subreg (byte offset 0) of an
;; SImode UNSPEC_MOVMSK taken from a V16QI register, and split it into the
;; plain SImode UNSPEC_MOVMSK set, i.e. drop the explicit zero extension
;; (the movzwl removed in the commit message example).
;; NOTE(review): presumably valid because a V16QI movmsk produces at most
;; 16 mask bits, so the upper half of the SImode result is already zero
;; -- confirm.
(define_insn_and_split "*sse2_pmovskb_zexthisi"
[(set (match_operand:SI 0 "register_operand")
(zero_extend:SI
(subreg:HI
(unspec:SI
[(match_operand:V16QI 1 "register_operand")]
UNSPEC_MOVMSK) 0)))]
;; Insn condition: SSE2 must be enabled and ix86_pre_reload_split () must
;; hold (presumably restricting the pattern to before reload -- confirm).
"TARGET_SSE2 && ix86_pre_reload_split ()"
;; "#" output template: no assembly is emitted for this insn itself; it is
;; expected to be split.
"#"
;; Split condition: the insn condition above with nothing added.
"&& 1"
;; Replacement: the same movmsk written straight into operand 0.
[(set (match_dup 0)
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
;; Splitter for the complemented form: a zero_extend to SImode of (not:HI)
;; applied to the HImode subreg (byte offset 0) of an SImode UNSPEC_MOVMSK
;; taken from a V16QI register.  It is rewritten as two sets: the SImode
;; movmsk into operand 2, then an SImode xor of operand 2 with 65535 into
;; operand 0.  Per the commit message this replaces notl + movzwl with a
;; single xorl $65535.
(define_split
[(set (match_operand:SI 0 "register_operand")
(zero_extend:SI
(not:HI
(subreg:HI
(unspec:SI
[(match_operand:V16QI 1 "register_operand")]
UNSPEC_MOVMSK) 0))))]
"TARGET_SSE2"
[(set (match_dup 2)
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
(set (match_dup 0)
(xor:SI (match_dup 2) (const_int 65535)))]
;; Preparation statement: operand 2 is an SImode register obtained from
;; gen_reg_rtx, used as the temporary that holds the uncomplemented mask.
"operands[2] = gen_reg_rtx (SImode);")
(define_split
[(set (match_operand:SI 0 "register_operand")
(unspec:SI

View File

@ -0,0 +1,25 @@
/* PR target/98461 */
/* { dg-do compile } */
/* { dg-options "-O2 -msse2 -mno-sse3 -masm=att" } */
/* { dg-final { scan-assembler-times "\tpmovmskb\t" 3 } } */
/* { dg-final { scan-assembler-not "\tmovzwl" } } */
/* { dg-final { scan-assembler-times "\tnotl" 1 } } */
/* { dg-final { scan-assembler-times "\txorl" 1 } } */
#include <immintrin.h>
/* Truncates the byte mask of LOGICAL to unsigned short and returns its
   bitwise complement.  RES is promoted to int before `~' is applied, so the
   complement is taken at int width rather than at 16 bits.
   NOTE(review): presumably the case expected to produce the single notl
   with no movzwl -- confirm against the dg-final counts.  */
unsigned int movemask_not1(__m128i logical) {
unsigned short res = (unsigned short)(_mm_movemask_epi8(logical));
return ~res;
}
/* Truncates the byte mask of LOGICAL to unsigned short, complements it and
   stores the result back into the unsigned short, so the complement is
   truncated to 16 bits and then zero-extended to unsigned int on return.
   NOTE(review): presumably the case expected to produce the single
   xorl $65535 -- confirm against the dg-final counts.  */
unsigned int movemask_not2(__m128i logical) {
unsigned short res = (unsigned short)(_mm_movemask_epi8(logical));
/* The assignment converts the int-width complement back to 16 bits.  */
res = ~res;
return res;
}
/* Keeps the byte mask of LOGICAL at unsigned int width and returns its low
   16 bits by masking with 0xffff.
   NOTE(review): presumably the case where the zero extension of the
   pmovmskb result should be dropped entirely (no movzwl) -- confirm.  */
unsigned int movemask_zero_extend(__m128i logical) {
unsigned int res = _mm_movemask_epi8(logical);
return res & 0xffff;
}