i386: Optimize vpblendvb on inverted mask register to vpblendvb on swapping the order of operand 1 and operand 2. [PR target/99908]
-	vpcmpeqd	%ymm3, %ymm3, %ymm3
-	vpandn	%ymm3, %ymm2, %ymm2
-	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0
+	vpblendvb	%ymm2, %ymm0, %ymm1, %ymm0

gcc/ChangeLog:

	PR target/99908
	* config/i386/sse.md (<sse4_1_avx2>_pblendvb): Add splitters for pblendvb of NOT mask register.

gcc/testsuite/ChangeLog:

	PR target/99908
	* gcc.target/i386/avx2-pr99908.c: New test.
	* gcc.target/i386/sse4_1-pr99908.c: New test.
This commit is contained in:
parent
cd36bbb228
commit
8da3b309d8
|
@ -17734,6 +17734,35 @@
|
|||
(set_attr "btver2_decode" "vector,vector,vector")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
;; Split a UNSPEC_BLENDV whose mask (third element) is (not X) in the same
;; VI1_AVX2 mode into a UNSPEC_BLENDV that uses X directly as the mask and
;; has the two data operands exchanged.  Only enabled for TARGET_SSE4_1.
;;
;; Input operand 1 is a "vector_operand" and operand 2 a "register_operand";
;; the replacement lists them as (2, 1), so the register operand ends up
;; first and the vector operand second.
(define_split
|
||||
[(set (match_operand:VI1_AVX2 0 "register_operand")
|
||||
(unspec:VI1_AVX2
|
||||
[(match_operand:VI1_AVX2 1 "vector_operand")
|
||||
(match_operand:VI1_AVX2 2 "register_operand")
|
||||
;; The mask is matched only in its negated form; operand 3 is the
;; un-negated register underneath the NOT.
(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))]
|
||||
UNSPEC_BLENDV))]
|
||||
"TARGET_SSE4_1"
|
||||
[(set (match_dup 0)
|
||||
(unspec:VI1_AVX2
|
||||
;; Operands 1 and 2 swapped, NOT dropped from the mask.
[(match_dup 2) (match_dup 1) (match_dup 3)]
|
||||
UNSPEC_BLENDV))])
|
||||
|
||||
;; Variant of the NOT-mask UNSPEC_BLENDV split for the case where the NOT
;; was done in another mode and reaches the blend through a subreg at
;; offset 0.  Operand 3 is matched without a mode, so the condition checks
;; it explicitly: its mode must be of class MODE_VECTOR_INT and have the
;; same size as the blend mode (<MODE_SIZE>).  Only enabled for
;; TARGET_SSE4_1.
;;
;; The replacement swaps data operands 1 and 2 and uses operand 4 as the
;; mask; operand 4 is created by the preparation statement below.
(define_split
|
||||
[(set (match_operand:VI1_AVX2 0 "register_operand")
|
||||
(unspec:VI1_AVX2
|
||||
[(match_operand:VI1_AVX2 1 "vector_operand")
|
||||
(match_operand:VI1_AVX2 2 "register_operand")
|
||||
(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)]
|
||||
UNSPEC_BLENDV))]
|
||||
"TARGET_SSE4_1
|
||||
&& GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
|
||||
&& GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>"
|
||||
[(set (match_dup 0)
|
||||
(unspec:VI1_AVX2
|
||||
[(match_dup 2) (match_dup 1) (match_dup 4)]
|
||||
UNSPEC_BLENDV))]
|
||||
;; operands[4] is the un-negated mask register (operand 3) taken via
;; gen_lowpart in the blend's own <MODE>mode.
"operands[4] = gen_lowpart (<MODE>mode, operands[3]);")
|
||||
|
||||
(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
|
||||
[(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
|
||||
(unspec:VI1_AVX2
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
/* PR target/99908 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2 -masm=att" } */
|
||||
/* { dg-final { scan-assembler-times "\tvpblendvb\t" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
|
||||
/* { dg-final { scan-assembler-not "\tvpandn" } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
/* Intrinsics form of the inverted-mask blend: the mask handed to
   _mm256_blendv_epi8 is _mm256_andnot_si256 (mask, X), where X is the
   vector built by _mm256_set1_epi8 (255) -- presumably all bits set, so
   the andnot yields ~mask (assuming the usual ~a & b semantics).
   The dg-final directives in this file expect two vpblendvb in total
   across f1 and f2 and no vpcmpeq / vpandn.  */
__m256i
|
||||
f1 (__m256i a, __m256i b, __m256i mask)
|
||||
{
|
||||
return _mm256_blendv_epi8(a, b,
|
||||
_mm256_andnot_si256(mask, _mm256_set1_epi8(255)));
|
||||
}
|
||||
|
||||
/* Generic-vector form of the inverted-mask blend: x is XORed in place with
   a 32-element vector of -1 and the result is then used as the mask
   argument of _mm256_blendv_epi8, with a and b as the data operands.
   The arguments are __v32qi and are cast to __m256i for the intrinsic.  */
__m256i
|
||||
f2 (__v32qi x, __v32qi a, __v32qi b)
|
||||
{
|
||||
x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
return _mm256_blendv_epi8 ((__m256i) a, (__m256i) b, (__m256i) x);
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
/* PR target/99908 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse4.1 -mno-avx -masm=att" } */
|
||||
/* { dg-final { scan-assembler-times "\tpblendvb\t" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "\tpcmpeq" } } */
|
||||
/* { dg-final { scan-assembler-not "\tpandn" } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
/* Intrinsics form of the inverted-mask blend, 128-bit variant: the mask
   handed to _mm_blendv_epi8 is _mm_andnot_si128 (mask, X), where X is the
   vector built by _mm_set1_epi8 (255) -- presumably all bits set, so the
   andnot yields ~mask (assuming the usual ~a & b semantics).
   The dg-final directives in this file expect two pblendvb in total
   across f1 and f2 and no pcmpeq / pandn.  */
__m128i
|
||||
f1 (__m128i a, __m128i b, __m128i mask)
|
||||
{
|
||||
return _mm_blendv_epi8(a, b,
|
||||
_mm_andnot_si128(mask, _mm_set1_epi8(255)));
|
||||
}
|
||||
|
||||
/* Generic-vector form of the inverted-mask blend, 128-bit variant: x is
   XORed in place with a 16-element vector of -1 and the result is then
   used as the mask argument of _mm_blendv_epi8, with a and b as the data
   operands.  The arguments are __v16qi and are cast to __m128i for the
   intrinsic.  */
__m128i
|
||||
f2 (__v16qi x, __v16qi a, __v16qi b)
|
||||
{
|
||||
x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
return _mm_blendv_epi8 ((__m128i) a, (__m128i) b, (__m128i) x);
|
||||
}
|
Loading…
Reference in New Issue