For 128/256-bit vec_cond_expr, When mask operands is lt reg const0_rtx, blendv can be used instead of avx512 mask.

gcc/ChangeLog:

	PR target/100648
	* config/i386/sse.md (*avx_cmp<mode>3_lt): New
	define_insn_and_split.
	(*avx_cmp<mode>3_ltint): Ditto.
	(*avx2_pcmp<mode>3_3): Ditto.
	(*avx2_pcmp<mode>3_4): Ditto.
	(*avx2_pcmp<mode>3_5): Ditto.

gcc/testsuite/ChangeLog:

	PR target/100648
	* g++.target/i386/avx2-pr54700-2.C: Adjust testcase.
	* g++.target/i386/avx512vl-pr54700-1a.C: New test.
	* g++.target/i386/avx512vl-pr54700-1b.C: New test.
	* g++.target/i386/avx512vl-pr54700-2a.C: New test.
	* g++.target/i386/avx512vl-pr54700-2b.C: New test.
	* gcc.target/i386/avx512vl-pr100648.c: New test.
	* gcc.target/i386/avx512vl-blendv-1.c: New test.
	* gcc.target/i386/avx512vl-blendv-2.c: New test.
This commit is contained in:
liuhongt 2021-05-24 10:57:52 +08:00
parent 28560c6d40
commit 3f1a08d9d7
9 changed files with 324 additions and 1 deletions

View File

@ -3048,6 +3048,68 @@
UNSPEC_PCMP))]
"operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
(define_insn_and_split "*avx_cmp<mode>3_lt"
[(set (match_operand:VF_128_256 0 "register_operand")
(vec_merge:VF_128_256
(match_operand:VF_128_256 1 "vector_operand")
(match_operand:VF_128_256 2 "vector_operand")
(unspec:<avx512fmaskmode>
[(match_operand:<sseintvecmode> 3 "register_operand")
(match_operand:<sseintvecmode> 4 "const0_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
/* LT or GE 0 */
&& ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
|| (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
"#"
"&& 1"
[(set (match_dup 0)
(unspec:VF_128_256
[(match_dup 2)
(match_dup 1)
(lt:VF_128_256
(match_dup 3)
(match_dup 4))]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 5)
std::swap (operands[1], operands[2]);
})
(define_insn_and_split "*avx_cmp<mode>3_ltint"
[(set (match_operand:VI48_AVX 0 "register_operand")
(vec_merge:VI48_AVX
(match_operand:VI48_AVX 1 "vector_operand")
(match_operand:VI48_AVX 2 "vector_operand")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_AVX 3 "register_operand")
(match_operand:VI48_AVX 4 "const0_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
/* LT or GE 0 */
&& ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
|| (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
"#"
"&& 1"
[(set (match_dup 0)
(unspec:<ssebytemode>
[(match_dup 2)
(match_dup 1)
(subreg:<ssebytemode>
(lt:VI48_AVX
(match_dup 3)
(match_dup 4)) 0)]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 5)
std::swap (operands[1], operands[2]);
operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]);
operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]);
})
(define_insn "avx_vmcmp<mode>3"
[(set (match_operand:VF_128 0 "register_operand" "=x")
(vec_merge:VF_128
@ -13063,6 +13125,96 @@
DONE;
})
(define_insn_and_split "*avx2_pcmp<mode>3_3"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(vec_merge:VI1_AVX2
(match_operand:VI1_AVX2 1 "vector_operand")
(match_operand:VI1_AVX2 2 "vector_operand")
(unspec:<avx512fmaskmode>
[(match_operand:VI1_AVX2 3 "register_operand")
(match_operand:VI1_AVX2 4 "const0_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
/* LT or GE 0 */
&& ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
|| (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
"#"
"&& 1"
[(set (match_dup 0)
(unspec:VI1_AVX2
[(match_dup 2)
(match_dup 1)
(lt:VI1_AVX2
(match_dup 3)
(match_dup 4))]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 5)
std::swap (operands[1], operands[2]);
})
(define_insn_and_split "*avx2_pcmp<mode>3_4"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(vec_merge:VI1_AVX2
(match_operand:VI1_AVX2 1 "vector_operand")
(match_operand:VI1_AVX2 2 "vector_operand")
(unspec:<avx512fmaskmode>
[(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)
(match_operand:VI1_AVX2 4 "const0_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
&& GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
&& GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
/* LT or GE 0 */
&& ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
|| (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
"#"
"&& 1"
[(set (match_dup 0)
(unspec:VI1_AVX2
[(match_dup 1)
(match_dup 2)
(lt:VI1_AVX2
(match_dup 3)
(match_dup 4))]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 1)
std::swap (operands[1], operands[2]);
operands[3] = gen_lowpart (<MODE>mode, operands[3]);
})
(define_insn_and_split "*avx2_pcmp<mode>3_5"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(vec_merge:VI1_AVX2
(match_operand:VI1_AVX2 1 "vector_operand")
(match_operand:VI1_AVX2 2 "vector_operand")
(unspec:<avx512fmaskmode>
[(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))
(match_operand:VI1_AVX2 4 "const0_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
/* LT or GE 0 */
&& ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
|| (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
"#"
"&& 1"
[(set (match_dup 0)
(unspec:VI1_AVX2
[(match_dup 1)
(match_dup 2)
(lt:VI1_AVX2
(match_dup 3)
(match_dup 4))]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 1)
std::swap (operands[1], operands[2]);
})
(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>

View File

@ -2,9 +2,15 @@
/* { dg-do run { target avx2 } } */
/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mno-avx512f" } */
#include "avx2-check.h"
#ifndef CHECK_H
#define CHECK_H "avx2-check.h"
#endif
#ifndef TEST
#define TEST avx2_test
#endif
#include CHECK_H
#include "avx2-pr54700-1.C"

View File

@ -0,0 +1,9 @@
/* PR target/100648 */
/* { dg-do compile } */
/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mavx512vl -mavx512bw" } */
/* { dg-final { scan-assembler-not "vpcmpgt\[bdq]" } } */
/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
/* { dg-final { scan-assembler-times "vblendvps" 4 } } */
/* { dg-final { scan-assembler-times "vblendvpd" 4 } } */
#include "avx2-pr54700-1.C"

View File

@ -0,0 +1,9 @@
/* PR target/100648 */
/* { dg-do compile } */
/* { dg-options "-O2 -std=c++14 -mavx512vl -mavx512bw -mno-xop" } */
/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */
/* { dg-final { scan-assembler-times "pblendvb" 2 } } */
/* { dg-final { scan-assembler-times "blendvps" 4 } } */
/* { dg-final { scan-assembler-times "blendvpd" 4 } } */
#include "sse4_1-pr54700-1.C"

View File

@ -0,0 +1,17 @@
/* PR target/100648 */
/* { dg-do run { target avx2 } } */
/* { dg-options "-O2 -std=c++14 -mavx2 -mavx512vl -mavx512bw" } */
#ifndef CHECK_H
#define CHECK_H "avx512f-helper.h"
#endif
#ifndef TEST
#define TEST_test_256
#endif
#include CHECK_H
#include "avx2-pr54700-2.C"
#define AVX512VL
#define AVX512BW

View File

@ -0,0 +1,17 @@
/* PR target/pr100648 */
/* { dg-do run { target sse4 } } */
/* { dg-options "-O2 -std=c++14 -msse4 -mavx512vl -mavx512bw -mno-xop" } */
#ifndef CHECK_H
#define CHECK_H "avx512f-helper.h"
#endif
#ifndef TEST
#define TEST_test_128
#endif
#include CHECK_H
#include "sse4_1-pr54700-2.C"
#define AVX512VL
#define AVX512BW

View File

@ -0,0 +1,51 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl" } */
/* { dg-final { scan-assembler-times {pblendvb[\t ]*%xmm} 1 } } */
/* { dg-final { scan-assembler-times {pblendvb[\t ]*%ymm} 1 } } */
/* { dg-final { scan-assembler-times {blendvps[\t ]*%xmm} 1 } } */
/* { dg-final { scan-assembler-times {blendvps[\t ]*%ymm} 1 } } */
/* { dg-final { scan-assembler-times {blendvpd[\t ]*%xmm} 1 } } */
/* { dg-final { scan-assembler-times {blendvpd[\t ]*%ymm} 1 } } */
typedef float v4sf __attribute__ ((vector_size (16)));
typedef float v8sf __attribute__ ((vector_size (32)));
typedef double v2df __attribute__ ((vector_size (16)));
typedef double v4df __attribute__ ((vector_size (32)));
typedef char v16qi __attribute__ ((vector_size (16)));
typedef char v32qi __attribute__ ((vector_size (32)));
v4sf
foo (v4sf a, v4sf b, v4sf c)
{
return __builtin_ia32_blendvps (a, b, c);
}
v8sf
foo2 (v8sf a, v8sf b, v8sf c)
{
return __builtin_ia32_blendvps256 (a, b, c);
}
v2df
foo3 (v2df a, v2df b, v2df c)
{
return __builtin_ia32_blendvpd (a, b, c);
}
v4df
foo4 (v4df a, v4df b, v4df c)
{
return __builtin_ia32_blendvpd256 (a, b, c);
}
v16qi
foo5 (v16qi a, v16qi b, v16qi c)
{
return __builtin_ia32_pblendvb128 (a, b, c);
}
v32qi
foo6 (v32qi a, v32qi b, v32qi c)
{
return __builtin_ia32_pblendvb256 (a, b, c);
}

View File

@ -0,0 +1,41 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl" } */
/* { dg-final { scan-assembler-not {pblendv} } } */
/* { dg-final { scan-assembler-not {blendvp} } } */
#include <x86intrin.h>
__m128
foo (__m128 a, __m128 b)
{
return _mm_blendv_ps (a, b, _mm_setzero_ps ());
}
__m256
foo2 (__m256 a, __m256 b)
{
return _mm256_blendv_ps (a, b, _mm256_set1_ps (-1.0));
}
__m128d
foo3 (__m128d a, __m128d b, __m128d c)
{
return _mm_blendv_pd (a, b, _mm_set1_pd (1.0));
}
__m256d
foo4 (__m256d a, __m256d b, __m256d c)
{
return _mm256_blendv_pd (a, b, _mm256_set1_pd (-134.3));
}
__m128i
foo5 (__m128i a, __m128i b, __m128i c)
{
return _mm_blendv_epi8 (a, b, _mm_set1_epi8 (3));
}
__m256i
foo6 (__m256i a, __m256i b, __m256i c)
{
return _mm256_blendv_epi8 (a, b, _mm256_set1_epi8 (-22));
}

View File

@ -0,0 +1,21 @@
/* PR target/100648. */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
/* { dg-final { scan-assembler-times "\tvpblendvb\t" 2 } } */
/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
/* { dg-final { scan-assembler-not "\tvpandn" } } */
#include <x86intrin.h>
__m256i
f1 (__m256i a, __m256i b, __m256i mask)
{
return _mm256_blendv_epi8(a, b,
_mm256_andnot_si256(mask, _mm256_set1_epi8(255)));
}
__m128i
f2 (__m128i a, __m128i b, __m128i mask)
{
return _mm_blendv_epi8(a, b,
_mm_andnot_si128(mask, _mm_set1_epi8(255)));
}