re PR target/54700 (Optimize away x<0 as mask argument of a blend.)

PR target/54700
	* config/i386/sse.md (ssebytemode): Add V16SI, V8SI and V4SI entries.
	(ssefltmodesuffix, ssefltvecmode): New define_mode_attrs.
	(*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt,
	*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint,
	*<sse4_1_avx2>_pblendvb_lt): New define_insns.

	* g++.target/i386/sse4_1-pr54700-1.C: New test.
	* g++.target/i386/sse4_1-pr54700-2.C: New test.
	* g++.target/i386/avx-pr54700-1.C: New test.
	* g++.target/i386/avx-pr54700-2.C: New test.
	* g++.target/i386/avx2-pr54700-1.C: New test.
	* g++.target/i386/avx2-pr54700-2.C: New test.
	* g++.target/i386/sse4_1-check.h: New file.
	* g++.target/i386/avx-check.h: New file.
	* g++.target/i386/avx2-check.h: New file.
	* g++.target/i386/m128-check.h: New file.
	* g++.target/i386/m256-check.h: New file.
	* g++.target/i386/avx-os-support.h: New file.

From-SVN: r266621
This commit is contained in:
Jakub Jelinek 2018-11-29 15:33:27 +01:00 committed by Jakub Jelinek
parent f06e47d7b6
commit fe907c1fd2
15 changed files with 404 additions and 1 deletions

View File

@ -1,5 +1,12 @@
2018-11-29 Jakub Jelinek <jakub@redhat.com>
PR target/54700
* config/i386/sse.md (ssebytemode): Add V16SI, V8SI and V4SI entries.
(ssefltmodesuffix, ssefltvecmode): New define_mode_attrs.
(*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt,
*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint,
*<sse4_1_avx2>_pblendvb_lt): New define_insns.
PR target/88152
* tree.h (build_uniform_cst, uniform_integer_cst_p): Declare.
* tree.c (build_uniform_cst, uniform_integer_cst_p): New functions.

View File

@ -601,7 +601,8 @@
(V4DI "V8DI") (V8DI "V16DI")])
;; Byte-element (QI) vector mode with the same total width as each listed
;; integer vector mode, e.g. V4DI (4 x 64 bit) -> V32QI (32 x 8 bit).
;; NOTE(review): two attribute lists appear below; going by the hunk header
;; and the ChangeLog entry for this attribute, the first is presumably the
;; pre-change line and the second (adding V16SI/V8SI/V4SI) its replacement.
;; Confirm against the real sse.md.
(define_mode_attr ssebytemode
[(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
[(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
(V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
;; All 128-bit vector integer modes: 16 x QI, 8 x HI, 4 x SI and 2 x DI.
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
@ -15681,6 +15682,60 @@
]
(const_string "<ssevecmode>")))])
;; Variable blend of float vectors (modes from VF_128_256) whose mask is
;; (x < 0) computed in <sseintvecmode> and viewed through a subreg as the
;; float mode.  The output templates pass operand 3 -- x itself, not the
;; comparison result -- as the mask, so no separate compare has to be emitted.
;; This relies on BLENDVPS/BLENDVPD consulting only the most significant bit
;; of each mask element (see the ISA reference), which is the bit that is set
;; exactly when x < 0.
;; Alternatives 0 and 1 are the non-AVX forms: operand 1 is tied to the
;; destination ("0") and the mask uses constraint Yz.  Alternative 2 is the
;; VEX form, which names operand 1 separately and accepts "x" for the mask.
(define_insn "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
[(match_operand:VF_128_256 1 "register_operand" "0,0,x")
(match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
(subreg:VF_128_256
(lt:<sseintvecmode>
(match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
(match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C")) 0)]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
"@
blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
;; One attribute value per alternative where a comma-separated list is given.
;; NOTE(review): length_immediate is "1" for every alternative here, while
;; *<sse4_1_avx2>_pblendvb_lt uses "*,*,1"; confirm whether the non-VEX
;; alternatives really carry an immediate byte.
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<MODE>")])
;; Float mnemonic suffix to use for an integer vector mode: vectors of SI
;; elements map to "ps", vectors of DI elements map to "pd".
(define_mode_attr ssefltmodesuffix
[(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
;; Float vector mode with the same element count and element width as the
;; given integer vector mode (V4SI -> V4SF, V2DI -> V2DF, and so on).
(define_mode_attr ssefltvecmode
[(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
;; Variable blend of integer data using the float blend instructions.  The
;; blended operands and the result are in <ssebytemode>, while the mask is
;; (x < 0) on a vector iterated by VI48_AVX (iterator definition not shown
;; in this hunk), viewed through a subreg as <ssebytemode>.
;; <ssefltmodesuffix> picks blendvps or blendvpd from the mask's mode, and the
;; templates pass operand 3 (x itself) as the mask, so the comparison is
;; never materialised.  As with the float variant this depends on the blend
;; looking only at the top bit of each mask element (see the ISA reference).
;; Alternatives 0 and 1 are the non-AVX forms (operand 1 tied to the
;; destination, mask constrained to Yz); alternative 2 is the VEX form.
(define_insn "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
[(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
(unspec:<ssebytemode>
[(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
(match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
(subreg:<ssebytemode>
(lt:VI48_AVX
(match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
(match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
"@
blendv<ssefltmodesuffix>\t{%3, %2, %0|%0, %2, %3}
blendv<ssefltmodesuffix>\t{%3, %2, %0|%0, %2, %3}
vblendv<ssefltmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
;; The "mode" attribute is the float vector mode matching the mask, since a
;; float blend instruction is what gets emitted.
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<ssefltvecmode>")])
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
@ -15779,6 +15834,27 @@
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
;; Byte blend whose mask is (x < 0) on a byte vector.  Data, mask and result
;; all use the same VI1_AVX2 mode, so unlike the blendvps/blendvpd variants
;; no subreg wraps the comparison.  The templates pass operand 3 (x itself)
;; as the pblendvb mask; this relies on the instruction testing only the top
;; bit of each mask byte (see the ISA reference), which is set exactly when
;; the signed byte is negative.
;; Alternatives 0 and 1 are the non-AVX forms (operand 1 tied to the
;; destination, mask constrained to Yz); alternative 2 is the VEX form.
(define_insn "*<sse4_1_avx2>_pblendvb_lt"
[(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
(unspec:VI1_AVX2
[(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
(match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
(lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
(match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
"@
pblendvb\t{%3, %2, %0|%0, %2, %3}
pblendvb\t{%3, %2, %0|%0, %2, %3}
vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
;; length_immediate is counted only for the VEX alternative ("*,*,1").
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "*,*,1")
(set_attr "prefix" "orig,orig,vex")
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "sse4_1_pblendw"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
(vec_merge:V8HI

View File

@ -1,5 +1,19 @@
2018-11-29 Jakub Jelinek <jakub@redhat.com>
PR target/54700
* g++.target/i386/sse4_1-pr54700-1.C: New test.
* g++.target/i386/sse4_1-pr54700-2.C: New test.
* g++.target/i386/avx-pr54700-1.C: New test.
* g++.target/i386/avx-pr54700-2.C: New test.
* g++.target/i386/avx2-pr54700-1.C: New test.
* g++.target/i386/avx2-pr54700-2.C: New test.
* g++.target/i386/sse4_1-check.h: New file.
* g++.target/i386/avx-check.h: New file.
* g++.target/i386/avx2-check.h: New file.
* g++.target/i386/m128-check.h: New file.
* g++.target/i386/m256-check.h: New file.
* g++.target/i386/avx-os-support.h: New file.
PR target/88152
* g++.dg/tree-ssa/pr88152-1.C: New test.
* g++.dg/tree-ssa/pr88152-2.C: New test.

View File

@ -0,0 +1 @@
#include "../../gcc.target/i386/avx-check.h"

View File

@ -0,0 +1 @@
#include "../../gcc.target/i386/avx-os-support.h"

View File

@ -0,0 +1,9 @@
/* PR target/54700 */
/* { dg-do compile } */
/* { dg-options "-O2 -std=c++14 -mavx -mno-xop -mno-avx2" } */
/* { dg-final { scan-assembler-not "vpcmpgt\[bdq]" } } */
/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
/* { dg-final { scan-assembler-times "vblendvps" 4 } } */
/* { dg-final { scan-assembler-times "vblendvpd" 4 } } */
#include "sse4_1-pr54700-1.C"

View File

@ -0,0 +1,8 @@
/* PR target/54700 */
/* { dg-do run { target avx } } */
/* { dg-options "-O2 -std=c++14 -mavx -mno-xop -mno-avx2" } */
/* Reuse the SSE4.1 run test.  That file only supplies defaults for CHECK_H
   and TEST when they are not already defined, so defining them here swaps
   in the AVX check header and the avx_test entry name.  */
#define CHECK_H "avx-check.h"
#define TEST avx_test
#include "sse4_1-pr54700-2.C"

View File

@ -0,0 +1 @@
#include "../../gcc.target/i386/avx2-check.h"

View File

@ -0,0 +1,69 @@
/* PR target/54700 */
/* { dg-do compile } */
/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mno-avx512f" } */
/* { dg-final { scan-assembler-not "vpcmpgt\[bdq]" } } */
/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
/* { dg-final { scan-assembler-times "vblendvps" 4 } } */
/* { dg-final { scan-assembler-times "vblendvpd" 4 } } */
#include <x86intrin.h>
/* __v32qi select: each result lane is b's lane where a's lane is negative,
   otherwise c's lane.  The scan-assembler checks at the top of this file
   require that no vpcmpgt compare is emitted for any of these functions.  */
__attribute__((noipa)) __v32qi
f1 (__v32qi a, __v32qi b, __v32qi c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f1: b's lane where a's lane is non-negative,
   otherwise c's lane.  */
__attribute__((noipa)) __v32qi
f2 (__v32qi a, __v32qi b, __v32qi c)
{
return a >= 0 ? b : c;
}
/* __v8si select: b's lane where a's lane is negative, otherwise c's lane.
   Mask and data are both integer vectors.  */
__attribute__((noipa)) __v8si
f3 (__v8si a, __v8si b, __v8si c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f3: b's lane where a's lane is non-negative,
   otherwise c's lane.  */
__attribute__((noipa)) __v8si
f4 (__v8si a, __v8si b, __v8si c)
{
return a >= 0 ? b : c;
}
/* __v4di select: b's lane where a's lane is negative, otherwise c's lane.  */
__attribute__((noipa)) __v4di
f5 (__v4di a, __v4di b, __v4di c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f5: b's lane where a's lane is non-negative,
   otherwise c's lane.  */
__attribute__((noipa)) __v4di
f6 (__v4di a, __v4di b, __v4di c)
{
return a >= 0 ? b : c;
}
/* Mixed-type select: the mask a is an integer vector (__v8si) while the
   selected data b and c are float vectors (__v8sf).  Picks b's lane where
   a's lane is negative, otherwise c's lane.  */
__attribute__((noipa)) __v8sf
f7 (__v8si a, __v8sf b, __v8sf c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f7: b's lane where the integer mask lane is
   non-negative, otherwise c's lane.  */
__attribute__((noipa)) __v8sf
f8 (__v8si a, __v8sf b, __v8sf c)
{
return a >= 0 ? b : c;
}
/* Mixed-type select: integer mask a (__v4di), double data b and c (__v4df).
   Picks b's lane where a's lane is negative, otherwise c's lane.  */
__attribute__((noipa)) __v4df
f9 (__v4di a, __v4df b, __v4df c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f9: b's lane where the integer mask lane is
   non-negative, otherwise c's lane.  */
__attribute__((noipa)) __v4df
f10 (__v4di a, __v4df b, __v4df c)
{
return a >= 0 ? b : c;
}

View File

@ -0,0 +1,72 @@
/* PR target/54700 */
/* { dg-do run { target avx2 } } */
/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mno-avx512f" } */
/* NOTE(review): avx2-check.h is presumably what provides the program entry
   point and calls the function named by TEST -- confirm against the header.  */
#include "avx2-check.h"
#define TEST avx2_test
/* Pull in f1 .. f10, the functions exercised below.  */
#include "avx2-pr54700-1.C"
/* Run-time check of f1 .. f10 on 256-bit vectors.  For every element type:
   the "a" vector is the mask input, "b" and "c" are the two data inputs,
   the "e" vector is the expected result of the a < 0 variant (b where a is
   negative, else c) and the "f" vector the expected result of the a >= 0
   variant (the complementary choice).  Any mismatch aborts.  */
static void
TEST ()
{
/* Byte lanes.  The mask includes -128, 127 and 0 so that both extremes and
   the zero boundary are exercised.  */
__v32qi v32qia = { -128, 12, -1, 127, 115, 0, -19, 125, -125, 12, 0, -37, 37, 15, 98, -105,
0, 1, 2, 3, -1, -2, -3, -4, 4, -5, 5, -6, 6, -7, 7, -8 };
__v32qi v32qib = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16 };
__v32qi v32qic = { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32 };
__v32qi v32qie = { 1, 18, 3, 20, 21, 22, 7, 24, 9, 26, 27, 12, 29, 30, 31, 16,
-17, -18, -19, -20, -5, -6, -7, -8, -25, -10, -27, -12, -29, -14, -31, -16 };
__v32qi v32qif = { 17, 2, 19, 4, 5, 6, 23, 8, 25, 10, 11, 28, 13, 14, 15, 32,
-1, -2, -3, -4, -21, -22, -23, -24, -9, -26, -11, -28, -13, -30, -15, -32 };
__v32qi v32qir = f1 (v32qia, v32qib, v32qic);
if (__builtin_memcmp (&v32qir, &v32qie, sizeof (__v32qi)))
__builtin_abort ();
v32qir = f2 (v32qia, v32qib, v32qic);
if (__builtin_memcmp (&v32qir, &v32qif, sizeof (__v32qi)))
__builtin_abort ();
/* 32-bit integer lanes; the mask covers the largest and smallest int.  */
__v8si v8sia = { __INT_MAX__, -__INT_MAX__ - 1, -32, 12, __INT_MAX__ - 2, -__INT_MAX__, 15, -1 };
__v8si v8sib = { 1, 2, 3, 4, -1, -2, -3, -4 };
__v8si v8sic = { 5, 6, 7, 8, -5, -6, -7, -8 };
__v8si v8sie = { 5, 2, 3, 8, -5, -2, -7, -4 };
__v8si v8sif = { 1, 6, 7, 4, -1, -6, -3, -8 };
__v8si v8sir = f3 (v8sia, v8sib, v8sic);
if (__builtin_memcmp (&v8sir, &v8sie, sizeof (__v8si)))
__builtin_abort ();
v8sir = f4 (v8sia, v8sib, v8sic);
if (__builtin_memcmp (&v8sir, &v8sif, sizeof (__v8si)))
__builtin_abort ();
/* 64-bit integer lanes.  */
__v4di v4dia = { -__LONG_LONG_MAX__, 1000LL * __INT_MAX__, __LONG_LONG_MAX__, -2 };
__v4di v4dib = { 1, 2, -1, -2 };
__v4di v4dic = { 3, 4, -3, -4 };
__v4di v4die = { 1, 4, -3, -2 };
__v4di v4dif = { 3, 2, -1, -4 };
__v4di v4dir = f5 (v4dia, v4dib, v4dic);
if (__builtin_memcmp (&v4dir, &v4die, sizeof (__v4di)))
__builtin_abort ();
v4dir = f6 (v4dia, v4dib, v4dic);
if (__builtin_memcmp (&v4dir, &v4dif, sizeof (__v4di)))
__builtin_abort ();
/* Float data selected by the integer mask v8sia defined above.  */
__v8sf v8sfb = { 1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f };
__v8sf v8sfc = { 5.0f, 6.0f, 7.0f, 8.0f, -5.0f, -6.0f, -7.0f, -8.0f };
__v8sf v8sfe = { 5.0f, 2.0f, 3.0f, 8.0f, -5.0f, -2.0f, -7.0f, -4.0f };
__v8sf v8sff = { 1.0f, 6.0f, 7.0f, 4.0f, -1.0f, -6.0f, -3.0f, -8.0f };
__v8sf v8sfr = f7 (v8sia, v8sfb, v8sfc);
if (__builtin_memcmp (&v8sfr, &v8sfe, sizeof (__v8sf)))
__builtin_abort ();
v8sfr = f8 (v8sia, v8sfb, v8sfc);
if (__builtin_memcmp (&v8sfr, &v8sff, sizeof (__v8sf)))
__builtin_abort ();
/* Double data selected by the integer mask v4dia defined above.  */
__v4df v4dfb = { 1.0, 2.0, -1.0, -2.0 };
__v4df v4dfc = { 3.0, 4.0, -3.0, -4.0 };
__v4df v4dfe = { 1.0, 4.0, -3.0, -2.0 };
__v4df v4dff = { 3.0, 2.0, -1.0, -4.0 };
__v4df v4dfr = f9 (v4dia, v4dfb, v4dfc);
if (__builtin_memcmp (&v4dfr, &v4dfe, sizeof (__v4df)))
__builtin_abort ();
v4dfr = f10 (v4dia, v4dfb, v4dfc);
if (__builtin_memcmp (&v4dfr, &v4dff, sizeof (__v4df)))
__builtin_abort ();
}

View File

@ -0,0 +1 @@
#include "../../gcc.target/i386/m128-check.h"

View File

@ -0,0 +1 @@
#include "../../gcc.target/i386/m256-check.h"

View File

@ -0,0 +1 @@
#include "../../gcc.target/i386/sse4_1-check.h"

View File

@ -0,0 +1,69 @@
/* PR target/54700 */
/* { dg-do compile } */
/* { dg-options "-O2 -std=c++14 -msse4 -mno-avx -mno-xop" } */
/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */
/* { dg-final { scan-assembler-times "pblendvb" 2 } } */
/* { dg-final { scan-assembler-times "blendvps" 4 } } */
/* { dg-final { scan-assembler-times "blendvpd" 4 } } */
#include <x86intrin.h>
/* __v16qi select: each result lane is b's lane where a's lane is negative,
   otherwise c's lane.  The scan-assembler checks at the top of this file
   require that no pcmpgt compare is emitted for any of these functions.  */
__attribute__((noipa)) __v16qi
f1 (__v16qi a, __v16qi b, __v16qi c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f1: b's lane where a's lane is non-negative,
   otherwise c's lane.  */
__attribute__((noipa)) __v16qi
f2 (__v16qi a, __v16qi b, __v16qi c)
{
return a >= 0 ? b : c;
}
/* __v4si select: b's lane where a's lane is negative, otherwise c's lane.
   Mask and data are both integer vectors.  */
__attribute__((noipa)) __v4si
f3 (__v4si a, __v4si b, __v4si c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f3: b's lane where a's lane is non-negative,
   otherwise c's lane.  */
__attribute__((noipa)) __v4si
f4 (__v4si a, __v4si b, __v4si c)
{
return a >= 0 ? b : c;
}
/* __v2di select: b's lane where a's lane is negative, otherwise c's lane.  */
__attribute__((noipa)) __v2di
f5 (__v2di a, __v2di b, __v2di c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f5: b's lane where a's lane is non-negative,
   otherwise c's lane.  */
__attribute__((noipa)) __v2di
f6 (__v2di a, __v2di b, __v2di c)
{
return a >= 0 ? b : c;
}
/* Mixed-type select: the mask a is an integer vector (__v4si) while the
   selected data b and c are float vectors (__v4sf).  Picks b's lane where
   a's lane is negative, otherwise c's lane.  */
__attribute__((noipa)) __v4sf
f7 (__v4si a, __v4sf b, __v4sf c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f7: b's lane where the integer mask lane is
   non-negative, otherwise c's lane.  */
__attribute__((noipa)) __v4sf
f8 (__v4si a, __v4sf b, __v4sf c)
{
return a >= 0 ? b : c;
}
/* Mixed-type select: integer mask a (__v2di), double data b and c (__v2df).
   Picks b's lane where a's lane is negative, otherwise c's lane.  */
__attribute__((noipa)) __v2df
f9 (__v2di a, __v2df b, __v2df c)
{
return a < 0 ? b : c;
}
/* Inverted condition of f9: b's lane where the integer mask lane is
   non-negative, otherwise c's lane.  */
__attribute__((noipa)) __v2df
f10 (__v2di a, __v2df b, __v2df c)
{
return a >= 0 ? b : c;
}

View File

@ -0,0 +1,73 @@
/* PR target/54700 */
/* { dg-do run { target sse4 } } */
/* { dg-options "-O2 -std=c++14 -msse4 -mno-avx -mno-xop" } */
/* CHECK_H and TEST may already be defined by a file that includes this one
   (the AVX variant of this test does so); otherwise fall back to the SSE4.1
   check header and entry name.  */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
/* Pull in f1 .. f10, the functions exercised below.  */
#include "sse4_1-pr54700-1.C"
/* Run-time check of f1 .. f10 on 128-bit vectors.  TEST expands to the
   entry name chosen by the preprocessor block above.  For every element
   type: the "a" vector is the mask input, "b" and "c" are the two data
   inputs, the "e" vector is the expected result of the a < 0 variant (b
   where a is negative, else c) and the "f" vector the expected result of
   the a >= 0 variant (the complementary choice).  Any mismatch aborts.  */
static void
TEST ()
{
/* Byte lanes.  The mask includes -128, 127 and 0 so that both extremes and
   the zero boundary are exercised.  */
__v16qi v16qia = { -128, 12, -1, 127, 115, 0, -19, 125, -125, 12, 0, -37, 37, 15, 98, -105 };
__v16qi v16qib = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
__v16qi v16qic = { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 };
__v16qi v16qie = { 1, 18, 3, 20, 21, 22, 7, 24, 9, 26, 27, 12, 29, 30, 31, 16 };
__v16qi v16qif = { 17, 2, 19, 4, 5, 6, 23, 8, 25, 10, 11, 28, 13, 14, 15, 32 };
__v16qi v16qir = f1 (v16qia, v16qib, v16qic);
if (__builtin_memcmp (&v16qir, &v16qie, sizeof (__v16qi)))
__builtin_abort ();
v16qir = f2 (v16qia, v16qib, v16qic);
if (__builtin_memcmp (&v16qir, &v16qif, sizeof (__v16qi)))
__builtin_abort ();
/* 32-bit integer lanes; the mask covers the largest and smallest int.  */
__v4si v4sia = { __INT_MAX__, -__INT_MAX__ - 1, -32, 12 };
__v4si v4sib = { 1, 2, 3, 4 };
__v4si v4sic = { 5, 6, 7, 8 };
__v4si v4sie = { 5, 2, 3, 8 };
__v4si v4sif = { 1, 6, 7, 4 };
__v4si v4sir = f3 (v4sia, v4sib, v4sic);
if (__builtin_memcmp (&v4sir, &v4sie, sizeof (__v4si)))
__builtin_abort ();
v4sir = f4 (v4sia, v4sib, v4sic);
if (__builtin_memcmp (&v4sir, &v4sif, sizeof (__v4si)))
__builtin_abort ();
/* 64-bit integer lanes.  */
__v2di v2dia = { -__LONG_LONG_MAX__, 1000LL * __INT_MAX__ };
__v2di v2dib = { 1, 2 };
__v2di v2dic = { 3, 4 };
__v2di v2die = { 1, 4 };
__v2di v2dif = { 3, 2 };
__v2di v2dir = f5 (v2dia, v2dib, v2dic);
if (__builtin_memcmp (&v2dir, &v2die, sizeof (__v2di)))
__builtin_abort ();
v2dir = f6 (v2dia, v2dib, v2dic);
if (__builtin_memcmp (&v2dir, &v2dif, sizeof (__v2di)))
__builtin_abort ();
/* Float data selected by the integer mask v4sia defined above.  */
__v4sf v4sfb = { 1.0f, 2.0f, 3.0f, 4.0f };
__v4sf v4sfc = { 5.0f, 6.0f, 7.0f, 8.0f };
__v4sf v4sfe = { 5.0f, 2.0f, 3.0f, 8.0f };
__v4sf v4sff = { 1.0f, 6.0f, 7.0f, 4.0f };
__v4sf v4sfr = f7 (v4sia, v4sfb, v4sfc);
if (__builtin_memcmp (&v4sfr, &v4sfe, sizeof (__v4sf)))
__builtin_abort ();
v4sfr = f8 (v4sia, v4sfb, v4sfc);
if (__builtin_memcmp (&v4sfr, &v4sff, sizeof (__v4sf)))
__builtin_abort ();
/* Double data selected by the integer mask v2dia defined above.  */
__v2df v2dfb = { 1.0, 2.0 };
__v2df v2dfc = { 3.0, 4.0 };
__v2df v2dfe = { 1.0, 4.0 };
__v2df v2dff = { 3.0, 2.0 };
__v2df v2dfr = f9 (v2dia, v2dfb, v2dfc);
if (__builtin_memcmp (&v2dfr, &v2dfe, sizeof (__v2df)))
__builtin_abort ();
v2dfr = f10 (v2dia, v2dfb, v2dfc);
if (__builtin_memcmp (&v2dfr, &v2dff, sizeof (__v2df)))
__builtin_abort ();
}