From 4a389eb62b96bb738c79c339f38aae45ef46ef39 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Sat, 27 Aug 2016 22:00:17 +0000 Subject: [PATCH] Fix folding of VECTOR_CST comparisons gcc/ChangeLog: PR tree-optimization/71077 PR tree-optimization/68542 * fold-const.c (fold_relational_const): Fix folding of VECTOR_CST comparisons that have a scalar boolean result type. (selftest::test_vector_folding): New static function. (selftest::fold_const_c_tests): Call it. gcc/testsuite/ChangeLog: PR tree-optimization/71077 * gcc.target/i386/pr71077.c: New test. From-SVN: r239798 --- gcc/ChangeLog | 9 ++++++ gcc/fold-const.c | 30 +++++++++++++++--- gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.target/i386/pr71077.c | 42 +++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr71077.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ae2a8619f7b..cb259a07fab 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-08-27 Patrick Palka + + PR tree-optimization/71077 + PR tree-optimization/68542 + * fold-const.c (fold_relational_const): Fix folding of + VECTOR_CST comparisons that have a scalar boolean result type. + (selftest::test_vector_folding): New static function. + (selftest::fold_const_c_tests): Call it. + 2016-08-27 Gerald Pfeifer * doc/extend.texi (SPU Built-in Functions): Remove stale diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 103ed2d3ba0..bf177b609e5 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -13898,7 +13898,6 @@ fold_relational_const (enum tree_code code, tree type, tree op0, tree op1) if (!VECTOR_TYPE_P (type)) { /* Have vector comparison with scalar boolean result. 
*/ - bool result = true; gcc_assert ((code == EQ_EXPR || code == NE_EXPR) && VECTOR_CST_NELTS (op0) == VECTOR_CST_NELTS (op1)); for (unsigned i = 0; i < VECTOR_CST_NELTS (op0); i++) @@ -13906,11 +13905,14 @@ fold_relational_const (enum tree_code code, tree type, tree op0, tree op1) tree elem0 = VECTOR_CST_ELT (op0, i); tree elem1 = VECTOR_CST_ELT (op1, i); - tree tmp = fold_relational_const (code, type, elem0, elem1); - result &= integer_onep (tmp); + /* Always compare the elements for equality: the first unequal pair + decides the result for both EQ_EXPR and NE_EXPR. */ + tree tmp = fold_relational_const (EQ_EXPR, type, elem0, elem1); + if (tmp == NULL_TREE) + return NULL_TREE; + if (integer_zerop (tmp)) + return constant_boolean_node (code == NE_EXPR, type); } - if (code == NE_EXPR) - result = !result; - return constant_boolean_node (result, type); + return constant_boolean_node (code == EQ_EXPR, type); } unsigned count = VECTOR_CST_NELTS (op0); tree *elts = XALLOCAVEC (tree, count); @@ -14518,12 +14520,32 @@ test_arithmetic_folding () x); } +/* Verify that various binary operations on vectors are folded + correctly. */ + +static void +test_vector_folding () +{ + tree inner_type = integer_type_node; + tree type = build_vector_type (inner_type, 4); + tree zero = build_zero_cst (type); + tree one = build_one_cst (type); + + /* Verify equality tests that return a scalar boolean result. */ + tree res_type = boolean_type_node; + ASSERT_FALSE (integer_nonzerop (fold_build2 (EQ_EXPR, res_type, zero, one))); + ASSERT_TRUE (integer_nonzerop (fold_build2 (EQ_EXPR, res_type, zero, zero))); + ASSERT_TRUE (integer_nonzerop (fold_build2 (NE_EXPR, res_type, zero, one))); + ASSERT_FALSE (integer_nonzerop (fold_build2 (NE_EXPR, res_type, one, one))); +} + /* Run all of the selftests within this file. */ void fold_const_c_tests () { test_arithmetic_folding (); + test_vector_folding (); } } // namespace selftest diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7d117eed30a..344aa203b0f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-08-27 Patrick Palka + + PR tree-optimization/71077 + * gcc.target/i386/pr71077.c: New test. 
+ 2016-08-27 Steven G. Kargl PR fortran/77380 diff --git a/gcc/testsuite/gcc.target/i386/pr71077.c b/gcc/testsuite/gcc.target/i386/pr71077.c new file mode 100644 index 00000000000..0637bc990b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr71077.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2" } */ + +#include "avx2-check.h" + +typedef unsigned int ui; +ui x[32*32]; +ui y[32]; +ui z[32]; +void __attribute__ ((noinline, noclone)) foo (ui n, ui z) +{ + ui i, b; + ui v; + for (i = 0; i< n; i++) + { + v = y[i]; + if (v) { + for (b = 0; b < 32; b++) + if ((v >> b) & 1) + x[i*32 +b] = z; + y[i] = 0; + } + } +} + +static void +avx2_test (void) +{ + int i; + unsigned int val; + for (i = 0; i<32; i++) + { + val = 1U << i; + y[i] = (i & 1)? 0 : val; + z[i] = i; + } + foo (32, 10); + for (i=0; i<1024; i+=66) + if (x[i] != 10) + __builtin_abort (); +}