From 285fa338b06b804e72997c4d876ecf08a9c083af Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 13 Jan 2021 13:48:31 +0100 Subject: [PATCH] tree-optimization/92645 - avoid harmful early BIT_FIELD_REF canonicalization This avoids canonicalizing BIT_FIELD_REF <T1> (a, <sz>, 0) to (T1)a on integer typed a. This confuses the vectorizer SLP matching. With this delayed to after vector lowering the testcase in PR92645 from Skia is now finally optimized to reasonable assembly. 2021-01-13 Richard Biener PR tree-optimization/92645 * match.pd (BIT_FIELD_REF to conversion): Delay canonicalization until after vector lowering. * gcc.target/i386/pr92645-7.c: New testcase. * gcc.dg/tree-ssa/ssa-fre-54.c: Adjust. * gcc.dg/pr69047.c: Likewise. --- gcc/match.pd | 2 ++ gcc/testsuite/gcc.dg/pr69047.c | 4 ++-- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c | 6 +++--- gcc/testsuite/gcc.target/i386/pr92645-7.c | 24 ++++++++++++++++++++++ 4 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr92645-7.c diff --git a/gcc/match.pd b/gcc/match.pd index c286a540c4e..60c383da13b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6075,6 +6075,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Low-parts can be reduced to integral conversions. ??? The following doesn't work for PDP endian. */ || (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN + /* But only do this after vectorization. */ + && canonicalize_math_after_vectorization_p () /* Don't even think about BITS_BIG_ENDIAN. 
*/ && TYPE_PRECISION (TREE_TYPE (@0)) % BITS_PER_UNIT == 0 && TYPE_PRECISION (type) % BITS_PER_UNIT == 0 diff --git a/gcc/testsuite/gcc.dg/pr69047.c b/gcc/testsuite/gcc.dg/pr69047.c index 63d9fd90e83..d562663d86a 100644 --- a/gcc/testsuite/gcc.dg/pr69047.c +++ b/gcc/testsuite/gcc.dg/pr69047.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-cddce1" } */ +/* { dg-options "-O -fdump-tree-forwprop4" } */ __UINT8_TYPE__ f(__UINT16_TYPE__ b) @@ -15,4 +15,4 @@ f(__UINT16_TYPE__ b) return a; } -/* { dg-final { scan-tree-dump "_\[0-9\]+ = \\(\[^)\]+\\) b" "cddce1" } } */ +/* { dg-final { scan-tree-dump "_\[0-9\]+ = \\(\[^)\]+\\) b" "forwprop4" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c index be7537e80c1..02ebf068a61 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target int32plus } */ -/* { dg-options "-O -fdump-tree-fre1 -fdump-tree-dse1" } */ +/* { dg-options "-O -fdump-tree-forwprop4 -fdump-tree-dse1" } */ extern void abort (void); @@ -51,6 +51,6 @@ int main() return 0; } -/* { dg-final { scan-tree-dump "\\(char\\) i_" "fre1" } } */ -/* { dg-final { scan-tree-dump "\\(short int\\) i_" "fre1" } } */ +/* { dg-final { scan-tree-dump "\\(char\\) i_" "forwprop4" } } */ +/* { dg-final { scan-tree-dump "\\(short int\\) i_" "forwprop4" } } */ /* { dg-final { scan-tree-dump-not "u.i =" "dse1" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92645-7.c b/gcc/testsuite/gcc.target/i386/pr92645-7.c new file mode 100644 index 00000000000..e4c04c2a82a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92645-7.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -msse2" } */ + +typedef long v2di __attribute__((vector_size(16))); +typedef int v4si __attribute__((vector_size(16))); + +void bar (v4si *p, __int128_t *q) +{ + 
union { __int128_t a; v4si b; } u; + u.a = *q; + (*p)[0] = u.b[0]; + (*p)[1] = u.b[2]; + (*p)[2] = u.b[1]; + (*p)[3] = u.b[3]; +} + +/* The function should end up with sth like + [v]pshufd $216, (%esi), %xmm0 + [v]movdqa %xmm0, (%edi) + ret + recognized by SLP vectorization involving an existing "vector". */ +/* { dg-final { scan-assembler-not "punpck" } } */ +/* { dg-final { scan-assembler-times "pshufd" 1 } } */