tree-optimization/92645 - avoid harmful early BIT_FIELD_REF canonicalization
This avoids canonicalizing BIT_FIELD_REF <T1> (a, <sz>, 0) to (T1)a
when a has integer type, since the early conversion confuses the
vectorizer's SLP matching. With the canonicalization delayed until after
vector lowering, the testcase in PR92645 from Skia is now finally
optimized to reasonable assembly.

2021-01-13  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92645
	* match.pd (BIT_FIELD_REF to conversion): Delay canonicalization
	until after vector lowering.

	* gcc.target/i386/pr92645-7.c: New testcase.
	* gcc.dg/tree-ssa/ssa-fre-54.c: Adjust.
	* gcc.dg/pr69047.c: Likewise.
This commit is contained in:
parent
a2d04f3d2c
commit
285fa338b0
@ -6075,6 +6075,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
/* Low-parts can be reduced to integral conversions.
|
||||
??? The following doesn't work for PDP endian. */
|
||||
|| (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
|
||||
/* But only do this after vectorization. */
|
||||
&& canonicalize_math_after_vectorization_p ()
|
||||
/* Don't even think about BITS_BIG_ENDIAN. */
|
||||
&& TYPE_PRECISION (TREE_TYPE (@0)) % BITS_PER_UNIT == 0
|
||||
&& TYPE_PRECISION (type) % BITS_PER_UNIT == 0
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdump-tree-cddce1" } */
|
||||
/* { dg-options "-O -fdump-tree-forwprop4" } */
|
||||
|
||||
__UINT8_TYPE__
|
||||
f(__UINT16_TYPE__ b)
|
||||
@ -15,4 +15,4 @@ f(__UINT16_TYPE__ b)
|
||||
return a;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "_\[0-9\]+ = \\(\[^)\]+\\) b" "cddce1" } } */
|
||||
/* { dg-final { scan-tree-dump "_\[0-9\]+ = \\(\[^)\]+\\) b" "forwprop4" } } */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target int32plus } */
|
||||
/* { dg-options "-O -fdump-tree-fre1 -fdump-tree-dse1" } */
|
||||
/* { dg-options "-O -fdump-tree-forwprop4 -fdump-tree-dse1" } */
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
@ -51,6 +51,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "\\(char\\) i_" "fre1" } } */
|
||||
/* { dg-final { scan-tree-dump "\\(short int\\) i_" "fre1" } } */
|
||||
/* { dg-final { scan-tree-dump "\\(char\\) i_" "forwprop4" } } */
|
||||
/* { dg-final { scan-tree-dump "\\(short int\\) i_" "forwprop4" } } */
|
||||
/* { dg-final { scan-tree-dump-not "u.i =" "dse1" } } */
|
||||
|
24
gcc/testsuite/gcc.target/i386/pr92645-7.c
Normal file
24
gcc/testsuite/gcc.target/i386/pr92645-7.c
Normal file
@ -0,0 +1,24 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-options "-O3 -msse2" } */
|
||||
|
||||
/* 16-byte GCC vector of long; not referenced in the visible part of this test.  */
typedef long v2di __attribute__((vector_size(16)));
|
||||
/* 16-byte GCC vector of int, used to view the 128-bit integer lane by lane.  */
typedef int v4si __attribute__((vector_size(16)));
|
||||
|
||||
/* Load the 128-bit integer at *q, reinterpret it through a union as a
   vector of ints, and store its elements to *p in the order 0, 2, 1, 3
   (i.e. with the two middle elements swapped).  */
void bar (v4si *p, __int128_t *q)
|
||||
{
|
||||
/* Type-pun the scalar __int128_t as a v4si via the union.  */
union { __int128_t a; v4si b; } u;
|
||||
u.a = *q;
|
||||
(*p)[0] = u.b[0];
|
||||
/* Elements 1 and 2 are exchanged on the way out.  */
(*p)[1] = u.b[2];
|
||||
(*p)[2] = u.b[1];
|
||||
(*p)[3] = u.b[3];
|
||||
}
|
||||
|
||||
/* The function should end up with something like
|
||||
[v]pshufd $216, (%rsi), %xmm0
|
||||
[v]movdqa %xmm0, (%rdi)
|
||||
ret
|
||||
recognized by SLP vectorization involving an existing "vector". */
|
||||
/* { dg-final { scan-assembler-not "punpck" } } */
|
||||
/* { dg-final { scan-assembler-times "pshufd" 1 } } */
|
Loading…
x
Reference in New Issue
Block a user