re PR tree-optimization/78821 (GCC7: Copying whole 32 bits structure field by field not optimised into copying whole 32 bits at once)

PR tree-optimization/78821
	* gimple-ssa-store-merging.c (find_bswap_or_nop_load): Give up
	if base is TARGET_MEM_REF.  If base is not MEM_REF, set base_addr
	to the address of the base rather than the base itself.
	(find_bswap_or_nop_1): Just use pointer comparison for vuse check.
	(find_bswap_or_nop_finalize): New function.
	(find_bswap_or_nop): Use it.
	(bswap_replace): Return a tree rather than bool, change first
	argument from gimple * to gimple_stmt_iterator, allow inserting
	into an empty sequence, allow ins_stmt to be NULL - then emit
	all stmts into gsi.  Fix up MEM_REF address gimplification.
	(pass_optimize_bswap::execute): Adjust bswap_replace caller.
	(struct store_immediate_info): Add N and INS_STMT non-static
	data members.
	(store_immediate_info::store_immediate_info): Initialize them
	from newly added ctor args.
	(merged_store_group::apply_stores): Formatting fixes.  Sort by
	bitpos at the end.
	(stmts_may_clobber_ref_p): For stores call also
	refs_anti_dependent_p.
	(gather_bswap_load_refs): New function.
	(imm_store_chain_info::try_coalesce_bswap): New method.
	(imm_store_chain_info::coalesce_immediate_stores): Use it.
	(split_group): Handle LROTATE_EXPR and NOP_EXPR rhs_code specially.
	(imm_store_chain_info::output_merged_store): Fail if the estimated
	number of new stmts is greater than or equal to the number of old
	ones.  Handle LROTATE_EXPR and NOP_EXPR rhs_code.
	(pass_store_merging::process_store): Compute n and ins_stmt, if
	ins_stmt is non-NULL and the store rhs is otherwise invalid, use
	LROTATE_EXPR rhs_code.  Pass n and ins_stmt to store_immediate_info
	ctor.
	(pass_store_merging::execute): Calculate dominators.

	* gcc.dg/store_merging_16.c: New test.

From-SVN: r254948
This commit is contained in:
Jakub Jelinek 2017-11-20 11:10:23 +01:00 committed by Jakub Jelinek
parent dffec8ebdb
commit 4b84d9b8f9
4 changed files with 812 additions and 117 deletions

View File

@ -1,5 +1,38 @@
2017-11-20 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gimple-ssa-store-merging.c (find_bswap_or_nop_load): Give up
if base is TARGET_MEM_REF. If base is not MEM_REF, set base_addr
to the address of the base rather than the base itself.
(find_bswap_or_nop_1): Just use pointer comparison for vuse check.
(find_bswap_or_nop_finalize): New function.
(find_bswap_or_nop): Use it.
(bswap_replace): Return a tree rather than bool, change first
argument from gimple * to gimple_stmt_iterator, allow inserting
into an empty sequence, allow ins_stmt to be NULL - then emit
all stmts into gsi. Fix up MEM_REF address gimplification.
(pass_optimize_bswap::execute): Adjust bswap_replace caller.
(struct store_immediate_info): Add N and INS_STMT non-static
data members.
(store_immediate_info::store_immediate_info): Initialize them
from newly added ctor args.
(merged_store_group::apply_stores): Formatting fixes. Sort by
bitpos at the end.
(stmts_may_clobber_ref_p): For stores call also
refs_anti_dependent_p.
(gather_bswap_load_refs): New function.
(imm_store_chain_info::try_coalesce_bswap): New method.
(imm_store_chain_info::coalesce_immediate_stores): Use it.
(split_group): Handle LROTATE_EXPR and NOP_EXPR rhs_code specially.
(imm_store_chain_info::output_merged_store): Fail if the estimated
number of new stmts is greater than or equal to the number of old
ones. Handle LROTATE_EXPR and NOP_EXPR rhs_code.
(pass_store_merging::process_store): Compute n and ins_stmt, if
ins_stmt is non-NULL and the store rhs is otherwise invalid, use
LROTATE_EXPR rhs_code. Pass n and ins_stmt to store_immediate_info
ctor.
(pass_store_merging::execute): Calculate dominators.
* tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number,
BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP,
CMPXCHG, do_shift_rotate, verify_symbolic_number_p,

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,8 @@
2017-11-20 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gcc.dg/store_merging_16.c: New test.
2017-11-19 Jan Hubicka <hubicka@ucw.cz>
PR target/82281

View File

@ -0,0 +1,157 @@
/* Only test on some 64-bit targets which do have bswap{si,di}2 patterns and
are either big or little endian (not pdp endian). */
/* { dg-do compile { target { lp64 && { i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } } } */
/* { dg-require-effective-target store_merge } */
/* { dg-options "-O2 -fdump-tree-store-merging" } */
/* Store the eight bytes of Q into P[0] .. P[7], least significant byte
   first.  Each assignment keeps only the low 8 bits of the shifted value
   because P points to unsigned char.  The statements are kept as eight
   separate byte stores on purpose: this is the source pattern the test
   feeds to the store-merging pass.
   NOTE(review): noipa presumably keeps the call in main from being
   inlined or otherwise analysed across the call - confirm against the
   GCC attribute documentation.  */
__attribute__((noipa)) void
f1 (unsigned char *p, unsigned long long q)
{
p[0] = q;
p[1] = q >> 8;
p[2] = q >> 16;
p[3] = q >> 24;
p[4] = q >> 32;
p[5] = q >> 40;
p[6] = q >> 48;
p[7] = q >> 56;
}
/* Mirror image of f1: store the eight bytes of Q into P[0] .. P[7], most
   significant byte first (P[0] receives bits 56-63, P[7] bits 0-7).
   Between them, f1 and f2 cover both byte orders of a 64-bit store, so
   whichever the target's endianness is, one of them is a byte-reversed
   store.  */
__attribute__((noipa)) void
f2 (unsigned char *p, unsigned long long q)
{
p[0] = q >> 56;
p[1] = q >> 48;
p[2] = q >> 40;
p[3] = q >> 32;
p[4] = q >> 24;
p[5] = q >> 16;
p[6] = q >> 8;
p[7] = q;
}
/* Copy four bytes from Q to P in reversed order (P[0] = Q[3] ... P[3] =
   Q[0]).  All four loads are performed into temporaries, highest index
   first, before any store is issued; this load-then-store shape is what
   distinguishes this function from f4.  Both pointers are
   restrict-qualified.  */
__attribute__((noipa)) void
f3 (unsigned char *__restrict p, unsigned char *__restrict q)
{
unsigned char q3 = q[3];
unsigned char q2 = q[2];
unsigned char q1 = q[1];
unsigned char q0 = q[0];
p[0] = q3;
p[1] = q2;
p[2] = q1;
p[3] = q0;
}
/* Same byte reversal as f3 (P[0] = Q[3] ... P[3] = Q[0]), but written
   with each load feeding its store directly, so loads and stores are
   interleaved in the source instead of being separated by temporaries.
   Both pointers are restrict-qualified.  */
__attribute__((noipa)) void
f4 (unsigned char *__restrict p, unsigned char *__restrict q)
{
p[0] = q[3];
p[1] = q[2];
p[2] = q[1];
p[3] = q[0];
}
/* Two single-byte members followed by one unsigned short; used by f5 and
   f6.  NOTE(review): the test presumably relies on this occupying four
   bytes with no padding (a, b, then c) - confirm for the listed
   targets.  */
struct S { unsigned char a, b; unsigned short c; };
/* Fill *P from *Q with the bytes rearranged: P->a and P->b receive the two
   bytes of Q->c, and P->c is assembled from Q->a and Q->b.  Which byte of
   Q->c goes where, and which of Q->a / Q->b is shifted into the high half
   of P->c, is selected by the target's byte order via __BYTE_ORDER__.
   All values are computed into locals first and stored afterwards (f6 is
   the variant without the locals).
   NOTE(review): for both byte orders this looks like a reversal of the
   four bytes of the struct's storage - confirm against the struct S
   layout.  */
__attribute__((noipa)) void
f5 (struct S *__restrict p, struct S *__restrict q)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
unsigned char pa = q->c >> 8;
unsigned char pb = q->c;
unsigned short pc = (q->a << 8) | q->b;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
unsigned char pa = q->c;
unsigned char pb = q->c >> 8;
unsigned short pc = q->a | (q->b << 8);
#endif
p->a = pa;
p->b = pb;
p->c = pc;
}
/* Same rearrangement as f5, but each member of *P is assigned directly
   from the expression on *Q, without intermediate locals.  The
   __BYTE_ORDER__ conditionals select the same per-endianness expressions
   as in f5.  */
__attribute__((noipa)) void
f6 (struct S *__restrict p, struct S *__restrict q)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
p->a = q->c >> 8;
p->b = q->c;
p->c = (q->a << 8) | q->b;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
p->a = q->c;
p->b = q->c >> 8;
p->c = q->a | (q->b << 8);
#endif
}
/* Eight 8-bit bit-fields declared with type unsigned long long (64 bits
   of bit-fields in total); used by f7.  */
struct T { unsigned long long a : 8, b : 8, c : 8, d : 8, e : 8, f : 8, g : 8, h : 8; };
/* Copy the eight 8-bit bit-fields of *Q into *P in reversed field order:
   P->a = Q->h, P->b = Q->g, ..., P->h = Q->a.  Both pointers are
   restrict-qualified.  */
__attribute__((noipa)) void
f7 (struct T *__restrict p, struct T *__restrict q)
{
p->a = q->h;
p->b = q->g;
p->c = q->f;
p->d = q->e;
p->e = q->d;
p->f = q->c;
p->g = q->b;
p->h = q->a;
}
/* Input for the f5 check in main: a = 0x11, b = 0x12, and c chosen per
   byte order (0x1413 on little endian, 0x1314 on big endian).  */
struct S b = { 0x11, 0x12,
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
0x1413
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
0x1314
#endif
};
/* Input for the f7 check in main: fields a .. h hold 0x21 .. 0x28.  */
struct T e = { 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28 };
int
main ()
{
unsigned char a[8];
int i;
struct S b, c, d;
f1 (a, 0x0102030405060708ULL);
for (i = 0; i < 8; ++i)
if (a[i] != 8 - i)
__builtin_abort ();
f2 (a, 0x0102030405060708ULL);
for (i = 0; i < 8; ++i)
if (a[i] != 1 + i)
__builtin_abort ();
f3 (a, a + 4);
for (i = 0; i < 8; ++i)
if (a[i] != (i < 4 ? 8 - i : 1 + i))
__builtin_abort ();
f2 (a, 0x090a0b0c0d0e0f10ULL);
f4 (a + 4, a);
for (i = 0; i < 8; ++i)
if (a[i] != (i < 4 ? 9 + i : 16 - i))
__builtin_abort ();
f5 (&c, &b);
if (c.a != 0x14 || c.b != 0x13
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|| c.c != 0x1112
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|| c.c != 0x1211
#endif
)
__builtin_abort ();
f6 (&d, &c);
if (d.a != 0x11 || d.b != 0x12 || d.c != b.c)
__builtin_abort ();
struct T f;
f7 (&f, &e);
if (f.a != 0x28 || f.b != 0x27 || f.c != 0x26 || f.d != 0x25
|| f.e != 0x24 || f.f != 0x23 || f.g != 0x22 || f.h != 0x21)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 7 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 2 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 4 "store-merging" } } */