re PR tree-optimization/78821 (GCC7: Copying whole 32 bits structure field by field not optimised into copying whole 32 bits at once)

PR tree-optimization/78821
	* gimple-ssa-store-merging.c (struct store_operand_info): Add bit_not_p
	data member.
	(store_operand_info::store_operand_info): Initialize it to false.
	(pass_store_merging::terminate_all_aliasing_chains): Rewritten to use
	ref_maybe_used_by_stmt_p and stmt_may_clobber_ref_p on lhs of each
	store in the group, and if chain_info is non-NULL, to ignore altogether
	that chain.
	(compatible_load_p): Fail if bit_not_p does not match.
	(imm_store_chain_info::output_merged_store): Handle bit_not_p loads.
	(handled_load): Fill in bit_not_p.  Handle BIT_NOT_EXPR.
	(pass_store_merging::process_store): Adjust
	terminate_all_aliasing_chains calls to pass NULL in all current spots,
	call terminate_all_aliasing_chains newly when adding a store into
	a chain with non-NULL chain_info.

	* gcc.dg/store_merging_2.c: Expect 3 store mergings instead of 2.
	* gcc.dg/store_merging_13.c (f7, f8, f9, f10, f11, f12, f13): New
	functions.
	(main): Test also those.  Expect 13 store mergings instead of 6.
	* gcc.dg/store_merging_14.c (f7, f8, f9): New functions.
	(main): Test also those.  Expect 9 store mergings instead of 6.

From-SVN: r254536
This commit is contained in:
Jakub Jelinek 2017-11-08 16:46:58 +01:00 committed by Jakub Jelinek
parent a14ab2c346
commit 383ac8dc26
6 changed files with 285 additions and 52 deletions

View File

@ -1,3 +1,21 @@
2017-11-08 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gimple-ssa-store-merging.c (struct store_operand_info): Add bit_not_p
data member.
(store_operand_info::store_operand_info): Initialize it to false.
(pass_store_merging::terminate_all_aliasing_chains): Rewritten to use
ref_maybe_used_by_stmt_p and stmt_may_clobber_ref_p on lhs of each
store in the group, and if chain_info is non-NULL, to ignore altogether
that chain.
(compatible_load_p): Fail if bit_not_p does not match.
(imm_store_chain_info::output_merged_store): Handle bit_not_p loads.
(handled_load): Fill in bit_not_p. Handle BIT_NOT_EXPR.
(pass_store_merging::process_store): Adjust
terminate_all_aliasing_chains calls to pass NULL in all current spots,
call terminate_all_aliasing_chains newly when adding a store into
a chain with non-NULL chain_info.
2017-11-08 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_can_eliminate): Simplify logic.

View File

@ -183,12 +183,13 @@ struct store_operand_info
unsigned HOST_WIDE_INT bitregion_start;
unsigned HOST_WIDE_INT bitregion_end;
gimple *stmt;
bool bit_not_p;
store_operand_info ();
};
store_operand_info::store_operand_info ()
: val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
bitregion_start (0), bitregion_end (0), stmt (NULL)
bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}
@ -910,8 +911,7 @@ private:
void process_store (gimple *);
bool terminate_and_process_all_chains ();
bool terminate_all_aliasing_chains (imm_store_chain_info **,
gimple *);
bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
bool terminate_and_release_chain (imm_store_chain_info *);
}; // class pass_store_merging
@ -930,13 +930,9 @@ pass_store_merging::terminate_and_process_all_chains ()
return ret;
}
/* Terminate all chains that are affected by the assignment to DEST, appearing
in statement STMT and ultimately points to the object BASE. Return true if
at least one aliasing chain was terminated. BASE and DEST are allowed to
be NULL_TREE. In that case the aliasing checks are performed on the whole
statement rather than a particular operand in it. VAR_OFFSET_P signifies
whether STMT represents a store to BASE offset by a variable amount.
If that is the case we have to terminate any chain anchored at BASE. */
/* Terminate all chains that are affected by the statement STMT.
CHAIN_INFO is the chain we should ignore from the checks if
non-NULL. */
bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
@ -949,14 +945,18 @@ pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
if (!gimple_vuse (stmt))
return false;
/* Check if the assignment destination (BASE) is part of a store chain.
This is to catch non-constant stores to destinations that may be part
of a chain. */
if (chain_info)
for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
{
next = cur->next;
/* We already checked all the stores in chain_info and terminated the
chain if necessary. Skip it here. */
if (chain_info && *chain_info == cur)
continue;
store_immediate_info *info;
unsigned int i;
FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
{
if (ref_maybe_used_by_stmt_p (stmt, gimple_assign_lhs (info->stmt))
|| stmt_may_clobber_ref_p (stmt, gimple_assign_lhs (info->stmt)))
@ -966,37 +966,13 @@ pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
fprintf (dump_file, "stmt causes chain termination:\n");
print_gimple_stmt (dump_file, stmt, 0);
}
terminate_and_release_chain (*chain_info);
terminate_and_release_chain (cur);
ret = true;
break;
}
}
}
/* Check for aliasing with all other store chains. */
for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
{
next = cur->next;
/* We already checked all the stores in chain_info and terminated the
chain if necessary. Skip it here. */
if (chain_info && (*chain_info) == cur)
continue;
/* We can't use the base object here as that does not reliably exist.
Build a ao_ref from the base object address (if we know the
minimum and maximum offset and the maximum size we could improve
things here). */
ao_ref chain_ref;
ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
|| stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
{
terminate_and_release_chain (cur);
ret = true;
}
}
return ret;
}
@ -1053,6 +1029,7 @@ compatible_load_p (merged_store_group *merged_store,
{
store_immediate_info *infof = merged_store->stores[0];
if (!info->ops[idx].base_addr
|| info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
|| (info->ops[idx].bitpos - infof->ops[idx].bitpos
!= info->bitpos - infof->bitpos)
|| !operand_equal_p (info->ops[idx].base_addr,
@ -1755,6 +1732,19 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
gimple_seq_add_stmt_without_update (&seq, stmt);
}
ops[j] = gimple_assign_lhs (stmt);
if (op.bit_not_p)
{
stmt = gimple_build_assign (make_ssa_name (int_type),
BIT_NOT_EXPR, ops[j]);
gimple_set_location (stmt, load_loc);
ops[j] = gimple_assign_lhs (stmt);
if (gsi_bb (load_gsi[j]))
gimple_seq_add_stmt_without_update (&load_seq[j],
stmt);
else
gimple_seq_add_stmt_without_update (&seq, stmt);
}
}
else
ops[j] = native_interpret_expr (int_type,
@ -2100,9 +2090,23 @@ handled_load (gimple *stmt, store_operand_info *op,
unsigned HOST_WIDE_INT bitregion_start,
unsigned HOST_WIDE_INT bitregion_end)
{
if (!is_gimple_assign (stmt) || !gimple_vuse (stmt))
if (!is_gimple_assign (stmt))
return false;
if (gimple_assign_load_p (stmt)
if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
{
tree rhs1 = gimple_assign_rhs1 (stmt);
if (TREE_CODE (rhs1) == SSA_NAME
&& has_single_use (rhs1)
&& handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
bitregion_start, bitregion_end))
{
op->bit_not_p = !op->bit_not_p;
return true;
}
return false;
}
if (gimple_vuse (stmt)
&& gimple_assign_load_p (stmt)
&& !stmt_can_throw_internal (stmt)
&& !gimple_has_volatile_ops (stmt))
{
@ -2119,6 +2123,7 @@ handled_load (gimple *stmt, store_operand_info *op,
{
op->stmt = stmt;
op->val = mem;
op->bit_not_p = false;
return true;
}
}
@ -2202,16 +2207,16 @@ pass_store_merging::process_store (gimple *stmt)
}
}
if (invalid)
{
terminate_all_aliasing_chains (NULL, stmt);
return;
}
struct imm_store_chain_info **chain_info = NULL;
if (base_addr)
chain_info = m_stores.get (base_addr);
if (invalid)
{
terminate_all_aliasing_chains (chain_info, stmt);
return;
}
store_immediate_info *info;
if (chain_info)
{
@ -2225,6 +2230,7 @@ pass_store_merging::process_store (gimple *stmt)
print_gimple_stmt (dump_file, stmt, 0);
}
(*chain_info)->m_store_info.safe_push (info);
terminate_all_aliasing_chains (chain_info, stmt);
/* If we reach the limit of stores to merge in a chain terminate and
process the chain now. */
if ((*chain_info)->m_store_info.length ()
@ -2239,7 +2245,7 @@ pass_store_merging::process_store (gimple *stmt)
}
/* Store aliases any existing chain? */
terminate_all_aliasing_chains (chain_info, stmt);
terminate_all_aliasing_chains (NULL, stmt);
/* Start a new chain. */
struct imm_store_chain_info *new_chain
= new imm_store_chain_info (m_stores_head, base_addr);

View File

@ -1,3 +1,13 @@
2017-11-08 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gcc.dg/store_merging_2.c: Expect 3 store mergings instead of 2.
* gcc.dg/store_merging_13.c (f7, f8, f9, f10, f11, f12, f13): New
functions.
(main): Test also those. Expect 13 store mergings instead of 6.
* gcc.dg/store_merging_14.c (f7, f8, f9): New functions.
(main): Test also those. Expect 9 store mergings instead of 6.
2017-11-08 Wilco Dijkstra <wdijkstr@arm.com>
* gcc.target/aarch64/dwarf-cfa-reg.c: Update.

View File

@ -104,6 +104,90 @@ f6 (struct S *p, struct S *q)
p->g = pg;
}
/* OR each of the fields a through g of *Q into the same-named field of *P,
   one statement per field.  Both pointers are __restrict-qualified.  Field
   h is not written; main checks that it keeps its value.
   NOTE(review): presumably the seven stores are meant to be combined by the
   store-merging pass, since this test counts "Merging successful" lines in
   the store-merging dump -- confirm against the dump.  */
__attribute__((noipa)) void
f7 (struct S *__restrict p, struct S *__restrict q)
{
p->a |= q->a;
p->b |= q->b;
p->c |= q->c;
p->d |= q->d;
p->e |= q->e;
p->f |= q->f;
p->g |= q->g;
}
/* AND each of the fields a through g of *Q into the same-named field of *P,
   one statement per field.  Both pointers are __restrict-qualified.  Field
   h is not written; main checks that it keeps its value.  */
__attribute__((noipa)) void
f8 (struct S *__restrict p, struct S *__restrict q)
{
p->a &= q->a;
p->b &= q->b;
p->c &= q->c;
p->d &= q->d;
p->e &= q->e;
p->f &= q->f;
p->g &= q->g;
}
/* XOR each of the fields a through g of *Q into the same-named field of *P,
   one statement per field.  Both pointers are __restrict-qualified.  Field
   h is not written; main checks that it keeps its value.  */
__attribute__((noipa)) void
f9 (struct S *__restrict p, struct S *__restrict q)
{
p->a ^= q->a;
p->b ^= q->b;
p->c ^= q->c;
p->d ^= q->d;
p->e ^= q->e;
p->f ^= q->f;
p->g ^= q->g;
}
/* Store the bitwise complement of each of the fields a through g of *Q
   into the same-named field of *P, one statement per field.  The previous
   contents of those fields of *P are not read.  Field h is not written;
   main checks that it keeps its value.
   NOTE(review): this looks like the plain "load, BIT_NOT_EXPR, store" case
   that the accompanying ChangeLog describes handled_load as newly
   handling -- confirm against the pass dump.  */
__attribute__((noipa)) void
f10 (struct S *__restrict p, struct S *__restrict q)
{
p->a = ~q->a;
p->b = ~q->b;
p->c = ~q->c;
p->d = ~q->d;
p->e = ~q->e;
p->f = ~q->f;
p->g = ~q->g;
}
/* For each of the fields a through g, OR into the field of *P the bitwise
   complement of the same-named field of *Q.  The complement is cast to
   unsigned char before the OR, except for field c where it is cast to
   unsigned short.  Field h is not written; main checks that it keeps its
   value.  */
__attribute__((noipa)) void
f11 (struct S *__restrict p, struct S *__restrict q)
{
p->a = p->a | (unsigned char) ~q->a;
p->b = p->b | (unsigned char) ~q->b;
p->c = p->c | (unsigned short) ~q->c;
p->d = p->d | (unsigned char) ~q->d;
p->e = p->e | (unsigned char) ~q->e;
p->f = p->f | (unsigned char) ~q->f;
p->g = p->g | (unsigned char) ~q->g;
}
/* For each of the fields a through g, AND into the field of *P the bitwise
   complement of the same-named field of *Q.  The complement is cast to
   unsigned char before the AND, except for field c where it is cast to
   unsigned short.  Field h is not written; main checks that it keeps its
   value.  */
__attribute__((noipa)) void
f12 (struct S *__restrict p, struct S *__restrict q)
{
p->a = p->a & (unsigned char) ~q->a;
p->b = p->b & (unsigned char) ~q->b;
p->c = p->c & (unsigned short) ~q->c;
p->d = p->d & (unsigned char) ~q->d;
p->e = p->e & (unsigned char) ~q->e;
p->f = p->f & (unsigned char) ~q->f;
p->g = p->g & (unsigned char) ~q->g;
}
/* For each of the fields a through g, XOR into the field of *P the bitwise
   complement of the same-named field of *Q.  The complement is cast to
   unsigned char before the XOR, except for field c where it is cast to
   unsigned short.  Field h is not written; main checks that it keeps its
   value.  */
__attribute__((noipa)) void
f13 (struct S *__restrict p, struct S *__restrict q)
{
p->a = p->a ^ (unsigned char) ~q->a;
p->b = p->b ^ (unsigned char) ~q->b;
p->c = p->c ^ (unsigned short) ~q->c;
p->d = p->d ^ (unsigned char) ~q->d;
p->e = p->e ^ (unsigned char) ~q->e;
p->f = p->f ^ (unsigned char) ~q->f;
p->g = p->g ^ (unsigned char) ~q->g;
}
struct S s = { 20, 21, 22, 23, 24, 25, 26, 27 };
struct S t = { 0x71, 0x72, 0x7f04, 0x78, 0x31, 0x32, 0x34, 0xf1f2f3f4f5f6f7f8ULL };
struct S u = { 28, 29, 30, 31, 32, 33, 34, 35 };
@ -151,7 +235,62 @@ main ()
|| s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
|| s.g != (42 ^ 0x34) || s.h != 27)
__builtin_abort ();
f3 (&s, &v);
f7 (&s, &t);
asm volatile ("" : : : "memory");
if (s.a != (36 | 0x71) || s.b != (37 | 0x72)
|| s.c != (38 | 0x7f04) || s.d != (39 | 0x78)
|| s.e != (40 | 0x31) || s.f != (41 | 0x32)
|| s.g != (42 | 0x34) || s.h != 27)
__builtin_abort ();
f3 (&s, &u);
f8 (&s, &t);
asm volatile ("" : : : "memory");
if (s.a != (28 & 0x71) || s.b != (29 & 0x72)
|| s.c != (30 & 0x7f04) || s.d != (31 & 0x78)
|| s.e != (32 & 0x31) || s.f != (33 & 0x32)
|| s.g != (34 & 0x34) || s.h != 27)
__builtin_abort ();
f2 (&s, &v);
f9 (&s, &t);
asm volatile ("" : : : "memory");
if (s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72)
|| s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
|| s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
|| s.g != (42 ^ 0x34) || s.h != 27)
__builtin_abort ();
f10 (&s, &u);
asm volatile ("" : : : "memory");
if (s.a != (unsigned char) ~28 || s.b != (unsigned char) ~29
|| s.c != (unsigned short) ~30 || s.d != (unsigned char) ~31
|| s.e != (unsigned char) ~32 || s.f != (unsigned char) ~33
|| s.g != (unsigned char) ~34 || s.h != 27)
__builtin_abort ();
f3 (&s, &v);
f11 (&s, &t);
asm volatile ("" : : : "memory");
if (s.a != (36 | (unsigned char) ~0x71) || s.b != (37 | (unsigned char) ~0x72)
|| s.c != (38 | (unsigned short) ~0x7f04) || s.d != (39 | (unsigned char) ~0x78)
|| s.e != (40 | (unsigned char) ~0x31) || s.f != (41 | (unsigned char) ~0x32)
|| s.g != (42 | (unsigned char) ~0x34) || s.h != 27)
__builtin_abort ();
f3 (&s, &u);
f12 (&s, &t);
asm volatile ("" : : : "memory");
if (s.a != (28 & (unsigned char) ~0x71) || s.b != (29 & (unsigned char) ~0x72)
|| s.c != (30 & (unsigned short) ~0x7f04) || s.d != (31 & (unsigned char) ~0x78)
|| s.e != (32 & (unsigned char) ~0x31) || s.f != (33 & (unsigned char) ~0x32)
|| s.g != (34 & (unsigned char) ~0x34) || s.h != 27)
__builtin_abort ();
f2 (&s, &v);
f13 (&s, &t);
asm volatile ("" : : : "memory");
if (s.a != (36 ^ (unsigned char) ~0x71) || s.b != (37 ^ (unsigned char) ~0x72)
|| s.c != (38 ^ (unsigned short) ~0x7f04) || s.d != (39 ^ (unsigned char) ~0x78)
|| s.e != (40 ^ (unsigned char) ~0x31) || s.f != (41 ^ (unsigned char) ~0x32)
|| s.g != (42 ^ (unsigned char) ~0x34) || s.h != 27)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "Merging successful" 13 "store-merging" } } */

View File

@ -104,6 +104,42 @@ f6 (struct S *p, struct S *q)
p->g = pg;
}
/* OR each of the fields a through g of *Q into the same-named field of *P,
   one statement per field.  Both pointers are __restrict-qualified.  In
   this test main also checks that fields i, j, k and h of the destination
   keep their values after the call.
   NOTE(review): struct S is declared outside this hunk; presumably i, j
   and k sit between the written fields so the merged store must preserve
   them -- confirm against the struct definition.  */
__attribute__((noipa)) void
f7 (struct S *__restrict p, struct S *__restrict q)
{
p->a |= q->a;
p->b |= q->b;
p->c |= q->c;
p->d |= q->d;
p->e |= q->e;
p->f |= q->f;
p->g |= q->g;
}
/* AND each of the fields a through g of *Q into the same-named field of *P,
   one statement per field.  Both pointers are __restrict-qualified.  In
   this test main also checks that fields i, j, k and h of the destination
   keep their values after the call.  */
__attribute__((noipa)) void
f8 (struct S *__restrict p, struct S *__restrict q)
{
p->a &= q->a;
p->b &= q->b;
p->c &= q->c;
p->d &= q->d;
p->e &= q->e;
p->f &= q->f;
p->g &= q->g;
}
/* XOR each of the fields a through g of *Q into the same-named field of *P,
   one statement per field.  Both pointers are __restrict-qualified.  No
   other field of *P is written here.  */
__attribute__((noipa)) void
f9 (struct S *__restrict p, struct S *__restrict q)
{
p->a ^= q->a;
p->b ^= q->b;
p->c ^= q->c;
p->d ^= q->d;
p->e ^= q->e;
p->f ^= q->f;
p->g ^= q->g;
}
struct S s = { 72, 20, 21, 73, 22, 23, 24, 25, 26, 74, 27 };
struct S t = { 75, 0x71, 0x72, 76, 0x7f04, 0x78, 0x31, 0x32, 0x34, 77, 0xf1f2f3f4f5f6f7f8ULL };
struct S u = { 78, 28, 29, 79, 30, 31, 32, 33, 34, 80, 35 };
@ -146,6 +182,30 @@ main ()
f2 (&s, &v);
f6 (&s, &t);
asm volatile ("" : : : "memory");
if (s.i != 72 || s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72) || s.j != 73
|| s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
|| s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
|| s.g != (42 ^ 0x34) || s.k != 74 || s.h != 27)
__builtin_abort ();
f3 (&s, &v);
f7 (&s, &t);
asm volatile ("" : : : "memory");
if (s.i != 72 || s.a != (36 | 0x71) || s.b != (37 | 0x72) || s.j != 73
|| s.c != (38 | 0x7f04) || s.d != (39 | 0x78)
|| s.e != (40 | 0x31) || s.f != (41 | 0x32)
|| s.g != (42 | 0x34) || s.k != 74 || s.h != 27)
__builtin_abort ();
f3 (&s, &u);
f8 (&s, &t);
asm volatile ("" : : : "memory");
if (s.i != 72 || s.a != (28 & 0x71) || s.b != (29 & 0x72) || s.j != 73
|| s.c != (30 & 0x7f04) || s.d != (31 & 0x78)
|| s.e != (32 & 0x31) || s.f != (33 & 0x32)
|| s.g != (34 & 0x34) || s.k != 74 || s.h != 27)
__builtin_abort ();
f2 (&s, &v);
f9 (&s, &t);
asm volatile ("" : : : "memory");
if (s.i != 72 || s.a != (36 ^ 0x71) || s.b != (37 ^ 0x72) || s.j != 73
|| s.c != (38 ^ 0x7f04) || s.d != (39 ^ 0x78)
|| s.e != (40 ^ 0x31) || s.f != (41 ^ 0x32)
@ -154,4 +214,4 @@ main ()
return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "Merging successful" 9 "store-merging" } } */

View File

@ -77,4 +77,4 @@ main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" } } */