re PR tree-optimization/78821 (GCC7: Copying whole 32 bits structure field by field not optimised into copying whole 32 bits at once)

PR tree-optimization/78821
	* gimple-ssa-store-merging.c: Update the file comment.
	(MAX_STORE_ALIAS_CHECKS): Define.
	(struct store_operand_info): New type.
	(store_operand_info::store_operand_info): New constructor.
	(struct store_immediate_info): Add rhs_code and ops data members.
	(store_immediate_info::store_immediate_info): Add rhscode, op0r
	and op1r arguments to the ctor, initialize corresponding data members.
	(struct merged_store_group): Add load_align_base and load_align
	data members.
	(merged_store_group::merged_store_group): Initialize them.
	(merged_store_group::do_merge): Update them.
	(merged_store_group::apply_stores): Pick the constant for
	encode_tree_to_bitpos from one of the two operands, or skip
	encode_tree_to_bitpos if neither operand is a constant.
	(class pass_store_merging): Add process_store method decl.  Remove
	bool argument from terminate_all_aliasing_chains method decl.
	(pass_store_merging::terminate_all_aliasing_chains): Remove
	var_offset_p argument and corresponding handling.
	(stmts_may_clobber_ref_p): New function.
	(compatible_load_p): New function.
	(imm_store_chain_info::coalesce_immediate_stores): Terminate group
	if there is overlap and rhs_code is not INTEGER_CST.  For
	non-overlapping stores terminate group if rhs is not mergeable.
	(get_alias_type_for_stmts): Change first argument from
	auto_vec<gimple *> & to vec<gimple *> &.  Add IS_LOAD, CLIQUEP and
	BASEP arguments.  If IS_LOAD is true, look at rhs1 of the stmts
	instead of lhs.  Compute *CLIQUEP and *BASEP in addition to the
	alias type.
	(get_location_for_stmts): Change first argument from
	auto_vec<gimple *> & to vec<gimple *> &.
	(struct split_store): Remove orig_stmts data member, add orig_stores.
	(split_store::split_store): Create orig_stores rather than orig_stmts.
	(find_constituent_stmts): Renamed to ...
	(find_constituent_stores): ... this.  Change second argument from
	vec<gimple *> * to vec<store_immediate_info *> *, push pointers
	to info structures rather than the statements.
	(split_group): Rename ALLOW_UNALIGNED argument to
	ALLOW_UNALIGNED_STORE, add ALLOW_UNALIGNED_LOAD argument and handle
	it.  Adjust find_constituent_stores caller.
	(imm_store_chain_info::output_merged_store): Handle rhs_code other
	than INTEGER_CST, adjust split_group, get_alias_type_for_stmts and
	get_location_for_stmts callers.  Set MR_DEPENDENCE_CLIQUE and
	MR_DEPENDENCE_BASE on the MEM_REFs if they are the same in all stores.
	(mem_valid_for_store_merging): New function.
	(handled_load): New function.
	(pass_store_merging::process_store): New method.
	(pass_store_merging::execute): Use process_store method.  Adjust
	terminate_all_aliasing_chains caller.

	* gcc.dg/store_merging_13.c: New test.
	* gcc.dg/store_merging_14.c: New test.

From-SVN: r254391
This commit is contained in:
Jakub Jelinek 2017-11-03 20:08:25 +01:00 committed by Jakub Jelinek
parent 248b06ba7d
commit 245f6de13d
5 changed files with 1230 additions and 290 deletions

View File

@ -1,3 +1,55 @@
2017-11-03 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gimple-ssa-store-merging.c: Update the file comment.
(MAX_STORE_ALIAS_CHECKS): Define.
(struct store_operand_info): New type.
(store_operand_info::store_operand_info): New constructor.
(struct store_immediate_info): Add rhs_code and ops data members.
(store_immediate_info::store_immediate_info): Add rhscode, op0r
and op1r arguments to the ctor, initialize corresponding data members.
(struct merged_store_group): Add load_align_base and load_align
data members.
(merged_store_group::merged_store_group): Initialize them.
(merged_store_group::do_merge): Update them.
(merged_store_group::apply_stores): Pick the constant for
encode_tree_to_bitpos from one of the two operands, or skip
encode_tree_to_bitpos if neither operand is a constant.
(class pass_store_merging): Add process_store method decl. Remove
bool argument from terminate_all_aliasing_chains method decl.
(pass_store_merging::terminate_all_aliasing_chains): Remove
var_offset_p argument and corresponding handling.
(stmts_may_clobber_ref_p): New function.
(compatible_load_p): New function.
(imm_store_chain_info::coalesce_immediate_stores): Terminate group
if there is overlap and rhs_code is not INTEGER_CST. For
non-overlapping stores terminate group if rhs is not mergeable.
(get_alias_type_for_stmts): Change first argument from
auto_vec<gimple *> & to vec<gimple *> &. Add IS_LOAD, CLIQUEP and
BASEP arguments. If IS_LOAD is true, look at rhs1 of the stmts
instead of lhs. Compute *CLIQUEP and *BASEP in addition to the
alias type.
(get_location_for_stmts): Change first argument from
auto_vec<gimple *> & to vec<gimple *> &.
(struct split_store): Remove orig_stmts data member, add orig_stores.
(split_store::split_store): Create orig_stores rather than orig_stmts.
(find_constituent_stmts): Renamed to ...
(find_constituent_stores): ... this. Change second argument from
vec<gimple *> * to vec<store_immediate_info *> *, push pointers
to info structures rather than the statements.
(split_group): Rename ALLOW_UNALIGNED argument to
ALLOW_UNALIGNED_STORE, add ALLOW_UNALIGNED_LOAD argument and handle
it. Adjust find_constituent_stores caller.
(imm_store_chain_info::output_merged_store): Handle rhs_code other
than INTEGER_CST, adjust split_group, get_alias_type_for_stmts and
get_location_for_stmts callers. Set MR_DEPENDENCE_CLIQUE and
MR_DEPENDENCE_BASE on the MEM_REFs if they are the same in all stores.
(mem_valid_for_store_merging): New function.
(handled_load): New function.
(pass_store_merging::process_store): New method.
(pass_store_merging::execute): Use process_store method. Adjust
terminate_all_aliasing_chains caller.
2017-11-03 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_legitimate_constant_p):

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,9 @@
2017-11-03 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gcc.dg/store_merging_13.c: New test.
* gcc.dg/store_merging_14.c: New test.
2017-11-03 Steven G. Kargl <kargl@gcc.gnu.org>
* gfortran.dg/large_real_kind_2.F90: Test passes on FreeBSD. Remove

View File

@ -0,0 +1,157 @@
/* { dg-do run } */
/* { dg-require-effective-target store_merge } */
/* { dg-options "-O2 -fdump-tree-store-merging" } */
/* This must be a run test, not a compile-only test: main checks the values
   written by f1-f6 and calls __builtin_abort on a mismatch, which only has
   an effect when the program is actually executed.  */
/* Structure under test: two unsigned chars, an unsigned short, four more
   unsigned chars, then an unsigned long long member h.  The functions in
   this file write a..g only; h is never stored to.  */
struct S { unsigned char a, b; unsigned short c; unsigned char d, e, f, g; unsigned long long h; };
/* Store the constants 1..7 into the fields a..g of *P, in declaration
   order.  P->h is not written.
   NOTE(review): presumably one of the six "Merging successful" hits
   counted by the dg-final scan at the end of the file.  */
__attribute__((noipa)) void
f1 (struct S *p)
{
p->a = 1;
p->b = 2;
p->c = 3;
p->d = 4;
p->e = 5;
p->f = 6;
p->g = 7;
}
/* Copy the fields a..g from *Q to *P one field at a time, each load
   immediately followed by its store.  Both pointers are __restrict
   qualified.  The h member is neither read nor written.  */
__attribute__((noipa)) void
f2 (struct S *__restrict p, struct S *__restrict q)
{
p->a = q->a;
p->b = q->b;
p->c = q->c;
p->d = q->d;
p->e = q->e;
p->f = q->f;
p->g = q->g;
}
/* Copy the fields a..g from *Q to *P, but without __restrict: every field
   of *Q is first loaded into a local, and only then are the stores to *P
   performed, so no load follows a store.  */
__attribute__((noipa)) void
f3 (struct S *p, struct S *q)
{
/* All loads from *Q happen up front.  */
unsigned char pa = q->a;
unsigned char pb = q->b;
unsigned short pc = q->c;
unsigned char pd = q->d;
unsigned char pe = q->e;
unsigned char pf = q->f;
unsigned char pg = q->g;
/* Then the stores to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* For each of the fields a..g, compute P->field | Q->field and store the
   result back into *P.  All loads (from both *P and *Q) are done before
   the first store.  */
__attribute__((noipa)) void
f4 (struct S *p, struct S *q)
{
/* Bitwise OR of the corresponding fields, kept in locals.  */
unsigned char pa = p->a | q->a;
unsigned char pb = p->b | q->b;
unsigned short pc = p->c | q->c;
unsigned char pd = p->d | q->d;
unsigned char pe = p->e | q->e;
unsigned char pf = p->f | q->f;
unsigned char pg = p->g | q->g;
/* Write the results back to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* For each of the fields a..g, compute P->field & Q->field and store the
   result back into *P.  All loads (from both *P and *Q) are done before
   the first store.  */
__attribute__((noipa)) void
f5 (struct S *p, struct S *q)
{
/* Bitwise AND of the corresponding fields, kept in locals.  */
unsigned char pa = p->a & q->a;
unsigned char pb = p->b & q->b;
unsigned short pc = p->c & q->c;
unsigned char pd = p->d & q->d;
unsigned char pe = p->e & q->e;
unsigned char pf = p->f & q->f;
unsigned char pg = p->g & q->g;
/* Write the results back to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* For each of the fields a..g, compute P->field ^ Q->field and store the
   result back into *P.  All loads (from both *P and *Q) are done before
   the first store.  */
__attribute__((noipa)) void
f6 (struct S *p, struct S *q)
{
/* Bitwise XOR of the corresponding fields, kept in locals.  */
unsigned char pa = p->a ^ q->a;
unsigned char pb = p->b ^ q->b;
unsigned short pc = p->c ^ q->c;
unsigned char pd = p->d ^ q->d;
unsigned char pe = p->e ^ q->e;
unsigned char pf = p->f ^ q->f;
unsigned char pg = p->g ^ q->g;
/* Write the results back to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* Test objects; the eight initializers map to a, b, c, d, e, f, g, h.
   s is the destination passed as P in every call from main; its h (27)
   is expected to stay unchanged.
   t is the second operand for the |, & and ^ tests (f4, f5, f6).
   u and v are the sources for the plain copies (f2, f3).  */
struct S s = { 20, 21, 22, 23, 24, 25, 26, 27 };
struct S t = { 0x71, 0x72, 0x7f04, 0x78, 0x31, 0x32, 0x34, 0xf1f2f3f4f5f6f7f8ULL };
struct S u = { 28, 29, 30, 31, 32, 33, 34, 35 };
struct S v = { 36, 37, 38, 39, 40, 41, 42, 43 };
/* Driver: apply f1-f6 to the global s and abort as soon as any field does
   not hold the expected value.  s.h has to remain 27 the whole time.
   Each empty asm carries a "memory" clobber and is kept exactly where the
   checks need the globals to be re-read.  */
int
main ()
{
  asm volatile ("" : : : "memory");

  /* f1: constants 1..7.  */
  f1 (&s);
  asm volatile ("" : : : "memory");
  if (!(s.a == 1 && s.b == 2 && s.c == 3 && s.d == 4
        && s.e == 5 && s.f == 6 && s.g == 7 && s.h == 27))
    __builtin_abort ();

  /* f2: plain copy from u.  */
  f2 (&s, &u);
  asm volatile ("" : : : "memory");
  if (!(s.a == 28 && s.b == 29 && s.c == 30 && s.d == 31
        && s.e == 32 && s.f == 33 && s.g == 34 && s.h == 27))
    __builtin_abort ();

  /* f3: plain copy from v.  */
  f3 (&s, &v);
  asm volatile ("" : : : "memory");
  if (!(s.a == 36 && s.b == 37 && s.c == 38 && s.d == 39
        && s.e == 40 && s.f == 41 && s.g == 42 && s.h == 27))
    __builtin_abort ();

  /* f4: s (currently v's values) OR t.  */
  f4 (&s, &t);
  asm volatile ("" : : : "memory");
  if (!(s.a == (36 | 0x71) && s.b == (37 | 0x72)
        && s.c == (38 | 0x7f04) && s.d == (39 | 0x78)
        && s.e == (40 | 0x31) && s.f == (41 | 0x32)
        && s.g == (42 | 0x34) && s.h == 27))
    __builtin_abort ();

  /* f5: reset s from u, then AND with t.  */
  f3 (&s, &u);
  f5 (&s, &t);
  asm volatile ("" : : : "memory");
  if (!(s.a == (28 & 0x71) && s.b == (29 & 0x72)
        && s.c == (30 & 0x7f04) && s.d == (31 & 0x78)
        && s.e == (32 & 0x31) && s.f == (33 & 0x32)
        && s.g == (34 & 0x34) && s.h == 27))
    __builtin_abort ();

  /* f6: reset s from v, then XOR with t.  */
  f2 (&s, &v);
  f6 (&s, &t);
  asm volatile ("" : : : "memory");
  if (!(s.a == (36 ^ 0x71) && s.b == (37 ^ 0x72)
        && s.c == (38 ^ 0x7f04) && s.d == (39 ^ 0x78)
        && s.e == (40 ^ 0x31) && s.f == (41 ^ 0x32)
        && s.g == (42 ^ 0x34) && s.h == 27))
    __builtin_abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging" } } */

View File

@ -0,0 +1,157 @@
/* { dg-do run } */
/* { dg-require-effective-target store_merge } */
/* { dg-options "-O2 -fdump-tree-store-merging" } */
/* This must be a run test, not a compile-only test: main checks the values
   written by f1-f6 and calls __builtin_abort on a mismatch, which only has
   an effect when the program is actually executed.  */
/* Bit-field variant of the structure under test.  The functions in this
   file write a..g only; the bit-fields i, j and k, which are declared
   before, between and after them, and the member h are never stored to
   and must keep their values.  */
struct S { unsigned int i : 8, a : 7, b : 7, j : 10, c : 15, d : 7, e : 10, f : 7, g : 9, k : 16; unsigned long long h; };
/* Store the constants 1..7 into the bit-fields a..g of *P.  The
   bit-fields i, j, k and the member h are not written.
   NOTE(review): presumably one of the six "Merging successful" hits
   counted by the dg-final scan at the end of the file.  */
__attribute__((noipa)) void
f1 (struct S *p)
{
p->a = 1;
p->b = 2;
p->c = 3;
p->d = 4;
p->e = 5;
p->f = 6;
p->g = 7;
}
/* Copy the bit-fields a..g from *Q to *P one field at a time, each load
   immediately followed by its store.  Both pointers are __restrict
   qualified.  i, j, k and h are neither read nor written.  */
__attribute__((noipa)) void
f2 (struct S *__restrict p, struct S *__restrict q)
{
p->a = q->a;
p->b = q->b;
p->c = q->c;
p->d = q->d;
p->e = q->e;
p->f = q->f;
p->g = q->g;
}
/* Copy the bit-fields a..g from *Q to *P, but without __restrict: every
   field of *Q is first loaded into a local, and only then are the stores
   to *P performed, so no load follows a store.  */
__attribute__((noipa)) void
f3 (struct S *p, struct S *q)
{
/* All loads from *Q happen up front.  unsigned char is used for the 7-bit
   fields and unsigned short for the 15-, 10- and 9-bit ones.  */
unsigned char pa = q->a;
unsigned char pb = q->b;
unsigned short pc = q->c;
unsigned char pd = q->d;
unsigned short pe = q->e;
unsigned char pf = q->f;
unsigned short pg = q->g;
/* Then the stores to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* For each of the bit-fields a..g, compute P->field | Q->field and store
   the result back into *P.  All loads (from both *P and *Q) are done
   before the first store.  */
__attribute__((noipa)) void
f4 (struct S *p, struct S *q)
{
/* Bitwise OR of the corresponding fields, kept in locals.  */
unsigned char pa = p->a | q->a;
unsigned char pb = p->b | q->b;
unsigned short pc = p->c | q->c;
unsigned char pd = p->d | q->d;
unsigned short pe = p->e | q->e;
unsigned char pf = p->f | q->f;
unsigned short pg = p->g | q->g;
/* Write the results back to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* For each of the bit-fields a..g, compute P->field & Q->field and store
   the result back into *P.  All loads (from both *P and *Q) are done
   before the first store.  */
__attribute__((noipa)) void
f5 (struct S *p, struct S *q)
{
/* Bitwise AND of the corresponding fields, kept in locals.  */
unsigned char pa = p->a & q->a;
unsigned char pb = p->b & q->b;
unsigned short pc = p->c & q->c;
unsigned char pd = p->d & q->d;
unsigned short pe = p->e & q->e;
unsigned char pf = p->f & q->f;
unsigned short pg = p->g & q->g;
/* Write the results back to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* For each of the bit-fields a..g, compute P->field ^ Q->field and store
   the result back into *P.  All loads (from both *P and *Q) are done
   before the first store.  */
__attribute__((noipa)) void
f6 (struct S *p, struct S *q)
{
/* Bitwise XOR of the corresponding fields, kept in locals.  */
unsigned char pa = p->a ^ q->a;
unsigned char pb = p->b ^ q->b;
unsigned short pc = p->c ^ q->c;
unsigned char pd = p->d ^ q->d;
unsigned short pe = p->e ^ q->e;
unsigned char pf = p->f ^ q->f;
unsigned short pg = p->g ^ q->g;
/* Write the results back to *P, in field order.  */
p->a = pa;
p->b = pb;
p->c = pc;
p->d = pd;
p->e = pe;
p->f = pf;
p->g = pg;
}
/* Test objects; the eleven initializers map to i, a, b, j, c, d, e, f, g,
   k, h.  s is the destination passed as P in every call from main; its
   i (72), j (73), k (74) and h (27) are expected to stay unchanged.
   t is the second operand for the |, & and ^ tests (f4, f5, f6).
   u and v are the sources for the plain copies (f2, f3).  */
struct S s = { 72, 20, 21, 73, 22, 23, 24, 25, 26, 74, 27 };
struct S t = { 75, 0x71, 0x72, 76, 0x7f04, 0x78, 0x31, 0x32, 0x34, 77, 0xf1f2f3f4f5f6f7f8ULL };
struct S u = { 78, 28, 29, 79, 30, 31, 32, 33, 34, 80, 35 };
struct S v = { 81, 36, 37, 82, 38, 39, 40, 41, 42, 83, 43 };
/* Driver: apply f1-f6 to the global s and abort as soon as any field does
   not hold the expected value.  Besides the written bit-fields a..g, every
   check also verifies that i, j, k and h still hold 72, 73, 74 and 27.
   Each empty asm carries a "memory" clobber and is kept exactly where the
   checks need the globals to be re-read.  */
int
main ()
{
  asm volatile ("" : : : "memory");

  /* f1: constants 1..7.  */
  f1 (&s);
  asm volatile ("" : : : "memory");
  if (!(s.i == 72 && s.a == 1 && s.b == 2 && s.j == 73 && s.c == 3 && s.d == 4
        && s.e == 5 && s.f == 6 && s.g == 7 && s.k == 74 && s.h == 27))
    __builtin_abort ();

  /* f2: plain copy from u.  */
  f2 (&s, &u);
  asm volatile ("" : : : "memory");
  if (!(s.i == 72 && s.a == 28 && s.b == 29 && s.j == 73 && s.c == 30 && s.d == 31
        && s.e == 32 && s.f == 33 && s.g == 34 && s.k == 74 && s.h == 27))
    __builtin_abort ();

  /* f3: plain copy from v.  */
  f3 (&s, &v);
  asm volatile ("" : : : "memory");
  if (!(s.i == 72 && s.a == 36 && s.b == 37 && s.j == 73 && s.c == 38 && s.d == 39
        && s.e == 40 && s.f == 41 && s.g == 42 && s.k == 74 && s.h == 27))
    __builtin_abort ();

  /* f4: s (currently v's values) OR t.  */
  f4 (&s, &t);
  asm volatile ("" : : : "memory");
  if (!(s.i == 72 && s.a == (36 | 0x71) && s.b == (37 | 0x72) && s.j == 73
        && s.c == (38 | 0x7f04) && s.d == (39 | 0x78)
        && s.e == (40 | 0x31) && s.f == (41 | 0x32)
        && s.g == (42 | 0x34) && s.k == 74 && s.h == 27))
    __builtin_abort ();

  /* f5: reset s from u, then AND with t.  */
  f3 (&s, &u);
  f5 (&s, &t);
  asm volatile ("" : : : "memory");
  if (!(s.i == 72 && s.a == (28 & 0x71) && s.b == (29 & 0x72) && s.j == 73
        && s.c == (30 & 0x7f04) && s.d == (31 & 0x78)
        && s.e == (32 & 0x31) && s.f == (33 & 0x32)
        && s.g == (34 & 0x34) && s.k == 74 && s.h == 27))
    __builtin_abort ();

  /* f6: reset s from v, then XOR with t.  */
  f2 (&s, &v);
  f6 (&s, &t);
  asm volatile ("" : : : "memory");
  if (!(s.i == 72 && s.a == (36 ^ 0x71) && s.b == (37 ^ 0x72) && s.j == 73
        && s.c == (38 ^ 0x7f04) && s.d == (39 ^ 0x78)
        && s.e == (40 ^ 0x31) && s.f == (41 ^ 0x32)
        && s.g == (42 ^ 0x34) && s.k == 74 && s.h == 27))
    __builtin_abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 6 "store-merging" } } */