tree-loop-distribution.c (classify_partition): Return whether a reduction appeared in all partitions and do not stop builtin detection because of this.

2019-06-14  Richard Biener  <rguenther@suse.de>

	* tree-loop-distribution.c (classify_partition): Return
	whether a reduction appeared in all partitions and do not
	stop builtin detection because of this.
	(distribute_loop): Sort a non-builtin partition last if
	there's a reduction in all partitions and make sure the
	partition prevailing as last is not a builtin.

	* gcc.dg/tree-ssa/ldist-26.c: Adjust.

From-SVN: r272284
This commit is contained in:
Richard Biener 2019-06-14 11:29:44 +00:00 committed by Richard Biener
parent 46771da574
commit e748435795
4 changed files with 69 additions and 18 deletions

View File

@ -1,3 +1,12 @@
2019-06-14 Richard Biener <rguenther@suse.de>
* tree-loop-distribution.c (classify_partition): Return
whether a reduction appeared in all partitions and do not
stop builtin detection because of this.
(distribute_loop): Sort a non-builtin partition last if
there's a reduction in all partitions and make sure the
partition prevailing as last is not a builtin.
2019-06-14 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/90401

View File

@ -1,3 +1,7 @@
2019-06-14 Richard Biener <rguenther@suse.de>
* gcc.dg/tree-ssa/ldist-26.c: Adjust.
2019-06-14 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/90401

View File

@ -31,6 +31,8 @@ int main()
return 0;
}
/* { dg-final { scan-tree-dump "distributed: split to 2 loops and 0 library calls" "ldist" } } */
/* { dg-final { scan-tree-dump "distributed: split to 1 loops and 1 library calls" "ldist" } } */
/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */
/* Loop splitting splits the iteration space so we end up with two
loops entering loop distribution. Both should have the b[i] = 0
part split out as memset. */
/* { dg-final { scan-tree-dump-times "distributed: split to 1 loops and 1 library calls" 2 "ldist" } } */
/* { dg-final { scan-tree-dump-times "generated memset zero" 2 "ldist" } } */

View File

@ -1658,9 +1658,11 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect memset, memcpy and memmove patterns. Bitmap
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
Returns true if there is a reduction in all partitions and we
possibly did not mark PARTITION as having one for this reason. */
static void
static bool
classify_partition (loop_p loop, struct graph *rdg, partition *partition,
bitmap stmt_in_all_partitions)
{
@ -1688,25 +1690,27 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
to all partitions. In such case, reduction will be computed
correctly no matter how partitions are fused/distributed. */
if (!bitmap_bit_p (stmt_in_all_partitions, i))
{
partition->reduction_p = true;
return;
}
has_reduction = true;
partition->reduction_p = true;
else
has_reduction = true;
}
}
/* Simple workaround to prevent classifying the partition as builtin
if it contains any use outside of loop. For the case where all
partitions have the reduction this simple workaround is delayed
to only affect the last partition. */
if (partition->reduction_p)
return has_reduction;
/* Perform general partition disqualification for builtins. */
if (volatiles_p
/* Simple workaround to prevent classifying the partition as builtin
if it contains any use outside of loop. */
|| has_reduction
|| !flag_tree_loop_distribute_patterns)
return;
return has_reduction;
/* Find single load/store data references for builtin partition. */
if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
return;
return has_reduction;
partition->loc = gimple_location (DR_STMT (single_st));
@ -1715,6 +1719,7 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
classify_builtin_st (loop, partition, single_st);
else
classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
return has_reduction;
}
/* Returns true when PARTITION1 and PARTITION2 access the same memory
@ -2782,7 +2787,6 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
ddrs_table = new hash_table<ddr_hasher> (389);
struct graph *rdg;
partition *partition;
bool any_builtin;
int i, nbp;
*destroy_p = false;
@ -2842,10 +2846,12 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
for (i = 1; partitions.iterate (i, &partition); ++i)
bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
any_builtin = false;
bool any_builtin = false;
bool reduction_in_all = false;
FOR_EACH_VEC_ELT (partitions, i, partition)
{
classify_partition (loop, rdg, partition, stmt_in_all_partitions);
reduction_in_all
|= classify_partition (loop, rdg, partition, stmt_in_all_partitions);
any_builtin |= partition_builtin_p (partition);
}
@ -2920,6 +2926,21 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
i--;
}
/* Put a non-builtin partition last if we need to preserve a reduction.
??? This is a workaround that makes sort_partitions_by_post_order do
the correct thing while in reality it should sort each component
separately and then put the component with a reduction or a non-builtin
last. */
if (reduction_in_all
&& partition_builtin_p (partitions.last()))
FOR_EACH_VEC_ELT (partitions, i, partition)
if (!partition_builtin_p (partition))
{
partitions.unordered_remove (i);
partitions.quick_push (partition);
break;
}
/* Build the partition dependency graph and fuse partitions in strong
connected component. */
if (partitions.length () > 1)
@ -2940,6 +2961,21 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
finalize_partitions (loop, &partitions, &alias_ddrs);
/* If there is a reduction in all partitions make sure the last one
is not classified for builtin code generation. */
if (reduction_in_all)
{
partition = partitions.last ();
if (only_patterns_p
&& partition_builtin_p (partition)
&& !partition_builtin_p (partitions[0]))
{
nbp = 0;
goto ldist_done;
}
partition->kind = PKIND_NORMAL;
}
nbp = partitions.length ();
if (nbp == 0
|| (nbp == 1 && !partition_builtin_p (partitions[0]))