tree-data-ref.c (have_similar_memory_accesses_1): Remove.
2012-06-04 Richard Guenther <rguenther@suse.de> * tree-data-ref.c (have_similar_memory_accesses_1): Remove. (ref_base_address_1): Likewise. (remove_similar_memory_refs): Likewise. * tree-data-ref.h (remove_similar_memory_refs): Remove. * tree-loop-distribution.c (classify_partition): Do not classify as builtin if -ftree-loop-distribute-patterns is not enabled. (fuse_partitions_with_similar_memory_accesses): Inline ... (ldist_gen): ... here. Fuse all non-builtin partitions if -ftree-loop-distribution is not enabled. Properly return the number of created partitions. Do not update SSA form here but ... (tree_loop_distribution): ... once here for the whole function. Only walk innermost loops, constrain loops we consider here further. Do not call remove_similar_memory_refs. (distribute_loop): Do not check number of loop nodes here. * gcc.dg/tree-ssa/ldist-11.c: Enable -ftree-loop-distribute-patterns. * gcc.dg/tree-ssa/ldist-17.c: Likewise. * gcc.dg/tree-ssa/ldist-pr45948.c: Likewise. From-SVN: r188168
This commit is contained in:
parent
296f202e8e
commit
c014f6f54b
|
@ -1,3 +1,21 @@
|
|||
2012-06-04 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* tree-data-ref.c (have_similar_memory_accesses_1): Remove.
|
||||
(ref_base_address_1): Likewise.
|
||||
(remove_similar_memory_refs): Likewise.
|
||||
* tree-data-ref.h (remove_similar_memory_refs): Remove.
|
||||
* tree-loop-distribution.c (classify_partition): Do not classify
|
||||
as builtin if -ftree-loop-distribute-patterns is not enabled.
|
||||
(fuse_partitions_with_similar_memory_accesses): Inline ...
|
||||
(ldist_gen): ... here. Fuse all non-builtin partitions if
|
||||
-ftree-loop-distribution is not enabled. Properly return
|
||||
the number of created partitions. Do not update SSA form here
|
||||
but ...
|
||||
(tree_loop_distribution): ... once here for the whole function.
|
||||
Only walk innermost loops, constrain loops we consider here
|
||||
further. Do not call remove_similar_memory_refs.
|
||||
(distribute_loop): Do not check number of loop nodes here.
|
||||
|
||||
2012-06-04 Steven Bosscher <steven@gcc.gnu.org>
|
||||
|
||||
* Makefile.in (GIMPLE_H): Do not depend on TARGET_H.
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2012-06-04 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* gcc.dg/tree-ssa/ldist-11.c: Enable -ftree-loop-distribute-patterns.
|
||||
* gcc.dg/tree-ssa/ldist-17.c: Likewise.
|
||||
* gcc.dg/tree-ssa/ldist-pr45948.c: Likewise.
|
||||
|
||||
2012-06-03 Alessandro Fanfarillo <fanfarillo.gcc@gmail.com>
|
||||
|
||||
PR fortran/48831
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-all" } */
|
||||
|
||||
void foo (int * __restrict__ ia,
|
||||
int * __restrict__ ib,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
|
||||
|
||||
typedef int mad_fixed_t;
|
||||
struct mad_pcm
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
|
||||
|
||||
extern void bar(int);
|
||||
|
||||
|
|
|
@ -5403,65 +5403,3 @@ have_similar_memory_accesses (gimple s1, gimple s2)
|
|||
VEC_free (data_ref_loc, heap, refs2);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Helper function for the hashtab. */
|
||||
|
||||
static int
|
||||
have_similar_memory_accesses_1 (const void *s1, const void *s2)
|
||||
{
|
||||
return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
|
||||
CONST_CAST_GIMPLE ((const_gimple) s2));
|
||||
}
|
||||
|
||||
/* Helper function for the hashtab. */
|
||||
|
||||
static hashval_t
|
||||
ref_base_address_1 (const void *s)
|
||||
{
|
||||
gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
|
||||
unsigned i;
|
||||
VEC (data_ref_loc, heap) *refs;
|
||||
data_ref_loc *ref;
|
||||
hashval_t res = 0;
|
||||
|
||||
get_references_in_stmt (stmt, &refs);
|
||||
|
||||
FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
|
||||
if (!ref->is_read)
|
||||
{
|
||||
res = htab_hash_pointer (ref_base_address (stmt, ref));
|
||||
break;
|
||||
}
|
||||
|
||||
VEC_free (data_ref_loc, heap, refs);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Try to remove duplicated write data references from STMTS. */
|
||||
|
||||
void
|
||||
remove_similar_memory_refs (VEC (gimple, heap) **stmts)
|
||||
{
|
||||
unsigned i;
|
||||
gimple stmt;
|
||||
htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
|
||||
have_similar_memory_accesses_1, NULL);
|
||||
|
||||
for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
|
||||
{
|
||||
void **slot;
|
||||
|
||||
slot = htab_find_slot (seen, stmt, INSERT);
|
||||
|
||||
if (*slot)
|
||||
VEC_ordered_remove (gimple, *stmts, i);
|
||||
else
|
||||
{
|
||||
*slot = (void *) stmt;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
htab_delete (seen);
|
||||
}
|
||||
|
||||
|
|
|
@ -607,7 +607,6 @@ index_in_loop_nest (int var, VEC (loop_p, heap) *loop_nest)
|
|||
|
||||
void stores_from_loop (struct loop *, VEC (gimple, heap) **);
|
||||
void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
|
||||
void remove_similar_memory_refs (VEC (gimple, heap) **);
|
||||
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
|
||||
bool have_similar_memory_accesses (gimple, gimple);
|
||||
bool stmt_with_adjacent_zero_store_dr_p (gimple);
|
||||
|
|
|
@ -398,7 +398,28 @@ destroy_loop (struct loop *loop)
|
|||
rescan_loop_exit (exit, false, true);
|
||||
|
||||
for (i = 0; i < nbbs; i++)
|
||||
{
|
||||
/* We have made sure to not leave any dangling uses of SSA
|
||||
names defined in the loop. With the exception of virtuals.
|
||||
Make sure we replace all uses of virtual defs that will remain
|
||||
outside of the loop with the bare symbol as delete_basic_block
|
||||
will release them. */
|
||||
gimple_stmt_iterator gsi;
|
||||
for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
{
|
||||
gimple phi = gsi_stmt (gsi);
|
||||
if (!is_gimple_reg (gimple_phi_result (phi)))
|
||||
mark_virtual_phi_result_for_renaming (phi);
|
||||
}
|
||||
for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
{
|
||||
gimple stmt = gsi_stmt (gsi);
|
||||
tree vdef = gimple_vdef (stmt);
|
||||
if (vdef && TREE_CODE (vdef) == SSA_NAME)
|
||||
mark_virtual_operand_for_renaming (vdef);
|
||||
}
|
||||
delete_basic_block (bbs[i]);
|
||||
}
|
||||
free (bbs);
|
||||
|
||||
set_immediate_dominator (CDI_DOMINATORS, dest,
|
||||
|
@ -801,6 +822,9 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
|
|||
partition->kind = PKIND_NORMAL;
|
||||
partition->main_stmt = NULL;
|
||||
|
||||
if (!flag_tree_loop_distribute_patterns)
|
||||
return;
|
||||
|
||||
/* Perform general partition disqualification for builtins. */
|
||||
nb_iter = number_of_exit_cond_executions (loop);
|
||||
if (!nb_iter || nb_iter == chrec_dont_know)
|
||||
|
@ -876,31 +900,6 @@ similar_memory_accesses (struct graph *rdg, partition_t partition1,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Fuse all the partitions from PARTITIONS that contain similar memory
|
||||
references, i.e., we're taking care of cache locality. This
|
||||
function does not fuse those partitions that contain patterns that
|
||||
can be code generated with builtins. */
|
||||
|
||||
static void
|
||||
fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
|
||||
VEC (partition_t, heap) **partitions)
|
||||
{
|
||||
int p1, p2;
|
||||
partition_t partition1, partition2;
|
||||
|
||||
FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
|
||||
if (!partition_builtin_p (partition1))
|
||||
FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
|
||||
if (p1 != p2
|
||||
&& !partition_builtin_p (partition2)
|
||||
&& similar_memory_accesses (rdg, partition1, partition2))
|
||||
{
|
||||
bitmap_ior_into (partition1->stmts, partition2->stmts);
|
||||
VEC_ordered_remove (partition_t, *partitions, p2);
|
||||
p2--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Aggregate several components into a useful partition that is
|
||||
registered in the PARTITIONS vector. Partitions will be
|
||||
distributed in different loops. */
|
||||
|
@ -1100,7 +1099,55 @@ ldist_gen (struct loop *loop, struct graph *rdg,
|
|||
FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
|
||||
classify_partition (loop, rdg, partition);
|
||||
|
||||
fuse_partitions_with_similar_memory_accesses (rdg, &partitions);
|
||||
/* If we are only distributing patterns fuse all partitions that
|
||||
were not properly classified as builtins. Else fuse partitions
|
||||
with similar memory accesses. */
|
||||
if (!flag_tree_loop_distribution)
|
||||
{
|
||||
partition_t into;
|
||||
for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
|
||||
if (!partition_builtin_p (into))
|
||||
break;
|
||||
for (++i; VEC_iterate (partition_t, partitions, i, partition); ++i)
|
||||
if (!partition_builtin_p (partition))
|
||||
{
|
||||
bitmap_ior_into (into->stmts, partition->stmts);
|
||||
VEC_ordered_remove (partition_t, partitions, i);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
partition_t into;
|
||||
int j;
|
||||
for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
|
||||
{
|
||||
if (partition_builtin_p (into))
|
||||
continue;
|
||||
for (j = i + 1;
|
||||
VEC_iterate (partition_t, partitions, j, partition); ++j)
|
||||
{
|
||||
if (!partition_builtin_p (partition)
|
||||
/* ??? The following is horribly inefficient,
|
||||
we are re-computing and analyzing data-references
|
||||
of the stmts in the partitions all the time. */
|
||||
&& similar_memory_accesses (rdg, into, partition))
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "fusing partitions\n");
|
||||
dump_bitmap (dump_file, into->stmts);
|
||||
dump_bitmap (dump_file, partition->stmts);
|
||||
fprintf (dump_file, "because they have similar "
|
||||
"memory accesses\n");
|
||||
}
|
||||
bitmap_ior_into (into->stmts, partition->stmts);
|
||||
VEC_ordered_remove (partition_t, partitions, j);
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nbp = VEC_length (partition_t, partitions);
|
||||
if (nbp == 0
|
||||
|
@ -1108,7 +1155,10 @@ ldist_gen (struct loop *loop, struct graph *rdg,
|
|||
&& !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
|
||||
|| (nbp > 1
|
||||
&& partition_contains_all_rw (rdg, partitions)))
|
||||
{
|
||||
nbp = 0;
|
||||
goto ldist_done;
|
||||
}
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
dump_rdg_partitions (dump_file, partitions);
|
||||
|
@ -1116,10 +1166,6 @@ ldist_gen (struct loop *loop, struct graph *rdg,
|
|||
FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
|
||||
generate_code_for_partition (loop, partition, i < nbp - 1);
|
||||
|
||||
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
|
||||
mark_sym_for_renaming (gimple_vop (cfun));
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
|
||||
ldist_done:
|
||||
|
||||
BITMAP_FREE (remaining_stmts);
|
||||
|
@ -1152,16 +1198,6 @@ distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
|
|||
VEC (data_reference_p, heap) *datarefs;
|
||||
VEC (loop_p, heap) *loop_nest;
|
||||
|
||||
if (loop->num_nodes > 2)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file,
|
||||
"FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
|
||||
loop->num);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
datarefs = VEC_alloc (data_reference_p, heap, 10);
|
||||
dependence_relations = VEC_alloc (ddr_p, heap, 100);
|
||||
loop_nest = VEC_alloc (loop_p, heap, 3);
|
||||
|
@ -1215,48 +1251,38 @@ tree_loop_distribution (void)
|
|||
{
|
||||
struct loop *loop;
|
||||
loop_iterator li;
|
||||
int nb_generated_loops = 0;
|
||||
bool changed = false;
|
||||
|
||||
FOR_EACH_LOOP (li, loop, 0)
|
||||
/* We can at the moment only distribute non-nested loops, thus restrict
|
||||
walking to innermost loops. */
|
||||
FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
|
||||
{
|
||||
VEC (gimple, heap) *work_list = NULL;
|
||||
int num = loop->num;
|
||||
int nb_generated_loops = 0;
|
||||
|
||||
/* If the loop doesn't have a single exit we will fail anyway,
|
||||
so do that early. */
|
||||
if (!single_exit (loop))
|
||||
continue;
|
||||
|
||||
/* If both flag_tree_loop_distribute_patterns and
|
||||
flag_tree_loop_distribution are set, then only
|
||||
distribute_patterns is executed. */
|
||||
if (flag_tree_loop_distribute_patterns)
|
||||
{
|
||||
/* With the following working list, we're asking
|
||||
distribute_loop to separate from the rest of the loop the
|
||||
stores of the form "A[i] = 0". */
|
||||
/* Only distribute loops with a header and latch for now. */
|
||||
if (loop->num_nodes > 2)
|
||||
continue;
|
||||
|
||||
/* -ftree-loop-distribution strictly distributes more but also
|
||||
enables pattern detection. For now simply distribute all stores
|
||||
or memset like stores. */
|
||||
if (flag_tree_loop_distribution)
|
||||
stores_from_loop (loop, &work_list);
|
||||
else if (flag_tree_loop_distribute_patterns)
|
||||
stores_zero_from_loop (loop, &work_list);
|
||||
|
||||
/* Do nothing if there are no patterns to be distributed. */
|
||||
if (VEC_length (gimple, work_list) > 0)
|
||||
nb_generated_loops = distribute_loop (loop, work_list);
|
||||
}
|
||||
else if (flag_tree_loop_distribution)
|
||||
{
|
||||
/* With the following working list, we're asking
|
||||
distribute_loop to separate the stores of the loop: when
|
||||
dependences allow, it will end on having one store per
|
||||
loop. */
|
||||
stores_from_loop (loop, &work_list);
|
||||
|
||||
/* A simple heuristic for cache locality is to not split
|
||||
stores to the same array. Without this call, an unrolled
|
||||
loop would be split into as many loops as unroll factor,
|
||||
each loop storing in the same array. */
|
||||
remove_similar_memory_refs (&work_list);
|
||||
|
||||
nb_generated_loops = distribute_loop (loop, work_list);
|
||||
}
|
||||
if (nb_generated_loops > 0)
|
||||
changed = true;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
|
@ -1267,13 +1293,19 @@ tree_loop_distribution (void)
|
|||
fprintf (dump_file, "Loop %d is the same.\n", num);
|
||||
}
|
||||
|
||||
VEC_free (gimple, heap, work_list);
|
||||
}
|
||||
|
||||
if (changed)
|
||||
{
|
||||
mark_sym_for_renaming (gimple_vop (cfun));
|
||||
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
verify_loop_structure ();
|
||||
#endif
|
||||
|
||||
VEC_free (gimple, heap, work_list);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue