diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d21e519bc09..d79f2a139f4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2012-06-04 Richard Guenther + + * tree-data-ref.c (have_similar_memory_accesses_1): Remove. + (ref_base_address_1): Likewise. + (remove_similar_memory_refs): Likewise. + * tree-data-ref.h (remove_similar_memory_refs): Remove. + * tree-loop-distribution.c (classify_partition): Do not classify + as builtin if -ftree-loop-distribute-patterns is not enabled. + (fuse_partitions_with_similar_memory_accesses): Inline ... + (ldist_gen): ... here. Fuse all non-builtin partitions if + -ftree-loop-distribution is not enabled. Properly return + the number of created partitions. Do not update SSA form here + but ... + (tree_loop_distribution): ... once here for the whole function. + Only walk innermost loops, constrain loops we consider here + further. Do not call remove_similar_memory_refs. + (distribute_loop): Do not check number of loop nodes here. + 2012-06-04 Steven Bosscher * Makefile.in (GIMPLE_H): Do not depend on TARGET_H. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cb5c8820323..69019bfba5a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2012-06-04 Richard Guenther + + * gcc.dg/tree-ssa/ldist-11.c: Enable -ftree-loop-distribute-patterns. + * gcc.dg/tree-ssa/ldist-17.c: Likewise. + * gcc.dg/tree-ssa/ldist-pr45948.c: Likewise. + 2012-06-03 Alessandro Fanfarillo PR fortran/48831 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c index 88651e7b72d..e55a1b64ecc 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-all" } */ void foo (int * __restrict__ ia, int * __restrict__ ib, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c index 6690b913770..fe40bed5811 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ typedef int mad_fixed_t; struct mad_pcm diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c index 593031c8bcf..da3c7b626f9 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-pr45948.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ extern void bar(int); diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 45682462946..bbfc32154ef 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -5403,65 +5403,3 @@ have_similar_memory_accesses (gimple s1, gimple s2) VEC_free (data_ref_loc, heap, refs2); return res; } - -/* Helper function for the hashtab. */ - -static int -have_similar_memory_accesses_1 (const void *s1, const void *s2) -{ - return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1), - CONST_CAST_GIMPLE ((const_gimple) s2)); -} - -/* Helper function for the hashtab. */ - -static hashval_t -ref_base_address_1 (const void *s) -{ - gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s); - unsigned i; - VEC (data_ref_loc, heap) *refs; - data_ref_loc *ref; - hashval_t res = 0; - - get_references_in_stmt (stmt, &refs); - - FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref) - if (!ref->is_read) - { - res = htab_hash_pointer (ref_base_address (stmt, ref)); - break; - } - - VEC_free (data_ref_loc, heap, refs); - return res; -} - -/* Try to remove duplicated write data references from STMTS. */ - -void -remove_similar_memory_refs (VEC (gimple, heap) **stmts) -{ - unsigned i; - gimple stmt; - htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1, - have_similar_memory_accesses_1, NULL); - - for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); ) - { - void **slot; - - slot = htab_find_slot (seen, stmt, INSERT); - - if (*slot) - VEC_ordered_remove (gimple, *stmts, i); - else - { - *slot = (void *) stmt; - i++; - } - } - - htab_delete (seen); -} - diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index efce845625f..da4802ef4f2 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -607,7 +607,6 @@ index_in_loop_nest (int var, VEC (loop_p, heap) *loop_nest) void stores_from_loop (struct loop *, VEC (gimple, heap) **); void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **); -void remove_similar_memory_refs (VEC (gimple, heap) **); bool rdg_defs_used_in_other_loops_p (struct graph *, int); bool have_similar_memory_accesses (gimple, gimple); bool stmt_with_adjacent_zero_store_dr_p (gimple); diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 92464a6b1e8..1fc1d8d249b 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -398,7 +398,28 @@ destroy_loop (struct loop *loop) rescan_loop_exit (exit, false, true); for (i = 0; i < nbbs; i++) - delete_basic_block (bbs[i]); + { + /* We have made sure to not leave any dangling uses of SSA + names defined in the loop. With the exception of virtuals. + Make sure we replace all uses of virtual defs that will remain + outside of the loop with the bare symbol as delete_basic_block + will release them. */ + gimple_stmt_iterator gsi; + for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple phi = gsi_stmt (gsi); + if (!is_gimple_reg (gimple_phi_result (phi))) + mark_virtual_phi_result_for_renaming (phi); + } + for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + tree vdef = gimple_vdef (stmt); + if (vdef && TREE_CODE (vdef) == SSA_NAME) + mark_virtual_operand_for_renaming (vdef); + } + delete_basic_block (bbs[i]); + } free (bbs); set_immediate_dominator (CDI_DOMINATORS, dest, @@ -801,6 +822,9 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) partition->kind = PKIND_NORMAL; partition->main_stmt = NULL; + if (!flag_tree_loop_distribute_patterns) + return; + /* Perform general partition disqualification for builtins. */ nb_iter = number_of_exit_cond_executions (loop); if (!nb_iter || nb_iter == chrec_dont_know) @@ -876,31 +900,6 @@ similar_memory_accesses (struct graph *rdg, partition_t partition1, return false; } -/* Fuse all the partitions from PARTITIONS that contain similar memory - references, i.e., we're taking care of cache locality. This - function does not fuse those partitions that contain patterns that - can be code generated with builtins. */ - -static void -fuse_partitions_with_similar_memory_accesses (struct graph *rdg, - VEC (partition_t, heap) **partitions) -{ - int p1, p2; - partition_t partition1, partition2; - - FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1) - if (!partition_builtin_p (partition1)) - FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2) - if (p1 != p2 - && !partition_builtin_p (partition2) - && similar_memory_accesses (rdg, partition1, partition2)) - { - bitmap_ior_into (partition1->stmts, partition2->stmts); - VEC_ordered_remove (partition_t, *partitions, p2); - p2--; - } -} - /* Aggregate several components into a useful partition that is registered in the PARTITIONS vector. Partitions will be distributed in different loops. */ @@ -1100,7 +1099,55 @@ ldist_gen (struct loop *loop, struct graph *rdg, FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) classify_partition (loop, rdg, partition); - fuse_partitions_with_similar_memory_accesses (rdg, &partitions); + /* If we are only distributing patterns fuse all partitions that + were not properly classified as builtins. Else fuse partitions + with similar memory accesses. */ + if (!flag_tree_loop_distribution) + { + partition_t into; + for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i) + if (!partition_builtin_p (into)) + break; + for (++i; VEC_iterate (partition_t, partitions, i, partition); ++i) + if (!partition_builtin_p (partition)) + { + bitmap_ior_into (into->stmts, partition->stmts); + VEC_ordered_remove (partition_t, partitions, i); + i--; + } + } + else + { + partition_t into; + int j; + for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i) + { + if (partition_builtin_p (into)) + continue; + for (j = i + 1; + VEC_iterate (partition_t, partitions, j, partition); ++j) + { + if (!partition_builtin_p (partition) + /* ??? The following is horribly inefficient, + we are re-computing and analyzing data-references + of the stmts in the partitions all the time. */ + && similar_memory_accesses (rdg, into, partition)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they have similar " + "memory accesses\n"); + } + bitmap_ior_into (into->stmts, partition->stmts); + VEC_ordered_remove (partition_t, partitions, j); + j--; + } + } + } + } nbp = VEC_length (partition_t, partitions); if (nbp == 0 @@ -1108,7 +1155,10 @@ ldist_gen (struct loop *loop, struct graph *rdg, && !partition_builtin_p (VEC_index (partition_t, partitions, 0))) || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) - goto ldist_done; + { + nbp = 0; + goto ldist_done; + } if (dump_file && (dump_flags & TDF_DETAILS)) dump_rdg_partitions (dump_file, partitions); @@ -1116,10 +1166,6 @@ ldist_gen (struct loop *loop, struct graph *rdg, FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) generate_code_for_partition (loop, partition, i < nbp - 1); - rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); - mark_sym_for_renaming (gimple_vop (cfun)); - update_ssa (TODO_update_ssa_only_virtuals); - ldist_done: BITMAP_FREE (remaining_stmts); @@ -1152,16 +1198,6 @@ distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts) VEC (data_reference_p, heap) *datarefs; VEC (loop_p, heap) *loop_nest; - if (loop->num_nodes > 2) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - "FIXME: Loop %d not distributed: it has more than two basic blocks.\n", - loop->num); - - return res; - } - datarefs = VEC_alloc (data_reference_p, heap, 10); dependence_relations = VEC_alloc (ddr_p, heap, 100); loop_nest = VEC_alloc (loop_p, heap, 3); @@ -1215,48 +1251,38 @@ tree_loop_distribution (void) { struct loop *loop; loop_iterator li; - int nb_generated_loops = 0; + bool changed = false; - FOR_EACH_LOOP (li, loop, 0) + /* We can at the moment only distribute non-nested loops, thus restrict + walking to innermost loops. */ + FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) { VEC (gimple, heap) *work_list = NULL; int num = loop->num; + int nb_generated_loops = 0; /* If the loop doesn't have a single exit we will fail anyway, so do that early. */ if (!single_exit (loop)) continue; - /* If both flag_tree_loop_distribute_patterns and - flag_tree_loop_distribution are set, then only - distribute_patterns is executed. */ - if (flag_tree_loop_distribute_patterns) - { - /* With the following working list, we're asking - distribute_loop to separate from the rest of the loop the - stores of the form "A[i] = 0". */ - stores_zero_from_loop (loop, &work_list); + /* Only distribute loops with a header and latch for now. */ + if (loop->num_nodes > 2) + continue; - /* Do nothing if there are no patterns to be distributed. */ - if (VEC_length (gimple, work_list) > 0) - nb_generated_loops = distribute_loop (loop, work_list); - } - else if (flag_tree_loop_distribution) - { - /* With the following working list, we're asking - distribute_loop to separate the stores of the loop: when - dependences allow, it will end on having one store per - loop. */ - stores_from_loop (loop, &work_list); + /* -ftree-loop-distribution strictly distributes more but also + enables pattern detection. For now simply distribute all stores + or memset like stores. */ + if (flag_tree_loop_distribution) + stores_from_loop (loop, &work_list); + else if (flag_tree_loop_distribute_patterns) + stores_zero_from_loop (loop, &work_list); - /* A simple heuristic for cache locality is to not split - stores to the same array. Without this call, an unrolled - loop would be split into as many loops as unroll factor, - each loop storing in the same array. */ - remove_similar_memory_refs (&work_list); + if (VEC_length (gimple, work_list) > 0) + nb_generated_loops = distribute_loop (loop, work_list); - nb_generated_loops = distribute_loop (loop, work_list); - } + if (nb_generated_loops > 0) + changed = true; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1267,13 +1293,19 @@ tree_loop_distribution (void) fprintf (dump_file, "Loop %d is the same.\n", num); } -#ifdef ENABLE_CHECKING - verify_loop_structure (); -#endif - VEC_free (gimple, heap, work_list); } + if (changed) + { + mark_sym_for_renaming (gimple_vop (cfun)); + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + } + +#ifdef ENABLE_CHECKING + verify_loop_structure (); +#endif + return 0; }