tree-parloops.c: New file.
* tree-parloops.c: New file. * tree-ssa-operands.h (free_stmt_operands): Declare. * tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new basic block. * tree-pass.h (pass_parallelize_loops): Declare. * omp-low.c (expand_omp_parallel, expand_omp_for): Update SSA form for virtual operands. (build_omp_regions_1): Allow analysing just a single OMP region and its subregions. ( build_omp_regions_root, omp_expand_local): New functions. (build_omp_regions): Add argument to build_omp_regions_1 call. * builtins.def (DEF_GOMP_BUILTIN): Initialize OMP builtins when autoparallelization is run. * timevar.def (TV_TREE_PARALLELIZE_LOOPS): New. * tree-ssa-loop.c (gate_tree_parallelize_loops, tree_parallelize_loops, pass_parallelize_loops): New. * common.opt (ftree-parallelize-loops): New. * tree-flow.h (omp_expand_local, tree_duplicate_sese_tail, parallelize_loops): Declare. (add_phi_args_after_copy, split_loop_exit_edge): Declaration changed. * Makefile.in (tree-parloops.o): Added. * tree-cfg.c (add_phi_args_after_copy_edge, tree_duplicate_sese_tail): New functions. (add_phi_args_after_copy_bb): Use add_phi_args_after_copy_edge. (add_phi_args_after_copy): Call add_phi_args_after_copy_edge for one extra edge as well. (tree_duplicate_sese_region): Add argument to add_phi_args_after_copy. Use VEC_free to free doms vector. (move_block_to_fn): Update loop info. Remove phi nodes for virtual operands. Recompute operand caches in the new function. (move_sese_region_to_fn): Update loop info. * passes.c (init_optimization_passes): Add pass_parallelize_loops. * tree-ssa-operands.c (free_stmt_operands): New function. * doc/passes.texi: Document autoparallelization. * doc/invoke.texi (-ftree-parallelize-loops): New option. * gcc.dg/tree-ssa/parallelization-1.c: New test. From-SVN: r128517
This commit is contained in:
parent
2ae88ecd92
commit
5f40b3cbe2
|
@ -1,3 +1,42 @@
|
|||
2007-09-15 Zdenek Dvorak <ook@ucw.cz>
|
||||
|
||||
* tree-parloops.c: New file.
|
||||
* tree-ssa-operands.h (free_stmt_operands): Declare.
|
||||
* tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new basic
|
||||
block.
|
||||
* tree-pass.h (pass_parallelize_loops): Declare.
|
||||
* omp-low.c (expand_omp_parallel, expand_omp_for): Update SSA form for
|
||||
virtual operands.
|
||||
(build_omp_regions_1): Allow analysing just a single OMP region and
|
||||
its subregions.
|
||||
(build_omp_regions_root, omp_expand_local): New functions.
|
||||
(build_omp_regions): Add argument to build_omp_regions_1 call.
|
||||
* builtins.def (DEF_GOMP_BUILTIN): Initialize OMP builtins when
|
||||
autoparallelization is run.
|
||||
* timevar.def (TV_TREE_PARALLELIZE_LOOPS): New.
|
||||
* tree-ssa-loop.c (gate_tree_parallelize_loops, tree_parallelize_loops,
|
||||
pass_parallelize_loops): New.
|
||||
* common.opt (ftree-parallelize-loops): New.
|
||||
* tree-flow.h (omp_expand_local, tree_duplicate_sese_tail,
|
||||
parallelize_loops): Declare.
|
||||
(add_phi_args_after_copy, split_loop_exit_edge): Declaration changed.
|
||||
* Makefile.in (tree-parloops.o): Added.
|
||||
* tree-cfg.c (add_phi_args_after_copy_edge, tree_duplicate_sese_tail):
|
||||
New functions.
|
||||
(add_phi_args_after_copy_bb): Use add_phi_args_after_copy_edge.
|
||||
(add_phi_args_after_copy): Call add_phi_args_after_copy_edge for
|
||||
one extra edge as well.
|
||||
(tree_duplicate_sese_region): Add argument to add_phi_args_after_copy.
|
||||
Use VEC_free to free doms vector.
|
||||
(move_block_to_fn): Update loop info. Remove phi nodes for virtual
|
||||
operands. Recompute operand caches in the new function.
|
||||
(move_sese_region_to_fn): Update loop info.
|
||||
* passes.c (init_optimization_passes): Add pass_parallelize_loops.
|
||||
* tree-ssa-operands.c (free_stmt_operands): New function.
|
||||
|
||||
* doc/passes.texi: Document autoparallelization.
|
||||
* doc/invoke.texi (-ftree-parallelize-loops): New option.
|
||||
|
||||
2007-09-15 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
PR target/33062
|
||||
|
|
|
@ -1153,6 +1153,7 @@ OBJS-common = \
|
|||
tree-object-size.o \
|
||||
tree-optimize.o \
|
||||
tree-outof-ssa.o \
|
||||
tree-parloops.o \
|
||||
tree-phinodes.o \
|
||||
tree-predcom.o \
|
||||
tree-pretty-print.o \
|
||||
|
@ -2270,6 +2271,9 @@ tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
|||
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
|
||||
$(TARGET_H) tree-chrec.h $(OBSTACK_H)
|
||||
tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(TREE_FLOW_H) $(TREE_H) $(RTL_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) $(GGC_H) \
|
||||
$(DIAGNOSTIC_H) tree-pass.h $(SCEV_H) langhooks.h gt-tree-parloops.h
|
||||
tree-stdarg.o: tree-stdarg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(TREE_H) $(FUNCTION_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) tree-pass.h \
|
||||
tree-stdarg.h $(TARGET_H) langhooks.h
|
||||
|
@ -3091,7 +3095,7 @@ GTFILES = $(srcdir)/input.h $(srcdir)/coretypes.h \
|
|||
$(srcdir)/tree-ssa-operands.h \
|
||||
$(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \
|
||||
$(srcdir)/ipa-reference.c $(srcdir)/tree-ssa-structalias.h \
|
||||
$(srcdir)/tree-ssa-structalias.c \
|
||||
$(srcdir)/tree-ssa-structalias.c $(srcdir)/tree-parloops.c \
|
||||
$(srcdir)/omp-low.c $(srcdir)/varpool.c \
|
||||
$(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
|
||||
@all_gtfiles@
|
||||
|
|
|
@ -139,7 +139,8 @@ along with GCC; see the file COPYING3. If not see
|
|||
#undef DEF_GOMP_BUILTIN
|
||||
#define DEF_GOMP_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
|
||||
DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \
|
||||
false, true, true, ATTRS, false, flag_openmp)
|
||||
false, true, true, ATTRS, false, \
|
||||
(flag_openmp || flag_tree_parallelize_loops))
|
||||
|
||||
/* Define an attribute list for math functions that are normally
|
||||
"impure" because some of them may write into global memory for
|
||||
|
|
|
@ -1088,6 +1088,10 @@ ftree-loop-optimize
|
|||
Common Report Var(flag_tree_loop_optimize) Init(1) Optimization
|
||||
Enable loop optimizations on tree level
|
||||
|
||||
ftree-parallelize-loops=
|
||||
Common Report Joined UInteger Var(flag_tree_parallelize_loops) Init(1)
|
||||
Enable automatic parallelization of loops
|
||||
|
||||
ftree-pre
|
||||
Common Report Var(flag_tree_pre) Optimization
|
||||
Enable SSA-PRE optimization on trees
|
||||
|
|
|
@ -358,7 +358,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-fvariable-expansion-in-unroller -ftree-reassoc @gol
|
||||
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
|
||||
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
|
||||
-fcheck-data-deps @gol
|
||||
-fcheck-data-deps -ftree-parallelize-loops @gol
|
||||
-ftree-dominator-opts -ftree-dse -ftree-copyrename -ftree-sink @gol
|
||||
-ftree-ch -ftree-sra -ftree-ter -ftree-fre -ftree-vectorize @gol
|
||||
-ftree-vect-loop-version -fvect-cost-model -ftree-salias -fipa-pta -fweb @gol
|
||||
|
@ -5744,6 +5744,14 @@ in connection with unrolling.
|
|||
Perform induction variable optimizations (strength reduction, induction
|
||||
variable merging and induction variable elimination) on trees.
|
||||
|
||||
@item -ftree-parallelize-loops=n
|
||||
@opindex ftree-parallelize-loops=n
|
||||
Parallelize loops, i.e., split their iteration space to run in n threads.
|
||||
This is only possible for loops whose iterations are independent
|
||||
and can be arbitrarily reordered. The optimization is only
|
||||
profitable on multiprocessor machines, for loops that are CPU-intensive,
|
||||
rather than constrained e.g. by memory bandwidth.
|
||||
|
||||
@item -ftree-sra
|
||||
Perform scalar replacement of aggregates. This pass replaces structure
|
||||
references with scalars to prevent committing structures to memory too
|
||||
|
|
|
@ -438,6 +438,9 @@ The pass is implemented in @file{tree-vectorizer.c} (the main driver and general
|
|||
utilities), @file{tree-vect-analyze.c} and @file{tree-vect-transform.c}.
|
||||
Analysis of data references is in @file{tree-data-ref.c}.
|
||||
|
||||
Autoparallelization. This pass splits the loop iteration space to run
|
||||
in several threads. The pass is implemented in @file{tree-parloops.c}.
|
||||
|
||||
@item Tree level if-conversion for vectorizer
|
||||
|
||||
This pass applies if-conversion to simple loops to help vectorizer.
|
||||
|
|
|
@ -2600,6 +2600,7 @@ expand_omp_parallel (struct omp_region *region)
|
|||
|
||||
/* Emit a library call to launch the children threads. */
|
||||
expand_parallel_call (region, new_bb, entry_stmt, ws_args);
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3282,6 +3283,8 @@ expand_omp_for (struct omp_region *region)
|
|||
int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
|
||||
expand_omp_for_generic (region, &fd, start_ix, next_ix);
|
||||
}
|
||||
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3591,10 +3594,13 @@ expand_omp (struct omp_region *region)
|
|||
|
||||
|
||||
/* Helper for build_omp_regions. Scan the dominator tree starting at
|
||||
block BB. PARENT is the region that contains BB. */
|
||||
block BB. PARENT is the region that contains BB. If SINGLE_TREE is
|
||||
true, the function ends once a single tree is built (otherwise, whole
|
||||
forest of OMP constructs may be built). */
|
||||
|
||||
static void
|
||||
build_omp_regions_1 (basic_block bb, struct omp_region *parent)
|
||||
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
|
||||
bool single_tree)
|
||||
{
|
||||
block_stmt_iterator si;
|
||||
tree stmt;
|
||||
|
@ -3643,12 +3649,44 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent)
|
|||
}
|
||||
}
|
||||
|
||||
if (single_tree && !parent)
|
||||
return;
|
||||
|
||||
for (son = first_dom_son (CDI_DOMINATORS, bb);
|
||||
son;
|
||||
son = next_dom_son (CDI_DOMINATORS, son))
|
||||
build_omp_regions_1 (son, parent);
|
||||
build_omp_regions_1 (son, parent, single_tree);
|
||||
}
|
||||
|
||||
/* Builds the tree of OMP regions rooted at ROOT, storing it to
|
||||
root_omp_region. */
|
||||
|
||||
static void
|
||||
build_omp_regions_root (basic_block root)
|
||||
{
|
||||
gcc_assert (root_omp_region == NULL);
|
||||
build_omp_regions_1 (root, NULL, true);
|
||||
gcc_assert (root_omp_region != NULL);
|
||||
}
|
||||
|
||||
/* Expands omp construct (and its subconstructs) starting in HEAD. */
|
||||
|
||||
void
|
||||
omp_expand_local (basic_block head)
|
||||
{
|
||||
build_omp_regions_root (head);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "\nOMP region tree\n\n");
|
||||
dump_omp_region (dump_file, root_omp_region, 0);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
remove_exit_barriers (root_omp_region);
|
||||
expand_omp (root_omp_region);
|
||||
|
||||
free_omp_regions ();
|
||||
}
|
||||
|
||||
/* Scan the CFG and build a tree of OMP regions. Return the root of
|
||||
the OMP region tree. */
|
||||
|
@ -3658,7 +3696,7 @@ build_omp_regions (void)
|
|||
{
|
||||
gcc_assert (root_omp_region == NULL);
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
|
||||
build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -629,6 +629,7 @@ init_optimization_passes (void)
|
|||
NEXT_PASS (pass_dce_loop);
|
||||
}
|
||||
NEXT_PASS (pass_complete_unroll);
|
||||
NEXT_PASS (pass_parallelize_loops);
|
||||
NEXT_PASS (pass_loop_prefetch);
|
||||
NEXT_PASS (pass_iv_optimize);
|
||||
NEXT_PASS (pass_tree_loop_done);
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2007-09-15 Zdenek Dvorak <ook@ucw.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/parallelization-1.c: New test.
|
||||
|
||||
2007-09-15 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/pr33373b.c: New test.
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-final_cleanup" } */
|
||||
|
||||
void abort (void);
|
||||
|
||||
/* Test body: the first loop stores i + 3 into x[i] for i in [0, N), the
   second loop re-reads every element and aborts on a mismatch.  The
   dg-final directives of this test expect the first loop to be the one
   that gets parallelized.  */
void parloop (int N)
|
||||
{
|
||||
int i;
|
||||
int x[10000000];
|
||||
|
||||
/* Initialization loop.  */
for (i = 0; i < N; i++)
|
||||
x[i] = i + 3;
|
||||
|
||||
/* Verification loop.  */
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (x[i] != i + 3)
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Entry point of the test: runs parloop with 10000000 iterations and
   returns 0.  */
int main(void)
|
||||
{
|
||||
parloop(10000000);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that the first loop in parloop got parallelized. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
|
||||
/* { dg-final { scan-tree-dump-times "loopfn" 5 "final_cleanup" } } */
|
||||
/* { dg-final { cleanup-tree-dump "parloops" } } */
|
||||
/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
|
|
@ -120,6 +120,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv")
|
|||
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
|
||||
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
|
||||
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
|
||||
DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences")
|
||||
|
|
|
@ -310,7 +310,7 @@ tail_duplicate (void)
|
|||
copy = duplicate_block (bb2, e, bb);
|
||||
flush_pending_stmts (e);
|
||||
|
||||
add_phi_args_after_copy (&copy, 1);
|
||||
add_phi_args_after_copy (&copy, 1, NULL);
|
||||
|
||||
/* Reconsider the original copy of block we've duplicated.
|
||||
Removing the most common predecessor may make it to be
|
||||
|
|
241
gcc/tree-cfg.c
241
gcc/tree-cfg.c
|
@ -5009,25 +5009,20 @@ tree_duplicate_bb (basic_block bb)
|
|||
return new_bb;
|
||||
}
|
||||
|
||||
/* Adds phi node arguments for edge E_COPY after basic block duplication. */
|
||||
|
||||
/* Basic block BB_COPY was created by code duplication. Add phi node
|
||||
arguments for edges going out of BB_COPY. The blocks that were
|
||||
duplicated have BB_DUPLICATED set. */
|
||||
|
||||
void
|
||||
add_phi_args_after_copy_bb (basic_block bb_copy)
|
||||
static void
|
||||
add_phi_args_after_copy_edge (edge e_copy)
|
||||
{
|
||||
basic_block bb, dest;
|
||||
edge e, e_copy;
|
||||
basic_block bb, bb_copy = e_copy->src, dest;
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
tree phi, phi_copy, phi_next, def;
|
||||
|
||||
bb = get_bb_original (bb_copy);
|
||||
|
||||
FOR_EACH_EDGE (e_copy, ei, bb_copy->succs)
|
||||
{
|
||||
if (!phi_nodes (e_copy->dest))
|
||||
continue;
|
||||
return;
|
||||
|
||||
bb = bb_copy->flags & BB_DUPLICATED ? get_bb_original (bb_copy) : bb_copy;
|
||||
|
||||
if (e_copy->dest->flags & BB_DUPLICATED)
|
||||
dest = get_bb_original (e_copy->dest);
|
||||
|
@ -5041,9 +5036,11 @@ add_phi_args_after_copy_bb (basic_block bb_copy)
|
|||
In this case we are not looking for edge to dest, but to
|
||||
duplicated block whose original was dest. */
|
||||
FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
{
|
||||
if ((e->dest->flags & BB_DUPLICATED)
|
||||
&& get_bb_original (e->dest) == dest)
|
||||
break;
|
||||
}
|
||||
|
||||
gcc_assert (e != NULL);
|
||||
}
|
||||
|
@ -5056,15 +5053,33 @@ add_phi_args_after_copy_bb (basic_block bb_copy)
|
|||
def = PHI_ARG_DEF_FROM_EDGE (phi, e);
|
||||
add_phi_arg (phi_copy, def, e_copy);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Basic block BB_COPY was created by code duplication. Add phi node
|
||||
arguments for edges going out of BB_COPY. The blocks that were
|
||||
duplicated have BB_DUPLICATED set. */
|
||||
|
||||
void
|
||||
add_phi_args_after_copy_bb (basic_block bb_copy)
|
||||
{
|
||||
edge_iterator ei;
|
||||
edge e_copy;
|
||||
|
||||
FOR_EACH_EDGE (e_copy, ei, bb_copy->succs)
|
||||
{
|
||||
add_phi_args_after_copy_edge (e_copy);
|
||||
}
|
||||
}
|
||||
|
||||
/* Blocks in REGION_COPY array of length N_REGION were created by
|
||||
duplication of basic blocks. Add phi node arguments for edges
|
||||
going from these blocks. */
|
||||
going from these blocks. If E_COPY is not NULL, also add
|
||||
phi node arguments for its destination.  */
|
||||
|
||||
void
|
||||
add_phi_args_after_copy (basic_block *region_copy, unsigned n_region)
|
||||
add_phi_args_after_copy (basic_block *region_copy, unsigned n_region,
|
||||
edge e_copy)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
@ -5073,6 +5088,8 @@ add_phi_args_after_copy (basic_block *region_copy, unsigned n_region)
|
|||
|
||||
for (i = 0; i < n_region; i++)
|
||||
add_phi_args_after_copy_bb (region_copy[i]);
|
||||
if (e_copy)
|
||||
add_phi_args_after_copy_edge (e_copy);
|
||||
|
||||
for (i = 0; i < n_region; i++)
|
||||
region_copy[i]->flags &= ~BB_DUPLICATED;
|
||||
|
@ -5210,10 +5227,180 @@ tree_duplicate_sese_region (edge entry, edge exit,
|
|||
set_immediate_dominator (CDI_DOMINATORS, entry->dest, entry->src);
|
||||
VEC_safe_push (basic_block, heap, doms, get_bb_original (entry->dest));
|
||||
iterate_fix_dominators (CDI_DOMINATORS, doms, false);
|
||||
free (doms);
|
||||
VEC_free (basic_block, heap, doms);
|
||||
|
||||
/* Add the other PHI node arguments. */
|
||||
add_phi_args_after_copy (region_copy, n_region);
|
||||
add_phi_args_after_copy (region_copy, n_region, NULL);
|
||||
|
||||
/* Update the SSA web. */
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
if (free_region_copy)
|
||||
free (region_copy);
|
||||
|
||||
free_original_copy_tables ();
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Duplicates REGION consisting of N_REGION blocks. The new blocks
|
||||
are stored to REGION_COPY in the same order in which they appear
|
||||
in REGION, if REGION_COPY is not NULL. ENTRY is the entry to
|
||||
the region, EXIT an exit from it. The condition guarding EXIT
|
||||
is moved to ENTRY. Returns true if duplication succeeds, false
|
||||
otherwise.
|
||||
|
||||
For example,
|
||||
|
||||
some_code;
|
||||
if (cond)
|
||||
A;
|
||||
else
|
||||
B;
|
||||
|
||||
is transformed to
|
||||
|
||||
if (cond)
|
||||
{
|
||||
some_code;
|
||||
A;
|
||||
}
|
||||
else
|
||||
{
|
||||
some_code;
|
||||
B;
|
||||
}
|
||||
*/
|
||||
|
||||
bool
|
||||
tree_duplicate_sese_tail (edge entry, edge exit,
|
||||
basic_block *region, unsigned n_region,
|
||||
basic_block *region_copy)
|
||||
{
|
||||
unsigned i;
|
||||
bool free_region_copy = false;
|
||||
struct loop *loop = exit->dest->loop_father;
|
||||
struct loop *orig_loop = entry->dest->loop_father;
|
||||
basic_block switch_bb, entry_bb, nentry_bb;
|
||||
VEC (basic_block, heap) *doms;
|
||||
int total_freq = 0, exit_freq = 0;
|
||||
gcov_type total_count = 0, exit_count = 0;
|
||||
edge exits[2], nexits[2], e;
|
||||
block_stmt_iterator bsi;
|
||||
tree cond;
|
||||
edge sorig, snew;
|
||||
|
||||
gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
|
||||
exits[0] = exit;
|
||||
exits[1] = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
|
||||
|
||||
if (!can_copy_bbs_p (region, n_region))
|
||||
return false;
|
||||
|
||||
/* Some sanity checking. Note that we do not check for all possible
|
||||
misuses of the function. I.e. if you ask to copy something weird
|
||||
(e.g., in the example, if there is a jump from inside to the middle
|
||||
of some_code, or some_code defines some of the values used in cond)
|
||||
it will work, but the resulting code will not be correct. */
|
||||
for (i = 0; i < n_region; i++)
|
||||
{
|
||||
/* We do not handle subloops, i.e. all the blocks must belong to the
|
||||
same loop. */
|
||||
if (region[i]->loop_father != orig_loop)
|
||||
return false;
|
||||
|
||||
if (region[i] == orig_loop->latch)
|
||||
return false;
|
||||
}
|
||||
|
||||
initialize_original_copy_tables ();
|
||||
set_loop_copy (orig_loop, loop);
|
||||
|
||||
if (!region_copy)
|
||||
{
|
||||
region_copy = XNEWVEC (basic_block, n_region);
|
||||
free_region_copy = true;
|
||||
}
|
||||
|
||||
gcc_assert (!need_ssa_update_p ());
|
||||
|
||||
/* Record blocks outside the region that are dominated by something
|
||||
inside. */
|
||||
doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region);
|
||||
|
||||
if (exit->src->count)
|
||||
{
|
||||
total_count = exit->src->count;
|
||||
exit_count = exit->count;
|
||||
/* Fix up corner cases, to avoid division by zero or creation of negative
|
||||
frequencies. */
|
||||
if (exit_count > total_count)
|
||||
exit_count = total_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
total_freq = exit->src->frequency;
|
||||
exit_freq = EDGE_FREQUENCY (exit);
|
||||
/* Fix up corner cases, to avoid division by zero or creation of negative
|
||||
frequencies. */
|
||||
if (total_freq == 0)
|
||||
total_freq = 1;
|
||||
if (exit_freq > total_freq)
|
||||
exit_freq = total_freq;
|
||||
}
|
||||
|
||||
copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop,
|
||||
split_edge_bb_loc (exit));
|
||||
if (total_count)
|
||||
{
|
||||
scale_bbs_frequencies_gcov_type (region, n_region,
|
||||
total_count - exit_count,
|
||||
total_count);
|
||||
scale_bbs_frequencies_gcov_type (region_copy, n_region, exit_count,
|
||||
total_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq,
|
||||
total_freq);
|
||||
scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq);
|
||||
}
|
||||
|
||||
/* Create the switch block, and put the exit condition to it. */
|
||||
entry_bb = entry->dest;
|
||||
nentry_bb = get_bb_copy (entry_bb);
|
||||
if (!last_stmt (entry->src)
|
||||
|| !stmt_ends_bb_p (last_stmt (entry->src)))
|
||||
switch_bb = entry->src;
|
||||
else
|
||||
switch_bb = split_edge (entry);
|
||||
set_immediate_dominator (CDI_DOMINATORS, nentry_bb, switch_bb);
|
||||
|
||||
bsi = bsi_last (switch_bb);
|
||||
cond = last_stmt (exit->src);
|
||||
gcc_assert (TREE_CODE (cond) == COND_EXPR);
|
||||
bsi_insert_after (&bsi, unshare_expr (cond), BSI_NEW_STMT);
|
||||
|
||||
sorig = single_succ_edge (switch_bb);
|
||||
sorig->flags = exits[1]->flags;
|
||||
snew = make_edge (switch_bb, nentry_bb, exits[0]->flags);
|
||||
|
||||
/* Register the new edge from SWITCH_BB in loop exit lists. */
|
||||
rescan_loop_exit (snew, true, false);
|
||||
|
||||
/* Add the PHI node arguments. */
|
||||
add_phi_args_after_copy (region_copy, n_region, snew);
|
||||
|
||||
/* Get rid of now superfluous conditions and associated edges (and phi node
|
||||
arguments). */
|
||||
e = redirect_edge_and_branch (exits[0], exits[1]->dest);
|
||||
PENDING_STMT (e) = NULL_TREE;
|
||||
e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
|
||||
PENDING_STMT (e) = NULL_TREE;
|
||||
|
||||
/* Anything that is outside of the region, but was dominated by something
|
||||
inside needs to update dominance info. */
|
||||
iterate_fix_dominators (CDI_DOMINATORS, doms, false);
|
||||
VEC_free (basic_block, heap, doms);
|
||||
|
||||
/* Update the SSA web. */
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
@ -5456,10 +5643,12 @@ move_block_to_fn (struct function *dest_cfun, basic_block bb,
|
|||
block_stmt_iterator si;
|
||||
struct move_stmt_d d;
|
||||
unsigned old_len, new_len;
|
||||
tree phi;
|
||||
tree phi, next_phi;
|
||||
|
||||
/* Remove BB from dominance structures. */
|
||||
delete_from_dominance_info (CDI_DOMINATORS, bb);
|
||||
if (current_loops)
|
||||
remove_bb_from_loops (bb);
|
||||
|
||||
/* Link BB to the new linked list. */
|
||||
move_block_after (bb, after);
|
||||
|
@ -5494,14 +5683,20 @@ move_block_to_fn (struct function *dest_cfun, basic_block bb,
|
|||
bb->index, bb);
|
||||
|
||||
/* Remap the variables in phi nodes. */
|
||||
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
|
||||
for (phi = phi_nodes (bb); phi; phi = next_phi)
|
||||
{
|
||||
use_operand_p use;
|
||||
tree op = PHI_RESULT (phi);
|
||||
ssa_op_iter oi;
|
||||
|
||||
next_phi = PHI_CHAIN (phi);
|
||||
if (!is_gimple_reg (op))
|
||||
{
|
||||
/* Remove the phi nodes for virtual operands (alias analysis will be
|
||||
run for the new function, anyway). */
|
||||
remove_phi_node (phi, NULL, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
SET_PHI_RESULT (phi, replace_ssa_name (op, vars_map, dest_cfun->decl));
|
||||
FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
|
||||
|
@ -5569,7 +5764,12 @@ move_block_to_fn (struct function *dest_cfun, basic_block bb,
|
|||
gimple_remove_stmt_histograms (cfun, stmt);
|
||||
}
|
||||
|
||||
/* We cannot leave any operands allocated from the operand caches of
|
||||
the current function. */
|
||||
free_stmt_operands (stmt);
|
||||
push_cfun (dest_cfun);
|
||||
update_stmt (stmt);
|
||||
pop_cfun ();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5658,6 +5858,7 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
|
|||
edge_iterator ei;
|
||||
htab_t new_label_map;
|
||||
struct pointer_map_t *vars_map;
|
||||
struct loop *loop = entry_bb->loop_father;
|
||||
|
||||
/* If ENTRY does not strictly dominate EXIT, this cannot be an SESE
|
||||
region. */
|
||||
|
@ -5786,6 +5987,8 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
|
|||
/* Back in the original function, the SESE region has disappeared,
|
||||
create a new basic block in its place. */
|
||||
bb = create_empty_bb (entry_pred[0]);
|
||||
if (current_loops)
|
||||
add_bb_to_loop (bb, loop);
|
||||
for (i = 0; i < num_entry_edges; i++)
|
||||
{
|
||||
e = make_edge (entry_pred[i], bb, entry_flag[i]);
|
||||
|
|
|
@ -704,6 +704,7 @@ extern struct omp_region *root_omp_region;
|
|||
extern struct omp_region *new_omp_region (basic_block, enum tree_code,
|
||||
struct omp_region *);
|
||||
extern void free_omp_regions (void);
|
||||
void omp_expand_local (basic_block);
|
||||
extern tree find_omp_clause (tree, enum tree_code);
|
||||
tree copy_var_decl (tree, tree, tree);
|
||||
|
||||
|
@ -753,8 +754,10 @@ extern tree tree_block_label (basic_block);
|
|||
extern void extract_true_false_edges_from_block (basic_block, edge *, edge *);
|
||||
extern bool tree_duplicate_sese_region (edge, edge, basic_block *, unsigned,
|
||||
basic_block *);
|
||||
extern bool tree_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
|
||||
basic_block *);
|
||||
extern void add_phi_args_after_copy_bb (basic_block);
|
||||
extern void add_phi_args_after_copy (basic_block *, unsigned);
|
||||
extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
|
||||
extern bool tree_purge_dead_abnormal_call_edges (basic_block);
|
||||
extern bool tree_purge_dead_eh_edges (basic_block);
|
||||
extern bool tree_purge_all_dead_eh_edges (const_bitmap);
|
||||
|
@ -971,6 +974,7 @@ unsigned int tree_ssa_prefetch_arrays (void);
|
|||
unsigned int remove_empty_loops (void);
|
||||
void tree_ssa_iv_optimize (void);
|
||||
unsigned tree_predictive_commoning (void);
|
||||
bool parallelize_loops (void);
|
||||
|
||||
bool number_of_iterations_exit (struct loop *, edge,
|
||||
struct tree_niter_desc *niter, bool);
|
||||
|
@ -992,7 +996,7 @@ void verify_loop_closed_ssa (void);
|
|||
bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *);
|
||||
void create_iv (tree, tree, tree, struct loop *, block_stmt_iterator *, bool,
|
||||
tree *, tree *);
|
||||
void split_loop_exit_edge (edge);
|
||||
basic_block split_loop_exit_edge (edge);
|
||||
unsigned force_expr_to_var_cost (tree);
|
||||
void standard_iv_increment_position (struct loop *, block_stmt_iterator *,
|
||||
bool *);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -268,6 +268,7 @@ extern struct tree_opt_pass pass_record_bounds;
|
|||
extern struct tree_opt_pass pass_if_conversion;
|
||||
extern struct tree_opt_pass pass_vectorize;
|
||||
extern struct tree_opt_pass pass_complete_unroll;
|
||||
extern struct tree_opt_pass pass_parallelize_loops;
|
||||
extern struct tree_opt_pass pass_loop_prefetch;
|
||||
extern struct tree_opt_pass pass_iv_optimize;
|
||||
extern struct tree_opt_pass pass_tree_loop_done;
|
||||
|
|
|
@ -454,9 +454,9 @@ verify_loop_closed_ssa (void)
|
|||
}
|
||||
|
||||
/* Split loop exit edge EXIT. The things are a bit complicated by a need to
|
||||
preserve the loop closed ssa form. */
|
||||
preserve the loop closed ssa form. The newly created block is returned. */
|
||||
|
||||
void
|
||||
basic_block
|
||||
split_loop_exit_edge (edge exit)
|
||||
{
|
||||
basic_block dest = exit->dest;
|
||||
|
@ -483,6 +483,8 @@ split_loop_exit_edge (edge exit)
|
|||
add_phi_arg (new_phi, name, exit);
|
||||
SET_USE (op_p, new_name);
|
||||
}
|
||||
|
||||
return bb;
|
||||
}
|
||||
|
||||
/* Returns the basic block in that statements should be emitted for induction
|
||||
|
|
|
@ -468,6 +468,42 @@ struct tree_opt_pass pass_complete_unroll =
|
|||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Parallelization. */
|
||||
|
||||
static bool
|
||||
gate_tree_parallelize_loops (void)
|
||||
{
|
||||
return flag_tree_parallelize_loops != 1;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
tree_parallelize_loops (void)
|
||||
{
|
||||
if (number_of_loops () <= 1)
|
||||
return 0;
|
||||
|
||||
if (parallelize_loops ())
|
||||
return TODO_cleanup_cfg | TODO_rebuild_alias;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Pass descriptor for loop autoparallelization.  The pass is dumped under
   the name "parloops", is enabled by gate_tree_parallelize_loops and run
   by tree_parallelize_loops.  It requires the CFG and SSA properties and
   finishes with a function dump and loop verification.  */
struct tree_opt_pass pass_parallelize_loops =
|
||||
{
|
||||
"parloops", /* name */
|
||||
gate_tree_parallelize_loops, /* gate */
|
||||
tree_parallelize_loops, /* execute */
|
||||
NULL, /* sub */
|
||||
NULL, /* next */
|
||||
0, /* static_pass_number */
|
||||
TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
|
||||
PROP_cfg | PROP_ssa, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
|
||||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Prefetching. */
|
||||
|
||||
static unsigned int
|
||||
|
|
|
@ -2450,6 +2450,56 @@ build_ssa_operands (tree stmt)
|
|||
ann->references_memory = true;
|
||||
}
|
||||
|
||||
/* Releases the operands of STMT back to their freelists, and clears
|
||||
the stmt operand lists. */
|
||||
|
||||
void
|
||||
free_stmt_operands (tree stmt)
|
||||
{
|
||||
def_optype_p defs = DEF_OPS (stmt), last_def;
|
||||
use_optype_p uses = USE_OPS (stmt), last_use;
|
||||
voptype_p vuses = VUSE_OPS (stmt);
|
||||
voptype_p vdefs = VDEF_OPS (stmt), vdef, next_vdef;
|
||||
unsigned i;
|
||||
|
||||
if (defs)
|
||||
{
|
||||
for (last_def = defs; last_def->next; last_def = last_def->next)
|
||||
continue;
|
||||
last_def->next = gimple_ssa_operands (cfun)->free_defs;
|
||||
gimple_ssa_operands (cfun)->free_defs = defs;
|
||||
DEF_OPS (stmt) = NULL;
|
||||
}
|
||||
|
||||
if (uses)
|
||||
{
|
||||
for (last_use = uses; last_use->next; last_use = last_use->next)
|
||||
delink_imm_use (USE_OP_PTR (last_use));
|
||||
delink_imm_use (USE_OP_PTR (last_use));
|
||||
last_use->next = gimple_ssa_operands (cfun)->free_uses;
|
||||
gimple_ssa_operands (cfun)->free_uses = uses;
|
||||
USE_OPS (stmt) = NULL;
|
||||
}
|
||||
|
||||
if (vuses)
|
||||
{
|
||||
for (i = 0; i < VUSE_NUM (vuses); i++)
|
||||
delink_imm_use (VUSE_OP_PTR (vuses, i));
|
||||
add_vop_to_freelist (vuses);
|
||||
VUSE_OPS (stmt) = NULL;
|
||||
}
|
||||
|
||||
if (vdefs)
|
||||
{
|
||||
for (vdef = vdefs; vdef; vdef = next_vdef)
|
||||
{
|
||||
next_vdef = vdef->next;
|
||||
delink_imm_use (VDEF_OP_PTR (vdef, 0));
|
||||
add_vop_to_freelist (vdef);
|
||||
}
|
||||
VDEF_OPS (stmt) = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free any operands vectors in OPS. */
|
||||
|
||||
|
|
|
@ -206,6 +206,7 @@ extern void init_ssa_operands (void);
|
|||
extern void fini_ssa_operands (void);
|
||||
extern void free_ssa_operands (stmt_operands_p);
|
||||
extern void update_stmt_operands (tree);
|
||||
extern void free_stmt_operands (tree);
|
||||
extern bool verify_imm_links (FILE *f, tree var);
|
||||
|
||||
extern void copy_virtual_operands (tree, tree);
|
||||
|
|
Loading…
Reference in New Issue