re PR middle-end/43864 (Same basic blocks should be merged)
2011-09-27 Tom de Vries <tom@codesourcery.com> PR middle-end/43864 * tree-ssa-tail-merge.c: New file. (struct same_succ_def): Define. (same_succ, const_same_succ): New typedef. (struct bb_cluster_def): Define. (bb_cluster, const_bb_cluster): New typedef. (struct aux_bb_info): Define. (BB_SIZE, BB_SAME_SUCC, BB_CLUSTER, BB_VOP_AT_EXIT): Define. (gvn_uses_equal): New function. (same_succ_print, same_succ_print_traverse, update_dep_bb) (stmt_update_dep_bb, local_def, same_succ_hash) (inverse_flags, same_succ_equal, same_succ_alloc, same_succ_delete) (same_succ_reset): New function. (same_succ_htab, same_succ_edge_flags) (deleted_bbs, deleted_bb_preds): New var. (debug_same_succ): New function. (worklist): New var. (print_worklist, add_to_worklist, find_same_succ_bb, find_same_succ) (init_worklist, delete_worklist, delete_basic_block_same_succ) (same_succ_flush_bbs, purge_bbs, update_worklist): New function. (print_cluster, debug_cluster, update_rep_bb) (add_bb_to_cluster, new_cluster, delete_cluster): New function. (all_clusters): New var. (alloc_cluster_vectors, reset_cluster_vectors, delete_cluster_vectors) (merge_clusters, set_cluster): New function. (gimple_equal_p, gsi_advance_bw_nondebug_nonlocal, find_duplicate) (same_phi_alternatives_1, same_phi_alternatives, bb_has_non_vop_phi) (deps_ok_for_redirect_from_bb_to_bb, deps_ok_for_redirect) (find_clusters_1, find_clusters): New function. (update_vuses, vop_phi, vop_at_entry, replace_block_by): New function. (update_bbs): New var. (apply_clusters): New function. (update_debug_stmt, update_debug_stmts): New function. (tail_merge_optimize): New function. tree-pass.h (tail_merge_optimize): Declare. * tree-ssa-pre.c (execute_pre): Use tail_merge_optimize. * Makefile.in (OBJS-common): Add tree-ssa-tail-merge.o. (tree-ssa-tail-merge.o): New rule. * opts.c (default_options_table): Set OPT_ftree_tail_merge by default at OPT_LEVELS_2_PLUS. * tree-ssa-sccvn.c (vn_valueize): Move to ... * tree-ssa-sccvn.h (vn_valueize): Here. 
* timevar.def (TV_TREE_TAIL_MERGE): New timevar. * common.opt (ftree-tail-merge): New switch. * params.def (PARAM_MAX_TAIL_MERGE_COMPARISONS) (PARAM_MAX_TAIL_MERGE_ITERATIONS): New parameter. * doc/invoke.texi (Optimization Options, -O2): Add -ftree-tail-merge. (-ftree-tail-merge, max-tail-merge-comparisons) (max-tail-merge-iterations): New item. From-SVN: r179275
This commit is contained in:
parent
99e299a8c7
commit
c9e9316807
@ -1,3 +1,55 @@
|
||||
2011-09-27 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
PR middle-end/43864
|
||||
* tree-ssa-tail-merge.c: New file.
|
||||
(struct same_succ_def): Define.
|
||||
(same_succ, const_same_succ): New typedef.
|
||||
(struct bb_cluster_def): Define.
|
||||
(bb_cluster, const_bb_cluster): New typedef.
|
||||
(struct aux_bb_info): Define.
|
||||
(BB_SIZE, BB_SAME_SUCC, BB_CLUSTER, BB_VOP_AT_EXIT): Define.
|
||||
(gvn_uses_equal): New function.
|
||||
(same_succ_print, same_succ_print_traverse, update_dep_bb)
|
||||
(stmt_update_dep_bb, local_def, same_succ_hash)
|
||||
(inverse_flags, same_succ_equal, same_succ_alloc, same_succ_delete)
|
||||
(same_succ_reset): New function.
|
||||
(same_succ_htab, same_succ_edge_flags)
|
||||
(deleted_bbs, deleted_bb_preds): New var.
|
||||
(debug_same_succ): New function.
|
||||
(worklist): New var.
|
||||
(print_worklist, add_to_worklist, find_same_succ_bb, find_same_succ)
|
||||
(init_worklist, delete_worklist, delete_basic_block_same_succ)
|
||||
(same_succ_flush_bbs, purge_bbs, update_worklist): New function.
|
||||
(print_cluster, debug_cluster, update_rep_bb)
|
||||
(add_bb_to_cluster, new_cluster, delete_cluster): New function.
|
||||
(all_clusters): New var.
|
||||
(alloc_cluster_vectors, reset_cluster_vectors, delete_cluster_vectors)
|
||||
(merge_clusters, set_cluster): New function.
|
||||
(gimple_equal_p, gsi_advance_bw_nondebug_nonlocal, find_duplicate)
|
||||
(same_phi_alternatives_1, same_phi_alternatives, bb_has_non_vop_phi)
|
||||
(deps_ok_for_redirect_from_bb_to_bb, deps_ok_for_redirect)
|
||||
(find_clusters_1, find_clusters): New function.
|
||||
(update_vuses, vop_phi, vop_at_entry, replace_block_by): New function.
|
||||
(update_bbs): New var.
|
||||
(apply_clusters): New function.
|
||||
(update_debug_stmt, update_debug_stmts): New function.
|
||||
(tail_merge_optimize): New function.
|
||||
* tree-pass.h (tail_merge_optimize): Declare.
|
||||
* tree-ssa-pre.c (execute_pre): Use tail_merge_optimize.
|
||||
* Makefile.in (OBJS-common): Add tree-ssa-tail-merge.o.
|
||||
(tree-ssa-tail-merge.o): New rule.
|
||||
* opts.c (default_options_table): Set OPT_ftree_tail_merge by default at
|
||||
OPT_LEVELS_2_PLUS.
|
||||
* tree-ssa-sccvn.c (vn_valueize): Move to ...
|
||||
* tree-ssa-sccvn.h (vn_valueize): Here.
|
||||
* timevar.def (TV_TREE_TAIL_MERGE): New timevar.
|
||||
* common.opt (ftree-tail-merge): New switch.
|
||||
* params.def (PARAM_MAX_TAIL_MERGE_COMPARISONS)
|
||||
(PARAM_MAX_TAIL_MERGE_ITERATIONS): New parameter.
|
||||
* doc/invoke.texi (Optimization Options, -O2): Add -ftree-tail-merge.
|
||||
(-ftree-tail-merge, max-tail-merge-comparisons)
|
||||
(max-tail-merge-iterations): New item.
|
||||
|
||||
2011-09-27 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* ipa-inline-analysis.c (MAX_TIME): Reduce to avoid overflows.
|
||||
|
@ -1476,6 +1476,7 @@ OBJS = \
|
||||
tree-ssa-sccvn.o \
|
||||
tree-ssa-sink.o \
|
||||
tree-ssa-structalias.o \
|
||||
tree-ssa-tail-merge.o \
|
||||
tree-ssa-ter.o \
|
||||
tree-ssa-threadedge.o \
|
||||
tree-ssa-threadupdate.o \
|
||||
@ -2382,6 +2383,13 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \
|
||||
$(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \
|
||||
$(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H)
|
||||
tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \
|
||||
$(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \
|
||||
$(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \
|
||||
$(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) \
|
||||
$(GIMPLE_H) $(FUNCTION_H) \
|
||||
$(TIMEVAR_H) tree-ssa-sccvn.h \
|
||||
$(CGRAPH_H) gimple-pretty-print.h tree-pretty-print.h $(PARAMS_H)
|
||||
tree-ssa-structalias.o: tree-ssa-structalias.c \
|
||||
$(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(GGC_H) $(OBSTACK_H) $(BITMAP_H) \
|
||||
$(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \
|
||||
|
@ -1945,6 +1945,10 @@ ftree-dominator-opts
|
||||
Common Report Var(flag_tree_dom) Optimization
|
||||
Enable dominator optimizations
|
||||
|
||||
ftree-tail-merge
|
||||
Common Report Var(flag_tree_tail_merge) Optimization
|
||||
Enable tail merging on trees
|
||||
|
||||
ftree-dse
|
||||
Common Report Var(flag_tree_dse) Optimization
|
||||
Enable dead store elimination
|
||||
|
@ -407,7 +407,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
|
||||
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
|
||||
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
|
||||
-ftree-sink -ftree-sra -ftree-switch-conversion @gol
|
||||
-ftree-sink -ftree-sra -ftree-switch-conversion -ftree-tail-merge @gol
|
||||
-ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
|
||||
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
|
||||
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
|
||||
@ -6137,7 +6137,7 @@ also turns on the following optimization flags:
|
||||
-fsched-interblock -fsched-spec @gol
|
||||
-fschedule-insns -fschedule-insns2 @gol
|
||||
-fstrict-aliasing -fstrict-overflow @gol
|
||||
-ftree-switch-conversion @gol
|
||||
-ftree-switch-conversion -ftree-tail-merge @gol
|
||||
-ftree-pre @gol
|
||||
-ftree-vrp}
|
||||
|
||||
@ -7020,6 +7020,13 @@ Perform conversion of simple initializations in a switch to
|
||||
initializations from a scalar array. This flag is enabled by default
|
||||
at @option{-O2} and higher.
|
||||
|
||||
@item -ftree-tail-merge
@opindex ftree-tail-merge
|
||||
Look for identical code sequences. When found, replace one with a jump to the
|
||||
other. This optimization is known as tail merging or cross jumping. This flag
|
||||
is enabled by default at @option{-O2} and higher. The run time of this pass can
|
||||
be limited using the @option{max-tail-merge-comparisons} parameter and the
|
||||
@option{max-tail-merge-iterations} parameter.
|
||||
|
||||
@item -ftree-dce
|
||||
@opindex ftree-dce
|
||||
Perform dead code elimination (DCE) on trees. This flag is enabled by
|
||||
@ -8603,6 +8610,14 @@ This is used to avoid quadratic behavior in hoisting algorithm.
|
||||
The value of 0 will avoid limiting the search, but may slow down compilation
|
||||
of huge functions. The default value is 30.
|
||||
|
||||
@item max-tail-merge-comparisons
|
||||
The maximum number of similar basic blocks to compare a basic block with. This is used to
|
||||
avoid quadratic behavior in tree tail merging. The default value is 10.
|
||||
|
||||
@item max-tail-merge-iterations
|
||||
The maximum number of iterations of the pass over the function. This is used to
|
||||
limit run time in tree tail merging. The default value is 2.
|
||||
|
||||
@item max-unrolled-insns
|
||||
The maximum number of instructions that a loop should have if that loop
|
||||
is unrolled, and if the loop is unrolled, it determines how many times
|
||||
|
@ -484,6 +484,7 @@ static const struct default_options default_options_table[] =
|
||||
{ OPT_LEVELS_2_PLUS, OPT_falign_jumps, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
|
||||
|
||||
/* -O3 optimizations. */
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
|
||||
|
@ -921,6 +921,15 @@ DEFPARAM (PARAM_TREE_REASSOC_WIDTH,
|
||||
"reassociated tree. If 0, use the target dependent heuristic.",
|
||||
0, 0, 0)
|
||||
|
||||
DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
|
||||
"max-tail-merge-comparisons",
|
||||
"Maximum amount of similar bbs to compare a bb with",
|
||||
10, 0, 0)
|
||||
|
||||
DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
|
||||
"max-tail-merge-iterations",
|
||||
"Maximum amount of iterations of the pass over a function",
|
||||
2, 0, 0)
|
||||
|
||||
/*
|
||||
Local variables:
|
||||
|
@ -127,6 +127,7 @@ DEFTIMEVAR (TV_TREE_GIMPLIFY , "tree gimplify")
|
||||
DEFTIMEVAR (TV_TREE_EH , "tree eh")
|
||||
DEFTIMEVAR (TV_TREE_CFG , "tree CFG construction")
|
||||
DEFTIMEVAR (TV_TREE_CLEANUP_CFG , "tree CFG cleanup")
|
||||
DEFTIMEVAR (TV_TREE_TAIL_MERGE , "tree tail merge")
|
||||
DEFTIMEVAR (TV_TREE_VRP , "tree VRP")
|
||||
DEFTIMEVAR (TV_TREE_COPY_PROP , "tree copy propagation")
|
||||
DEFTIMEVAR (TV_FIND_REFERENCED_VARS , "tree find ref. vars")
|
||||
|
@ -401,6 +401,7 @@ extern struct gimple_opt_pass pass_call_cdce;
|
||||
extern struct gimple_opt_pass pass_merge_phi;
|
||||
extern struct gimple_opt_pass pass_split_crit_edges;
|
||||
extern struct gimple_opt_pass pass_pre;
|
||||
extern unsigned int tail_merge_optimize (unsigned int);
|
||||
extern struct gimple_opt_pass pass_profile;
|
||||
extern struct gimple_opt_pass pass_strip_predict_hints;
|
||||
extern struct gimple_opt_pass pass_lower_complex_O0;
|
||||
|
@ -4915,7 +4915,6 @@ execute_pre (bool do_fre)
|
||||
statistics_counter_event (cfun, "Constified", pre_stats.constified);
|
||||
|
||||
clear_expression_ids ();
|
||||
free_scc_vn ();
|
||||
if (!do_fre)
|
||||
{
|
||||
remove_dead_inserted_code ();
|
||||
@ -4925,6 +4924,17 @@ execute_pre (bool do_fre)
|
||||
scev_finalize ();
|
||||
fini_pre (do_fre);
|
||||
|
||||
if (!do_fre)
|
||||
/* TODO: tail_merge_optimize may merge all predecessors of a block, in which
|
||||
case we can merge the block with the remaining predecessor of the block.
|
||||
It should either:
|
||||
- call merge_blocks after each tail merge iteration
|
||||
- call merge_blocks after all tail merge iterations
|
||||
- mark TODO_cleanup_cfg when necessary
|
||||
- share the cfg cleanup with fini_pre. */
|
||||
todo |= tail_merge_optimize (todo);
|
||||
free_scc_vn ();
|
||||
|
||||
return todo;
|
||||
}
|
||||
|
||||
|
@ -2820,19 +2820,6 @@ stmt_has_constants (gimple stmt)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Valueize NAME if it is an SSA name, otherwise just return it. */
|
||||
|
||||
static inline tree
|
||||
vn_valueize (tree name)
|
||||
{
|
||||
if (TREE_CODE (name) == SSA_NAME)
|
||||
{
|
||||
tree tem = SSA_VAL (name);
|
||||
return tem == VN_TOP ? name : tem;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
/* Replace SSA_NAMES in expr with their value numbers, and return the
|
||||
result.
|
||||
This is performed in place. */
|
||||
|
@ -215,4 +215,18 @@ unsigned int get_constant_value_id (tree);
|
||||
unsigned int get_or_alloc_constant_value_id (tree);
|
||||
bool value_id_constant_p (unsigned int);
|
||||
tree fully_constant_vn_reference_p (vn_reference_t);
|
||||
|
||||
/* Valueize NAME if it is an SSA name, otherwise just return it. */
|
||||
|
||||
static inline tree
|
||||
vn_valueize (tree name)
|
||||
{
|
||||
if (TREE_CODE (name) == SSA_NAME)
|
||||
{
|
||||
tree tem = VN_INFO (name)->valnum;
|
||||
return tem == VN_TOP ? name : tem;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
#endif /* TREE_SSA_SCCVN_H */
|
||||
|
1706
gcc/tree-ssa-tail-merge.c
Normal file
1706
gcc/tree-ssa-tail-merge.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user