re PR middle-end/43864 (Same basic blocks should be merged)

2011-09-27  Tom de Vries  <tom@codesourcery.com>

	PR middle-end/43864
	* tree-ssa-tail-merge.c: New file.
	(struct same_succ_def): Define.
	(same_succ, const_same_succ): New typedef.
	(struct bb_cluster_def): Define.
	(bb_cluster, const_bb_cluster): New typedef.
	(struct aux_bb_info): Define.
	(BB_SIZE, BB_SAME_SUCC, BB_CLUSTER, BB_VOP_AT_EXIT): Define.
	(gvn_uses_equal): New function.
	(same_succ_print, same_succ_print_traverse, update_dep_bb)
	(stmt_update_dep_bb, local_def, same_succ_hash)
	(inverse_flags, same_succ_equal, same_succ_alloc, same_succ_delete)
	(same_succ_reset): New function.
	(same_succ_htab, same_succ_edge_flags)
	(deleted_bbs, deleted_bb_preds): New var.
	(debug_same_succ): New function.
	(worklist): New var.
	(print_worklist, add_to_worklist, find_same_succ_bb, find_same_succ)
	(init_worklist, delete_worklist, delete_basic_block_same_succ)
	(same_succ_flush_bbs, purge_bbs, update_worklist): New function.
	(print_cluster, debug_cluster, update_rep_bb)
	(add_bb_to_cluster, new_cluster, delete_cluster): New function.
	(all_clusters): New var.
	(alloc_cluster_vectors, reset_cluster_vectors, delete_cluster_vectors)
	(merge_clusters, set_cluster): New function.
	(gimple_equal_p, gsi_advance_bw_nondebug_nonlocal, find_duplicate)
	(same_phi_alternatives_1, same_phi_alternatives, bb_has_non_vop_phi)
	(deps_ok_for_redirect_from_bb_to_bb, deps_ok_for_redirect)
	(find_clusters_1, find_clusters): New function.
	(update_vuses, vop_phi, vop_at_entry, replace_block_by): New function.
	(update_bbs): New var.
	(apply_clusters): New function.
	(update_debug_stmt, update_debug_stmts): New function.
	(tail_merge_optimize): New function.
	* tree-pass.h (tail_merge_optimize): Declare.
	* tree-ssa-pre.c (execute_pre): Use tail_merge_optimize.
	* Makefile.in (OBJS-common): Add tree-ssa-tail-merge.o.
	(tree-ssa-tail-merge.o): New rule.
	* opts.c (default_options_table): Set OPT_ftree_tail_merge by default at
	OPT_LEVELS_2_PLUS.
	* tree-ssa-sccvn.c (vn_valueize): Move to ...
	* tree-ssa-sccvn.h (vn_valueize): Here.
	* timevar.def (TV_TREE_TAIL_MERGE): New timevar.
	* common.opt (ftree-tail-merge): New switch.
	* params.def (PARAM_MAX_TAIL_MERGE_COMPARISONS)
	(PARAM_MAX_TAIL_MERGE_ITERATIONS): New parameter.
	* doc/invoke.texi (Optimization Options, -O2): Add -ftree-tail-merge.
	(-ftree-tail-merge, max-tail-merge-comparisons)
	(max-tail-merge-iterations): New item.

From-SVN: r179275
This commit is contained in:
Tom de Vries 2011-09-27 16:10:42 +00:00 committed by Tom de Vries
parent 99e299a8c7
commit c9e9316807
12 changed files with 1824 additions and 16 deletions

View File

@ -1,3 +1,55 @@
2011-09-27 Tom de Vries <tom@codesourcery.com>
PR middle-end/43864
* tree-ssa-tail-merge.c: New file.
(struct same_succ_def): Define.
(same_succ, const_same_succ): New typedef.
(struct bb_cluster_def): Define.
(bb_cluster, const_bb_cluster): New typedef.
(struct aux_bb_info): Define.
(BB_SIZE, BB_SAME_SUCC, BB_CLUSTER, BB_VOP_AT_EXIT): Define.
(gvn_uses_equal): New function.
(same_succ_print, same_succ_print_traverse, update_dep_bb)
(stmt_update_dep_bb, local_def, same_succ_hash)
(inverse_flags, same_succ_equal, same_succ_alloc, same_succ_delete)
(same_succ_reset): New function.
(same_succ_htab, same_succ_edge_flags)
(deleted_bbs, deleted_bb_preds): New var.
(debug_same_succ): New function.
(worklist): New var.
(print_worklist, add_to_worklist, find_same_succ_bb, find_same_succ)
(init_worklist, delete_worklist, delete_basic_block_same_succ)
(same_succ_flush_bbs, purge_bbs, update_worklist): New function.
(print_cluster, debug_cluster, update_rep_bb)
(add_bb_to_cluster, new_cluster, delete_cluster): New function.
(all_clusters): New var.
(alloc_cluster_vectors, reset_cluster_vectors, delete_cluster_vectors)
(merge_clusters, set_cluster): New function.
(gimple_equal_p, gsi_advance_bw_nondebug_nonlocal, find_duplicate)
(same_phi_alternatives_1, same_phi_alternatives, bb_has_non_vop_phi)
(deps_ok_for_redirect_from_bb_to_bb, deps_ok_for_redirect)
(find_clusters_1, find_clusters): New function.
(update_vuses, vop_phi, vop_at_entry, replace_block_by): New function.
(update_bbs): New var.
(apply_clusters): New function.
(update_debug_stmt, update_debug_stmts): New function.
(tail_merge_optimize): New function.
* tree-pass.h (tail_merge_optimize): Declare.
* tree-ssa-pre.c (execute_pre): Use tail_merge_optimize.
* Makefile.in (OBJS-common): Add tree-ssa-tail-merge.o.
(tree-ssa-tail-merge.o): New rule.
* opts.c (default_options_table): Set OPT_ftree_tail_merge by default at
OPT_LEVELS_2_PLUS.
* tree-ssa-sccvn.c (vn_valueize): Move to ...
* tree-ssa-sccvn.h (vn_valueize): Here.
* timevar.def (TV_TREE_TAIL_MERGE): New timevar.
* common.opt (ftree-tail-merge): New switch.
* params.def (PARAM_MAX_TAIL_MERGE_COMPARISONS)
(PARAM_MAX_TAIL_MERGE_ITERATIONS): New parameter.
* doc/invoke.texi (Optimization Options, -O2): Add -ftree-tail-merge.
(-ftree-tail-merge, max-tail-merge-comparisons)
(max-tail-merge-iterations): New item.
2011-09-27 Jan Hubicka <jh@suse.cz>
* ipa-inline-analysis.c (MAX_TIME): Reduce to avoid overflows.

View File

@ -1476,6 +1476,7 @@ OBJS = \
tree-ssa-sccvn.o \
tree-ssa-sink.o \
tree-ssa-structalias.o \
tree-ssa-tail-merge.o \
tree-ssa-ter.o \
tree-ssa-threadedge.o \
tree-ssa-threadupdate.o \
@ -2382,6 +2383,13 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \
$(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \
$(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H)
tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \
$(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \
$(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \
$(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) \
$(GIMPLE_H) $(FUNCTION_H) \
$(TIMEVAR_H) tree-ssa-sccvn.h \
$(CGRAPH_H) gimple-pretty-print.h tree-pretty-print.h $(PARAMS_H)
tree-ssa-structalias.o: tree-ssa-structalias.c \
$(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(GGC_H) $(OBSTACK_H) $(BITMAP_H) \
$(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \

View File

@ -1945,6 +1945,10 @@ ftree-dominator-opts
Common Report Var(flag_tree_dom) Optimization
Enable dominator optimizations
ftree-tail-merge
Common Report Var(flag_tree_tail_merge) Optimization
Enable tail merging on trees
ftree-dse
Common Report Var(flag_tree_dse) Optimization
Enable dead store elimination

View File

@ -407,7 +407,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
-ftree-sink -ftree-sra -ftree-switch-conversion @gol
-ftree-sink -ftree-sra -ftree-switch-conversion -ftree-tail-merge @gol
-ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
@ -6137,7 +6137,7 @@ also turns on the following optimization flags:
-fsched-interblock -fsched-spec @gol
-fschedule-insns -fschedule-insns2 @gol
-fstrict-aliasing -fstrict-overflow @gol
-ftree-switch-conversion @gol
-ftree-switch-conversion -ftree-tail-merge @gol
-ftree-pre @gol
-ftree-vrp}
@ -7020,6 +7020,13 @@ Perform conversion of simple initializations in a switch to
initializations from a scalar array. This flag is enabled by default
at @option{-O2} and higher.
@item -ftree-tail-merge
@opindex ftree-tail-merge
Look for identical code sequences. When found, replace one with a jump to the
other. This optimization is known as tail merging or cross jumping. This flag
is enabled by default at @option{-O2} and higher. The run time of this pass can
be limited using the @option{max-tail-merge-comparisons} and
@option{max-tail-merge-iterations} parameters.
@item -ftree-dce
@opindex ftree-dce
Perform dead code elimination (DCE) on trees. This flag is enabled by
@ -8603,6 +8610,14 @@ This is used to avoid quadratic behavior in hoisting algorithm.
The value of 0 will avoid limiting the search, but may slow down compilation
of huge functions. The default value is 30.
@item max-tail-merge-comparisons
The maximum number of similar basic blocks to compare a basic block with. This
is used to avoid quadratic behavior in tree tail merging. The default value is 10.
@item max-tail-merge-iterations
The maximum number of iterations of the pass over the function. This is used to
limit run time in tree tail merging. The default value is 2.
@item max-unrolled-insns
The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times

View File

@ -484,6 +484,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_falign_jumps, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
/* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },

View File

@ -921,6 +921,15 @@ DEFPARAM (PARAM_TREE_REASSOC_WIDTH,
"reassociated tree. If 0, use the target dependent heuristic.",
0, 0, 0)
/* Limit on how many similar basic blocks a single basic block is compared
   with during tree tail merging.  Exposed to users under the name
   "max-tail-merge-comparisons".  The first numeric argument (10) is the
   default value.  NOTE(review): the trailing "0, 0" are presumably the
   minimum and maximum bounds -- confirm against the DEFPARAM definition.  */
DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
"max-tail-merge-comparisons",
"Maximum amount of similar bbs to compare a bb with",
10, 0, 0)
/* Limit on how many times the tree tail merging pass iterates over a
   function.  Exposed to users under the name "max-tail-merge-iterations".
   The first numeric argument (2) is the default value.  NOTE(review): the
   trailing "0, 0" are presumably the minimum and maximum bounds -- confirm
   against the DEFPARAM definition.  */
DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
"max-tail-merge-iterations",
"Maximum amount of iterations of the pass over a function",
2, 0, 0)
/*
Local variables:

View File

@ -127,6 +127,7 @@ DEFTIMEVAR (TV_TREE_GIMPLIFY , "tree gimplify")
DEFTIMEVAR (TV_TREE_EH , "tree eh")
DEFTIMEVAR (TV_TREE_CFG , "tree CFG construction")
DEFTIMEVAR (TV_TREE_CLEANUP_CFG , "tree CFG cleanup")
DEFTIMEVAR (TV_TREE_TAIL_MERGE , "tree tail merge")
DEFTIMEVAR (TV_TREE_VRP , "tree VRP")
DEFTIMEVAR (TV_TREE_COPY_PROP , "tree copy propagation")
DEFTIMEVAR (TV_FIND_REFERENCED_VARS , "tree find ref. vars")

View File

@ -401,6 +401,7 @@ extern struct gimple_opt_pass pass_call_cdce;
extern struct gimple_opt_pass pass_merge_phi;
extern struct gimple_opt_pass pass_split_crit_edges;
extern struct gimple_opt_pass pass_pre;
extern unsigned int tail_merge_optimize (unsigned int);
extern struct gimple_opt_pass pass_profile;
extern struct gimple_opt_pass pass_strip_predict_hints;
extern struct gimple_opt_pass pass_lower_complex_O0;

View File

@ -4915,7 +4915,6 @@ execute_pre (bool do_fre)
statistics_counter_event (cfun, "Constified", pre_stats.constified);
clear_expression_ids ();
free_scc_vn ();
if (!do_fre)
{
remove_dead_inserted_code ();
@ -4925,6 +4924,17 @@ execute_pre (bool do_fre)
scev_finalize ();
fini_pre (do_fre);
if (!do_fre)
/* TODO: tail_merge_optimize may merge all predecessors of a block, in which
case we can merge the block with the remaining predecessor of the block.
It should either:
- call merge_blocks after each tail merge iteration
- call merge_blocks after all tail merge iterations
- mark TODO_cleanup_cfg when necessary
- share the cfg cleanup with fini_pre. */
todo |= tail_merge_optimize (todo);
free_scc_vn ();
return todo;
}

View File

@ -2820,19 +2820,6 @@ stmt_has_constants (gimple stmt)
return false;
}
/* Map NAME to SSA_VAL (NAME) when NAME is an SSA name and that value is
   not VN_TOP.  Anything else is handed back untouched.  */
static inline tree
vn_valueize (tree name)
{
tree val;

/* Only SSA names carry a value to look up.  */
if (TREE_CODE (name) != SSA_NAME)
return name;

val = SSA_VAL (name);
if (val == VN_TOP)
return name;
return val;
}
/* Replace SSA_NAMES in expr with their value numbers, and return the
result.
This is performed in place. */

View File

@ -215,4 +215,18 @@ unsigned int get_constant_value_id (tree);
unsigned int get_or_alloc_constant_value_id (tree);
bool value_id_constant_p (unsigned int);
tree fully_constant_vn_reference_p (vn_reference_t);
/* Return the value number recorded for NAME when NAME is an SSA name and
   that value number is not VN_TOP.  In every other case return NAME
   itself.  */
static inline tree
vn_valueize (tree name)
{
tree valnum;

/* Operands that are not SSA names are returned as they are.  */
if (TREE_CODE (name) != SSA_NAME)
return name;

valnum = VN_INFO (name)->valnum;
return valnum != VN_TOP ? valnum : name;
}
#endif /* TREE_SSA_SCCVN_H */

1706
gcc/tree-ssa-tail-merge.c Normal file

File diff suppressed because it is too large Load Diff