cgraph.c (cgraph_remove_node): Do not release function bodies until full cgraph is built.

* cgraph.c (cgraph_remove_node): Do not release function bodies until
	full cgraph is built.
	* cgraph.h (cgraph_decide_inlining_incrementally): Add early argument.
	* cgraphunit.c (cgraph_finalize_function): Update call of
	cgraph_decide_inlining_incrementally.
	(initialize_inline_failed): Break out of ...
	(cgraph_analyze_function): ... here.
	(rebuild_cgraph_edges): New function.
	(pass_rebuild_cgraph_edges): New pass.
	* common.opt (fearly-inlining): New flag.
	* ipa-inline.c: Include ggc.h.
	(cgraph_clone_inlined_nodes): Avoid re-using the original copy
	when the cgraph is not fully built.
	(cgraph_decide_inlining_incrementally): Add early mode.
	(cgraph_early_inlining): New function.
	(cgraph_gate_early_inlining): Likewise.
	(pass_early_ipa_inline): New pass.
	* ipa.c (cgraph_postorder): NULLify aux pointer.
	* tree-inline.c (expand_call_inline): Avoid warning early.
	* tree-optimize.c (pass_early_local_passes): New.
	(execute_cleanup_cfg_pre_ipa): New.
	(pass_cleanup_cfg): New.
	(register_dump_files): Fix handling of subpasses of an IPA pass.
	(init_tree_optimization_passes): Add early passes.
	(execute_ipa_pass_list): Fix handling of subpasses of an IPA pass.
	* tree-pass.h (pass_early_tree_profile, pass_rebuild_cgraph_edges,
	pass_early_ipa_inline): New passes.
	* tree-profile.c (do_early_tree_profiling, pass_early_tree_profile): New.

	* doc/invoke.texi: Document -fearly-inlining.

From-SVN: r101369
Author: Jan Hubicka <jh@suse.cz>
Date:   2005-06-28 04:20:29 +02:00

gcc/ChangeLog

@@ -1,3 +1,36 @@
2005-06-28 Jan Hubicka <jh@suse.cz>
* cgraph.c (cgraph_remove_node): Do not release function bodies until
full cgraph is built.
* cgraph.h (cgraph_decide_inlining_incrementally): Add early argument.
* cgraphunit.c (cgraph_finalize_function): Update call of
cgraph_decide_inlining_incrementally.
(initialize_inline_failed): Break out of ...
(cgraph_analyze_function): ... here.
(rebuild_cgraph_edges): New function.
(pass_rebuild_cgraph_edges): New pass.
* common.opt (fearly-inlining): New flag.
* ipa-inline.c: Include ggc.h.
(cgraph_clone_inlined_nodes): Avoid re-using the original copy
when the cgraph is not fully built.
(cgraph_decide_inlining_incrementally): Add early mode.
(cgraph_early_inlining): New function.
(cgraph_gate_early_inlining): Likewise.
(pass_early_ipa_inline): New pass.
* ipa.c (cgraph_postorder): NULLify aux pointer.
* tree-inline.c (expand_call_inline): Avoid warning early.
* tree-optimize.c (pass_early_local_passes): New.
(execute_cleanup_cfg_pre_ipa): New.
(pass_cleanup_cfg): New.
(register_dump_files): Fix handling of subpasses of an IPA pass.
(init_tree_optimization_passes): Add early passes.
(execute_ipa_pass_list): Fix handling of subpasses of an IPA pass.
* tree-pass.h (pass_early_tree_profile, pass_rebuild_cgraph_edges,
pass_early_ipa_inline): New passes.
* tree-profile.c (do_early_tree_profiling, pass_early_tree_profile): New.
* doc/invoke.texi: Document -fearly-inlining.
2005-06-28 Kelley Cook <kcook@gcc.gnu.org>
* doc/include/fdl.texi: Merge in changes from upstream.

gcc/cgraph.c

@@ -473,7 +473,8 @@ cgraph_remove_node (struct cgraph_node *node)
{
struct cgraph_node *n = *slot;
if (!n->next_clone && !n->global.inlined_to
&& (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl)))
&& (cgraph_global_info_ready
&& (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl))))
kill_body = true;
}

gcc/cgraph.h

@@ -286,7 +286,7 @@ bool cgraph_remove_unreachable_nodes (bool, FILE *);
int cgraph_postorder (struct cgraph_node **);
/* In ipa-inline.c */
void cgraph_decide_inlining_incrementally (struct cgraph_node *);
bool cgraph_decide_inlining_incrementally (struct cgraph_node *, bool);
void cgraph_clone_inlined_nodes (struct cgraph_edge *, bool);
void cgraph_mark_inline_edge (struct cgraph_edge *);
bool cgraph_default_inline_p (struct cgraph_node *);

gcc/cgraphunit.c

@@ -427,7 +427,7 @@ cgraph_finalize_function (tree decl, bool nested)
if (!flag_unit_at_a_time)
{
cgraph_analyze_function (node);
cgraph_decide_inlining_incrementally (node);
cgraph_decide_inlining_incrementally (node, false);
}
if (decide_is_function_needed (node, decl))
@@ -569,6 +569,73 @@ cgraph_create_edges (struct cgraph_node *node, tree body)
visited_nodes = NULL;
}
/* Give initial reasons why inlining would fail.  These get
either NULLified or usually overwritten by a more precise reason
later.  */
static void
initialize_inline_failed (struct cgraph_node *node)
{
struct cgraph_edge *e;
for (e = node->callers; e; e = e->next_caller)
{
gcc_assert (!e->callee->global.inlined_to);
gcc_assert (e->inline_failed);
if (node->local.redefined_extern_inline)
e->inline_failed = N_("redefined extern inline functions are not "
"considered for inlining");
else if (!node->local.inlinable)
e->inline_failed = N_("function not inlinable");
else
e->inline_failed = N_("function not considered for inlining");
}
}
/* Rebuild call edges from current function after passes that are not
aware of cgraph updating.  */
static void
rebuild_cgraph_edges (void)
{
basic_block bb;
struct cgraph_node *node = cgraph_node (current_function_decl);
block_stmt_iterator bsi;
cgraph_node_remove_callees (node);
node->count = ENTRY_BLOCK_PTR->count;
FOR_EACH_BB (bb)
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
tree stmt = bsi_stmt (bsi);
tree call = get_call_expr_in (stmt);
tree decl;
if (call && (decl = get_callee_fndecl (call)))
cgraph_create_edge (node, cgraph_node (decl), stmt,
bb->count,
bb->loop_depth);
}
initialize_inline_failed (node);
gcc_assert (!node->global.inlined_to);
}
struct tree_opt_pass pass_rebuild_cgraph_edges =
{
NULL, /* name */
NULL, /* gate */
rebuild_cgraph_edges, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
0, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
0 /* letter */
};
/* Verify cgraph nodes of given cgraph node. */
void
@@ -764,7 +831,6 @@ static void
cgraph_analyze_function (struct cgraph_node *node)
{
tree decl = node->decl;
struct cgraph_edge *e;
current_function_decl = decl;
push_cfun (DECL_STRUCT_FUNCTION (decl));
@@ -778,16 +844,7 @@ cgraph_analyze_function (struct cgraph_node *node)
if (node->local.inlinable)
node->local.disregard_inline_limits
= lang_hooks.tree_inlining.disregard_inline_limits (decl);
for (e = node->callers; e; e = e->next_caller)
{
if (node->local.redefined_extern_inline)
e->inline_failed = N_("redefined extern inline functions are not "
"considered for inlining");
else if (!node->local.inlinable)
e->inline_failed = N_("function not inlinable");
else
e->inline_failed = N_("function not considered for inlining");
}
initialize_inline_failed (node);
if (flag_really_no_inline && !node->local.disregard_inline_limits)
node->local.inlinable = 0;
/* Inlining characteristics are maintained by the cgraph_mark_inline. */

gcc/common.opt

@@ -476,6 +476,10 @@ finline-functions
Common Report Var(flag_inline_functions)
Integrate simple functions into their callers
fearly-inlining
Common Report Var(flag_early_inlining) Init(1)
Perform early inlining
finline-limit-
Common RejectNegative Joined UInteger

gcc/doc/invoke.texi

@@ -300,7 +300,7 @@ Objective-C and Objective-C++ Dialects}.
-fbranch-target-load-optimize2 -fbtr-bb-exclusive @gol
-fcaller-saves -fcprop-registers -fcse-follow-jumps @gol
-fcse-skip-blocks -fcx-limited-range -fdata-sections @gol
-fdelayed-branch -fdelete-null-pointer-checks @gol
-fdelayed-branch -fdelete-null-pointer-checks -fearly-inlining @gol
-fexpensive-optimizations -ffast-math -ffloat-store @gol
-fforce-addr -fforce-mem -ffunction-sections @gol
-fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol
@@ -4450,6 +4450,16 @@ assembler code in its own right.
Enabled at level @option{-O3}.
@item -fearly-inlining
@opindex fearly-inlining
Inline functions marked by @code{always_inline}, and functions whose body seems
smaller than the function call overhead, early in compilation, before doing
@option{-fprofile-generate} instrumentation and before the real inlining pass.
Doing so makes profiling significantly cheaper and usually makes inlining
faster on programs that have large chains of nested wrapper functions.
Enabled by default.
@item -finline-limit=@var{n}
@opindex finline-limit
By default, GCC limits the size of functions that can be inlined. This flag

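The kind of code this option targets can be sketched with a made-up wrapper chain; the functions below are purely illustrative and do not appear in the patch. Each wrapper only forwards its argument, so collapsing the chain early leaves one small function for the profiler and the real inliner to look at instead of three.

/* Illustrative wrapper chain; names are hypothetical.  */
static inline int add1 (int x) { return x + 1; }
static inline int add2 (int x) { return add1 (add1 (x)); }
static inline int add4 (int x) { return add2 (add2 (x)); }

int
compute (int x)
{
  /* With -fearly-inlining (the default) the chain is expected to
     reduce to x + 4 before -fprofile-generate instrumentation and
     before the real inlining pass; -fno-early-inlining disables
     this behavior.  */
  return add4 (x);
}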
gcc/ipa-inline.c

@@ -79,6 +79,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "intl.h"
#include "tree-pass.h"
#include "coverage.h"
#include "ggc.h"
/* Statistics we collect about inlining algorithm. */
static int ncalls_inlined;
@@ -120,7 +121,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate)
if (!e->callee->callers->next_caller
&& (!e->callee->needed || DECL_EXTERNAL (e->callee->decl))
&& duplicate
&& flag_unit_at_a_time)
&& (flag_unit_at_a_time && cgraph_global_info_ready))
{
gcc_assert (!e->callee->global.inlined_to);
if (!DECL_EXTERNAL (e->callee->decl))
@@ -870,10 +871,11 @@ cgraph_decide_inlining (void)
/* Decide on the inlining.  We do so in the topological order to avoid
the expense of updating data structures.  */
void
cgraph_decide_inlining_incrementally (struct cgraph_node *node)
bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
{
struct cgraph_edge *e;
bool inlined = false;
/* First of all look for always inline functions. */
for (e = node->callees; e; e = e->next_callee)
@@ -883,7 +885,13 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node)
/* ??? It is possible that renaming a variable removed the function body
in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */
&& DECL_SAVED_TREE (e->callee->decl))
cgraph_mark_inline (e);
{
if (dump_file && early)
fprintf (dump_file, " Early inlining %s into %s\n",
cgraph_node_name (e->callee), cgraph_node_name (node));
cgraph_mark_inline (e);
inlined = true;
}
/* Now do the automatic inlining. */
if (!flag_really_no_inline)
@@ -892,15 +900,36 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node)
&& e->inline_failed
&& !e->callee->local.disregard_inline_limits
&& !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
&& (!early
|| (cgraph_estimate_size_after_inlining (1, e->caller, node)
<= e->caller->global.insns))
&& cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
&& DECL_SAVED_TREE (e->callee->decl))
{
if (cgraph_default_inline_p (e->callee))
cgraph_mark_inline (e);
else
{
if (dump_file && early)
fprintf (dump_file, " Early inlining %s into %s\n",
cgraph_node_name (e->callee), cgraph_node_name (node));
cgraph_mark_inline (e);
inlined = true;
}
else if (!early)
e->inline_failed
= N_("--param max-inline-insns-single limit reached");
}
if (early && inlined)
{
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
tree_register_cfg_hooks ();
current_function_decl = node->decl;
optimize_inline_calls (current_function_decl);
node->local.self_insns = node->global.insns;
current_function_decl = NULL;
pop_cfun ();
ggc_collect ();
}
return inlined;
}
/* When inlining shall be performed. */
@@ -920,7 +949,67 @@ struct tree_opt_pass pass_ipa_inline =
0, /* static_pass_number */
TV_INTEGRATION, /* tv_id */
0, /* properties_required */
PROP_trees, /* properties_provided */
PROP_cfg, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */
0 /* letter */
};
/* Do inlining of small functions.  Doing so early helps profiling and other
passes be somewhat more effective and avoids some code duplication in
the later real inlining pass for test cases with very many function calls.  */
static void
cgraph_early_inlining (void)
{
struct cgraph_node *node;
int nnodes;
struct cgraph_node **order =
xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
int i;
if (sorrycount || errorcount)
return;
#ifdef ENABLE_CHECKING
for (node = cgraph_nodes; node; node = node->next)
gcc_assert (!node->aux);
#endif
nnodes = cgraph_postorder (order);
for (i = nnodes - 1; i >= 0; i--)
{
node = order[i];
if (node->analyzed && node->local.inlinable
&& (node->needed || node->reachable)
&& node->callers)
cgraph_decide_inlining_incrementally (node, true);
}
cgraph_remove_unreachable_nodes (true, dump_file);
#ifdef ENABLE_CHECKING
for (node = cgraph_nodes; node; node = node->next)
gcc_assert (!node->global.inlined_to);
#endif
free (order);
}
/* When early inlining shall be performed.  */
static bool
cgraph_gate_early_inlining (void)
{
return flag_inline_trees && flag_early_inlining;
}
struct tree_opt_pass pass_early_ipa_inline =
{
"einline", /* name */
cgraph_gate_early_inlining, /* gate */
cgraph_early_inlining, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_INTEGRATION, /* tv_id */
0, /* properties_required */
PROP_cfg, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */

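A rough sketch of the two cases cgraph_decide_inlining_incrementally distinguishes in early mode may help; the functions and size judgments below are hypothetical and not taken from the patch. Callees that disregard inline limits (for instance those marked always_inline) are taken by the first loop even in early mode, while an ordinary callee is inlined early only when the caller is not estimated to grow; anything larger is left for the real IPA inliner.

/* Hypothetical example only.  */

/* Taken by the first loop even when early == true, because
   always_inline sets disregard_inline_limits.  */
static inline int __attribute__ ((always_inline))
clamp (int x)
{
  return x < 0 ? 0 : x;
}

/* Small enough that inlining it is not estimated to grow the caller,
   so the early pass takes it; a larger body would fail the no-growth
   check and wait for the real inliner, which applies the usual
   inline limits instead.  */
static int
triple (int x)
{
  return 3 * x;
}

int
use (int x)
{
  return triple (clamp (x));
}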
gcc/ipa.c

@@ -83,6 +83,8 @@ cgraph_postorder (struct cgraph_node **order)
}
}
free (stack);
for (node = cgraph_nodes; node; node = node->next)
node->aux = NULL;
return order_pos;
}

gcc/tree-inline.c

@@ -1969,7 +1969,9 @@ expand_call_inline (basic_block bb, tree stmt, tree *tp, void *data)
else if (warn_inline && DECL_DECLARED_INLINE_P (fn)
&& !DECL_IN_SYSTEM_HEADER (fn)
&& strlen (reason)
&& !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn)))
&& !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn))
/* Avoid warnings during early inline pass. */
&& (!flag_unit_at_a_time || cgraph_global_info_ready))
{
warning (0, "%Jinlining failed in call to %qF: %s", fn, fn, reason);
warning (0, "called from here");

gcc/tree-optimize.c

@@ -55,7 +55,7 @@ int dump_flags;
bool in_gimple_form;
/* The root of the compilation pass tree, once constructed. */
static struct tree_opt_pass *all_passes, *all_ipa_passes, * all_lowering_passes;
static struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes;
/* Gate: execute, or not, all of the non-trivial optimizations. */
@@ -84,6 +84,52 @@ static struct tree_opt_pass pass_all_optimizations =
0 /* letter */
};
static struct tree_opt_pass pass_early_local_passes =
{
NULL, /* name */
gate_all_optimizations, /* gate */
NULL, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
0, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
0 /* letter */
};
/* Pass: cleanup the CFG early, before the IPA passes run.  This is
just a round of label cleanups and case node grouping, because after
the early passes (such as profile instrumentation) have run such
cleanups may be necessary.  */
static void
execute_cleanup_cfg_pre_ipa (void)
{
cleanup_tree_cfg ();
}
static struct tree_opt_pass pass_cleanup_cfg =
{
"cleanup_cfg", /* name */
NULL, /* gate */
execute_cleanup_cfg_pre_ipa, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
0, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func, /* todo_flags_finish */
0 /* letter */
};
/* Pass: cleanup the CFG just before expanding trees to RTL.
This is just a round of label cleanups and case node grouping
because after the tree optimizers have run such cleanups may
@@ -322,7 +368,7 @@ register_dump_files (struct tree_opt_pass *pass, bool ipa, int properties)
n++;
if (pass->sub)
new_properties = register_dump_files (pass->sub, ipa, new_properties);
new_properties = register_dump_files (pass->sub, false, new_properties);
/* If we have a gate, combine the properties that we could have with
and without the pass being examined. */
@@ -390,6 +436,8 @@ init_tree_optimization_passes (void)
#define NEXT_PASS(PASS) (p = next_pass_1 (p, &PASS))
/* Interprocedural optimization passes. */
p = &all_ipa_passes;
NEXT_PASS (pass_early_ipa_inline);
NEXT_PASS (pass_early_local_passes);
NEXT_PASS (pass_ipa_inline);
*p = NULL;
@@ -405,7 +453,13 @@
NEXT_PASS (pass_lower_complex_O0);
NEXT_PASS (pass_lower_vector);
NEXT_PASS (pass_warn_function_return);
NEXT_PASS (pass_early_tree_profile);
*p = NULL;
p = &pass_early_local_passes.sub;
NEXT_PASS (pass_tree_profile);
NEXT_PASS (pass_cleanup_cfg);
NEXT_PASS (pass_rebuild_cgraph_edges);
*p = NULL;
p = &all_passes;
@@ -716,7 +770,7 @@ execute_ipa_pass_list (struct tree_opt_pass *pass)
{
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
current_function_decl = node->decl;
execute_pass_list (pass);
execute_pass_list (pass->sub);
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);
current_function_decl = NULL;

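Putting the tree-optimize.c hunks together, the pass nesting this patch sets up appears to be roughly the following; this is an informal reading of the NEXT_PASS wiring above, not an authoritative pass list.

/* Informal sketch of the resulting pass tree:

   all_ipa_passes
     pass_early_ipa_inline        -- early inliner over the whole cgraph
     pass_early_local_passes      -- sub passes run per function by
                                     execute_ipa_pass_list
       pass_tree_profile
       pass_cleanup_cfg           -- execute_cleanup_cfg_pre_ipa
       pass_rebuild_cgraph_edges  -- refresh call edges afterwards
     pass_ipa_inline              -- the real inliner

   all_lowering_passes additionally end with pass_early_tree_profile,
   gated to run only when not in unit-at-a-time mode or not
   optimizing.  */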
gcc/tree-pass.h

@@ -164,6 +164,7 @@ extern struct tree_opt_pass pass_lower_cf;
extern struct tree_opt_pass pass_lower_eh;
extern struct tree_opt_pass pass_build_cfg;
extern struct tree_opt_pass pass_tree_profile;
extern struct tree_opt_pass pass_early_tree_profile;
extern struct tree_opt_pass pass_referenced_vars;
extern struct tree_opt_pass pass_sra;
extern struct tree_opt_pass pass_tail_recursion;
@@ -227,8 +228,10 @@ extern struct tree_opt_pass pass_del_pta;
extern struct tree_opt_pass pass_uncprop;
extern struct tree_opt_pass pass_return_slot;
extern struct tree_opt_pass pass_reassoc;
extern struct tree_opt_pass pass_rebuild_cgraph_edges;
/* IPA Passes */
extern struct tree_opt_pass pass_ipa_inline;
extern struct tree_opt_pass pass_early_ipa_inline;
#endif /* GCC_TREE_PASS_H */

gcc/tree-profile.c

@@ -273,6 +273,33 @@ struct tree_opt_pass pass_tree_profile =
0 /* letter */
};
/* Return 1 if early tree-based profiling is in effect, else 0.
If it is, set up hooks for tree-based profiling.
Gate for pass_early_tree_profile.  */
static bool
do_early_tree_profiling (void)
{
return (do_tree_profiling () && (!flag_unit_at_a_time || !optimize));
}
struct tree_opt_pass pass_early_tree_profile =
{
"early_tree_profile", /* name */
do_early_tree_profiling, /* gate */
tree_profiling, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_BRANCH_PROB, /* tv_id */
PROP_gimple_leh | PROP_cfg, /* properties_required */
PROP_gimple_leh | PROP_cfg, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_verify_stmts, /* todo_flags_finish */
0 /* letter */
};
struct profile_hooks tree_profile_hooks =
{
tree_init_edge_profiler, /* init_edge_profiler */