diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8015be9133e..3a8ac5440a8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,51 @@ +2009-10-07 Jan Hubicka + + * lto-symtab.c (lto_cgraph_replace_node): Assert that inline clones have + no address taken. + * cgraph.c (cgraph_mark_needed_node): Assert that inline clones are + never needed. + (cgraph_clone_node): Clear externally_visible flag for clones. + * cgraph.h (cgraph_only_called_directly_p, + cgraph_can_remove_if_no_direct_calls_p): New predicates. + * tree-pass.h (pass_ipa_whole_program_visibility): Declare. + * ipa-cp.c (ipcp_cloning_candidate_p): Use new predicate. + (ipcp_initialize_node_lattices, ipcp_estimate_growth, + ipcp_insert_stage): Likewise. + * cgraphunit.c (cgraph_decide_is_function_needed): Do not compute + externally_visible flag. + (verify_cgraph_node): Verify that inline clones look right. + (process_function_and_variable_attributes): Do not set + externally_visible flags. + (ipa_passes): Avoid executing small_ipa_passes at LTO stage; they've + been already run. + * lto-cgraph.c (lto_output_node): Assert that inline clones are not + boundaries. + * ipa-inline.c (cgraph_clone_inlined_nodes): Use new predicates; + clear externally_visible when turning into inline clones. + (cgraph_mark_inline_edge): Use new predicates. + (cgraph_estimate_growth): Likewise. + (cgraph_decide_inlining): Likewise. + * ipa.c (cgraph_postorder): Likewise. + (cgraph_remove_unreachable_nodes): Likewise; sanity check + that inline clones are not needed. + (cgraph_externally_visible_p): New predicate. + (function_and_variable_visibility): Add whole_program parameter; + always set externally_visible flag; handle COMDAT function + privatization. + (local_function_and_variable_visibility): New function. + (gate_whole_program_function_and_variable_visibility): New function. + (whole_program_function_and_variable_visibility): New function. + (pass_ipa_whole_program_visibility): New pass. 
+ * passes.c (init_optimization_passes): Add whole program visibility + pass. + (do_per_function_toporder, function_called_by_processed_nodes_p): Do + not care about needed/reachable flags. + * varpool.c: Include flags.h. + (decide_is_variable_needed): When doing LTO assume whole-program mode. + (varpool_finalize_decl): When we are in LTO read-back, all variables + are analyzed. + (varpool_analyze_pending_decls): Skip analysis of analyzed vars. + 2009-10-07 Andreas Krebbel * config/s390/tpf.h (TARGET_DEFAULT): Remove MASK_HARD_FLOAT and diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 8dcf4e43edb..bc8f1015e62 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1359,6 +1359,7 @@ void cgraph_mark_needed_node (struct cgraph_node *node) { node->needed = 1; + gcc_assert (!node->global.inlined_to); cgraph_mark_reachable_node (node); } @@ -1682,6 +1683,7 @@ cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq, } new_node->analyzed = n->analyzed; new_node->local = n->local; + new_node->local.externally_visible = false; new_node->global = n->global; new_node->rtl = n->rtl; new_node->count = count; diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 845897b2841..438cd1e6fae 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -658,6 +658,26 @@ struct GTY(()) constant_descriptor_tree { hashval_t hash; }; +/* Return true when function NODE is only called directly, + i.e. it is not externally visible, address was not taken and + it is not used in any other non-standard way. */ + +static inline bool +cgraph_only_called_directly_p (struct cgraph_node *node) +{ + return !node->needed && !node->local.externally_visible; +} + +/* Return true when function NODE can be removed from callgraph + if all direct calls are eliminated. */ + +static inline bool +cgraph_can_remove_if_no_direct_calls_p (struct cgraph_node *node) +{ + return (!node->needed + && (DECL_COMDAT (node->decl) || !node->local.externally_visible)); +} + /* Constant pool accessor function. 
*/ htab_t constant_pool_htab (void); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 9a4f63d3aca..9a97bef2962 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -316,13 +316,6 @@ cgraph_build_cdtor_fns (void) bool cgraph_decide_is_function_needed (struct cgraph_node *node, tree decl) { - if (MAIN_NAME_P (DECL_NAME (decl)) - && TREE_PUBLIC (decl)) - { - node->local.externally_visible = true; - return true; - } - /* If the user told us it is used, then it must be so. */ if (node->local.externally_visible) return true; @@ -360,7 +353,9 @@ cgraph_decide_is_function_needed (struct cgraph_node *node, tree decl) || (!optimize && !node->local.disregard_inline_limits && !DECL_DECLARED_INLINE_P (decl) && !node->origin)) - && !flag_whole_program) + && !flag_whole_program + && !flag_lto + && !flag_whopr) && !DECL_COMDAT (decl) && !DECL_EXTERNAL (decl)) return true; @@ -593,6 +588,21 @@ verify_cgraph_node (struct cgraph_node *node) error ("Execution count is negative"); error_found = true; } + if (node->global.inlined_to && node->local.externally_visible) + { + error ("Externally visible inline clone"); + error_found = true; + } + if (node->global.inlined_to && node->address_taken) + { + error ("Inline clone with address taken"); + error_found = true; + } + if (node->global.inlined_to && node->needed) + { + error ("Inline clone is needed"); + error_found = true; + } for (e = node->callers; e; e = e->next_caller) { if (e->count < 0) @@ -864,12 +874,8 @@ process_function_and_variable_attributes (struct cgraph_node *first, warning_at (DECL_SOURCE_LOCATION (node->decl), OPT_Wattributes, "%" " attribute have effect only on public objects"); - else - { - if (node->local.finalized) - cgraph_mark_needed_node (node); - node->local.externally_visible = true; - } + else if (node->local.finalized) + cgraph_mark_needed_node (node); } } for (vnode = varpool_nodes; vnode != first_var; vnode = vnode->next) @@ -887,12 +893,8 @@ process_function_and_variable_attributes (struct 
cgraph_node *first, warning_at (DECL_SOURCE_LOCATION (vnode->decl), OPT_Wattributes, "%" " attribute have effect only on public objects"); - else - { - if (vnode->finalized) - varpool_mark_needed_node (vnode); - vnode->externally_visible = true; - } + else if (vnode->finalized) + varpool_mark_needed_node (vnode); } } } @@ -1355,7 +1357,9 @@ ipa_passes (void) current_function_decl = NULL; gimple_register_cfg_hooks (); bitmap_obstack_initialize (NULL); - execute_ipa_pass_list (all_small_ipa_passes); + + if (!in_lto_p) + execute_ipa_pass_list (all_small_ipa_passes); /* If pass_all_early_optimizations was not scheduled, the state of the cgraph will not be properly updated. Update it now. */ diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index a974dd0aab3..59a051915f6 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -442,7 +442,7 @@ ipcp_cloning_candidate_p (struct cgraph_node *node) FIXME: in future we should clone such functions when they are called with different constants, but current ipcp implementation is not good on this. */ - if (!node->needed || !node->analyzed) + if (cgraph_only_called_directly_p (node) || !node->analyzed) return false; if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) @@ -536,7 +536,7 @@ ipcp_initialize_node_lattices (struct cgraph_node *node) if (ipa_is_called_with_var_arguments (info)) type = IPA_BOTTOM; - else if (!node->needed) + else if (cgraph_only_called_directly_p (node)) type = IPA_TOP; /* When cloning is allowed, we can assume that externally visible functions are not called. We will compensate this by cloning later. 
*/ @@ -954,7 +954,7 @@ ipcp_estimate_growth (struct cgraph_node *node) struct cgraph_edge *cs; int redirectable_node_callers = 0; int removable_args = 0; - bool need_original = node->needed; + bool need_original = !cgraph_only_called_directly_p (node); struct ipa_node_params *info; int i, count; int growth; @@ -1143,7 +1143,7 @@ ipcp_insert_stage (void) for (cs = node->callers; cs != NULL; cs = cs->next_caller) if (cs->caller == node || ipcp_need_redirect_p (cs)) break; - if (!cs && !node->needed) + if (!cs && cgraph_only_called_directly_p (node)) bitmap_set_bit (dead_nodes, node->uid); info = IPA_NODE_REF (node); diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 0a02ae1770a..18e440a60fe 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -223,7 +223,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, /* We may eliminate the need for out-of-line copy to be output. In that case just go ahead and re-use it. */ if (!e->callee->callers->next_caller - && !e->callee->needed + && cgraph_can_remove_if_no_direct_calls_p (e->callee) && !cgraph_new_nodes) { gcc_assert (!e->callee->global.inlined_to); @@ -233,6 +233,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, nfunctions_inlined++; } duplicate = false; + e->callee->local.externally_visible = false; } else { @@ -286,7 +287,7 @@ cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original, e->callee->global.inlined = true; if (e->callee->callers->next_caller - || e->callee->needed) + || !cgraph_can_remove_if_no_direct_calls_p (e->callee)) duplicate = true; cgraph_clone_inlined_nodes (e, true, update_original); @@ -368,7 +369,8 @@ cgraph_estimate_growth (struct cgraph_node *node) we decide to not inline for different reasons, but it is not big deal as in that case we will keep the body around, but we will also avoid some inlining. 
*/ - if (!node->needed && !DECL_EXTERNAL (node->decl) && !self_recursive) + if (cgraph_only_called_directly_p (node) + && !DECL_EXTERNAL (node->decl) && !self_recursive) growth -= node->global.size; node->global.estimated_growth = growth; @@ -1226,7 +1228,7 @@ cgraph_decide_inlining (void) if (node->callers && !node->callers->next_caller - && !node->needed + && cgraph_only_called_directly_p (node) && node->local.inlinable && node->callers->inline_failed && node->callers->caller != node diff --git a/gcc/ipa.c b/gcc/ipa.c index 9204caae77b..50eade08cdd 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -52,7 +52,9 @@ cgraph_postorder (struct cgraph_node **order) for (pass = 0; pass < 2; pass++) for (node = cgraph_nodes; node; node = node->next) if (!node->aux - && (pass || (node->needed && !node->address_taken))) + && (pass + || (!cgraph_only_called_directly_p (node) + && !node->address_taken))) { node2 = node; if (!node->callers) @@ -132,11 +134,12 @@ cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file) gcc_assert (!node->aux); #endif for (node = cgraph_nodes; node; node = node->next) - if (node->needed && !node->global.inlined_to + if (!cgraph_can_remove_if_no_direct_calls_p (node) && ((!DECL_EXTERNAL (node->decl)) || !node->analyzed || before_inlining_p)) { + gcc_assert (!node->global.inlined_to); node->aux = first; first = node; } @@ -248,6 +251,26 @@ cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file) return changed; } +static bool +cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program) +{ + if (!DECL_COMDAT (node->decl) + && (!TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl))) + return false; + if (!whole_program) + return true; + /* COMDAT functions must be shared only if they have address taken, + otherwise we can produce our own private implementation with + -fwhole-program. 
*/ + if (DECL_COMDAT (node->decl) && (node->address_taken || !node->analyzed)) + return true; + if (MAIN_NAME_P (DECL_NAME (node->decl))) + return true; + if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (node->decl))) + return true; + return false; +} + /* Mark visibility of all functions. A local function is one whose calls can occur only in the current @@ -260,25 +283,29 @@ cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file) via visibilities for the backend point of view. */ static unsigned int -function_and_variable_visibility (void) +function_and_variable_visibility (bool whole_program) { struct cgraph_node *node; struct varpool_node *vnode; for (node = cgraph_nodes; node; node = node->next) { - if (node->reachable - && (DECL_COMDAT (node->decl) - || (!flag_whole_program - && TREE_PUBLIC (node->decl) && !DECL_EXTERNAL (node->decl)))) - node->local.externally_visible = true; + if (cgraph_externally_visible_p (node, whole_program)) + { + gcc_assert (!node->global.inlined_to); + node->local.externally_visible = true; + } + else + node->local.externally_visible = false; if (!node->local.externally_visible && node->analyzed && !DECL_EXTERNAL (node->decl)) { - gcc_assert (flag_whole_program || !TREE_PUBLIC (node->decl)); + gcc_assert (whole_program || !TREE_PUBLIC (node->decl)); TREE_PUBLIC (node->decl) = 0; + DECL_COMDAT (node->decl) = 0; + DECL_WEAK (node->decl) = 0; } - node->local.local = (!node->needed + node->local.local = (cgraph_only_called_directly_p (node) && node->analyzed && !DECL_EXTERNAL (node->decl) && !node->local.externally_visible); @@ -286,12 +313,16 @@ function_and_variable_visibility (void) for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed) { if (vnode->needed - && !flag_whole_program - && (DECL_COMDAT (vnode->decl) || TREE_PUBLIC (vnode->decl))) - vnode->externally_visible = 1; + && (DECL_COMDAT (vnode->decl) || TREE_PUBLIC (vnode->decl)) + && (!whole_program + || lookup_attribute 
("externally_visible", + DECL_ATTRIBUTES (vnode->decl)))) + vnode->externally_visible = true; + else + vnode->externally_visible = false; if (!vnode->externally_visible) { - gcc_assert (flag_whole_program || !TREE_PUBLIC (vnode->decl)); + gcc_assert (whole_program || !TREE_PUBLIC (vnode->decl)); TREE_PUBLIC (vnode->decl) = 0; } gcc_assert (TREE_STATIC (vnode->decl)); @@ -314,13 +345,22 @@ function_and_variable_visibility (void) return 0; } +/* Local function pass handling visibilities. This happens before LTO streaming + so in particular -fwhole-program should be ignored at this level. */ + +static unsigned int +local_function_and_variable_visibility (void) +{ + return function_and_variable_visibility (flag_whole_program && !flag_lto && !flag_whopr); +} + struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility = { { SIMPLE_IPA_PASS, "visibility", /* name */ NULL, /* gate */ - function_and_variable_visibility, /* execute */ + local_function_and_variable_visibility,/* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ @@ -333,6 +373,58 @@ struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility = } }; +/* Do not re-run on ltrans stage. */ + +static bool +gate_whole_program_function_and_variable_visibility (void) +{ + return !flag_ltrans; +} + +/* Bring functions local at LTO time with -fwhole-program. 
*/ + +static unsigned int +whole_program_function_and_variable_visibility (void) +{ + struct cgraph_node *node; + struct varpool_node *vnode; + + function_and_variable_visibility (flag_whole_program); + + for (node = cgraph_nodes; node; node = node->next) + if (node->local.externally_visible) + cgraph_mark_needed_node (node); + for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed) + if (vnode->externally_visible) + varpool_mark_needed_node (vnode); + return 0; +} + +struct ipa_opt_pass_d pass_ipa_whole_program_visibility = +{ + { + IPA_PASS, + "whole-program", /* name */ + gate_whole_program_function_and_variable_visibility,/* gate */ + whole_program_function_and_variable_visibility,/* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_CGRAPHOPT, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_cgraph | TODO_remove_functions/* todo_flags_finish */ + }, + NULL, /* generate_summary */ + NULL, /* write_summary */ + NULL, /* read_summary */ + NULL, /* function_read_summary */ + 0, /* TODOs */ + NULL, /* function_transform */ + NULL, /* variable_transform */ +}; /* Hash a cgraph node set element. */ diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 14916473640..fbf0df21c24 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -227,6 +227,8 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, local static nodes to prevent clashes with other local statics. */ if (boundary_p) { + /* Inline clones can not be part of boundary. 
+ gcc_assert (!node->global.inlined_to); local = 0; externally_visible = 1; inlinable = 0; diff --git a/gcc/lto-symtab.c b/gcc/lto-symtab.c index 600f75b41e7..7d30448447a 100644 --- a/gcc/lto-symtab.c +++ b/gcc/lto-symtab.c @@ -388,7 +388,10 @@ lto_cgraph_replace_node (struct cgraph_node *old_node, if (old_node->reachable) cgraph_mark_reachable_node (new_node); if (old_node->address_taken) - cgraph_mark_address_taken_node (new_node); + { + gcc_assert (!new_node->global.inlined_to); + cgraph_mark_address_taken_node (new_node); + } /* Redirect all incoming edges. */ for (e = old_node->callers; e; e = next) diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index c4ee42ae6a2..dae0367d5da 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,8 @@ +2009-10-07 Jan Hubicka + + * lto.c (read_cgraph_and_symbols): Mark functions necessary only at + ltrans stage; explain why this is needed and why it should not be. + 2009-10-05 Richard Guenther PR lto/41552 diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index daefa823db3..ab4c208670d 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -1824,11 +1824,18 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames) /* Merge global decls. */ lto_symtab_merge_decls (); - /* Mark cgraph nodes needed in the merged cgraph. - ??? Is this really necessary? */ - for (node = cgraph_nodes; node; node = node->next) - if (cgraph_decide_is_function_needed (node, node->decl)) - cgraph_mark_needed_node (node); + /* Mark cgraph nodes needed in the merged cgraph. + This normally happens in whole-program pass, but for + ltrans the pass was already run at WPA phase. + + FIXME: This is not a valid way to do so; nodes can be needed + for non-obvious reasons. We should stream the flags from WPA + phase. 
*/ + if (flag_ltrans) + for (node = cgraph_nodes; node; node = node->next) + if (!node->global.inlined_to + && cgraph_decide_is_function_needed (node, node->decl)) + cgraph_mark_needed_node (node); timevar_push (TV_IPA_LTO_DECL_IO); diff --git a/gcc/passes.c b/gcc/passes.c index e3fd7a8454d..60a850969c2 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -759,6 +759,7 @@ init_optimization_passes (void) *p = NULL; p = &all_regular_ipa_passes; + NEXT_PASS (pass_ipa_whole_program_visibility); NEXT_PASS (pass_ipa_cp); NEXT_PASS (pass_ipa_inline); NEXT_PASS (pass_ipa_reference); @@ -1099,7 +1100,7 @@ do_per_function_toporder (void (*callback) (void *data), void *data) /* Allow possibly removed nodes to be garbage collected. */ order[i] = NULL; node->process = 0; - if (node->analyzed && (node->needed || node->reachable)) + if (node->analyzed) { push_cfun (DECL_STRUCT_FUNCTION (node->decl)); current_function_decl = node->decl; @@ -1783,7 +1784,7 @@ function_called_by_processed_nodes_p (void) { if (e->caller->decl == current_function_decl) continue; - if (!e->caller->analyzed || (!e->caller->needed && !e->caller->reachable)) + if (!e->caller->analyzed) continue; if (TREE_ASM_WRITTEN (e->caller->decl)) continue; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index ae510494c86..2cbe3e4b448 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -437,6 +437,7 @@ extern struct simple_ipa_opt_pass pass_ipa_early_inline; extern struct simple_ipa_opt_pass pass_early_local_passes; +extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility; extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out; extern struct simple_ipa_opt_pass pass_ipa_increase_alignment; extern struct simple_ipa_opt_pass pass_ipa_matrix_reorg; diff --git a/gcc/varpool.c b/gcc/varpool.c index 12cdad90e28..f7b338ff44b 100644 --- a/gcc/varpool.c +++ b/gcc/varpool.c @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. 
If not see #include "output.h" #include "gimple.h" #include "tree-flow.h" +#include "flags.h" /* This file contains basic routines manipulating variable pool. @@ -245,7 +246,11 @@ decide_is_variable_needed (struct varpool_node *node, tree decl) /* Externally visible variables must be output. The exception is COMDAT variables that must be output only when they are needed. */ - if (TREE_PUBLIC (decl) && !flag_whole_program && !DECL_COMDAT (decl) + if (TREE_PUBLIC (decl) + && !flag_whole_program + && !flag_lto + && !flag_whopr + && !DECL_COMDAT (decl) && !DECL_EXTERNAL (decl)) return true; @@ -279,6 +284,17 @@ varpool_finalize_decl (tree decl) { struct varpool_node *node = varpool_node (decl); + /* FIXME: We don't really stream varpool datastructure and instead rebuild it + by varpool_finalize_decl. This is not quite correct since this way we can't + attach any info to varpool. Eventually we will want to stream varpool nodes + and the flags. + + For the moment just prevent analysis of varpool nodes from happening again, so + we will not re-try to compute "address_taken" flag of varpool that breaks + in presence of clones. */ + if (in_lto_p) + node->analyzed = true; + /* The first declaration of a variable that comes through this function decides whether it is global (in C, has external linkage) or local (in C, has internal linkage). So do nothing more @@ -333,17 +349,25 @@ varpool_analyze_pending_decls (void) while (varpool_first_unanalyzed_node) { tree decl = varpool_first_unanalyzed_node->decl; + bool analyzed = varpool_first_unanalyzed_node->analyzed; varpool_first_unanalyzed_node->analyzed = true; varpool_first_unanalyzed_node = varpool_first_unanalyzed_node->next_needed; - /* Compute the alignment early so function body expanders are - already informed about increased alignment. */ - align_variable (decl, 0); + /* When reading back varpool at LTO time, we re-construct the queue in order + to have "needed" list right by inserting all needed nodes into varpool. 
+ We however don't want to re-analyze already analyzed nodes. */ + if (!analyzed) + { + gcc_assert (!in_lto_p); + /* Compute the alignment early so function body expanders are + already informed about increased alignment. */ + align_variable (decl, 0); - if (DECL_INITIAL (decl)) - record_references_in_initializer (decl); + if (DECL_INITIAL (decl)) + record_references_in_initializer (decl); + } changed = true; } timevar_pop (TV_CGRAPH);