Materialize clones on demand

This patch removes the pass that materializes all clones; instead,
materialization is now done on demand.  The motivation is to reduce the
lifetime of function bodies in ltrans, which should noticeably reduce
memory use for highly parallel compilations of large programs (like
Martin does) or with partitioning reduced/disabled.  For cc1 with one
partition the memory use seems to go down from 4GB to about 1.5GB (as
seen in top, so this is not particularly accurate).

gcc/ChangeLog:

2020-10-22  Jan Hubicka  <hubicka@ucw.cz>

	* cgraph.c (cgraph_node::get_untransformed_body): Perform lazy
	clone materialization.
	* cgraph.h (cgraph_node::materialize_clone): Declare.
	(symbol_table::materialize_all_clones): Remove.
	* cgraphclones.c (cgraph_materialize_clone): Turn to ...
	(cgraph_node::materialize_clone): ... this one; move dumping here
	from symbol_table::materialize_all_clones.
	(symbol_table::materialize_all_clones): Remove.
	* cgraphunit.c (mark_functions_to_output): Clear stmt references.
	(cgraph_node::expand): Initialize bitmaps early;
	do not call execute_all_ipa_transforms if there are no transforms.
	* ipa-inline-transform.c (save_inline_function_body): Fix formatting.
	(inline_transform): Materialize all clones before function is modified.
	* ipa-param-manipulation.c (ipa_param_adjustments::modify_call):
	Materialize clone if needed.
	* ipa.c (class pass_materialize_all_clones): Remove.
	(make_pass_materialize_all_clones): Remove.
	* passes.c (execute_all_ipa_transforms): Materialize all clones.
	* passes.def: Remove pass_materialize_all_clones.
	* tree-pass.h (make_pass_materialize_all_clones): Remove.
	* tree-ssa-structalias.c (ipa_pta_execute): Clear refs.
This commit is contained in:
Jan Hubicka 2020-10-22 17:32:32 +02:00
parent c26d7df103
commit 0e590b68fa
11 changed files with 94 additions and 159 deletions

View File

@ -3872,16 +3872,27 @@ cgraph_node::function_or_virtual_thunk_symbol
}
/* When doing LTO, read cgraph_node's body from disk if it is not already
present. */
present. Also perform any necessary clone materializations. */
bool
cgraph_node::get_untransformed_body (void)
cgraph_node::get_untransformed_body ()
{
lto_file_decl_data *file_data;
const char *data, *name;
size_t len;
tree decl = this->decl;
/* See if there is clone to be materialized.
(inline clones does not need materialization, but we can be seeing
an inline clone of real clone). */
cgraph_node *p = this;
for (cgraph_node *c = clone_of; c; c = c->clone_of)
{
if (c->decl != decl)
p->materialize_clone ();
p = c;
}
/* Check if body is already there. Either we have gimple body or
the function is thunk and in that case we set DECL_ARGUMENTS. */
if (DECL_ARGUMENTS (decl) || gimple_has_body_p (decl))

View File

@ -1145,12 +1145,14 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
/* When doing LTO, read cgraph_node's body from disk if it is not already
present. */
bool get_untransformed_body (void);
bool get_untransformed_body ();
/* Prepare function body. When doing LTO, read cgraph_node's body from disk
if it is not already present. When some IPA transformations are scheduled,
apply them. */
bool get_body (void);
bool get_body ();
void materialize_clone (void);
/* Release memory used to represent body of function.
Use this only for functions that are released before being translated to
@ -2286,13 +2288,6 @@ public:
functions inserted into callgraph already at construction time. */
void process_new_functions (void);
/* Once all functions from compilation unit are in memory, produce all clones
and update all calls. We might also do this on demand if we don't want to
bring all functions to memory prior compilation, but current WHOPR
implementation does that and it is bit easier to keep everything right
in this order. */
void materialize_all_clones (void);
/* Register a symbol NODE. */
inline void register_symbol (symtab_node *node);

View File

@ -1083,114 +1083,57 @@ void cgraph_node::remove_from_clone_tree ()
/* Given virtual clone, turn it into actual clone. */
static void
cgraph_materialize_clone (cgraph_node *node)
void
cgraph_node::materialize_clone ()
{
bitmap_obstack_initialize (NULL);
node->former_clone_of = node->clone_of->decl;
if (node->clone_of->former_clone_of)
node->former_clone_of = node->clone_of->former_clone_of;
clone_of->get_untransformed_body ();
former_clone_of = clone_of->decl;
if (clone_of->former_clone_of)
former_clone_of = clone_of->former_clone_of;
if (symtab->dump_file)
{
fprintf (symtab->dump_file, "cloning %s to %s\n",
clone_of->dump_name (),
dump_name ());
if (clone.tree_map)
{
fprintf (symtab->dump_file, " replace map:");
for (unsigned int i = 0;
i < vec_safe_length (clone.tree_map);
i++)
{
ipa_replace_map *replace_info;
replace_info = (*clone.tree_map)[i];
fprintf (symtab->dump_file, "%s %i -> ",
i ? "," : "", replace_info->parm_num);
print_generic_expr (symtab->dump_file,
replace_info->new_tree);
}
fprintf (symtab->dump_file, "\n");
}
if (clone.param_adjustments)
clone.param_adjustments->dump (symtab->dump_file);
}
/* Copy the OLD_VERSION_NODE function tree to the new version. */
tree_function_versioning (node->clone_of->decl, node->decl,
node->clone.tree_map, node->clone.param_adjustments,
tree_function_versioning (clone_of->decl, decl,
clone.tree_map, clone.param_adjustments,
true, NULL, NULL);
if (symtab->dump_file)
{
dump_function_to_file (node->clone_of->decl, symtab->dump_file,
dump_function_to_file (clone_of->decl, symtab->dump_file,
dump_flags);
dump_function_to_file (node->decl, symtab->dump_file, dump_flags);
dump_function_to_file (decl, symtab->dump_file, dump_flags);
}
cgraph_node *clone_of = node->clone_of;
cgraph_node *this_clone_of = clone_of;
/* Function is no longer clone. */
node->remove_from_clone_tree ();
if (!clone_of->analyzed && !clone_of->clones)
remove_from_clone_tree ();
if (!this_clone_of->analyzed && !this_clone_of->clones)
{
clone_of->release_body ();
clone_of->remove_callees ();
clone_of->remove_all_references ();
this_clone_of->release_body ();
this_clone_of->remove_callees ();
this_clone_of->remove_all_references ();
}
bitmap_obstack_release (NULL);
}
/* Once all functions from compilation unit are in memory, produce all clones
and update all calls. We might also do this on demand if we don't want to
bring all functions to memory prior compilation, but current WHOPR
implementation does that and it is a bit easier to keep everything right in
this order. */
void
symbol_table::materialize_all_clones (void)
{
cgraph_node *node;
bool stabilized = false;
if (symtab->dump_file)
fprintf (symtab->dump_file, "Materializing clones\n");
cgraph_node::checking_verify_cgraph_nodes ();
/* We can also do topological order, but number of iterations should be
bounded by number of IPA passes since single IPA pass is probably not
going to create clones of clones it created itself. */
while (!stabilized)
{
stabilized = true;
FOR_EACH_FUNCTION (node)
{
if (node->clone_of && node->decl != node->clone_of->decl
&& !gimple_has_body_p (node->decl))
{
if (!node->clone_of->clone_of)
node->clone_of->get_untransformed_body ();
if (gimple_has_body_p (node->clone_of->decl))
{
if (symtab->dump_file)
{
fprintf (symtab->dump_file, "cloning %s to %s\n",
node->clone_of->dump_name (),
node->dump_name ());
if (node->clone.tree_map)
{
unsigned int i;
fprintf (symtab->dump_file, " replace map:");
for (i = 0;
i < vec_safe_length (node->clone.tree_map);
i++)
{
ipa_replace_map *replace_info;
replace_info = (*node->clone.tree_map)[i];
fprintf (symtab->dump_file, "%s %i -> ",
i ? "," : "", replace_info->parm_num);
print_generic_expr (symtab->dump_file,
replace_info->new_tree);
}
fprintf (symtab->dump_file, "\n");
}
if (node->clone.param_adjustments)
node->clone.param_adjustments->dump (symtab->dump_file);
}
cgraph_materialize_clone (node);
stabilized = false;
}
}
}
}
FOR_EACH_FUNCTION (node)
if (!node->analyzed && node->callees)
{
node->remove_callees ();
node->remove_all_references ();
}
else
node->clear_stmts_in_references ();
if (symtab->dump_file)
fprintf (symtab->dump_file, "Materialization Call site updates done.\n");
cgraph_node::checking_verify_cgraph_nodes ();
symtab->remove_unreachable_nodes (symtab->dump_file);
}
#include "gt-cgraphclones.h"

View File

@ -1601,6 +1601,7 @@ mark_functions_to_output (void)
FOR_EACH_FUNCTION (node)
{
tree decl = node->decl;
node->clear_stmts_in_references ();
gcc_assert (!node->process || node->same_comdat_group);
if (node->process)
@ -2274,6 +2275,9 @@ cgraph_node::expand (void)
announce_function (decl);
process = 0;
gcc_assert (lowered);
/* Initialize the default bitmap obstack. */
bitmap_obstack_initialize (NULL);
get_untransformed_body ();
/* Generate RTL for the body of DECL. */
@ -2282,9 +2286,6 @@ cgraph_node::expand (void)
gcc_assert (symtab->global_info_ready);
/* Initialize the default bitmap obstack. */
bitmap_obstack_initialize (NULL);
/* Initialize the RTL code for the function. */
saved_loc = input_location;
input_location = DECL_SOURCE_LOCATION (decl);
@ -2298,7 +2299,8 @@ cgraph_node::expand (void)
bitmap_obstack_initialize (&reg_obstack); /* FIXME, only at RTL generation*/
update_ssa (TODO_update_ssa_only_virtuals);
execute_all_ipa_transforms (false);
if (ipa_transforms_to_apply.exists ())
execute_all_ipa_transforms (false);
/* Perform all tree transforms and optimizations. */

View File

@ -644,16 +644,16 @@ save_inline_function_body (struct cgraph_node *node)
tree_function_versioning (node->decl, first_clone->decl,
NULL, NULL, true, NULL, NULL);
/* The function will be short lived and removed after we inline all the clones,
but make it internal so we won't confuse ourself. */
/* The function will be short lived and removed after we inline all the
clones, but make it internal so we won't confuse ourself. */
DECL_EXTERNAL (first_clone->decl) = 0;
TREE_PUBLIC (first_clone->decl) = 0;
DECL_COMDAT (first_clone->decl) = 0;
first_clone->ipa_transforms_to_apply.release ();
/* When doing recursive inlining, the clone may become unnecessary.
This is possible i.e. in the case when the recursive function is proved to be
non-throwing and the recursion happens only in the EH landing pad.
This is possible i.e. in the case when the recursive function is proved to
be non-throwing and the recursion happens only in the EH landing pad.
We cannot remove the clone until we are done with saving the body.
Remove it now. */
if (!first_clone->callers)
@ -696,6 +696,14 @@ inline_transform (struct cgraph_node *node)
if (cfun->after_inlining)
return 0;
cgraph_node *next_clone;
for (cgraph_node *n = node->clones; n; n = next_clone)
{
next_clone = n->next_sibling_clone;
if (n->decl != node->decl)
n->materialize_clone ();
}
/* We might need the body of this function so that we can expand
it inline somewhere else. */
if (preserve_function_body_p (node))

View File

@ -783,6 +783,13 @@ ipa_param_adjustments::modify_call (gcall *stmt,
{
vec<tree, va_gc> **debug_args = NULL;
unsigned i = 0;
cgraph_node *callee_node = cgraph_node::get (callee_decl);
/* FIXME: we don't seem to be able to insert debug args before clone
is materialized. Materializing them early leads to extra memory
use. */
if (callee_node->clone_of)
callee_node->get_untransformed_body ();
for (tree old_parm = DECL_ARGUMENTS (old_decl);
old_parm && i < old_nargs && ((int) i) < m_always_copy_start;
old_parm = DECL_CHAIN (old_parm), i++)

View File

@ -1386,43 +1386,3 @@ make_pass_ipa_single_use (gcc::context *ctxt)
return new pass_ipa_single_use (ctxt);
}
/* Materialize all clones. */
namespace {
const pass_data pass_data_materialize_all_clones =
{
SIMPLE_IPA_PASS, /* type */
"materialize-all-clones", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_IPA_OPT, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
class pass_materialize_all_clones : public simple_ipa_opt_pass
{
public:
pass_materialize_all_clones (gcc::context *ctxt)
: simple_ipa_opt_pass (pass_data_materialize_all_clones, ctxt)
{}
/* opt_pass methods: */
virtual unsigned int execute (function *)
{
symtab->materialize_all_clones ();
return 0;
}
}; // class pass_materialize_all_clones
} // anon namespace
simple_ipa_opt_pass *
make_pass_materialize_all_clones (gcc::context *ctxt)
{
return new pass_materialize_all_clones (ctxt);
}

View File

@ -2271,6 +2271,14 @@ execute_all_ipa_transforms (bool do_not_collect)
return;
node = cgraph_node::get (current_function_decl);
cgraph_node *next_clone;
for (cgraph_node *n = node->clones; n; n = next_clone)
{
next_clone = n->next_sibling_clone;
if (n->decl != node->decl)
n->materialize_clone ();
}
if (node->ipa_transforms_to_apply.exists ())
{
unsigned int i;

View File

@ -172,7 +172,6 @@ along with GCC; see the file COPYING3. If not see
passes are executed after partitioning and thus see just parts of the
compiled unit. */
INSERT_PASSES_AFTER (all_late_ipa_passes)
NEXT_PASS (pass_materialize_all_clones);
NEXT_PASS (pass_ipa_pta);
NEXT_PASS (pass_omp_simd_clone);
TERMINATE_PASS_LIST (all_late_ipa_passes)

View File

@ -519,8 +519,6 @@ extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_single_use (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_comdats (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_modref (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_materialize_all_clones (gcc::context *
ctxt);
extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context
*ctxt);

View File

@ -8138,6 +8138,10 @@ ipa_pta_execute (void)
from = constraints.length ();
}
/* FIXME: Clone materialization is not preserving stmt references. */
FOR_EACH_DEFINED_FUNCTION (node)
node->clear_stmts_in_references ();
/* Build the constraints. */
FOR_EACH_DEFINED_FUNCTION (node)
{