gcc/gcc/ipa-cp.c
Martin Jambor 438789ffe3 re PR tree-optimization/42366 (ICE in ipa_write_node_info, at ipa-prop.c:2023)
2010-01-04  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/42366
	* ipa-cp.c (ipcp_init_stage): Always call ipa_compute_jump_functions on
	edges with variable number of parameters.
	* ipa-prop.c (ipa_write_node_info): Stream out uses_analysis_done
	flag instead of asserting it.
	(ipa_read_node_info): Read uses_analysis_done flag.

From-SVN: r155630
2010-01-04 19:18:54 +01:00

1340 lines
38 KiB
C

/* Interprocedural constant propagation
Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* Interprocedural constant propagation. The aim of interprocedural constant
propagation (IPCP) is to find which function's argument has the same
constant value in each invocation throughout the whole program. For example,
consider the following program:
int g (int y)
{
printf ("value is %d",y);
}
int f (int x)
{
g (x);
}
int h (int y)
{
g (y);
}
void main (void)
{
f (3);
h (3);
}
The IPCP algorithm will find that g's formal argument y is always called
with the value 3.
The algorithm used is based on "Interprocedural Constant Propagation", by
David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
152-161
The optimization is divided into three stages:
First stage - intraprocedural analysis
=======================================
This phase computes jump_function and modification flags.
A jump function for a callsite represents the values passed as actual
arguments of a given callsite. There are three types of values:
Pass through - the caller's formal parameter is passed as an actual argument.
Constant - a constant is passed as an actual argument.
Unknown - neither of the above.
The jump function info, ipa_jump_func, is stored in the ipa_edge_args
structure (defined in ipa-prop.h and associated with a cgraph_edge; see
IPA_EDGE_REF). modified_flags are defined in the ipa_node_params structure
(defined in ipa-prop.h and associated with a cgraph_node; see IPA_NODE_REF).
-ipcp_init_stage() is the first stage driver.
Second stage - interprocedural analysis
========================================
This phase does the interprocedural constant propagation.
It computes lattices for all formal parameters in the program
and their value that may be:
TOP - unknown.
BOTTOM - non constant.
CONSTANT - constant value.
Lattice describing a formal parameter p will have a constant value if all
callsites invoking this function have the same constant value passed to p.
The lattices are stored in ipcp_lattice which is itself in the ipa_node_params
structure (defined in ipa-prop.h and associated with a cgraph_node; see
IPA_NODE_REF).
-ipcp_iterate_stage() is the second stage driver.
Third phase - transformation of function code
============================================
Propagates the constant-valued formals into the function.
For each function whose parameters are constants, we create its clone.
Then we process the clone in two ways:
1. We insert an assignment statement 'parameter = const' at the beginning
of the cloned function.
2. For read-only parameters that do not live in memory, we replace all their
uses with the constant.
We also need to modify some callsites to call the cloned functions instead
of the original ones. For a callsite passing an argument found to be a
constant by IPCP, there are two different cases to handle:
1. A constant is passed as an argument. In this case the callsite
should be redirected to call the cloned callee.
2. A parameter (of the caller) passed as an argument (pass through
argument). In such cases both the caller and the callee have clones and
only the callsite in the cloned caller is redirected to call to the
cloned callee.
This update is done in two steps: First all cloned functions are created
during a traversal of the call graph, during which all callsites are
redirected to call the cloned function. Then the callsites are traversed
and many calls redirected back to fit the description above.
-ipcp_insert_stage() is the third phase driver.
*/
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "target.h"
#include "cgraph.h"
#include "ipa-prop.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "flags.h"
#include "timevar.h"
#include "diagnostic.h"
#include "tree-dump.h"
#include "tree-inline.h"
#include "fibheap.h"
#include "params.h"
/* Number of functions identified as candidates for cloning. When no function
is a candidate, the iterate stage can be simplified because it does not have
to go through the decision on what is profitable and what is not. The counter
is reset at the start of ipcp_iterate_stage and incremented by
ipcp_initialize_node_lattices. */
static int n_cloning_candidates;
/* Maximal profile count found among the analyzed nodes of the program;
updated in ipcp_insert_stage. Zero is treated as "no profile available"
by the cost heuristics. */
static gcov_type max_count;
/* Cgraph nodes that have been completely replaced by cloning during the
insert stage and will be removed after ipcp is finished. The bitmap is
indexed by node uid and is allocated and freed by ipcp_insert_stage. */
static bitmap dead_nodes;
/* Forward declarations of dump helpers that are used before their
definitions. */
static void ipcp_print_profile_data (FILE *);
static void ipcp_function_scale_print (FILE *);
/* Return the value of the ipcp_orig_node field stored in the
   ipa_node_params that belong to NODE.  */

static inline struct cgraph_node *
ipcp_get_orig_node (struct cgraph_node *node)
{
  struct cgraph_node *orig = IPA_NODE_REF (node)->ipcp_orig_node;

  return orig;
}
/* Return true when NODE has an original node recorded, i.e. when it
   describes a cloned/versioned function.  */

static inline bool
ipcp_node_is_clone (struct cgraph_node *node)
{
  struct cgraph_node *orig = ipcp_get_orig_node (node);

  return orig != NULL;
}
/* Set up the ipa_node_params of the freshly created clone NEW_NODE and
   record ORIG_NODE in its ipcp_orig_node field.  */

static void
ipcp_init_cloned_node (struct cgraph_node *orig_node,
                       struct cgraph_node *new_node)
{
  /* Make sure the node parameter storage exists before touching it.  */
  ipa_check_create_node_params ();
  ipa_initialize_node_params (new_node);

  /* Remember where the clone came from.  */
  IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
}
/* Perform the intraprocedural analysis of NODE that ipcp needs:
   initialize its parameter descriptors and detect which parameters are
   modified.  */

static void
ipcp_analyze_node (struct cgraph_node *node)
{
  /* Unreachable nodes are expected to have been removed before ipcp
     runs.  */
  gcc_assert (node->needed || node->reachable);

  ipa_initialize_node_params (node);
  ipa_detect_param_modifications (node);
}
/* Return the count_scale recorded in the ipa_node_params of NODE.  */

static inline gcov_type
ipcp_get_node_scale (struct cgraph_node *node)
{
  gcov_type scale = IPA_NODE_REF (node)->count_scale;

  return scale;
}
/* Store COUNT as the count_scale in the ipa_node_params of NODE.  */

static inline void
ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
{
  IPA_NODE_REF (node)->count_scale = count;
}
/* Return true iff the type of LAT is IPA_CONST_VALUE.  */

static inline bool
ipcp_lat_is_const (struct ipcp_lattice *lat)
{
  return lat->type == IPA_CONST_VALUE;
}
/* Return whether LAT holds a constant that ipa-cp can insert into the
   code.  As implemented here, this is the case exactly when the lattice
   type is IPA_CONST_VALUE.  */

static inline bool
ipcp_lat_is_insertable (struct ipcp_lattice *lat)
{
  bool insertable = (lat->type == IPA_CONST_VALUE);

  return insertable;
}
/* Return true if the constant lattices LAT1 and LAT2 are equal, i.e. they
   have the same type and operand_equal_p considers their constants equal.
   Both lattices must be constant lattices; this is asserted.  */

static inline bool
ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
{
  gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));

  if (lat1->type != lat2->type)
    return false;

  return operand_equal_p (lat1->constant, lat2->constant, 0) != 0;
}
/* Store into RES the meet of LAT1 and LAT2:
     Meet (IPA_BOTTOM, x) = IPA_BOTTOM
     Meet (IPA_TOP, x) = x
     Meet (const_a, const_b) = IPA_BOTTOM, if const_a != const_b.
     Meet (const_a, const_b) = const_a, if const_a == const_b.
   When the result is IPA_BOTTOM only RES->type is written; RES->constant
   is left untouched.  */

static void
ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
                  struct ipcp_lattice *lat2)
{
  /* The lattice whose contents become the result, or NULL when the result
     is IPA_BOTTOM.  */
  struct ipcp_lattice *winner;

  if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
    winner = NULL;
  else if (lat1->type == IPA_TOP)
    winner = lat2;
  else if (lat2->type == IPA_TOP)
    winner = lat1;
  else if (ipcp_lats_are_equal (lat1, lat2))
    winner = lat1;
  else
    winner = NULL;

  if (!winner)
    {
      res->type = IPA_BOTTOM;
      return;
    }

  res->type = winner->type;
  res->constant = winner->constant;
}
/* Return a pointer to the lattice of the I-th formal parameter of the
   function described by INFO.  No bounds check is done on I.  */

static inline struct ipcp_lattice *
ipcp_get_lattice (struct ipa_node_params *info, int i)
{
  struct ipcp_lattice *lat = &info->params[i].ipcp_lattice;

  return lat;
}
/* Given the jump function JFUNC, compute the lattice LAT that describes the
value coming down the callsite. INFO describes the caller node so that
pass-through jump functions can be evaluated. */
static void
ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat,
struct ipa_jump_func *jfunc)
{
if (jfunc->type == IPA_JF_CONST)
{
lat->type = IPA_CONST_VALUE;
lat->constant = jfunc->value.constant;
}
else if (jfunc->type == IPA_JF_PASS_THROUGH)
{
struct ipcp_lattice *caller_lat;
tree cst;
caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id);
lat->type = caller_lat->type;
if (caller_lat->type != IPA_CONST_VALUE)
return;
cst = caller_lat->constant;
if (jfunc->value.pass_through.operation != NOP_EXPR)
{
tree restype;
if (TREE_CODE_CLASS (jfunc->value.pass_through.operation)
== tcc_comparison)
restype = boolean_type_node;
else
restype = TREE_TYPE (cst);
cst = fold_binary (jfunc->value.pass_through.operation,
restype, cst, jfunc->value.pass_through.operand);
}
if (!cst || !is_gimple_ip_invariant (cst))
lat->type = IPA_BOTTOM;
lat->constant = cst;
}
else if (jfunc->type == IPA_JF_ANCESTOR)
{
struct ipcp_lattice *caller_lat;
tree t;
bool ok;
caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id);
lat->type = caller_lat->type;
if (caller_lat->type != IPA_CONST_VALUE)
return;
if (TREE_CODE (caller_lat->constant) != ADDR_EXPR)
{
/* This can happen when the constant is a NULL pointer. */
lat->type = IPA_BOTTOM;
return;
}
t = TREE_OPERAND (caller_lat->constant, 0);
ok = build_ref_for_offset (&t, TREE_TYPE (t),
jfunc->value.ancestor.offset,
jfunc->value.ancestor.type, false);
if (!ok)
{
lat->type = IPA_BOTTOM;
lat->constant = NULL_TREE;
}
else
lat->constant = build_fold_addr_expr (t);
}
else
lat->type = IPA_BOTTOM;
}
/* Return true when OLD_LAT and NEW_LAT do not describe the same value:
   either their types differ, or both are constant lattices holding
   different constants.  */

static bool
ipcp_lattice_changed (struct ipcp_lattice *old_lat,
                      struct ipcp_lattice *new_lat)
{
  if (old_lat->type != new_lat->type)
    return true;

  /* Same non-constant type: nothing else to compare.  */
  if (!ipcp_lat_is_const (old_lat))
    return false;

  return !ipcp_lats_are_equal (old_lat, new_lat);
}
/* Dump to F the lattice of every formal parameter of every analyzed
   function in the call graph.  */

static void
ipcp_print_all_lattices (FILE * f)
{
  struct cgraph_node *node;

  fprintf (f, "\nLattice:\n");
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        struct ipa_node_params *info = IPA_NODE_REF (node);
        int param_count, idx;

        fprintf (f, " Node: %s:\n", cgraph_node_name (node));
        param_count = ipa_get_param_count (info);
        for (idx = 0; idx < param_count; idx++)
          {
            struct ipcp_lattice *lat = ipcp_get_lattice (info, idx);

            fprintf (f, " param [%d]: ", idx);
            switch (lat->type)
              {
              case IPA_CONST_VALUE:
                fprintf (f, "type is CONST ");
                print_generic_expr (f, lat->constant, 0);
                fprintf (f, "\n");
                break;

              case IPA_TOP:
                fprintf (f, "type is TOP\n");
                break;

              default:
                fprintf (f, "type is BOTTOM\n");
                break;
              }
          }
      }
}
/* Return true if the ipcp algorithms are able to clone NODE.  A function
   is rejected when it is not versionable in general, when it takes
   variable arguments, or when its body calls __builtin_apply_args.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  tree decl = node->decl;
  basic_block bb;

  /* Generic reasons a function cannot be versioned, and stdarg functions
     (removing arguments does not work for them).  */
  if (!tree_versionable_function_p (decl)
      || DECL_STRUCT_FUNCTION (decl)->stdarg)
    return false;

  /* Removing arguments doesn't work if we use __builtin_apply_args, so
     scan every statement for a call to it.  */
  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          const_gimple stmt = gsi_stmt (gsi);
          tree callee_decl;

          if (!is_gimple_call (stmt))
            continue;

          callee_decl = gimple_call_fndecl (stmt);
          if (callee_decl != NULL_TREE
              && DECL_BUILT_IN_CLASS (callee_decl) == BUILT_IN_NORMAL
              && DECL_FUNCTION_CODE (callee_decl) == BUILT_IN_APPLY_ARGS)
            return false;
        }
    }

  return true;
}
/* Return true if NODE is a viable candidate for cloning.  The decision is
   a sequence of filters; every decision taken after the first filter is
   reported to the dump file when one is open.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  gcov_type caller_count_sum = 0;
  int caller_edges = 0;
  int hot_caller_edges = 0;

  /* Nodes that are only called directly and nodes without an analyzed
     body are never candidates here.
     FIXME: in future we should clone such functions when they are called
     with different constants, but current ipcp implementation is not good
     on this.  */
  if (cgraph_only_called_directly_p (node) || !node->analyzed)
    return false;

  if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n",
                 cgraph_node_name (node));
      return false;
    }

  if (!ipcp_versionable_function_p (node))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
                 cgraph_node_name (node));
      return false;
    }

  /* Gather statistics about the incoming call edges.  */
  for (edge = node->callers; edge; edge = edge->next_caller)
    {
      caller_count_sum += edge->count;
      caller_edges++;
      if (cgraph_maybe_hot_edge_p (edge))
        hot_caller_edges++;
    }

  if (caller_edges == 0)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
                 cgraph_node_name (node));
      return false;
    }

  /* A body smaller than the number of its callers is accepted before the
     flag and size/speed checks below are consulted.  */
  if (node->local.inline_summary.self_size < caller_edges)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
                 cgraph_node_name (node));
      return true;
    }

  if (!flag_ipa_cp_clone)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
                 cgraph_node_name (node));
      return false;
    }

  if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
                 cgraph_node_name (node));
      return false;
    }

  /* When profile is available and function is hot, propagate into it even
     if calls seem cold; constant propagation can improve function's speed
     significantly.  */
  if (max_count && caller_count_sum > node->count * 90 / 100)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
                 cgraph_node_name (node));
      return true;
    }

  if (hot_caller_edges == 0)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 cgraph_node_name (node));
      return false;
    }

  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             cgraph_node_name (node));
  return true;
}
/* Initialize ipcp_lattices array. The lattices corresponding to supported
types (integers, real types and Fortran constants defined as const_decls)
are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
static void
ipcp_initialize_node_lattices (struct cgraph_node *node)
{
int i;
struct ipa_node_params *info = IPA_NODE_REF (node);
enum ipa_lattice_type type;
if (ipa_is_called_with_var_arguments (info))
type = IPA_BOTTOM;
else if (cgraph_only_called_directly_p (node))
type = IPA_TOP;
/* When cloning is allowed, we can assume that externally visible functions
are not called. We will compensate this by cloning later. */
else if (ipcp_cloning_candidate_p (node))
type = IPA_TOP, n_cloning_candidates ++;
else
type = IPA_BOTTOM;
for (i = 0; i < ipa_get_param_count (info) ; i++)
ipcp_get_lattice (info, i)->type = type;
}
/* Return the constant held by the constant lattice LAT as a tree of type
   TREE_TYPE.  The constant is returned unchanged when no real conversion
   is needed; otherwise it is wrapped in a folded NOP_EXPR if it is
   fold-convertible to TREE_TYPE, and in a folded VIEW_CONVERT_EXPR if
   not.  */

static tree
build_const_val (struct ipcp_lattice *lat, tree tree_type)
{
  tree val;

  gcc_assert (ipcp_lat_is_const (lat));
  val = lat->constant;

  if (useless_type_conversion_p (tree_type, TREE_TYPE (val)))
    return val;

  if (fold_convertible_p (tree_type, val))
    return fold_build1 (NOP_EXPR, tree_type, val);

  return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
}
/* Compute and record the scale of NODE: the ratio, in units of
   REG_BR_PROB_BASE, between the sum of the counts of the incoming call
   edges and the count of NODE itself.  A node with zero count gets scale
   zero.

   FIXME: This code is wrong.  Since the callers can be also clones and
   the clones are not scaled yet, the sums get unrealistically high.
   To properly compute the counts, we would need to do propagation across
   callgraph (as external call to A might imply call to non-cloned B
   if A's clone calls cloned B).  */

static void
ipcp_compute_node_scale (struct cgraph_node *node)
{
  struct cgraph_edge *cs;
  gcov_type caller_sum = 0;
  gcov_type cap;
  gcov_type scale;

  /* Sum the counts of all incoming edges.  */
  for (cs = node->callers; cs != NULL; cs = cs->next_caller)
    caller_sum += cs->count;

  /* Work around the unrealistically high sum problem.  We just don't want
     the non-cloned body to have negative or very low frequency.  Since
     majority of execution time will be spent in clones anyway, this should
     give good enough profile.  */
  cap = node->count * 9 / 10;
  if (caller_sum > cap)
    caller_sum = cap;

  scale = (node->count == 0
           ? 0
           : caller_sum * REG_BR_PROB_BASE / node->count);
  ipcp_set_node_scale (node, scale);
}
/* First stage driver: the intraprocedural analysis that gathers the
   information propagated later on.  Every analyzed node is analyzed first;
   then jump functions are built for the outgoing call edges of every
   analyzed node.  */

static void
ipcp_init_stage (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *cs;

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->analyzed)
        continue;
      ipcp_analyze_node (node);
    }

  /* Building jump functions.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      for (cs = node->callees; cs; cs = cs->next_callee)
        {
          /* We do not need to bother analyzing calls to unknown
             functions unless they may become known during lto/whopr.  */
          if (cs->callee->analyzed || flag_lto || flag_whopr)
            {
              ipa_count_arguments (cs);
              /* A mismatch between actual and formal argument counts marks
                 the callee as called with variable arguments; the jump
                 functions are computed regardless.  */
              if (ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
                  != ipa_get_param_count (IPA_NODE_REF (cs->callee)))
                ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee));
              ipa_compute_jump_functions (cs);
            }
        }
}
/* Lower every parameter lattice in the compilation unit that is still
   IPA_TOP to IPA_BOTTOM, since such parameters most probably get their
   values from outside of this compilation unit.  Return true if at least
   one lattice was changed.  Each change is reported to the dump file when
   one is open.  */

static bool
ipcp_change_tops_to_bottom (void)
{
  struct cgraph_node *node;
  bool changed = false;

  for (node = cgraph_nodes; node; node = node->next)
    {
      struct ipa_node_params *info = IPA_NODE_REF (node);
      int param_count = ipa_get_param_count (info);
      int idx;

      for (idx = 0; idx < param_count; idx++)
        {
          struct ipcp_lattice *lat = ipcp_get_lattice (info, idx);

          if (lat->type != IPA_TOP)
            continue;

          changed = true;
          if (dump_file)
            {
              fprintf (dump_file, "Forcing param ");
              print_generic_expr (dump_file, ipa_get_param (info, idx), 0);
              fprintf (dump_file, " of node %s to bottom.\n",
                       cgraph_node_name (node));
            }
          lat->type = IPA_BOTTOM;
        }
    }

  return changed;
}
/* Interprocedural analysis.  The algorithm propagates constants from the
   caller's parameters to the callee's arguments.

   A worklist is seeded with all functions.  For every popped function and
   every outgoing call edge, the lattice implied by each jump function is
   met with the lattice of the corresponding callee parameter; whenever
   that changes the callee's lattice, the callee is pushed back onto the
   worklist.  Edges to callees that are not analyzed or that are called
   with variable arguments are skipped.  */

static void
ipcp_propagate_stage (void)
{
  int i;
  struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
  struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
  struct ipcp_lattice *dest_lat;
  struct cgraph_edge *cs;
  struct ipa_jump_func *jump_func;
  struct ipa_func_list *wl;
  int count;

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();

  /* Initialize worklist to contain all functions.  */
  wl = ipa_init_func_list ();
  while (wl)
    {
      struct cgraph_node *node = ipa_pop_func_from_list (&wl);
      struct ipa_node_params *info = IPA_NODE_REF (node);

      for (cs = node->callees; cs; cs = cs->next_callee)
        {
          struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee);
          struct ipa_edge_args *args = IPA_EDGE_REF (cs);

          /* The variable-argument test used to be repeated a second time
             in this condition; one test is sufficient.  */
          if (ipa_is_called_with_var_arguments (callee_info)
              || !cs->callee->analyzed)
            continue;

          count = ipa_get_cs_argument_count (args);
          for (i = 0; i < count; i++)
            {
              /* Lattice of the value flowing in through this argument.  */
              jump_func = ipa_get_ith_jump_func (args, i);
              ipcp_lattice_from_jfunc (info, &inc_lat, jump_func);

              /* Meet it with what is already known about the callee's
                 parameter.  */
              dest_lat = ipcp_get_lattice (callee_info, i);
              ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
              if (ipcp_lattice_changed (&new_lat, dest_lat))
                {
                  dest_lat->type = new_lat.type;
                  dest_lat->constant = new_lat.constant;
                  /* The callee's own callees may be affected now.  */
                  ipa_push_func_to_list (&wl, cs->callee);
                }
            }
        }
    }
}
/* Second stage driver.  Initialize the lattices and count scales of all
   nodes, run the constant propagation, and run it once more if some
   lattices were still undetermined (IPA_TOP) and had to be forced to
   IPA_BOTTOM.  */

static void
ipcp_iterate_stage (void)
{
  struct cgraph_node *node;

  n_cloning_candidates = 0;

  if (dump_file)
    fprintf (dump_file, "\nIPA iterate stage:\n\n");

  if (in_lto_p)
    ipa_update_after_lto_read ();

  for (node = cgraph_nodes; node; node = node->next)
    {
      ipcp_initialize_node_lattices (node);
      ipcp_compute_node_scale (node);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      ipcp_print_all_lattices (dump_file);
      ipcp_function_scale_print (dump_file);
    }

  ipcp_propagate_stage ();

  /* Some lattices may have changed from IPA_TOP to IPA_BOTTOM.  This
     change should be propagated.  */
  if (ipcp_change_tops_to_bottom ())
    {
      gcc_assert (n_cloning_candidates);
      ipcp_propagate_stage ();
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after propagation:\n");
      ipcp_print_all_lattices (dump_file);
      if (dump_flags & TDF_DETAILS)
        ipcp_print_profile_data (dump_file);
    }
}
/* Return true if constants may be inserted into the function described by
   NODE.  At present this is the same question as whether NODE can be
   versioned.  */

static inline bool
ipcp_node_modifiable_p (struct cgraph_node *node)
{
  /* Once we will be able to do in-place replacement, we can be more
     lax here.  */
  bool versionable = ipcp_versionable_function_p (node);

  return versionable;
}
/* Dump to F the count scale of every analyzed function.  */

static void
ipcp_function_scale_print (FILE * f)
{
  struct cgraph_node *node;

  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
        fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
                 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
      }
}
/* Dump to F the profile count of every node in the call graph.  */

static void
ipcp_print_func_profile_counts (FILE * f)
{
  struct cgraph_node *fn;

  fn = cgraph_nodes;
  while (fn)
    {
      fprintf (f, "function %s: ", cgraph_node_name (fn));
      fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
               " \n", (HOST_WIDE_INT) fn->count);
      fn = fn->next;
    }
}
/* Dump to F the profile count of every call edge, walking the outgoing
   edges of every node in the call graph.  */

static void
ipcp_print_call_profile_counts (FILE * f)
{
  struct cgraph_node *fn;
  struct cgraph_edge *edge;

  for (fn = cgraph_nodes; fn; fn = fn->next)
    for (edge = fn->callees; edge; edge = edge->next_callee)
      {
        fprintf (f, "%s -> %s ", cgraph_node_name (edge->caller),
                 cgraph_node_name (edge->callee));
        fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
                 (HOST_WIDE_INT) edge->count);
      }
}
/* Dump to F the profile counts of all functions followed by the profile
   counts of all call edges.  */

static void
ipcp_print_profile_data (FILE * f)
{
  /* Per-function counts.  */
  fprintf (f, "\nNODE COUNTS :\n");
  ipcp_print_func_profile_counts (f);

  /* Per-callsite counts.  */
  fprintf (f, "\nCS COUNTS stage:\n");
  ipcp_print_call_profile_counts (f);
}
/* Allocate and fill in an ipa_replace_map asking the versioning machinery
   to replace the formal parameter PARM_TREE with the constant described by
   the lattice LAT (converted to the type of PARM_TREE).  The replacement
   is reported to the dump file when one is open.  */

static struct ipa_replace_map *
ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
{
  struct ipa_replace_map *map;
  tree cst;

  map = GGC_NEW (struct ipa_replace_map);
  cst = build_const_val (lat, TREE_TYPE (parm_tree));

  if (dump_file)
    {
      fprintf (dump_file, " replacing param ");
      print_generic_expr (dump_file, parm_tree, 0);
      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, cst, 0);
      fprintf (dump_file, "\n");
    }

  map->replace_p = true;
  map->ref_p = false;
  map->old_tree = parm_tree;
  map->new_tree = cst;

  return map;
}
/* Return true if the callsite CS should be redirected to the original
   callee instead of the cloned one.  That is the case when the caller is
   not itself a clone and, for some parameter of the original callee whose
   lattice is constant, CS does not pass a constant jump function.  Always
   false when there are no cloning candidates.  */

static bool
ipcp_need_redirect_p (struct cgraph_edge *cs)
{
  struct cgraph_node *callee = cs->callee;
  struct cgraph_node *orig;
  struct ipa_node_params *callee_info;
  int idx, param_count;

  if (!n_cloning_candidates)
    return false;

  /* Look at the lattices of the original function, not of its clone.  */
  orig = ipcp_get_orig_node (callee);
  if (orig != NULL)
    callee = orig;

  /* Calls made from a clone never need to go back.  */
  if (ipcp_get_orig_node (cs->caller))
    return false;

  callee_info = IPA_NODE_REF (callee);
  param_count = ipa_get_param_count (callee_info);
  for (idx = 0; idx < param_count; idx++)
    {
      struct ipcp_lattice *lat = ipcp_get_lattice (callee_info, idx);
      struct ipa_jump_func *jf;

      if (!ipcp_lat_is_const (lat))
        continue;

      jf = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), idx);
      if (jf->type != IPA_JF_CONST)
        return true;
    }

  return false;
}
/* Fix the callsites and the call graph after function cloning was done.

   For every analyzed clone, the set of arguments that the clone drops
   (obviously unused ones and those with a constant lattice) is computed,
   and every incoming edge whose caller is not a clone and which needs
   redirecting (see ipcp_need_redirect_p) is pointed back at the original
   node.  */

static void
ipcp_update_callgraph (void)
{
  struct cgraph_node *node;

  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed && ipcp_node_is_clone (node))
      {
        bitmap args_to_skip = BITMAP_ALLOC (NULL);
        struct cgraph_node *orig_node = ipcp_get_orig_node (node);
        struct ipa_node_params *info = IPA_NODE_REF (orig_node);
        int i, count = ipa_get_param_count (info);
        struct cgraph_edge *cs, *next;

        for (i = 0; i < count; i++)
          {
            struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
            tree parm_tree = ipa_get_param (info, i);

            /* We can proactively remove obviously unused arguments.  */
            if (is_gimple_reg (parm_tree)
                && !gimple_default_def (DECL_STRUCT_FUNCTION (orig_node->decl),
                                        parm_tree))
              {
                bitmap_set_bit (args_to_skip, i);
                continue;
              }

            if (lat->type == IPA_CONST_VALUE)
              bitmap_set_bit (args_to_skip, i);
          }

        /* Redirecting an edge unlinks it from this caller list, so the
           successor is fetched before the edge is touched.  */
        for (cs = node->callers; cs; cs = next)
          {
            next = cs->next_caller;
            if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
              cgraph_redirect_edge_callee (cs, orig_node);
          }

        /* The bitmap is local to this iteration; release it so that one
           bitmap is not leaked per clone.  */
        BITMAP_FREE (args_to_skip);
      }
}
/* Update profiling info for versioned functions and the functions they
   were versioned from.  The count of the original node is split between
   the clone and the original according to the original's scale (in units
   of REG_BR_PROB_BASE), and the counts of the outgoing edges of both are
   scaled accordingly.  */

static void
ipcp_update_profiling (void)
{
  struct cgraph_node *node;

  for (node = cgraph_nodes; node; node = node->next)
    {
      struct cgraph_node *orig_node;
      struct cgraph_edge *cs;
      gcov_type scale, scale_complement;

      if (!ipcp_node_is_clone (node))
        continue;

      orig_node = ipcp_get_orig_node (node);
      scale = ipcp_get_node_scale (orig_node);

      /* The clone takes SCALE of the original count ...  */
      node->count = orig_node->count * scale / REG_BR_PROB_BASE;

      /* ... and the original keeps the rest.  */
      scale_complement = REG_BR_PROB_BASE - scale;
      orig_node->count
        = orig_node->count * scale_complement / REG_BR_PROB_BASE;

      for (cs = node->callees; cs; cs = cs->next_callee)
        cs->count = cs->count * scale / REG_BR_PROB_BASE;
      for (cs = orig_node->callees; cs; cs = cs->next_callee)
        cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
    }
}
/* Estimate by how much the program would grow if NODE was cloned.

   Returns 0 when the original body would not have to be kept, i.e. when
   NODE is only called directly and no caller other than NODE itself needs
   to keep calling the original.  Otherwise the estimate is the size of
   NODE minus one unit for every removable argument at every redirectable
   callsite, clamped at zero.  */

static long
ipcp_estimate_growth (struct cgraph_node *node)
{
  struct cgraph_edge *cs;
  int redirectable_node_callers = 0;
  int removable_args = 0;
  bool need_original = !cgraph_only_called_directly_p (node);
  struct ipa_node_params *info;
  int i, count;
  int growth;

  /* Self-recursive calls and calls that do not need redirecting back will
     end up calling the clone; any other caller keeps the original
     alive.  */
  for (cs = node->callers; cs != NULL; cs = cs->next_caller)
    if (cs->caller == node || !ipcp_need_redirect_p (cs))
      redirectable_node_callers++;
    else
      need_original = true;

  /* If we will be able to fully replace the original node, we never
     increase program size.  */
  if (!need_original)
    return 0;

  info = IPA_NODE_REF (node);
  count = ipa_get_param_count (info);
  for (i = 0; i < count; i++)
    {
      struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
      tree parm_tree = ipa_get_param (info, i);

      /* We can proactively remove obviously unused arguments.  An argument
         can only be removed once, so one that is both unused and constant
         must not be counted twice; this matches how the other loops in
         this file build args_to_skip.  */
      if (is_gimple_reg (parm_tree)
          && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
                                  parm_tree))
        removable_args++;
      else if (lat->type == IPA_CONST_VALUE)
        removable_args++;
    }

  /* We make just a very simple estimate of savings for removal of an
     operand from a call site.  Precise cost is difficult to get, as our
     size metric counts constants and moves as free.  Generally we are
     looking for cases where a small function is called very many times.  */
  growth = node->local.inline_summary.self_size
           - removable_args * redirectable_node_callers;
  if (growth < 0)
    return 0;
  return growth;
}
/* Estimate the cost of cloning NODE; lower is better.  The cost is the
   estimated growth (times 1000) divided by a weight derived from the
   profile counts of the callers that would use the clone when a profile is
   available (max_count is non-zero), or from their frequencies otherwise.
   Zero is returned when versioning is estimated not to grow the code.  */

static long
ipcp_estimate_cloning_cost (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  gcov_type count_sum = 1;
  int freq_sum = 1;
  int cost = ipcp_estimate_growth (node) * 1000;

  if (cost == 0)
    {
      if (dump_file)
        fprintf (dump_file, "Versioning of %s will save code size\n",
                 cgraph_node_name (node));
      return 0;
    }

  /* Only callers that are still alive and that will call the clone
     contribute to the benefit.  */
  for (edge = node->callers; edge; edge = edge->next_caller)
    {
      if (bitmap_bit_p (dead_nodes, edge->caller->uid)
          || ipcp_need_redirect_p (edge))
        continue;

      count_sum += edge->count;
      freq_sum += edge->frequency + 1;
    }

  if (max_count)
    cost /= count_sum * 1000 / max_count + 1;
  else
    cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;

  if (dump_file)
    fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
             cgraph_node_name (node), cost, node->local.inline_summary.self_size,
             freq_sum);

  return cost + 1;
}
/* Return the number of parameters of NODE that have an insertable constant
   lattice and are not obviously unused (a gimple register parameter
   without a default definition counts as unused).  */

static int
ipcp_const_param_count (struct cgraph_node *node)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  int param_count = ipa_get_param_count (info);
  int live_consts = 0;
  int idx;

  for (idx = 0; idx < param_count; idx++)
    {
      struct ipcp_lattice *lat = ipcp_get_lattice (info, idx);
      tree parm_tree = ipa_get_param (info, idx);

      if (!ipcp_lat_is_insertable (lat))
        continue;

      /* Do not count obviously unused arguments.  */
      if (is_gimple_reg (parm_tree)
          && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
                                  parm_tree))
        continue;

      live_consts++;
    }

  return live_consts;
}
/* Propagate the constant parameters found by ipcp_iterate_stage()
to the function's code. */
static void
ipcp_insert_stage (void)
{
struct cgraph_node *node, *node1 = NULL;
int i;
VEC (cgraph_edge_p, heap) * redirect_callers;
VEC (ipa_replace_map_p,gc)* replace_trees;
int node_callers, count;
tree parm_tree;
struct ipa_replace_map *replace_param;
fibheap_t heap;
long overall_size = 0, new_size = 0;
long max_new_size;
ipa_check_create_node_params ();
ipa_check_create_edge_args ();
if (dump_file)
fprintf (dump_file, "\nIPA insert stage:\n\n");
dead_nodes = BITMAP_ALLOC (NULL);
for (node = cgraph_nodes; node; node = node->next)
if (node->analyzed)
{
if (node->count > max_count)
max_count = node->count;
overall_size += node->local.inline_summary.self_size;
}
max_new_size = overall_size;
if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
/* First collect all functions we proved to have constant arguments to heap. */
heap = fibheap_new ();
for (node = cgraph_nodes; node; node = node->next)
{
struct ipa_node_params *info;
/* Propagation of the constant is forbidden in certain conditions. */
if (!node->analyzed || !ipcp_node_modifiable_p (node))
continue;
info = IPA_NODE_REF (node);
if (ipa_is_called_with_var_arguments (info))
continue;
if (ipcp_const_param_count (node))
node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
}
/* Now clone in priority order until code size growth limits are met or
heap is emptied. */
while (!fibheap_empty (heap))
{
struct ipa_node_params *info;
int growth = 0;
bitmap args_to_skip;
struct cgraph_edge *cs;
node = (struct cgraph_node *)fibheap_extract_min (heap);
node->aux = NULL;
if (dump_file)
fprintf (dump_file, "considering function %s\n",
cgraph_node_name (node));
growth = ipcp_estimate_growth (node);
if (new_size + growth > max_new_size)
break;
if (growth
&& optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
{
if (dump_file)
fprintf (dump_file, "Not versioning, cold code would grow");
continue;
}
new_size += growth;
/* Look if original function becomes dead after clonning. */
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
if (cs->caller == node || ipcp_need_redirect_p (cs))
break;
if (!cs && cgraph_only_called_directly_p (node))
bitmap_set_bit (dead_nodes, node->uid);
info = IPA_NODE_REF (node);
count = ipa_get_param_count (info);
replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
args_to_skip = BITMAP_GGC_ALLOC ();
for (i = 0; i < count; i++)
{
struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
parm_tree = ipa_get_param (info, i);
/* We can proactively remove obviously unused arguments. */
if (is_gimple_reg (parm_tree)
&& !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
parm_tree))
{
bitmap_set_bit (args_to_skip, i);
continue;
}
if (lat->type == IPA_CONST_VALUE)
{
replace_param =
ipcp_create_replace_map (parm_tree, lat);
VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
bitmap_set_bit (args_to_skip, i);
}
}
/* Compute how many callers node has. */
node_callers = 0;
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
node_callers++;
redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
/* Redirecting all the callers of the node to the
new versioned node. */
node1 =
cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
args_to_skip);
args_to_skip = NULL;
VEC_free (cgraph_edge_p, heap, redirect_callers);
replace_trees = NULL;
if (node1 == NULL)
continue;
if (dump_file)
fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
cgraph_node_name (node), (int)growth, (int)new_size);
ipcp_init_cloned_node (node, node1);
/* TODO: We can use indirect inlining info to produce new calls. */
if (dump_file)
dump_function_to_file (node1->decl, dump_file, dump_flags);
for (cs = node->callees; cs; cs = cs->next_callee)
if (cs->callee->aux)
{
fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
cs->callee->aux = fibheap_insert (heap,
ipcp_estimate_cloning_cost (cs->callee),
cs->callee);
}
}
while (!fibheap_empty (heap))
{
if (dump_file)
fprintf (dump_file, "skipping function %s\n",
cgraph_node_name (node));
node = (struct cgraph_node *) fibheap_extract_min (heap);
node->aux = NULL;
}
fibheap_delete (heap);
BITMAP_FREE (dead_nodes);
ipcp_update_callgraph ();
ipcp_update_profiling ();
}
/* The IPCP driver; installed as the execute hook of pass_ipa_cp.

   Calls cgraph_remove_unreachable_nodes, optionally dumps the IPA
   structures, then runs the propagation stage (2) followed by the
   insertion stage (3), and finally frees all IPCP structures.
   Always returns 0.  */
static unsigned int
ipcp_driver (void)
{
  cgraph_remove_unreachable_nodes (true, dump_file);

  if (dump_file != NULL)
    {
      fputs ("\nIPA structures before propagation:\n", dump_file);
      /* Parameter descriptors are printed only for detailed dumps; the
         jump functions are printed for every dump.  */
      if ((dump_flags & TDF_DETAILS) != 0)
        ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Stage 2: do the interprocedural propagation.  */
  ipcp_iterate_stage ();

  /* Stage 3: insert the constants found into the functions.  */
  ipcp_insert_stage ();

  if (dump_file != NULL && (dump_flags & TDF_DETAILS) != 0)
    {
      fputs ("\nProfiling info after insert stage:\n", dump_file);
      ipcp_print_profile_data (dump_file);
    }

  /* Nothing below needs the IPCP structures any more; free them.  */
  free_all_ipa_structures_after_ipa_cp ();

  if (dump_file != NULL)
    fputs ("\nIPA constant propagation end\n", dump_file);

  return 0;
}
/* Summary generation hook of pass_ipa_cp.

   Announces the start of IPA constant propagation in the dump file (if
   any), calls the ipa_check_create_* helpers for node parameters and edge
   arguments, registers the IPA call graph hooks and then runs stage 1 of
   the optimization, the init stage.  */
static void
ipcp_generate_summary (void)
{
  if (dump_file != NULL)
    fputs ("\nIPA constant propagation start:\n", dump_file);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  ipa_register_cgraph_hooks ();

  /* Stage 1: the init stage initializes the ipa_node_params and
     ipa_edge_args structures.  */
  ipcp_init_stage ();
}
/* Write ipcp summary for nodes in SET.  This is the write_summary hook of
   pass_ipa_cp; it only forwards SET to ipa_prop_write_jump_functions.  */
static void
ipcp_write_summary (cgraph_node_set set)
{
ipa_prop_write_jump_functions (set);
}
/* Read ipcp summary.  This is the read_summary hook of pass_ipa_cp; it only
   calls ipa_prop_read_jump_functions.  */
static void
ipcp_read_summary (void)
{
ipa_prop_read_jump_functions ();
}
/* Gate for IPCP optimization.  Installed as the gate hook of pass_ipa_cp;
   returns the value of flag_ipa_cp converted to bool.  */
static bool
cgraph_gate_cp (void)
{
return flag_ipa_cp;
}
/* Pass descriptor for IPA constant propagation ("cp").  The pass is gated
   by cgraph_gate_cp and executed by ipcp_driver.  Summaries are generated,
   written and read by ipcp_generate_summary, ipcp_write_summary and
   ipcp_read_summary respectively; no function or variable transform hook
   is provided.  */
struct ipa_opt_pass_d pass_ipa_cp =
{
{
IPA_PASS,
"cp", /* name */
cgraph_gate_cp, /* gate */
ipcp_driver, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_IPA_CONSTANT_PROP, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_cgraph | TODO_dump_func |
TODO_remove_functions /* todo_flags_finish */
},
ipcp_generate_summary, /* generate_summary */
ipcp_write_summary, /* write_summary */
ipcp_read_summary, /* read_summary */
NULL, /* function_read_summary */
lto_ipa_fixup_call_notes, /* stmt_fixup */
0, /* TODOs */
NULL, /* function_transform */
NULL, /* variable_transform */
};