re PR ipa/60243 (IPA is slow on large cgraph tree)

PR ipa/60243
	* ipa-inline.c (want_inline_small_function_p): Short circuit large
	functions; reorganize to make cheap checks first.
	(inline_small_functions): Do not estimate growth when dumping;
	it is expensive.
	* ipa-inline.h (inline_summary): Add min_size.
	(growth_likely_positive): New function.
	* ipa-inline-analysis.c (dump_inline_summary): Add min_size.
	(set_cond_stmt_execution_predicate): Cleanup.
	(estimate_edge_size_and_time): Compute min_size.
	(estimate_calls_size_and_time): Likewise.
	(estimate_node_size_and_time): Likewise.
	(inline_update_overall_summary): Update min_size.
	(do_estimate_edge_time): Likewise.
	(do_estimate_edge_size): Update.
	(do_estimate_edge_hints): Update.
	(growth_likely_positive): New function.

From-SVN: r208916
This commit is contained in:
Jan Hubicka 2014-03-28 20:50:28 +01:00 committed by Jan Hubicka
parent ff76f4e2c0
commit 4cd8957f51
4 changed files with 164 additions and 69 deletions

View File

@ -1,3 +1,23 @@
2014-03-28 Jan Hubicka <hubicka@ucw.cz>
PR ipa/60243
* ipa-inline.c (want_inline_small_function_p): Short circuit large
functions; reorganize to make cheap checks first.
(inline_small_functions): Do not estimate growth when dumping;
it is expensive.
* ipa-inline.h (inline_summary): Add min_size.
(growth_likely_positive): New function.
* ipa-inline-analysis.c (dump_inline_summary): Add min_size.
(set_cond_stmt_execution_predicate): Cleanup.
(estimate_edge_size_and_time): Compute min_size.
(estimate_calls_size_and_time): Likewise.
(estimate_node_size_and_time): Likewise.
(inline_update_overall_summary): Update min_size.
(do_estimate_edge_time): Likewise.
(do_estimate_edge_size): Update.
(do_estimate_edge_hints): Update.
(growth_likely_positive): New function.
2014-03-28 Jakub Jelinek <jakub@redhat.com>
PR target/60693

View File

@ -1397,6 +1397,7 @@ dump_inline_summary (FILE *f, struct cgraph_node *node)
fprintf (f, " global time: %i\n", s->time);
fprintf (f, " self size: %i\n", s->self_size);
fprintf (f, " global size: %i\n", s->size);
fprintf (f, " min size: %i\n", s->min_size);
fprintf (f, " self stack: %i\n",
(int) s->estimated_self_stack_size);
fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
@ -1746,8 +1747,7 @@ set_cond_stmt_execution_predicate (struct ipa_node_params *info,
if (this_code != ERROR_MARK)
{
struct predicate p = add_condition (summary, index, &aggpos,
e->flags & EDGE_TRUE_VALUE
? code : inverted_code,
this_code,
gimple_cond_rhs (last));
e->aux = pool_alloc (edge_predicate_pool);
*(struct predicate *) e->aux = p;
@ -2991,10 +2991,15 @@ estimate_edge_devirt_benefit (struct cgraph_edge *ie,
return isummary->inlinable;
}
/* Increase SIZE and TIME for size and time needed to handle edge E. */
/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
handle edge E with probability PROB.
Set HINTS if edge may be devirtualized.
KNOWN_VALS, KNOWN_AGGS and KNOWN_BINFOS describe context of the call
site. */
static inline void
estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *time,
estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
int *time,
int prob,
vec<tree> known_vals,
vec<tree> known_binfos,
@ -3004,12 +3009,16 @@ estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *time,
struct inline_edge_summary *es = inline_edge_summary (e);
int call_size = es->call_stmt_size;
int call_time = es->call_stmt_time;
int cur_size;
if (!e->callee
&& estimate_edge_devirt_benefit (e, &call_size, &call_time,
known_vals, known_binfos, known_aggs)
&& hints && cgraph_maybe_hot_edge_p (e))
*hints |= INLINE_HINT_indirect_call;
*size += call_size * INLINE_SIZE_SCALE;
cur_size = call_size * INLINE_SIZE_SCALE;
*size += cur_size;
if (min_size)
*min_size += cur_size;
*time += apply_probability ((gcov_type) call_time, prob)
* e->frequency * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE);
if (*time > MAX_TIME * INLINE_TIME_SCALE)
@ -3018,12 +3027,14 @@ estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *time,
/* Increase SIZE and TIME for size and time needed to handle all calls in NODE.
POSSIBLE_TRUTHS, KNOWN_VALS and KNOWN_BINFOS describe context of the call
site. */
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
calls in NODE.
POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_BINFOS describe context of
the call site. */
static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time,
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
int *min_size, int *time,
inline_hints *hints,
clause_t possible_truths,
vec<tree> known_vals,
@ -3041,12 +3052,15 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time,
{
/* Predicates of calls shall not use NOT_CHANGED codes,
sowe do not need to compute probabilities. */
estimate_edge_size_and_time (e, size, time, REG_BR_PROB_BASE,
estimate_edge_size_and_time (e, size,
es->predicate ? NULL : min_size,
time, REG_BR_PROB_BASE,
known_vals, known_binfos,
known_aggs, hints);
}
else
estimate_calls_size_and_time (e->callee, size, time, hints,
estimate_calls_size_and_time (e->callee, size, min_size, time,
hints,
possible_truths,
known_vals, known_binfos,
known_aggs);
@ -3057,7 +3071,9 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time,
struct inline_edge_summary *es = inline_edge_summary (e);
if (!es->predicate
|| evaluate_predicate (es->predicate, possible_truths))
estimate_edge_size_and_time (e, size, time, REG_BR_PROB_BASE,
estimate_edge_size_and_time (e, size,
es->predicate ? NULL : min_size,
time, REG_BR_PROB_BASE,
known_vals, known_binfos, known_aggs,
hints);
}
@ -3065,8 +3081,13 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time,
/* Estimate size and time needed to execute NODE assuming
POSSIBLE_TRUTHS clause, and KNOWN_VALS and KNOWN_BINFOS information
about NODE's arguments. */
POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_BINFOS
information about NODE's arguments. If non-NULL use also probability
information present in INLINE_PARAM_SUMMARY vector.
Additionally detemine hints determined by the context. Finally compute
minimal size needed for the call that is independent on the call context and
can be used for fast estimates. Return the values in RET_SIZE,
RET_MIN_SIZE, RET_TIME and RET_HINTS. */
static void
estimate_node_size_and_time (struct cgraph_node *node,
@ -3074,7 +3095,7 @@ estimate_node_size_and_time (struct cgraph_node *node,
vec<tree> known_vals,
vec<tree> known_binfos,
vec<ipa_agg_jump_function_p> known_aggs,
int *ret_size, int *ret_time,
int *ret_size, int *ret_min_size, int *ret_time,
inline_hints *ret_hints,
vec<inline_param_summary>
inline_param_summary)
@ -3083,6 +3104,7 @@ estimate_node_size_and_time (struct cgraph_node *node,
size_time_entry *e;
int size = 0;
int time = 0;
int min_size = 0;
inline_hints hints = 0;
int i;
@ -3128,6 +3150,8 @@ estimate_node_size_and_time (struct cgraph_node *node,
gcc_checking_assert (time >= 0);
}
gcc_checking_assert (true_predicate_p (&(*info->entry)[0].predicate));
min_size = (*info->entry)[0].size;
gcc_checking_assert (size >= 0);
gcc_checking_assert (time >= 0);
@ -3145,12 +3169,13 @@ estimate_node_size_and_time (struct cgraph_node *node,
if (DECL_DECLARED_INLINE_P (node->decl))
hints |= INLINE_HINT_declared_inline;
estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths,
estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
known_vals, known_binfos, known_aggs);
gcc_checking_assert (size >= 0);
gcc_checking_assert (time >= 0);
time = RDIV (time, INLINE_TIME_SCALE);
size = RDIV (size, INLINE_SIZE_SCALE);
min_size = RDIV (min_size, INLINE_SIZE_SCALE);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n size:%i time:%i\n", (int) size, (int) time);
@ -3158,6 +3183,8 @@ estimate_node_size_and_time (struct cgraph_node *node,
*ret_time = time;
if (ret_size)
*ret_size = size;
if (ret_min_size)
*ret_min_size = min_size;
if (ret_hints)
*ret_hints = hints;
return;
@ -3182,7 +3209,7 @@ estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
clause = evaluate_conditions_for_known_args (node, false, known_vals,
known_aggs);
estimate_node_size_and_time (node, clause, known_vals, known_binfos,
known_aggs, ret_size, ret_time, hints, vNULL);
known_aggs, ret_size, NULL, ret_time, hints, vNULL);
}
/* Translate all conditions from callee representation into caller
@ -3583,7 +3610,8 @@ inline_update_overall_summary (struct cgraph_node *node)
if (info->time > MAX_TIME * INLINE_TIME_SCALE)
info->time = MAX_TIME * INLINE_TIME_SCALE;
}
estimate_calls_size_and_time (node, &info->size, &info->time, NULL,
estimate_calls_size_and_time (node, &info->size, &info->min_size,
&info->time, NULL,
~(clause_t) (1 << predicate_false_condition),
vNULL, vNULL, vNULL);
info->time = (info->time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE;
@ -3628,6 +3656,7 @@ do_estimate_edge_time (struct cgraph_edge *edge)
vec<tree> known_binfos;
vec<ipa_agg_jump_function_p> known_aggs;
struct inline_edge_summary *es = inline_edge_summary (edge);
int min_size;
callee = cgraph_function_or_thunk_node (edge->callee, NULL);
@ -3636,7 +3665,7 @@ do_estimate_edge_time (struct cgraph_edge *edge)
&clause, &known_vals, &known_binfos,
&known_aggs);
estimate_node_size_and_time (callee, clause, known_vals, known_binfos,
known_aggs, &size, &time, &hints, es->param);
known_aggs, &size, &min_size, &time, &hints, es->param);
known_vals.release ();
known_binfos.release ();
known_aggs.release ();
@ -3646,6 +3675,7 @@ do_estimate_edge_time (struct cgraph_edge *edge)
/* When caching, update the cache entry. */
if (edge_growth_cache.exists ())
{
inline_summary (edge->callee)->min_size = min_size;
if ((int) edge_growth_cache.length () <= edge->uid)
edge_growth_cache.safe_grow_cleared (cgraph_edge_max_uid);
edge_growth_cache[edge->uid].time = time + (time >= 0);
@ -3689,7 +3719,7 @@ do_estimate_edge_size (struct cgraph_edge *edge)
&clause, &known_vals, &known_binfos,
&known_aggs);
estimate_node_size_and_time (callee, clause, known_vals, known_binfos,
known_aggs, &size, NULL, NULL, vNULL);
known_aggs, &size, NULL, NULL, NULL, vNULL);
known_vals.release ();
known_binfos.release ();
known_aggs.release ();
@ -3728,7 +3758,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
&clause, &known_vals, &known_binfos,
&known_aggs);
estimate_node_size_and_time (callee, clause, known_vals, known_binfos,
known_aggs, NULL, NULL, &hints, vNULL);
known_aggs, NULL, NULL, NULL, &hints, vNULL);
known_vals.release ();
known_binfos.release ();
known_aggs.release ();
@ -3848,6 +3878,55 @@ do_estimate_growth (struct cgraph_node *node)
}
/* Make cheap estimation if growth of NODE is likely positive knowing
EDGE_GROWTH of one particular edge.
We assume that most of other edges will have similar growth
and skip computation if there are too many callers. */
bool
growth_likely_positive (struct cgraph_node *node, int edge_growth ATTRIBUTE_UNUSED)
{
int max_callers;
int ret;
struct cgraph_edge *e;
gcc_checking_assert (edge_growth > 0);
/* Unlike for functions called once, we play unsafe with
COMDATs. We can allow that since we know functions
in consideration are small (and thus risk is small) and
moreover grow estimates already accounts that COMDAT
functions may or may not disappear when eliminated from
current unit. With good probability making aggressive
choice in all units is going to make overall program
smaller.
Consequently we ask cgraph_can_remove_if_no_direct_calls_p
instead of
cgraph_will_be_removed_from_program_if_no_direct_calls */
if (DECL_EXTERNAL (node->decl)
|| !cgraph_can_remove_if_no_direct_calls_p (node))
return true;
/* If there is cached value, just go ahead. */
if ((int)node_growth_cache.length () > node->uid
&& (ret = node_growth_cache[node->uid]))
return ret > 0;
if (!cgraph_will_be_removed_from_program_if_no_direct_calls (node)
&& (!DECL_COMDAT (node->decl)
|| !cgraph_can_remove_if_no_direct_calls_p (node)))
return true;
max_callers = inline_summary (node)->size * 4 / edge_growth + 2;
for (e = node->callers; e; e = e->next_caller)
{
max_callers--;
if (!max_callers)
return true;
}
return estimate_growth (node) > 0;
}
/* This function performs intraprocedural analysis in NODE that is required to
inline indirect calls. */

View File

@ -573,6 +573,24 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
want_inline = false;
}
/* Do fast and conservative check if the function can be good
inline cnadidate. At themoment we allow inline hints to
promote non-inline function to inline and we increase
MAX_INLINE_INSNS_SINGLE 16fold for inline functions. */
else if (!DECL_DECLARED_INLINE_P (callee->decl)
&& inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
> MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
{
e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
want_inline = false;
}
else if (DECL_DECLARED_INLINE_P (callee->decl)
&& inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
> 16 * MAX_INLINE_INSNS_SINGLE)
{
e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
want_inline = false;
}
else
{
int growth = estimate_edge_growth (e);
@ -585,56 +603,26 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
hints suggests that inlining given function is very profitable. */
else if (DECL_DECLARED_INLINE_P (callee->decl)
&& growth >= MAX_INLINE_INSNS_SINGLE
&& !big_speedup
&& !(hints & (INLINE_HINT_indirect_call
| INLINE_HINT_loop_iterations
| INLINE_HINT_array_index
| INLINE_HINT_loop_stride)))
&& ((!big_speedup
&& !(hints & (INLINE_HINT_indirect_call
| INLINE_HINT_loop_iterations
| INLINE_HINT_array_index
| INLINE_HINT_loop_stride)))
|| growth >= MAX_INLINE_INSNS_SINGLE * 16))
{
e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
want_inline = false;
}
/* Before giving up based on fact that caller size will grow, allow
functions that are called few times and eliminating the offline
copy will lead to overall code size reduction.
Not all of these will be handled by subsequent inlining of functions
called once: in particular weak functions are not handled or funcitons
that inline to multiple calls but a lot of bodies is optimized out.
Finally we want to inline earlier to allow inlining of callbacks.
This is slightly wrong on aggressive side: it is entirely possible
that function is called many times with a context where inlining
reduces code size and few times with a context where inlining increase
code size. Resoluting growth estimate will be negative even if it
would make more sense to keep offline copy and do not inline into the
call sites that makes the code size grow.
When badness orders the calls in a way that code reducing calls come
first, this situation is not a problem at all: after inlining all
"good" calls, we will realize that keeping the function around is
better. */
else if (growth <= MAX_INLINE_INSNS_SINGLE
/* Unlike for functions called once, we play unsafe with
COMDATs. We can allow that since we know functions
in consideration are small (and thus risk is small) and
moreover grow estimates already accounts that COMDAT
functions may or may not disappear when eliminated from
current unit. With good probability making aggressive
choice in all units is going to make overall program
smaller.
Consequently we ask cgraph_can_remove_if_no_direct_calls_p
instead of
cgraph_will_be_removed_from_program_if_no_direct_calls */
&& !DECL_EXTERNAL (callee->decl)
&& cgraph_can_remove_if_no_direct_calls_p (callee)
&& estimate_growth (callee) <= 0)
;
else if (!DECL_DECLARED_INLINE_P (callee->decl)
&& !flag_inline_functions)
{
e->inline_failed = CIF_NOT_DECLARED_INLINED;
want_inline = false;
/* growth_likely_positive is expensive, always test it last. */
if (growth >= MAX_INLINE_INSNS_SINGLE
|| growth_likely_positive (callee, growth))
{
e->inline_failed = CIF_NOT_DECLARED_INLINED;
want_inline = false;
}
}
/* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
@ -649,11 +637,18 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
MAX_INLINE_INSNS_SINGLE)
: MAX_INLINE_INSNS_AUTO))
{
e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
want_inline = false;
/* growth_likely_positive is expensive, always test it last. */
if (growth >= MAX_INLINE_INSNS_SINGLE
|| growth_likely_positive (callee, growth))
{
e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
want_inline = false;
}
}
/* If call is cold, do not inline when function body would grow. */
else if (!cgraph_maybe_hot_edge_p (e))
else if (!cgraph_maybe_hot_edge_p (e)
&& (growth >= MAX_INLINE_INSNS_SINGLE
|| growth_likely_positive (callee, growth)))
{
e->inline_failed = CIF_UNLIKELY_CALL;
want_inline = false;
@ -1723,14 +1718,12 @@ inline_small_functions (void)
inline_summary (callee)->size);
fprintf (dump_file,
" to be inlined into %s/%i in %s:%i\n"
" Estimated growth after inlined into all is %+i insns.\n"
" Estimated badness is %i, frequency %.2f.\n",
edge->caller->name (), edge->caller->order,
flag_wpa ? "unknown"
: gimple_filename ((const_gimple) edge->call_stmt),
flag_wpa ? -1
: gimple_lineno ((const_gimple) edge->call_stmt),
estimate_growth (callee),
badness,
edge->frequency / (double)CGRAPH_FREQ_BASE);
if (edge->count)

View File

@ -117,6 +117,8 @@ struct GTY(()) inline_summary
int self_size;
/* Time of the function body. */
int self_time;
/* Minimal size increase after inlining. */
int min_size;
/* False when there something makes inlining impossible (such as va_arg). */
unsigned inlinable : 1;
@ -220,6 +222,7 @@ void estimate_ipcp_clone_size_and_time (struct cgraph_node *,
vec<ipa_agg_jump_function_p>,
int *, int *, inline_hints *);
int do_estimate_growth (struct cgraph_node *);
bool growth_likely_positive (struct cgraph_node *, int);
void inline_merge_summary (struct cgraph_edge *edge);
void inline_update_overall_summary (struct cgraph_node *node);
int do_estimate_edge_size (struct cgraph_edge *edge);