ipa-inline.c (compute_uninlined_call_time, [...]): New functions.

* ipa-inline.c (compute_uninlined_call_time,
	compute_inlined_call_time): New functions.
	(RELATIVE_TIME_BENEFIT_RANGE): New macro.
	(relative_time_benefit): Rewrite.
	(edge_badness): Rewrite path with guessed profile and estimated profile.
	* ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module):
	New hints.
	(struct inline_summary): Add GROWTH filed.
	* ipa-inline-analysis.c (dump_inline_hints): Update.
	(reset_inline_summary): Update.
	(dump_inline_summary): Update.
	(will_be_nonconstant_predicate): Cleanup to use gimple_store_p and
	gimple_assign_load_p predicates.
	(estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint.
	(simple_edge_hints): New function.
	(do_estimate_edge_time): Return time of invocation of callee rather
	than the time scaled by edge frequency; update hints code.
	(do_estimate_edge_hints): Update.
	(do_estimate_growth): Cleanup.

From-SVN: r193161
This commit is contained in:
Jan Hubicka 2012-11-05 15:00:46 +01:00 committed by Jan Hubicka
parent 0450d71880
commit d59171daa7
4 changed files with 179 additions and 96 deletions

View File

@ -1,3 +1,25 @@
2012-11-05 Jan Hubicka <jh@suse.cz>
* ipa-inline.c (compute_uninlined_call_time,
compute_inlined_call_time): New functions.
(RELATIVE_TIME_BENEFIT_RANGE): New macro.
(relative_time_benefit): Rewrite.
(edge_badness): Rewrite path with guessed profile and estimated profile.
* ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module):
New hints.
(struct inline_summary): Add GROWTH filed.
* ipa-inline-analysis.c (dump_inline_hints): Update.
(reset_inline_summary): Update.
(dump_inline_summary): Update.
(will_be_nonconstant_predicate): Cleanup to use gimple_store_p and
gimple_assign_load_p predicates.
(estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint.
(simple_edge_hints): New function.
(do_estimate_edge_time): Return time of invocation of callee rather
than the time scaled by edge frequency; update hints code.
(do_estimate_edge_hints): Update.
(do_estimate_growth): Cleanup.
2012-11-05 Jakub Jelinek <jakub@redhat.com>
PR target/55194

View File

@ -649,6 +649,16 @@ dump_inline_hints (FILE *f, inline_hints hints)
hints &= ~INLINE_HINT_in_scc;
fprintf (f, " in_scc");
}
if (hints & INLINE_HINT_cross_module)
{
hints &= ~INLINE_HINT_cross_module;
fprintf (f, " cross_module");
}
if (hints & INLINE_HINT_declared_inline)
{
hints &= ~INLINE_HINT_declared_inline;
fprintf (f, " declared_inline");
}
gcc_assert (!hints);
}
@ -983,6 +993,7 @@ reset_inline_summary (struct cgraph_node *node)
info->stack_frame_offset = 0;
info->size = 0;
info->time = 0;
info->growth = 0;
info->scc_no = 0;
if (info->loop_iterations)
{
@ -1375,6 +1386,9 @@ dump_inline_summary (FILE * f, struct cgraph_node *node)
(int) s->estimated_self_stack_size);
fprintf (f, " global stack: %i\n",
(int) s->estimated_stack_size);
if (s->growth)
fprintf (f, " estimated growth:%i\n",
(int) s->growth);
if (s->scc_no)
fprintf (f, " In SCC: %i\n",
(int) s->scc_no);
@ -1977,10 +1991,11 @@ will_be_nonconstant_predicate (struct ipa_node_params *info,
return p;
/* Stores will stay anyway. */
if (gimple_vdef (stmt))
if (gimple_store_p (stmt))
return p;
is_load = gimple_vuse (stmt) != NULL;
is_load = gimple_assign_load_p (stmt);
/* Loads can be optimized when the value is known. */
if (is_load)
{
@ -2857,6 +2872,8 @@ estimate_node_size_and_time (struct cgraph_node *node,
hints |=INLINE_HINT_loop_stride;
if (info->scc_no)
hints |= INLINE_HINT_in_scc;
if (DECL_DECLARED_INLINE_P (node->symbol.decl))
hints |= INLINE_HINT_declared_inline;
estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths,
known_vals, known_binfos, known_aggs);
@ -2865,7 +2882,6 @@ estimate_node_size_and_time (struct cgraph_node *node,
time = RDIV (time, INLINE_TIME_SCALE);
size = RDIV (size, INLINE_SIZE_SCALE);
if (dump_file
&& (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n size:%i time:%i\n", (int)size, (int)time);
@ -3315,6 +3331,26 @@ inline_update_overall_summary (struct cgraph_node *node)
info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
}
/* Return hints derrived from EDGE. */
int
simple_edge_hints (struct cgraph_edge *edge)
{
int hints = 0;
struct cgraph_node *to = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller);
if (inline_summary (to)->scc_no
&& inline_summary (to)->scc_no == inline_summary (edge->callee)->scc_no
&& !cgraph_edge_recursive_p (edge))
hints |= INLINE_HINT_same_scc;
if (to->symbol.lto_file_data && edge->callee->symbol.lto_file_data
&& to->symbol.lto_file_data != edge->callee->symbol.lto_file_data)
hints |= INLINE_HINT_cross_module;
return hints;
}
/* Estimate the time cost for the caller when inlining EDGE.
Only to be called via estimate_edge_time, that handles the
caching mechanism.
@ -3328,7 +3364,6 @@ do_estimate_edge_time (struct cgraph_edge *edge)
int time;
int size;
inline_hints hints;
gcov_type ret;
struct cgraph_node *callee;
clause_t clause;
VEC (tree, heap) *known_vals;
@ -3347,33 +3382,26 @@ do_estimate_edge_time (struct cgraph_edge *edge)
VEC_free (tree, heap, known_vals);
VEC_free (tree, heap, known_binfos);
VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
ret = RDIV ((gcov_type)time * edge->frequency,
CGRAPH_FREQ_BASE);
gcc_checking_assert (size >= 0);
gcc_checking_assert (time >= 0);
/* When caching, update the cache entry. */
if (edge_growth_cache)
{
struct cgraph_node *to = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller);
if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache)
<= edge->uid)
VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache,
cgraph_edge_max_uid);
VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).time
= ret + (ret >= 0);
= time + (time >= 0);
VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).size
= size + (size >= 0);
if (inline_summary (to)->scc_no
&& inline_summary (to)->scc_no == inline_summary (callee)->scc_no
&& !cgraph_edge_recursive_p (edge))
hints |= INLINE_HINT_same_scc;
hints |= simple_edge_hints (edge);
VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).hints
= hints + 1;
}
return ret;
return time;
}
@ -3430,9 +3458,6 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
VEC (tree, heap) *known_vals;
VEC (tree, heap) *known_binfos;
VEC (ipa_agg_jump_function_p, heap) *known_aggs;
struct cgraph_node *to = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller);
/* When we do caching, use do_estimate_edge_time to populate the entry. */
@ -3458,10 +3483,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
VEC_free (tree, heap, known_vals);
VEC_free (tree, heap, known_binfos);
VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
if (inline_summary (to)->scc_no
&& inline_summary (to)->scc_no == inline_summary (callee)->scc_no
&& !cgraph_edge_recursive_p (edge))
hints |= INLINE_HINT_same_scc;
hints |= simple_edge_hints (edge);
return hints;
}
@ -3549,10 +3571,11 @@ do_estimate_growth (struct cgraph_node *node)
return zero or negative growths. */
if (d.self_recursive)
d.growth = d.growth < info->size ? info->size : d.growth;
else if (DECL_EXTERNAL (node->symbol.decl))
;
else
{
if (!DECL_EXTERNAL (node->symbol.decl)
&& cgraph_will_be_removed_from_program_if_no_direct_calls (node))
if (cgraph_will_be_removed_from_program_if_no_direct_calls (node))
d.growth -= info->size;
/* COMDAT functions are very often not shared across multiple units
since they come from various template instantiations.

View File

@ -456,6 +456,42 @@ want_early_inline_function_p (struct cgraph_edge *e)
return want_inline;
}
/* Compute time of the edge->caller + edge->callee execution when inlining
does not happen. */
inline int
compute_uninlined_call_time (struct inline_summary *callee_info,
struct cgraph_edge *edge)
{
int uninlined_call_time =
RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
CGRAPH_FREQ_BASE);
int caller_time = inline_summary (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller)->time;
return uninlined_call_time + caller_time;
}
/* Same as compute_uinlined_call_time but compute time when inlining
does happen. */
inline gcov_type
compute_inlined_call_time (struct cgraph_edge *edge,
int edge_time)
{
int caller_time = inline_summary (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller)->time;
int time = caller_time + RDIV ((edge_time - inline_edge_summary (edge)->call_stmt_time)
* MAX (edge->frequency, 1),
CGRAPH_FREQ_BASE);
/* Possible one roundoff error, but watch for overflows. */
gcc_checking_assert (time >= INT_MIN / 2);
if (time < 0)
time = 0;
return time;
}
/* Return true if we are interested in inlining small function.
When REPORT is true, report reason to dump file. */
@ -724,31 +760,41 @@ want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
return true;
}
#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
/* Return relative time improvement for inlining EDGE in range
1...2^9. */
1...RELATIVE_TIME_BENEFIT_RANGE */
static inline int
relative_time_benefit (struct inline_summary *callee_info,
struct cgraph_edge *edge,
int time_growth)
int edge_time)
{
int relbenefit;
gcov_type uninlined_call_time;
int uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
int inlined_call_time = compute_inlined_call_time (edge, edge_time);
/* Inlining into extern inline function is not a win. */
if (DECL_EXTERNAL (edge->caller->global.inlined_to
? edge->caller->global.inlined_to->symbol.decl
: edge->caller->symbol.decl))
return 1;
/* Watch overflows. */
gcc_checking_assert (uninlined_call_time >= 0);
gcc_checking_assert (inlined_call_time >= 0);
gcc_checking_assert (uninlined_call_time >= inlined_call_time);
uninlined_call_time =
((gcov_type)
(callee_info->time
+ inline_edge_summary (edge)->call_stmt_time) * edge->frequency
+ CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
/* Compute relative time benefit, i.e. how much the call becomes faster.
??? perhaps computing how much the caller+calle together become faster
would lead to more realistic results. */
if (!uninlined_call_time)
uninlined_call_time = 1;
relbenefit =
(uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
relbenefit = MIN (relbenefit, 512);
RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
uninlined_call_time);
relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
gcc_checking_assert (relbenefit >= 0);
relbenefit = MAX (relbenefit, 1);
return relbenefit;
}
@ -764,7 +810,7 @@ static int
edge_badness (struct cgraph_edge *edge, bool dump)
{
gcov_type badness;
int growth, time_growth;
int growth, edge_time;
struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
NULL);
struct inline_summary *callee_info = inline_summary (callee);
@ -774,17 +820,20 @@ edge_badness (struct cgraph_edge *edge, bool dump)
return INT_MIN;
growth = estimate_edge_growth (edge);
time_growth = estimate_edge_time (edge);
edge_time = estimate_edge_time (edge);
hints = estimate_edge_hints (edge);
gcc_checking_assert (edge_time >= 0);
gcc_checking_assert (edge_time <= callee_info->time);
gcc_checking_assert (growth <= callee_info->size);
if (dump)
{
fprintf (dump_file, " Badness calculation for %s -> %s\n",
xstrdup (cgraph_node_name (edge->caller)),
xstrdup (cgraph_node_name (callee)));
fprintf (dump_file, " size growth %i, time growth %i ",
fprintf (dump_file, " size growth %i, time %i ",
growth,
time_growth);
edge_time);
dump_inline_hints (dump_file, hints);
fprintf (dump_file, "\n");
}
@ -802,7 +851,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
relative_edge_count * relative_time_benefit
goodness = -------------------------------------------
edge_growth
growth_f_caller
badness = -goodness
The fraction is upside down, because on edge counts and time beneits
@ -810,11 +859,11 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else if (max_count)
{
int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
badness =
((int)
((double) edge->count * INT_MIN / 2 / max_count / 512) *
relative_time_benefit (callee_info, edge, time_growth)) / growth;
((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) *
relbenefit) / growth;
/* Be sure that insanity of the profile won't lead to increasing counts
in the scalling and thus to overflow in the computation above. */
@ -826,73 +875,53 @@ edge_badness (struct cgraph_edge *edge, bool dump)
" * Relative benefit %f\n",
(int) badness, (double) badness / INT_MIN,
(double) edge->count / max_count,
relbenefit * 100 / 256.0);
relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
}
}
/* When function local profile is available. Compute badness as:
growth_of_callee
badness = -------------------------------------- + growth_for-all
relative_time_benefit * edge_frequency
relative_time_benefit
goodness = ---------------------------------
growth_of_caller * overall_growth
badness = - goodness
compensated by the inline hints.
*/
else if (flag_guess_branch_prob)
{
int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;
div = MAX (div, 1);
gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
div *= relative_time_benefit (callee_info, edge, time_growth);
/* frequency is normalized in range 1...2^10.
relbenefit in range 1...2^9
DIV should be in range 1....2^19. */
gcc_checking_assert (div >= 1 && div <= (1<<19));
/* Result must be integer in range 0...INT_MAX.
Set the base of fixed point calculation so we don't lose much of
precision for small bandesses (those are interesting) yet we don't
overflow for growths that are still in interesting range.
Fixed point arithmetic with point at 6th bit. */
badness = ((gcov_type)growth) * (1<<(19+6));
badness = (badness + div / 2) / div;
/* Overall growth of inlining all calls of function matters: we want to
inline so offline copy of function is no longer needed.
Additionally functions that can be fully inlined without much of
effort are better inline candidates than functions that can be fully
inlined only after noticeable overall unit growths. The latter
are better in a sense compressing of code size by factoring out common
code into separate function shared by multiple code paths.
We might mix the valud into the fraction by taking into account
relative growth of the unit, but for now just add the number
into resulting fraction. */
if (badness > INT_MAX / 8)
{
badness = INT_MAX / 8;
if (dump)
fprintf (dump_file, "Badness overflow\n");
}
if (hints & (INLINE_HINT_indirect_call
| INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride))
badness /= 8;
badness = (relative_time_benefit (callee_info, edge, edge_time)
* (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
badness /= (growth * MAX (1, callee_info->growth));
gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
if ((hints & (INLINE_HINT_indirect_call
| INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride))
|| callee_info->growth <= 0)
badness *= 8;
if (hints & (INLINE_HINT_same_scc))
badness *= 4;
if (hints & (INLINE_HINT_in_scc))
badness *= 2;
badness /= 16;
else if (hints & (INLINE_HINT_in_scc))
badness /= 8;
else if (hints & (INLINE_HINT_cross_module))
badness /= 2;
gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
badness *= 16;
if (dump)
{
fprintf (dump_file,
" %i: guessed profile. frequency %f,"
" benefit %f%%, divisor %i\n",
" benefit %f%%, time w/o inlining %i, time w inlining %i"
" overall growth %i (current) %i (original)\n",
(int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
relative_time_benefit (callee_info, edge, edge_time) * 100.0
/ RELATIVE_TIME_BENEFIT_RANGE,
compute_uninlined_call_time (callee_info, edge),
(int)compute_inlined_call_time (edge, edge_time),
estimate_growth (callee),
callee_info->growth);
}
}
/* When function local profile is not available or it does not give
@ -1371,6 +1400,7 @@ inline_small_functions (void)
if (!DECL_EXTERNAL (node->symbol.decl))
initial_size += info->size;
info->growth = estimate_growth (node);
if (dfs && dfs->next_cycle)
{
struct cgraph_node *n2;

View File

@ -49,7 +49,9 @@ enum inline_hints_vals {
INLINE_HINT_loop_iterations = 2,
INLINE_HINT_loop_stride = 4,
INLINE_HINT_same_scc = 8,
INLINE_HINT_in_scc = 16
INLINE_HINT_in_scc = 16,
INLINE_HINT_declared_inline = 32,
INLINE_HINT_cross_module = 64
};
typedef int inline_hints;
@ -129,6 +131,12 @@ struct GTY(()) inline_summary
/* Predicate on when some loop in the function becomes to have known
stride. */
struct predicate * GTY((skip)) loop_stride;
/* Estimated growth for inlining all copies of the function before start
of small functions inlining.
This value will get out of date as the callers are duplicated, but
using up-to-date value in the badness metric mean a lot of extra
expenses. */
int growth;
/* Number of SCC on the beggining of inlining process. */
int scc_no;
};