Improve -fprofile-report

Profile-report was never properly updated after switch to new profile
representation.  This patch fixes the way profile mismatches are calculated:
we used to collect separately count and freq mismatches, while now we have
only counts & probabilities.  So we verify
 - in count: that total count of incomming edges is close to acutal count of
   the BB
 - out prob: that total sum of outgoing edge edge probabilities is close
   to 1 (except for BB containing noreturn calls or EH).

Moreover I added dumping of absolute data which is useful to plot them: with
Martin Liska we plan to setup regular testing so we keep optimizers profie
updates bit under control.

Finally I added both static and dynamic stats about mismatches - static one is
simply number of inconsistencies in the cfg while dynamic is scaled by the
profile - I think in order to keep eye on optimizers the first number is quite
relevant. WHile when tracking why code quality regressed the second number
matters more.

2021-11-28  Jan Hubicka  <hubicka@ucw.cz>

	* cfghooks.c: Include sreal.h, profile.h.
	(profile_record_check_consistency): Fix checking of count counsistency;
	record also dynamic mismatches.
	* cfgrtl.c (rtl_account_profile_record): Similarly.
	* tree-cfg.c (gimple_account_profile_record): Likewise.
	* cfghooks.h (struct profile_record): Remove num_mismatched_freq_in,
	num_mismatched_freq_out, turn time to double, add
	dyn_mismatched_prob_out, dyn_mismatched_count_in,
	num_mismatched_prob_out; remove num_mismatched_count_out.
	* passes.c (account_profile_1): New function.
	(account_profile_in_list): New function.
	(pass_manager::dump_profile_report): Rewrite.
	(execute_one_ipa_transform_pass): Check profile consistency after
	running all passes.
	(execute_all_ipa_transforms): Remove cfun test; record all transform
	methods.
	(execute_one_pass): Fix collecting of profile stats.
This commit is contained in:
Jan Hubicka 2021-11-28 19:25:33 +01:00
parent 7393fa8b1d
commit d1471457fc
5 changed files with 271 additions and 126 deletions

View File

@ -31,6 +31,8 @@ along with GCC; see the file COPYING3. If not see
#include "cfganal.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "sreal.h"
#include "profile.h"
/* Disable warnings about missing quoting in GCC diagnostics. */
#if __GNUC__ >= 10
@ -1467,41 +1469,73 @@ profile_record_check_consistency (profile_record *record)
FOR_ALL_BB_FN (bb, cfun)
{
if (bb != EXIT_BLOCK_PTR_FOR_FN (cfun)
&& profile_status_for_fn (cfun) != PROFILE_ABSENT)
&& profile_status_for_fn (cfun) != PROFILE_ABSENT
&& EDGE_COUNT (bb->succs))
{
profile_probability sum = profile_probability::never ();
sreal sum = 0;
bool found = false;
FOR_EACH_EDGE (e, ei, bb->succs)
sum += e->probability;
if (EDGE_COUNT (bb->succs)
&& sum.differs_from_p (profile_probability::always ()))
record->num_mismatched_freq_out++;
profile_count lsum = profile_count::zero ();
FOR_EACH_EDGE (e, ei, bb->succs)
lsum += e->count ();
if (EDGE_COUNT (bb->succs) && (lsum.differs_from_p (bb->count)))
record->num_mismatched_count_out++;
{
if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
found = true;
if (e->probability.initialized_p ())
sum += e->probability.to_sreal ();
}
double dsum = sum.to_double ();
if (found && (dsum < 0.9 || dsum > 1.1)
&& !(bb->count == profile_count::zero ()))
{
record->num_mismatched_prob_out++;
dsum = dsum > 1 ? dsum - 1 : 1 - dsum;
if (profile_info)
{
if (ENTRY_BLOCK_PTR_FOR_FN
(cfun)->count.ipa ().initialized_p ()
&& ENTRY_BLOCK_PTR_FOR_FN
(cfun)->count.ipa ().nonzero_p ()
&& bb->count.ipa ().initialized_p ())
record->dyn_mismatched_prob_out
+= dsum * bb->count.ipa ().to_gcov_type ();
}
else if (bb->count.initialized_p ())
record->dyn_mismatched_prob_out
+= dsum * bb->count.to_sreal_scale
(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
}
}
if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
&& profile_status_for_fn (cfun) != PROFILE_ABSENT)
{
profile_probability sum = profile_probability::never ();
profile_count lsum = profile_count::zero ();
FOR_EACH_EDGE (e, ei, bb->preds)
{
sum += e->probability;
lsum += e->count ();
}
if (EDGE_COUNT (bb->preds)
&& sum.differs_from_p (profile_probability::always ()))
record->num_mismatched_freq_in++;
lsum += e->count ();
if (lsum.differs_from_p (bb->count))
record->num_mismatched_count_in++;
{
record->num_mismatched_count_in++;
profile_count max;
if (lsum < bb->count)
max = bb->count;
else
max = lsum;
if (profile_info)
{
if (ENTRY_BLOCK_PTR_FOR_FN
(cfun)->count.ipa ().initialized_p ()
&& ENTRY_BLOCK_PTR_FOR_FN
(cfun)->count.ipa ().nonzero_p ()
&& max.ipa ().initialized_p ())
record->dyn_mismatched_count_in
+= max.ipa ().to_gcov_type ();
}
else if (bb->count.initialized_p ())
record->dyn_mismatched_prob_out
+= max.to_sreal_scale
(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
}
}
if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)
|| bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
continue;
gcc_assert (cfg_hooks->account_profile_record);
cfg_hooks->account_profile_record (bb, record);
}
}

View File

@ -36,22 +36,25 @@ along with GCC; see the file COPYING3. If not see
and one CFG hook per CFG mode. */
struct profile_record
{
/* The number of basic blocks where sum(freq) of the block's predecessors
doesn't match reasonably well with the incoming frequency. */
int num_mismatched_freq_in;
/* Likewise for a basic block's successors. */
int num_mismatched_freq_out;
/* A weighted cost of the run-time of the function body. */
double time;
/* Frequency of execution of basic blocks where sum(prob) of the block's
predecessors doesn't match reasonably probability 1. */
double dyn_mismatched_prob_out;
/* Frequency of execution basic blocks where sum(count) of the block's
predecessors doesn't match reasonably well with the incoming frequency. */
double dyn_mismatched_count_in;
/* The number of basic blocks where sum(prob) of the block's predecessors
doesn't match reasonably probability 1. */
int num_mismatched_prob_out;
/* The number of basic blocks where sum(count) of the block's predecessors
doesn't match reasonably well with the incoming frequency. */
int num_mismatched_count_in;
/* Likewise for a basic block's successors. */
int num_mismatched_count_out;
/* A weighted cost of the run-time of the function body. */
gcov_type_unsigned time;
/* A weighted cost of the size of the function body. */
int size;
/* True iff this pass actually was run. */
bool run;
bool fdo;
};
typedef int_hash <unsigned short, 0> dependence_hash;

View File

@ -63,6 +63,8 @@ along with GCC; see the file COPYING3. If not see
#include "print-rtl.h"
#include "rtl-iter.h"
#include "gimplify.h"
#include "profile.h"
#include "sreal.h"
/* Disable warnings about missing quoting in GCC diagnostics. */
#if __GNUC__ >= 10
@ -5264,12 +5266,22 @@ rtl_account_profile_record (basic_block bb, struct profile_record *record)
if (INSN_P (insn))
{
record->size += insn_cost (insn, false);
if (bb->count.initialized_p ())
if (profile_info)
{
if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().initialized_p ()
&& ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
&& bb->count.ipa ().initialized_p ())
record->time
+= insn_cost (insn, true) * bb->count.ipa ().to_gcov_type ();
}
else if (bb->count.initialized_p ()
&& ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
record->time
+= insn_cost (insn, true) * bb->count.to_gcov_type ();
else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
record->time
+= insn_cost (insn, true) * bb->count.to_frequency (cfun);
+= insn_cost (insn, true)
* bb->count.to_sreal_scale
(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
else
record->time += insn_cost (insn, true);
}
}

View File

@ -1882,6 +1882,33 @@ account_profile (int index, bool run)
profile_record_account_profile (&profile_record[index]);
}
/* Account profile for IPA pass. Callback for do_per_function. */
static void
account_profile_1 (function *fn, void *data)
{
opt_pass *pass = (opt_pass *)data;
push_cfun (fn);
check_profile_consistency (pass->static_pass_number, true);
account_profile (pass->static_pass_number, true);
pop_cfun ();
}
/* Account profile chnages to all passes in list starting in SUB. */
static void
account_profile_in_list (opt_pass *sub)
{
for (; sub; sub = sub->next)
{
check_profile_consistency (sub->static_pass_number, false);
account_profile (sub->static_pass_number, false);
if (sub->sub)
account_profile_in_list (sub->sub);
}
}
/* Output profile consistency. */
void
@ -1893,10 +1920,12 @@ dump_profile_report (void)
void
pass_manager::dump_profile_report () const
{
int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
gcov_type last_time = 0, last_size = 0;
int last_count_in = 0, last_prob_out = 0;
double last_dyn_count_in = 0, last_dyn_prob_out = 0;
double last_time = 0;
int last_size = 0;
double rel_time_change, rel_size_change;
int last_reported = 0;
gcc::dump_manager *dumps = m_ctxt->get_dumps ();
if (!profile_record)
return;
@ -1906,16 +1935,21 @@ pass_manager::dump_profile_report () const
dump_file = stderr;
fprintf (dump_file, "Profile consistency report:\n\n");
fprintf (dump_file, " |mismatch |mismatch | |\n");
fprintf (dump_file, "Pass name |IN |IN |OUT |OUT |overall |\n");
fprintf (dump_file, " |freq |count |freq |count |size |time |\n");
fprintf (dump_file,
"Pass dump id and name |static mismatch "
"|dynamic mismatch "
"|overall |\n");
fprintf (dump_file,
" |in count |out prob "
"|in count |out prob "
"|size |time |\n");
for (int i = 1; i < passes_by_id_size; i++)
if (profile_record[i].run)
{
if (last_time)
rel_time_change = (profile_record[i].time
- (double)last_time) * 100 / (double)last_time;
- last_time) * 100 / last_time;
else
rel_time_change = 0;
if (last_size)
@ -1924,65 +1958,73 @@ pass_manager::dump_profile_report () const
else
rel_size_change = 0;
if (profile_record[i].num_mismatched_freq_in != last_freq_in
|| profile_record[i].num_mismatched_freq_out != last_freq_out
|| profile_record[i].num_mismatched_count_in != last_count_in
|| profile_record[i].num_mismatched_count_out != last_count_out
|| rel_time_change || rel_size_change)
{
last_reported = i;
fprintf (dump_file, "%-33s", passes_by_id[i]->name);
if (profile_record[i].num_mismatched_freq_in != last_freq_in)
fprintf (dump_file, "| %+5i",
profile_record[i].num_mismatched_freq_in
- last_freq_in);
else
fprintf (dump_file, "| ");
if (profile_record[i].num_mismatched_count_in != last_count_in)
fprintf (dump_file, "| %+5i",
profile_record[i].num_mismatched_count_in
- last_count_in);
else
fprintf (dump_file, "| ");
if (profile_record[i].num_mismatched_freq_out != last_freq_out)
fprintf (dump_file, "| %+5i",
profile_record[i].num_mismatched_freq_out
- last_freq_out);
else
fprintf (dump_file, "| ");
if (profile_record[i].num_mismatched_count_out != last_count_out)
fprintf (dump_file, "| %+5i",
profile_record[i].num_mismatched_count_out
- last_count_out);
else
fprintf (dump_file, "| ");
dump_file_info *dfi = dumps->get_dump_file_info (i);
/* Size/time units change across gimple and RTL. */
if (i == pass_expand_1->static_pass_number)
fprintf (dump_file, "|----------|----------");
else
{
if (rel_size_change)
fprintf (dump_file, "| %+8.1f%%", rel_size_change);
else
fprintf (dump_file, "| ");
if (rel_time_change)
fprintf (dump_file, "| %+8.1f%%", rel_time_change);
else
fprintf (dump_file, "| ");
}
fprintf (dump_file, "|\n");
last_freq_in = profile_record[i].num_mismatched_freq_in;
last_freq_out = profile_record[i].num_mismatched_freq_out;
last_count_in = profile_record[i].num_mismatched_count_in;
last_count_out = profile_record[i].num_mismatched_count_out;
}
else if (last_reported != i)
fprintf (dump_file, "%3i%c %-28s| %6i",
dfi->num,
passes_by_id[i]->type == GIMPLE_PASS ? 't'
: passes_by_id[i]->type == RTL_PASS ? 'r'
: 'i',
passes_by_id[i]->name,
profile_record[i].num_mismatched_count_in);
if (profile_record[i].num_mismatched_count_in != last_count_in)
fprintf (dump_file, " %+5i",
profile_record[i].num_mismatched_count_in
- last_count_in);
else
fprintf (dump_file, " ");
fprintf (dump_file, "| %6i",
profile_record[i].num_mismatched_prob_out);
if (profile_record[i].num_mismatched_prob_out != last_prob_out)
fprintf (dump_file, " %+5i",
profile_record[i].num_mismatched_prob_out
- last_prob_out);
else
fprintf (dump_file, " ");
fprintf (dump_file, "| %12.0f",
profile_record[i].dyn_mismatched_count_in);
if (profile_record[i].dyn_mismatched_count_in != last_dyn_count_in)
fprintf (dump_file, " %+12.0f",
profile_record[i].dyn_mismatched_count_in
- last_dyn_count_in);
else
fprintf (dump_file, " ");
fprintf (dump_file, "| %12.0f",
profile_record[i].dyn_mismatched_prob_out);
if (profile_record[i].dyn_mismatched_prob_out != last_dyn_prob_out)
fprintf (dump_file, " %+12.0f",
profile_record[i].dyn_mismatched_prob_out
- last_dyn_prob_out);
else
fprintf (dump_file, " ");
/* Size/time units change across gimple and RTL. */
if (i == pass_expand_1->static_pass_number)
fprintf (dump_file,
"|-------------------|--------------------------");
else
{
last_reported = i;
fprintf (dump_file, "%-20s ------------| | | | | | |\n",
passes_by_id[i]->name);
fprintf (dump_file, "| %8i", profile_record[i].size);
if (rel_size_change)
fprintf (dump_file, " %+8.1f%%", rel_size_change);
else
fprintf (dump_file, " ");
fprintf (dump_file, "| %12.0f", profile_record[i].time);
/* Time units changes with profile estimate and feedback. */
if (i == pass_profile_1->static_pass_number
|| i == pass_ipa_tree_profile_1->static_pass_number)
fprintf (dump_file, "-------------");
else if (rel_time_change)
fprintf (dump_file, " %+11.1f%%", rel_time_change);
else
fprintf (dump_file, " ");
}
fprintf (dump_file, "|\n");
last_prob_out = profile_record[i].num_mismatched_prob_out;
last_count_in = profile_record[i].num_mismatched_count_in;
last_dyn_prob_out = profile_record[i].dyn_mismatched_prob_out;
last_dyn_count_in = profile_record[i].dyn_mismatched_count_in;
last_time = profile_record[i].time;
last_size = profile_record[i].size;
}
@ -2280,9 +2322,6 @@ execute_one_ipa_transform_pass (struct cgraph_node *node,
if (pass->tv_id != TV_NONE)
timevar_push (pass->tv_id);
if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
check_profile_consistency (pass->static_pass_number, true);
/* Run pre-pass verification. */
execute_todo (ipa_pass->function_transform_todo_flags_start);
@ -2292,8 +2331,6 @@ execute_one_ipa_transform_pass (struct cgraph_node *node,
/* Run post-pass cleanup and verification. */
execute_todo (todo_after);
verify_interpass_invariants ();
if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
account_profile (pass->static_pass_number, true);
/* Stop timevar. */
if (pass->tv_id != TV_NONE)
@ -2317,10 +2354,9 @@ void
execute_all_ipa_transforms (bool do_not_collect)
{
struct cgraph_node *node;
if (!cfun)
return;
node = cgraph_node::get (current_function_decl);
cgraph_node *next_clone;
for (cgraph_node *n = node->clones; n; n = next_clone)
{
@ -2329,15 +2365,48 @@ execute_all_ipa_transforms (bool do_not_collect)
n->materialize_clone ();
}
if (node->ipa_transforms_to_apply.exists ())
{
unsigned int i;
int j = 0;
gcc::pass_manager *passes = g->get_passes ();
bool report = profile_report && (cfun->curr_properties & PROP_gimple) != 0;
for (i = 0; i < node->ipa_transforms_to_apply.length (); i++)
execute_one_ipa_transform_pass (node, node->ipa_transforms_to_apply[i],
do_not_collect);
node->ipa_transforms_to_apply.release ();
if (report)
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
for (auto p : node->ipa_transforms_to_apply)
{
/* To get consistent statistics, we need to account each functio
to each IPA pass. */
if (report)
{
for (;j < p->static_pass_number; j++)
if (passes->get_pass_for_id (j)
&& passes->get_pass_for_id (j)->type == IPA_PASS
&& ((ipa_opt_pass_d *)passes->get_pass_for_id (j))
->function_transform)
{
check_profile_consistency (j, true);
account_profile (j, true);
}
gcc_checking_assert (passes->get_pass_for_id (j) == p);
}
execute_one_ipa_transform_pass (node, p, do_not_collect);
}
/* Account remaining IPA passes. */
if (report)
{
for (;!passes->get_pass_for_id (j)
|| passes->get_pass_for_id (j)->type != RTL_PASS; j++)
if (passes->get_pass_for_id (j)
&& passes->get_pass_for_id (j)->type == IPA_PASS
&& ((ipa_opt_pass_d *)passes->get_pass_for_id (j))
->function_transform)
{
check_profile_consistency (j, true);
account_profile (j, true);
}
pop_cfun ();
}
node->ipa_transforms_to_apply.release ();
}
/* Check if PASS is explicitly disabled or enabled and return
@ -2521,10 +2590,13 @@ execute_one_pass (opt_pass *pass)
{
/* Run so passes selectively disabling themselves on a given function
are not miscounted. */
if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
&& pass->type != IPA_PASS && pass->type != SIMPLE_IPA_PASS)
{
check_profile_consistency (pass->static_pass_number, false);
account_profile (pass->static_pass_number, false);
if (pass->sub)
account_profile_in_list (pass->sub);
}
current_pass = NULL;
return false;
@ -2553,8 +2625,6 @@ execute_one_pass (opt_pass *pass)
if (pass->tv_id != TV_NONE)
timevar_push (pass->tv_id);
if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
check_profile_consistency (pass->static_pass_number, true);
/* Run pre-pass verification. */
execute_todo (pass->todo_flags_start);
@ -2602,8 +2672,19 @@ execute_one_pass (opt_pass *pass)
/* Run post-pass cleanup and verification. */
execute_todo (todo_after | pass->todo_flags_finish | TODO_verify_il);
if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
account_profile (pass->static_pass_number, true);
if (profile_report)
{
/* IPA passes are accounted at transform time. */
if (pass->type == IPA_PASS)
;
else if (pass->type == SIMPLE_IPA_PASS)
do_per_function (account_profile_1, pass);
else if (cfun && (cfun->curr_properties & PROP_cfg))
{
check_profile_consistency (pass->static_pass_number, true);
account_profile (pass->static_pass_number, true);
}
}
verify_interpass_invariants ();

View File

@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
#include "opts.h"
#include "asan.h"
#include "profile.h"
#include "sreal.h"
/* This file contains functions for building the Control Flow Graph (CFG)
for a function tree. */
@ -9084,18 +9085,32 @@ gimple_account_profile_record (basic_block bb,
struct profile_record *record)
{
gimple_stmt_iterator i;
for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
for (i = gsi_start_nondebug_after_labels_bb (bb); !gsi_end_p (i);
gsi_next_nondebug (&i))
{
record->size
+= estimate_num_insns (gsi_stmt (i), &eni_size_weights);
if (bb->count.initialized_p ())
if (profile_info)
{
if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().initialized_p ()
&& ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p ()
&& bb->count.ipa ().initialized_p ())
record->time
+= estimate_num_insns (gsi_stmt (i),
&eni_time_weights)
* bb->count.ipa ().to_gcov_type ();
}
else if (bb->count.initialized_p ()
&& ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
record->time
+= estimate_num_insns (gsi_stmt (i),
&eni_time_weights) * bb->count.to_gcov_type ();
else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
record->time
+= estimate_num_insns (gsi_stmt (i),
&eni_time_weights) * bb->count.to_frequency (cfun);
+= estimate_num_insns
(gsi_stmt (i),
&eni_time_weights)
* bb->count.to_sreal_scale
(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count).to_double ();
else
record->time
+= estimate_num_insns (gsi_stmt (i), &eni_time_weights);
}
}