* final.c (compute_alignments): Use counts rather than frequencies.

From-SVN: r254809
This commit is contained in:
Jan Hubicka 2017-11-16 12:03:23 +01:00 committed by Jan Hubicka
parent debc8f4a0c
commit 6786ba1aaa
2 changed files with 37 additions and 27 deletions

View File

@ -1,3 +1,7 @@
2017-11-14 Jan Hubicka <hubicka@ucw.cz>
* final.c (compute_alignments): Use counts rather than frequencies.
2017-11-14 Jan Hubicka <hubicka@ucw.cz> 2017-11-14 Jan Hubicka <hubicka@ucw.cz>
* cfgloopanal.c: Include sreal.h * cfgloopanal.c: Include sreal.h

View File

@ -661,16 +661,13 @@ insn_current_reference_address (rtx_insn *branch)
} }
} }
/* Compute branch alignments based on frequency information in the /* Compute branch alignments based on CFG profile. */
CFG. */
unsigned int unsigned int
compute_alignments (void) compute_alignments (void)
{ {
int log, max_skip, max_log; int log, max_skip, max_log;
basic_block bb; basic_block bb;
int freq_max = 0;
int freq_threshold = 0;
if (label_align) if (label_align)
{ {
@ -693,17 +690,19 @@ compute_alignments (void)
flow_loops_dump (dump_file, NULL, 1); flow_loops_dump (dump_file, NULL, 1);
} }
loop_optimizer_init (AVOID_CFG_MODIFICATIONS); loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
FOR_EACH_BB_FN (bb, cfun) profile_count count_threshold = cfun->cfg->count_max.apply_scale
if (bb->count.to_frequency (cfun) > freq_max) (1, PARAM_VALUE (PARAM_ALIGN_THRESHOLD));
freq_max = bb->count.to_frequency (cfun);
freq_threshold = freq_max / PARAM_VALUE (PARAM_ALIGN_THRESHOLD);
if (dump_file) if (dump_file)
fprintf (dump_file, "freq_max: %i\n",freq_max); {
fprintf (dump_file, "count_max: ");
cfun->cfg->count_max.dump (dump_file);
fprintf (dump_file, "\n");
}
FOR_EACH_BB_FN (bb, cfun) FOR_EACH_BB_FN (bb, cfun)
{ {
rtx_insn *label = BB_HEAD (bb); rtx_insn *label = BB_HEAD (bb);
int fallthru_frequency = 0, branch_frequency = 0, has_fallthru = 0; bool has_fallthru = 0;
edge e; edge e;
edge_iterator ei; edge_iterator ei;
@ -712,35 +711,41 @@ compute_alignments (void)
{ {
if (dump_file) if (dump_file)
fprintf (dump_file, fprintf (dump_file,
"BB %4i freq %4i loop %2i loop_depth %2i skipped.\n", "BB %4i loop %2i loop_depth %2i skipped.\n",
bb->index, bb->count.to_frequency (cfun), bb->index,
bb->loop_father->num, bb->loop_father->num,
bb_loop_depth (bb)); bb_loop_depth (bb));
continue; continue;
} }
max_log = LABEL_ALIGN (label); max_log = LABEL_ALIGN (label);
max_skip = targetm.asm_out.label_align_max_skip (label); max_skip = targetm.asm_out.label_align_max_skip (label);
profile_count fallthru_count = profile_count::zero ();
profile_count branch_count = profile_count::zero ();
FOR_EACH_EDGE (e, ei, bb->preds) FOR_EACH_EDGE (e, ei, bb->preds)
{ {
if (e->flags & EDGE_FALLTHRU) if (e->flags & EDGE_FALLTHRU)
has_fallthru = 1, fallthru_frequency += EDGE_FREQUENCY (e); has_fallthru = 1, fallthru_count += e->count ();
else else
branch_frequency += EDGE_FREQUENCY (e); branch_count += e->count ();
} }
if (dump_file) if (dump_file)
{ {
fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth" fprintf (dump_file, "BB %4i loop %2i loop_depth"
" %2i fall %4i branch %4i", " %2i fall ",
bb->index, bb->count.to_frequency (cfun), bb->loop_father->num, bb->index, bb->loop_father->num,
bb_loop_depth (bb), bb_loop_depth (bb));
fallthru_frequency, branch_frequency); fallthru_count.dump (dump_file);
fprintf (dump_file, " branch ");
branch_count.dump (dump_file);
if (!bb->loop_father->inner && bb->loop_father->num) if (!bb->loop_father->inner && bb->loop_father->num)
fprintf (dump_file, " inner_loop"); fprintf (dump_file, " inner_loop");
if (bb->loop_father->header == bb) if (bb->loop_father->header == bb)
fprintf (dump_file, " loop_header"); fprintf (dump_file, " loop_header");
fprintf (dump_file, "\n"); fprintf (dump_file, "\n");
} }
if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
continue;
/* There are two purposes to align block with no fallthru incoming edge: /* There are two purposes to align block with no fallthru incoming edge:
1) to avoid fetch stalls when branch destination is near cache boundary 1) to avoid fetch stalls when branch destination is near cache boundary
@ -753,11 +758,11 @@ compute_alignments (void)
when function is called. */ when function is called. */
if (!has_fallthru if (!has_fallthru
&& (branch_frequency > freq_threshold && (branch_count > count_threshold
|| (bb->count.to_frequency (cfun) || (bb->count > bb->prev_bb->count.apply_scale (10, 1)
> bb->prev_bb->count.to_frequency (cfun) * 10 && (bb->prev_bb->count
&& (bb->prev_bb->count.to_frequency (cfun) <= ENTRY_BLOCK_PTR_FOR_FN (cfun)
<= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) / 2)))) ->count.apply_scale (1, 2)))))
{ {
log = JUMP_ALIGN (label); log = JUMP_ALIGN (label);
if (dump_file) if (dump_file)
@ -774,9 +779,10 @@ compute_alignments (void)
&& !(single_succ_p (bb) && !(single_succ_p (bb)
&& single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
&& optimize_bb_for_speed_p (bb) && optimize_bb_for_speed_p (bb)
&& branch_frequency + fallthru_frequency > freq_threshold && branch_count + fallthru_count > count_threshold
&& (branch_frequency && (branch_count
> fallthru_frequency * PARAM_VALUE (PARAM_ALIGN_LOOP_ITERATIONS))) > fallthru_count.apply_scale
(PARAM_VALUE (PARAM_ALIGN_LOOP_ITERATIONS), 1)))
{ {
log = LOOP_ALIGN (label); log = LOOP_ALIGN (label);
if (dump_file) if (dump_file)