basic-block.h (RDIV): Define.

* basic-block.h (RDIV): Define.
	(EDGE_FREQUENCY): Simplify.
	(check_probability, combine_probabilities, apply_probability,
	inverse_probability): New.
	* cfgloop.c (scale_loop_profile): New function.
	* cfgloop.h (scale_loop_profile): Declare.
	(slpeel_add_loop_guard): Add probability parameter.
	(set_prologue_iterations): Add probability parameter.
	(slpeel_tree_peel_loop_to_edge): Add bound1 and bound2 parameters;
	update probabilities correctly.
	(vect_do_peeling_for_alignment, vect_gen_niters_for_prolog_loop): New.

From-SVN: r191839
This commit is contained in:
Jan Hubicka 2012-09-28 23:14:23 +02:00 committed by Jan Hubicka
parent 0380c51fef
commit e78410bf11
5 changed files with 235 additions and 14 deletions

View File

@ -1,3 +1,17 @@
2012-09-28 Jan Hubicka <jh@suse.cz>
* basic-block.h (RDIV): Define.
(EDGE_FREQUENCY): Simplify.
(check_probability, combine_probabilities, apply_probability,
inverse_probability): New.
* cfgloop.c (scale_loop_profile): New function.
* cfgloop.h (scale_loop_profile): Declare.
(slpeel_add_loop_guard): Add probability parameter.
(set_prologue_iterations): Add probability parameter.
(slpeel_tree_peel_loop_to_edge): Add bound1 and bound2 parameters;
update probabilities correctly.
(vect_do_peeling_for_alignment, vect_gen_niters_for_prolog_loop): New.
2012-09-20 Bernd Schmidt <bernds@codesourcery.com>
PR bootstrap/54688

View File

@ -478,11 +478,10 @@ struct edge_list
#define BRANCH_EDGE(bb) (EDGE_SUCC ((bb), 0)->flags & EDGE_FALLTHRU \
? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0))
#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
/* Return expected execution frequency of the edge E. */
#define EDGE_FREQUENCY(e) (((e)->src->frequency \
* (e)->probability \
+ REG_BR_PROB_BASE / 2) \
/ REG_BR_PROB_BASE)
#define EDGE_FREQUENCY(e) RDIV ((e)->src->frequency * (e)->probability, \
REG_BR_PROB_BASE)
/* Return nonzero if edge is critical. */
#define EDGE_CRITICAL_P(e) (EDGE_COUNT ((e)->src->succs) >= 2 \
@ -910,4 +909,40 @@ extern void default_rtl_profile (void);
/* In profile.c. */
extern gcov_working_set_t *find_working_set(unsigned pct_times_10);
/* Check tha probability is sane. */
static inline void
check_probability (int prob)
{
gcc_checking_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
}
/* Given PROB1 and PROB2, return PROB1*PROB2/REG_BR_PROB_BASE.
Used to combine BB probabilities. */
static inline int
combine_probabilities (int prob1, int prob2)
{
check_probability (prob1);
check_probability (prob2);
return RDIV (prob1 * prob2, REG_BR_PROB_BASE);
}
/* Apply probability PROB on frequency or count FREQ. */
static inline gcov_type
apply_probability (gcov_type freq, int prob)
{
check_probability (prob);
return RDIV (freq * prob, REG_BR_PROB_BASE);
}
/* Return inverse probability for PROB. */
static inline int
inverse_probability (int prob1)
{
check_probability (prob1);
return REG_BR_PROB_BASE - prob1;
}
#endif /* GCC_BASIC_BLOCK_H */

View File

@ -1666,3 +1666,121 @@ loop_exits_from_bb_p (struct loop *loop, basic_block bb)
return false;
}
/* Scale the profile estiamte within loop by SCALE.
If ITERATION_BOUND is non-zero, scale even further if loop is predicted
to iterate too many times. */
void
scale_loop_profile (struct loop *loop, int scale, int iteration_bound)
{
gcov_type iterations = expected_loop_iterations_unbounded (loop);
basic_block *bbs;
unsigned int i;
edge e;
edge_iterator ei;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ";; Scaling loop %i with scale %f, "
"bounding iterations to %i from guessed %i\n",
loop->num, (double)scale / REG_BR_PROB_BASE,
iteration_bound, (int)iterations);
/* See if loop is predicted to iterate too many times. */
if (iteration_bound && iterations > 0
&& RDIV (iterations * scale, REG_BR_PROB_BASE) > iteration_bound)
{
/* Fixing loop profile for different trip count is not trivial; the exit
probabilities has to be updated to match and frequencies propagated down
to the loop body.
We fully update only the simple case of loop with single exit that is
either from the latch or BB just before latch and leads from BB with
simple conditional jump. This is OK for use in vectorizer. */
e = single_exit (loop);
if (e)
{
edge other_e;
int freq_delta;
gcov_type count_delta;
FOR_EACH_EDGE (other_e, ei, e->src->succs)
if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
&& e != other_e)
break;
/* Probability of exit must be 1/iterations. */
freq_delta = EDGE_FREQUENCY (e);
e->probability = REG_BR_PROB_BASE / iteration_bound;
other_e->probability = inverse_probability (e->probability);
freq_delta -= EDGE_FREQUENCY (e);
/* Adjust counts accordingly. */
count_delta = e->count;
e->count = apply_probability (e->src->count, e->probability);
other_e->count = apply_probability (e->src->count, other_e->probability);
count_delta -= e->count;
/* If latch exists, change its frequency and count, since we changed
probability of exit. Theoretically we should update everything from
source of exit edge to latch, but for vectorizer this is enough. */
if (loop->latch
&& loop->latch != e->src)
{
loop->latch->frequency += freq_delta;
if (loop->latch->frequency < 0)
loop->latch->frequency = 0;
loop->latch->count += count_delta;
if (loop->latch->count < 0)
loop->latch->count = 0;
}
}
/* Roughly speaking we want to reduce the loop body profile by the
the difference of loop iterations. We however can do better if
we look at the actual profile, if it is available. */
scale = RDIV (iteration_bound * scale, iterations);
if (loop->header->count)
{
gcov_type count_in = 0;
FOR_EACH_EDGE (e, ei, loop->header->preds)
if (e->src != loop->latch)
count_in += e->count;
if (count_in != 0)
scale = RDIV (count_in * iteration_bound * REG_BR_PROB_BASE, loop->header->count);
}
else if (loop->header->frequency)
{
int freq_in = 0;
FOR_EACH_EDGE (e, ei, loop->header->preds)
if (e->src != loop->latch)
freq_in += EDGE_FREQUENCY (e);
if (freq_in != 0)
scale = RDIV (freq_in * iteration_bound * REG_BR_PROB_BASE, loop->header->frequency);
}
if (!scale)
scale = 1;
}
if (scale == REG_BR_PROB_BASE)
return;
/* Scale the actual probabilities. */
bbs = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
bb->count = RDIV (bb->count * scale, REG_BR_PROB_BASE);
bb->frequency = RDIV (bb->frequency * scale, REG_BR_PROB_BASE);
FOR_EACH_EDGE (e, ei, bb->succs)
e->count = RDIV (e->count * scale, REG_BR_PROB_BASE);
}
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ";; guessed iterations are now %i\n",
(int)expected_loop_iterations_unbounded (loop));
free (bbs);
}

View File

@ -713,5 +713,6 @@ extern void unroll_and_peel_loops (int);
extern void doloop_optimize_loops (void);
extern void move_loop_invariants (void);
extern bool finite_loop_p (struct loop *);
extern void scale_loop_profile (struct loop *loop, int scale, int iteration_bound);
#endif /* GCC_CFGLOOP_H */

View File

@ -931,7 +931,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
gimple_seq cond_expr_stmt_list,
basic_block exit_bb, basic_block dom_bb)
basic_block exit_bb, basic_block dom_bb,
int probability)
{
gimple_stmt_iterator gsi;
edge new_e, enter_e;
@ -956,6 +957,12 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond,
/* Add new edge to connect guard block to the merge/loop-exit block. */
new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
new_e->count = guard_bb->count;
new_e->probability = probability;
new_e->count = apply_probability (enter_e->count, probability);
enter_e->count -= new_e->count;
enter_e->probability = inverse_probability (probability);
set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
return new_e;
}
@ -1038,7 +1045,8 @@ static void
set_prologue_iterations (basic_block bb_before_first_loop,
tree *first_niters,
struct loop *loop,
unsigned int th)
unsigned int th,
int probability)
{
edge e;
basic_block cond_bb, then_bb;
@ -1067,7 +1075,15 @@ set_prologue_iterations (basic_block bb_before_first_loop,
e_true->flags &= ~EDGE_FALLTHRU;
e_true->flags |= EDGE_TRUE_VALUE;
e_true->probability = probability;
e_false->probability = inverse_probability (probability);
e_true->count = apply_probability (cond_bb->count, probability);
e_false->count = cond_bb->count - e_true->count;
then_bb->frequency = EDGE_FREQUENCY (e_true);
then_bb->count = e_true->count;
e_fallthru = EDGE_SUCC (then_bb, 0);
e_fallthru->count = then_bb->count;
gsi = gsi_last_bb (cond_bb);
cost_pre_condition =
@ -1126,6 +1142,8 @@ set_prologue_iterations (basic_block bb_before_first_loop,
prologue generation or whether cost model check
has not occurred during prologue generation and hence
needs to occur during epilogue generation.
- BOUND1 is the upper bound on number of iterations of the first loop (if known)
- BOUND2 is the upper bound on number of iterations of the second loop (if known)
Output:
@ -1153,7 +1171,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge e, tree *first_niters,
tree niters, bool update_first_loop_count,
unsigned int th, bool check_profitability,
tree cond_expr, gimple_seq cond_expr_stmt_list)
tree cond_expr, gimple_seq cond_expr_stmt_list,
int bound1, int bound2)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
@ -1166,6 +1185,13 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge exit_e = single_exit (loop);
LOC loop_loc;
tree cost_pre_condition = NULL_TREE;
/* There are many aspects to how likely the first loop is going to be executed.
Without histogram we can't really do good job. Simply set it to
2/3, so the first loop is not reordered to the end of function and
the hot path through stays short. */
int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
int probability_of_second_loop;
if (!slpeel_can_duplicate_loop_p (loop, e))
return NULL;
@ -1341,6 +1367,21 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
bb_before_second_loop = split_edge (single_exit (first_loop));
probability_of_second_loop = (inverse_probability (first_guard_probability)
+ combine_probabilities (second_guard_probability,
first_guard_probability));
/* Theoretically preheader edge of first loop and exit edge should have
same frequencies. Loop exit probablities are however easy to get wrong.
It is safer to copy value from original loop entry. */
bb_before_second_loop->frequency
= apply_probability (bb_before_first_loop->frequency,
probability_of_second_loop);
bb_before_second_loop->count
= apply_probability (bb_before_first_loop->count,
probability_of_second_loop);
single_succ_edge (bb_before_second_loop)->count
= bb_before_second_loop->count;
/* Epilogue peeling. */
if (!update_first_loop_count)
{
@ -1374,7 +1415,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
{
if (check_profitability)
set_prologue_iterations (bb_before_first_loop, first_niters,
loop, th);
loop, th, first_guard_probability);
pre_condition =
fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
@ -1383,7 +1424,10 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
cond_expr_stmt_list,
bb_before_second_loop, bb_before_first_loop);
bb_before_second_loop, bb_before_first_loop,
inverse_probability (first_guard_probability));
scale_loop_profile (first_loop, first_guard_probability,
check_profitability && (int)th > bound1 ? th : bound1);
slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
first_loop == new_loop,
&new_exit_bb);
@ -1421,7 +1465,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
pre_condition =
fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
bb_after_second_loop, bb_before_first_loop);
bb_after_second_loop, bb_before_first_loop,
inverse_probability (second_guard_probability));
scale_loop_profile (second_loop, probability_of_second_loop, bound2);
slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
second_loop == new_loop, &new_exit_bb);
@ -1882,7 +1928,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
&ratio_mult_vf_name, ni_name, false,
th, check_profitability,
cond_expr, cond_expr_stmt_list);
cond_expr, cond_expr_stmt_list,
0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
@ -1951,7 +1998,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
use TYPE_VECTOR_SUBPARTS. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@ -1977,6 +2024,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
fprintf (vect_dump, "known peeling = %d.", npeel);
iters = build_int_cst (niters_type, npeel);
*bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
}
else
{
@ -2015,6 +2063,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters);
*bound = nelements;
}
/* Create: prolog_loop_niters = min (iters, loop_niters) */
@ -2107,6 +2156,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
tree wide_prolog_niters;
struct loop *new_loop;
int max_iter;
int bound = 0;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
@ -2115,13 +2165,16 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
ni_name);
ni_name,
&bound);
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
&niters_of_prolog_loop, ni_name, true,
th, check_profitability, NULL_TREE, NULL);
th, check_profitability, NULL_TREE, NULL,
bound,
0);
gcc_assert (new_loop);
#ifdef ENABLE_CHECKING