re PR tree-optimization/77536 (Vectorizer not maintaining relationship of relative block frequencies in absence of real profile data)
PR tree-optimization/77536 * tree-ssa-loop-manip.c (niter_for_unrolled_loop): New function. (tree_transform_and_unroll_loop): Use above function to compute the estimated niter of unrolled loop and use it when scaling profile. Also use count info rather than frequency if it's non-zero. * tree-ssa-loop-manip.h (niter_for_unrolled_loop): New declaration. * tree-vect-loop.c (scale_profile_for_vect_loop): New function. (vect_transform_loop): Call above function. gcc/testsuite * gcc.dg/vect/pr79347.c: Revise testing string. From-SVN: r245754
This commit is contained in:
parent
68b948d3ba
commit
57fa080bf6
|
@ -1,3 +1,14 @@
|
||||||
|
2017-02-27 Bin Cheng <bin.cheng@arm.com>
|
||||||
|
|
||||||
|
PR tree-optimization/77536
|
||||||
|
* tree-ssa-loop-manip.c (niter_for_unrolled_loop): New function.
|
||||||
|
(tree_transform_and_unroll_loop): Use above function to compute the
|
||||||
|
estimated niter of unrolled loop and use it when scaling profile.
|
||||||
|
Also use count info rather than frequency if it's non-zero.
|
||||||
|
* tree-ssa-loop-manip.h (niter_for_unrolled_loop): New declaration.
|
||||||
|
* tree-vect-loop.c (scale_profile_for_vect_loop): New function.
|
||||||
|
(vect_transform_loop): Call above function.
|
||||||
|
|
||||||
2017-02-27 Richard Biener <rguenther@suse.de>
|
2017-02-27 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR tree-optimization/45397
|
PR tree-optimization/45397
|
||||||
|
|
|
@ -1,3 +1,8 @@
|
||||||
|
2017-02-27 Bin Cheng <bin.cheng@arm.com>
|
||||||
|
|
||||||
|
PR tree-optimization/77536
|
||||||
|
* gcc.dg/vect/pr79347.c: Revise testing string.
|
||||||
|
|
||||||
2017-02-27 Richard Biener <rguenther@suse.de>
|
2017-02-27 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR tree-optimization/45397
|
PR tree-optimization/45397
|
||||||
|
|
|
@ -10,4 +10,4 @@ void n(void)
|
||||||
a[i]++;
|
a[i]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-tree-dump-times "Invalid sum of " 2 "vect" } } */
|
/* { dg-final { scan-tree-dump-not "Invalid sum of " "vect" } } */
|
||||||
|
|
|
@ -1093,6 +1093,33 @@ scale_dominated_blocks_in_loop (struct loop *loop, basic_block bb,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return the estimated number of iterations of LOOP after it has been unrolled FACTOR times. FACTOR must be non-zero (asserted below). The result is the current estimate divided by FACTOR, except that when no profile feedback is available it is kept at min (current estimate, 5) or above. */
|
||||||
|
|
||||||
|
gcov_type
|
||||||
|
niter_for_unrolled_loop (struct loop *loop, unsigned factor)
|
||||||
|
{
|
||||||
|
gcc_assert (factor != 0);
|
||||||
|
bool profile_p = false; /* NOTE(review): presumably set by the call below when the estimate comes from real profile feedback -- confirm against expected_loop_iterations_unbounded. */
|
||||||
|
gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
|
||||||
|
gcov_type new_est_niter = est_niter / factor; /* Integer division; the quotient is truncated. */
|
||||||
|
|
||||||
|
/* Without profile feedback, loops for which we do not know a better estimate
|
||||||
|
are assumed to roll 10 times. When we unroll such a loop, it appears to
|
||||||
|
roll too little, and it may even seem to be cold. To avoid this, we
|
||||||
|
ensure that the created loop appears to roll at least 5 times (but at
|
||||||
|
most as many times as before unrolling). Don't adjust the estimate if profile
|
||||||
|
feedback is present. */
|
||||||
|
if (new_est_niter < 5 && !profile_p)
|
||||||
|
{
|
||||||
|
if (est_niter < 5)
|
||||||
|
new_est_niter = est_niter; /* Never report more iterations than before unrolling. */
|
||||||
|
else
|
||||||
|
new_est_niter = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new_est_niter;
|
||||||
|
}
|
||||||
|
|
||||||
/* Unroll LOOP FACTOR times. DESC describes number of iterations of LOOP.
|
/* Unroll LOOP FACTOR times. DESC describes number of iterations of LOOP.
|
||||||
EXIT is the exit of the loop to which DESC corresponds.
|
EXIT is the exit of the loop to which DESC corresponds.
|
||||||
|
|
||||||
|
@ -1170,12 +1197,12 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
|
||||||
gimple_stmt_iterator bsi;
|
gimple_stmt_iterator bsi;
|
||||||
use_operand_p op;
|
use_operand_p op;
|
||||||
bool ok;
|
bool ok;
|
||||||
unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
|
unsigned i, prob, prob_entry, scale_unrolled, scale_rest;
|
||||||
unsigned new_est_niter, i, prob;
|
gcov_type freq_e, freq_h;
|
||||||
|
gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
|
||||||
unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
|
unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
|
||||||
auto_vec<edge> to_remove;
|
auto_vec<edge> to_remove;
|
||||||
|
|
||||||
est_niter = expected_loop_iterations (loop);
|
|
||||||
determine_exit_conditions (loop, desc, factor,
|
determine_exit_conditions (loop, desc, factor,
|
||||||
&enter_main_cond, &exit_base, &exit_step,
|
&enter_main_cond, &exit_base, &exit_step,
|
||||||
&exit_cmp, &exit_bound);
|
&exit_cmp, &exit_bound);
|
||||||
|
@ -1207,22 +1234,6 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
|
||||||
gcc_assert (new_loop != NULL);
|
gcc_assert (new_loop != NULL);
|
||||||
update_ssa (TODO_update_ssa);
|
update_ssa (TODO_update_ssa);
|
||||||
|
|
||||||
/* Determine the probability of the exit edge of the unrolled loop. */
|
|
||||||
new_est_niter = est_niter / factor;
|
|
||||||
|
|
||||||
/* Without profile feedback, loops for that we do not know a better estimate
|
|
||||||
are assumed to roll 10 times. When we unroll such loop, it appears to
|
|
||||||
roll too little, and it may even seem to be cold. To avoid this, we
|
|
||||||
ensure that the created loop appears to roll at least 5 times (but at
|
|
||||||
most as many times as before unrolling). */
|
|
||||||
if (new_est_niter < 5)
|
|
||||||
{
|
|
||||||
if (est_niter < 5)
|
|
||||||
new_est_niter = est_niter;
|
|
||||||
else
|
|
||||||
new_est_niter = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Prepare the cfg and update the phi nodes. Move the loop exit to the
|
/* Prepare the cfg and update the phi nodes. Move the loop exit to the
|
||||||
loop latch (and make its condition dummy, for the moment). */
|
loop latch (and make its condition dummy, for the moment). */
|
||||||
rest = loop_preheader_edge (new_loop)->src;
|
rest = loop_preheader_edge (new_loop)->src;
|
||||||
|
@ -1326,10 +1337,25 @@ tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
|
||||||
/* Ensure that the frequencies in the loop match the new estimated
|
/* Ensure that the frequencies in the loop match the new estimated
|
||||||
number of iterations, and change the probability of the new
|
number of iterations, and change the probability of the new
|
||||||
exit edge. */
|
exit edge. */
|
||||||
|
|
||||||
|
freq_h = loop->header->count;
|
||||||
|
freq_e = (loop_preheader_edge (loop))->count;
|
||||||
|
/* Use frequency only if counts are zero. */
|
||||||
|
if (freq_h == 0 && freq_e == 0)
|
||||||
|
{
|
||||||
freq_h = loop->header->frequency;
|
freq_h = loop->header->frequency;
|
||||||
freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
|
freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
|
||||||
|
}
|
||||||
if (freq_h != 0)
|
if (freq_h != 0)
|
||||||
scale_loop_frequencies (loop, freq_e * (new_est_niter + 1), freq_h);
|
{
|
||||||
|
gcov_type scale;
|
||||||
|
/* Avoid dropping loop body profile counter to 0 because of zero count
|
||||||
|
in loop's preheader. */
|
||||||
|
freq_e = MAX (freq_e, 1);
|
||||||
|
/* This should not overflow. */
|
||||||
|
scale = GCOV_COMPUTE_SCALE (freq_e * (new_est_niter + 1), freq_h);
|
||||||
|
scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
|
||||||
|
}
|
||||||
|
|
||||||
exit_bb = single_pred (loop->latch);
|
exit_bb = single_pred (loop->latch);
|
||||||
new_exit = find_edge (exit_bb, rest);
|
new_exit = find_edge (exit_bb, rest);
|
||||||
|
|
|
@ -48,6 +48,7 @@ extern bool gimple_duplicate_loop_to_header_edge (struct loop *, edge,
|
||||||
int);
|
int);
|
||||||
extern bool can_unroll_loop_p (struct loop *loop, unsigned factor,
|
extern bool can_unroll_loop_p (struct loop *loop, unsigned factor,
|
||||||
struct tree_niter_desc *niter);
|
struct tree_niter_desc *niter);
|
||||||
|
extern gcov_type niter_for_unrolled_loop (struct loop *, unsigned);
|
||||||
extern void tree_transform_and_unroll_loop (struct loop *, unsigned,
|
extern void tree_transform_and_unroll_loop (struct loop *, unsigned,
|
||||||
edge, struct tree_niter_desc *,
|
edge, struct tree_niter_desc *,
|
||||||
transform_callback, void *);
|
transform_callback, void *);
|
||||||
|
|
|
@ -6718,6 +6718,50 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Scale the profile of LOOP, which has been vectorized by factor VF, to match
|
||||||
|
the iteration estimate returned by niter_for_unrolled_loop (LOOP, VF). The loop frequencies are rescaled first; then the probability and count of the single exit edge and of the edge entering the latch are recomputed from that estimate. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
|
||||||
|
{
|
||||||
|
edge preheader = loop_preheader_edge (loop);
|
||||||
|
/* Reduce loop iterations by the vectorization factor. */
|
||||||
|
gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
|
||||||
|
gcov_type freq_h = loop->header->count, freq_e = preheader->count;
|
||||||
|
|
||||||
|
/* Use frequency only if counts are zero. Both the header count and the preheader edge count must be zero for the fallback to apply. */
|
||||||
|
if (freq_h == 0 && freq_e == 0)
|
||||||
|
{
|
||||||
|
freq_h = loop->header->frequency;
|
||||||
|
freq_e = EDGE_FREQUENCY (preheader);
|
||||||
|
}
|
||||||
|
if (freq_h != 0)
|
||||||
|
{
|
||||||
|
gcov_type scale;
|
||||||
|
|
||||||
|
/* Avoid dropping loop body profile counter to 0 because of zero count
|
||||||
|
in loop's preheader. */
|
||||||
|
freq_e = MAX (freq_e, 1);
|
||||||
|
/* This should not overflow. NOTE(review): nothing here checks that freq_e * (new_est_niter + 1) fits in gcov_type. */
|
||||||
|
scale = GCOV_COMPUTE_SCALE (freq_e * (new_est_niter + 1), freq_h);
|
||||||
|
scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
|
||||||
|
}
|
||||||
|
|
||||||
|
basic_block exit_bb = single_pred (loop->latch); /* Its count is the base for the latch edge count computed below. */
|
||||||
|
edge exit_e = single_exit (loop);
|
||||||
|
exit_e->count = loop_preheader_edge (loop)->count; /* The exit edge gets the same count as the preheader (entry) edge. */
|
||||||
|
exit_e->probability = REG_BR_PROB_BASE / (new_est_niter + 1); /* Exit probability is 1 / (new_est_niter + 1), in REG_BR_PROB_BASE units. */
|
||||||
|
|
||||||
|
edge exit_l = single_pred_edge (loop->latch);
|
||||||
|
int prob = exit_l->probability; /* Remember the old probability; it is the denominator of the latch rescaling below. */
|
||||||
|
exit_l->probability = REG_BR_PROB_BASE - exit_e->probability; /* Complement of the exit probability. */
|
||||||
|
exit_l->count = exit_bb->count - exit_e->count;
|
||||||
|
if (exit_l->count < 0) /* Clamp: the subtraction above can go negative. */
|
||||||
|
exit_l->count = 0;
|
||||||
|
if (prob > 0) /* Skip the rescale when the old probability is not positive (it is the divisor). */
|
||||||
|
scale_bbs_frequencies_int (&loop->latch, 1, exit_l->probability, prob);
|
||||||
|
}
|
||||||
|
|
||||||
/* Function vect_transform_loop.
|
/* Function vect_transform_loop.
|
||||||
|
|
||||||
The analysis phase has determined that the loop is vectorizable.
|
The analysis phase has determined that the loop is vectorizable.
|
||||||
|
@ -6743,16 +6787,10 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||||
bool transform_pattern_stmt = false;
|
bool transform_pattern_stmt = false;
|
||||||
bool check_profitability = false;
|
bool check_profitability = false;
|
||||||
int th;
|
int th;
|
||||||
/* Record number of iterations before we started tampering with the profile. */
|
|
||||||
gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
|
|
||||||
|
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
|
dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
|
||||||
|
|
||||||
/* If profile is inprecise, we have chance to fix it up. */
|
|
||||||
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
|
||||||
expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
|
|
||||||
|
|
||||||
/* Use the more conservative vectorization threshold. If the number
|
/* Use the more conservative vectorization threshold. If the number
|
||||||
of iterations is constant assume the cost check has been performed
|
of iterations is constant assume the cost check has been performed
|
||||||
by our caller. If the threshold makes all loops profitable that
|
by our caller. If the threshold makes all loops profitable that
|
||||||
|
@ -7068,9 +7106,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||||
|
|
||||||
slpeel_make_loop_iterate_ntimes (loop, niters_vector);
|
slpeel_make_loop_iterate_ntimes (loop, niters_vector);
|
||||||
|
|
||||||
/* Reduce loop iterations by the vectorization factor. */
|
scale_profile_for_vect_loop (loop, vf);
|
||||||
scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vf),
|
|
||||||
expected_iterations / vf);
|
|
||||||
/* The minimum number of iterations performed by the epilogue. This
|
/* The minimum number of iterations performed by the epilogue. This
|
||||||
is 1 when peeling for gaps because we always need a final scalar
|
is 1 when peeling for gaps because we always need a final scalar
|
||||||
iteration. */
|
iteration. */
|
||||||
|
|
Loading…
Reference in New Issue