l_fma_float_?.c: Update.

* gcc.target/i386/l_fma_float_?.c: Update.
	* gcc.target/i386/l_fma_double_?.c: Update.

	* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
	vect_do_peeling_for_alignment): Fix loop bound computation.
	* tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.

From-SVN: r193241
This commit is contained in:
Jan Hubicka 2012-11-06 14:49:30 +01:00 committed by Jan Hubicka
parent c8fef899e8
commit 22458c5af7
16 changed files with 113 additions and 68 deletions

View File

@ -1,3 +1,9 @@
2012-11-06 Jan Hubicka <jh@suse.cz>
* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
vect_do_peeling_for_alignment): Fix loop bound computation.
* tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.
2012-11-06 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089

View File

@ -1,3 +1,8 @@
2012-11-06 Jan Hubicka <jh@suse.cz>
* gcc.target/i386/l_fma_float_?.c: Update.
* gcc.target/i386/l_fma_double_?.c: Update.
2012-11-06 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089

View File

@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */

View File

@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */

View File

@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */

View File

@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */

View File

@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */

View File

@ -1954,9 +1954,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
by ratio_mult_vf_name steps. */
vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
/* For vectorization factor N, we need to copy the last N-1 values in the
epilogue, and this means N-2 loopback edge executions.
PEELING_FOR_GAPS works by subtracting the last iteration, and thus the
epilogue will execute at least LOOP_VINFO_VECT_FACTOR times. */
max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
: LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
if (check_profitability)
max_iter = MAX (max_iter, (int) th);
max_iter = MAX (max_iter, (int) th - 1);
record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
dump_printf (MSG_OPTIMIZED_LOCATIONS,
"Setting upper bound of nb iterations for epilogue "
@ -2186,9 +2193,11 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
/* For vectorization factor N, we need to copy at most N-1 values
for alignment and this means N-2 loopback edge executions. */
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
if (check_profitability)
max_iter = MAX (max_iter, (int) th);
max_iter = MAX (max_iter, (int) th - 1);
record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
dump_printf (MSG_OPTIMIZED_LOCATIONS,
"Setting upper bound of nb iterations for prologue "

View File

@ -5448,10 +5448,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
bool transform_pattern_stmt = false;
bool check_profitability = false;
int th;
/* Record number of iterations before we started tampering with the profile. */
gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
/* If the profile is imprecise, we have a chance to fix it up. */
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
/* Use the more conservative vectorization threshold. If the number
of iterations is constant assume the cost check has been performed
by our caller. If the threshold makes all loops profitable that
@ -5735,6 +5741,25 @@ vect_transform_loop (loop_vec_info loop_vinfo)
slpeel_make_loop_iterate_ntimes (loop, ratio);
/* Reduce loop iterations by the vectorization factor. */
scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor),
expected_iterations / vectorization_factor);
loop->nb_iterations_upper_bound
= loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
FLOOR_DIV_EXPR);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
&& loop->nb_iterations_upper_bound != double_int_zero)
loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
if (loop->any_estimate)
{
loop->nb_iterations_estimate
= loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
FLOOR_DIV_EXPR);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
&& loop->nb_iterations_estimate != double_int_zero)
loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
}
/* The memory tags and pointers in vectorized statements need to
have their SSA forms updated. FIXME, why can't this be delayed
until all the loops have been transformed? */