diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b22409ec0a3..ad525d3d2d6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2012-11-06 Jan Hubicka + + * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound, + vect_do_peeling_for_alignment): Fix loop bound computation. + * tree-vect-loop.c (vect_transform_loop): Maintain loop bounds. + 2012-11-06 Oleg Endo PR target/54089 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9ff85006830..62f9f0c2783 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-11-06 Jan Hubicka + + * gcc.target/i386/l_fma_float_?.c: Update. + * gcc.target/i386/l_fma_double_?.c: Update. + 2012-11-06 Oleg Endo PR target/54089 diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c index 716acfef65c..270659359f4 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ +/* { 
dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c index 01173afb223..e8933e25d53 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c index 8cda521a870..00c756775c8 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */ -/* { dg-final { 
scan-assembler-times "vfnmsub132sd" 20 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c index 9f2331b51e8..09970bdb5c6 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c index 9e33975b1e4..2a1428e4d9c 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times 
"vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c index 28d264dd20d..092032aa0b5 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c index fea0b20619d..4bcd81de9da 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */ -/* { 
dg-final { scan-assembler-times "vfmadd213ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c index dd5f543f58c..34b7fcb6dd5 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c index 
38853353b01..6ff2c6eacd5 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c index 5a7bb217836..39548bfa76b 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ -/* { dg-final { 
scan-assembler-times "vfnmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c index 0b0454ed336..83d79512592 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c index 03bf8e84835..1eefc817c36 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 64 
} } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 3c356e35b6f..58ded23399e 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1954,9 +1954,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, by ratio_mult_vf_name steps. */ vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); - max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; + /* For vectorization factor N, we need to copy last N-1 values in epilogue + and this means N-2 loopback edge executions. + + PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue + will execute at least LOOP_VINFO_VECT_FACTOR times. */ + max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2 + : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2; if (check_profitability) - max_iter = MAX (max_iter, (int) th); + max_iter = MAX (max_iter, (int) th - 1); record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true); dump_printf (MSG_OPTIMIZED_LOCATIONS, "Setting upper bound of nb iterations for epilogue " @@ -2186,9 +2193,11 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, #ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (new_loop, loop); #endif - max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; + /* For vectorization factor N, we need to copy at most N-1 values + for alignment and this means N-2 loopback edge executions. 
*/ + max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2; if (check_profitability) - max_iter = MAX (max_iter, (int) th); + max_iter = MAX (max_iter, (int) th - 1); record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true); dump_printf (MSG_OPTIMIZED_LOCATIONS, "Setting upper bound of nb iterations for prologue " diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 908caed0b57..5e99857efd6 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5448,10 +5448,16 @@ vect_transform_loop (loop_vec_info loop_vinfo) bool transform_pattern_stmt = false; bool check_profitability = false; int th; + /* Record number of iterations before we started tampering with the profile. */ + gcov_type expected_iterations = expected_loop_iterations_unbounded (loop); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ==="); + /* If profile is imprecise, we have a chance to fix it up. */ + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo); + /* Use the more conservative vectorization threshold. If the number of iterations is constant assume the cost check has been performed by our caller. If the threshold makes all loops profitable that @@ -5735,6 +5741,25 @@ vect_transform_loop (loop_vec_info loop_vinfo) slpeel_make_loop_iterate_ntimes (loop, ratio); + /* Reduce loop iterations by the vectorization factor. 
*/ + scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor), + expected_iterations / vectorization_factor); + loop->nb_iterations_upper_bound + = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor), + FLOOR_DIV_EXPR); + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + && loop->nb_iterations_upper_bound != double_int_zero) + loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one; + if (loop->any_estimate) + { + loop->nb_iterations_estimate + = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor), + FLOOR_DIV_EXPR); + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + && loop->nb_iterations_estimate != double_int_zero) + loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one; + } + /* The memory tags and pointers in vectorized statements need to have their SSA forms updated. FIXME, why can't this be delayed until all the loops have been transformed? */