diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4c9de79c1fd..433b976077c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2020-04-20 Richard Sandiford + + * tree-vect-loop.c (vect_better_loop_vinfo_p): If old_loop_vinfo + has a variable VF, prefer new_loop_vinfo if it is cheaper for the + estimated VF and is no worse at double the estimated VF. + 2020-04-20 Richard Sandiford PR target/94668 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9bf3581b770..ea3ba36d97f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2020-04-20 Richard Sandiford + + * gcc.target/aarch64/sve/cost_model_8.c: New test. + * gcc.target/aarch64/sve/cost_model_9.c: Likewise. + * gcc.target/aarch64/sve/pr89007-1.c: Add -msve-vector-bits=512. + * gcc.target/aarch64/sve/pr89007-2.c: Likewise. + 2020-04-20 Richard Sandiford PR target/94668 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c new file mode 100644 index 00000000000..80c3a23e18a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c @@ -0,0 +1,12 @@ +/* { dg-options "-O3 -msve-vector-bits=scalable" } */ + +void +vset (int *restrict dst, int *restrict src, int count) +{ + for (int i = 0; i < count; ++i) +#pragma GCC unroll 4 + for (int j = 0; j < 4; ++j) + *dst++ = 1; +} + +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c new file mode 100644 index 00000000000..e7a1bac3c83 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c @@ -0,0 +1,13 @@ +/* { dg-options "-O3 -msve-vector-bits=scalable" } */ + +void +vset (int *restrict dst, int *restrict src, int count) +{ + for (int i = 0; i < count; ++i) +#pragma GCC unroll 8 + for (int j = 0; j < 8; ++j) + *dst++ = 1; +} + +/* { dg-final { scan-assembler-not {\tst1w\tz} } } */ +/* { dg-final { scan-assembler-times {\tstp\tq} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c index af4aff4ec6d..ff9550c9109 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */ +/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=512 --save-temps" } */ /* { dg-final { check-function-bodies "**" "" } } */ #define N 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c index 2ccdd0d353e..da345fe8bd6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */ +/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=512 --save-temps" } */ /* { dg-final { check-function-bodies "**" "" } } */ #define N 1024 diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 265bcfdc5af..b6c3faeae51 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2414,7 +2414,36 @@ vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost * poly_widest_int (new_vf)); if (maybe_lt (rel_old, rel_new)) - return false; + { + /* When old_loop_vinfo uses a variable vectorization factor, + we know that it has a lower cost for at least one runtime VF. + However, we don't know how likely that VF is. + + One option would be to compare the costs for the estimated VFs. + The problem is that that can put too much pressure on the cost + model. E.g. if the estimated VF is also the lowest possible VF, + and if old_loop_vinfo is 1 unit worse than new_loop_vinfo + for the estimated VF, we'd then choose new_loop_vinfo even + though (a) new_loop_vinfo might not actually be better than + old_loop_vinfo for that VF and (b) it would be significantly + worse at larger VFs. + + Here we go for a hacky compromise: pick new_loop_vinfo if it is + no more expensive than old_loop_vinfo even after doubling the + estimated old_loop_vinfo VF. For all but trivial loops, this + ensures that we only pick new_loop_vinfo if it is significantly + better than old_loop_vinfo at the estimated VF. */ + if (rel_new.is_constant ()) + return false; + + HOST_WIDE_INT new_estimated_vf = estimated_poly_value (new_vf); + HOST_WIDE_INT old_estimated_vf = estimated_poly_value (old_vf); + widest_int estimated_rel_new = (new_loop_vinfo->vec_inside_cost + * widest_int (old_estimated_vf)); + widest_int estimated_rel_old = (old_loop_vinfo->vec_inside_cost + * widest_int (new_estimated_vf)); + return estimated_rel_new * 2 <= estimated_rel_old; + } if (known_lt (rel_new, rel_old)) return true;