diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4c9de79c1fd..433b976077c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* tree-vect-loop.c (vect_better_loop_vinfo_p): If old_loop_vinfo
+	has a variable VF, prefer new_loop_vinfo if it is cheaper for the
+	estimated VF and is no worse at double the estimated VF.
+
 2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
 
 	PR target/94668
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9bf3581b770..ea3ba36d97f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gcc.target/aarch64/sve/cost_model_8.c: New test.
+	* gcc.target/aarch64/sve/cost_model_9.c: Likewise.
+	* gcc.target/aarch64/sve/pr89007-1.c: Add -msve-vector-bits=512.
+	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.
+
 2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
 
 	PR target/94668
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c
new file mode 100644
index 00000000000..80c3a23e18a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c
@@ -0,0 +1,12 @@
+/* { dg-options "-O3 -msve-vector-bits=scalable" } */
+
+void
+vset (int *restrict dst, int *restrict src, int count)  /* SRC is unused.  */
+{
+  for (int i = 0; i < count; ++i)  /* COUNT groups of four stores.  */
+#pragma GCC unroll 4
+    for (int j = 0; j < 4; ++j)
+      *dst++ = 1;  /* Write 1 to *DST, then advance DST.  */
+}
+
+/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c
new file mode 100644
index 00000000000..e7a1bac3c83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O3 -msve-vector-bits=scalable" } */
+
+void
+vset (int *restrict dst, int *restrict src, int count)  /* SRC is unused.  */
+{
+  for (int i = 0; i < count; ++i)  /* COUNT groups of eight stores.  */
+#pragma GCC unroll 8
+    for (int j = 0; j < 8; ++j)
+      *dst++ = 1;  /* Write 1 to *DST, then advance DST.  */
+}
+
+/* { dg-final { scan-assembler-not {\tst1w\tz} } } */
+/* { dg-final { scan-assembler-times {\tstp\tq} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
index af4aff4ec6d..ff9550c9109 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=512 --save-temps" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 #define N 1024
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
index 2ccdd0d353e..da345fe8bd6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=512 --save-temps" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 #define N 1024
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 265bcfdc5af..b6c3faeae51 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2414,7 +2414,36 @@ vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
   poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost
 			     * poly_widest_int (new_vf));
   if (maybe_lt (rel_old, rel_new))
-    return false;
+    {
+      /* When old_loop_vinfo uses a variable vectorization factor,
+	 we know that it has a lower cost for at least one runtime VF.
+	 However, we don't know how likely that VF is.
+
+	 One option would be to compare the costs for the estimated VFs.
+	 The problem is that doing so can put too much pressure on the cost
+	 model.  E.g. if the estimated VF is also the lowest possible VF,
+	 and if old_loop_vinfo is 1 unit worse than new_loop_vinfo
+	 for the estimated VF, we'd then choose new_loop_vinfo even
+	 though (a) new_loop_vinfo might not actually be better than
+	 old_loop_vinfo for that VF and (b) it would be significantly
+	 worse at larger VFs.
+
+	 Here we go for a hacky compromise: pick new_loop_vinfo if it is
+	 no more expensive than old_loop_vinfo even after doubling the
+	 estimated old_loop_vinfo VF.  For all but trivial loops, this
+	 ensures that we only pick new_loop_vinfo if it is significantly
+	 better than old_loop_vinfo at the estimated VF.  */
+      if (rel_new.is_constant ())
+	return false;
+
+      HOST_WIDE_INT new_estimated_vf = estimated_poly_value (new_vf);
+      HOST_WIDE_INT old_estimated_vf = estimated_poly_value (old_vf);
+      widest_int estimated_rel_new = (new_loop_vinfo->vec_inside_cost
+				      * widest_int (old_estimated_vf));
+      widest_int estimated_rel_old = (old_loop_vinfo->vec_inside_cost
+				      * widest_int (new_estimated_vf));
+      return estimated_rel_new * 2 <= estimated_rel_old;
+    }
   if (known_lt (rel_new, rel_old))
     return true;