From 82567e3386ed9d11a425f12d822ed1ee6b1c949b Mon Sep 17 00:00:00 2001 From: Ira Rosen Date: Thu, 8 Jan 2009 07:59:40 +0000 Subject: [PATCH] re PR tree-optimization/37194 (Autovectorization of small constant iteration loop degrades performance) PR tree-optimization/37194 * tree-vect-transform.c (vect_estimate_min_profitable_iters): Don't add the cost of cost model guard in prologue to scalar outside cost in case of known number of iterations. From-SVN: r143183 --- gcc/ChangeLog | 7 +++++ gcc/testsuite/ChangeLog | 5 ++++ .../vect/costmodel/ppc/costmodel-pr37194.c | 28 ++++++++++++++++++ gcc/tree-vect-transform.c | 29 ++++++++----------- 4 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr37194.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 29c4ba4a431..33652905d4b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2009-01-08 Ira Rosen + + PR tree-optimization/37194 + * tree-vect-transform.c (vect_estimate_min_profitable_iters): + Don't add the cost of cost model guard in prologue to scalar + outside cost in case of known number of iterations. + 2009-01-07 Nathan Froyd Alan Modra diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6b7d3e70015..0c6c3a7e7e2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2009-01-08 Ira Rosen + + PR tree-optimization/37194 + * gcc.dg/vect/costmodel/ppc/costmodel-pr37194.c: New test. 
+ 2009-01-07 Jakub Jelinek PR c++/38725 diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr37194.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr37194.c new file mode 100644 index 00000000000..76c7850fa10 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr37194.c @@ -0,0 +1,28 @@ +/* { dg-require-effective-target vect_float } */ +/* { dg-do compile } */ + +#include +#include "../../tree-vect.h" + +__attribute__ ((noinline)) void +ggSpectrum_Set8(float * data, float d) +{ + int i; + + for (i = 0; i < 8; i++) + data[i] = d; +} + +__attribute__ ((noinline)) void +ggSpectrum_Set20(float * data, float d) +{ + int i; + + for (i = 0; i < 20; i++) + data[i] = d; +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 2db01676bdb..5b3344ad730 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -122,7 +122,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) int vec_outside_cost = 0; int scalar_single_iter_cost = 0; int scalar_outside_cost = 0; - bool runtime_test = false; int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); @@ -141,15 +140,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) return 0; } - /* If the number of iterations is unknown, or the - peeling-for-misalignment amount is unknown, we will have to generate - a runtime test to test the loop count against the threshold. */ - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || (byte_misalign < 0)) - runtime_test = true; - /* Requires loop versioning tests to handle misalignment. 
*/ - if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) { /* FIXME: Make cost depend on complexity of individual check. */ @@ -240,12 +231,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) "peeling for alignment is unknown ."); /* If peeled iterations are unknown, count a taken branch and a not taken - branch per peeled loop. Even if scalar loop iterations are known, - vector iterations are not known since peeled prologue iterations are - not known. Hence guards remain the same. */ + branch per peeled loop. Even if scalar loop iterations are known, + vector iterations are not known since peeled prologue iterations are + not known. Hence guards remain the same. */ peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST - + TARG_COND_NOT_TAKEN_BRANCH_COST); - + + TARG_COND_NOT_TAKEN_BRANCH_COST); } else { @@ -337,7 +327,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) conditions/branch directions. Change the estimates below to something more reasonable. */ - if (runtime_test) + /* If the number of iterations is known and we do not do versioning, we can + decide whether to vectorize at compile time. Hence the scalar version + does not carry cost model guard costs. */ + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) + || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))) { /* Cost model check occurs at versioning. */ if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) @@ -345,8 +340,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST; else { - /* Cost model occurs at prologue generation. */ - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + /* Cost model check occurs at prologue generation. 
*/ + if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST + TARG_COND_NOT_TAKEN_BRANCH_COST; /* Cost model check occurs at epilogue generation. */