diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9a4f1fbc21b..e39fb143119 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2007-07-05 Dorit Nuzman + + * tree-vectorizer.c (new_loop_vec_info): Initialize + LOOP_VINFO_COST_MODEL_MIN_ITERS. + * tree-vectorizer.h (_loop_vec_info): Added new field + min_profitable_iters. + (LOOP_VINFO_COST_MODEL_MIN_ITERS): New access macro to above new field. + (TARG_SCALAR_TO_VEC_COST): Define cost of scalar to vector operation. + * tree-vect-analyze.c (vect_analyze_operations): Set + LOOP_VINFO_COST_MODEL_MIN_ITERS. + * tree-vect-transform.c (vect_estimate_min_profitable_iters): Use + VEC_length to determine if there are any LOOP_VINFO_MAY_MISALIGN_STMTS. + Fix calculation of peel_iters_prologue. Move consideration of epilogue + and prologue cost to after they are computed. + (vect_model_induction_cost): Use TARG_SCALAR_TO_VEC_COST instead of + TARG_VEC_STMT_COST. + (vect_model_simple_cost): Takes additional argument dt. Consider cost + of creating vectors from scalars according to dt. + (vect_model_store_cost): Likewise. + (vectorizable_call): Use dt array instead of scalar dt. Call + vect_model_simple_cost with additional argument dt. + (vectorizable_assignment): Likewise. + (vectorizable_operation): Likewise. + (vectorizable_type_demotion): Likewise. + (vectorizable_type_promotion): Likewise. + (vectorizable_store): Use dt array instead of scalar dt. Call + vect_model_store_cost with additional argument dt. + (vect_do_peeling_for_loop_bound): Don't call + vect_estimate_min_profitable_iters. Instead, look up + LOOP_VINFO_COST_MODEL_MIN_ITERS. Don't always print + "may not be profitable". + 2007-07-05 Dorit Nuzman PR testsuite/32014 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e7888e1d2c1..d2ff6b2b8d3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,23 @@ +2007-07-05 Dorit Nuzman + + * gcc.dg/vect/costmodel/ppc: New directory. 
+ * gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp: New. + * gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-31a.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-31b.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-31c.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-31d.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-33.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-76a.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-76b.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-68b.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c: New test. + * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: New test. + * gcc.dg/vect/costmodel/i386/costmodel-vect-68.c: Now vectorized. 
+ 2007-07-05 Tobias Burnus PR fortran/32359 diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c index 7fa1cd1854a..b916cd91dfb 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c @@ -84,6 +84,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c new file mode 100644 index 00000000000..9347d05ea2d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c @@ -0,0 +1,39 @@ +/* { dg-require-effective-target vect_float } */ + +#include +#include "../../tree-vect.h" + +void interp_pitch(float *exc, float *interp, int pitch, int len) +{ + int i,k; + int maxj; + + maxj=3; + for (i=0;i +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* unaligned */ + for (i = 0; i < N/2; i++) + { + tmp.b[i] = 5; + } + + /* check results: */ + for (i = 0; i +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ 
+ int i; + struct s tmp; + + /* aligned */ + for (i = 0; i < N/2; i++) + { + tmp.c[i] = 6; + } + + /* check results: */ + for (i = 0; i +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* aligned */ + for (i = 0; i < N/2; i++) + { + tmp.d.k[i] = 7; + } + + /* check results: */ + for (i = 0; i +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* unaligned */ + for (i = 0; i < N/2; i++) + { + tmp.e.k[i] = 8; + } + + /* check results: */ + for (i = 0; i +#include "../../tree-vect.h" + +#define N 16 +struct test { + char ca[N]; +}; + +extern struct test s; + +int main1 () +{ + int i; + + for (i = 0; i < N; i++) + { + s.ca[i] = 5; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (s.ca[i] != 5) + abort (); + } + + return 0; +} + +int main (void) +{ + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c new file mode 100644 index 00000000000..d0d40ac338a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "../../tree-vect.h" + +#define N 32 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n 
is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 1. unaligned */ + for (i = 0; i < N; i++) + { + tmp1.a.n[1][2][i] = 5; + } + + /* check results: */ + for (i = 0; i +#include "../../tree-vect.h" + +#define N 32 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 2. aligned */ + for (i = 3; i < N-1; i++) + { + tmp1.a.n[1][2][i] = 6; + } + + /* check results: */ + for (i = 3; i < N-1; i++) + { + if (tmp1.a.n[1][2][i] != 6) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c new file mode 100644 index 00000000000..58c5e9fdbe5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "../../tree-vect.h" + +#define N 32 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 3. 
aligned */ + for (i = 0; i < N; i++) + { + tmp1.e.n[1][2][i] = 7; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (tmp1.e.n[1][2][i] != 7) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c new file mode 100644 index 00000000000..df30caae047 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "../../tree-vect.h" + +#define N 20 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 4. unaligned */ + for (i = 3; i < N-3; i++) + { + tmp1.e.n[1][2][i] = 8; + } + + /* check results: */ + for (i = 3; i +#include "../../tree-vect.h" + +#define N 8 +#define OFF 4 + +/* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. 
*/ + +int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + +int main1 (int *pib) +{ + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + ia[i] = pib[i - OFF]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (ia[i] != pib[i - OFF]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (&ib[OFF]); + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76b.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76b.c new file mode 100644 index 00000000000..08b2239eb9f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76b.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "../../tree-vect.h" + +#define N 8 +#define OFF 4 + +/* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. 
*/ + +int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + +int main1 (int *pib) +{ + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + pib[i - OFF] = ic[i]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (pib[i - OFF] != ic[i]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (&ib[OFF]); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c new file mode 100644 index 00000000000..f6127ba873b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "../../tree-vect.h" + +#define N 8 +#define OFF 4 + +/* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. 
*/ + +int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + +int main1 (int *pib) +{ + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + ia[i] = ic[i - OFF]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (ia[i] != ic[i - OFF]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (&ib[OFF]); + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c new file mode 100644 index 00000000000..cf5becc4908 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c @@ -0,0 +1,51 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "../../tree-vect.h" + +#define N 16 +#define DIFF 242 + +void +main1 (unsigned char x, unsigned char max_result, unsigned char min_result) +{ + int i; + unsigned char ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + unsigned char udiff = 2; + unsigned char umax = x; + unsigned char umin = x; + + for (i = 0; i < N; i++) { + udiff += (unsigned char)(ub[i] - uc[i]); + } + + for (i = 0; i < N; i++) { + umax = umax < uc[i] ? uc[i] : umax; + } + + for (i = 0; i < N; i++) { + umin = umin > uc[i] ? 
uc[i] : umin; + } + + /* check results: */ + if (udiff != DIFF) + abort (); + if (umax != max_result) + abort (); + if (umin != min_result) + abort (); +} + +int main (void) +{ + check_vect (); + + main1 (100, 100, 1); + main1 (0, 15, 0); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp new file mode 100644 index 00000000000..98af2ff85d9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp @@ -0,0 +1,82 @@ +# Copyright (C) 1997, 2004, 2005, 2006 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gcc-dg.exp + +# Exit immediately if this isn't a powerpc target. +if { ![istarget powerpc*-*-*] } then { + return +} + +# Skip targets not supporting -maltivec. +if ![is-effective-target powerpc_altivec_ok] { + return +} + +# Set up flags used for tests that don't specify options. 
+set DEFAULT_VECTCFLAGS "" + +# These flags are used for all targets. +lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fvect-cost-model" + +# If the target system supports vector instructions, the default action +# for a test is 'run', otherwise it's 'compile'. Save current default. +# Executing vector instructions on a system without hardware vector support +# is also disabled by a call to check_vect, but disabling execution here is +# more efficient. +global dg-do-what-default +set save-dg-do-what-default ${dg-do-what-default} + +lappend DEFAULT_VECTCFLAGS "-maltivec" +if [check_vmx_hw_available] { + set dg-do-what-default run +} else { + if [is-effective-target ilp32] { + # Specify a cpu that supports VMX for compile-only tests. + lappend DEFAULT_VECTCFLAGS "-mcpu=7400" + } + set dg-do-what-default compile +} + +# Initialize `dg'. +dg-init + +lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details" + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + +#### Tests with special options +global SAVED_DEFAULT_VECTCFLAGS +set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS + +# -ffast-math tests +set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS +lappend DEFAULT_VECTCFLAGS "-ffast-math" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-fast-math-vect*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + +# Clean up. +set dg-do-what-default ${save-dg-do-what-default} + +# All done. 
+dg-finish diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-68.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-68.c index 7fa1cd1854a..b916cd91dfb 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-68.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-68.c @@ -84,6 +84,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index 6cfea2b1c9f..2b8f3180973 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -482,7 +482,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo) /* Analyze cost. Decide if worth while to vectorize. */ min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); - + LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; if (min_profitable_iters < 0) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 5fdbbe122be..cecf1a086cc 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -97,6 +97,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; + int byte_misalign; /* Cost model disabled. */ if (!flag_vect_cost_model) @@ -109,7 +110,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) /* Requires loop versioning tests to handle misalignment. FIXME: Make cost depend on number of stmts in may_misalign list. 
*/ - if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) + if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) { vec_outside_cost += TARG_COND_BRANCH_COST; if (vect_print_dump_info (REPORT_DETAILS)) @@ -117,32 +118,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) "versioning.\n"); } - /* Requires a prologue loop when peeling to handle misalignment. Add cost of - two guards, one for the peeled loop and one for the vector loop. */ - - peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); - if (peel_iters_prologue) - { - vec_outside_cost += 2 * TARG_COND_BRANCH_COST; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "cost model: Adding cost of checks for " - "prologue.\n"); - } - - /* Requires an epilogue loop to finish up remaining iterations after vector - loop. Add cost of two guards, one for the peeled loop and one for the - vector loop. */ - - if ((peel_iters_prologue < 0) - || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf) - { - vec_outside_cost += 2 * TARG_COND_BRANCH_COST; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "cost model : Adding cost of checks for " - "epilogue.\n"); - } - /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now. @@ -178,9 +153,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. 
*/ - peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); + byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); - if (peel_iters_prologue < 0) + if (byte_misalign < 0) { peel_iters_prologue = vf - 1; if (vect_print_dump_info (REPORT_DETAILS)) @@ -197,6 +172,18 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) } else { + if (byte_misalign) + { + struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); + int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); + tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); + int nelements = TYPE_VECTOR_SUBPARTS (vectype); + + peel_iters_prologue = nelements - (byte_misalign / element_size); + } + else + peel_iters_prologue = 0; + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { peel_iters_epilogue = vf - 1; @@ -206,9 +193,37 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) "loop iterations are unknown ."); } else - peel_iters_epilogue = - (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue) - % vf; + { + int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); + peel_iters_prologue = niters < peel_iters_prologue ? + niters : peel_iters_prologue; + peel_iters_epilogue = (niters - peel_iters_prologue) % vf; + } + } + + /* Requires a prologue loop when peeling to handle misalignment. Add cost of + two guards, one for the peeled loop and one for the vector loop. */ + + if (peel_iters_prologue) + { + vec_outside_cost += 2 * TARG_COND_BRANCH_COST; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "cost model: Adding cost of checks for " + "prologue.\n"); + } + + /* Requires an epilogue loop to finish up remaining iterations after vector + loop. Add cost of two guards, one for the peeled loop and one for the + vector loop. 
*/ + + if (peel_iters_epilogue + || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf) + { + vec_outside_cost += 2 * TARG_COND_BRANCH_COST; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "cost model : Adding cost of checks for " + "epilogue.\n"); } vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) @@ -356,7 +371,7 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies) /* loop cost for vec_loop. */ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; /* prologue cost for vec_init and vec_step. */ - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST; + STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, " @@ -372,10 +387,19 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies) be generated for the single vector op. We will handle that shortly. */ static void -vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies) +vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt) { + int i; + STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; + /* FORNOW: Assuming maximum 2 args per stmts. */ + for (i=0; i<2; i++) + { + if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def) + STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST; + } + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, " "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), @@ -407,11 +431,14 @@ vect_cost_strided_group_size (stmt_vec_info stmt_info) has the overhead of the strided access attributed to it. 
*/ static void -vect_model_store_cost (stmt_vec_info stmt_info, int ncopies) +vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt) { int cost = 0; int group_size; + if (dt == vect_constant_def || dt == vect_invariant_def) + STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST; + /* Strided access? */ if (DR_GROUP_FIRST_DR (stmt_info)) group_size = vect_cost_strided_group_size (stmt_info); @@ -1275,6 +1302,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) /* FIXME: use build_constructor directly. */ vector_type = get_vectype_for_scalar_type (TREE_TYPE (def)); vec_inv = build_constructor_from_list (vector_type, t); + return vect_init_vector (stmt, vec_inv, vector_type); } @@ -2260,7 +2288,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) int nunits_out; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; - enum vect_def_type dt[2]; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; int ncopies, j, nargs; call_expr_arg_iterator iter; @@ -2373,7 +2401,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_call ==="); - vect_model_simple_cost (stmt_info, ncopies); + vect_model_simple_cost (stmt_info, ncopies, dt); return true; } @@ -2826,7 +2854,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); tree new_temp; tree def, def_stmt; - enum vect_def_type dt; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; @@ -2857,7 +2885,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree 
*vec_stmt) return false; op = GIMPLE_STMT_OPERAND (stmt, 1); - if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) + if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -2869,7 +2897,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_assignment ==="); - vect_model_simple_cost (stmt_info, ncopies); + vect_model_simple_cost (stmt_info, ncopies, dt); return true; } @@ -3003,7 +3031,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) int icode; enum machine_mode optab_op2_mode; tree def, def_stmt; - enum vect_def_type dt0, dt1; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in = TYPE_VECTOR_SUBPARTS (vectype); @@ -3061,7 +3089,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } op0 = TREE_OPERAND (operation, 0); - if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) + if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -3071,7 +3099,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); - if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1)) + if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -3120,8 +3148,8 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) by a scalar shift operand. */ optab_op2_mode = insn_data[icode].operand[2].mode; if (! 
(VECTOR_MODE_P (optab_op2_mode) - || dt1 == vect_constant_def - || dt1 == vect_invariant_def)) + || dt[1] == vect_constant_def + || dt[1] == vect_invariant_def)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "operand mode requires invariant argument."); @@ -3134,7 +3162,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_operation ==="); - vect_model_simple_cost (stmt_info, ncopies); + vect_model_simple_cost (stmt_info, ncopies, dt); return true; } @@ -3228,9 +3256,9 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } else { - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); if (op_type == binary_op) - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); } /* Arguments are ready. create the new vector stmt. */ @@ -3278,7 +3306,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, enum tree_code code, code1 = ERROR_MARK; tree new_temp; tree def, def_stmt; - enum vect_def_type dt0; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in; @@ -3336,7 +3364,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, return false; /* Check the operands of the operation. 
*/ - if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) + if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -3354,7 +3382,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_demotion ==="); - vect_model_simple_cost (stmt_info, ncopies); + vect_model_simple_cost (stmt_info, ncopies, dt); return true; } @@ -3377,12 +3405,12 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, if (j == 0) { vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); } else { - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1); - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); } /* Arguments are ready. Create the new vector stmt. */ @@ -3428,7 +3456,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, tree decl1 = NULL_TREE, decl2 = NULL_TREE; int op_type; tree def, def_stmt; - enum vect_def_type dt0, dt1; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in; @@ -3486,7 +3514,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, return false; /* Check the operands of the operation. 
*/ - if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) + if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -3497,7 +3525,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); - if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1)) + if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -3517,7 +3545,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_promotion ==="); - vect_model_simple_cost (stmt_info, 2*ncopies); + vect_model_simple_cost (stmt_info, 2*ncopies, dt); return true; } @@ -3547,9 +3575,9 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, } else { - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); if (op_type == binary_op) - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); } /* Arguments are ready. Create the new vector stmt. We are creating @@ -3838,7 +3866,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; - vect_model_store_cost (stmt_info, ncopies); + vect_model_store_cost (stmt_info, ncopies, dt); return true; } @@ -5423,8 +5451,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio) loop_num = loop->num; /* Analyze cost to set threshhold for vectorized loop. 
*/ - min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); - + min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) * LOOP_VINFO_VECT_FACTOR (loop_vinfo); @@ -5437,7 +5464,9 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio) || min_profitable_iters > min_scalar_loop_bound)) th = (unsigned) min_profitable_iters; - if (vect_print_dump_info (REPORT_DETAILS)) + if (min_profitable_iters + && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vectorization may not be profitable."); new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop), diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index baf699d23f0..b0bb5076c58 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1408,6 +1408,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_BBS (res) = bbs; LOOP_VINFO_EXIT_COND (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; + LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 25299944e4f..e5957ca4a3d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -99,6 +99,13 @@ typedef struct _loop_vec_info { /* Number of iterations. */ tree num_iters; + /* Minimum number of iterations below which vectorization is expected to + not be profitable (as estimated by the cost model). + -1 indicates that vectorization will not be profitable. + FORNOW: This field is an int. Will be a tree in the future, to represent + values unknown at compile time. */ + int min_profitable_iters; + /* Is the loop vectorizable? 
*/ bool vectorizable; @@ -140,6 +147,7 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_BBS(L) (L)->bbs #define LOOP_VINFO_EXIT_COND(L) (L)->exit_cond #define LOOP_VINFO_NITERS(L) (L)->num_iters +#define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask @@ -329,6 +337,11 @@ typedef struct _stmt_vec_info { #define TARG_VEC_TO_SCALAR_COST 1 #endif +/* Cost of scalar to vector operation. */ +#ifndef TARG_SCALAR_TO_VEC_COST +#define TARG_SCALAR_TO_VEC_COST 1 +#endif + /* Cost of aligned vector load. */ #ifndef TARG_VEC_LOAD_COST #define TARG_VEC_LOAD_COST 1