target.h (builtin_vectorization_cost): Add new target builtin.

2007-07-12  Dorit Nuzman  <dorit@il.ibm.com>

        * target.h (builtin_vectorization_cost): Add new target builtin.
        * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
        * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New.
        (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New.
        * tree-vect-analyze.c (vect_analyze_slp_instance): Initialize
        uninitialized variables.
        * tree-vect-transform.c (cost_for_stmt): New function.
        (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of
        using cost 1 for all scalar stmts. Be less conservative when
        estimating the number of prologue/epilogue iterations. Call
        targetm.vectorize.builtin_vectorization_cost. Return
        min_profitable_iters-1.
        (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for
        initialization cost instead of TARG_VEC_STMT_COST. Use
        TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction
        epilogue code. Fix epilogue cost computation.
        * config/spu/spu.c (spu_builtin_vectorization_cost): New.
        (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement.
        * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST):
        (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST):
        (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST):
        (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define.

2007-07-12  Dorit Nuzman  <dorit@il.ibm.com>

        * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
        get vectorized.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops
        now get vectorized.
        * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New.
        * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New.
        * lib/target-supports.exp (check_effective_target_vect_int_mult):
        Add spu.

From-SVN: r126584
This commit is contained in:
Dorit Nuzman 2007-07-12 12:17:03 +00:00 committed by Dorit Nuzman
parent e1c8221962
commit e95b59d2ab
26 changed files with 937 additions and 18 deletions

View File

@ -1,3 +1,28 @@
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
* target.h (builtin_vectorization_cost): Add new target builtin.
* target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
* tree-vectorizer.h (TARG_SCALAR_STMT_COST): New.
(TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New.
* tree-vect-analyze.c (vect_analyze_slp_instance): Initialize
uninitialized variables.
* tree-vect-transform.c (cost_for_stmt): New function.
(vect_estimate_min_profitable_iters): Call cost_for_stmt instead of
using cost 1 for all scalar stmts. Be less conservative when
estimating the number of prologue/epilogue iterations. Call
targetm.vectorize.builtin_vectorization_cost. Return
min_profitable_iters-1.
(vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for
initialization cost instead of TARG_VEC_STMT_COST. Use
TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction
epilogue code. Fix epilogue cost computation.
* config/spu/spu.c (spu_builtin_vectorization_cost): New.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement.
* config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST):
(TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST):
(TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST):
(TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define.
2007-07-12 Richard Guenther <rguenther@suse.de>
* gimplify.c (gimplify_conversion): Make sure that the result

View File

@ -133,6 +133,7 @@ static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
@ -261,6 +262,9 @@ const struct attribute_spec spu_attribute_table[];
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
struct gcc_target targetm = TARGET_INITIALIZER;
void
@ -5191,6 +5195,21 @@ spu_builtin_mask_for_load (void)
return d->fndecl;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.

   RUNTIME_TEST is true when a runtime test guards the vectorized loop.
   When the branch of that test is taken the vectorized version is
   skipped, and this incurs a misprediction cost because the vectorized
   version is expected to be the fall-through.  The latency of a
   mispredicted branch is therefore subtracted from the costs that are
   incurred when the vectorized version is executed (hence the negative
   return value).  Without a runtime test there is nothing to adjust.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  return runtime_test ? -19 : 0;
}
void
spu_init_expanders (void)
{

View File

@ -541,6 +541,52 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
#define ASM_OUTPUT_ALIGN(FILE,LOG) \
do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0)
/* Model costs for the vectorizer. */
/* Cost of conditional branch. */
#ifndef TARG_COND_BRANCH_COST
#define TARG_COND_BRANCH_COST 6
#endif
/* Cost of any scalar operation, excluding load and store. */
#ifndef TARG_SCALAR_STMT_COST
#define TARG_SCALAR_STMT_COST 1
#endif
/* Cost of scalar load. */
#undef TARG_SCALAR_LOAD_COST
#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */
/* Cost of scalar store. */
#undef TARG_SCALAR_STORE_COST
#define TARG_SCALAR_STORE_COST 10
/* Cost of any vector operation, excluding load, store,
or vector to scalar operation. */
#undef TARG_VEC_STMT_COST
#define TARG_VEC_STMT_COST 1
/* Cost of vector to scalar operation. */
#undef TARG_VEC_TO_SCALAR_COST
#define TARG_VEC_TO_SCALAR_COST 1
/* Cost of scalar to vector operation. */
#undef TARG_SCALAR_TO_VEC_COST
#define TARG_SCALAR_TO_VEC_COST 1
/* Cost of aligned vector load. */
#undef TARG_VEC_LOAD_COST
#define TARG_VEC_LOAD_COST 1
/* Cost of misaligned vector load. */
#undef TARG_VEC_UNALIGNED_LOAD_COST
#define TARG_VEC_UNALIGNED_LOAD_COST 2
/* Cost of vector store. */
#undef TARG_VEC_STORE_COST
#define TARG_VEC_STORE_COST 1
/* Misc */

View File

@ -356,6 +356,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
default_builtin_vectorized_conversion
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
#define TARGET_VECTORIZE \
{ \
@ -363,7 +364,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \
TARGET_VECTORIZE_BUILTIN_CONVERSION, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
}
#define TARGET_DEFAULT_TARGET_FLAGS 0

View File

@ -413,6 +413,10 @@ struct gcc_target
element-by-element products for the odd elements. */
tree (* builtin_mul_widen_even) (tree);
tree (* builtin_mul_widen_odd) (tree);
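/* Returns the cost to be added to the overheads involved with
executing the vectorized version of a loop. The argument is true
when testing the loop count against the profitability threshold
involves a runtime test. */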
int (*builtin_vectorization_cost) (bool);
} vectorize;
/* The initial value of target_flags. */

View File

@ -1,3 +1,27 @@
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
get vectorized.
* gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops
now get vectorized.
* gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New.
* gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New.
* lib/target-supports.exp (check_effective_target_vect_int_mult):
Add spu.
2007-07-12 Jakub Jelinek <jakub@redhat.com>
PR c++/30854

View File

@ -46,6 +46,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -46,6 +46,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,39 @@
/* { dg-require-effective-target vect_float } */
#include <stdlib.h>
#include "../../tree-vect.h"
/* For each I in [0, LEN), store in INTERP[I] the sum of the seven
   consecutive elements of EXC that start at index I - PITCH + MAXJ - 6,
   where MAXJ is fixed at 3.  */
void interp_pitch(float *exc, float *interp, int pitch, int len)
{
int i,k;
int maxj;
maxj=3;
for (i=0;i<len;i++)
{
float tmp = 0;
/* Inner reduction over a 7-element window of EXC.  */
for (k=0;k<7;k++)
{
tmp += exc[i-pitch+k+maxj-6];
}
interp[i] = tmp;
}
}
int main()
{
float *exc = calloc(126,sizeof(float));
float *interp = calloc(80,sizeof(float));
int pitch = -35;
check_vect ();
interp_pitch(exc, interp, pitch, 80);
free(exc);
free(interp);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
int main1 ()
{
int i;
struct s tmp;
/* unaligned */
for (i = 0; i < N/2; i++)
{
tmp.b[i] = 5;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.b[i] != 5)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,50 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
int main1 ()
{
int i;
struct s tmp;
/* aligned */
for (i = 0; i < N/2; i++)
{
tmp.c[i] = 6;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.c[i] != 6)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,50 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
int main1 ()
{
int i;
struct s tmp;
/* aligned */
for (i = 0; i < N/2; i++)
{
tmp.d.k[i] = 7;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.d.k[i] != 7)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
int main1 ()
{
int i;
struct s tmp;
/* unaligned */
for (i = 0; i < N/2; i++)
{
tmp.e.k[i] = 8;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.e.k[i] != 8)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,40 @@
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 16
struct test {
char ca[N];
};
extern struct test s;
int main1 ()
{
int i;
for (i = 0; i < N; i++)
{
s.ca[i] = 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.ca[i] != 5)
abort ();
}
return 0;
}
int main (void)
{
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,49 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 1. unaligned */
for (i = 0; i < N; i++)
{
tmp1.a.n[1][2][i] = 5;
}
/* check results: */
for (i = 0; i <N; i++)
{
if (tmp1.a.n[1][2][i] != 5)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,49 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 2. aligned */
for (i = 3; i < N-1; i++)
{
tmp1.a.n[1][2][i] = 6;
}
/* check results: */
for (i = 3; i < N-1; i++)
{
if (tmp1.a.n[1][2][i] != 6)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,49 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 3. aligned */
for (i = 0; i < N; i++)
{
tmp1.e.n[1][2][i] = 7;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (tmp1.e.n[1][2][i] != 7)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,50 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 20
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 4. unaligned */
for (i = 3; i < N-3; i++)
{
tmp1.e.n[1][2][i] = 8;
}
/* check results: */
for (i = 3; i <N-3; i++)
{
if (tmp1.e.n[1][2][i] != 8)
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,47 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 8
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
int main1 (int *pib)
{
int i;
int ia[N+OFF];
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
for (i = OFF; i < N; i++)
{
ia[i] = pib[i - OFF];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (ia[i] != pib[i - OFF])
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,47 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 8
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
int main1 (int *pib)
{
int i;
int ia[N+OFF];
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
for (i = OFF; i < N; i++)
{
pib[i - OFF] = ic[i];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (pib[i - OFF] != ic[i])
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,47 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 8
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
int main1 (int *pib)
{
int i;
int ia[N+OFF];
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
for (i = OFF; i < N; i++)
{
ia[i] = ic[i - OFF];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (ia[i] != ic[i - OFF])
abort ();
}
return 0;
}
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,38 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 26
int a[N];
int main1 (int X)
{
int s = X;
int i;
/* vectorization of reduction with induction. */
for (i = 0; i < N; i++)
s += (i + a[i]);
return s;
}
int main (void)
{
int s, i;
check_vect ();
for (i = 0; i < N; i++)
a[i] = 2*i;
s = main1 (3);
if (s != 978)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {! vect_int_mult } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,69 @@
# Copyright (C) 1997, 2004, 2005, 2006 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# GCC testsuite that uses the `dg.exp' driver.
# Load support procs.
load_lib gcc-dg.exp
# Exit immediately if this isn't an spu target.
if { ![istarget spu*-*-*] } then {
return
}
# Set up flags used for tests that don't specify options.
set DEFAULT_VECTCFLAGS ""
# These flags are used for all targets.
lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fvect-cost-model"
# If the target system supports vector instructions, the default action
# for a test is 'run', otherwise it's 'compile'. Save current default.
# Executing vector instructions on a system without hardware vector support
# is also disabled by a call to check_vect, but disabling execution here is
# more efficient.
global dg-do-what-default
set save-dg-do-what-default ${dg-do-what-default}
set dg-do-what-default run
# Initialize `dg'.
dg-init
lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
#### Tests with special options
global SAVED_DEFAULT_VECTCFLAGS
set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
# -ffast-math tests
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-ffast-math"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-fast-math-vect*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# Clean up.
set dg-do-what-default ${save-dg-do-what-default}
# All done.
dg-finish

View File

@ -2039,6 +2039,7 @@ proc check_effective_target_vect_int_mult { } {
} else {
set et_vect_int_mult_saved 0
if { [istarget powerpc*-*-*]
|| [istarget spu-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_int_mult_saved 1

View File

@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
/* Return the cost charged for one execution of the scalar statement
   STMT, selected by the kind recorded in its stmt_vec_info:
   TARG_SCALAR_LOAD_COST for loads, TARG_SCALAR_STORE_COST for stores and
   TARG_SCALAR_STMT_COST for every other recognized kind.  A statement
   whose kind is undefined or unrecognized must never reach here.  */
static int
cost_for_stmt (tree stmt)
{
  stmt_vec_info vinfo = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (vinfo))
    {
    /* Everything that is not a memory access is charged uniformly.  */
    case call_vec_info_type:
    case type_conversion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case induc_vec_info_type:
    case reduc_vec_info_type:
    case assignment_vec_info_type:
    case condition_vec_info_type:
    case op_vec_info_type:
      return TARG_SCALAR_STMT_COST;

    case store_vec_info_type:
      return TARG_SCALAR_STORE_COST;

    case load_vec_info_type:
      return TARG_SCALAR_LOAD_COST;

    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}
/* Function vect_estimate_min_profitable_iters
Return the number of iterations required for the vector version of the
@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
continue;
scalar_single_iter_cost++;
scalar_single_iter_cost += cost_for_stmt (stmt);
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
}
@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
loop.
FORNOW: If we don't know the value of peel_iters for prologue or epilogue
at compile-time - we assume the worst.
at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */
@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (byte_misalign < 0)
{
peel_iters_prologue = vf - 1;
peel_iters_prologue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: "
"prologue peel iters set conservatively.");
"prologue peel iters set to (vf-1)/2.");
/* If peeling for alignment is unknown, loop bound of main loop becomes
unknown. */
peel_iters_epilogue = vf - 1;
peel_iters_epilogue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: "
"epilogue peel iters set conservatively because "
"epilogue peel iters set to (vf-1)/2 because "
"peeling for alignment is unknown .");
}
else
@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
peel_iters_epilogue = vf - 1;
peel_iters_epilogue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: "
"epilogue peel iters set conservatively because "
"epilogue peel iters set to (vf-1)/2 because "
"loop iterations are unknown .");
}
else
@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ (peel_iters_epilogue * scalar_single_iter_cost);
/* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
information we provide for the target is whether testing against the
threshold involves a runtime test. */
if (targetm.vectorize.builtin_vectorization_cost)
{
bool runtime_test = false;
/* If the number of iterations is unknown, or the
peeling-for-misalignment amount is unknown, we will have to generate
a runtime test to test the loop count against the threshold. */
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| (byte_misalign < 0))
runtime_test = true;
vec_outside_cost +=
targetm.vectorize.builtin_vectorization_cost (runtime_test);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
targetm.vectorize.builtin_vectorization_cost (runtime_test));
}
/* Calculate number of iterations required to make the vector version
profitable, relative to the loop bodies only. The following condition
must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
min_profitable_iters < vf ? vf : min_profitable_iters);
}
return min_profitable_iters < vf ? vf : min_profitable_iters;
min_profitable_iters =
min_profitable_iters < vf ? vf : min_profitable_iters;
/* Because the condition we create is:
if (niters <= min_profitable_iters)
then skip the vectorized loop. */
min_profitable_iters--;
return min_profitable_iters;
}
@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
/* Add in cost for initial definition. */
outer_cost += TARG_VEC_STMT_COST;
outer_cost += TARG_SCALAR_TO_VEC_COST;
/* Determine cost of epilogue code.
@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
optab = optab_for_tree_code (code, vectype);
/* We have a whole vector shift available. */
if (!VECTOR_MODE_P (mode)
|| optab->handlers[mode].insn_code == CODE_FOR_nothing)
if (VECTOR_MODE_P (mode)
&& optab->handlers[mode].insn_code != CODE_FOR_nothing
&& vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
/* Final reduction via vector shifts and the reduction operator. Also
requires scalar extract. */
outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
+ TARG_VEC_TO_SCALAR_COST);
else
/* Use extracts and reduction op for final reduction. For N elements,
we have N extracts and N-1 reduction ops. */

View File

@ -326,6 +326,21 @@ typedef struct _stmt_vec_info {
#define TARG_COND_BRANCH_COST 3
#endif
/* Cost of any scalar operation, excluding load and store. */
#ifndef TARG_SCALAR_STMT_COST
#define TARG_SCALAR_STMT_COST 1
#endif
/* Cost of scalar load. */
#ifndef TARG_SCALAR_LOAD_COST
#define TARG_SCALAR_LOAD_COST 1
#endif
/* Cost of scalar store. */
#ifndef TARG_SCALAR_STORE_COST
#define TARG_SCALAR_STORE_COST 1
#endif
/* Cost of any vector operation, excluding load, store or vector to scalar
operation. */
#ifndef TARG_VEC_STMT_COST