From 5b900a4c0ee38a66b04b4ceeecb8b194ddde7e57 Mon Sep 17 00:00:00 2001 From: Dorit Nuzman Date: Thu, 12 Jul 2007 14:42:08 +0000 Subject: [PATCH] re PR target/25413 (wrong alignment or incorrect address computation in vectorized code on Pentium 4 SSE) 2007-07-12 Dorit Nuzman Devang Patel PR tree-optimization/25413 * targhooks.c (default_builtin_vector_alignment_reachable): New. * targhooks.h (default_builtin_vector_alignment_reachable): New. * tree.h (contains_packed_reference): New. * expr.c (contains_packed_reference): New. * tree-vect-analyze.c (vector_alignment_reachable_p): New. (vect_enhance_data_refs_alignment): Call vector_alignment_reachable_p. * target.h (vector_alignment_reachable): New builtin. * target-def.h (TARGET_VECTOR_ALIGNMENT_REACHABLE): New. * config/rs6000/rs6000.c (rs6000_vector_alignment_reachable): New. (TARGET_VECTOR_ALIGNMENT_REACHABLE): Define. Co-Authored-By: Devang Patel From-SVN: r126591 --- gcc/ChangeLog | 16 +++ gcc/config/rs6000/rs6000.c | 35 ++++++ gcc/expr.c | 41 +++++++ gcc/target-def.h | 5 +- gcc/target.h | 4 + gcc/targhooks.c | 16 +++ gcc/targhooks.h | 2 + gcc/testsuite/ChangeLog | 10 ++ gcc/testsuite/gcc.dg/vect/pr25413.c | 37 +++++++ gcc/testsuite/gcc.dg/vect/pr25413a.c | 129 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr31699.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-align-1.c | 50 +++++++++ gcc/testsuite/gcc.dg/vect/vect-align-2.c | 46 ++++++++ gcc/tree-vect-analyze.c | 103 +++++++++++++----- gcc/tree.h | 6 ++ 15 files changed, 474 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr25413.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr25413a.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-align-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-align-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c1264e3cd63..5a6f61c3ae3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -10,6 +10,22 @@ * varasm.c (assemble_start_function): Use DECL_ALIGN instead of FUNCTION_BOUNDARY. 
+2007-07-12 Dorit Nuzman + Devang Patel + + PR tree-optimization/25413 + * targhooks.c (default_builtin_vector_alignment_reachable): New. + * targhooks.h (default_builtin_vector_alignment_reachable): New. + * tree.h (contains_packed_reference): New. + * expr.c (contains_packed_reference): New. + * tree-vect-analyze.c (vector_alignment_reachable_p): New. + (vect_enhance_data_refs_alignment): Call + vector_alignment_reachable_p. + * target.h (vector_alignment_reachable): New builtin. + * target-def.h (TARGET_VECTOR_ALIGNMENT_REACHABLE): New. + * config/rs6000/rs6000.c (rs6000_vector_alignment_reachable): New. + (TARGET_VECTOR_ALIGNMENT_REACHABLE): Define. + 2007-07-12 Dorit Nuzman * target.h (builtin_vectorization_cost): Add new target builtin. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 722a5a6c342..c9c5a19f878 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -717,6 +717,7 @@ static tree rs6000_builtin_mul_widen_odd (tree); static tree rs6000_builtin_conversion (enum tree_code, tree); static void def_builtin (int, const char *, tree, int); +static bool rs6000_vector_alignment_reachable (tree, bool); static void rs6000_init_builtins (void); static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx); static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx); @@ -984,6 +985,9 @@ static const char alt_reg_names[][8] = #undef TARGET_VECTORIZE_BUILTIN_CONVERSION #define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion +#undef TARGET_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins @@ -1806,6 +1810,37 @@ rs6000_builtin_mul_widen_odd (tree type) } } + +/* Return true iff, data reference of TYPE can reach vector alignment (16) + after applying N number of iterations. 
This routine does not determine + how many iterations are required to reach desired alignment. */ + +static bool +rs6000_vector_alignment_reachable (tree type ATTRIBUTE_UNUSED, bool is_packed) +{ + if (is_packed) + return false; + + if (TARGET_32BIT) + { + if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) + return true; + + if (rs6000_alignment_flags == MASK_ALIGN_POWER) + return true; + + return false; + } + else + { + if (TARGET_MACHO) + return false; + + /* Assuming that all other types are naturally aligned. CHECKME! */ + return true; + } +} + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. diff --git a/gcc/expr.c b/gcc/expr.c index 0739b0533bc..cfc6ed18a2b 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5924,6 +5924,47 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize, return exp; } +/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF, + look for whether EXP or any nested component-refs within EXP is marked + as PACKED. */ + +bool +contains_packed_reference (tree exp) +{ + bool packed_p = false; + + while (1) + { + switch (TREE_CODE (exp)) + { + case COMPONENT_REF: + { + tree field = TREE_OPERAND (exp, 1); + packed_p = DECL_PACKED (field) + || TYPE_PACKED (TREE_TYPE (field)) + || TYPE_PACKED (TREE_TYPE (exp)); + if (packed_p) + goto done; + } + break; + + case BIT_FIELD_REF: + case ARRAY_REF: + case ARRAY_RANGE_REF: + case REALPART_EXPR: + case IMAGPART_EXPR: + case VIEW_CONVERT_EXPR: + break; + + default: + goto done; + } + exp = TREE_OPERAND (exp, 0); + } + done: + return packed_p; +} + /* Return a tree of sizetype representing the size, in bytes, of the element of EXP, an ARRAY_REF. */ diff --git a/gcc/target-def.h b/gcc/target-def.h index 8942de74f96..b0ae5978b91 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -357,6 +357,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0 +#define TARGET_VECTOR_ALIGNMENT_REACHABLE \ + default_builtin_vector_alignment_reachable #define TARGET_VECTORIZE \ { \ @@ -365,7 +367,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. TARGET_VECTORIZE_BUILTIN_CONVERSION, \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \ - TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST, \ + TARGET_VECTOR_ALIGNMENT_REACHABLE \ } #define TARGET_DEFAULT_TARGET_FLAGS 0 diff --git a/gcc/target.h b/gcc/target.h index 56c99bf14b2..e19680918d9 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -417,6 +417,10 @@ struct gcc_target /* Returns the cost to be added to the overheads involved with executing the vectorized version of a loop. */ int (*builtin_vectorization_cost) (bool); + + /* Return true if vector alignment is reachable (by peeling N + iterations) for the given type. */ + bool (* vector_alignment_reachable) (tree, bool); } vectorize; /* The initial value of target_flags. */ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index b063e72305d..f875b87e603 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -653,4 +653,20 @@ tree default_mangle_decl_assembler_name (tree decl ATTRIBUTE_UNUSED, return id; } +bool +default_builtin_vector_alignment_reachable (tree type, bool is_packed) +{ + if (is_packed) + return false; + + /* Assuming that types whose size is > pointer-size are not guaranteed to be + naturally aligned. */ + if (tree_int_cst_compare (TYPE_SIZE (type), bitsize_int (POINTER_SIZE)) > 0) + return false; + + /* Assuming that types whose size is <= pointer-size + are naturally aligned. 
*/ + return true; +} + #include "gt-targhooks.h" diff --git a/gcc/targhooks.h b/gcc/targhooks.h index a326194b4a3..5848f6c3b53 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -66,6 +66,8 @@ extern tree default_builtin_vectorized_conversion (enum tree_code, tree); extern tree default_builtin_reciprocal (enum built_in_function, bool, bool); +extern bool default_builtin_vector_alignment_reachable (tree, bool); + /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9a1145a2064..1b0e69af2e8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2007-07-12 Dorit Nuzman + Devang Patel + + PR tree-optimization/25413 + * gcc.dg/vect/vect-align-1.c: New. + * gcc.dg/vect/vect-align-2.c: New. + * gcc.dg/vect/pr25413.c: New. + * gcc.dg/vect/pr25413a.c: New. + * gcc.dg/vect/pr31699.c: Fix dg-final check. + 2007-07-12 Nathan Froyd * lib/target-support.exp (check_ultrasparc_hw_available): diff --git a/gcc/testsuite/gcc.dg/vect/pr25413.c b/gcc/testsuite/gcc.dg/vect/pr25413.c new file mode 100644 index 00000000000..a171249ff3c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr25413.c @@ -0,0 +1,37 @@ +/* { dg-require-effective-target vect_double } */ + +#include +#include "tree-vect.h" + +#define N 8 + +struct +{ + char c; + double d[N]; +} a; + +int main1() +{ + int i; + for ( i=0; i +#include "tree-vect.h" + +#define N 8 + +typedef unsigned int size_t; + +extern void *malloc (size_t __size) __attribute__ ((__nothrow__)) __attribute__ ((__malloc__)); + +typedef double num_t; +static const num_t num__infty = ((num_t)1.0)/((num_t)0.0); + +struct oct_tt; +typedef struct oct_tt oct_t; + +typedef unsigned int var_t; +typedef enum { + OCT_EMPTY = 0, + OCT_NORMAL = 1, + OCT_CLOSED = 2 +} oct_state; + +struct oct_tt { + var_t n; + + int ref; + + oct_state state; + struct oct_tt* closed; + + num_t* c; +}; + 
+void* octfapg_mm_malloc (size_t t); +oct_t* octfapg_alloc (var_t n); +oct_t* octfapg_full_copy (oct_t* m); + +struct mmalloc_tt; +typedef struct mmalloc_tt mmalloc_t; + +struct mmalloc_tt +{ + int id; + + int nb_alloc; + int nb_realloc; + int nb_free; + + size_t rem; + size_t max; + size_t tot; + +}; + +typedef struct +{ + size_t size; + + mmalloc_t* mm; + int id; + + double dummy; + +} mmheader_t; + +void* +octfapg_mm_malloc (size_t t) +{ + char* m = (char*)malloc(t+sizeof(mmheader_t)); + return m+sizeof(mmheader_t); +} + +oct_t* octfapg_empty (var_t n); + +oct_t* +octfapg_empty (const var_t n) +{ + oct_t* m; + /*octfapg_timing_enter("oct_empty",3);*/ + m = ((oct_t*) octfapg_mm_malloc (sizeof(oct_t))); + m->n = n; + m->ref = 1; + m->state = OCT_EMPTY; + m->closed = (oct_t*)((void *)0); + m->c = (num_t*)((void *)0); + /*octfapg_timing_exit("oct_empty",3);*/ + return m; +} + +oct_t* +octfapg_alloc (const var_t n) +{ + size_t nn = (2*(size_t)(n)*((size_t)(n)+1)); + oct_t* m; + m = octfapg_empty(n); + m->c = ((num_t*) octfapg_mm_malloc (sizeof(num_t)*(nn))); + ; + m->state = OCT_NORMAL; + m->closed = (oct_t*)((void *)0); + return m; +} + +oct_t* +octfapg_universe (const var_t n) +{ + oct_t* m; + size_t i, nn = (2*(size_t)(n)*((size_t)(n)+1)); + m = octfapg_alloc(n); + for (i=0;ic+i) = num__infty; + for (i=0;i<2*n;i++) *(m->c+((size_t)(i)+(((size_t)(i)+1)*((size_t)(i)+1))/2)) = (num_t)(0); + m->state = OCT_CLOSED; + return m; +} + +int main (void) +{ + int i; + check_vect (); + + oct_t *p = octfapg_universe(10); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr31699.c b/gcc/testsuite/gcc.dg/vect/pr31699.c index 86099924387..1ea8121691b 100644 --- 
a/gcc/testsuite/gcc.dg/vect/pr31699.c +++ b/gcc/testsuite/gcc.dg/vect/pr31699.c @@ -31,5 +31,6 @@ int main() } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-align-1.c b/gcc/testsuite/gcc.dg/vect/vect-align-1.c new file mode 100644 index 00000000000..5804d5120ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-align-1.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "tree-vect.h" + +/* Compile time known misalignment. Cannot use loop peeling to align + the store. */ + +#define N 16 + +struct foo { + char x; + int y[N]; +} __attribute__((packed)); + +int +main1 (struct foo * __restrict__ p) +{ + int i; + int x[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + for (i = 0; i < N; i++) + { + p->y[i] = x[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (p->y[i] != x[i]) + abort (); + } + return 0; +} + + +int main (void) +{ + int i; + struct foo *p = malloc (2*sizeof (struct foo)); + check_vect (); + + main1 (p); + return 0; +} + +/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-align-2.c b/gcc/testsuite/gcc.dg/vect/vect-align-2.c new file mode 100644 index 00000000000..75fb21436aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-align-2.c @@ -0,0 +1,46 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-do run } */ + 
+#include +#include +#include "tree-vect.h" + +/* Compile time unknown misalignment. Cannot use loop peeling to align + the store. */ + +#define N 17 + +struct foo { + char x0; + int y[N][N]; +} __attribute__ ((packed)); + +struct foo f2; +int z[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + +void fbar(struct foo *fp) +{ + int i,j; + for (i=0; i