re PR tree-optimization/49038 (-ftree-vectorise introduces reads past end of array)

PR tree-optimization/49038
        * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
        Ensure at least one epilogue iteration if required by data
        accesses with gaps.
        * tree-vectorizer.h (struct _loop_vec_info): Add new field 
        to mark loops that require peeling for gaps.
        * tree-vect-loop.c (new_loop_vec_info): Initialize new field.
        (vect_get_known_peeling_cost): Take peeling for gaps into 
        account.
        (vect_transform_loop): Generate epilogue if required by data
        access with gaps.
        * tree-vect-data-refs.c (vect_analyze_group_access): Mark the
        loop as requiring an epilogue if there are gaps at the end of
        the strided group.

From-SVN: r174265
This commit is contained in:
Ira Rosen 2011-05-26 07:50:09 +00:00 committed by Ira Rosen
parent 688571c7d5
commit 48df3fa66c
8 changed files with 232 additions and 4 deletions

View File

@ -1,3 +1,20 @@
2011-05-26 Ira Rosen <ira.rosen@linaro.org>
PR tree-optimization/49038
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
Ensure at least one epilogue iteration if required by data
accesses with gaps.
* tree-vectorizer.h (struct _loop_vec_info): Add new field
to mark loops that require peeling for gaps.
* tree-vect-loop.c (new_loop_vec_info): Initialize new field.
(vect_get_known_peeling_cost): Take peeling for gaps into
account.
(vect_transform_loop): Generate epilogue if required by data
access with gaps.
* tree-vect-data-refs.c (vect_analyze_group_access): Mark the
loop as requiring an epilogue if there are gaps at the end of
the strided group.
2011-05-25 Ian Lance Taylor <iant@google.com>
* godump.c (go_format_type): Output the first field with a usable

View File

@ -1,3 +1,9 @@
2011-05-26 Ira Rosen <ira.rosen@linaro.org>
PR tree-optimization/49038
* gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c: New test.
* gcc.dg/vect/pr49038.c: New test.
2011-05-26 Fabien Chêne <fabien@gcc.gnu.org>
* g++.dg/init/pr25811-3.C: New.
* g++.dg/init/pr25811-4.C: New.

View File

@ -0,0 +1,38 @@
#include <sys/mman.h>
#include <stdio.h>
#define COUNT 320
#define MMAP_SIZE 0x10000
#define ADDRESS 0x1122000000
#define TYPE unsigned short
/* Copy every second element of B into A: a[n] = b[2 * n] for each n in
   [0, COUNT).  The highest index read from B is 2 * (COUNT - 1); the
   odd-indexed elements of B are never accessed.  Both pointers are
   __restrict-qualified, and the noinline attribute keeps the call in
   main from being inlined.  */
void __attribute__((noinline))
foo (TYPE *__restrict a, TYPE *__restrict b)
{
int n;
/* Stride-2 load from B, unit-stride store to A.  */
for (n = 0; n < COUNT; n++)
a[n] = b[n * 2];
}
/* Map MMAP_SIZE bytes of anonymous read/write memory and run foo with its
   source array placed so that the last element foo reads is the last
   element of the mapping.  Returns 1 (after printing the mmap error) if
   the mapping cannot be created, 0 otherwise.  */
int
main (void)
{
void *x;
size_t b_offset;
/* MAP_FIXED is not passed, so ADDRESS is only a placement hint; the
   address actually used is whatever mmap returns in X.  */
x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (x == MAP_FAILED)
{
perror ("mmap");
return 1;
}
/* foo reads b[0] .. b[2 * COUNT - 2], i.e. 2 * COUNT - 1 elements.
   Starting B this many elements before the end of the mapping makes the
   final read land on the mapping's last element, so any read beyond it
   falls outside the mapped region.
   NOTE(review): presumably the intent is for such an over-read to fault;
   that depends on the following page being unmapped -- confirm.  */
b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
/* A is the start of the mapping, B is B_OFFSET bytes into it.  */
foo ((unsigned short *) x,
(unsigned short *) ((char *) x + b_offset));
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,103 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define N 160
/* Record of eight unsigned char fields, a through h.  main1 reads only
   fields b, c, e and f of its input records and writes all eight fields
   of its output records.  */
typedef struct {
unsigned char a;
unsigned char b;
unsigned char c;
unsigned char d;
unsigned char e;
unsigned char f;
unsigned char g;
unsigned char h;
} s;
/* Compute all eight fields of res[i] from fields b, c, e and f of arr[i]
   for i in [0, N), where N here is the run-time parameter n, then verify
   the results.  Calls abort if any of the first n results is wrong, or if
   any field of res[n] .. res[N - 1] equals the value the loop would have
   stored there.  Returns 0 when every check passes.  */
__attribute__ ((noinline)) int
main1 (s *arr, int n)
{
int i;
s *ptr = arr;
s res[N];
unsigned char x;
/* Check peeling for gaps for unknown loop bound. */
/* Fields a, d, g and h of *ptr are never read below; h is the last field
   of the struct, so the load group has a gap at its end.  */
for (i = 0; i < n; i++)
{
res[i].c = ptr->b + ptr->c;
x = ptr->c + ptr->f;
res[i].a = x + ptr->b;
res[i].d = ptr->b + ptr->c;
res[i].b = ptr->c;
res[i].f = ptr->f + ptr->e;
res[i].e = ptr->b + ptr->e;
res[i].h = ptr->c;
res[i].g = ptr->b + ptr->c;
ptr++;
}
/* check results: */
for (i = 0; i < n; i++)
{
if (res[i].c != arr[i].b + arr[i].c
|| res[i].a != arr[i].c + arr[i].f + arr[i].b
|| res[i].d != arr[i].b + arr[i].c
|| res[i].b != arr[i].c
|| res[i].f != arr[i].f + arr[i].e
|| res[i].e != arr[i].b + arr[i].e
|| res[i].h != arr[i].c
|| res[i].g != arr[i].b + arr[i].c)
abort ();
}
/* Check also that we don't do more iterations than needed. */
/* NOTE(review): res[n] .. res[N - 1] are never written in this function,
   so this loop compares uninitialized storage and relies on it not
   matching the expected values by chance.  It also reads arr[n] ..
   arr[N - 1], so the caller must supply at least N elements.  */
for (i = n; i < N; i++)
{
if (res[i].c == arr[i].b + arr[i].c
|| res[i].a == arr[i].c + arr[i].f + arr[i].b
|| res[i].d == arr[i].b + arr[i].c
|| res[i].b == arr[i].c
|| res[i].f == arr[i].f + arr[i].e
|| res[i].e == arr[i].b + arr[i].e
|| res[i].h == arr[i].c
|| res[i].g == arr[i].b + arr[i].c)
abort ();
}
return 0;
}
/* Fill all N records of ARR with fixed field values, then run main1 over
   the first N - 2 of them so that two trailing records remain for main1's
   "no extra iterations" check.  Always returns 0; failures are reported
   by abort.  */
int main (void)
{
int i;
s arr[N];
/* check_vect is declared in tree-vect.h, which is not shown here.  */
check_vect ();
for (i = 0; i < N; i++)
{
arr[i].a = 5;
arr[i].b = 6;
arr[i].c = 17;
arr[i].d = 3;
arr[i].e = 16;
arr[i].f = 16;
arr[i].g = 3;
arr[i].h = 56;
/* This can never be true, since a was just set to 5.
   NOTE(review): presumably this keeps the initialization loop from being
   vectorized, so that only main1's loop counts toward the
   "vectorized 1 loops" scan -- confirm.  */
if (arr[i].a == 178)
abort();
}
main1 (arr, N-2);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -2043,7 +2043,7 @@ vect_analyze_group_access (struct data_reference *dr)
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
HOST_WIDE_INT stride;
HOST_WIDE_INT stride, last_accessed_element = 1;
bool slp_impossible = false;
/* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
@ -2072,6 +2072,16 @@ vect_analyze_group_access (struct data_reference *dr)
fprintf (vect_dump, " step ");
print_generic_expr (vect_dump, step, TDF_SLIM);
}
if (loop_vinfo)
{
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Data access with gaps requires scalar "
"epilogue loop");
}
return true;
}
@ -2137,6 +2147,7 @@ vect_analyze_group_access (struct data_reference *dr)
next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
continue;
}
prev = next;
/* Check that all the accesses have the same STEP. */
@ -2167,6 +2178,8 @@ vect_analyze_group_access (struct data_reference *dr)
gaps += diff - 1;
}
last_accessed_element += diff;
/* Store the gap from the previous member of the group. If there is no
gap in the access, GROUP_GAP is always 1. */
GROUP_GAP (vinfo_for_stmt (next)) = diff;
@ -2245,6 +2258,15 @@ vect_analyze_group_access (struct data_reference *dr)
VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
stmt);
}
/* There is a gap in the end of the group. */
if (stride - last_accessed_element > 0 && loop_vinfo)
{
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Data access with gaps requires scalar "
"epilogue loop");
}
}
return true;

View File

@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
edge pe;
basic_block new_bb;
gimple_seq stmts;
tree ni_name;
tree ni_name, ni_minus_gap_name;
tree var;
tree ratio_name;
tree ratio_mult_vf_name;
@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
/* If epilogue loop is required because of data accesses with gaps, we
subtract one iteration from the total number of iterations here for
correct calculation of RATIO. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
{
ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
ni_name,
build_one_cst (TREE_TYPE (ni_name)));
if (!is_gimple_val (ni_minus_gap_name))
{
var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
add_referenced_var (var);
stmts = NULL;
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
true, var);
if (cond_expr_stmt_list)
gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
else
{
pe = loop_preheader_edge (loop);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
}
}
else
ni_minus_gap_name = ni_name;
/* Create: ratio = ni >> log2(vf) */
ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
ni_minus_gap_name, log_vf);
if (!is_gimple_val (ratio_name))
{
var = create_tmp_var (TREE_TYPE (ni), "bnd");

View File

@ -761,6 +761,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
LOOP_VINFO_PEELING_HTAB (res) = NULL;
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
return res;
}
@ -2333,6 +2334,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
peel_iters_prologue = niters < peel_iters_prologue ?
niters : peel_iters_prologue;
*peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
/* If we need to peel for gaps, but no peeling is required, we have to
peel VF iterations. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
*peel_iters_epilogue = vf;
}
return (peel_iters_prologue * scalar_single_iter_cost)
@ -4987,7 +4992,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
do_peeling_for_loop_bound
= (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
&& LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))

View File

@ -255,6 +255,11 @@ typedef struct _loop_vec_info {
/* Hash table used to choose the best peeling option. */
htab_t peeling_htab;
/* When we have strided data accesses with gaps, we may introduce invalid
memory accesses. We peel the last iteration of the loop to prevent
this. */
bool peeling_for_gaps;
} *loop_vec_info;
/* Access Functions. */
@ -283,6 +288,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
#define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
VEC_length (gimple, (L)->may_misalign_stmts) > 0