re PR tree-optimization/49038 (-ftree-vectorise introduces reads past end of array)
PR tree-optimization/49038 * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): Ensure at least one epilogue iteration if required by data accesses with gaps. * tree-vectorizer.h (struct _loop_vec_info): Add new field to mark loops that require peeling for gaps. * tree-vect-loop.c (new_loop_vec_info): Initialize new field. (vect_get_known_peeling_cost): Take peeling for gaps into account. (vect_transform_loop): Generate epilogue if required by data access with gaps. * tree-vect-data-refs.c (vect_analyze_group_access): Mark the loop as requiring an epilogue if there are gaps in the end of the strided group. From-SVN: r174265
This commit is contained in:
parent
688571c7d5
commit
48df3fa66c
|
@ -1,3 +1,20 @@
|
|||
2011-05-26 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
PR tree-optimization/49038
|
||||
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
|
||||
Ensure at least one epilogue iteration if required by data
|
||||
accesses with gaps.
|
||||
* tree-vectorizer.h (struct _loop_vec_info): Add new field
|
||||
to mark loops that require peeling for gaps.
|
||||
* tree-vect-loop.c (new_loop_vec_info): Initialize new field.
|
||||
(vect_get_known_peeling_cost): Take peeling for gaps into
|
||||
account.
|
||||
(vect_transform_loop): Generate epilogue if required by data
|
||||
access with gaps.
|
||||
* tree-vect-data-refs.c (vect_analyze_group_access): Mark the
|
||||
loop as requiring an epilogue if there are gaps in the end of
|
||||
the strided group.
|
||||
|
||||
2011-05-25 Ian Lance Taylor <iant@google.com>
|
||||
|
||||
* godump.c (go_format_type): Output the first field with a usable
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2011-05-26 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
PR tree-optimization/49038
|
||||
* gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c: New test.
|
||||
* gcc.dg/vect/pr49038.c: New test.
|
||||
|
||||
2011-05-26 Fabien Chêne <fabien@gcc.gnu.org>
|
||||
* g++.dg/init/pr25811-3.C: New.
|
||||
* g++.dg/init/pr25811-4.C: New.
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
#include <sys/mman.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define COUNT 320
|
||||
#define MMAP_SIZE 0x10000
|
||||
#define ADDRESS 0x1122000000
|
||||
#define TYPE unsigned short
|
||||
|
||||
void __attribute__((noinline))
|
||||
foo (TYPE *__restrict a, TYPE *__restrict b)
|
||||
{
|
||||
int n;
|
||||
|
||||
for (n = 0; n < COUNT; n++)
|
||||
a[n] = b[n * 2];
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
void *x;
|
||||
size_t b_offset;
|
||||
|
||||
x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (x == MAP_FAILED)
|
||||
{
|
||||
perror ("mmap");
|
||||
return 1;
|
||||
}
|
||||
|
||||
b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
|
||||
foo ((unsigned short *) x,
|
||||
(unsigned short *) ((char *) x + b_offset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -0,0 +1,103 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 160
|
||||
|
||||
typedef struct {
|
||||
unsigned char a;
|
||||
unsigned char b;
|
||||
unsigned char c;
|
||||
unsigned char d;
|
||||
unsigned char e;
|
||||
unsigned char f;
|
||||
unsigned char g;
|
||||
unsigned char h;
|
||||
} s;
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 (s *arr, int n)
|
||||
{
|
||||
int i;
|
||||
s *ptr = arr;
|
||||
s res[N];
|
||||
unsigned char x;
|
||||
|
||||
/* Check peeling for gaps for unknown loop bound. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
res[i].c = ptr->b + ptr->c;
|
||||
x = ptr->c + ptr->f;
|
||||
res[i].a = x + ptr->b;
|
||||
res[i].d = ptr->b + ptr->c;
|
||||
res[i].b = ptr->c;
|
||||
res[i].f = ptr->f + ptr->e;
|
||||
res[i].e = ptr->b + ptr->e;
|
||||
res[i].h = ptr->c;
|
||||
res[i].g = ptr->b + ptr->c;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (res[i].c != arr[i].b + arr[i].c
|
||||
|| res[i].a != arr[i].c + arr[i].f + arr[i].b
|
||||
|| res[i].d != arr[i].b + arr[i].c
|
||||
|| res[i].b != arr[i].c
|
||||
|| res[i].f != arr[i].f + arr[i].e
|
||||
|| res[i].e != arr[i].b + arr[i].e
|
||||
|| res[i].h != arr[i].c
|
||||
|| res[i].g != arr[i].b + arr[i].c)
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check also that we don't do more iterations than needed. */
|
||||
for (i = n; i < N; i++)
|
||||
{
|
||||
if (res[i].c == arr[i].b + arr[i].c
|
||||
|| res[i].a == arr[i].c + arr[i].f + arr[i].b
|
||||
|| res[i].d == arr[i].b + arr[i].c
|
||||
|| res[i].b == arr[i].c
|
||||
|| res[i].f == arr[i].f + arr[i].e
|
||||
|| res[i].e == arr[i].b + arr[i].e
|
||||
|| res[i].h == arr[i].c
|
||||
|| res[i].g == arr[i].b + arr[i].c)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
s arr[N];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
arr[i].a = 5;
|
||||
arr[i].b = 6;
|
||||
arr[i].c = 17;
|
||||
arr[i].d = 3;
|
||||
arr[i].e = 16;
|
||||
arr[i].f = 16;
|
||||
arr[i].g = 3;
|
||||
arr[i].h = 56;
|
||||
if (arr[i].a == 178)
|
||||
abort();
|
||||
}
|
||||
|
||||
main1 (arr, N-2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -2043,7 +2043,7 @@ vect_analyze_group_access (struct data_reference *dr)
|
|||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
|
||||
HOST_WIDE_INT stride;
|
||||
HOST_WIDE_INT stride, last_accessed_element = 1;
|
||||
bool slp_impossible = false;
|
||||
|
||||
/* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
|
||||
|
@ -2072,6 +2072,16 @@ vect_analyze_group_access (struct data_reference *dr)
|
|||
fprintf (vect_dump, " step ");
|
||||
print_generic_expr (vect_dump, step, TDF_SLIM);
|
||||
}
|
||||
|
||||
if (loop_vinfo)
|
||||
{
|
||||
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Data access with gaps requires scalar "
|
||||
"epilogue loop");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2137,6 +2147,7 @@ vect_analyze_group_access (struct data_reference *dr)
|
|||
next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
|
||||
continue;
|
||||
}
|
||||
|
||||
prev = next;
|
||||
|
||||
/* Check that all the accesses have the same STEP. */
|
||||
|
@ -2167,6 +2178,8 @@ vect_analyze_group_access (struct data_reference *dr)
|
|||
gaps += diff - 1;
|
||||
}
|
||||
|
||||
last_accessed_element += diff;
|
||||
|
||||
/* Store the gap from the previous member of the group. If there is no
|
||||
gap in the access, GROUP_GAP is always 1. */
|
||||
GROUP_GAP (vinfo_for_stmt (next)) = diff;
|
||||
|
@ -2245,6 +2258,15 @@ vect_analyze_group_access (struct data_reference *dr)
|
|||
VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
|
||||
stmt);
|
||||
}
|
||||
|
||||
/* There is a gap in the end of the group. */
|
||||
if (stride - last_accessed_element > 0 && loop_vinfo)
|
||||
{
|
||||
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Data access with gaps requires scalar "
|
||||
"epilogue loop");
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
|||
edge pe;
|
||||
basic_block new_bb;
|
||||
gimple_seq stmts;
|
||||
tree ni_name;
|
||||
tree ni_name, ni_minus_gap_name;
|
||||
tree var;
|
||||
tree ratio_name;
|
||||
tree ratio_mult_vf_name;
|
||||
|
@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
|||
ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
|
||||
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
|
||||
|
||||
/* If epilogue loop is required because of data accesses with gaps, we
|
||||
subtract one iteration from the total number of iterations here for
|
||||
correct calculation of RATIO. */
|
||||
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
||||
{
|
||||
ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
|
||||
ni_name,
|
||||
build_one_cst (TREE_TYPE (ni_name)));
|
||||
if (!is_gimple_val (ni_minus_gap_name))
|
||||
{
|
||||
var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
|
||||
add_referenced_var (var);
|
||||
|
||||
stmts = NULL;
|
||||
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
|
||||
true, var);
|
||||
if (cond_expr_stmt_list)
|
||||
gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
|
||||
else
|
||||
{
|
||||
pe = loop_preheader_edge (loop);
|
||||
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||
gcc_assert (!new_bb);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
ni_minus_gap_name = ni_name;
|
||||
|
||||
/* Create: ratio = ni >> log2(vf) */
|
||||
|
||||
ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
|
||||
ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
|
||||
ni_minus_gap_name, log_vf);
|
||||
if (!is_gimple_val (ratio_name))
|
||||
{
|
||||
var = create_tmp_var (TREE_TYPE (ni), "bnd");
|
||||
|
|
|
@ -761,6 +761,7 @@ new_loop_vec_info (struct loop *loop)
|
|||
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
|
||||
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
|
||||
LOOP_VINFO_PEELING_HTAB (res) = NULL;
|
||||
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -2333,6 +2334,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
|
|||
peel_iters_prologue = niters < peel_iters_prologue ?
|
||||
niters : peel_iters_prologue;
|
||||
*peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
|
||||
/* If we need to peel for gaps, but no peeling is required, we have to
|
||||
peel VF iterations. */
|
||||
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
|
||||
*peel_iters_epilogue = vf;
|
||||
}
|
||||
|
||||
return (peel_iters_prologue * scalar_single_iter_cost)
|
||||
|
@ -4987,7 +4992,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
do_peeling_for_loop_bound
|
||||
= (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
|| (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
&& LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
|
||||
&& LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
|
||||
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
|
||||
|
||||
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|
||||
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
|
||||
|
|
|
@ -255,6 +255,11 @@ typedef struct _loop_vec_info {
|
|||
/* Hash table used to choose the best peeling option. */
|
||||
htab_t peeling_htab;
|
||||
|
||||
/* When we have strided data accesses with gaps, we may introduce invalid
|
||||
memory accesses. We peel the last iteration of the loop to prevent
|
||||
this. */
|
||||
bool peeling_for_gaps;
|
||||
|
||||
} *loop_vec_info;
|
||||
|
||||
/* Access Functions. */
|
||||
|
@ -283,6 +288,7 @@ typedef struct _loop_vec_info {
|
|||
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
|
||||
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
|
||||
#define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
|
||||
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
|
||||
|
||||
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
|
||||
VEC_length (gimple, (L)->may_misalign_stmts) > 0
|
||||
|
|
Loading…
Reference in New Issue