tree-vect-stmts.c (vectorizable_store): Remove strided grouped store restrictions.

2016-06-15  Richard Biener  <rguenther@suse.de>

        * tree-vect-stmts.c (vectorizable_store): Remove strided grouped
        store restrictions.

        * gcc.dg/vect/slp-45.c: New testcase.

From-SVN: r237474
parent 2a5825f23b
commit b17dc4d4e4
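The check removed below made vectorizable_store reject SLP groups of strided stores whose group size does not evenly divide the number of vector elements. A minimal sketch of the loop shape this change targets, modelled on the new gcc.dg/vect/slp-45.c testcase further down (the function name and parameters here are illustrative, not taken from the patch):

/* A group of three stores per iteration whose distance between iterations
   (3 * s elements) is only known at run time, mirroring foo_int_3 in the
   new testcase.  With a 4-element vector type the group size (3) neither
   divides nor is a multiple of the vector width, so the removed check used
   to bail out with "unhandled strided group store"; after the patch the SLP
   vectors are simply stored back element by element.  */
void
store_group_of_3 (int *__restrict__ out, const int *__restrict__ in,
                  int s, int n)
{
  for (int i = 0; i < n; i++)
    {
      out[3 * s * i + 0] = in[3 * i + 0];
      out[3 * s * i + 1] = in[3 * i + 1];
      out[3 * s * i + 2] = in[3 * i + 2];
    }
}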
gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-06-15  Richard Biener  <rguenther@suse.de>
+
+        * tree-vect-stmts.c (vectorizable_store): Remove strided grouped
+        store restrictions.
+
 2016-06-15  Richard Biener  <rguenther@suse.de>
 
         * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Do
gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-06-15  Richard Biener  <rguenther@suse.de>
+
+        * gcc.dg/vect/slp-45.c: New testcase.
+
 2016-06-15  Richard Biener  <rguenther@suse.de>
 
         * gcc.dg/vect/bb-slp-pattern-2.c: Disable loop vectorization.
gcc/testsuite/gcc.dg/vect/slp-45.c (new file, 78 lines)
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+#include <string.h>
+#include "tree-vect.h"
+
+#define FOO(T,N) \
+void __attribute__((noinline,noclone)) \
+foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
+{ \
+  T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
+  T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
+  for (int i = 0; i < 16; i++) \
+    { \
+      for (int j = 0; j < N; ++j) \
+        out[j] = in[j]; \
+      in += N; \
+      out += s*N; \
+    } \
+}
+
+#define TEST(T,N) \
+ do { \
+  memset (out, 0, 4096); \
+  foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
+  if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+    __builtin_abort (); \
+  for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+    if (out[i] != 0) \
+      __builtin_abort (); \
+ } while (0)
+
+FOO(char, 1)
+FOO(char, 2)
+FOO(char, 3)
+FOO(char, 4)
+FOO(char, 6)
+FOO(char, 8)
+FOO(int, 1)
+FOO(int, 2)
+FOO(int, 3)
+FOO(int, 4)
+FOO(int, 6)
+FOO(int, 8)
+FOO(int, 16)
+
+char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 4096; ++i)
+    {
+      in[i] = i;
+      __asm__ volatile ("" : : : "memory");
+    }
+
+  TEST(char, 1);
+  TEST(char, 2);
+  TEST(char, 3);
+  TEST(char, 4);
+  TEST(char, 6);
+  TEST(char, 8);
+  TEST(int, 1);
+  TEST(int, 2);
+  TEST(int, 3);
+  TEST(int, 4);
+  TEST(int, 6);
+  TEST(int, 8);
+  TEST(int, 16);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
gcc/tree-vect-stmts.c
@@ -5234,6 +5234,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
   enum vect_def_type scatter_src_dt = vect_unknown_def_type;
   gimple *new_stmt;
+  int vf;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5270,7 +5271,12 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   if (loop_vinfo)
-    loop = LOOP_VINFO_LOOP (loop_vinfo);
+    {
+      loop = LOOP_VINFO_LOOP (loop_vinfo);
+      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+    }
+  else
+    vf = 1;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
@@ -5365,16 +5371,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
           return false;
         }
 
-      if (STMT_VINFO_STRIDED_P (stmt_info)
-          && slp
-          && (group_size > nunits
-              || nunits % group_size != 0))
-        {
-          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                           "unhandled strided group store\n");
-          return false;
-        }
-
       if (first_stmt == stmt)
         {
           /* STMT is the leader of the group.  Check the operands of all the
@@ -5653,23 +5649,31 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          */
 
       unsigned nstores = nunits;
+      unsigned lnel = 1;
       tree ltype = elem_type;
       if (slp)
         {
-          nstores = nunits / group_size;
-          if (group_size < nunits)
-            ltype = build_vector_type (elem_type, group_size);
-          else
-            ltype = vectype;
+          if (group_size < nunits
+              && nunits % group_size == 0)
+            {
+              nstores = nunits / group_size;
+              lnel = group_size;
+              ltype = build_vector_type (elem_type, group_size);
+            }
+          else if (group_size >= nunits
+                   && group_size % nunits == 0)
+            {
+              nstores = 1;
+              lnel = nunits;
+              ltype = vectype;
+            }
           ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
           ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
           group_size = 1;
         }
 
       ivstep = stride_step;
       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
-                            build_int_cst (TREE_TYPE (ivstep),
-                                           ncopies * nstores));
+                            build_int_cst (TREE_TYPE (ivstep), vf));
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
@@ -5700,6 +5704,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
               vect_finish_stmt_generation (stmt, incr, gsi);
               running_off = newoff;
             }
+          unsigned int group_el = 0;
+          unsigned HOST_WIDE_INT
+            elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
           for (j = 0; j < ncopies; j++)
             {
               /* We've set op and dt above, from gimple_assign_rhs1(stmt),
@@ -5745,19 +5752,27 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                                                    NULL_TREE, true,
                                                    GSI_SAME_STMT);
 
+                  tree this_off = build_int_cst (TREE_TYPE (alias_off),
+                                                 group_el * elsz);
                   newref = build2 (MEM_REF, ltype,
-                                   running_off, alias_off);
+                                   running_off, this_off);
 
                   /* And store it to *running_off.  */
                   assign = gimple_build_assign (newref, elem);
                   vect_finish_stmt_generation (stmt, assign, gsi);
 
-                  newoff = copy_ssa_name (running_off, NULL);
-                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                              running_off, stride_step);
-                  vect_finish_stmt_generation (stmt, incr, gsi);
+                  group_el += lnel;
+                  if (! slp
+                      || group_el == group_size)
+                    {
+                      newoff = copy_ssa_name (running_off, NULL);
+                      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                                  running_off, stride_step);
+                      vect_finish_stmt_generation (stmt, incr, gsi);
 
-                  running_off = newoff;
+                      running_off = newoff;
+                      group_el = 0;
+                    }
                 }
               if (g == group_size - 1
                   && !slp)
                 {
@@ -5771,6 +5786,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                 }
             }
           next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+          if (slp)
+            break;
         }
       return true;
     }
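A rough illustration of the splitting strategy the rewritten strided SLP store path uses: when the group size divides the vector width, each SLP vector is written out as nstores sub-vectors of group_size elements; when the group size is a multiple of the vector width, whole vectors are stored; otherwise the code falls back to one store per element. The helper below is not part of the patch, it just mirrors the nstores/lnel/ltype selection above on plain integers.

#include <stdio.h>

/* Mirrors the nstores/lnel selection added to vectorizable_store above:
   per SLP vector, NSTORES pieces of LNEL elements each are written out.  */
static void
split_choice (unsigned nunits, unsigned group_size,
              unsigned *nstores, unsigned *lnel)
{
  *nstores = nunits;  /* fallback: one scalar store per vector element */
  *lnel = 1;
  if (group_size < nunits && nunits % group_size == 0)
    {
      *nstores = nunits / group_size;  /* sub-vectors of group_size elements */
      *lnel = group_size;
    }
  else if (group_size >= nunits && group_size % nunits == 0)
    {
      *nstores = 1;                    /* one whole-vector store */
      *lnel = nunits;
    }
}

int
main (void)
{
  /* {nunits, group_size} pairs, e.g. 4 ints per vector with group sizes
     2, 4, 8 and 3 (the last one takes the scalar fallback).  */
  unsigned cases[][2] = { {4, 2}, {4, 4}, {4, 8}, {4, 3} };
  for (unsigned i = 0; i < sizeof (cases) / sizeof (cases[0]); i++)
    {
      unsigned nstores, lnel;
      split_choice (cases[i][0], cases[i][1], &nstores, &lnel);
      printf ("nunits=%u group_size=%u -> nstores=%u lnel=%u\n",
              cases[i][0], cases[i][1], nstores, lnel);
    }
  return 0;
}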