PR/67682, break SLP groups up if only some elements match
gcc/: * tree-vect-slp.c (vect_split_slp_store_group): New. (vect_analyze_slp_instance): During basic block SLP, recurse on subgroups if vect_build_slp_tree fails after 1st vector. gcc/testsuite/: * gcc.dg/vect/bb-slp-7.c (main1): Make subgroups non-isomorphic. * gcc.dg/vect/bb-slp-subgroups-1.c: New. * gcc.dg/vect/bb-slp-subgroups-2.c: New. * gcc.dg/vect/bb-slp-subgroups-3.c: New. From-SVN: r230330
This commit is contained in:
parent
4216708a00
commit
1ba91a49a9
|
@ -1,3 +1,10 @@
|
|||
2015-11-13 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR tree-optimization/67682
|
||||
* tree-vect-slp.c (vect_split_slp_store_group): New.
|
||||
(vect_analyze_slp_instance): During basic block SLP, recurse on
|
||||
subgroups if vect_build_slp_tree fails after 1st vector.
|
||||
|
||||
2015-11-13 Christian Bruel <christian.bruel@st.com>
|
||||
|
||||
PR target/65837
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
2015-11-13 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR tree-optimization/67682
|
||||
* gcc.dg/vect/bb-slp-7.c (main1): Make subgroups non-isomorphic.
|
||||
* gcc.dg/vect/bb-slp-subgroups-1.c: New.
|
||||
* gcc.dg/vect/bb-slp-subgroups-2.c: New.
|
||||
* gcc.dg/vect/bb-slp-subgroups-3.c: New.
|
||||
|
||||
2015-11-13 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* gcc.target/aarch64/umaddl_combine_1.c: New test.
|
||||
|
|
|
@ -16,12 +16,12 @@ main1 (unsigned int x, unsigned int y)
|
|||
unsigned int *pout = &out[0];
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
/* Non isomorphic. */
|
||||
/* Non isomorphic, even 64-bit subgroups. */
|
||||
a0 = *pin++ + 23;
|
||||
a1 = *pin++ + 142;
|
||||
a1 = *pin++ * 142;
|
||||
a2 = *pin++ + 2;
|
||||
a3 = *pin++ * 31;
|
||||
|
||||
|
||||
*pout++ = a0 * x;
|
||||
*pout++ = a1 * y;
|
||||
*pout++ = a2 * x;
|
||||
|
@ -29,7 +29,7 @@ main1 (unsigned int x, unsigned int y)
|
|||
|
||||
/* Check results. */
|
||||
if (out[0] != (in[0] + 23) * x
|
||||
|| out[1] != (in[1] + 142) * y
|
||||
|| out[1] != (in[1] * 142) * y
|
||||
|| out[2] != (in[2] + 2) * x
|
||||
|| out[3] != (in[3] * 31) * y)
|
||||
abort();
|
||||
|
@ -47,4 +47,4 @@ int main (void)
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
/* PR tree-optimization/67682. */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
int __attribute__((__aligned__(8))) a[8];
|
||||
int __attribute__((__aligned__(8))) b[4];
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
test ()
|
||||
{
|
||||
a[0] = b[0];
|
||||
a[1] = b[1];
|
||||
a[2] = b[2];
|
||||
a[3] = b[3];
|
||||
a[4] = 0;
|
||||
a[5] = 0;
|
||||
a[6] = 0;
|
||||
a[7] = 0;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
a[i] = 1;
|
||||
for (int i = 0; i < 4; i++)
|
||||
b[i] = i + 4;
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
test (a, b);
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
for (int i = 0; i < 4; i++)
|
||||
if (a[i] != i+4)
|
||||
abort ();
|
||||
for (int i = 4; i < 8; i++)
|
||||
if (a[i] != 0)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
|
@ -0,0 +1,41 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
/* PR tree-optimization/67682. */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
int __attribute__((__aligned__(8))) a[8];
|
||||
int __attribute__((__aligned__(8))) b[4];
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
test ()
|
||||
{
|
||||
a[0] = b[2] + 1;
|
||||
a[1] = b[0] + 2;
|
||||
a[2] = b[1] + 3;
|
||||
a[3] = b[1] + 4;
|
||||
a[4] = b[3] * 3;
|
||||
a[5] = b[0] * 4;
|
||||
a[6] = b[2] * 5;
|
||||
a[7] = b[1] * 7;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
a[i] = 1;
|
||||
for (int i = 0; i < 4; i++)
|
||||
b[i] = i + 4;
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
test (a, b);
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
if ((a[0] != 7) || a[1] != 6 || (a[2] != 8) || (a[3] != 9)
|
||||
|| (a[4] != 21) || (a[5] != 16) || (a[6] != 30) || (a[7] != 35))
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
|
@ -0,0 +1,41 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
/* PR tree-optimization/67682. */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
int __attribute__((__aligned__(8))) a[8];
|
||||
int __attribute__((__aligned__(8))) b[8];
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
test ()
|
||||
{
|
||||
a[0] = b[0] + 1;
|
||||
a[1] = b[1] + 2;
|
||||
a[2] = b[2] + 3;
|
||||
a[3] = b[3] + 4;
|
||||
a[4] = b[0] * 3;
|
||||
a[5] = b[2] * 4;
|
||||
a[6] = b[4] * 5;
|
||||
a[7] = b[6] * 7;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
a[i] = 1;
|
||||
for (int i = 0; i < 8; i++)
|
||||
b[i] = i + 4;
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
test (a, b);
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
if ((a[0] != 5) || (a[1] != 7) || (a[2] != 9) || (a[3] != 11)
|
||||
|| (a[4] != 12) || (a[5] != 24) || (a[6] != 40) || (a[7] != 70))
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
|
@ -1565,6 +1565,54 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
|
|||
body_cost_vec.release ();
|
||||
}
|
||||
|
||||
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
|
||||
one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
|
||||
the first GROUP1_SIZE stmts, since stores are consecutive), the second
|
||||
containing the remainder.
|
||||
Return the first stmt in the second group. */
|
||||
|
||||
static gimple *
|
||||
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
|
||||
{
|
||||
stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
|
||||
gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
|
||||
gcc_assert (group1_size > 0);
|
||||
int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
|
||||
gcc_assert (group2_size > 0);
|
||||
GROUP_SIZE (first_vinfo) = group1_size;
|
||||
|
||||
gimple *stmt = first_stmt;
|
||||
for (unsigned i = group1_size; i > 1; i--)
|
||||
{
|
||||
stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
|
||||
gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
|
||||
}
|
||||
/* STMT is now the last element of the first group. */
|
||||
gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
|
||||
GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
|
||||
|
||||
GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
|
||||
for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
|
||||
{
|
||||
GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
|
||||
gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
|
||||
}
|
||||
|
||||
/* For the second group, the GROUP_GAP is that before the original group,
|
||||
plus skipping over the first vector. */
|
||||
GROUP_GAP (vinfo_for_stmt (group2)) =
|
||||
GROUP_GAP (first_vinfo) + group1_size;
|
||||
|
||||
/* GROUP_GAP of the first group now has to skip over the second group too. */
|
||||
GROUP_GAP (first_vinfo) += group2_size;
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
|
||||
group1_size, group2_size);
|
||||
|
||||
return group2;
|
||||
}
|
||||
|
||||
/* Analyze an SLP instance starting from a group of grouped stores. Call
|
||||
vect_build_slp_tree to build a tree of packed stmts if possible.
|
||||
Return FALSE if it's impossible to SLP any stmt in the loop. */
|
||||
|
@ -1580,7 +1628,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
tree vectype, scalar_type = NULL_TREE;
|
||||
gimple *next;
|
||||
unsigned int vectorization_factor = 0;
|
||||
int i;
|
||||
unsigned int i;
|
||||
unsigned int max_nunits = 0;
|
||||
vec<slp_tree> loads;
|
||||
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
|
||||
|
@ -1774,6 +1822,41 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
vect_free_slp_tree (node);
|
||||
loads.release ();
|
||||
|
||||
/* For basic block SLP, try to break the group up into multiples of the
|
||||
vectorization factor. */
|
||||
if (is_a <bb_vec_info> (vinfo)
|
||||
&& GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
|
||||
&& STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
|
||||
{
|
||||
/* We consider breaking the group only on VF boundaries from the existing
|
||||
start. */
|
||||
for (i = 0; i < group_size; i++)
|
||||
if (!matches[i]) break;
|
||||
|
||||
if (i >= vectorization_factor && i < group_size)
|
||||
{
|
||||
/* Split into two groups at the first vector boundary before i. */
|
||||
gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0);
|
||||
unsigned group1_size = i & ~(vectorization_factor - 1);
|
||||
|
||||
gimple *rest = vect_split_slp_store_group (stmt, group1_size);
|
||||
bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
|
||||
/* If the first non-match was in the middle of a vector,
|
||||
skip the rest of that vector. */
|
||||
if (group1_size < i)
|
||||
{
|
||||
i = group1_size + vectorization_factor;
|
||||
if (i < group_size)
|
||||
rest = vect_split_slp_store_group (rest, vectorization_factor);
|
||||
}
|
||||
if (i < group_size)
|
||||
res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
|
||||
return res;
|
||||
}
|
||||
/* Even though the first vector did not all match, we might be able to SLP
|
||||
(some) of the remainder. FORNOW ignore this possibility. */
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue