SLP: support entire BB.

gcc/ChangeLog:

	* tree-vect-data-refs.c (dr_group_sort_cmp): Work on
	data_ref_pair.
	(vect_analyze_data_ref_accesses): Work on groups.
	(vect_find_stmt_data_reference): Add group_id argument and fill
	up dataref_groups vector.
	* tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new
	arguments.
	(vect_analyze_loop_2): Likewise.
	* tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument.
	(vect_slp_bb_region): Likewise.
	(vect_slp_region): Likewise.
	(vect_slp_bb):Work on the entire BB.
	* tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new
	argument.
	(vect_find_stmt_data_reference): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process
	a single vectorization and now 2 partial.
	* gcc.dg/vect/bb-slp-45.c: New test.
This commit is contained in:
Martin Liska 2020-07-31 10:53:39 +02:00
parent adc646b10c
commit cdb2e365fc
6 changed files with 119 additions and 75 deletions

View File

@ -41,4 +41,4 @@ int main()
}
/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */
/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */

View File

@ -0,0 +1,36 @@
/* { dg-require-effective-target vect_double } */
#include "tree-vect.h"
extern void abort (void);
double a[8], b[8];
int x;
void __attribute__((noinline,noclone))
bar (void)
{
x = 1;
}
void __attribute__((noinline,noclone))
foo(int i)
{
double tem1 = a[2*i];
double tem2 = 2*a[2*i+1];
bar ();
b[2*i] = 2*tem1;
b[2*i+1] = tem2;
}
int main()
{
int i;
check_vect ();
for (i = 0; i < 8; ++i)
b[i] = i;
foo (2);
return 0;
}
/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */

View File

@ -2757,25 +2757,33 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
return vect_analyze_group_access (vinfo, dr_info);
}
typedef std::pair<data_reference_p, int> data_ref_pair;
/* Compare two data-references DRA and DRB to group them into chunks
suitable for grouping. */
static int
dr_group_sort_cmp (const void *dra_, const void *drb_)
{
data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_);
data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_);
data_reference_p dra = dra_pair.first;
data_reference_p drb = drb_pair.first;
int cmp;
/* Stabilize sort. */
if (dra == drb)
return 0;
/* DRs in different loops never belong to the same group. */
loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
if (loopa != loopb)
return loopa->num < loopb->num ? -1 : 1;
/* DRs in different basic-blocks never belong to the same group. */
int bb_index1 = gimple_bb (DR_STMT (dra))->index;
int bb_index2 = gimple_bb (DR_STMT (drb))->index;
if (bb_index1 != bb_index2)
return bb_index1 < bb_index2 ? -1 : 1;
/* Different group IDs lead never belong to the same group. */
if (dra_pair.second != drb_pair.second)
return dra_pair.second < drb_pair.second ? -1 : 1;
/* Ordering of DRs according to base. */
cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
@ -2881,11 +2889,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
FORNOW: handle only arrays and pointer accesses. */
opt_result
vect_analyze_data_ref_accesses (vec_info *vinfo)
vect_analyze_data_ref_accesses (vec_info *vinfo,
vec<int> *dataref_groups)
{
unsigned int i;
vec<data_reference_p> datarefs = vinfo->shared->datarefs;
struct data_reference *dr;
DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
@ -2895,14 +2903,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* Sort the array of datarefs to make building the interleaving chains
linear. Don't modify the original vector's order, it is needed for
determining what dependencies are reversed. */
vec<data_reference_p> datarefs_copy = datarefs.copy ();
vec<data_ref_pair> datarefs_copy;
datarefs_copy.create (datarefs.length ());
for (unsigned i = 0; i < datarefs.length (); i++)
{
int group_id = dataref_groups ? (*dataref_groups)[i] : 0;
datarefs_copy.quick_push (data_ref_pair (datarefs[i], group_id));
}
datarefs_copy.qsort (dr_group_sort_cmp);
hash_set<stmt_vec_info> to_fixup;
/* Build the interleaving chains. */
for (i = 0; i < datarefs_copy.length () - 1;)
{
data_reference_p dra = datarefs_copy[i];
data_reference_p dra = datarefs_copy[i].first;
int dra_group_id = datarefs_copy[i].second;
dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
stmt_vec_info stmtinfo_a = dr_info_a->stmt;
stmt_vec_info lastinfo = NULL;
@ -2914,7 +2929,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
}
for (i = i + 1; i < datarefs_copy.length (); ++i)
{
data_reference_p drb = datarefs_copy[i];
data_reference_p drb = datarefs_copy[i].first;
int drb_group_id = datarefs_copy[i].second;
dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
stmt_vec_info stmtinfo_b = dr_info_b->stmt;
if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
@ -2927,10 +2943,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
matters we can push those to a worklist and re-iterate
over them. The we can just skip ahead to the next DR here. */
/* DRs in a different loop should not be put into the same
/* DRs in a different BBs should not be put into the same
interleaving group. */
if (gimple_bb (DR_STMT (dra))->loop_father
!= gimple_bb (DR_STMT (drb))->loop_father)
int bb_index1 = gimple_bb (DR_STMT (dra))->index;
int bb_index2 = gimple_bb (DR_STMT (drb))->index;
if (bb_index1 != bb_index2)
break;
if (dra_group_id != drb_group_id)
break;
/* Check that the data-refs have same first location (except init)
@ -2977,7 +2997,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
HOST_WIDE_INT init_prev
= TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
= TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first));
gcc_assert (init_a <= init_b
&& init_a <= init_prev
&& init_prev <= init_b);
@ -2985,7 +3005,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* Do not place the same access in the interleaving chain twice. */
if (init_b == init_prev)
{
gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first))
< gimple_uid (DR_STMT (drb)));
/* Simply link in duplicates and fix up the chain below. */
}
@ -3098,9 +3118,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
to_fixup.add (newgroup);
}
FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
data_ref_pair *dr_pair;
FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair)
{
dr_vec_info *dr_info = vinfo->lookup_dr (dr);
dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first);
if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
&& !vect_analyze_data_ref_access (vinfo, dr_info))
{
@ -3991,7 +4012,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
opt_result
vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
vec<data_reference_p> *datarefs)
vec<data_reference_p> *datarefs,
vec<int> *dataref_groups, int group_id)
{
/* We can ignore clobbers for dataref analysis - they are removed during
loop vectorization and BB vectorization checks dependences with a
@ -4118,6 +4140,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
free_data_ref (dr);
datarefs->safe_push (newdr);
if (dataref_groups)
dataref_groups->safe_push (group_id);
return opt_result::success ();
}
}
@ -4127,6 +4151,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
}
datarefs->safe_push (dr);
if (dataref_groups)
dataref_groups->safe_push (group_id);
return opt_result::success ();
}

View File

@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
if (is_gimple_debug (stmt))
continue;
++(*n_stmts);
opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs);
opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
NULL, 0);
if (!res)
{
if (is_gimple_call (stmt) && loop->safelen)
@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
/* Analyze the access patterns of the data-refs in the loop (consecutive,
complex, etc.). FORNOW: Only handle consecutive access pattern. */
ok = vect_analyze_data_ref_accesses (loop_vinfo);
ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
if (!ok)
{
if (dump_enabled_p ())

View File

@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
region. */
static bool
vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
vec<int> *dataref_groups)
{
DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
return false;
}
if (!vect_analyze_data_ref_accesses (bb_vinfo))
if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
given by DATAREFS. */
static bool
vect_slp_bb_region (gimple_stmt_iterator region_begin,
gimple_stmt_iterator region_end,
vec<data_reference_p> datarefs,
unsigned int n_stmts)
vect_slp_region (gimple_stmt_iterator region_begin,
gimple_stmt_iterator region_end,
vec<data_reference_p> datarefs,
vec<int> *dataref_groups,
unsigned int n_stmts)
{
bb_vec_info bb_vinfo;
auto_vector_modes vector_modes;
@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
bb_vinfo->shared->check_datarefs ();
bb_vinfo->vector_mode = next_vector_mode;
if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)
&& dbg_cnt (vect_slp))
{
if (dump_enabled_p ())
@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
bool
vect_slp_bb (basic_block bb)
{
gimple_stmt_iterator gsi;
bool any_vectorized = false;
vec<data_reference_p> datarefs = vNULL;
vec<int> dataref_groups = vNULL;
int insns = 0;
int current_group = 0;
gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb);
gimple_stmt_iterator region_end = gsi_last_bb (bb);
if (!gsi_end_p (region_end))
gsi_next (&region_end);
gsi = gsi_after_labels (bb);
while (!gsi_end_p (gsi))
for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
{
gimple_stmt_iterator region_begin = gsi;
vec<data_reference_p> datarefs = vNULL;
int insns = 0;
gimple *stmt = gsi_stmt (gsi);
if (is_gimple_debug (stmt))
continue;
for (; !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (is_gimple_debug (stmt))
{
/* Skip leading debug stmts. */
if (gsi_stmt (region_begin) == stmt)
gsi_next (&region_begin);
continue;
}
insns++;
insns++;
if (gimple_location (stmt) != UNKNOWN_LOCATION)
vect_location = stmt;
if (gimple_location (stmt) != UNKNOWN_LOCATION)
vect_location = stmt;
if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
break;
}
if (gsi_end_p (region_begin))
break;
/* Skip leading unhandled stmts. */
if (gsi_stmt (region_begin) == gsi_stmt (gsi))
{
gsi_next (&gsi);
continue;
}
gimple_stmt_iterator region_end = gsi;
if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
&dataref_groups, current_group))
++current_group;
if (insns > param_slp_max_insns_in_bb)
{
@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb)
"not vectorized: too many instructions in "
"basic block.\n");
}
else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
any_vectorized = true;
if (gsi_end_p (region_end))
break;
/* Skip the unhandled stmt. */
gsi_next (&gsi);
}
return any_vectorized;
return vect_slp_region (region_begin, region_end, datarefs,
&dataref_groups, insns);
}

View File

@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *);
extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
tree, int, internal_fn *, tree *);
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
gather_scatter_info *);
extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
vec<data_reference_p> *);
vec<data_reference_p> *,
vec<int> *, int);
extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
extern void vect_record_base_alignments (vec_info *);
extern tree vect_create_data_ref_ptr (vec_info *,