re PR tree-optimization/56270 (loop over array of struct float causes compiler error: segmentation fault)

2013-04-19  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (struct _slp_instance): Move load_permutation
	member ...
	(struct _slp_tree): ... here.  Make it a vector of unsigned ints.
	(SLP_INSTANCE_LOAD_PERMUTATION): Remove.
	(SLP_TREE_LOAD_PERMUTATION): Add.
	(vect_transform_slp_perm_load): Adjust prototype.
	* tree-vect-slp.c (vect_free_slp_tree): Adjust.
	(vect_free_slp_instance): Likewise.
	(vect_create_new_slp_node): Likewise.
	(vect_supported_slp_permutation_p): Remove.
	(vect_slp_rearrange_stmts): Adjust.
	(vect_supported_load_permutation_p): Likewise.  Inline
	vect_supported_slp_permutation_p here.
	(vect_analyze_slp_instance): Compute load permutations per
	slp node instead of per instance.
	(vect_get_slp_defs): Adjust.
	(vect_transform_slp_perm_load): Likewise.
	(vect_schedule_slp_instance): Remove redundant code.
	(vect_schedule_slp): Remove hack for PR56270, add it ...
	* tree-vect-stmts.c (vectorizable_load): ... here, do not
	CSE loads for SLP.  Adjust.

From-SVN: r198095
This commit is contained in:
Richard Biener 2013-04-19 13:37:12 +00:00 committed by Richard Biener
parent ede22fc330
commit 01d8bf070a
4 changed files with 196 additions and 293 deletions

View File

@ -1,3 +1,27 @@
2013-04-19 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (struct _slp_instance): Move load_permutation
member ...
(struct _slp_tree): ... here. Make it a vector of unsigned ints.
(SLP_INSTANCE_LOAD_PERMUTATION): Remove.
(SLP_TREE_LOAD_PERMUTATION): Add.
(vect_transform_slp_perm_load): Adjust prototype.
* tree-vect-slp.c (vect_free_slp_tree): Adjust.
(vect_free_slp_instance): Likewise.
(vect_create_new_slp_node): Likewise.
(vect_supported_slp_permutation_p): Remove.
(vect_slp_rearrange_stmts): Adjust.
(vect_supported_load_permutation_p): Likewise. Inline
vect_supported_slp_permutation_p here.
(vect_analyze_slp_instance): Compute load permutations per
slp node instead of per instance.
(vect_get_slp_defs): Adjust.
(vect_transform_slp_perm_load): Likewise.
(vect_schedule_slp_instance): Remove redundant code.
(vect_schedule_slp): Remove hack for PR56270, add it ...
* tree-vect-stmts.c (vectorizable_load): ... here, do not
CSE loads for SLP. Adjust.
2013-04-19 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm.c (load_multiple_sequence, ldm_stm_operation_p): Fix

View File

@ -78,6 +78,7 @@ vect_free_slp_tree (slp_tree node)
SLP_TREE_CHILDREN (node).release ();
SLP_TREE_SCALAR_STMTS (node).release ();
SLP_TREE_VEC_STMTS (node).release ();
SLP_TREE_LOAD_PERMUTATION (node).release ();
free (node);
}
@ -89,7 +90,6 @@ void
vect_free_slp_instance (slp_instance instance)
{
vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
SLP_INSTANCE_LOADS (instance).release ();
SLP_INSTANCE_BODY_COST_VEC (instance).release ();
free (instance);
@ -120,6 +120,7 @@ vect_create_new_slp_node (vec<gimple> scalar_stmts)
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
SLP_TREE_VEC_STMTS (node).create (0);
SLP_TREE_CHILDREN (node).create (nops);
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
return node;
}
@ -1026,73 +1027,11 @@ vect_mark_slp_stmts_relevant (slp_tree node)
}
/* Check if the permutation required by the SLP INSTANCE is supported.
Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
vec<slp_tree> sorted_loads = vNULL;
int index;
slp_tree *tmp_loads = NULL;
int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
slp_tree load;
/* FORNOW: The only supported loads permutation is loads from the same
location in all the loads in the node, when the data-refs in
nodes of LOADS constitute an interleaving chain.
Sort the nodes according to the order of accesses in the chain. */
tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
for (i = 0, j = 0;
SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
&& SLP_INSTANCE_LOADS (instance).iterate (j, &load);
i += group_size, j++)
{
gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
/* Check that the loads are all in the same interleaving chain. */
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: unsupported data "
"permutation ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
scalar_stmt, 0);
}
free (tmp_loads);
return false;
}
tmp_loads[index] = load;
}
sorted_loads.create (group_size);
for (i = 0; i < group_size; i++)
sorted_loads.safe_push (tmp_loads[i]);
SLP_INSTANCE_LOADS (instance).release ();
SLP_INSTANCE_LOADS (instance) = sorted_loads;
free (tmp_loads);
if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (instance),
instance, true))
return false;
return true;
}
/* Rearrange the statements of NODE according to PERMUTATION. */
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
vec<int> permutation)
vec<unsigned> permutation)
{
gimple stmt;
vec<gimple> tmp_stmts;
@ -1114,32 +1053,29 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
}
/* Check if the required load permutation is supported.
LOAD_PERMUTATION contains a list of indices of the loads.
In SLP this permutation is relative to the order of grouped stores that are
the base of the SLP instance. */
/* Check if the required load permutations in the SLP instance
SLP_INSTN are supported. */
static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
vec<int> load_permutation)
vect_supported_load_permutation_p (slp_instance slp_instn)
{
int i = 0, j, prev = -1, next, k, number_of_groups;
bool supported, bad_permutation = false;
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
unsigned int i, j, k, next;
sbitmap load_index;
slp_tree node;
gimple stmt, load, next_load, first_load;
struct data_reference *dr;
bb_vec_info bb_vinfo;
/* FORNOW: permutations are only supported in SLP. */
if (!slp_instn)
return false;
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
FOR_EACH_VEC_ELT (load_permutation, i, next)
dump_printf (MSG_NOTE, "%d ", next);
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ())
FOR_EACH_VEC_ELT (node->load_permutation, j, next)
dump_printf (MSG_NOTE, "%d ", next);
else
for (i = 0; i < group_size; ++i)
dump_printf (MSG_NOTE, "%d ", i);
}
/* In case of reduction every load permutation is allowed, since the order
@ -1150,209 +1086,161 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
permutation). */
/* Check that all the load nodes are of the same size. */
/* ??? Can't we assert this? */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
return false;
node = SLP_INSTANCE_TREE (slp_instn);
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
instance, not all the loads belong to the same node or interleaving
group. Hence, we need to divide them into groups according to
GROUP_SIZE. */
number_of_groups = load_permutation.length () / group_size;
/* Reduction (there are no data-refs in the root).
In reduction chain the order of the loads is important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
int first_group_load_index;
slp_tree load;
unsigned int lidx;
/* Compare all the permutation sequences to the first one. */
for (i = 1; i < number_of_groups; i++)
{
k = 0;
for (j = i * group_size; j < i * group_size + group_size; j++)
{
next = load_permutation[j];
first_group_load_index = load_permutation[k];
/* Compare all the permutation sequences to the first one. We know
that at least one load is permuted. */
node = SLP_INSTANCE_LOADS (slp_instn)[0];
if (!node->load_permutation.exists ())
return false;
for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
{
if (!load->load_permutation.exists ())
return false;
FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
if (lidx != node->load_permutation[j])
return false;
}
if (next != first_group_load_index)
{
bad_permutation = true;
break;
}
/* Check that the loads in the first sequence are different and there
are no gaps between them. */
load_index = sbitmap_alloc (group_size);
bitmap_clear (load_index);
FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
{
if (bitmap_bit_p (load_index, lidx))
{
sbitmap_free (load_index);
return false;
}
bitmap_set_bit (load_index, lidx);
}
for (i = 0; i < group_size; i++)
if (!bitmap_bit_p (load_index, i))
{
sbitmap_free (load_index);
return false;
}
sbitmap_free (load_index);
k++;
}
/* This permutation is valid for reduction. Since the order of the
statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
node->load_permutation);
if (bad_permutation)
break;
}
if (!bad_permutation)
{
/* Check that the loads in the first sequence are different and there
are no gaps between them. */
load_index = sbitmap_alloc (group_size);
bitmap_clear (load_index);
for (k = 0; k < group_size; k++)
{
first_group_load_index = load_permutation[k];
if (bitmap_bit_p (load_index, first_group_load_index))
{
bad_permutation = true;
break;
}
bitmap_set_bit (load_index, first_group_load_index);
}
if (!bad_permutation)
for (k = 0; k < group_size; k++)
if (!bitmap_bit_p (load_index, k))
{
bad_permutation = true;
break;
}
sbitmap_free (load_index);
}
if (!bad_permutation)
{
/* This permutation is valid for reduction. Since the order of the
statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
load_permutation);
SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
return true;
}
/* We are done, no actual permutations need to be generated. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
SLP_TREE_LOAD_PERMUTATION (node).release ();
return true;
}
/* In basic block vectorization we allow any subchain of an interleaving
chain.
FORNOW: not supported in loop SLP because of realignment complications. */
bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
bad_permutation = false;
/* Check that for every node in the instance the loads form a subchain. */
if (bb_vinfo)
if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
{
/* Check that for every node in the instance the loads
form a subchain. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
next_load = NULL;
first_load = NULL;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
{
if (!first_load)
first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
else if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
{
bad_permutation = true;
break;
}
if (j != 0 && next_load != load)
{
bad_permutation = true;
break;
}
return false;
next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
}
if (bad_permutation)
break;
}
/* Check that the alignment of the first load in every subchain, i.e.,
the first statement in every load node, is supported. */
if (!bad_permutation)
{
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
first_load = SLP_TREE_SCALAR_STMTS (node)[0];
if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
{
dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
if (vect_supportable_dr_alignment (dr, false)
== dr_unaligned_unsupported)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
vect_location,
"unsupported unaligned load ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
first_load, 0);
}
bad_permutation = true;
break;
}
}
}
the first statement in every load node, is supported.
??? This belongs in alignment checking. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
first_load = SLP_TREE_SCALAR_STMTS (node)[0];
if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
{
dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
if (vect_supportable_dr_alignment (dr, false)
== dr_unaligned_unsupported)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
vect_location,
"unsupported unaligned load ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
first_load, 0);
}
return false;
}
}
}
if (!bad_permutation)
{
SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
return true;
}
}
/* We are done, no actual permutations need to be generated. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
SLP_TREE_LOAD_PERMUTATION (node).release ();
return true;
}
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
well (unless it's reduction). */
if (load_permutation.length ()
!= (unsigned int) (group_size * group_size))
if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
return false;
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (!node->load_permutation.exists ())
return false;
supported = true;
load_index = sbitmap_alloc (group_size);
bitmap_clear (load_index);
for (j = 0; j < group_size; j++)
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
for (i = j * group_size, k = 0;
load_permutation.iterate (i, &next) && k < group_size;
i++, k++)
{
if (i != j * group_size && next != prev)
{
supported = false;
break;
}
prev = next;
}
if (bitmap_bit_p (load_index, prev))
{
supported = false;
break;
}
bitmap_set_bit (load_index, prev);
unsigned int lidx = node->load_permutation[0];
if (bitmap_bit_p (load_index, lidx))
{
sbitmap_free (load_index);
return false;
}
bitmap_set_bit (load_index, lidx);
FOR_EACH_VEC_ELT (node->load_permutation, j, k)
if (k != lidx)
{
sbitmap_free (load_index);
return false;
}
}
for (j = 0; j < group_size; j++)
if (!bitmap_bit_p (load_index, j))
for (i = 0; i < group_size; i++)
if (!bitmap_bit_p (load_index, i))
{
sbitmap_free (load_index);
return false;
}
sbitmap_free (load_index);
if (supported && i == group_size * group_size
&& vect_supported_slp_permutation_p (slp_instn))
return true;
return false;
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ()
&& !vect_transform_slp_perm_load
(node, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
return false;
return true;
}
@ -1642,17 +1530,17 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
SLP_INSTANCE_LOADS (new_instance) = loads;
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = vNULL;
/* Compute the load permutation. */
slp_tree load_node;
bool loads_permuted = false;
vec<int> load_permutation;
load_permutation.create (group_size * group_size);
FOR_EACH_VEC_ELT (loads, i, load_node)
{
vec<unsigned> load_permutation;
int j;
gimple load, first_stmt;
bool this_load_permuted = false;
load_permutation.create (group_size);
first_stmt = GROUP_FIRST_ELEMENT
(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
@ -1661,16 +1549,21 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
= vect_get_place_in_interleaving_chain (load, first_stmt);
gcc_assert (load_place != -1);
if (load_place != j)
loads_permuted = true;
this_load_permuted = true;
load_permutation.safe_push (load_place);
}
if (!this_load_permuted)
{
load_permutation.release ();
continue;
}
SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
loads_permuted = true;
}
if (loads_permuted)
{
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
if (!vect_supported_load_permutation_p (new_instance, group_size,
load_permutation))
if (!vect_supported_load_permutation_p (new_instance))
{
if (dump_enabled_p ())
{
@ -1679,16 +1572,13 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"permutation ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
}
vect_free_slp_instance (new_instance);
return false;
}
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
= vect_find_first_load_in_slp_instance (new_instance);
= vect_find_first_load_in_slp_instance (new_instance);
}
else
load_permutation.release ();
/* Compute the costs of this SLP instance. */
vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
@ -2653,7 +2543,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
vectorized_defs = false;
if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
{
child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
child = SLP_TREE_CHILDREN (slp_node)[child_index];
/* We have to check both pattern and original def, if available. */
gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
@ -2854,16 +2744,18 @@ vect_get_mask_element (gimple stmt, int first_mask_element, int m,
/* Generate vector permute statements from a list of loads in DR_CHAIN.
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
permute statements for SLP_NODE_INSTANCE. */
permute statements for the SLP node NODE of the SLP instance
SLP_NODE_INSTANCE. */
bool
vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
gimple_stmt_iterator *gsi, int vf,
slp_instance slp_node_instance, bool analyze_only)
{
gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree mask_element_type = NULL_TREE, mask_type;
int i, j, k, nunits, vec_index = 0, scalar_index;
slp_tree node;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
gimple next_scalar_stmt;
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
@ -2910,6 +2802,9 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
relatively to SLP_NODE_INSTANCE unrolling factor. */
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
return false;
/* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE.
E.g., we have a group of three nodes with three loads from the same
@ -2928,7 +2823,6 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
we need the second and the third vectors: {b1,c1,a2,b2} and
{c2,a3,b3,c3}. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
{
scalar_index = 0;
index = 0;
@ -2944,6 +2838,7 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
{
for (k = 0; k < group_size; k++)
{
i = SLP_TREE_LOAD_PERMUTATION (node)[k];
first_mask_element = i + j * group_size;
if (!vect_get_mask_element (stmt, first_mask_element, 0,
nunits, only_one_vec, index,
@ -2956,9 +2851,7 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
if (index == nunits)
{
tree mask_vec, *mask_elts;
int l;
index = 0;
if (!can_vec_perm_p (mode, false, mask))
{
if (dump_enabled_p ())
@ -2974,15 +2867,17 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
return false;
}
mask_elts = XALLOCAVEC (tree, nunits);
for (l = 0; l < nunits; ++l)
mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
mask_vec = build_vector (mask_type, mask_elts);
index = 0;
if (!analyze_only)
{
if (need_next_vector)
int l;
tree mask_vec, *mask_elts;
mask_elts = XALLOCAVEC (tree, nunits);
for (l = 0; l < nunits; ++l)
mask_elts[l] = build_int_cst (mask_element_type,
mask[l]);
mask_vec = build_vector (mask_type, mask_elts);
if (need_next_vector)
{
first_vec_index = second_vec_index;
second_vec_index = vec_index;
@ -3019,7 +2914,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
unsigned int vec_stmts_size, nunits, group_size;
tree vectype;
int i;
slp_tree loads_node;
slp_tree child;
if (!node)
@ -3043,20 +2937,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
size. */
vec_stmts_size = (vectorization_factor * group_size) / nunits;
/* In case of load permutation we have to allocate vectorized statements for
all the nodes that participate in that permutation. */
if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
{
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
{
if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
{
SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
}
}
}
if (!SLP_TREE_VEC_STMTS (node).exists ())
{
SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
@ -3074,7 +2954,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
&& STMT_VINFO_GROUPED_ACCESS (stmt_info)
&& !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
&& SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
&& SLP_TREE_LOAD_PERMUTATION (node).exists ())
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
else if (is_pattern_stmt_p (stmt_info))
si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
@ -3153,8 +3033,7 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
vec<slp_instance> slp_instances;
slp_instance instance;
slp_tree loads_node;
unsigned int i, j, vf;
unsigned int i, vf;
bool is_store = false;
if (loop_vinfo)
@ -3173,14 +3052,6 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
/* Schedule the tree of INSTANCE. */
is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
instance, vf);
/* Clear STMT_VINFO_VEC_STMT of all loads. With shared loads
between SLP instances we fail to properly initialize the
vectorized SLP stmts and confuse different load permutations. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
STMT_VINFO_VEC_STMT
(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorizing stmts using SLP.");

View File

@ -4754,12 +4754,21 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
{
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
if (slp
&& !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
&& !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
&& first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
/* Check if the chain of loads is already vectorized. */
if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
/* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
??? But we can only do so if there is exactly one
as we have no way to get at the rest. Leave the CSE
opportunity alone.
??? With the group load eventually participating
in multiple different permutations (having multiple
slp nodes which refer to the same group) the CSE
is even wrong code. See PR56270. */
&& !slp)
{
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
return true;
@ -4772,7 +4781,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
{
grouped_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
slp_perm = true;
group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
}
@ -5163,7 +5172,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (slp_perm)
{
if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
slp_node_instance, false))
{
dr_chain.release ();

View File

@ -106,6 +106,9 @@ struct _slp_tree {
vec<slp_tree> children;
/* A group of scalar stmts to be vectorized together. */
vec<gimple> stmts;
/* Load permutation relative to the stores, NULL if there is no
permutation. */
vec<unsigned> load_permutation;
/* Vectorized stmt/s. */
vec<gimple> vec_stmts;
/* Number of vector stmts that are created to replace the group of scalar
@ -131,10 +134,6 @@ typedef struct _slp_instance {
/* Vectorization costs associated with SLP instance. */
stmt_vector_for_cost body_cost_vec;
/* Loads permutation relatively to the stores, NULL if there is no
permutation. */
vec<int> load_permutation;
/* The group of nodes that contain loads of this SLP instance. */
vec<slp_tree> loads;
@ -149,7 +148,6 @@ typedef struct _slp_instance {
#define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
#define SLP_INSTANCE_BODY_COST_VEC(S) (S)->body_cost_vec
#define SLP_INSTANCE_LOAD_PERMUTATION(S) (S)->load_permutation
#define SLP_INSTANCE_LOADS(S) (S)->loads
#define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load
@ -157,6 +155,7 @@ typedef struct _slp_instance {
#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
#define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
/* This structure is used in creation of an SLP tree. Each instance
corresponds to the same operand in a group of scalar stmts in an SLP
@ -961,7 +960,7 @@ extern int vect_get_single_scalar_iteration_cost (loop_vec_info);
/* In tree-vect-slp.c. */
extern void vect_free_slp_instance (slp_instance);
extern bool vect_transform_slp_perm_load (gimple, vec<tree> ,
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
gimple_stmt_iterator *, int,
slp_instance, bool);
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);