Extend can_duplicate_and_interleave_p to mixed-size vectors
This patch makes can_duplicate_and_interleave_p cope with mixtures of
vector sizes, by using queries based on get_vectype_for_scalar_type
instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).

int_mode_for_size is now the first check we do for a candidate mode,
so it seemed better to restrict it to MAX_FIXED_MODE_SIZE.  This avoids
unnecessary work and avoids trying to create scalar types that the
target might not support.

2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
	element type rather than an element mode.
	* tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
	Use get_vectype_for_scalar_type to query the natural types
	for a given element type rather than basing everything on
	GET_MODE_SIZE (vinfo->vector_mode).  Limit int_mode_for_size
	query to MAX_FIXED_MODE_SIZE.
	(duplicate_and_interleave): Update call accordingly.
	* tree-vect-loop.c (vectorizable_reduction): Likewise.

From-SVN: r278335
This commit is contained in:
parent
9b75f56d4b
commit
f884cd2fea
@ -1,3 +1,15 @@
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
|
||||
element type rather than an element mode.
|
||||
* tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
|
||||
Use get_vectype_for_scalar_type to query the natural types
|
||||
for a given element type rather than basing everything on
|
||||
GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size
|
||||
query to MAX_FIXED_MODE_SIZE.
|
||||
(duplicate_and_interleave): Update call accordingly.
|
||||
* tree-vect-loop.c (vectorizable_reduction): Likewise.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* tree-vectorizer.h (vect_get_vector_types_for_stmt): Take an
|
||||
|
@ -6365,10 +6365,9 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
|
||||
that value needs to be repeated for every instance of the
|
||||
statement within the initial vector. */
|
||||
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
||||
scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
|
||||
if (!neutral_op
|
||||
&& !can_duplicate_and_interleave_p (loop_vinfo, group_size,
|
||||
elt_mode))
|
||||
TREE_TYPE (vectype_out)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
@ -264,7 +264,7 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Check whether it is possible to load COUNT elements of type ELT_MODE
|
||||
/* Check whether it is possible to load COUNT elements of type ELT_TYPE
|
||||
using the method implemented by duplicate_and_interleave. Return true
|
||||
if so, returning the number of intermediate vectors in *NVECTORS_OUT
|
||||
(if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
|
||||
@ -272,26 +272,37 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
|
||||
|
||||
bool
|
||||
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
|
||||
machine_mode elt_mode,
|
||||
unsigned int *nvectors_out,
|
||||
tree elt_type, unsigned int *nvectors_out,
|
||||
tree *vector_type_out,
|
||||
tree *permutes)
|
||||
{
|
||||
poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
|
||||
poly_int64 nelts;
|
||||
tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
|
||||
if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
|
||||
return false;
|
||||
|
||||
machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
|
||||
poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
|
||||
unsigned int nvectors = 1;
|
||||
for (;;)
|
||||
{
|
||||
scalar_int_mode int_mode;
|
||||
poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
|
||||
if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
|
||||
&& int_mode_for_size (elt_bits, 0).exists (&int_mode))
|
||||
if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
|
||||
{
|
||||
/* Get the natural vector type for this SLP group size. */
|
||||
tree int_type = build_nonstandard_integer_type
|
||||
(GET_MODE_BITSIZE (int_mode), 1);
|
||||
tree vector_type = build_vector_type (int_type, nelts);
|
||||
if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
|
||||
tree vector_type
|
||||
= get_vectype_for_scalar_type (vinfo, int_type, count);
|
||||
if (vector_type
|
||||
&& VECTOR_MODE_P (TYPE_MODE (vector_type))
|
||||
&& known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
|
||||
GET_MODE_SIZE (base_vector_mode)))
|
||||
{
|
||||
/* Try fusing consecutive sequences of COUNT / NVECTORS elements
|
||||
together into elements of type INT_TYPE and using the result
|
||||
to build NVECTORS vectors. */
|
||||
poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
|
||||
vec_perm_builder sel1 (nelts, 2, 3);
|
||||
vec_perm_builder sel2 (nelts, 2, 3);
|
||||
poly_int64 half_nelts = exact_div (nelts, 2);
|
||||
@ -491,7 +502,7 @@ again:
|
||||
&& !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
|
||||
&& (TREE_CODE (type) == BOOLEAN_TYPE
|
||||
|| !can_duplicate_and_interleave_p (vinfo, stmts.length (),
|
||||
TYPE_MODE (type))))
|
||||
type)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
@ -3552,7 +3563,7 @@ duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
|
||||
unsigned int nvectors = 1;
|
||||
tree new_vector_type;
|
||||
tree permutes[2];
|
||||
if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
|
||||
if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
|
||||
&nvectors, &new_vector_type,
|
||||
permutes))
|
||||
gcc_unreachable ();
|
||||
|
@ -1795,8 +1795,7 @@ extern void vect_get_slp_defs (slp_tree, vec<vec<tree> > *, unsigned n = -1U);
|
||||
extern bool vect_slp_bb (basic_block);
|
||||
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
|
||||
extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
|
||||
extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
|
||||
machine_mode,
|
||||
extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
|
||||
unsigned int * = NULL,
|
||||
tree * = NULL, tree * = NULL);
|
||||
extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
|
||||
|
Loading…
Reference in New Issue
Block a user