vect: Factor out and rename some functions/macros

Power supports vector memory access instructions that take a length
in bytes.  Like the existing full masking for SVE, this is another
approach to vectorizing loops using partially-populated vectors.
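
As an illustration only (this sketch and the loop below are not part
of the patch), consider:

    void
    add (int *restrict c, int *restrict a, int *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        c[i] = a[i] + b[i];
    }

With full masking, each vector iteration is guarded by a predicate
(e.g. one produced by SVE's WHILE_ULT), so the final iteration simply
leaves its excess lanes inactive.  With length-based accesses,
instructions such as Power's lxvl/stxvl take an explicit byte count,
so the final iteration loads and stores only the bytes that remain.
Both let the vector loop consume a partial final vector without a
scalar epilogue.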

As Richard Sandiford suggested, we should share code between the
partial-vector approaches where possible.  This patch:
  1) factors out two functions:
     - vect_min_prec_for_max_niters
     - vect_known_niters_smaller_than_vf
  2) renames four functions:
     - vect_iv_limit_for_full_masking
     - check_load_store_masking
     - vect_set_loop_condition_masked
     - vect_set_loop_condition_unmasked
  3) renames the macros LOOP_VINFO_MASK_COMPARE_TYPE and
     LOOP_VINFO_MASK_IV_TYPE.
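
(As a worked illustration of the first helper, with made-up numbers:
vect_min_prec_for_max_niters returns the number of bits needed to
represent MAX_NITERS * FACTOR as an unsigned integer, so for
MAX_NITERS = 1000 and FACTOR = 4 it must represent 4000 and returns
12, since 2^12 = 4096.)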

Bootstrapped/regtested on aarch64-linux-gnu.

gcc/ChangeLog:

	* tree-vect-loop-manip.c (vect_set_loop_controls_directly): Rename
	LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.  Rename
	LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
	(vect_set_loop_condition_masked): Renamed to ...
	(vect_set_loop_condition_partial_vectors): ... this.  Rename
	LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.  Rename
	vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
	(vect_set_loop_condition_unmasked): Renamed to ...
	(vect_set_loop_condition_normal): ... this.
	(vect_set_loop_condition): Rename vect_set_loop_condition_unmasked to
	vect_set_loop_condition_normal.  Rename vect_set_loop_condition_masked
	to vect_set_loop_condition_partial_vectors.
	(vect_prepare_for_masked_peels): Rename LOOP_VINFO_MASK_COMPARE_TYPE
	to LOOP_VINFO_RGROUP_COMPARE_TYPE.
	* tree-vect-loop.c (vect_known_niters_smaller_than_vf): New, factored
	out from ...
	(vect_analyze_loop_costing): ... this.
	(_loop_vec_info::_loop_vec_info): Rename mask_compare_type to
	rgroup_compare_type.
	(vect_min_prec_for_max_niters): New, factored out from ...
	(vect_verify_full_masking): ... this.  Rename
	vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
	Rename LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.
	Rename LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
	(vectorizable_reduction): Update some dump messages to mention
	partial vectors instead of fully-masked.
	(vectorizable_live_operation): Likewise.
	(vect_iv_limit_for_full_masking): Renamed to ...
	(vect_iv_limit_for_partial_vectors): ... this.
	* tree-vect-stmts.c (check_load_store_masking): Renamed to ...
	(check_load_store_for_partial_vectors): ... this.  Update some
	dump messages to mention partial vectors instead of fully-masked.
	(vectorizable_store): Rename check_load_store_masking to
	check_load_store_for_partial_vectors.
	(vectorizable_load): Likewise.
	* tree-vectorizer.h (LOOP_VINFO_MASK_COMPARE_TYPE): Renamed to ...
	(LOOP_VINFO_RGROUP_COMPARE_TYPE): ... this.
	(LOOP_VINFO_MASK_IV_TYPE): Renamed to ...
	(LOOP_VINFO_RGROUP_IV_TYPE): ... this.
	(vect_iv_limit_for_full_masking): Renamed to ...
	(vect_iv_limit_for_partial_vectors): ... this.
	(_loop_vec_info): Rename mask_compare_type to rgroup_compare_type.
	Rename iv_type to rgroup_iv_type.
commit 04f0546e3e
parent 3a179232a7
Author:     Kewen Lin
AuthorDate: 2020-06-12 01:14:40 -05:00
Commit:     Kewen Lin

 4 files changed, 129 insertions(+), 94 deletions(-)

--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -420,8 +420,8 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
                                  rgroup_controls *rgc, tree niters,
                                  tree niters_skip, bool might_wrap_p)
 {
-  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
-  tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
+  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
+  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
   tree ctrl_type = rgc->type;
   unsigned int nscalars_per_iter = rgc->max_nscalars_per_iter;
   poly_uint64 nscalars_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
@@ -644,15 +644,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
    final gcond.  */
 
 static gcond *
-vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
-                                tree niters, tree final_iv,
-                                bool niters_maybe_zero,
-                                gimple_stmt_iterator loop_cond_gsi)
+vect_set_loop_condition_partial_vectors (class loop *loop,
+                                         loop_vec_info loop_vinfo, tree niters,
+                                         tree final_iv, bool niters_maybe_zero,
+                                         gimple_stmt_iterator loop_cond_gsi)
 {
   gimple_seq preheader_seq = NULL;
   gimple_seq header_seq = NULL;
 
-  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
   unsigned int compare_precision = TYPE_PRECISION (compare_type);
   tree orig_niters = niters;
@@ -678,7 +678,7 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
   else
     niters = gimple_convert (&preheader_seq, compare_type, niters);
 
-  widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
 
   /* Iterate over all the rgroups and fill in their controls.  We could use
      the first control from any rgroup for the loop condition; here we
@@ -748,14 +748,13 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
   return cond_stmt;
 }
 
-/* Like vect_set_loop_condition, but handle the case in which there
-   are no loop masks.  */
+/* Like vect_set_loop_condition, but handle the case in which the vector
+   loop handles exactly VF scalars per iteration.  */
 
 static gcond *
-vect_set_loop_condition_unmasked (class loop *loop, tree niters,
-                                  tree step, tree final_iv,
-                                  bool niters_maybe_zero,
-                                  gimple_stmt_iterator loop_cond_gsi)
+vect_set_loop_condition_normal (class loop *loop, tree niters, tree step,
+                                tree final_iv, bool niters_maybe_zero,
+                                gimple_stmt_iterator loop_cond_gsi)
 {
   tree indx_before_incr, indx_after_incr;
   gcond *cond_stmt;
@@ -914,13 +913,14 @@ vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
   gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
 
   if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
-    cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
-                                                final_iv, niters_maybe_zero,
-                                                loop_cond_gsi);
+    cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo,
+                                                         niters, final_iv,
+                                                         niters_maybe_zero,
+                                                         loop_cond_gsi);
   else
-    cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
-                                                  final_iv, niters_maybe_zero,
-                                                  loop_cond_gsi);
+    cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv,
+                                                niters_maybe_zero,
+                                                loop_cond_gsi);
 
   /* Remove old loop exit test.  */
   stmt_vec_info orig_cond_info;
@@ -1775,7 +1775,7 @@ void
 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
 {
   tree misalign_in_elems;
-  tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+  tree type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
 
   gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));

--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -802,7 +802,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     vectorization_factor (0),
     max_vectorization_factor (0),
     mask_skip_niters (NULL_TREE),
-    mask_compare_type (NULL_TREE),
+    rgroup_compare_type (NULL_TREE),
     simd_if_cond (NULL_TREE),
     unaligned_dr (NULL),
     peeling_for_alignment (0),
@@ -963,23 +963,17 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
   return res;
 }
 
-/* Each statement in LOOP_VINFO can be masked where necessary.  Check
-   whether we can actually generate the masks required.  Return true if so,
-   storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE.  */
+/* Calculate the minimum precision necessary to represent:
+
+      MAX_NITERS * FACTOR
+
+   as an unsigned integer, where MAX_NITERS is the maximum number of
+   loop header iterations for the original scalar form of LOOP_VINFO.  */
 
-static bool
-vect_verify_full_masking (loop_vec_info loop_vinfo)
+static unsigned
+vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
 {
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  unsigned int min_ni_width;
-  unsigned int max_nscalars_per_iter
-    = vect_get_max_nscalars_per_iter (loop_vinfo);
-
-  /* Use a normal loop if there are no statements that need masking.
-     This only happens in rare degenerate cases: it means that the loop
-     has no loads, no stores, and no live-out values.  */
-  if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
-    return false;
 
   /* Get the maximum number of iterations that is representable
      in the counter type.  */
@@ -991,17 +985,36 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
   if (max_loop_iterations (loop, &max_back_edges))
     max_ni = wi::smin (max_ni, max_back_edges + 1);
 
-  /* Account for rgroup masks, in which each bit is replicated N times.  */
-  max_ni *= max_nscalars_per_iter;
+  /* Work out how many bits we need to represent the limit.  */
+  return wi::min_precision (max_ni * factor, UNSIGNED);
+}
+
+/* Each statement in LOOP_VINFO can be masked where necessary.  Check
+   whether we can actually generate the masks required.  Return true if so,
+   storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE.  */
+
+static bool
+vect_verify_full_masking (loop_vec_info loop_vinfo)
+{
+  unsigned int min_ni_width;
+  unsigned int max_nscalars_per_iter
+    = vect_get_max_nscalars_per_iter (loop_vinfo);
+
+  /* Use a normal loop if there are no statements that need masking.
+     This only happens in rare degenerate cases: it means that the loop
+     has no loads, no stores, and no live-out values.  */
+  if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
+    return false;
 
   /* Work out how many bits we need to represent the limit.  */
-  min_ni_width = wi::min_precision (max_ni, UNSIGNED);
+  min_ni_width
+    = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
 
   /* Find a scalar mode for which WHILE_ULT is supported.  */
   opt_scalar_int_mode cmp_mode_iter;
   tree cmp_type = NULL_TREE;
   tree iv_type = NULL_TREE;
-  widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
   unsigned int iv_precision = UINT_MAX;
 
   if (iv_limit != -1)
@@ -1054,8 +1067,8 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
   if (!cmp_type)
     return false;
 
-  LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
-  LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type;
+  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
+  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
   return true;
 }
@@ -1624,6 +1637,27 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
   return opt_result::success ();
 }
 
+/* Return true if we know that the iteration count is smaller than the
+   vectorization factor.  Return false if it isn't, or if we can't be sure
+   either way.  */
+
+static bool
+vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
+{
+  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+
+  HOST_WIDE_INT max_niter;
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+    max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
+  else
+    max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+  if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+    return true;
+
+  return false;
+}
+
 /* Analyze the cost of the loop described by LOOP_VINFO.  Decide if it
    is worthwhile to vectorize.  Return 1 if definitely yes, 0 if
    definitely no, or -1 if it's worth retrying.  */
@@ -1638,15 +1672,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
      counts less than the vectorization factor.  */
   if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
     {
-      HOST_WIDE_INT max_niter;
-      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
-        max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
-      else
-        max_niter = max_stmt_executions_int (loop);
-
-      if (max_niter != -1
-          && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+      if (vect_known_niters_smaller_than_vf (loop_vinfo))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6794,8 +6820,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because no"
-                            " conditional operation is available.\n");
+                            "can't operate on partial vectors because"
+                            " no conditional operation is available.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else if (reduction_type == FOLD_LEFT_REDUCTION
@@ -6806,8 +6832,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because no"
-                            " conditional operation is available.\n");
+                            "can't operate on partial vectors because"
+                            " no conditional operation is available.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else
@@ -7886,25 +7912,26 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because "
-                            "the target doesn't support extract last "
-                            "reduction.\n");
+                            "can't operate on partial vectors "
+                            "because the target doesn't support extract "
+                            "last reduction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else if (slp_node)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because an "
-                            "SLP statement is live after the loop.\n");
+                            "can't operate on partial vectors "
+                            "because an SLP statement is live after "
+                            "the loop.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else if (ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because"
-                            " ncopies is greater than 1.\n");
+                            "can't operate on partial vectors "
+                            "because ncopies is greater than 1.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else
@@ -9056,12 +9083,13 @@ optimize_mask_stores (class loop *loop)
 }
 
 /* Decide whether it is possible to use a zero-based induction variable
-   when vectorizing LOOP_VINFO with a fully-masked loop.  If it is,
-   return the value that the induction variable must be able to hold
-   in order to ensure that the loop ends with an all-false mask.
+   when vectorizing LOOP_VINFO with partial vectors.  If it is, return
+   the value that the induction variable must be able to hold in order
+   to ensure that the rgroups eventually have no active vector elements.
    Return -1 otherwise.  */
 
 widest_int
-vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo)
+vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo)
 {
   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1655,9 +1655,9 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                   gimple_stmt_iterator *);
 
 /* Check whether a load or store statement in the loop described by
-   LOOP_VINFO is possible in a fully-masked loop.  This is testing
-   whether the vectorizer pass has the appropriate support, as well as
-   whether the target does.
+   LOOP_VINFO is possible in a loop using partial vectors.  This is
+   testing whether the vectorizer pass has the appropriate support,
+   as well as whether the target does.
 
    VLS_TYPE says whether the statement is a load or store and VECTYPE
    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
@@ -1667,14 +1667,18 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
    its arguments.  If the load or store is conditional, SCALAR_MASK is the
    condition under which it occurs.
 
-   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not
-   supported, otherwise record the required mask types.  */
+   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
+   vectors is not supported, otherwise record the required rgroup control
+   types.  */
 
 static void
-check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
-                          vec_load_store_type vls_type, int group_size,
-                          vect_memory_access_type memory_access_type,
-                          gather_scatter_info *gs_info, tree scalar_mask)
+check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
+                                      vec_load_store_type vls_type,
+                                      int group_size,
+                                      vect_memory_access_type
+                                      memory_access_type,
+                                      gather_scatter_info *gs_info,
+                                      tree scalar_mask)
 {
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
@@ -1691,8 +1695,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because the"
-                            " target doesn't have an appropriate masked"
+                            "can't operate on partial vectors because"
+                            " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
@@ -1714,8 +1718,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because the"
-                            " target doesn't have an appropriate masked"
+                            "can't operate on partial vectors because"
+                            " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
@@ -1732,8 +1736,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
         scalar loop.  We need more work to support other mappings.  */
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "can't use a fully-masked loop because an access"
-                        " isn't contiguous.\n");
+                        "can't operate on partial vectors because an"
+                        " access isn't contiguous.\n");
       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
@@ -7140,8 +7144,9 @@ vectorizable_store (vec_info *vinfo,
       if (loop_vinfo
          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-       check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
-                                 memory_access_type, &gs_info, mask);
+       check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
+                                             group_size, memory_access_type,
+                                             &gs_info, mask);
 
       if (slp_node
          && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
@@ -8433,8 +8438,9 @@ vectorizable_load (vec_info *vinfo,
       if (loop_vinfo
          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-       check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
-                                 memory_access_type, &gs_info, mask);
+       check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
+                                             group_size, memory_access_type,
+                                             &gs_info, mask);
 
       STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,

--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -545,9 +545,10 @@ public:
      elements that should be false in the first mask).  */
   tree mask_skip_niters;
 
-  /* Type of the variables to use in the WHILE_ULT call for fully-masked
-     loops.  */
-  tree mask_compare_type;
+  /* The type that the loop control IV should be converted to before
+     testing which of the VF scalars are active and inactive.
+     Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P.  */
+  tree rgroup_compare_type;
 
   /* For #pragma omp simd if (x) loops the x expression.  If constant 0,
      the loop should not be vectorized, if constant non-zero, simd_if_cond
@@ -556,9 +557,9 @@ public:
      is false and vectorized loop otherwise.  */
   tree simd_if_cond;
 
-  /* Type of the IV to use in the WHILE_ULT call for fully-masked
-     loops.  */
-  tree iv_type;
+  /* The type that the vector loop control IV should have when
+     LOOP_VINFO_USING_PARTIAL_VECTORS_P is true.  */
+  tree rgroup_iv_type;
 
   /* Unknown DRs according to which loop was peeled.  */
   class dr_vec_info *unaligned_dr;
@@ -710,8 +711,8 @@ public:
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
 #define LOOP_VINFO_MASKS(L)                (L)->masks
 #define LOOP_VINFO_MASK_SKIP_NITERS(L)     (L)->mask_skip_niters
-#define LOOP_VINFO_MASK_COMPARE_TYPE(L)    (L)->mask_compare_type
-#define LOOP_VINFO_MASK_IV_TYPE(L)         (L)->iv_type
+#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L)  (L)->rgroup_compare_type
+#define LOOP_VINFO_RGROUP_IV_TYPE(L)       (L)->rgroup_iv_type
 #define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
 #define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
 #define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
@@ -1847,7 +1848,7 @@ extern tree vect_create_addr_base_for_vector_ref (vec_info *,
                                                   tree, tree = NULL_TREE);
 
 /* In tree-vect-loop.c.  */
-extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo);
+extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
 /* Used in tree-vect-loop-manip.c */
 extern void determine_peel_for_niter (loop_vec_info);
 /* Used in gimple-loop-interchange.c and tree-parloops.c.  */