diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 806db84cb61..458a6675c47 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -420,8 +420,8 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, rgroup_controls *rgc, tree niters, tree niters_skip, bool might_wrap_p) { - tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); - tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo); + tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); tree ctrl_type = rgc->type; unsigned int nscalars_per_iter = rgc->max_nscalars_per_iter; poly_uint64 nscalars_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type); @@ -644,15 +644,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, final gcond. */ static gcond * -vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo, - tree niters, tree final_iv, - bool niters_maybe_zero, - gimple_stmt_iterator loop_cond_gsi) +vect_set_loop_condition_partial_vectors (class loop *loop, + loop_vec_info loop_vinfo, tree niters, + tree final_iv, bool niters_maybe_zero, + gimple_stmt_iterator loop_cond_gsi) { gimple_seq preheader_seq = NULL; gimple_seq header_seq = NULL; - tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); + tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); unsigned int compare_precision = TYPE_PRECISION (compare_type); tree orig_niters = niters; @@ -678,7 +678,7 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo, else niters = gimple_convert (&preheader_seq, compare_type, niters); - widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); + widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); /* Iterate over all the rgroups and fill in their controls. We could use the first control from any rgroup for the loop condition; here we @@ -748,14 +748,13 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo, return cond_stmt; } -/* Like vect_set_loop_condition, but handle the case in which there - are no loop masks. */ +/* Like vect_set_loop_condition, but handle the case in which the vector + loop handles exactly VF scalars per iteration. */ static gcond * -vect_set_loop_condition_unmasked (class loop *loop, tree niters, - tree step, tree final_iv, - bool niters_maybe_zero, - gimple_stmt_iterator loop_cond_gsi) +vect_set_loop_condition_normal (class loop *loop, tree niters, tree step, + tree final_iv, bool niters_maybe_zero, + gimple_stmt_iterator loop_cond_gsi) { tree indx_before_incr, indx_after_incr; gcond *cond_stmt; @@ -914,13 +913,14 @@ vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo, gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond); if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) - cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters, - final_iv, niters_maybe_zero, - loop_cond_gsi); + cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo, + niters, final_iv, + niters_maybe_zero, + loop_cond_gsi); else - cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step, - final_iv, niters_maybe_zero, - loop_cond_gsi); + cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv, + niters_maybe_zero, + loop_cond_gsi); /* Remove old loop exit test. */ stmt_vec_info orig_cond_info; @@ -1775,7 +1775,7 @@ void vect_prepare_for_masked_peels (loop_vec_info loop_vinfo) { tree misalign_in_elems; - tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); + tree type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo)); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index f4d47e05bd4..6311e795204 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -802,7 +802,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) vectorization_factor (0), max_vectorization_factor (0), mask_skip_niters (NULL_TREE), - mask_compare_type (NULL_TREE), + rgroup_compare_type (NULL_TREE), simd_if_cond (NULL_TREE), unaligned_dr (NULL), peeling_for_alignment (0), @@ -963,23 +963,17 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) return res; } -/* Each statement in LOOP_VINFO can be masked where necessary. Check - whether we can actually generate the masks required. Return true if so, - storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE. */ +/* Calculate the minimum precision necessary to represent: -static bool -vect_verify_full_masking (loop_vec_info loop_vinfo) + MAX_NITERS * FACTOR + + as an unsigned integer, where MAX_NITERS is the maximum number of + loop header iterations for the original scalar form of LOOP_VINFO. */ + +static unsigned +vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - unsigned int min_ni_width; - unsigned int max_nscalars_per_iter - = vect_get_max_nscalars_per_iter (loop_vinfo); - - /* Use a normal loop if there are no statements that need masking. - This only happens in rare degenerate cases: it means that the loop - has no loads, no stores, and no live-out values. */ - if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) - return false; /* Get the maximum number of iterations that is representable in the counter type. */ @@ -991,17 +985,36 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) if (max_loop_iterations (loop, &max_back_edges)) max_ni = wi::smin (max_ni, max_back_edges + 1); - /* Account for rgroup masks, in which each bit is replicated N times. */ - max_ni *= max_nscalars_per_iter; + /* Work out how many bits we need to represent the limit. */ + return wi::min_precision (max_ni * factor, UNSIGNED); +} + +/* Each statement in LOOP_VINFO can be masked where necessary. Check + whether we can actually generate the masks required. Return true if so, + storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */ + +static bool +vect_verify_full_masking (loop_vec_info loop_vinfo) +{ + unsigned int min_ni_width; + unsigned int max_nscalars_per_iter + = vect_get_max_nscalars_per_iter (loop_vinfo); + + /* Use a normal loop if there are no statements that need masking. + This only happens in rare degenerate cases: it means that the loop + has no loads, no stores, and no live-out values. */ + if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) + return false; /* Work out how many bits we need to represent the limit. */ - min_ni_width = wi::min_precision (max_ni, UNSIGNED); + min_ni_width + = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter); /* Find a scalar mode for which WHILE_ULT is supported. */ opt_scalar_int_mode cmp_mode_iter; tree cmp_type = NULL_TREE; tree iv_type = NULL_TREE; - widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); + widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); unsigned int iv_precision = UINT_MAX; if (iv_limit != -1) @@ -1054,8 +1067,8 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) if (!cmp_type) return false; - LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type; - LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type; + LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type; + LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type; return true; } @@ -1624,6 +1637,27 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) return opt_result::success (); } +/* Return true if we know that the iteration count is smaller than the + vectorization factor. Return false if it isn't, or if we can't be sure + either way. */ + +static bool +vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) +{ + unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); + + HOST_WIDE_INT max_niter; + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); + else + max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); + + if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) + return true; + + return false; +} + /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it is worthwhile to vectorize. Return 1 if definitely yes, 0 if definitely no, or -1 if it's worth retrying. */ @@ -1638,15 +1672,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo) counts less than the vectorization factor. */ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { - HOST_WIDE_INT max_niter; - - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); - else - max_niter = max_stmt_executions_int (loop); - - if (max_niter != -1 - && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) + if (vect_known_niters_smaller_than_vf (loop_vinfo)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6794,8 +6820,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because no" - " conditional operation is available.\n"); + "can't operate on partial vectors because" + " no conditional operation is available.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else if (reduction_type == FOLD_LEFT_REDUCTION @@ -6806,8 +6832,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because no" - " conditional operation is available.\n"); + "can't operate on partial vectors because" + " no conditional operation is available.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else @@ -7886,25 +7912,26 @@ vectorizable_live_operation (loop_vec_info loop_vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because " - "the target doesn't support extract last " - "reduction.\n"); + "can't operate on partial vectors " + "because the target doesn't support extract " + "last reduction.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else if (slp_node) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because an " - "SLP statement is live after the loop.\n"); + "can't operate on partial vectors " + "because an SLP statement is live after " + "the loop.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else if (ncopies > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because" - " ncopies is greater than 1.\n"); + "can't operate on partial vectors " + "because ncopies is greater than 1.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else @@ -9056,12 +9083,13 @@ optimize_mask_stores (class loop *loop) } /* Decide whether it is possible to use a zero-based induction variable - when vectorizing LOOP_VINFO with a fully-masked loop. If it is, - return the value that the induction variable must be able to hold - in order to ensure that the loop ends with an all-false mask. + when vectorizing LOOP_VINFO with partial vectors. If it is, return + the value that the induction variable must be able to hold in order + to ensure that the rgroups eventually have no active vector elements. Return -1 otherwise. */ + widest_int -vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo) +vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo) { tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 71ee83176c5..cdd6f6c5e5d 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1655,9 +1655,9 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info, gimple_stmt_iterator *); /* Check whether a load or store statement in the loop described by - LOOP_VINFO is possible in a fully-masked loop. This is testing - whether the vectorizer pass has the appropriate support, as well as - whether the target does. + LOOP_VINFO is possible in a loop using partial vectors. This is + testing whether the vectorizer pass has the appropriate support, + as well as whether the target does. VLS_TYPE says whether the statement is a load or store and VECTYPE is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE @@ -1667,14 +1667,18 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info, its arguments. If the load or store is conditional, SCALAR_MASK is the condition under which it occurs. - Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not - supported, otherwise record the required mask types. */ + Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial + vectors is not supported, otherwise record the required rgroup control + types. */ static void -check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, - vec_load_store_type vls_type, int group_size, - vect_memory_access_type memory_access_type, - gather_scatter_info *gs_info, tree scalar_mask) +check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, + vec_load_store_type vls_type, + int group_size, + vect_memory_access_type + memory_access_type, + gather_scatter_info *gs_info, + tree scalar_mask) { /* Invariant loads need no special support. */ if (memory_access_type == VMAT_INVARIANT) @@ -1691,8 +1695,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because the" - " target doesn't have an appropriate masked" + "can't operate on partial vectors because" + " the target doesn't have an appropriate" " load/store-lanes instruction.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; return; @@ -1714,8 +1718,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because the" - " target doesn't have an appropriate masked" + "can't operate on partial vectors because" + " the target doesn't have an appropriate" " gather load or scatter store instruction.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; return; @@ -1732,8 +1736,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, scalar loop. We need more work to support other mappings. */ if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because an access" - " isn't contiguous.\n"); + "can't operate on partial vectors because an" + " access isn't contiguous.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; return; } @@ -7140,8 +7144,9 @@ vectorizable_store (vec_info *vinfo, if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) - check_load_store_masking (loop_vinfo, vectype, vls_type, group_size, - memory_access_type, &gs_info, mask); + check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type, + group_size, memory_access_type, + &gs_info, mask); if (slp_node && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], @@ -8433,8 +8438,9 @@ vectorizable_load (vec_info *vinfo, if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) - check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size, - memory_access_type, &gs_info, mask); + check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD, + group_size, memory_access_type, + &gs_info, mask); STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type; vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 828a54104a6..6c830ad09f4 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -545,9 +545,10 @@ public: elements that should be false in the first mask). */ tree mask_skip_niters; - /* Type of the variables to use in the WHILE_ULT call for fully-masked - loops. */ - tree mask_compare_type; + /* The type that the loop control IV should be converted to before + testing which of the VF scalars are active and inactive. + Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ + tree rgroup_compare_type; /* For #pragma omp simd if (x) loops the x expression. If constant 0, the loop should not be vectorized, if constant non-zero, simd_if_cond @@ -556,9 +557,9 @@ public: is false and vectorized loop otherwise. */ tree simd_if_cond; - /* Type of the IV to use in the WHILE_ULT call for fully-masked - loops. */ - tree iv_type; + /* The type that the vector loop control IV should have when + LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */ + tree rgroup_iv_type; /* Unknown DRs according to which loop was peeled. */ class dr_vec_info *unaligned_dr; @@ -710,8 +711,8 @@ public: #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor #define LOOP_VINFO_MASKS(L) (L)->masks #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters -#define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type -#define LOOP_VINFO_MASK_IV_TYPE(L) (L)->iv_type +#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type +#define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs @@ -1847,7 +1848,7 @@ extern tree vect_create_addr_base_for_vector_ref (vec_info *, tree, tree = NULL_TREE); /* In tree-vect-loop.c. */ -extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); +extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo); /* Used in tree-vect-loop-manip.c */ extern void determine_peel_for_niter (loop_vec_info); /* Used in gimple-loop-interchange.c and tree-parloops.c. */