Restore correct iv step for fully-masked loops
r272233 introduced a large number of execution failures on SVE. The patch hard-coded an IV step of VF, but for SLP groups it needs to be VF * group size. Also, iv_precision had type widest_int but only needs to be unsigned int.

2019-06-18  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-loop-manip.c (vect_set_loop_masks_directly): Remove vf parameter. Restore the previous iv step of nscalars_step, but give it iv_type rather than compare_type. Tweak code order to match the comments.
	(vect_set_loop_condition_masked): Update accordingly.
	* tree-vect-loop.c (vect_verify_full_masking): Use "unsigned int" for iv_precision. Tweak comment formatting.

From-SVN: r272411
This commit is contained in:
parent
a9e47ccf26
commit
fcae0292de
@ -1,3 +1,13 @@
|
|||||||
|
2019-06-18 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
|
|
||||||
|
* tree-vect-loop-manip.c (vect_set_loop_masks_directly): Remove
|
||||||
|
vf parameter. Restore the previous iv step of nscalars_step,
|
||||||
|
but give it iv_type rather than compare_type. Tweak code order
|
||||||
|
to match the comments.
|
||||||
|
(vect_set_loop_condition_masked): Update accordingly.
|
||||||
|
* tree-vect-loop.c (vect_verify_full_masking): Use "unsigned int"
|
||||||
|
for iv_precision. Tweak comment formatting.
|
||||||
|
|
||||||
2019-06-18 Iain Sandoe <iain@sandoe.co.uk>
|
2019-06-18 Iain Sandoe <iain@sandoe.co.uk>
|
||||||
|
|
||||||
* config/darwin.c: Strip trailing whitespace.
|
* config/darwin.c: Strip trailing whitespace.
|
||||||
|
@ -382,8 +382,7 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
|
|||||||
Use LOOP_COND_GSI to insert code before the exit gcond.
|
Use LOOP_COND_GSI to insert code before the exit gcond.
|
||||||
|
|
||||||
RGM belongs to loop LOOP. The loop originally iterated NITERS
|
RGM belongs to loop LOOP. The loop originally iterated NITERS
|
||||||
times and has been vectorized according to LOOP_VINFO. Each iteration
|
times and has been vectorized according to LOOP_VINFO.
|
||||||
of the vectorized loop handles VF iterations of the scalar loop.
|
|
||||||
|
|
||||||
If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
|
If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
|
||||||
starts with NITERS_SKIP dummy iterations of the scalar loop before
|
starts with NITERS_SKIP dummy iterations of the scalar loop before
|
||||||
@ -410,8 +409,7 @@ static tree
|
|||||||
vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
|
vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
|
||||||
gimple_seq *preheader_seq,
|
gimple_seq *preheader_seq,
|
||||||
gimple_stmt_iterator loop_cond_gsi,
|
gimple_stmt_iterator loop_cond_gsi,
|
||||||
rgroup_masks *rgm, tree vf,
|
rgroup_masks *rgm, tree niters, tree niters_skip,
|
||||||
tree niters, tree niters_skip,
|
|
||||||
bool might_wrap_p)
|
bool might_wrap_p)
|
||||||
{
|
{
|
||||||
tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
|
tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
|
||||||
@ -419,26 +417,28 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
|
|||||||
tree mask_type = rgm->mask_type;
|
tree mask_type = rgm->mask_type;
|
||||||
unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
|
unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
|
||||||
poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
|
poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
|
||||||
|
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||||
|
|
||||||
/* Calculate the maximum number of scalar values that the rgroup
|
/* Calculate the maximum number of scalar values that the rgroup
|
||||||
handles in total, the number that it handles for each iteration
|
handles in total, the number that it handles for each iteration
|
||||||
of the vector loop, and the number that it should skip during the
|
of the vector loop, and the number that it should skip during the
|
||||||
first iteration of the vector loop. */
|
first iteration of the vector loop. */
|
||||||
tree nscalars_total = niters;
|
tree nscalars_total = niters;
|
||||||
tree nscalars_step = vf;
|
tree nscalars_step = build_int_cst (iv_type, vf);
|
||||||
tree nscalars_skip = niters_skip;
|
tree nscalars_skip = niters_skip;
|
||||||
if (nscalars_per_iter != 1)
|
if (nscalars_per_iter != 1)
|
||||||
{
|
{
|
||||||
/* We checked before choosing to use a fully-masked loop that these
|
/* We checked before choosing to use a fully-masked loop that these
|
||||||
multiplications don't overflow. */
|
multiplications don't overflow. */
|
||||||
tree factor = build_int_cst (compare_type, nscalars_per_iter);
|
tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
|
||||||
|
tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
|
||||||
nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
|
nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
|
||||||
nscalars_total, factor);
|
nscalars_total, compare_factor);
|
||||||
nscalars_step = gimple_build (preheader_seq, MULT_EXPR, compare_type,
|
nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
|
||||||
nscalars_step, factor);
|
nscalars_step, iv_factor);
|
||||||
if (nscalars_skip)
|
if (nscalars_skip)
|
||||||
nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
|
nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
|
||||||
nscalars_skip, factor);
|
nscalars_skip, compare_factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create an induction variable that counts the number of scalars
|
/* Create an induction variable that counts the number of scalars
|
||||||
@ -447,15 +447,10 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
|
|||||||
gimple_stmt_iterator incr_gsi;
|
gimple_stmt_iterator incr_gsi;
|
||||||
bool insert_after;
|
bool insert_after;
|
||||||
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
|
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
|
||||||
|
create_iv (build_int_cst (iv_type, 0), nscalars_step, NULL_TREE, loop,
|
||||||
|
&incr_gsi, insert_after, &index_before_incr, &index_after_incr);
|
||||||
|
|
||||||
tree zero_index = build_int_cst (iv_type, 0);
|
tree zero_index = build_int_cst (compare_type, 0);
|
||||||
tree step = build_int_cst (iv_type,
|
|
||||||
LOOP_VINFO_VECT_FACTOR (loop_vinfo));
|
|
||||||
/* Create IV of iv_type. */
|
|
||||||
create_iv (zero_index, step, NULL_TREE, loop, &incr_gsi,
|
|
||||||
insert_after, &index_before_incr, &index_after_incr);
|
|
||||||
|
|
||||||
zero_index = build_int_cst (compare_type, 0);
|
|
||||||
tree test_index, test_limit, first_limit;
|
tree test_index, test_limit, first_limit;
|
||||||
gimple_stmt_iterator *test_gsi;
|
gimple_stmt_iterator *test_gsi;
|
||||||
if (might_wrap_p)
|
if (might_wrap_p)
|
||||||
@ -487,7 +482,8 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
|
|||||||
where the rightmost subtraction can be done directly in
|
where the rightmost subtraction can be done directly in
|
||||||
COMPARE_TYPE. */
|
COMPARE_TYPE. */
|
||||||
test_index = index_before_incr;
|
test_index = index_before_incr;
|
||||||
tree adjust = nscalars_step;
|
tree adjust = gimple_convert (preheader_seq, compare_type,
|
||||||
|
nscalars_step);
|
||||||
if (nscalars_skip)
|
if (nscalars_skip)
|
||||||
adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
|
adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
|
||||||
adjust, nscalars_skip);
|
adjust, nscalars_skip);
|
||||||
@ -531,14 +527,16 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
|
|||||||
first_limit = test_limit;
|
first_limit = test_limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Provide a definition of each mask in the group. */
|
/* Convert the IV value to the comparison type (either a no-op or
|
||||||
tree next_mask = NULL_TREE;
|
a demotion). */
|
||||||
tree mask;
|
|
||||||
unsigned int i;
|
|
||||||
gimple_seq test_seq = NULL;
|
gimple_seq test_seq = NULL;
|
||||||
test_index = gimple_convert (&test_seq, compare_type, test_index);
|
test_index = gimple_convert (&test_seq, compare_type, test_index);
|
||||||
gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
|
gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
|
||||||
|
|
||||||
|
/* Provide a definition of each mask in the group. */
|
||||||
|
tree next_mask = NULL_TREE;
|
||||||
|
tree mask;
|
||||||
|
unsigned int i;
|
||||||
FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
|
FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
|
||||||
{
|
{
|
||||||
/* Previous masks will cover BIAS scalars. This mask covers the
|
/* Previous masks will cover BIAS scalars. This mask covers the
|
||||||
@ -672,9 +670,6 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
|
|||||||
niters = gimple_convert (&preheader_seq, compare_type, niters);
|
niters = gimple_convert (&preheader_seq, compare_type, niters);
|
||||||
|
|
||||||
widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
|
widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
|
||||||
/* Get the vectorization factor in tree form. */
|
|
||||||
tree vf = build_int_cst (compare_type,
|
|
||||||
LOOP_VINFO_VECT_FACTOR (loop_vinfo));
|
|
||||||
|
|
||||||
/* Iterate over all the rgroups and fill in their masks. We could use
|
/* Iterate over all the rgroups and fill in their masks. We could use
|
||||||
the first mask from any rgroup for the loop condition; here we
|
the first mask from any rgroup for the loop condition; here we
|
||||||
@ -709,7 +704,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
|
|||||||
/* Set up all masks for this group. */
|
/* Set up all masks for this group. */
|
||||||
test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
|
test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
|
||||||
&preheader_seq,
|
&preheader_seq,
|
||||||
loop_cond_gsi, rgm, vf,
|
loop_cond_gsi, rgm,
|
||||||
niters, niters_skip,
|
niters, niters_skip,
|
||||||
might_wrap_p);
|
might_wrap_p);
|
||||||
}
|
}
|
||||||
|
@ -1062,7 +1062,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
|
|||||||
tree cmp_type = NULL_TREE;
|
tree cmp_type = NULL_TREE;
|
||||||
tree iv_type = NULL_TREE;
|
tree iv_type = NULL_TREE;
|
||||||
widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
|
widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
|
||||||
widest_int iv_precision = UINT_MAX;
|
unsigned int iv_precision = UINT_MAX;
|
||||||
|
|
||||||
if (iv_limit != -1)
|
if (iv_limit != -1)
|
||||||
iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
|
iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
|
||||||
@ -1083,12 +1083,12 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
|
|||||||
best choice:
|
best choice:
|
||||||
|
|
||||||
- An IV that's Pmode or wider is more likely to be reusable
|
- An IV that's Pmode or wider is more likely to be reusable
|
||||||
in address calculations than an IV that's narrower than
|
in address calculations than an IV that's narrower than
|
||||||
Pmode.
|
Pmode.
|
||||||
|
|
||||||
- Doing the comparison in IV_PRECISION or wider allows
|
- Doing the comparison in IV_PRECISION or wider allows
|
||||||
a natural 0-based IV, whereas using a narrower comparison
|
a natural 0-based IV, whereas using a narrower comparison
|
||||||
type requires mitigations against wrap-around.
|
type requires mitigations against wrap-around.
|
||||||
|
|
||||||
Conversely, if the IV limit is variable, doing the comparison
|
Conversely, if the IV limit is variable, doing the comparison
|
||||||
in a wider type than the original type can introduce
|
in a wider type than the original type can introduce
|
||||||
|
Loading…
Reference in New Issue
Block a user