tree-vect-stmts.c (enum scan_store_kind): New type.

* tree-vect-stmts.c (enum scan_store_kind): New type.
	(scan_store_can_perm_p): Change last argument from int * to
	vec<enum scan_store_kind> *, record precisely which permutations
	need whole vector left shift or that plus VEC_COND_EXPR.
	(vectorizable_scan_store): Adjust caller, use whole vector left shift
	and additional VEC_COND_EXPR only for those iterations that need it.

From-SVN: r272497
This commit is contained in:
Jakub Jelinek 2019-06-20 09:06:08 +02:00 committed by Jakub Jelinek
parent 170f2d181d
commit 48b63a31f8
2 changed files with 83 additions and 42 deletions

View File

@ -1,3 +1,12 @@
2019-06-20 Jakub Jelinek <jakub@redhat.com>
* tree-vect-stmts.c (enum scan_store_kind): New type.
(scan_store_can_perm_p): Change last argument from int * to
vec<enum scan_store_kind> *, record precisely which permutations
need whole vector left shift or that plus VEC_COND_EXPR.
(vectorizable_scan_store): Adjust caller, use whole vector left shift
and additional VEC_COND_EXPR only for those iterations that need it.
2019-06-20 Alexandre Oliva <oliva@adacore.com>
* config.gcc: Fix ARM --with-fpu checking and error message.

View File

@ -6354,13 +6354,27 @@ scan_operand_equal_p (tree ref1, tree ref2)
}
/* Operation used to combine vectors at one step of a scan store.  */
enum scan_store_kind
{
  /* Plain permutation of the operands.  */
  scan_store_kind_perm = 0,

  /* Whole vector left shift standing in for the permutation; the zeros
     it shifts in already match an all-zero initializer.  */
  scan_store_kind_lshift_zero = 1,

  /* Whole vector left shift followed by a VEC_COND_EXPR that replaces
     the shifted-in zeros when the initializer is not zero.  */
  scan_store_kind_lshift_cond = 2
};
/* Function scan_store_can_perm_p.
Verify if we can perform the needed permutations or whole vector shifts.
Return -1 on failure, otherwise exact log2 of vectype's nunits. */
Return -1 on failure, otherwise exact log2 of vectype's nunits.
USE_WHOLE_VECTOR, if non-NULL, is a vector of enum scan_store_kind
recording which operation to do at each step; it is left empty when
every step is a normal permutation. */
static int
scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
scan_store_can_perm_p (tree vectype, tree init,
vec<enum scan_store_kind> *use_whole_vector = NULL)
{
enum machine_mode vec_mode = TYPE_MODE (vectype);
unsigned HOST_WIDE_INT nunits;
@ -6371,50 +6385,59 @@ scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
return -1;
int i;
enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
for (i = 0; i <= units_log2; ++i)
{
unsigned HOST_WIDE_INT j, k;
enum scan_store_kind kind = scan_store_kind_perm;
vec_perm_builder sel (nunits, nunits, 1);
sel.quick_grow (nunits);
if (i == 0)
if (i == units_log2)
{
for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
}
else
{
for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j)
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j;
for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k;
}
vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits);
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (!can_vec_perm_const_p (vec_mode, indices))
break;
}
if (i == 0)
return -1;
if (i <= units_log2)
{
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
return -1;
int kind = 1;
/* Whole vector shifts shift in zeros, so if init is all zero constant,
there is no need to do anything further. */
if ((TREE_CODE (init) != INTEGER_CST
&& TREE_CODE (init) != REAL_CST)
|| !initializer_zerop (init))
{
tree masktype = build_same_sized_truth_vector_type (vectype);
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
if (i == units_log2)
return -1;
kind = 2;
if (whole_vector_shift_kind == scan_store_kind_perm)
{
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
return -1;
whole_vector_shift_kind = scan_store_kind_lshift_zero;
/* Whole vector shifts shift in zeros, so if init is all zero
constant, there is no need to do anything further. */
if ((TREE_CODE (init) != INTEGER_CST
&& TREE_CODE (init) != REAL_CST)
|| !initializer_zerop (init))
{
tree masktype = build_same_sized_truth_vector_type (vectype);
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
return -1;
whole_vector_shift_kind = scan_store_kind_lshift_cond;
}
}
kind = whole_vector_shift_kind;
}
if (use_whole_vector)
{
if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
use_whole_vector->safe_grow_cleared (i);
if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
use_whole_vector->safe_push (kind);
}
if (use_whole_vector_p)
*use_whole_vector_p = kind;
}
return units_log2;
}
@ -6726,11 +6749,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable ();
int use_whole_vector_p = 0;
int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector_p);
auto_vec<enum scan_store_kind, 16> use_whole_vector;
int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
gcc_assert (units_log2 > 0);
auto_vec<tree, 16> perms;
perms.quick_grow (units_log2 + 1);
tree zero_vec = NULL_TREE, masktype = NULL_TREE;
for (int i = 0; i <= units_log2; ++i)
{
unsigned HOST_WIDE_INT j, k;
@ -6739,23 +6763,28 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (i == units_log2)
for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
else
{
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j;
for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k;
}
else
{
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j;
for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k;
}
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (use_whole_vector_p && i < units_log2)
perms[i] = vect_gen_perm_mask_any (vectype, indices);
if (!use_whole_vector.is_empty ()
&& use_whole_vector[i] != scan_store_kind_perm)
{
if (zero_vec == NULL_TREE)
zero_vec = build_zero_cst (vectype);
if (masktype == NULL_TREE
&& use_whole_vector[i] == scan_store_kind_lshift_cond)
masktype = build_same_sized_truth_vector_type (vectype);
perms[i] = vect_gen_perm_mask_any (vectype, indices);
}
else
perms[i] = vect_gen_perm_mask_checked (vectype, indices);
}
tree zero_vec = use_whole_vector_p ? build_zero_cst (vectype) : NULL_TREE;
tree masktype = (use_whole_vector_p == 2
? build_same_sized_truth_vector_type (vectype) : NULL_TREE);
stmt_vec_info prev_stmt_info = NULL;
tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE;
@ -6788,7 +6817,10 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
{
tree new_temp = make_ssa_name (vectype);
gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
zero_vec ? zero_vec : vec_oprnd1, v,
(zero_vec
&& (use_whole_vector[i]
!= scan_store_kind_perm))
? zero_vec : vec_oprnd1, v,
perms[i]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
if (prev_stmt_info == NULL)
@ -6797,7 +6829,7 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
if (use_whole_vector_p == 2)
if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
{
/* Whole vector shift shifted in zero bits, but if *init
is not initializer_zerop, we need to replace those elements