tree-vect-stmts.c (enum scan_store_kind): New type.
* tree-vect-stmts.c (enum scan_store_kind): New type.
(scan_store_can_perm_p): Change last argument from int * to vec<enum scan_store_kind> *, record precisely which permutations need whole vector left shift or that plus VEC_COND_EXPR.
(vectorizable_scan_store): Adjust caller, use whole vector left shift and additional VEC_COND_EXPR only for those iterations that need it.

From-SVN: r272497
This commit is contained in:
parent
170f2d181d
commit
48b63a31f8
|
@ -1,3 +1,12 @@
|
|||
2019-06-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* tree-vect-stmts.c (enum scan_store_kind): New type.
|
||||
(scan_store_can_perm_p): Change last argument from int * to
|
||||
vec<enum scan_store_kind> *, record precisely which permutations
|
||||
need whole vector left shift or that plus VEC_COND_EXPR.
|
||||
(vectorizable_scan_store): Adjust caller, use whole vector left shift
|
||||
and additional VEC_COND_EXPR only for those iterations that need it.
|
||||
|
||||
2019-06-20 Alexandre Oliva <oliva@adacore.com>
|
||||
|
||||
* config.gcc: Fix ARM --with-fpu checking and error message.
|
||||
|
|
|
@ -6354,13 +6354,27 @@ scan_operand_equal_p (tree ref1, tree ref2)
|
|||
}
|
||||
|
||||
|
||||
/* Kinds of operation that a single scan store step can use. */
enum scan_store_kind {
|
||||
/* Normal permutation. */
|
||||
scan_store_kind_perm,
|
||||
|
||||
/* Whole vector left shift permutation with zero init. */
|
||||
scan_store_kind_lshift_zero,
|
||||
|
||||
/* Whole vector left shift permutation and VEC_COND_EXPR. */
|
||||
scan_store_kind_lshift_cond
|
||||
};
|
||||
|
||||
/* Function scan_store_can_perm_p.
|
||||
|
||||
Verify if we can perform the needed permutations or whole vector shifts.
|
||||
Return -1 on failure, otherwise exact log2 of vectype's nunits. */
|
||||
Return -1 on failure, otherwise exact log2 of vectype's nunits.
|
||||
USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which operation
|
||||
to do at each step. */
|
||||
|
||||
static int
|
||||
scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
|
||||
scan_store_can_perm_p (tree vectype, tree init,
|
||||
vec<enum scan_store_kind> *use_whole_vector = NULL)
|
||||
{
|
||||
enum machine_mode vec_mode = TYPE_MODE (vectype);
|
||||
unsigned HOST_WIDE_INT nunits;
|
||||
|
@ -6371,50 +6385,59 @@ scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
|
|||
return -1;
|
||||
|
||||
int i;
|
||||
enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
|
||||
for (i = 0; i <= units_log2; ++i)
|
||||
{
|
||||
unsigned HOST_WIDE_INT j, k;
|
||||
enum scan_store_kind kind = scan_store_kind_perm;
|
||||
vec_perm_builder sel (nunits, nunits, 1);
|
||||
sel.quick_grow (nunits);
|
||||
if (i == 0)
|
||||
if (i == units_log2)
|
||||
{
|
||||
for (j = 0; j < nunits; ++j)
|
||||
sel[j] = nunits - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j)
|
||||
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
|
||||
sel[j] = j;
|
||||
for (k = 0; j < nunits; ++j, ++k)
|
||||
sel[j] = nunits + k;
|
||||
}
|
||||
vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits);
|
||||
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
|
||||
if (!can_vec_perm_const_p (vec_mode, indices))
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
return -1;
|
||||
|
||||
if (i <= units_log2)
|
||||
{
|
||||
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
|
||||
return -1;
|
||||
int kind = 1;
|
||||
/* Whole vector shifts shift in zeros, so if init is all zero constant,
|
||||
there is no need to do anything further. */
|
||||
if ((TREE_CODE (init) != INTEGER_CST
|
||||
&& TREE_CODE (init) != REAL_CST)
|
||||
|| !initializer_zerop (init))
|
||||
{
|
||||
tree masktype = build_same_sized_truth_vector_type (vectype);
|
||||
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
|
||||
if (i == units_log2)
|
||||
return -1;
|
||||
kind = 2;
|
||||
|
||||
if (whole_vector_shift_kind == scan_store_kind_perm)
|
||||
{
|
||||
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
|
||||
return -1;
|
||||
whole_vector_shift_kind = scan_store_kind_lshift_zero;
|
||||
/* Whole vector shifts shift in zeros, so if init is all zero
|
||||
constant, there is no need to do anything further. */
|
||||
if ((TREE_CODE (init) != INTEGER_CST
|
||||
&& TREE_CODE (init) != REAL_CST)
|
||||
|| !initializer_zerop (init))
|
||||
{
|
||||
tree masktype = build_same_sized_truth_vector_type (vectype);
|
||||
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
|
||||
return -1;
|
||||
whole_vector_shift_kind = scan_store_kind_lshift_cond;
|
||||
}
|
||||
}
|
||||
kind = whole_vector_shift_kind;
|
||||
}
|
||||
if (use_whole_vector)
|
||||
{
|
||||
if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
|
||||
use_whole_vector->safe_grow_cleared (i);
|
||||
if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
|
||||
use_whole_vector->safe_push (kind);
|
||||
}
|
||||
if (use_whole_vector_p)
|
||||
*use_whole_vector_p = kind;
|
||||
}
|
||||
|
||||
return units_log2;
|
||||
}
|
||||
|
||||
|
@ -6726,11 +6749,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
unsigned HOST_WIDE_INT nunits;
|
||||
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
|
||||
gcc_unreachable ();
|
||||
int use_whole_vector_p = 0;
|
||||
int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector_p);
|
||||
auto_vec<enum scan_store_kind, 16> use_whole_vector;
|
||||
int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
|
||||
gcc_assert (units_log2 > 0);
|
||||
auto_vec<tree, 16> perms;
|
||||
perms.quick_grow (units_log2 + 1);
|
||||
tree zero_vec = NULL_TREE, masktype = NULL_TREE;
|
||||
for (int i = 0; i <= units_log2; ++i)
|
||||
{
|
||||
unsigned HOST_WIDE_INT j, k;
|
||||
|
@ -6739,23 +6763,28 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
if (i == units_log2)
|
||||
for (j = 0; j < nunits; ++j)
|
||||
sel[j] = nunits - 1;
|
||||
else
|
||||
{
|
||||
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
|
||||
sel[j] = j;
|
||||
for (k = 0; j < nunits; ++j, ++k)
|
||||
sel[j] = nunits + k;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
|
||||
sel[j] = j;
|
||||
for (k = 0; j < nunits; ++j, ++k)
|
||||
sel[j] = nunits + k;
|
||||
}
|
||||
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
|
||||
if (use_whole_vector_p && i < units_log2)
|
||||
perms[i] = vect_gen_perm_mask_any (vectype, indices);
|
||||
if (!use_whole_vector.is_empty ()
|
||||
&& use_whole_vector[i] != scan_store_kind_perm)
|
||||
{
|
||||
if (zero_vec == NULL_TREE)
|
||||
zero_vec = build_zero_cst (vectype);
|
||||
if (masktype == NULL_TREE
|
||||
&& use_whole_vector[i] == scan_store_kind_lshift_cond)
|
||||
masktype = build_same_sized_truth_vector_type (vectype);
|
||||
perms[i] = vect_gen_perm_mask_any (vectype, indices);
|
||||
}
|
||||
else
|
||||
perms[i] = vect_gen_perm_mask_checked (vectype, indices);
|
||||
}
|
||||
|
||||
tree zero_vec = use_whole_vector_p ? build_zero_cst (vectype) : NULL_TREE;
|
||||
tree masktype = (use_whole_vector_p == 2
|
||||
? build_same_sized_truth_vector_type (vectype) : NULL_TREE);
|
||||
stmt_vec_info prev_stmt_info = NULL;
|
||||
tree vec_oprnd1 = NULL_TREE;
|
||||
tree vec_oprnd2 = NULL_TREE;
|
||||
|
@ -6788,7 +6817,10 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
tree new_temp = make_ssa_name (vectype);
|
||||
gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
|
||||
zero_vec ? zero_vec : vec_oprnd1, v,
|
||||
(zero_vec
|
||||
&& (use_whole_vector[i]
|
||||
!= scan_store_kind_perm))
|
||||
? zero_vec : vec_oprnd1, v,
|
||||
perms[i]);
|
||||
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
|
||||
if (prev_stmt_info == NULL)
|
||||
|
@ -6797,7 +6829,7 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
|
||||
prev_stmt_info = new_stmt_info;
|
||||
|
||||
if (use_whole_vector_p == 2)
|
||||
if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
|
||||
{
|
||||
/* Whole vector shift shifted in zero bits, but if *init
|
||||
is not initializer_zerop, we need to replace those elements
|
||||
|
|
Loading…
Reference in New Issue