Extend is_cond_scalar_reduction to handle bit_and/bit_xor/bit_ior.
This will enable transformations like

-  # sum1_50 = PHI <prephitmp_64(13), 0(4)>
-  # sum2_52 = PHI <sum2_21(13), 0(4)>
+  # sum1_50 = PHI <_87(13), 0(4)>
+  # sum2_52 = PHI <_89(13), 0(4)>
   # ivtmp_62 = PHI <ivtmp_61(13), 64(4)>
   i.2_7 = (long unsigned int) i_49;
   _8 = i.2_7 * 8;
   ...
   vec1_i_38 = vec1_29 >> _10;
   vec2_i_39 = vec2_31 >> _10;
   _11 = vec1_i_38 & 1;
-  _63 = tmp_37 ^ sum1_50;
-  prephitmp_64 = _11 == 0 ? sum1_50 : _63;
+  _ifc__86 = _11 != 0 ? tmp_37 : 0;
+  _87 = sum1_50 ^ _ifc__86;
   _12 = vec2_i_39 & 1;
   :

so that the vectorizer won't fail due to

  /* If this isn't a nested cycle or if the nested cycle reduction value
     is used outside of the inner loop we cannot handle uses of the reduction
     value.  */
  if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction used in loop.\n");
      return NULL;
    }

gcc/ChangeLog:

        PR tree-optimization/103126
        * tree-vect-loop.c (neutral_op_for_reduction): Remove static.
        * tree-vectorizer.h (neutral_op_for_reduction): Declare.
        * tree-if-conv.c : Include tree-vectorizer.h.
        (is_cond_scalar_reduction): Handle
        BIT_XOR_EXPR/BIT_IOR_EXPR/BIT_AND_EXPR.
        (convert_scalar_cond_reduction): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/ifcvt-reduction-logic-op.c: New test.
This commit is contained in:
parent
f2572a398d
commit
249b4eeef1
|
@ -0,0 +1,80 @@
|
|||
/* PR tree-optimization/103126. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 3 "vect" } } */
|
||||
#include<stdint.h>
|
||||
|
||||
/* Conditional XOR reductions over the 64 bits of a[n] and b[n].

   For every outer iteration the inner loop walks i = 0..63, loads mat[i],
   and XORs it into sum1 when bit i of vec1 is set and into sum2 when bit i
   of vec2 is set.  The two accumulators use two different source forms of
   the same conditional update (a ?: expression and a guarded statement).
   After the inner loop, sum1 and sum2 are XORed into two consecutive
   elements of ANS, and N is decremented once after each store, i.e. by two
   per outer iteration.

   NOTE(review): "nolcd" presumably stands for "no loop-carried dependency"
   -- confirm.  The file is compile-only (dg-do compile), so the function is
   only a vectorizer input and is not executed by the test.  */
void xor_bit_arr_nolcd (uint64_t *__restrict mat, uint64_t* a,uint64_t* b, uint64_t *__restrict ans,
|
||||
int64_t n)
|
||||
{
|
||||
int64_t i;
|
||||
uint64_t vec1, sum1;
|
||||
uint64_t vec2, sum2;
|
||||
|
||||
while (n > 0) {
|
||||
sum1 = 0;
|
||||
vec1 = a[n];
|
||||
sum2 = 0;
|
||||
vec2 = b[n];
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
uint64_t tmp = mat[i];
|
||||
uint64_t vec1_i = (vec1 >> i);
|
||||
uint64_t vec2_i = (vec2 >> i);
/* ?: form: the else arm is 0, which leaves an XOR accumulator unchanged.  */
sum1 ^= (vec1_i & 1) ? tmp : 0;
|
||||
/* Guarded form: the accumulator is only updated when the bit is set.  */
if (vec2_i&1) sum2 ^= tmp;
|
||||
}
|
||||
*ans++ ^= sum1; n--;
|
||||
*ans++ ^= sum2; n--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Conditional inclusive-OR reductions over the 64 bits of a[n] and b[n].

   For every outer iteration the inner loop walks i = 0..63, loads mat[i],
   and ORs it into sum1 when bit i of vec1 is set and into sum2 when bit i
   of vec2 is set.  The two accumulators use two different source forms of
   the same conditional update (a ?: expression and a guarded statement).
   After the inner loop, sum1 and sum2 are ORed into two consecutive
   elements of ANS, and N is decremented once after each store, i.e. by two
   per outer iteration.

   NOTE(review): "nolcd" presumably stands for "no loop-carried dependency"
   -- confirm.  The file is compile-only (dg-do compile), so the function is
   only a vectorizer input and is not executed by the test.  */
void ior_bit_arr_nolcd (uint64_t *__restrict mat, uint64_t* a,uint64_t* b, uint64_t *__restrict ans,
|
||||
int64_t n)
|
||||
{
|
||||
int64_t i;
|
||||
uint64_t vec1, sum1;
|
||||
uint64_t vec2, sum2;
|
||||
|
||||
while (n > 0) {
|
||||
sum1 = 0;
|
||||
vec1 = a[n];
|
||||
sum2 = 0;
|
||||
vec2 = b[n];
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
uint64_t tmp = mat[i];
|
||||
uint64_t vec1_i = (vec1 >> i);
|
||||
uint64_t vec2_i = (vec2 >> i);
/* ?: form: the else arm is 0, which leaves an OR accumulator unchanged.  */
sum1 |= (vec1_i & 1) ? tmp : 0;
|
||||
/* Guarded form: the accumulator is only updated when the bit is set.  */
if (vec2_i&1) sum2 |= tmp;
|
||||
}
|
||||
*ans++ |= sum1; n--;
|
||||
*ans++ |= sum2; n--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Conditional AND reductions over the 64 bits of a[n] and b[n].

   For every outer iteration the inner loop walks i = 0..63, loads mat[i],
   and ANDs it into sum1 when bit i of vec1 is set and into sum2 when bit i
   of vec2 is set.  The two accumulators use two different source forms of
   the same conditional update (a ?: expression and a guarded statement).
   After the inner loop, sum1 and sum2 are ANDed into two consecutive
   elements of ANS, and N is decremented once after each store, i.e. by two
   per outer iteration.

   NOTE(review): "nolcd" presumably stands for "no loop-carried dependency"
   -- confirm.  The file is compile-only (dg-do compile), so the function is
   only a vectorizer input and is not executed by the test.  */
void and_bit_arr_nolcd (uint64_t *__restrict mat, uint64_t* a,uint64_t* b, uint64_t *__restrict ans,
|
||||
int64_t n)
|
||||
{
|
||||
int64_t i;
|
||||
uint64_t vec1, sum1;
|
||||
uint64_t vec2, sum2;
|
||||
|
||||
while (n > 0) {
/* sum1 starts with all bits set (-1 converted to uint64_t).  */
sum1 = -1;
|
||||
vec1 = a[n];
/* NOTE(review): sum2 starts at 0, so every "sum2 &= tmp" below leaves it
   at 0, unlike sum1 which starts at -1.  Possibly intentional for this
   compile-only test -- confirm before changing.  */
sum2 = 0;
|
||||
vec2 = b[n];
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
uint64_t tmp = mat[i];
|
||||
uint64_t vec1_i = (vec1 >> i);
|
||||
uint64_t vec2_i = (vec2 >> i);
/* ?: form: the else arm is -1 (all ones), which leaves an AND accumulator
   unchanged.  */
sum1 &= (vec1_i & 1) ? tmp : -1;
|
||||
/* Guarded form: the accumulator is only updated when the bit is set.  */
if (vec2_i&1) sum2 &= tmp;
|
||||
}
|
||||
*ans++ &= sum1; n--;
|
||||
*ans++ &= sum2; n--;
|
||||
}
|
||||
}
|
|
@ -120,6 +120,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tree-ssa-sccvn.h"
|
||||
#include "tree-cfgcleanup.h"
|
||||
#include "tree-ssa-dse.h"
|
||||
#include "tree-vectorizer.h"
|
||||
|
||||
/* Only handle PHIs with no more arguments unless we are asked to by
|
||||
simd pragma. */
|
||||
|
@ -1732,7 +1733,11 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
|
|||
reduction_op = gimple_assign_rhs_code (stmt);
|
||||
}
|
||||
|
||||
if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
|
||||
if (reduction_op != PLUS_EXPR
|
||||
&& reduction_op != MINUS_EXPR
|
||||
&& reduction_op != BIT_IOR_EXPR
|
||||
&& reduction_op != BIT_XOR_EXPR
|
||||
&& reduction_op != BIT_AND_EXPR)
|
||||
return false;
|
||||
r_op1 = gimple_assign_rhs1 (stmt);
|
||||
r_op2 = gimple_assign_rhs2 (stmt);
|
||||
|
@ -1742,7 +1747,7 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
|
|||
|
||||
/* Make R_OP1 to hold reduction variable. */
|
||||
if (r_nop2 == PHI_RESULT (header_phi)
|
||||
&& reduction_op == PLUS_EXPR)
|
||||
&& commutative_tree_code (reduction_op))
|
||||
{
|
||||
std::swap (r_op1, r_op2);
|
||||
std::swap (r_nop1, r_nop2);
|
||||
|
@ -1811,7 +1816,8 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
|
|||
tree rhs1 = gimple_assign_rhs1 (reduc);
|
||||
tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
|
||||
tree c;
|
||||
tree zero = build_zero_cst (TREE_TYPE (rhs1));
|
||||
enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
|
||||
tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
|
||||
gimple_seq stmts = NULL;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
|
@ -1824,14 +1830,14 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
|
|||
of reduction rhs. */
|
||||
c = fold_build_cond_expr (TREE_TYPE (rhs1),
|
||||
unshare_expr (cond),
|
||||
swap ? zero : op1,
|
||||
swap ? op1 : zero);
|
||||
swap ? op_nochange : op1,
|
||||
swap ? op1 : op_nochange);
|
||||
|
||||
/* Create assignment stmt and insert it at GSI. */
|
||||
new_assign = gimple_build_assign (tmp, c);
|
||||
gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
|
||||
/* Build rhs for unconditional increment/decrement. */
|
||||
rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc),
|
||||
/* Build rhs for unconditional increment/decrement/logic_operation. */
|
||||
rhs = gimple_build (&stmts, reduction_op,
|
||||
TREE_TYPE (rhs1), op0, tmp);
|
||||
|
||||
if (has_nop)
|
||||
|
|
|
@ -3330,7 +3330,7 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
|
|||
of the scalar elements. If the reduction has just a single initial value
|
||||
then INITIAL_VALUE is that value, otherwise it is null. */
|
||||
|
||||
static tree
|
||||
tree
|
||||
neutral_op_for_reduction (tree scalar_type, tree_code code, tree initial_value)
|
||||
{
|
||||
switch (code)
|
||||
|
|
|
@ -2120,6 +2120,7 @@ extern tree vect_create_addr_base_for_vector_ref (vec_info *,
|
|||
tree);
|
||||
|
||||
/* In tree-vect-loop.c. */
|
||||
extern tree neutral_op_for_reduction (tree, tree_code, tree);
|
||||
extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
|
||||
bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *);
|
||||
/* Used in tree-vect-loop-manip.c */
|
||||
|
|
Loading…
Reference in New Issue