AArch64: do not keep negated mask and inverse mask live at the same time
The following example:

void f11(double * restrict z, double * restrict w, double * restrict x,
	 double * restrict y, int n)
{
    for (int i = 0; i < n; i++) {
	z[i] = (w[i] > 0) ? w[i] : y[i];
    }
}

currently generates:

	ptrue	p2.b, all
	ld1d	z0.d, p0/z, [x1, x2, lsl 3]
	fcmgt	p1.d, p2/z, z0.d, #0.0
	bic	p3.b, p2/z, p0.b, p1.b
	ld1d	z1.d, p3/z, [x3, x2, lsl 3]

and after the previous patches generates:

	ptrue	p3.b, all
	ld1d	z0.d, p0/z, [x1, x2, lsl 3]
	fcmgt	p1.d, p0/z, z0.d, #0.0
	fcmgt	p2.d, p3/z, z0.d, #0.0
	not	p1.b, p0/z, p1.b
	ld1d	z1.d, p1/z, [x3, x2, lsl 3]

where a duplicate comparison is performed for w[i] > 0.

This is because in the vectorizer we're emitting a comparison for both a and ~a
where we just need to emit one of them and invert the other.  After this patch
we generate:

	ld1d	z0.d, p0/z, [x1, x2, lsl 3]
	fcmgt	p1.d, p0/z, z0.d, #0.0
	mov	p2.b, p1.b
	not	p1.b, p0/z, p1.b
	ld1d	z1.d, p1/z, [x3, x2, lsl 3]

In order to perform the check I have to fully expand the NOT stmts when
recording them, as the SSA names for the top-level expressions differ but
their arguments don't.  E.g. in _31 = ~_34 the value of _34 differs but not
the operands in _34.

But we only do this when the operation is an ordered one, because mixing
ordered and unordered expressions can lead to de-optimized code.

Note: This patch series is working incrementally towards generating the most
      efficient code for this and other loops in small steps.  The mov is
      created by postreload when it does a late CSE.

gcc/ChangeLog:

	* tree-vectorizer.h (struct scalar_cond_masked_key): Add inverted_p.
	(default_hash_traits<scalar_cond_masked_key>): Likewise.
	* tree-vect-stmts.c (vectorizable_condition): Check if inverse of mask
	is live.
	* tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
	Register mask inverses.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/pred-not-gen-1.c: Update testcase.
	* gcc.target/aarch64/sve/pred-not-gen-2.c: Update testcase.
	* gcc.target/aarch64/sve/pred-not-gen-3.c: Update testcase.
	* gcc.target/aarch64/sve/pred-not-gen-4.c: Update testcase.
This commit is contained in:
parent
8ed62c929c
commit
86ffc845b2
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3" } */
|
||||
|
||||
/*
|
||||
** f10:
|
||||
|
@ -21,3 +21,4 @@ void f10(double * restrict z, double * restrict w, double * restrict x, double *
|
|||
|
||||
/* { dg-final { scan-assembler-not {\tbic\t} } } */
|
||||
/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, #0} 1 } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3" } */
|
||||
|
||||
/*
|
||||
** f11:
|
||||
|
@ -21,3 +21,4 @@ void f11(double * restrict z, double * restrict w, double * restrict x, double *
|
|||
|
||||
/* { dg-final { scan-assembler-not {\tbic\t} } } */
|
||||
/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, #0.0} 1 } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3" } */
|
||||
|
||||
/*
|
||||
** f12:
|
||||
|
@ -19,3 +19,4 @@ void f12(int * restrict z, int * restrict w, int * restrict x, int * restrict y,
|
|||
|
||||
/* { dg-final { scan-assembler-not {\tbic\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} } } */
|
||||
/* { dg-final { scan-assembler-times {\tcmple\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, #0} 1 } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
|
||||
/* { dg-options "-O3 --save-temps" } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3" } */
|
||||
|
||||
#include <math.h>
|
||||
|
||||
|
@ -12,3 +12,4 @@ void f13(double * restrict z, double * restrict w, double * restrict x, double *
|
|||
|
||||
/* { dg-final { scan-assembler-not {\tbic\t} } } */
|
||||
/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d} 1 } } */
|
||||
|
|
|
@ -10344,6 +10344,7 @@ vectorizable_condition (vec_info *vinfo,
|
|||
else
|
||||
{
|
||||
bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
|
||||
tree_code orig_code = cond.code;
|
||||
cond.code = invert_tree_comparison (cond.code, honor_nans);
|
||||
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
|
||||
{
|
||||
|
@ -10351,6 +10352,22 @@ vectorizable_condition (vec_info *vinfo,
|
|||
cond_code = cond.code;
|
||||
swap_cond_operands = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Try the inverse of the current mask. We check if the
|
||||
inverse mask is live and if so we generate a negate of
|
||||
the current mask such that we still honor NaNs. */
|
||||
cond.inverted_p = true;
|
||||
cond.code = orig_code;
|
||||
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
|
||||
{
|
||||
bitop1 = orig_code;
|
||||
bitop2 = BIT_NOT_EXPR;
|
||||
masks = &LOOP_VINFO_MASKS (loop_vinfo);
|
||||
cond_code = cond.code;
|
||||
swap_cond_operands = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1678,6 +1678,7 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
|
|||
this->code = TREE_CODE (t);
|
||||
this->op0 = TREE_OPERAND (t, 0);
|
||||
this->op1 = TREE_OPERAND (t, 1);
|
||||
this->inverted_p = false;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1690,13 +1691,31 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
|
|||
this->code = code;
|
||||
this->op0 = gimple_assign_rhs1 (stmt);
|
||||
this->op1 = gimple_assign_rhs2 (stmt);
|
||||
this->inverted_p = false;
|
||||
return;
|
||||
}
|
||||
else if (code == BIT_NOT_EXPR)
|
||||
{
|
||||
tree n_op = gimple_assign_rhs1 (stmt);
|
||||
if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
|
||||
{
|
||||
code = gimple_assign_rhs_code (stmt);
|
||||
if (TREE_CODE_CLASS (code) == tcc_comparison)
|
||||
{
|
||||
this->code = code;
|
||||
this->op0 = gimple_assign_rhs1 (stmt);
|
||||
this->op1 = gimple_assign_rhs2 (stmt);
|
||||
this->inverted_p = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->code = NE_EXPR;
|
||||
this->op0 = t;
|
||||
this->op1 = build_zero_cst (TREE_TYPE (t));
|
||||
this->inverted_p = false;
|
||||
}
|
||||
|
||||
/* See the comment above the declaration for details. */
|
||||
|
|
|
@ -266,6 +266,7 @@ struct scalar_cond_masked_key
|
|||
void get_cond_ops_from_tree (tree);
|
||||
|
||||
unsigned ncopies;
|
||||
bool inverted_p;
|
||||
tree_code code;
|
||||
tree op0;
|
||||
tree op1;
|
||||
|
@ -285,6 +286,7 @@ struct default_hash_traits<scalar_cond_masked_key>
|
|||
inchash::add_expr (v.op0, h, 0);
|
||||
inchash::add_expr (v.op1, h, 0);
|
||||
h.add_int (v.ncopies);
|
||||
h.add_flag (v.inverted_p);
|
||||
return h.end ();
|
||||
}
|
||||
|
||||
|
@ -292,9 +294,10 @@ struct default_hash_traits<scalar_cond_masked_key>
|
|||
equal (value_type existing, value_type candidate)
|
||||
{
|
||||
return (existing.ncopies == candidate.ncopies
|
||||
&& existing.code == candidate.code
|
||||
&& operand_equal_p (existing.op0, candidate.op0, 0)
|
||||
&& operand_equal_p (existing.op1, candidate.op1, 0));
|
||||
&& existing.code == candidate.code
|
||||
&& existing.inverted_p == candidate.inverted_p
|
||||
&& operand_equal_p (existing.op0, candidate.op0, 0)
|
||||
&& operand_equal_p (existing.op1, candidate.op1, 0));
|
||||
}
|
||||
|
||||
static const bool empty_zero_p = true;
|
||||
|
@ -303,6 +306,7 @@ struct default_hash_traits<scalar_cond_masked_key>
|
|||
mark_empty (value_type &v)
|
||||
{
|
||||
v.ncopies = 0;
|
||||
v.inverted_p = false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
Loading…
Reference in New Issue