Final value replacement improvements for until-wrap loops.
This middle-end patch is inspired by the Richard Beiner's until-wrap loop example in PR tree-optimization/101145. unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } For this loop, the tree optimizers currently generate: unsigned int foo (unsigned int val, unsigned int start) { unsigned int cnt; unsigned int _1; unsigned int _5; <bb 2> [local count: 118111600]: if (start_3(D) > val_4(D)) goto <bb 3>; [89.00%] else goto <bb 4>; [11.00%] <bb 3> [local count: 105119324]: _1 = start_3(D) + 1; _5 = -start_3(D); cnt_2 = _1 > val_4(D) ? _5 : 1; <bb 4> [local count: 118111600]: # cnt_11 = PHI <cnt_2(3), 0(2)> return cnt_11; } or perhaps slightly easier to read: if (start > val) { cnt = (start+1) > val ? -start : 1; } else cnt = 0; In this snippet, if we know start > val, then (start+1) > val unless start+1 overflows, i.e. (start+1) == 0 and start == ~0. We can use this (loop header) context to simplify the ternary expression to "(start != -1) ? -start : 1", which with a little help from match.pd can be folded to -start. Hence the optimal final value replacement should be: cnt = (start > val) ? -start : 0; Or as now generated by this patch: unsigned int foo (unsigned int val, unsigned int start) { unsigned int cnt; <bb 2> [local count: 118111600]: if (start_3(D) > val_4(D)) goto <bb 3>; [89.00%] else goto <bb 4>; [11.00%] <bb 3> [local count: 105119324]: cnt_2 = -start_3(D); <bb 4> [local count: 118111600]: # cnt_11 = PHI <cnt_2(3), 0(2)> return cnt_11; } We can also improve until-wrap loops that don't have a (suitable) loop header, as determined by simplify_using_initial_conditions. unsigned bar(unsigned val, unsigned start) { unsigned cnt = 0; unsigned i = start; do { cnt++; i++; } while (i > val); return cnt; } which is currently optimized to: unsigned int foo (unsigned int val, unsigned int start) { unsigned int cnt; unsigned int _9; unsigned int _10; <bb 2> [local count: 118111600]: _9 = start_4(D) + 1; _10 = -start_4(D); cnt_3 = val_7(D) < _9 ? _10 : 1; return cnt_3; } Here we have "val < (start+1) ? -start : 1", which again with the help of match.pd can be slightly simplified to "val <= start ? -start : 1" when dealing with unsigned types, because at the complicating value where start == ~0, we fortunately have -start == 1, hence it doesn't matter whether the second or third operand of the ternary operator is returned. To summarize, this patch (in addition to tweaking may_be_zero in number_of_iterations_until_wrap) adds three new constant folding transforms to match.pd. X != C1 ? -X : C2 simplifies to -X when -C1 == C2. which is the generalized form of the simplification above. X != C1 ? ~X : C2 simplifies to ~X when ~C1 == C2. which is the BIT_NOT_EXPR analog of the NEGATE_EXPR case. and the "until-wrap final value replacement without context": (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when X is unsigned, as when X + 1 overflows, X is -1, so -X == 1. 2021-12-01 Roger Sayle <roger@nextmovesoftware.com> Richard Biener <rguenther@suse.de> gcc/ChangeLog * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): Check if simplify_using_initial_conditions allows us to simplify the expression for may_be_zero. * match.pd (X != C ? -X : -C -> -X): New transform. (X != C ? ~X : ~C -> ~X): Likewise. ((X+1) > Y ? -X : 1 -> X >= Y ? -X : 1): Likewise. gcc/testsuite/ChangeLog * gcc.dg/fold-condneg-1.c: New test case. * gcc.dg/fold-condneg-2.c: New test case. * gcc.dg/fold-condnot-1.c: New test case. * gcc.dg/pr101145-1.c: New test case. * gcc.dg/pr101145-2.c: New test case.
This commit is contained in:
parent
5b1ef8b9db
commit
de3e5aae6c
22
gcc/match.pd
22
gcc/match.pd
|
@ -4403,6 +4403,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
(op (min @X { wide_int_to_tree (from_type, real_c1); })
|
(op (min @X { wide_int_to_tree (from_type, real_c1); })
|
||||||
{ wide_int_to_tree (from_type, c2); })))))))))
|
{ wide_int_to_tree (from_type, c2); })))))))))
|
||||||
|
|
||||||
|
/* X != C1 ? -X : C2 simplifies to -X when -C1 == C2. */
|
||||||
|
(simplify
|
||||||
|
(cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2)
|
||||||
|
(if (!TYPE_SATURATING (type)
|
||||||
|
&& (TYPE_OVERFLOW_WRAPS (type)
|
||||||
|
|| !wi::only_sign_bit_p (wi::to_wide (@1)))
|
||||||
|
&& wi::eq_p (wi::neg (wi::to_wide (@1)), wi::to_wide (@2)))
|
||||||
|
@3))
|
||||||
|
|
||||||
|
/* X != C1 ? ~X : C2 simplifies to ~X when ~C1 == C2. */
|
||||||
|
(simplify
|
||||||
|
(cond (ne @0 INTEGER_CST@1) (bit_not@3 @0) INTEGER_CST@2)
|
||||||
|
(if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
|
||||||
|
@3))
|
||||||
|
|
||||||
|
/* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
|
||||||
|
X is unsigned, as when X + 1 overflows, X is -1, so -X == 1. */
|
||||||
|
(simplify
|
||||||
|
(cond (gt (plus @0 integer_onep) @1) (negate @0) integer_onep@2)
|
||||||
|
(if (TYPE_UNSIGNED (type))
|
||||||
|
(cond (ge @0 @1) (negate @0) @2)))
|
||||||
|
|
||||||
(for cnd (cond vec_cond)
|
(for cnd (cond vec_cond)
|
||||||
/* A ? B : (A ? X : C) -> A ? B : C. */
|
/* A ? B : (A ? X : C) -> A ? B : C. */
|
||||||
(simplify
|
(simplify
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
int test_i0(int x)
|
||||||
|
{
|
||||||
|
return x != 0 ? -x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_i1(int x)
|
||||||
|
{
|
||||||
|
return x != 1 ? -x : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_im1(int x)
|
||||||
|
{
|
||||||
|
return x != -1 ? -x : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int test_u0(unsigned int x)
|
||||||
|
{
|
||||||
|
return x != 0 ? -x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int test_u1(unsigned int x)
|
||||||
|
{
|
||||||
|
return x != 1 ? -x : ~0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int test_um1(unsigned int x)
|
||||||
|
{
|
||||||
|
return x != ~0u ? -x : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc0(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 0 ? -x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc1(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 1 ? -x : 255;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc127(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 127 ? -x : 129;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc128(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 128 ? -x : 128;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc255(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 255 ? -x : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
|
|
@ -0,0 +1,11 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftrapv -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
#define INT_MIN (-__INT_MAX__ - 1)
|
||||||
|
|
||||||
|
int test(int x)
|
||||||
|
{
|
||||||
|
return x != INT_MIN ? -x : INT_MIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump "goto" "optimized" } } */
|
|
@ -0,0 +1,84 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
int test_i0(int x)
|
||||||
|
{
|
||||||
|
return x != 0 ? ~x : ~0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_i1(int x)
|
||||||
|
{
|
||||||
|
return x != 1 ? ~x : -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_im1(int x)
|
||||||
|
{
|
||||||
|
return x != ~0 ? ~x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int test_u0(unsigned int x)
|
||||||
|
{
|
||||||
|
return x != 0 ? ~x : ~0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int test_u1(unsigned int x)
|
||||||
|
{
|
||||||
|
return x != 1 ? ~x : ~1u;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int test_um1(unsigned int x)
|
||||||
|
{
|
||||||
|
return x != ~0u ? ~x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
signed char test_c0(signed char x)
|
||||||
|
{
|
||||||
|
return x != 0 ? ~x : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
signed char test_c1(signed char x)
|
||||||
|
{
|
||||||
|
return x != 1 ? ~x : -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
signed char test_cm1(signed char x)
|
||||||
|
{
|
||||||
|
return x != -1 ? ~x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
signed char test_cm128(signed char x)
|
||||||
|
{
|
||||||
|
return x != -128 ? ~x : 127;
|
||||||
|
}
|
||||||
|
|
||||||
|
signed char test_c127(signed char x)
|
||||||
|
{
|
||||||
|
return x != 127 ? ~x : -128;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc0(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 0 ? ~x : 255;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc1(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 1 ? ~x : 254;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc127(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 127 ? ~x : 128;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_uc128(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 128 ? ~x : 127;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char test_ucm1(unsigned char x)
|
||||||
|
{
|
||||||
|
return x != 255 ? ~x : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
unsigned foo(unsigned val, unsigned start)
|
||||||
|
{
|
||||||
|
unsigned cnt = 0;
|
||||||
|
for (unsigned i = start; i > val; ++i)
|
||||||
|
cnt++;
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump "cnt_\[0-9\] = -start_\[0-9\]\\(D\\);" "optimized" } } */
|
|
@ -0,0 +1,15 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
unsigned foo(unsigned val, unsigned start)
|
||||||
|
{
|
||||||
|
unsigned cnt = 0;
|
||||||
|
unsigned i = start;
|
||||||
|
do {
|
||||||
|
cnt++;
|
||||||
|
i++;
|
||||||
|
} while (i > val);
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump "cnt_\[0-9\] = start_\[0-9\]\\(D\\) >= val_\[0-9\]\\(D\\) \\? _\[0-9\] : 1;" "optimized" } } */
|
|
@ -1478,7 +1478,7 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
|
||||||
The number of iterations is stored to NITER. */
|
The number of iterations is stored to NITER. */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
|
number_of_iterations_until_wrap (class loop *loop, tree type, affine_iv *iv0,
|
||||||
affine_iv *iv1, class tree_niter_desc *niter)
|
affine_iv *iv1, class tree_niter_desc *niter)
|
||||||
{
|
{
|
||||||
tree niter_type = unsigned_type_for (type);
|
tree niter_type = unsigned_type_for (type);
|
||||||
|
@ -1506,6 +1506,23 @@ number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
|
||||||
|
|
||||||
num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max),
|
num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max),
|
||||||
iv1->base);
|
iv1->base);
|
||||||
|
|
||||||
|
/* When base has the form iv + 1, if we know iv >= n, then iv + 1 < n
|
||||||
|
only when iv + 1 overflows, i.e. when iv == TYPE_VALUE_MAX. */
|
||||||
|
if (sgn == UNSIGNED
|
||||||
|
&& integer_onep (step)
|
||||||
|
&& TREE_CODE (iv1->base) == PLUS_EXPR
|
||||||
|
&& integer_onep (TREE_OPERAND (iv1->base, 1)))
|
||||||
|
{
|
||||||
|
tree cond = fold_build2 (GE_EXPR, boolean_type_node,
|
||||||
|
TREE_OPERAND (iv1->base, 0), iv0->base);
|
||||||
|
cond = simplify_using_initial_conditions (loop, cond);
|
||||||
|
if (integer_onep (cond))
|
||||||
|
may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node,
|
||||||
|
TREE_OPERAND (iv1->base, 0),
|
||||||
|
TYPE_MAX_VALUE (type));
|
||||||
|
}
|
||||||
|
|
||||||
high = max;
|
high = max;
|
||||||
if (TREE_CODE (iv1->base) == INTEGER_CST)
|
if (TREE_CODE (iv1->base) == INTEGER_CST)
|
||||||
low = wi::to_wide (iv1->base) - 1;
|
low = wi::to_wide (iv1->base) - 1;
|
||||||
|
|
Loading…
Reference in New Issue