re PR tree-optimization/68398 (coremark regression due to r229685)
PR tree-optimization/68398 * params.def (PARAM_FSM_SCALE_PATH_STMTS): New parameter. (PARAM_FSM_SCALE_PATH_BLOCKS): Likewise. * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths): Only count PHIs in the last block in the path. The others will const/copy propagate away. Add heuristic to allow more irreducible subloops to be created when it is likely profitable to do so. * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths): Fix typo in comment. Use gsi_after_labels and remove the GIMPLE_LABEL check from within the loop. Use gsi_next_nondebug rather than gsi_next. PR tree-optimization/68398 * gcc.dg/tree-ssa/pr66752-3.c: Update expected output. * gcc.dg/tree-ssa/ssa-dom-thread-2c.c: Add extra statements on thread path to avoid new heuristic allowing more irreducible regions * gcc.dg/tree-ssa/ssa-dom-thread-2d.c: Likewise. * gcc.dg/tree-ssa/vrp46.c: Likewise. * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Update expected output. * gcc.dg/tree-ssa/ssa-dom-thread-2g.c: New test. * gcc.dg/tree-ssa/ssa-dom-thread-2h.c: Likewise. From-SVN: r232897
This commit is contained in:
parent
fa74a4bca8
commit
2b572b3c21
|
@ -1,3 +1,18 @@
|
|||
2016-01-27 Jeff Law <law@redhat.com>
|
||||
|
||||
PR tree-optimization/68398
|
||||
PR tree-optimization/69196
|
||||
* params.def (PARAM_FSM_SCALE_PATH_STMTS): New parameter.
|
||||
(PARAM_FSM_SCALE_PATH_BLOCKS): Likewise.
|
||||
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
|
||||
Only count PHIs in the last block in the path. The others will
|
||||
const/copy propagate away. Add heuristic to allow more irreducible
|
||||
subloops to be created when it is likely profitable to do so.
|
||||
|
||||
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
|
||||
Fix typo in comment. Use gsi_after_labels and remove the GIMPLE_LABEL
|
||||
check from within the loop. Use gsi_next_nondebug rather than gsi_next.
|
||||
|
||||
2016-01-27 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR lto/69254
|
||||
|
|
|
@ -1145,6 +1145,16 @@ DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE,
|
|||
"constructor generated by Pointer Bounds Checker.",
|
||||
5000, 100, 0)
|
||||
|
||||
DEFPARAM (PARAM_FSM_SCALE_PATH_STMTS,
|
||||
"fsm-scale-path-stmts",
|
||||
"Scale factor to apply to the number of statements in a threading path when comparing to the number of (scaled) blocks.",
|
||||
2, 1, 10)
|
||||
|
||||
DEFPARAM (PARAM_FSM_SCALE_PATH_BLOCKS,
|
||||
"fsm-scale-path-blocks",
|
||||
"Scale factor to apply to the number of blocks in a threading path when comparing to the number of (scaled) statements.",
|
||||
3, 1, 10)
|
||||
|
||||
DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
|
||||
"max-fsm-thread-path-insns",
|
||||
"Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path.",
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
2016-01-25 Jeff Law <law@redhat.com>
|
||||
|
||||
PR tree-optimization/68398
|
||||
PR tree-optimization/69196
|
||||
* gcc.dg/tree-ssa/pr66752-3.c: Update expected output.
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-2c.c: Add extra statements on thread
|
||||
path to avoid new heuristic allowing more irreducible regions.
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-2d.c: Likewise.
|
||||
* gcc.dg/tree-ssa/vrp46.c: Likewise.
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Update expected output.
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-2g.c: New test.
|
||||
* gcc.dg/tree-ssa/ssa-dom-thread-2h.c: Likewise.
|
||||
|
||||
2016-01-27 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
PR c/68062
|
||||
|
|
|
@ -32,10 +32,9 @@ foo (int N, int c, int b, int *a)
|
|||
pt--;
|
||||
}
|
||||
|
||||
/* There are 3 FSM jump threading opportunities, one of which will
|
||||
get filtered. */
|
||||
/* { dg-final { scan-tree-dump-times "Registering FSM" 2 "vrp1"} } */
|
||||
/* { dg-final { scan-tree-dump-times "FSM would create irreducible loop" 1 "vrp1"} } */
|
||||
/* There are 3 FSM jump threading opportunities, all of which will be
|
||||
realized, which will eliminate testing of FLAG completely. */
|
||||
/* { dg-final { scan-tree-dump-times "Registering FSM" 3 "vrp1"} } */
|
||||
|
||||
/* There should be no assignments or references to FLAG. */
|
||||
/* { dg-final { scan-tree-dump-not "flag" "optimized"} } */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
|
||||
|
||||
void foo();
|
||||
|
@ -15,6 +15,9 @@ void dont_thread_1 (void)
|
|||
|
||||
do
|
||||
{
|
||||
bla ();
|
||||
bla ();
|
||||
bla ();
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
|
||||
|
||||
void foo();
|
||||
|
@ -13,6 +13,9 @@ void dont_thread_2 (int first)
|
|||
|
||||
do
|
||||
{
|
||||
bla ();
|
||||
bla ();
|
||||
bla ();
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
|
||||
|
||||
void foo();
|
||||
void bla();
|
||||
void bar();
|
||||
|
||||
void dont_thread_1 (void)
|
||||
{
|
||||
int i = 0;
|
||||
int first = 1;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
bar ();
|
||||
|
||||
first = 0;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 2" "vrp1"} } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2"} } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
|
||||
|
||||
void foo();
|
||||
void bla();
|
||||
void bar();
|
||||
|
||||
/* Avoid threading in the following case, to prevent creating subloops. */
|
||||
|
||||
void dont_thread_2 (int first)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
foo ();
|
||||
else
|
||||
bar ();
|
||||
|
||||
first = 0;
|
||||
bla ();
|
||||
} while (i++ < 100);
|
||||
}
|
||||
|
||||
/* Peeling off the first iteration would make threading through
|
||||
the loop latch safe, but we don't do that currently. */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 1" "vrp1"} } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2"} } */
|
|
@ -1,8 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats -fdump-tree-dom3-stats" } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 7" "vrp1" } } */
|
||||
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats" } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 19" "vrp1" } } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 12" "dom2" } } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 3" "dom3" } } */
|
||||
/* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" } } */
|
||||
/* { dg-final { scan-tree-dump-not "Jumps threaded" "vrp2" } } */
|
||||
|
||||
enum STATE {
|
||||
S0=0,
|
||||
|
|
|
@ -12,6 +12,8 @@ func_18 ( int t )
|
|||
for (0; 1; ++l_889)
|
||||
{
|
||||
int t1 = 0;
|
||||
func_98 (0);
|
||||
func_98 (0);
|
||||
if (func_81 (1))
|
||||
{
|
||||
int rhs = l_895;
|
||||
|
|
|
@ -266,7 +266,7 @@ fsm_find_control_statement_thread_paths (tree name,
|
|||
basic_block bb = (*path)[j];
|
||||
|
||||
/* Remember, blocks in the path are stored in opposite order
|
||||
in the PATH array. The last entry in the array reprensents
|
||||
in the PATH array. The last entry in the array represents
|
||||
the block with an outgoing edge that we will redirect to the
|
||||
jump threading path. Thus we don't care about that block's
|
||||
loop father, nor how many statements are in that block because
|
||||
|
@ -280,33 +280,19 @@ fsm_find_control_statement_thread_paths (tree name,
|
|||
break;
|
||||
}
|
||||
|
||||
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
for (gsi = gsi_after_labels (bb);
|
||||
!gsi_end_p (gsi);
|
||||
gsi_next_nondebug (&gsi))
|
||||
{
|
||||
gimple *stmt = gsi_stmt (gsi);
|
||||
/* Do not count empty statements and labels. */
|
||||
if (gimple_code (stmt) != GIMPLE_NOP
|
||||
&& gimple_code (stmt) != GIMPLE_LABEL
|
||||
&& !(gimple_code (stmt) == GIMPLE_ASSIGN
|
||||
&& gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
|
||||
&& !is_gimple_debug (stmt))
|
||||
++n_insns;
|
||||
}
|
||||
|
||||
gphi_iterator gsip;
|
||||
for (gsip = gsi_start_phis (bb);
|
||||
!gsi_end_p (gsip);
|
||||
gsi_next (&gsip))
|
||||
{
|
||||
gphi *phi = gsip.phi ();
|
||||
tree dst = gimple_phi_result (phi);
|
||||
|
||||
/* We consider any non-virtual PHI as a statement since it
|
||||
count result in a constant assignment or copy
|
||||
operation. */
|
||||
if (!virtual_operand_p (dst))
|
||||
++n_insns;
|
||||
}
|
||||
|
||||
/* We do not look at the block with the threaded branch
|
||||
in this loop. So if any block with a last statement that
|
||||
is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
|
||||
|
@ -360,6 +346,24 @@ fsm_find_control_statement_thread_paths (tree name,
|
|||
== DOMST_NONDOMINATING))
|
||||
creates_irreducible_loop = true;
|
||||
|
||||
/* PHIs in the final target and only the final target will need
|
||||
to be duplicated. So only count those against the number
|
||||
of statements. */
|
||||
gphi_iterator gsip;
|
||||
for (gsip = gsi_start_phis (taken_edge->dest);
|
||||
!gsi_end_p (gsip);
|
||||
gsi_next (&gsip))
|
||||
{
|
||||
gphi *phi = gsip.phi ();
|
||||
tree dst = gimple_phi_result (phi);
|
||||
|
||||
/* We consider any non-virtual PHI as a statement since it
|
||||
could result in a constant assignment or copy
|
||||
operation. */
|
||||
if (!virtual_operand_p (dst))
|
||||
++n_insns;
|
||||
}
|
||||
|
||||
if (path_crosses_loops)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
|
@ -379,10 +383,18 @@ fsm_find_control_statement_thread_paths (tree name,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* We avoid creating irreducible loops unless we thread through
|
||||
/* We avoid creating irreducible inner loops unless we thread through
|
||||
a multiway branch, in which case we have deemed it worth losing other
|
||||
loop optimizations later. */
|
||||
if (!threaded_multiway_branch && creates_irreducible_loop)
|
||||
loop optimizations later.
|
||||
|
||||
We also consider it worth creating an irreducible inner loop if
|
||||
the number of copied statements is low relative to the length of
|
||||
the path -- in that case there's little the traditional loop optimizer
|
||||
would have done anyway, so an irreducible loop is not so bad. */
|
||||
if (!threaded_multiway_branch && creates_irreducible_loop
|
||||
&& (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
|
||||
> path_length * PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS)))
|
||||
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file,
|
||||
|
|
Loading…
Reference in New Issue