re PR lto/66752 (spec2000 255.vortex performance compiled with GCC is ~20% lower than with CLANG)

PR lto/66752
	* tree-ssa-threadedge.c (simplify_conrol_stmt_condition): If we are
	unable to find X NE 0 in the tables, return X as the simplified
	condition.
	(fsm_find_control_statement_thread_paths): If nodes in NEXT_PATH are
	in VISISTED_BBS, then return failure.  Else add nodes from NEXT_PATH
	to VISISTED_BBS.  */
	* tree-ssa-threadupdate.c (duplicate_thread_path): Fix up edge flags
	after removing the control flow statement and unnecessary edges.

testsuite/
	PR lto/66752
	* gcc.dg/tree-ssa/pr66752-2.c: New test.
	* gcc.dg/torture/pr66752-1.c: New test
	* g++.dg/torture/pr66752-2.C: New test.

From-SVN: r226125
This commit is contained in:
Jeff Law 2015-07-23 14:42:15 -06:00 committed by Jeff Law
parent 0f2be732d9
commit d37f946ace
7 changed files with 176 additions and 0 deletions

View File

@ -1,3 +1,15 @@
2015-01-12 Jeff Law <law@redhat.com>
PR lto/66752
* tree-ssa-threadedge.c (simplify_conrol_stmt_condition): If we are
unable to find X NE 0 in the tables, return X as the simplified
condition.
(fsm_find_control_statement_thread_paths): If nodes in NEXT_PATH are
in VISISTED_BBS, then return failure. Else add nodes from NEXT_PATH
to VISISTED_BBS. */
* tree-ssa-threadupdate.c (duplicate_thread_path): Fix up edge flags
after removing the control flow statement and unnecessary edges.
2015-07-23 Bernd Edlinger <bernd.edlinger@hotmail.de>
* tree-pass.h (get_current_pass_name): Removed.

View File

@ -1,3 +1,10 @@
2015-07-23 Jeff Law <law@redhat.com>
PR lto/66752
* gcc.dg/tree-ssa/pr66752-2.c: New test.
* gcc.dg/torture/pr66752-1.c: New test
* g++.dg/torture/pr66752-2.C: New test.
2015-07-23 Marek Polacek <polacek@redhat.com>
PR c++/66572

View File

@ -0,0 +1,60 @@
/* { dg-do compile } */
extern "C"
{
typedef struct _IO_FILE FILE;
extern int fprintf (FILE * __restrict __stream,
const char *__restrict __format, ...);
}
typedef union tree_node *tree;
class ipa_polymorphic_call_context
{
};
class ipcp_value_base
{
};
template < typename valtype > class ipcp_value:public ipcp_value_base
{
public:valtype value;
ipcp_value *next;
};
template < typename valtype > class ipcp_lattice
{
public:ipcp_value < valtype > *values;
void print (FILE * f, bool dump_sources, bool dump_benefits);
};
class ipcp_param_lattices
{
public:ipcp_lattice < tree > itself;
ipcp_lattice < ipa_polymorphic_call_context > ctxlat;
};
template < typename valtype > void ipcp_lattice < valtype >::print (FILE * f,
bool
dump_sources,
bool
dump_benefits)
{
ipcp_value < valtype > *val;
bool prev = false;
for (val = values; val; val = val->next)
{
if (dump_benefits && prev)
fprintf (f, " ");
else if (!dump_benefits && prev)
fprintf (f, ", ");
else
prev = true;
if (dump_sources)
fprintf (f, "]");
if (dump_benefits)
fprintf (f, "shit");
}
}
void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
struct ipcp_param_lattices *plats;
plats->ctxlat.print (f, dump_sources, dump_benefits);
}

View File

@ -0,0 +1,27 @@
/* { dg-do compile } */
typedef unsigned int size_t;
struct fde_vector
{
size_t count;
const struct dwarf_fde *array[];
};
struct object;
typedef struct dwarf_fde fde;
typedef int (*fde_compare_t) (struct object *, const fde *, const fde *);
void
fde_merge (struct object *ob, fde_compare_t fde_compare,
struct fde_vector *v1, struct fde_vector *v2)
{
size_t i1, i2;
const fde *fde2;
do
{
i2--;
while (i1 > 0 && fde_compare (ob, v1->array[i1 - 1], fde2) > 0)
{
i1--;
}
}
while (i2 > 0);
}

View File

@ -0,0 +1,39 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-dom1-details -fdump-tree-optimized" } */
extern int status, pt;
extern int count;
void
foo (int N, int c, int b, int *a)
{
int i, flag;
i = b -1;
flag = 1;
if (status && i < N && a[i] == b) {
N--;
flag = 0;
if (pt)
count++;
}
else
for (i = -1, flag = 1; ++i < N && flag;)
if (a[i] == b)
{
--N;
flag = 0;
if (i < N)
a[i] = a[N];
else
a[i] = 0;
if (pt)
count++;
}
if(status && flag)
pt--;
}
/* There are 3 FSM jump threading opportunities. */
/* { dg-final { scan-tree-dump-times "FSM" 3 "dom1"} } */
/* There should be no assignments or references to FLAG. */
/* { dg-final { scan-tree-dump-not "flag" "optimized"} } */

View File

@ -553,6 +553,16 @@ simplify_control_stmt_condition (edge e,
|| !is_gimple_min_invariant (cached_lhs))
cached_lhs = (*simplify) (dummy_cond, stmt);
/* If we were just testing that an integral type was != 0, and that
failed, just return the first operand. This gives the FSM code a
chance to optimize the path. */
if (cached_lhs == NULL
&& cond_code == NE_EXPR
&& INTEGRAL_TYPE_P (TREE_TYPE (op0))
&& TREE_CODE (op0) == SSA_NAME
&& integer_zerop (op1))
return op0;
return cached_lhs;
}
@ -974,6 +984,21 @@ fsm_find_control_statement_thread_paths (tree expr,
return;
}
/* Make sure we haven't already visited any of the nodes in
NEXT_PATH. Don't add them here to avoid pollution. */
for (unsigned int i = 0; i < next_path->length () - 1; i++)
{
if (visited_bbs->contains ((*next_path)[i]))
{
vec_free (next_path);
return;
}
}
/* Now add the nodes to VISISTED_BBS. */
for (unsigned int i = 0; i < next_path->length () - 1; i++)
visited_bbs->add ((*next_path)[i]);
/* Append all the nodes from NEXT_PATH to PATH. */
vec_safe_splice (path, next_path);
next_path_length = next_path->length ();

View File

@ -2480,6 +2480,12 @@ duplicate_thread_path (edge entry, edge exit,
/* Remove the last branch in the jump thread path. */
remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
/* And fixup the flags on the single remaining edge. */
edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
fix_e->flags |= EDGE_FALLTHRU;
edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
if (e) {