re PR tree-optimization/58626 (possible array wrong code bug)

2013-10-25  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/58626
	* tree-loop-distribution.c (enum rdg_dep_type): Remove
	anti_dd, output_dd and input_dd.
	(struct rdg_edge): Remove level and relation members.
	(RDGE_LEVEL, RDGE_RELATION): Remove.
	(dot_rdg_1): Adjust.
	(create_rdg_edge_for_ddr): Remove.
	(create_rdg_edges_for_scalar): Adjust.
	(create_edge_for_control_dependence): Likewise.
	(create_rdg_edges): Split into ...
	(create_rdg_flow_edges): ... this
	(create_rdg_cd_edges): ... and this.
	(free_rdg): Adjust.
	(build_rdg): Likewise, do not compute data dependences or
	add edges for them.
	(pg_add_dependence_edges): New function.
	(pgcmp): Likewise.
	(distribute_loop): First apply all non-dependence based
	partition mergings.  Then compute dependences between partitions
	and merge and order partitions according to them.

	* gcc.dg/torture/pr58626.c: New testcase.

From-SVN: r204062
This commit is contained in:
Richard Biener 2013-10-25 11:51:11 +00:00 committed by Richard Biener
parent 5970573cc1
commit 447f322380
4 changed files with 293 additions and 166 deletions

View File

@ -1,3 +1,26 @@
2013-10-25 Richard Biener <rguenther@suse.de>
PR tree-optimization/58626
* tree-loop-distribution.c (enum rdg_dep_type): Remove
anti_dd, output_dd and input_dd.
(struct rdg_edge): Remove level and relation members.
(RDGE_LEVEL, RDGE_RELATION): Remove.
(dot_rdg_1): Adjust.
(create_rdg_edge_for_ddr): Remove.
(create_rdg_edges_for_scalar): Adjust.
(create_edge_for_control_dependence): Likewise.
(create_rdg_edges): Split into ...
(create_rdg_flow_edges): ... this
(create_rdg_cd_edges): ... and this.
(free_rdg): Adjust.
(build_rdg): Likewise, do not compute data dependences or
add edges for them.
(pg_add_dependence_edges): New function.
(pgcmp): Likewise.
(distribute_loop): First apply all non-dependence based
partition mergings. Then compute dependences between partitions
and merge and order partitions according to them.
2013-10-25 Eric Botcazou <ebotcazou@adacore.com>
PR rtl-optimization/58831

View File

@ -1,3 +1,8 @@
2013-10-25 Richard Biener <rguenther@suse.de>
PR tree-optimization/58626
* gcc.dg/torture/pr58626.c: New testcase.
2013-10-25 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/54812

View File

@ -0,0 +1,20 @@
/* { dg-do run } */
extern void abort (void);
int a[8][6] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
int b;
int main(void)
{
for (b = 0; b <= 1; b++) {
a[1][3] = 0;
int c;
for (c = 0; c <= 1; c++) {
a[c + 1][b] = a[c + 2][b];
}
}
if (a[1][1] != 1)
abort ();
return 0;
}

View File

@ -96,15 +96,6 @@ enum rdg_dep_type
/* Read After Write (RAW). */
flow_dd = 'f',
/* Write After Read (WAR). */
anti_dd = 'a',
/* Write After Write (WAW). */
output_dd = 'o',
/* Read After Read (RAR). */
input_dd = 'i',
/* Control dependence (execute conditional on). */
control_dd = 'c'
};
@ -115,19 +106,9 @@ typedef struct rdg_edge
{
/* Type of the dependence. */
enum rdg_dep_type type;
/* Levels of the dependence: the depth of the loops that carry the
dependence. */
unsigned level;
/* Dependence relation between data dependences, NULL when one of
the vertices is a scalar. */
ddr_p relation;
} *rdg_edge_p;
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
/* Dump vertex I in RDG to FILE. */
@ -215,23 +196,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg)
for (e = v->succ; e; e = e->succ_next)
switch (RDGE_TYPE (e))
{
case input_dd:
fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
break;
case output_dd:
fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
break;
case flow_dd:
/* These are the most common dependences: don't print these. */
fprintf (file, "%d -> %d \n", i, e->dest);
break;
case anti_dd:
fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
break;
case control_dd:
fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
break;
@ -273,52 +242,6 @@ rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
return index;
}
/* Creates an edge in RDG for each distance vector from DDR. The
order that we keep track of in the RDG is the order in which
statements have to be executed. */
static void
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
{
struct graph_edge *e;
int va, vb;
data_reference_p dra = DDR_A (ddr);
data_reference_p drb = DDR_B (ddr);
unsigned level = ddr_dependence_level (ddr);
/* For non scalar dependences, when the dependence is REVERSED,
statement B has to be executed before statement A. */
if (level > 0
&& !DDR_REVERSED_P (ddr))
{
data_reference_p tmp = dra;
dra = drb;
drb = tmp;
}
va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
if (va < 0 || vb < 0)
return;
e = add_edge (rdg, va, vb);
e->data = XNEW (struct rdg_edge);
RDGE_LEVEL (e) = level;
RDGE_RELATION (e) = ddr;
/* Determines the type of the data dependence. */
if (DR_IS_READ (dra) && DR_IS_READ (drb))
RDGE_TYPE (e) = input_dd;
else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
RDGE_TYPE (e) = output_dd;
else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
RDGE_TYPE (e) = flow_dd;
else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
RDGE_TYPE (e) = anti_dd;
}
/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
the index of DEF in RDG. */
@ -339,7 +262,6 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
e = add_edge (rdg, idef, use);
e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = flow_dd;
RDGE_RELATION (e) = NULL;
}
}
@ -366,7 +288,6 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
e = add_edge (rdg, c, v);
e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = control_dd;
RDGE_RELATION (e) = NULL;
}
}
}
@ -374,38 +295,38 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
/* Creates the edges of the reduced dependence graph RDG. */
static void
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
create_rdg_flow_edges (struct graph *rdg)
{
int i;
struct data_dependence_relation *ddr;
def_operand_p def_p;
ssa_op_iter iter;
FOR_EACH_VEC_ELT (ddrs, i, ddr)
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
create_rdg_edge_for_ddr (rdg, ddr);
else
free_dependence_relation (ddr);
for (i = 0; i < rdg->n_vertices; i++)
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
iter, SSA_OP_DEF)
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
if (cd)
for (i = 0; i < rdg->n_vertices; i++)
{
gimple stmt = RDG_STMT (rdg, i);
if (gimple_code (stmt) == GIMPLE_PHI)
{
edge_iterator ei;
edge e;
FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
/* Creates the edges of the reduced dependence graph RDG. */
static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
int i;
for (i = 0; i < rdg->n_vertices; i++)
{
gimple stmt = RDG_STMT (rdg, i);
if (gimple_code (stmt) == GIMPLE_PHI)
{
edge_iterator ei;
edge e;
FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
create_edge_for_control_dependence (rdg, e->src, i, cd);
}
else
create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
}
}
else
create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
}
}
/* Build the vertices of the reduced dependence graph RDG. Return false
@ -494,10 +415,7 @@ free_rdg (struct graph *rdg)
struct graph_edge *e;
for (e = v->succ; e; e = e->succ_next)
{
free_dependence_relation (RDGE_RELATION (e));
free (e->data);
}
free (e->data);
if (v->data)
{
@ -520,7 +438,6 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
struct graph *rdg;
vec<gimple> stmts;
vec<data_reference_p> datarefs;
vec<ddr_p> dependence_relations;
/* Create the RDG vertices from the stmts of the loop nest. */
stmts.create (10);
@ -536,19 +453,10 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
}
stmts.release ();
/* Create the RDG edges from the data dependences in the loop nest. */
dependence_relations.create (100);
if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
false)
|| !known_dependences_p (dependence_relations))
{
free_dependence_relations (dependence_relations);
datarefs.release ();
free_rdg (rdg);
return NULL;
}
create_rdg_edges (rdg, dependence_relations, cd);
dependence_relations.release ();
create_rdg_flow_edges (rdg);
if (cd)
create_rdg_cd_edges (rdg, cd);
datarefs.release ();
return rdg;
@ -1405,6 +1313,70 @@ partition_contains_all_rw (struct graph *rdg,
return false;
}
/* Compute partition dependence created by the data references in DRS1
and DRS2 and modify and return DIR according to that. */
static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
vec<data_reference_p> drs1,
vec<data_reference_p> drs2)
{
data_reference_p dr1, dr2;
/* dependence direction - 0 is no dependence, -1 is back,
1 is forth, 2 is both (we can stop then, merging will occur). */
for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
{
int this_dir = 1;
ddr_p ddr;
/* Re-shuffle data-refs to be in dominator order. */
if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
> rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
{
data_reference_p tem = dr1;
dr1 = dr2;
dr2 = tem;
this_dir = -this_dir;
}
ddr = initialize_data_dependence_relation (dr1, dr2, loops);
compute_affine_dependence (ddr, loops[0]);
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
this_dir = 2;
else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
if (DDR_REVERSED_P (ddr))
{
data_reference_p tem = dr1;
dr1 = dr2;
dr2 = tem;
this_dir = -this_dir;
}
/* Known dependences can still be unordered througout the
iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
if (DDR_NUM_DIST_VECTS (ddr) == 0)
this_dir = 2;
}
else
this_dir = 0;
free_dependence_relation (ddr);
if (dir == 0)
dir = this_dir;
else if (dir != this_dir)
return 2;
}
return dir;
}
/* Compare postorder number of the partition graph vertices V1 and V2. */
static int
pgcmp (const void *v1_, const void *v2_)
{
const vertex *v1 = (const vertex *)v1_;
const vertex *v2 = (const vertex *)v2_;
return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
statements are placed before consumer statements. Tries to separate
@ -1421,6 +1393,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
partition_t partition;
bool any_builtin;
int i, nbp;
graph *pg = NULL;
int num_sccs = 1;
*nb_calls = 0;
loop_nest.create (3);
@ -1455,8 +1429,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
any_builtin |= partition_builtin_p (partition);
}
/* If we did not detect any builtin but are not asked to apply
regular loop distribution simply bail out. */
/* If we are only distributing patterns but did not detect any,
simply bail out. */
if (!flag_tree_loop_distribution
&& !any_builtin)
{
@ -1464,9 +1438,56 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
goto ldist_done;
}
/* If we are only distributing patterns fuse all partitions that
were not classified as builtins. This also avoids chopping
a loop into pieces, separated by builtin calls. That is, we
only want no or a single loop body remaining. */
partition_t into;
if (!flag_tree_loop_distribution)
{
for (i = 0; partitions.iterate (i, &into); ++i)
if (!partition_builtin_p (into))
break;
for (++i; partitions.iterate (i, &partition); ++i)
if (!partition_builtin_p (partition))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "fusing non-builtin partitions\n");
dump_bitmap (dump_file, into->stmts);
dump_bitmap (dump_file, partition->stmts);
}
partition_merge_into (into, partition);
partitions.unordered_remove (i);
partition_free (partition);
i--;
}
}
/* Due to limitations in the transform phase we have to fuse all
reduction partitions into the last partition so the existing
loop will contain all loop-closed PHI nodes. */
for (i = 0; partitions.iterate (i, &into); ++i)
if (partition_reduction_p (into))
break;
for (i = i + 1; partitions.iterate (i, &partition); ++i)
if (partition_reduction_p (partition))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "fusing partitions\n");
dump_bitmap (dump_file, into->stmts);
dump_bitmap (dump_file, partition->stmts);
fprintf (dump_file, "because they have reductions\n");
}
partition_merge_into (into, partition);
partitions.unordered_remove (i);
partition_free (partition);
i--;
}
/* Apply our simple cost model - fuse partitions with similar
memory accesses. */
partition_t into;
for (i = 0; partitions.iterate (i, &into); ++i)
{
if (partition_builtin_p (into))
@ -1486,61 +1507,119 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
"memory accesses\n");
}
partition_merge_into (into, partition);
partitions.ordered_remove (j);
partitions.unordered_remove (j);
partition_free (partition);
j--;
}
}
}
/* If we are only distributing patterns fuse all partitions that
were not properly classified as builtins. */
if (!flag_tree_loop_distribution)
{
partition_t into;
/* Only fuse adjacent non-builtin partitions, see PR53616.
??? Use dependence information to improve partition ordering. */
i = 0;
do
{
for (; partitions.iterate (i, &into); ++i)
if (!partition_builtin_p (into))
break;
for (++i; partitions.iterate (i, &partition); ++i)
if (!partition_builtin_p (partition))
{
partition_merge_into (into, partition);
partitions.ordered_remove (i);
partition_free (partition);
i--;
}
else
break;
}
while ((unsigned) i < partitions.length ());
}
/* Fuse all reduction partitions into the last. */
/* Build the partition dependency graph. */
if (partitions.length () > 1)
{
partition_t into = partitions.last ();
for (i = partitions.length () - 2; i >= 0; --i)
pg = new_graph (partitions.length ());
struct pgdata {
partition_t partition;
vec<data_reference_p> writes;
vec<data_reference_p> reads;
};
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
for (i = 0; partitions.iterate (i, &partition); ++i)
{
partition_t what = partitions[i];
if (partition_reduction_p (what))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "fusing partitions\n");
dump_bitmap (dump_file, into->stmts);
dump_bitmap (dump_file, what->stmts);
fprintf (dump_file, "because the latter has reductions\n");
}
partition_merge_into (into, what);
partitions.ordered_remove (i);
partition_free (what);
}
vertex *v = &pg->vertices[i];
pgdata *data = new pgdata;
data_reference_p dr;
/* FIXME - leaks. */
v->data = data;
bitmap_iterator bi;
unsigned j;
data->partition = partition;
data->reads = vNULL;
data->writes = vNULL;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
if (DR_IS_READ (dr))
data->reads.safe_push (dr);
else
data->writes.safe_push (dr);
}
partition_t partition1, partition2;
for (i = 0; partitions.iterate (i, &partition1); ++i)
for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
{
/* dependence direction - 0 is no dependence, -1 is back,
1 is forth, 2 is both (we can stop then, merging will occur). */
int dir = 0;
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
PGDATA(i)->writes,
PGDATA(j)->reads);
if (dir != 2)
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
PGDATA(i)->reads,
PGDATA(j)->writes);
if (dir != 2)
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
PGDATA(i)->writes,
PGDATA(j)->writes);
if (dir == 1 || dir == 2)
add_edge (pg, i, j);
if (dir == -1 || dir == 2)
add_edge (pg, j, i);
}
/* Add edges to the reduction partition (if any) to force it last. */
unsigned j;
for (j = 0; partitions.iterate (j, &partition); ++j)
if (partition_reduction_p (partition))
break;
if (j < partitions.length ())
{
for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
if (i != j)
add_edge (pg, i, j);
}
/* Compute partitions we cannot separate and fuse them. */
num_sccs = graphds_scc (pg, NULL);
for (i = 0; i < num_sccs; ++i)
{
partition_t first;
int j;
for (j = 0; partitions.iterate (j, &first); ++j)
if (pg->vertices[j].component == i)
break;
for (j = j + 1; partitions.iterate (j, &partition); ++j)
if (pg->vertices[j].component == i)
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "fusing partitions\n");
dump_bitmap (dump_file, first->stmts);
dump_bitmap (dump_file, partition->stmts);
fprintf (dump_file, "because they are in the same "
"dependence SCC\n");
}
partition_merge_into (first, partition);
partitions[j] = NULL;
partition_free (partition);
PGDATA (j)->partition = NULL;
}
}
/* Now order the remaining nodes in postorder. */
qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
partitions.truncate (0);
for (i = 0; i < pg->n_vertices; ++i)
{
pgdata *data = PGDATA (i);
if (data->partition)
partitions.safe_push (data->partition);
data->reads.release ();
data->writes.release ();
delete data;
}
gcc_assert (partitions.length () == (unsigned)num_sccs);
free_graph (pg);
}
nbp = partitions.length ();