re PR tree-optimization/26854 (Inordinate compile times on large routines)

2008-01-19  Kenneth Zadeck <zadeck@naturalbridge.com>

	PR rtl-optimization/26854
	PR rtl-optimization/34400
	* ddg.c (create_ddg_dep_from_intra_loop_link): Do not use
	DF_RD->gen.
	* df.h (df_changeable_flags.DF_RD_NO_TRIM): New.
	(df_rd_bb_info.expanded_lr_out): New.
	* loop_invariant.c (find_defs): Added DF_RD_NO_TRIM flag.
	* loop_iv.c (iv_analysis_loop_init): Ditto.
	* df-problems.c (df_rd_free_bb_info, df_rd_alloc, df_rd_confluence_n,
	df_rd_bb_local_compute, df_rd_transfer_function, df_rd_free):
	Added code to allocate, initialize or free expanded_lr_out.
	(df_rd_bb_local_compute_process_def): Restructured to make
	more understandable.
	(df_rd_confluence_n): Add code to do nothing with fake edges and
	code to no apply invalidate_by_call sets if the sets are being trimmed.
	(df_lr_local_finalize): Renamed to df_lr_finalize.
	(df_live_local_finalize): Renamed to df_live_finalize.

From-SVN: r131670
This commit is contained in:
Kenneth Zadeck 2008-01-20 01:48:25 +00:00 committed by Kenneth Zadeck
parent 1828e8e9bf
commit 2b49e1a09d
6 changed files with 160 additions and 71 deletions

View File

@ -1,3 +1,23 @@
2008-01-19 Kenneth Zadeck <zadeck@naturalbridge.com>
PR rtl-optimization/26854
PR rtl-optimization/34400
* ddg.c (create_ddg_dep_from_intra_loop_link): Do not use
DF_RD->gen.
* df.h (df_changeable_flags.DF_RD_NO_TRIM): New.
(df_rd_bb_info.expanded_lr_out): New.
* loop_invariant.c (find_defs): Added DF_RD_NO_TRIM flag.
* loop_iv.c (iv_analysis_loop_init): Ditto.
* df-problems.c (df_rd_free_bb_info, df_rd_alloc, df_rd_confluence_n,
df_rd_bb_local_compute, df_rd_transfer_function, df_rd_free):
Added code to allocate, initialize or free expanded_lr_out.
(df_rd_bb_local_compute_process_def): Restructured to make
more understandable.
(df_rd_confluence_n): Add code to do nothing with fake edges and
code to no apply invalidate_by_call sets if the sets are being trimmed.
(df_lr_local_finalize): Renamed to df_lr_finalize.
(df_live_local_finalize): Renamed to df_live_finalize.
2008-01-20 Richard Sandiford <rsandifo@nildram.co.uk>
PR target/34831

View File

@ -184,12 +184,13 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
{
int regno = REGNO (SET_DEST (set));
struct df_ref *first_def;
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
struct df_ref *last_def;
first_def = df_bb_regno_first_def_find (g->bb, regno);
gcc_assert (first_def);
if (bitmap_bit_p (bb_info->gen, first_def->id))
last_def = df_bb_regno_last_def_find (g->bb, regno);
if (first_def == last_def)
return;
}
}

View File

@ -245,6 +245,8 @@ df_rd_free_bb_info (basic_block bb ATTRIBUTE_UNUSED,
struct df_rd_bb_info *bb_info = (struct df_rd_bb_info *) vbb_info;
if (bb_info)
{
if (bb_info->expanded_lr_out)
BITMAP_FREE (bb_info->expanded_lr_out);
BITMAP_FREE (bb_info->kill);
BITMAP_FREE (bb_info->sparse_kill);
BITMAP_FREE (bb_info->gen);
@ -298,6 +300,8 @@ df_rd_alloc (bitmap all_blocks)
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
if (bb_info)
{
if (bb_info->expanded_lr_out)
bitmap_clear (bb_info->expanded_lr_out);
bitmap_clear (bb_info->kill);
bitmap_clear (bb_info->sparse_kill);
bitmap_clear (bb_info->gen);
@ -306,6 +310,10 @@ df_rd_alloc (bitmap all_blocks)
{
bb_info = (struct df_rd_bb_info *) pool_alloc (df_rd->block_pool);
df_rd_set_bb_info (bb_index, bb_info);
if (df->changeable_flags & DF_RD_NO_TRIM)
bb_info->expanded_lr_out = NULL;
else
bb_info->expanded_lr_out = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bb_info->kill = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bb_info->sparse_kill = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bb_info->gen = BITMAP_ALLOC (&problem_data->rd_bitmaps);
@ -320,56 +328,53 @@ df_rd_alloc (bitmap all_blocks)
/* Process a list of DEFs for df_rd_bb_local_compute. */
static void
df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
struct df_ref **def_rec,
enum df_ref_flags top_flag)
{
while (*def_rec)
for (; *def_rec; def_rec++)
{
struct df_ref *def = *def_rec;
if (top_flag == (DF_REF_FLAGS (def) & DF_REF_AT_TOP))
unsigned int regno = DF_REF_REGNO (def);
/* This makes sure we do the artificial defs in the right order
since they are all in the same list. */
if (top_flag != (DF_REF_FLAGS (def) & DF_REF_AT_TOP))
continue;
/* Skip over the hard regs if we do not care about them. */
if ((df->changeable_flags & DF_NO_HARD_REGS) &&
(regno < FIRST_PSEUDO_REGISTER))
continue;
/* Only the last def(s) for a regno in the block has any
effect. */
if (bitmap_bit_p (seen_in_block, regno))
continue;
/* The first def for regno in insn gets to knock out the
defs from other instructions. */
if ((!bitmap_bit_p (seen_in_insn, regno))
/* If the def is to only part of the reg, it does
not kill the other defs that reach here. */
&& (!(DF_REF_FLAGS (def) &
(DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER))))
{
unsigned int regno = DF_REF_REGNO (def);
unsigned int begin = DF_DEFS_BEGIN (regno);
unsigned int n_defs = DF_DEFS_COUNT (regno);
if ((!(df->changeable_flags & DF_NO_HARD_REGS))
|| (regno >= FIRST_PSEUDO_REGISTER))
{
/* Only the last def(s) for a regno in the block has any
effect. */
if (!bitmap_bit_p (seen_in_block, regno))
{
/* The first def for regno in insn gets to knock out the
defs from other instructions. */
if ((!bitmap_bit_p (seen_in_insn, regno))
/* If the def is to only part of the reg, it does
not kill the other defs that reach here. */
&& (!(DF_REF_FLAGS (def) &
(DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER))))
{
if (n_defs > DF_SPARSE_THRESHOLD)
{
bitmap_set_bit (bb_info->sparse_kill, regno);
bitmap_clear_range(bb_info->gen, begin, n_defs);
}
else
{
bitmap_set_range (bb_info->kill, begin, n_defs);
bitmap_clear_range (bb_info->gen, begin, n_defs);
}
}
bitmap_set_bit (seen_in_insn, regno);
/* All defs for regno in the instruction may be put into
the gen set. */
if (!(DF_REF_FLAGS (def)
& (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
bitmap_set_bit (bb_info->gen, DF_REF_ID (def));
}
}
if (n_defs > DF_SPARSE_THRESHOLD)
bitmap_set_bit (bb_info->sparse_kill, regno);
else
bitmap_set_range (bb_info->kill, begin, n_defs);
bitmap_clear_range(bb_info->gen, begin, n_defs);
}
def_rec++;
bitmap_set_bit (seen_in_insn, regno);
/* All defs for regno in the instruction may be put into
the gen set. */
if (!(DF_REF_FLAGS (def)
& (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
bitmap_set_bit (bb_info->gen, DF_REF_ID (def));
}
}
@ -380,14 +385,28 @@ df_rd_bb_local_compute (unsigned int bb_index)
{
basic_block bb = BASIC_BLOCK (bb_index);
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
rtx insn;
bitmap_clear (seen_in_block);
bitmap_clear (seen_in_insn);
if (!(df->changeable_flags & DF_RD_NO_TRIM))
{
unsigned int regno;
bitmap_iterator bi;
int first_reg = (df->changeable_flags & DF_NO_HARD_REGS) ? FIRST_PSEUDO_REGISTER : 0;
EXECUTE_IF_SET_IN_BITMAP (lr_bb_info->out, first_reg, regno, bi)
{
unsigned int begin = DF_DEFS_BEGIN (regno);
unsigned int n_defs = DF_DEFS_COUNT (regno);
bitmap_set_range (bb_info->expanded_lr_out, begin, n_defs);
}
}
/* Artificials are only hard regs. */
if (!(df->changeable_flags & DF_NO_HARD_REGS))
df_rd_bb_local_compute_process_def (bb_info,
df_rd_bb_local_compute_process_def (bb_info,
df_get_artificial_defs (bb_index),
0);
@ -482,7 +501,13 @@ df_rd_confluence_n (edge e)
bitmap op1 = df_rd_get_bb_info (e->dest->index)->in;
bitmap op2 = df_rd_get_bb_info (e->src->index)->out;
if (e->flags & EDGE_EH)
if (e->flags & EDGE_FAKE)
return;
/* If we are trimming the solution, the invalidated_by_call code in
the lr problem makes this unnecessary. However, if we do not
trim, we must take this into account. */
if ((df->changeable_flags & DF_RD_NO_TRIM) && e->flags & EDGE_EH)
{
struct df_rd_problem_data *problem_data
= (struct df_rd_problem_data *) df_rd->problem_data;
@ -490,7 +515,7 @@ df_rd_confluence_n (edge e)
bitmap dense_invalidated = problem_data->dense_invalidated_by_call;
bitmap_iterator bi;
unsigned int regno;
bitmap tmp = BITMAP_ALLOC (&df_bitmap_obstack);
bitmap tmp = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bitmap_copy (tmp, op2);
bitmap_and_compl_into (tmp, dense_invalidated);
@ -522,13 +547,13 @@ df_rd_transfer_function (int bb_index)
bitmap gen = bb_info->gen;
bitmap kill = bb_info->kill;
bitmap sparse_kill = bb_info->sparse_kill;
bool changed = false;
if (bitmap_empty_p (sparse_kill))
return bitmap_ior_and_compl (out, gen, in, kill);
if ((df->changeable_flags & DF_RD_NO_TRIM) && bitmap_empty_p (sparse_kill))
changed = bitmap_ior_and_compl (out, gen, in, kill);
else
{
struct df_rd_problem_data *problem_data;
bool changed = false;
bitmap tmp;
/* Note that TMP is _not_ a temporary bitmap if we end up replacing
@ -545,6 +570,8 @@ df_rd_transfer_function (int bb_index)
}
bitmap_and_compl_into (tmp, kill);
bitmap_ior_into (tmp, gen);
if (!(df->changeable_flags & DF_RD_NO_TRIM))
bitmap_and_into (tmp, bb_info->expanded_lr_out);
changed = !bitmap_equal_p (tmp, out);
if (changed)
{
@ -552,9 +579,10 @@ df_rd_transfer_function (int bb_index)
bb_info->out = tmp;
}
else
BITMAP_FREE (tmp);
return changed;
BITMAP_FREE (tmp);
}
return changed;
}
@ -574,6 +602,8 @@ df_rd_free (void)
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (i);
if (bb_info)
{
if (bb_info->expanded_lr_out)
BITMAP_FREE (bb_info->expanded_lr_out);
BITMAP_FREE (bb_info->kill);
BITMAP_FREE (bb_info->sparse_kill);
BITMAP_FREE (bb_info->gen);
@ -1015,7 +1045,7 @@ df_lr_transfer_function (int bb_index)
/* Run the fast dce as a side effect of building LR. */
static void
df_lr_local_finalize (bitmap all_blocks ATTRIBUTE_UNUSED)
df_lr_finalize (bitmap all_blocks ATTRIBUTE_UNUSED)
{
if (df->changeable_flags & DF_LR_RUN_DCE)
{
@ -1161,7 +1191,7 @@ df_lr_verify_solution_end (void)
if (df_lr->solutions_dirty)
/* Do not check if the solution is still dirty. See the comment
in df_lr_local_finalize for details. */
in df_lr_finalize for details. */
df_lr->solutions_dirty = false;
else
FOR_ALL_BB (bb)
@ -1204,7 +1234,7 @@ static struct df_problem problem_LR =
df_lr_confluence_0, /* Confluence operator 0. */
df_lr_confluence_n, /* Confluence operator n. */
df_lr_transfer_function, /* Transfer function. */
df_lr_local_finalize, /* Finalize function. */
df_lr_finalize, /* Finalize function. */
df_lr_free, /* Free all of the problem information. */
NULL, /* Remove this problem from the stack of dataflow problems. */
NULL, /* Debugging. */
@ -1560,7 +1590,7 @@ df_live_transfer_function (int bb_index)
/* And the LR info with the must-initialized registers, to produce the LIVE info. */
static void
df_live_local_finalize (bitmap all_blocks)
df_live_finalize (bitmap all_blocks)
{
if (df_live->solutions_dirty)
@ -1751,7 +1781,7 @@ static struct df_problem problem_LIVE =
NULL, /* Confluence operator 0. */
df_live_confluence_n, /* Confluence operator n. */
df_live_transfer_function, /* Transfer function. */
df_live_local_finalize, /* Finalize function. */
df_live_finalize, /* Finalize function. */
df_live_free, /* Free all of the problem information. */
df_live_free, /* Remove this problem from the stack of dataflow problems. */
NULL, /* Debugging. */

View File

@ -402,22 +402,30 @@ enum df_changeable_flags
{
/* Scanning flags. */
/* Flag to control the running of dce as a side effect of building LR. */
DF_LR_RUN_DCE = 1, /* Run DCE. */
DF_NO_HARD_REGS = 2, /* Skip hard registers in RD and CHAIN Building. */
DF_EQ_NOTES = 4, /* Build chains with uses present in EQUIV/EQUAL notes. */
DF_NO_REGS_EVER_LIVE = 8, /* Do not compute the regs_ever_live. */
DF_LR_RUN_DCE = 1 << 0, /* Run DCE. */
DF_NO_HARD_REGS = 1 << 1, /* Skip hard registers in RD and CHAIN Building. */
/* Do not trim the solution using the LR result. This can make the
solution take much longer and take more memory. This is
necessary for the loop optimizations, but has a very small time
and space penalty because the loop optimizations process only a
single loop at a time. Any pass that looks at the entire
function should not set this flag. */
DF_RD_NO_TRIM = 1 << 2,
DF_EQ_NOTES = 1 << 3, /* Build chains with uses present in EQUIV/EQUAL notes. */
DF_NO_REGS_EVER_LIVE = 1 << 4, /* Do not compute the regs_ever_live. */
/* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to
return immediately. This is used by passes that know how to update
the scanning them selves. */
DF_NO_INSN_RESCAN = 16,
DF_NO_INSN_RESCAN = 1 << 5,
/* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to
return after marking the insn for later processing. This allows all
rescans to be batched. */
DF_DEFER_INSN_RESCAN = 32,
DF_DEFER_INSN_RESCAN = 1 << 6,
DF_VERIFY_SCHEDULED = 64
DF_VERIFY_SCHEDULED = 1 << 7
};
/* Two of these structures are inline in df, one for the uses and one
@ -705,16 +713,43 @@ struct df_scan_bb_info
/* Reaching definitions. All bitmaps are indexed by the id field of
the ref except sparse_kill (see above). */
the ref except sparse_kill which is indexed by regno. */
struct df_rd_bb_info
{
/* Local sets to describe the basic blocks. See the note in the RU
datastructures for kill and sparse_kill. */
/* Local sets to describe the basic blocks. */
bitmap kill;
bitmap sparse_kill;
bitmap gen; /* The set of defs generated in this block. */
/* The results of the dataflow problem. */
/* Expanded version of the DF_LT->out bitmap to match the positions
of gen, in and out here. Only allocated if DF_RD_NO_TRIM is
false. */
bitmap expanded_lr_out;
/* The set of defs generated in this block. This is not set unless
the def reaches the end of the block. */
bitmap gen;
/* The results of the dataflow problem.
If DF_RD_NO_TRIM is not set, these sets are SOMEWHAT trimmed by
the output of the DF_LR problem. The out set is precisely
trimmed during propagation which means that the result is also
trimmed when the propagation terminates. The in set is not
explicitly trimmed, because this is expensive (adding about 5% to
the cost of a bootstrap). However since the out sets are trimmed
and the in sets are built from the out of the pred, the in set is
MOSTLY trimmed.
The counter case happens at a branch where the variable V is in
DF_LR->in the true branch but not the false branch. If V is
defined before the branch, RD will propagate that into the
DF_RD_in sets of both branches. When the block is processed, the
DF_RD->out set will have V trimmed out of it but it will still be
left in DF_RD->in.
If this not a problem for the current optimizers since they were
designed before any trimming was available. This can be fixed by
checking the DF_LR->in set directly. */
bitmap in; /* At the top of the block. */
bitmap out; /* At the bottom of the block. */
};

View File

@ -639,6 +639,7 @@ find_defs (struct loop *loop, basic_block *body)
df_remove_problem (df_chain);
df_process_deferred_rescans ();
df_chain_add_problem (DF_UD_CHAIN);
df_set_flags (DF_RD_NO_TRIM);
df_set_blocks (blocks);
df_analyze ();

View File

@ -22,9 +22,10 @@ along with GCC; see the file COPYING3. If not see
doloop optimization and branch prediction. The iv information is computed
on demand.
Induction variable is analyzed by walking the use-def chains. When a biv
is found, it is cached in the bivs hash table. When register is proved
to be a giv, its description is stored to DF_REF_DATA of the def reference.
Induction variables are analyzed by walking the use-def chains. When
a basic induction variable (biv) is found, it is cached in the bivs
hash table. When register is proved to be a biv, its description
is stored to DF_REF_DATA of the def reference.
The analysis works always with one loop -- you must call
iv_analysis_loop_init (loop) for it. All the other functions then work with
@ -277,6 +278,7 @@ iv_analysis_loop_init (struct loop *loop)
df_remove_problem (df_chain);
df_process_deferred_rescans ();
df_chain_add_problem (DF_UD_CHAIN);
df_set_flags (DF_RD_NO_TRIM);
df_set_blocks (blocks);
df_analyze ();
if (dump_file)