invoke.texi (sink-frequency-threshold): Document.

* doc/invoke.texi (sink-frequency-threshold): Document.
	* tree-ssa-sink.c: Include params.h.
	(select_best_block): New function.
	(statement_sink_location): Use it.
	* params.def (SINK_FREQUENCY_THRESHOLD): New PARAM.

From-SVN: r180524
This commit is contained in:
Jeff Law 2011-10-26 07:26:50 -06:00 committed by Jeff Law
parent b20577eae4
commit 1cc17820c3
4 changed files with 102 additions and 41 deletions

View File

@ -1,3 +1,11 @@
2011-10-26 Jeff Law <law@redhat.com>
* doc/invoke.texi (sink-frequency-threshold): Document.
* tree-ssa-sink.c: Include params.h.
(select_best_block): New function.
(statement_sink_location): Use it.
* params.def (SINK_FREQUENCY_THRESHOLD): New PARAM.
2011-10-14 Iain Sandoe <iains@gcc.gnu.org>
PR target/48108

View File

@ -880,6 +880,7 @@ See RS/6000 and PowerPC Options.
-mfaster-structs -mno-faster-structs -mflat -mno-flat @gol
-mfpu -mno-fpu -mhard-float -msoft-float @gol
-mhard-quad-float -msoft-quad-float @gol
-mlittle-endian @gol
-mstack-bias -mno-stack-bias @gol
-munaligned-doubles -mno-unaligned-doubles @gol
-mv8plus -mno-v8plus -mvis -mno-vis @gol
@ -9132,6 +9133,13 @@ partitions.
The maximum number of namespaces to consult for suggestions when C++
name lookup fails for an identifier. The default is 1000.
@item sink-frequency-threshold
The maximum relative execution frequency (in percent) of the target block
relative to a statement's original block at which sinking the statement
is still allowed. Larger numbers result in more aggressive statement sinking.
The default value is 75. A small positive adjustment is applied for
statements with memory operands as those are even more profitable to sink.
@item max-stores-to-sink
The maximum number of conditional store pairs that can be sunk. Set to 0
if either vectorization (@option{-ftree-vectorize}) or if-conversion
@ -17708,6 +17716,11 @@ These @samp{-m} options are supported in addition to the above
on SPARC-V9 processors in 64-bit environments:
@table @gcctabopt
@item -mlittle-endian
@opindex mlittle-endian
Generate code for a processor running in little-endian mode. It is only
available for a few configurations and most notably not on Solaris and Linux.
@item -m32
@itemx -m64
@opindex m32

View File

@ -566,6 +566,11 @@ DEFPARAM(PARAM_MAX_RELOAD_SEARCH_INSNS,
"The maximum number of instructions to search backward when looking for equivalent reload",
100, 0, 0)
/* Percentage threshold read by select_best_block in tree-ssa-sink.c.
   When the candidate block is at the same loop depth as the statement's
   original block, the statement is sunk only if the candidate's frequency
   is below this percentage of the original block's frequency.  The
   documented default is 75; the trailing 0 and 100 are presumably the
   permitted minimum and maximum -- confirm against the DEFPARAM
   definition.  */
DEFPARAM(PARAM_SINK_FREQUENCY_THRESHOLD,
"sink-frequency-threshold",
"Target block's relative execution frequency (as a percentage) required to sink a statement",
75, 0, 100)
DEFPARAM(PARAM_MAX_SCHED_REGION_BLOCKS,
"max-sched-region-blocks",
"The maximum number of blocks in a region to be considered for interblock scheduling",

View File

@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see
#include "bitmap.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"
/* TODO:
1. Sinking store only using scalar promotion (IE without moving the RHS):
@ -258,6 +259,71 @@ nearest_common_dominator_of_uses (gimple stmt, bool *debug_stmts)
return commondom;
}
/* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
statements.
We want the most control dependent block in the shallowest loop nest.
If the resulting block is in a shallower loop nest, then use it. Else
only use the resulting block if it has significantly lower execution
frequency than EARLY_BB to avoid gratuitous statement movement. We
consider statements with VOPS more desirable to move.
This pass would obviously benefit from PDO as it utilizes block
frequencies. It would also benefit from recomputing frequencies
if profile data is not available since frequencies often get out
of sync with reality. */
static basic_block
select_best_block (basic_block early_bb,
                   basic_block late_bb,
                   gimple stmt)
{
  basic_block candidate = late_bb;
  basic_block cursor;
  int pct;

  /* Climb the dominator tree from LATE_BB up to, but not including,
     EARLY_BB.  Only a strictly shallower loop depth replaces the
     candidate, so among blocks at the shallowest depth seen, the one
     closest to LATE_BB is the one that is kept.  */
  for (cursor = late_bb;
       cursor != early_bb;
       cursor = get_immediate_dominator (CDI_DOMINATORS, cursor))
    {
      if (candidate->loop_depth > cursor->loop_depth)
        candidate = cursor;
    }

  /* A candidate in a shallower loop nest than EARLY_BB is always
     accepted, regardless of block frequencies.  */
  if (early_bb->loop_depth > candidate->loop_depth)
    return candidate;

  /* Fetch the sinking threshold (a percentage).  Statements with memory
     operands are considered more profitable to sink, so they get seven
     extra percentage points, capped at 100.  */
  pct = PARAM_VALUE (PARAM_SINK_FREQUENCY_THRESHOLD);
  if (gimple_vuse (stmt) || gimple_vdef (stmt))
    pct = (pct + 7 > 100) ? 100 : pct + 7;

  /* A candidate that is not at EARLY_BB's own loop depth (i.e. it is
     deeper) is never used; the statement stays where it is.  */
  if (candidate->loop_depth != early_bb->loop_depth)
    return early_bb;

  /* Same nesting level: only move when the candidate executes
     significantly less often than EARLY_BB, to avoid gratuitous
     statement movement.  */
  if (candidate->frequency < (early_bb->frequency * pct / 100.0))
    return candidate;

  /* Nothing better was found; EARLY_BB is the statement's original
     block.  */
  return early_bb;
}
/* Given a statement (STMT) and the basic block it is currently in (FROMBB),
determine the location to sink the statement to, if any.
Returns true if there is such location; in that case, TOGSI points to the
@ -379,24 +445,10 @@ statement_sink_location (gimple stmt, basic_block frombb,
if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
return false;
/* It doesn't make sense to move to a dominator that post-dominates
frombb, because it means we've just moved it into a path that always
executes if frombb executes, instead of reducing the number of
executions . */
if (dominated_by_p (CDI_POST_DOMINATORS, frombb, commondom))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not moving store, common dominator post-dominates from block.\n");
return false;
}
commondom = select_best_block (frombb, commondom, stmt);
if (commondom == frombb || commondom->loop_depth > frombb->loop_depth)
return false;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Common dominator of all uses is %d\n",
commondom->index);
}
if (commondom == frombb)
return false;
*togsi = gsi_after_labels (commondom);
@ -415,13 +467,9 @@ statement_sink_location (gimple stmt, basic_block frombb,
if (gimple_code (use) != GIMPLE_PHI)
{
sinkbb = gimple_bb (use);
if (sinkbb == frombb || sinkbb->loop_depth > frombb->loop_depth
|| sinkbb->loop_father != frombb->loop_father)
return false;
sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
/* Move the expression to a post dominator can't reduce the number of
executions. */
if (dominated_by_p (CDI_POST_DOMINATORS, frombb, sinkbb))
if (sinkbb == frombb)
return false;
*togsi = gsi_for_stmt (use);
@ -431,21 +479,13 @@ statement_sink_location (gimple stmt, basic_block frombb,
}
sinkbb = find_bb_for_arg (use, DEF_FROM_PTR (def_p));
/* This can happen if there are multiple uses in a PHI. */
if (!sinkbb)
return false;
/* This will happen when you have
a_3 = PHI <a_13, a_26>
a_26 = VDEF <a_3>
If the use is a phi, and is in the same bb as the def,
we can't sink it. */
if (gimple_bb (use) == frombb)
return false;
if (sinkbb == frombb || sinkbb->loop_depth > frombb->loop_depth
|| sinkbb->loop_father != frombb->loop_father)
sinkbb = select_best_block (frombb, sinkbb, stmt);
if (!sinkbb || sinkbb == frombb)
return false;
/* If the latch block is empty, don't make it non-empty by sinking
@ -454,11 +494,6 @@ statement_sink_location (gimple stmt, basic_block frombb,
&& empty_block_p (sinkbb))
return false;
/* Move the expression to a post dominator can't reduce the number of
executions. */
if (dominated_by_p (CDI_POST_DOMINATORS, frombb, sinkbb))
return false;
*togsi = gsi_after_labels (sinkbb);
return true;