invoke.texi (sink-frequency-threshold): Document.
* doc/invoke.texi (sink-frequency-threshold): Document. * tree-ssa-sink.c: Include params.h. (select_best_block): New function. (statement_sink_location): Use it. * params.def (SINK_FREQUENCY_THRESHOLD): New PARAM. From-SVN: r180524
This commit is contained in:
parent
b20577eae4
commit
1cc17820c3
@ -1,3 +1,11 @@
|
||||
2011-10-26 Jeff Law <law@redhat.com>
|
||||
|
||||
* doc/invoke.texi (sink-frequency-threshold): Document.
|
||||
* tree-ssa-sink.c: Include params.h.
|
||||
(select_best_block): New function.
|
||||
(statement_sink_location): Use it.
|
||||
* params.def (SINK_FREQUENCY_THRESHOLD): New PARAM.
|
||||
|
||||
2011-10-14 Iain Sandoe <iains@gcc.gnu.org>
|
||||
|
||||
PR target/48108
|
||||
|
@ -880,6 +880,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mfaster-structs -mno-faster-structs -mflat -mno-flat @gol
|
||||
-mfpu -mno-fpu -mhard-float -msoft-float @gol
|
||||
-mhard-quad-float -msoft-quad-float @gol
|
||||
-mlittle-endian @gol
|
||||
-mstack-bias -mno-stack-bias @gol
|
||||
-munaligned-doubles -mno-unaligned-doubles @gol
|
||||
-mv8plus -mno-v8plus -mvis -mno-vis @gol
|
||||
@ -9132,6 +9133,13 @@ partitions.
|
||||
The maximum number of namespaces to consult for suggestions when C++
|
||||
name lookup fails for an identifier. The default is 1000.
|
||||
|
||||
@item sink-frequency-threshold
|
||||
The maximum relative execution frequency (in percents) of the target block
|
||||
relative to a statement's original block to allow statement sinking of a
|
||||
statement. Larger numbers result in more aggressive statement sinking.
|
||||
The default value is 75. A small positive adjustment is applied for
|
||||
statements with memory operands as those are even more profitable so sink.
|
||||
|
||||
@item max-stores-to-sink
|
||||
The maximum number of conditional stores paires that can be sunk. Set to 0
|
||||
if either vectorization (@option{-ftree-vectorize}) or if-conversion
|
||||
@ -17708,6 +17716,11 @@ These @samp{-m} options are supported in addition to the above
|
||||
on SPARC-V9 processors in 64-bit environments:
|
||||
|
||||
@table @gcctabopt
|
||||
@item -mlittle-endian
|
||||
@opindex mlittle-endian
|
||||
Generate code for a processor running in little-endian mode. It is only
|
||||
available for a few configurations and most notably not on Solaris and Linux.
|
||||
|
||||
@item -m32
|
||||
@itemx -m64
|
||||
@opindex m32
|
||||
|
@ -566,6 +566,11 @@ DEFPARAM(PARAM_MAX_RELOAD_SEARCH_INSNS,
|
||||
"The maximum number of instructions to search backward when looking for equivalent reload",
|
||||
100, 0, 0)
|
||||
|
||||
DEFPARAM(PARAM_SINK_FREQUENCY_THRESHOLD,
|
||||
"sink-frequency-threshold",
|
||||
"Target block's relative execution frequency (as a percentage) required to sink a statement",
|
||||
75, 0, 100)
|
||||
|
||||
DEFPARAM(PARAM_MAX_SCHED_REGION_BLOCKS,
|
||||
"max-sched-region-blocks",
|
||||
"The maximum number of blocks in a region to be considered for interblock scheduling",
|
||||
|
@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "bitmap.h"
|
||||
#include "langhooks.h"
|
||||
#include "cfgloop.h"
|
||||
#include "params.h"
|
||||
|
||||
/* TODO:
|
||||
1. Sinking store only using scalar promotion (IE without moving the RHS):
|
||||
@ -258,6 +259,71 @@ nearest_common_dominator_of_uses (gimple stmt, bool *debug_stmts)
|
||||
return commondom;
|
||||
}
|
||||
|
||||
/* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
|
||||
tree, return the best basic block between them (inclusive) to place
|
||||
statements.
|
||||
|
||||
We want the most control dependent block in the shallowest loop nest.
|
||||
|
||||
If the resulting block is in a shallower loop nest, then use it. Else
|
||||
only use the resulting block if it has significantly lower execution
|
||||
frequency than EARLY_BB to avoid gratutious statement movement. We
|
||||
consider statements with VOPS more desirable to move.
|
||||
|
||||
This pass would obviously benefit from PDO as it utilizes block
|
||||
frequencies. It would also benefit from recomputing frequencies
|
||||
if profile data is not available since frequencies often get out
|
||||
of sync with reality. */
|
||||
|
||||
static basic_block
|
||||
select_best_block (basic_block early_bb,
|
||||
basic_block late_bb,
|
||||
gimple stmt)
|
||||
{
|
||||
basic_block best_bb = late_bb;
|
||||
basic_block temp_bb = late_bb;
|
||||
int threshold;
|
||||
|
||||
while (temp_bb != early_bb)
|
||||
{
|
||||
/* If we've moved into a lower loop nest, then that becomes
|
||||
our best block. */
|
||||
if (temp_bb->loop_depth < best_bb->loop_depth)
|
||||
best_bb = temp_bb;
|
||||
|
||||
/* Walk up the dominator tree, hopefully we'll find a shallower
|
||||
loop nest. */
|
||||
temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
|
||||
}
|
||||
|
||||
/* If we found a shallower loop nest, then we always consider that
|
||||
a win. This will always give us the most control dependent block
|
||||
within that loop nest. */
|
||||
if (best_bb->loop_depth < early_bb->loop_depth)
|
||||
return best_bb;
|
||||
|
||||
/* Get the sinking threshold. If the statement to be moved has memory
|
||||
operands, then increase the threshold by 7% as those are even more
|
||||
profitable to avoid, clamping at 100%. */
|
||||
threshold = PARAM_VALUE (PARAM_SINK_FREQUENCY_THRESHOLD);
|
||||
if (gimple_vuse (stmt) || gimple_vdef (stmt))
|
||||
{
|
||||
threshold += 7;
|
||||
if (threshold > 100)
|
||||
threshold = 100;
|
||||
}
|
||||
|
||||
/* If BEST_BB is at the same nesting level, then require it to have
|
||||
significantly lower execution frequency to avoid gratutious movement. */
|
||||
if (best_bb->loop_depth == early_bb->loop_depth
|
||||
&& best_bb->frequency < (early_bb->frequency * threshold / 100.0))
|
||||
return best_bb;
|
||||
|
||||
/* No better block found, so return EARLY_BB, which happens to be the
|
||||
statement's original block. */
|
||||
return early_bb;
|
||||
}
|
||||
|
||||
/* Given a statement (STMT) and the basic block it is currently in (FROMBB),
|
||||
determine the location to sink the statement to, if any.
|
||||
Returns true if there is such location; in that case, TOGSI points to the
|
||||
@ -379,24 +445,10 @@ statement_sink_location (gimple stmt, basic_block frombb,
|
||||
if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
|
||||
return false;
|
||||
|
||||
/* It doesn't make sense to move to a dominator that post-dominates
|
||||
frombb, because it means we've just moved it into a path that always
|
||||
executes if frombb executes, instead of reducing the number of
|
||||
executions . */
|
||||
if (dominated_by_p (CDI_POST_DOMINATORS, frombb, commondom))
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "Not moving store, common dominator post-dominates from block.\n");
|
||||
return false;
|
||||
}
|
||||
commondom = select_best_block (frombb, commondom, stmt);
|
||||
|
||||
if (commondom == frombb || commondom->loop_depth > frombb->loop_depth)
|
||||
return false;
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Common dominator of all uses is %d\n",
|
||||
commondom->index);
|
||||
}
|
||||
if (commondom == frombb)
|
||||
return false;
|
||||
|
||||
*togsi = gsi_after_labels (commondom);
|
||||
|
||||
@ -415,13 +467,9 @@ statement_sink_location (gimple stmt, basic_block frombb,
|
||||
if (gimple_code (use) != GIMPLE_PHI)
|
||||
{
|
||||
sinkbb = gimple_bb (use);
|
||||
if (sinkbb == frombb || sinkbb->loop_depth > frombb->loop_depth
|
||||
|| sinkbb->loop_father != frombb->loop_father)
|
||||
return false;
|
||||
sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
|
||||
|
||||
/* Move the expression to a post dominator can't reduce the number of
|
||||
executions. */
|
||||
if (dominated_by_p (CDI_POST_DOMINATORS, frombb, sinkbb))
|
||||
if (sinkbb == frombb)
|
||||
return false;
|
||||
|
||||
*togsi = gsi_for_stmt (use);
|
||||
@ -431,21 +479,13 @@ statement_sink_location (gimple stmt, basic_block frombb,
|
||||
}
|
||||
|
||||
sinkbb = find_bb_for_arg (use, DEF_FROM_PTR (def_p));
|
||||
|
||||
/* This can happen if there are multiple uses in a PHI. */
|
||||
if (!sinkbb)
|
||||
return false;
|
||||
|
||||
/* This will happen when you have
|
||||
a_3 = PHI <a_13, a_26>
|
||||
|
||||
a_26 = VDEF <a_3>
|
||||
|
||||
If the use is a phi, and is in the same bb as the def,
|
||||
we can't sink it. */
|
||||
|
||||
if (gimple_bb (use) == frombb)
|
||||
return false;
|
||||
if (sinkbb == frombb || sinkbb->loop_depth > frombb->loop_depth
|
||||
|| sinkbb->loop_father != frombb->loop_father)
|
||||
|
||||
sinkbb = select_best_block (frombb, sinkbb, stmt);
|
||||
if (!sinkbb || sinkbb == frombb)
|
||||
return false;
|
||||
|
||||
/* If the latch block is empty, don't make it non-empty by sinking
|
||||
@ -454,11 +494,6 @@ statement_sink_location (gimple stmt, basic_block frombb,
|
||||
&& empty_block_p (sinkbb))
|
||||
return false;
|
||||
|
||||
/* Move the expression to a post dominator can't reduce the number of
|
||||
executions. */
|
||||
if (dominated_by_p (CDI_POST_DOMINATORS, frombb, sinkbb))
|
||||
return false;
|
||||
|
||||
*togsi = gsi_after_labels (sinkbb);
|
||||
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user