ipa-cp.c (ipcp_compute_node_scale): Work around completely wrong profile updates.

* ipa-cp.c (ipcp_compute_node_scale): Work around completely
	wrong profile updates.
	* predict.c (counts_to_freqs): Be prepared for ENTRY/EXIT block
	having the largest frequency.
	* ira-live.c (ira_implicitly_set_insn_hard_regs): Silence
	used uninitialized warning.
	* tree-optimize.c (execute_fixup_cfg): Rescale entry and exit block
	frequencies.

From-SVN: r154462
This commit is contained in:
Jan Hubicka 2009-11-23 21:01:29 +01:00 committed by Jan Hubicka
parent bf92569441
commit 43558bcc9d
5 changed files with 30 additions and 3 deletions

View File

@ -1,3 +1,14 @@
2009-11-23 Jan Hubicka <jh@suse.cz>
* ipa-cp.c (ipcp_compute_node_scale): Work around completely
wrong profile updates.
* predict.c (counts_to_freqs): Be prepared for ENTRY/EXIT block
having the largest frequency.
* ira-live.c (ira_implicitly_set_insn_hard_regs): Silence
used uninitialized warning.
* tree-optimize.c (execute_fixup_cfg): Rescale entry and exit block
frequencies.
2009-11-23 Uros Bizjak <ubizjak@gmail.com>
* config/alpha/alpha.md (*cmp_sadd_sidi): Use gen_lowpart instead

View File

@ -578,7 +578,13 @@ build_const_val (struct ipcp_lattice *lat, tree tree_type)
/* Compute the proper scale for NODE. It is the ratio between the number of
direct calls (represented on the incoming cgraph_edges) and sum of all
invocations of NODE (represented as count in cgraph_node). */
invocations of NODE (represented as count in cgraph_node).
FIXME: This code is wrong. Since the callers can also be clones and
the clones are not scaled yet, the sums get unrealistically high.
To properly compute the counts, we would need to do propagation across
callgraph (as external call to A might imply call to non-cloned B
if A's clone calls cloned B). */
static void
ipcp_compute_node_scale (struct cgraph_node *node)
{
@ -589,6 +595,12 @@ ipcp_compute_node_scale (struct cgraph_node *node)
/* Compute sum of all counts of callers. */
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
sum += cs->count;
/* Work around the unrealistically high sum problem. We just don't want
the non-cloned body to have negative or very low frequency. Since
majority of execution time will be spent in clones anyway, this should
give good enough profile. */
if (sum > node->count * 9 / 10)
sum = node->count * 9 / 10;
if (node->count == 0)
ipcp_set_node_scale (node, 0);
else

View File

@ -745,7 +745,7 @@ single_reg_operand_class (int op_num)
void
ira_implicitly_set_insn_hard_regs (HARD_REG_SET *set)
{
int i, c, regno;
int i, c, regno = 0;
bool ignore_p;
enum reg_class cl;
rtx op;

View File

@ -2020,7 +2020,7 @@ counts_to_freqs (void)
gcov_type count_max, true_count_max = 0;
basic_block bb;
FOR_EACH_BB (bb)
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
true_count_max = MAX (bb->count, true_count_max);
count_max = MAX (true_count_max, 1);

View File

@ -255,6 +255,10 @@ execute_fixup_cfg (void)
else
count_scale = REG_BR_PROB_BASE;
ENTRY_BLOCK_PTR->count = cgraph_node (current_function_decl)->count;
EXIT_BLOCK_PTR->count = (EXIT_BLOCK_PTR->count * count_scale
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
FOR_EACH_BB (bb)
{
bb->count = (bb->count * count_scale