lto.c (lto_balanced_map): Fix typos in head comment.
* lto.c (lto_balanced_map): Fix typos in head comment. (lto_promote_cross_file_statics): Fix long lines and remove redundant test. From-SVN: r172584
This commit is contained in:
parent
e7f2301855
commit
b984326076
@ -1,3 +1,9 @@
|
||||
2011-04-16 Eric Botcazou <ebotcazou@adacore.com>
|
||||
|
||||
* lto.c (lto_balanced_map): Fix typos in head comment.
|
||||
(lto_promote_cross_file_statics): Fix long lines and remove redundant
|
||||
test.
|
||||
|
||||
2011-04-16 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* lto.c (lto_balanced_map): Update.
|
||||
@ -5,7 +11,8 @@
|
||||
2011-04-14 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* lto.c: Include ipa-inline.h
|
||||
(add_cgraph_node_to_partition, undo_partition): Use inline_summary accessor.
|
||||
(add_cgraph_node_to_partition, undo_partition): Use inline_summary
|
||||
accessor.
|
||||
(ipa_node_duplication_hook): Fix declaration.
|
||||
* Make-lang.in (lto.o): Update dependencies.
|
||||
|
||||
@ -77,8 +84,8 @@
|
||||
|
||||
PR lto/45721
|
||||
PR lto/45375
|
||||
* lto.c (partition_cgraph_node_p, partition_varpool_node_p): Weakrefs are
|
||||
not partitioned.
|
||||
* lto.c (partition_cgraph_node_p, partition_varpool_node_p): Weakrefs
|
||||
are not partitioned.
|
||||
|
||||
2010-12-22 Nathan Froyd <froydnj@codesourcery.com>
|
||||
|
||||
@ -376,11 +383,12 @@
|
||||
* lto.c (add_cgraph_node_to_partition): Forward declare; walk also
|
||||
nodes from same comdat group as well as all comdat functions referenced
|
||||
here.
|
||||
(add_varpool_node_to_partition, add_references_to_partition): New function.
|
||||
(lto_1_1_map): Skip COMDAT fnctions/variables; use add_varpool_node_to_partition;
|
||||
clear aux flags when done.
|
||||
(lto_promote_cross_file_statics): Do not promote stuff that gets duplicated to
|
||||
each ltrans.
|
||||
(add_varpool_node_to_partition, add_references_to_partition): New
|
||||
function.
|
||||
(lto_1_1_map): Skip COMDAT fnctions/variables; use
|
||||
add_varpool_node_to_partition; clear aux flags when done.
|
||||
(lto_promote_cross_file_statics): Do not promote stuff that gets
|
||||
duplicated to each ltrans.
|
||||
|
||||
2010-07-04 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
@ -588,8 +596,8 @@
|
||||
|
||||
2010-04-30 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* lto.c (get_filename_for_set): Look for cgraph node and if none found, use
|
||||
default name.
|
||||
* lto.c (get_filename_for_set): Look for cgraph node and if none found,
|
||||
use default name.
|
||||
(lto_wpa_write_files): Write any non-empty partition.
|
||||
|
||||
2010-04-30 Jan Hubicka <jh@suse.cz>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Top-level LTO routines.
|
||||
Copyright 2009, 2010 Free Software Foundation, Inc.
|
||||
Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
Contributed by CodeSourcery, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
@ -962,40 +962,43 @@ lto_1_to_1_map (void)
|
||||
}
|
||||
|
||||
|
||||
/* Group cgraph nodes in qually sized partitions.
|
||||
/* Group cgraph nodes into equally-sized partitions.
|
||||
|
||||
The algorithm deciding paritions are simple: nodes are taken in predefined
|
||||
order. The order correspond to order we wish to have functions in final
|
||||
output. In future this will be given by function reordering pass, but at
|
||||
the moment we use topological order that serve a good approximation.
|
||||
The partitioning algorithm is simple: nodes are taken in predefined order.
|
||||
The order corresponds to the order we want functions to have in the final
|
||||
output. In the future this will be given by function reordering pass, but
|
||||
at the moment we use the topological order, which is a good approximation.
|
||||
|
||||
The goal is to partition this linear order into intervals (partitions) such
|
||||
that all partitions have approximately the same size and that the number of
|
||||
callgraph or IPA reference edgess crossing boundaries is minimal.
|
||||
The goal is to partition this linear order into intervals (partitions) so
|
||||
that all the partitions have approximately the same size and the number of
|
||||
callgraph or IPA reference edges crossing boundaries is minimal.
|
||||
|
||||
This is a lot faster (O(n) in size of callgraph) than algorithms doing
|
||||
priority based graph clustering that are generally O(n^2) and since WHOPR
|
||||
is designed to make things go well across partitions, it leads to good results.
|
||||
priority-based graph clustering that are generally O(n^2) and, since
|
||||
WHOPR is designed to make things go well across partitions, it leads
|
||||
to good results.
|
||||
|
||||
We compute the expected size of partition as
|
||||
max (total_size / lto_partitions, min_partition_size).
|
||||
We use dynamic expected size of partition, so small programs
|
||||
are partitioning into enough partitions to allow use of multiple CPUs while
|
||||
large programs are not partitioned too much. Creating too many partition
|
||||
increase streaming overhead significandly.
|
||||
We compute the expected size of a partition as:
|
||||
|
||||
In the future we would like to bound maximal size of partition to avoid
|
||||
ltrans stage consuming too much memory. At the moment however WPA stage is
|
||||
most memory intensive phase at large benchmark since too many types and
|
||||
declarations are read into memory.
|
||||
max (total_size / lto_partitions, min_partition_size)
|
||||
|
||||
The function implement simple greedy algorithm. Nodes are begin added into
|
||||
current partition until 3/4th of expected partition size is reached.
|
||||
After this threshold we keep track of boundary size (number of edges going to
|
||||
other partitions) and continue adding functions until the current partition
|
||||
grows into a double of expected partition size. Then the process is undone
|
||||
till the point when minimal ration of boundary size and in partition calls
|
||||
was reached. */
|
||||
We use dynamic expected size of partition so small programs are partitioned
|
||||
into enough partitions to allow use of multiple CPUs, while large programs
|
||||
are not partitioned too much. Creating too many partitions significantly
|
||||
increases the streaming overhead.
|
||||
|
||||
In the future, we would like to bound the maximal size of partitions so as
|
||||
to prevent the LTRANS stage from consuming too much memory. At the moment,
|
||||
however, the WPA stage is the most memory intensive for large benchmarks,
|
||||
since too many types and declarations are read into memory.
|
||||
|
||||
The function implements a simple greedy algorithm. Nodes are being added
|
||||
to the current partition until after 3/4 of the expected partition size is
|
||||
reached. Past this threshold, we keep track of boundary size (number of
|
||||
edges going to other partitions) and continue adding functions until after
|
||||
the current partition has grown to twice the expected partition size. Then
|
||||
the process is undone to the point where the minimal ratio of boundary size
|
||||
and in-partition calls was reached. */
|
||||
|
||||
static void
|
||||
lto_balanced_map (void)
|
||||
@ -1330,7 +1333,8 @@ lto_promote_cross_file_statics (void)
|
||||
n_sets = VEC_length (ltrans_partition, ltrans_partitions);
|
||||
for (i = 0; i < n_sets; i++)
|
||||
{
|
||||
ltrans_partition part = VEC_index (ltrans_partition, ltrans_partitions, i);
|
||||
ltrans_partition part
|
||||
= VEC_index (ltrans_partition, ltrans_partitions, i);
|
||||
set = part->cgraph_set;
|
||||
vset = part->varpool_set;
|
||||
|
||||
@ -1361,16 +1365,15 @@ lto_promote_cross_file_statics (void)
|
||||
promote_var (vnode);
|
||||
}
|
||||
|
||||
/* We export initializers of read-only var into each partition
|
||||
referencing it. Folding might take declarations from the
|
||||
initializers and use it; so everything referenced from the
|
||||
initializers needs can be accessed from this partition after
|
||||
folding.
|
||||
/* We export the initializer of a read-only var into each partition
|
||||
referencing the var. Folding might take declarations from the
|
||||
initializer and use them, so everything referenced from the
|
||||
initializer can be accessed from this partition after folding.
|
||||
|
||||
This means that we need to promote all variables and functions
|
||||
referenced from all initializers from readonly vars referenced
|
||||
from this partition that are not in this partition.
|
||||
This needs to be done recursively. */
|
||||
referenced from all initializers of read-only vars referenced
|
||||
from this partition that are not in this partition. This needs
|
||||
to be done recursively. */
|
||||
for (vnode = varpool_nodes; vnode; vnode = vnode->next)
|
||||
if (const_value_known_p (vnode->decl)
|
||||
&& DECL_INITIAL (vnode->decl)
|
||||
@ -1378,13 +1381,16 @@ lto_promote_cross_file_statics (void)
|
||||
&& referenced_from_this_partition_p (&vnode->ref_list, set, vset)
|
||||
&& !pointer_set_insert (inserted, vnode))
|
||||
VEC_safe_push (varpool_node_ptr, heap, promoted_initializers, vnode);
|
||||
|
||||
while (!VEC_empty (varpool_node_ptr, promoted_initializers))
|
||||
{
|
||||
int i;
|
||||
struct ipa_ref *ref;
|
||||
|
||||
vnode = VEC_pop (varpool_node_ptr, promoted_initializers);
|
||||
for (i = 0; ipa_ref_list_reference_iterate (&vnode->ref_list, i, ref); i++)
|
||||
for (i = 0;
|
||||
ipa_ref_list_reference_iterate (&vnode->ref_list, i, ref);
|
||||
i++)
|
||||
{
|
||||
if (ref->refered_type == IPA_REF_CGRAPH)
|
||||
{
|
||||
@ -1399,17 +1405,17 @@ lto_promote_cross_file_statics (void)
|
||||
struct varpool_node *v = ipa_ref_varpool_node (ref);
|
||||
if (varpool_node_in_set_p (v, vset))
|
||||
continue;
|
||||
/* Constant pool references use internal labels and thus can not
|
||||
be made global. It is sensible to keep those ltrans local to
|
||||
allow better optimization. */
|
||||
|
||||
/* Constant pool references use internal labels and thus
|
||||
cannot be made global. It is sensible to keep those
|
||||
ltrans local to allow better optimization. */
|
||||
if (DECL_IN_CONSTANT_POOL (v->decl))
|
||||
{
|
||||
if (!pointer_set_insert (inserted, vnode))
|
||||
VEC_safe_push (varpool_node_ptr, heap,
|
||||
promoted_initializers, v);
|
||||
}
|
||||
else if (!DECL_IN_CONSTANT_POOL (v->decl)
|
||||
&& !v->externally_visible && v->analyzed)
|
||||
else if (!v->externally_visible && v->analyzed)
|
||||
{
|
||||
if (promote_var (v)
|
||||
&& DECL_INITIAL (v->decl)
|
||||
|
Loading…
Reference in New Issue
Block a user