Enable recursive function versioning

2019-12-02  Feng Xue <fxue@os.amperecomputing.com>

        PR ipa/92133
        * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option.
        (ipa-cp-min-recursive-probability): Likewise.
        * params.opt (ipa-cp-max-recursive-depth): New.
        (ipa-cp-min-recursive-probability): Likewise.
        * ipa-cp.c (ipcp_lattice<valtype>::add_value): Add two new parameters
        val_p and unlimited.
        (self_recursively_generated_p): New function.
        (get_val_across_arith_op): Likewise.
        (propagate_vals_across_arith_jfunc): Add constant propagation for
        self-recursive function.
        (incorporate_penalties): Do not penalize pure self-recursive function.
        (good_cloning_opportunity_p): Dump node_is_self_scc flag.
        (propagate_constants_topo): Set node_is_self_scc flag for cgraph node.
        (get_info_about_necessary_edges): Relax hotness check for edge to
        self-recursive function.
        * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc.

2019-12-02  Feng Xue  <fxue@os.amperecomputing.com>

        PR ipa/92133
        * gcc.dg/ipa/ipa-clone-2.c: New test.

From-SVN: r278893
This commit is contained in:
Feng Xue 2019-12-02 06:37:30 +00:00 committed by Feng Xue
parent 4569f8b365
commit 9b14fc3326
7 changed files with 283 additions and 27 deletions

View File

@ -1,3 +1,23 @@
2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/92133
* doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option.
(ipa-cp-min-recursive-probability): Likewise.
* params.opt (ipa-cp-max-recursive-depth): New.
(ipa-cp-min-recursive-probability): Likewise.
* ipa-cp.c (ipcp_lattice<valtype>::add_value): Add two new parameters
val_p and unlimited.
(self_recursively_generated_p): New function.
(get_val_across_arith_op): Likewise.
(propagate_vals_across_arith_jfunc): Add constant propagation for
self-recursive function.
(incorporate_penalties): Do not penalize pure self-recursive function.
(good_cloning_opportunity_p): Dump node_is_self_scc flag.
(propagate_constants_topo): Set node_is_self_scc flag for cgraph node.
(get_info_about_necessary_edges): Relax hotness check for edge to
self-recursive function.
* ipa-prop.h (ipa_node_params): Add new field node_is_self_scc.
2019-12-01 Sandra Loosemore <sandra@codesourcery.com>
PR target/92499

View File

@ -12035,6 +12035,13 @@ IPA-CP calculates its own score of cloning profitability heuristics
and performs those cloning opportunities with scores that exceed
@option{ipa-cp-eval-threshold}.
@item ipa-cp-max-recursive-depth
Maximum depth of recursive cloning for self-recursive function.
@item ipa-cp-min-recursive-probability
Recursive cloning only when the probability of call being executed exceeds
the parameter.
@item ipa-cp-recursion-penalty
Percentage penalty the recursive functions will receive when they
are evaluated for cloning.

View File

@ -228,7 +228,9 @@ public:
inline bool set_contains_variable ();
bool add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val = NULL,
int src_idx = 0, HOST_WIDE_INT offset = -1);
int src_idx = 0, HOST_WIDE_INT offset = -1,
ipcp_value<valtype> **val_p = NULL,
bool unlimited = false);
void print (FILE * f, bool dump_sources, bool dump_benefits);
};
@ -1817,22 +1819,32 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
meaning. OFFSET -1 means the source is scalar and not a part of an
aggregate. */
aggregate. If non-NULL, VAL_P records address of existing or newly added
ipcp_value. UNLIMITED means whether value count should not exceed the limit
given by PARAM_IPA_CP_VALUE_LIST_SIZE. */
template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val,
int src_idx, HOST_WIDE_INT offset)
int src_idx, HOST_WIDE_INT offset,
ipcp_value<valtype> **val_p,
bool unlimited)
{
ipcp_value<valtype> *val;
ipcp_value<valtype> *val, *last_val = NULL;
if (val_p)
*val_p = NULL;
if (bottom)
return false;
for (val = values; val; val = val->next)
for (val = values; val; last_val = val, val = val->next)
if (values_equal_for_ipcp_p (val->value, newval))
{
if (val_p)
*val_p = val;
if (ipa_edge_within_scc (cs))
{
ipcp_value_source<valtype> *s;
@ -1847,7 +1859,7 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
return false;
}
if (values_count == param_ipa_cp_value_list_size)
if (!unlimited && values_count == param_ipa_cp_value_list_size)
{
/* We can only free sources, not the values themselves, because sources
of other values in this SCC might point to them. */
@ -1860,7 +1872,6 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
}
}
values = NULL;
return set_to_bottom ();
}
@ -1868,11 +1879,81 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
values_count++;
val = allocate_and_init_ipcp_value (newval);
val->add_source (cs, src_val, src_idx, offset);
val->next = values;
values = val;
val->next = NULL;
/* Add the new value to end of value list, which can reduce iterations
of propagation stage for recursive function. */
if (last_val)
last_val->next = val;
else
values = val;
if (val_p)
*val_p = val;
return true;
}
/* Return true, if a ipcp_value VAL is orginated from parameter value of
self-feeding recursive function by applying non-passthrough arithmetic
transformation. */
static bool
self_recursively_generated_p (ipcp_value<tree> *val)
{
class ipa_node_params *info = NULL;
for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
{
cgraph_edge *cs = src->cs;
if (!src->val || cs->caller != cs->callee->function_symbol ()
|| src->val == val)
return false;
if (!info)
info = IPA_NODE_REF (cs->caller);
class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
src->index);
ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself
: plats->aggs;
ipcp_value<tree> *src_val;
for (src_val = src_lat->values; src_val; src_val = src_val->next)
if (src_val == val)
break;
if (!src_val)
return false;
}
return true;
}
/* A helper function that returns result of operation specified by OPCODE on
the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
value of SRC_VAL. If the operation is binary, OPND2 is a constant value
acting as its second operand. If non-NULL, RES_TYPE is expected type of
the result. */
static tree
get_val_across_arith_op (enum tree_code opcode,
tree opnd1_type,
tree opnd2,
ipcp_value<tree> *src_val,
tree res_type)
{
tree opnd1 = src_val->value;
/* Skip source values that is incompatible with specified type. */
if (opnd1_type
&& !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
return NULL_TREE;
return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
}
/* Propagate values through an arithmetic transformation described by a jump
function associated with edge CS, taking values from SRC_LAT and putting
them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
@ -1896,24 +1977,76 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs,
ipcp_value<tree> *src_val;
bool ret = false;
/* Do not create new values when propagating within an SCC because if there
are arithmetic functions with circular dependencies, there is infinite
number of them and we would just make lattices bottom. If this condition
is ever relaxed we have to detect self-feeding recursive calls in
cgraph_edge_brings_value_p in a smarter way. */
/* Due to circular dependencies, propagating within an SCC through arithmetic
transformation would create infinite number of values. But for
self-feeding recursive function, we could allow propagation in a limited
count, and this can enable a simple kind of recursive function versioning.
For other scenario, we would just make lattices bottom. */
if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
ret = dest_lat->set_contains_variable ();
{
int i;
if (src_lat != dest_lat || param_ipa_cp_max_recursive_depth < 1)
return dest_lat->set_contains_variable ();
/* No benefit if recursive execution is in low probability. */
if (cs->sreal_frequency () * 100
<= ((sreal) 1) * param_ipa_cp_min_recursive_probability)
return dest_lat->set_contains_variable ();
auto_vec<ipcp_value<tree> *, 8> val_seeds;
for (src_val = src_lat->values; src_val; src_val = src_val->next)
{
/* Now we do not use self-recursively generated value as propagation
source, this is absolutely conservative, but could avoid explosion
of lattice's value space, especially when one recursive function
calls another recursive. */
if (self_recursively_generated_p (src_val))
{
ipcp_value_source<tree> *s;
/* If the lattice has already been propagated for the call site,
no need to do that again. */
for (s = src_val->sources; s; s = s->next)
if (s->cs == cs)
return dest_lat->set_contains_variable ();
}
else
val_seeds.safe_push (src_val);
}
/* Recursively generate lattice values with a limited count. */
FOR_EACH_VEC_ELT (val_seeds, i, src_val)
{
for (int j = 1; j < param_ipa_cp_max_recursive_depth; j++)
{
tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
src_val, res_type);
if (!cstval)
break;
ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
src_offset, &src_val, true);
gcc_checking_assert (src_val);
}
}
ret |= dest_lat->set_contains_variable ();
}
else
for (src_val = src_lat->values; src_val; src_val = src_val->next)
{
tree opnd1 = src_val->value;
tree cstval = NULL_TREE;
/* Skip source values that is incompatible with specified type. */
if (!opnd1_type
|| useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
/* Now we do not use self-recursively generated value as propagation
source, otherwise it is easy to make value space of normal lattice
overflow. */
if (self_recursively_generated_p (src_val))
{
ret |= dest_lat->set_contains_variable ();
continue;
}
tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
src_val, res_type);
if (cstval)
ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
src_offset);
@ -3038,7 +3171,7 @@ hint_time_bonus (ipa_hints hints)
static inline int64_t
incorporate_penalties (ipa_node_params *info, int64_t evaluation)
{
if (info->node_within_scc)
if (info->node_within_scc && !info->node_is_self_scc)
evaluation = (evaluation
* (100 - param_ipa_cp_recursion_penalty)) / 100;
@ -3082,7 +3215,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
count_sum.dump (dump_file);
fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
", threshold: %i\n",
info->node_within_scc ? ", scc" : "",
info->node_within_scc
? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
info->node_calling_single_call ? ", single_call" : "",
evaluation, param_ipa_cp_eval_threshold);
}
@ -3100,7 +3234,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
"size: %i, freq_sum: %i%s%s) -> evaluation: "
"%" PRId64 ", threshold: %i\n",
time_benefit, size_cost, freq_sum,
info->node_within_scc ? ", scc" : "",
info->node_within_scc
? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
info->node_calling_single_call ? ", single_call" : "",
evaluation, param_ipa_cp_eval_threshold);
@ -3594,14 +3729,30 @@ propagate_constants_topo (class ipa_topo_info *topo)
while (v)
{
struct cgraph_edge *cs;
class ipa_node_params *info = NULL;
bool self_scc = true;
for (cs = v->callees; cs; cs = cs->next_callee)
if (ipa_edge_within_scc (cs))
{
IPA_NODE_REF (v)->node_within_scc = true;
cgraph_node *callee = cs->callee->function_symbol ();
if (v != callee)
self_scc = false;
if (!info)
{
info = IPA_NODE_REF (v);
info->node_within_scc = true;
}
if (propagate_constants_across_call (cs))
push_node_to_stack (topo, cs->callee->function_symbol ());
push_node_to_stack (topo, callee);
}
if (info)
info->node_is_self_scc = self_scc;
v = pop_node_from_stack (topo);
}
@ -3983,6 +4134,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
hot |= cs->maybe_hot_p ();
if (cs->caller != dest)
non_self_recursive = true;
else if (src->val)
gcc_assert (values_equal_for_ipcp_p (src->val->value,
val->value));
}
cs = get_next_cgraph_edge_clone (cs);
}
@ -3996,6 +4150,19 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
*freq_sum = freq;
*count_sum = cnt;
*caller_count = count;
if (!hot && IPA_NODE_REF (dest)->node_within_scc)
{
struct cgraph_edge *cs;
/* Cold non-SCC source edge could trigger hot recursive execution of
function. Consider the case as hot and rely on following cost model
computation to further select right one. */
for (cs = dest->callers; cs; cs = cs->next_caller)
if (cs->caller == dest && cs->maybe_hot_p ())
return true;
}
return hot;
}

View File

@ -497,6 +497,8 @@ public:
unsigned node_dead : 1;
/* Node is involved in a recursion, potentionally indirect. */
unsigned node_within_scc : 1;
/* Node contains only direct recursion. */
unsigned node_is_self_scc : 1;
/* Node is calling a private function called only once. */
unsigned node_calling_single_call : 1;
/* False when there is something makes versioning impossible. */

View File

@ -198,6 +198,14 @@ Threshold ipa-cp opportunity evaluation that is still considered beneficial to c
Common Joined UInteger Var(param_ipa_cp_loop_hint_bonus) Init(64) Param
Compile-time bonus IPA-CP assigns to candidates which make loop bounds or strides known.
-param=ipa-cp-max-recursive-depth=
Common Joined UInteger Var(param_ipa_cp_max_recursive_depth) Init(8) Param
Maximum depth of recursive cloning for self-recursive function.
-param=ipa-cp-min-recursive-probability=
Common Joined UInteger Var(param_ipa_cp_min_recursive_probability) Init(2) Param
Recursive cloning only when the probability of call being executed exceeds the parameter.
-param=ipa-cp-recursion-penalty=
Common Joined UInteger Var(param_ipa_cp_recursion_penalty) Init(40) IntegerRange(0, 100) Param
Percentage penalty the recursive functions will receive when they are evaluated for cloning.

View File

@ -1,3 +1,8 @@
2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/92133
* gcc.dg/ipa/ipa-clone-2.c: New test.
2019-12-01 Sandra Loosemore <sandra@codesourcery.com>
PR target/92499

View File

@ -0,0 +1,47 @@
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */
int fn();
int data[100];
int recur_fn (int i)
{
int j;
if (i == 6)
{
fn();
fn();
fn();
fn();
fn();
fn();
fn();
fn();
fn();
fn();
fn();
fn();
return 10;
}
data[i] = i;
for (j = 0; j < 100; j++)
recur_fn (i + 1);
return i;
}
int main ()
{
int i;
for (i = 0; i < 100; i++)
recur_fn (1) + recur_fn (-5);
return 1;
}
/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */