From 2daffc47991e582d910b576e41891a7e91524575 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 21 Aug 2012 08:54:01 +0200 Subject: [PATCH] re PR fortran/48636 (Enable more inlining with -O2 and higher) PR fortran/48636 * ipa-inline.c (want_inline_small_function_p): Take loop_iterations hint. (edge_badness): Likewise. * ipa-inline.h (inline_hints_vals): Add INLINE_HINT_loop_iterations. (inline_summary): Add loop_iterations. * ipa-inline-analysis.c: Include tree-scalar-evolution.h. (dump_inline_hints): Dump loop_iterations. (reset_inline_summary): Free loop_iterations. (inline_node_duplication_hook): Update loop_iterations. (dump_inline_summary): Dump loop_iterations. (will_be_nonconstant_expr_predicate): New function. (estimate_function_body_sizes): Analyze loops. (estimate_node_size_and_time): Set hint loop_iterations. (inline_merge_summary): Merge loop iterations. (inline_read_section): Stream in loop_iterations. (inline_write_summary): Stream out loop_iterations. From-SVN: r190556 --- gcc/ChangeLog | 19 +++ gcc/ipa-inline-analysis.c | 173 +++++++++++++++++++++++- gcc/ipa-inline.c | 6 +- gcc/ipa-inline.h | 7 +- gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/ipa/inlinehint-1.c | 16 +++ 6 files changed, 220 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6382a8fbd0c..477111f1eba 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2012-08-20 Jan Hubicka + + PR fortran/48636 + * ipa-inline.c (want_inline_small_function_p): Take loop_iterations hint. + (edge_badness): Likewise. + * ipa-inline.h (inline_hints_vals): Add INLINE_HINT_loop_iterations. + (inline_summary): Add loop_iterations. + * ipa-inline-analysis.c: Include tree-scalar-evolution.h. + (dump_inline_hints): Dump loop_iterations. + (reset_inline_summary): Free loop_iterations. + (inline_node_duplication_hook): Update loop_iterations. + (dump_inline_summary): Dump loop_iterations. + (will_be_nonconstant_expr_predicate): New function. + (estimate_function_body_sizes): Analyze loops. + (estimate_node_size_and_time): Set hint loop_iterations. + (inline_merge_summary): Merge loop iterations. + (inline_read_section): Stream in loop_iterations. + (inline_write_summary): Stream out loop_iterations. + 2012-08-20 Florian Weimer PR c++/19351 diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index c30e81a88f1..ca80a8b88ba 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -88,6 +88,8 @@ along with GCC; see the file COPYING3. If not see #include "ipa-inline.h" #include "alloc-pool.h" #include "cfgloop.h" +#include "cfgloop.h" +#include "tree-scalar-evolution.h" /* Estimate runtime of function can easilly run into huge numbers with many nested loops. Be sure we can compute time * INLINE_SIZE_SCALE * 2 in an @@ -627,6 +629,11 @@ dump_inline_hints (FILE *f, inline_hints hints) hints &= ~INLINE_HINT_indirect_call; fprintf (f, " indirect_call"); } + if (hints & INLINE_HINT_loop_iterations) + { + hints &= ~INLINE_HINT_loop_iterations; + fprintf (f, " loop_iterations"); + } gcc_assert (!hints); } @@ -941,6 +948,11 @@ reset_inline_summary (struct cgraph_node *node) info->stack_frame_offset = 0; info->size = 0; info->time = 0; + if (info->loop_iterations) + { + pool_free (edge_predicate_pool, info->loop_iterations); + info->loop_iterations = NULL; + } VEC_free (condition, gc, info->conds); VEC_free (size_time_entry,gc, info->entry); for (e = node->callees; e; e = e->next_callee) @@ -1078,7 +1090,7 @@ inline_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst, * edge->frequency); edge->frequency = 0; } - *es->predicate = new_predicate; + edge_set_predicate (edge, &new_predicate); } /* Remap indirect edge predicates with the same simplificaiton as above. @@ -1110,7 +1122,29 @@ inline_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst, * edge->frequency); edge->frequency = 0; } - *es->predicate = new_predicate; + edge_set_predicate (edge, &new_predicate); + } + if (info->loop_iterations) + { + struct predicate new_predicate = true_predicate (); + + for (j = 0; info->loop_iterations->clause[j]; j++) + if (!(possible_truths & info->loop_iterations->clause[j])) + { + new_predicate = false_predicate (); + break; + } + else + add_clause (info->conds, &new_predicate, + possible_truths & info->loop_iterations->clause[j]); + if (false_predicate_p (&new_predicate) + || true_predicate_p (&new_predicate)) + info->loop_iterations = NULL; + else + { + info->loop_iterations = (struct predicate *)pool_alloc (edge_predicate_pool); + *info->loop_iterations = new_predicate; + } } /* If inliner or someone after inliner will ever start producing @@ -1136,7 +1170,15 @@ inline_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst, info->self_time = 0; } else - info->entry = VEC_copy (size_time_entry, gc, info->entry); + { + info->entry = VEC_copy (size_time_entry, gc, info->entry); + if (info->loop_iterations) + { + predicate p = *info->loop_iterations; + info->loop_iterations = (struct predicate *)pool_alloc (edge_predicate_pool); + *info->loop_iterations = p; + } + } } @@ -1308,6 +1350,11 @@ dump_inline_summary (FILE * f, struct cgraph_node *node) (double) e->time / INLINE_TIME_SCALE); dump_predicate (f, s->conds, &e->predicate); } + if (s->loop_iterations) + { + fprintf (f, " loop iterations:"); + dump_predicate (f, s->conds, s->loop_iterations); + } fprintf (f, " calls:\n"); dump_inline_edge_summary (f, 4, node, s); fprintf (f, "\n"); @@ -1780,6 +1827,46 @@ compute_bb_predicates (struct cgraph_node *node, typedef struct predicate predicate_t; DEF_VEC_O (predicate_t); DEF_VEC_ALLOC_O (predicate_t, heap); +/* Return predicate specifying when the STMT might have result that is not + a compile time constant. */ + +static struct predicate +will_be_nonconstant_expr_predicate (struct ipa_node_params *info, + struct inline_summary *summary, + tree expr, + VEC (predicate_t, heap) *nonconstant_names) +{ + tree parm; + int index; + + while (UNARY_CLASS_P (expr)) + expr = TREE_OPERAND (expr, 0); + + parm = unmodified_parm (NULL, expr); + if (parm + && (index = ipa_get_param_decl_index (info, parm)) >= 0) + return add_condition (summary, index, NULL, CHANGED, NULL_TREE); + if (is_gimple_min_invariant (expr)) + return false_predicate (); + if (TREE_CODE (expr) == SSA_NAME) + return VEC_index (predicate_t, nonconstant_names, + SSA_NAME_VERSION (expr)); + if (BINARY_CLASS_P (expr)) + { + struct predicate p1 = will_be_nonconstant_expr_predicate (info, summary, TREE_OPERAND (expr, 0), nonconstant_names); + struct predicate p2; + if (true_predicate_p (&p1)) + return p1; + p2 = will_be_nonconstant_expr_predicate (info, summary, TREE_OPERAND (expr, 0), nonconstant_names); + return or_predicates (summary->conds, &p1, &p2); + } + else + { + debug_tree (expr); + gcc_unreachable (); + } + return false_predicate (); +} /* Return predicate specifying when the STMT might have result that is not @@ -2176,6 +2263,51 @@ estimate_function_body_sizes (struct cgraph_node *node, bool early) time = (time + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; if (time > MAX_TIME) time = MAX_TIME; + + if (!early && nonconstant_names) + { + struct loop *loop; + loop_iterator li; + predicate loop_iterations = true_predicate (); + + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (LOOPS_NORMAL + | LOOPS_HAVE_RECORDED_EXITS); + if (dump_file && (dump_flags & TDF_DETAILS)) + flow_loops_dump (dump_file, NULL, 0); + scev_initialize (); + FOR_EACH_LOOP (li, loop, 0) + { + VEC (edge, heap) *exits; + edge ex; + unsigned int j; + struct tree_niter_desc niter_desc; + + exits = get_loop_exit_edges (loop); + FOR_EACH_VEC_ELT (edge, exits, j, ex) + if (number_of_iterations_exit (loop, ex, &niter_desc, false) + && !is_gimple_min_invariant (niter_desc.niter)) + { + predicate will_be_nonconstant + = will_be_nonconstant_expr_predicate (parms_info, info, + niter_desc.niter, nonconstant_names); + if (!true_predicate_p (&will_be_nonconstant) + && !false_predicate_p (&will_be_nonconstant)) + /* This is slightly inprecise. We may want to represent each loop with + independent predicate. */ + loop_iterations = and_predicates (info->conds, &loop_iterations, &will_be_nonconstant); + } + VEC_free (edge, heap, exits); + } + if (!true_predicate_p (&loop_iterations)) + { + inline_summary (node)->loop_iterations = (struct predicate *)pool_alloc (edge_predicate_pool); + *inline_summary (node)->loop_iterations = loop_iterations; + } + scev_finalize (); + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); + } inline_summary (node)->self_time = time; inline_summary (node)->self_size = size; VEC_free (predicate_t, heap, nonconstant_names); @@ -2459,6 +2591,10 @@ estimate_node_size_and_time (struct cgraph_node *node, } + if (info->loop_iterations + && !evaluate_predicate (info->loop_iterations, possible_truths)) + hints |=INLINE_HINT_loop_iterations; + if (time > MAX_TIME * INLINE_TIME_SCALE) time = MAX_TIME * INLINE_TIME_SCALE; @@ -2842,6 +2978,28 @@ inline_merge_summary (struct cgraph_edge *edge) } remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map, offset_map, clause, &toplev_predicate); + if (callee_info->loop_iterations) + { + predicate p = remap_predicate (info, callee_info, + callee_info->loop_iterations, + operand_map, offset_map, + clause, + &toplev_predicate); + if (!false_predicate_p (&p) + && !true_predicate_p (&p)) + { + if (!info->loop_iterations) + { + info->loop_iterations + = (struct predicate *)pool_alloc (edge_predicate_pool); + *info->loop_iterations = p; + } + else + *info->loop_iterations = and_predicates (info->conds, + info->loop_iterations, + &p); + } + } inline_update_callee_summaries (edge->callee, inline_edge_summary (edge)->loop_depth); @@ -3269,6 +3427,7 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, lto_symtab_encoder_t encoder; struct bitpack_d bp; struct cgraph_edge *e; + predicate p; index = streamer_read_uhwi (&ib); encoder = file_data->symtab_node_encoder; @@ -3310,6 +3469,13 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, VEC_safe_push (size_time_entry, gc, info->entry, &e); } + + p = read_predicate (&ib); + if (!true_predicate_p (&p)) + { + info->loop_iterations = (struct predicate *)pool_alloc (edge_predicate_pool); + *info->loop_iterations = p; + } for (e = node->callees; e; e = e->next_callee) read_inline_edge_summary (&ib, e); for (e = node->indirect_calls; e; e = e->next_callee) @@ -3456,6 +3622,7 @@ inline_write_summary (void) streamer_write_uhwi (ob, e->time); write_predicate (ob, &e->predicate); } + write_predicate (ob, info->loop_iterations); for (edge = node->callees; edge; edge = edge->next_callee) write_inline_edge_summary (ob, edge); for (edge = node->indirect_calls; edge; edge = edge->next_callee) diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 55d9a521c32..a1d703a6b7f 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -480,7 +480,8 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) hints suggests that inlining given function is very profitable. */ else if (DECL_DECLARED_INLINE_P (callee->symbol.decl) && growth >= MAX_INLINE_INSNS_SINGLE - && !(hints & INLINE_HINT_indirect_call)) + && !(hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations))) { e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; want_inline = false; @@ -863,7 +864,8 @@ edge_badness (struct cgraph_edge *edge, bool dump) if (dump) fprintf (dump_file, "Badness overflow\n"); } - if (hints & INLINE_HINT_indirect_call) + if (hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations)) badness /= 8; if (dump) { diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index fca99e629b0..839bc237f2a 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -45,7 +45,8 @@ typedef struct GTY(()) condition /* Inline hints are reasons why inline heuristics should preffer inlining given function. They are represtented as bitmap of the following values. */ enum inline_hints_vals { - INLINE_HINT_indirect_call = 1 + INLINE_HINT_indirect_call = 1, + INLINE_HINT_loop_iterations = 2 }; typedef int inline_hints; @@ -118,6 +119,10 @@ struct GTY(()) inline_summary merged during inlining. */ conditions conds; VEC(size_time_entry,gc) *entry; + + /* Predicate on when some loop in the function sbecomes to have known + bounds. */ + struct predicate * GTY((skip)) loop_iterations; }; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 84895e11dd7..f6151fcca7b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-08-20 Jan Hubicka + + PR fortran/48636 + * gcc.dg/ipa/inlinehint-1.c: New. + 2012-08-20 Florian Weimer PR c++/19351 diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-1.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-1.c new file mode 100644 index 00000000000..9810e25c19b --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-1.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining -fno-ipa-cp" } */ +test (int a) +{ + int i; + for (i=0; i