From 08dd2b683f98c6a721329f02f16a9a815a143f48 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Tue, 29 Nov 2016 03:19:04 +0100 Subject: [PATCH] shrink-wrap: New spread_components This patch changes spread_components to use a simpler algorithm that puts prologue components as early as possible, and epilogue components as late as possible. This allows better scheduling, and also saves a bit of code size. The blocks that run with some specific component enabled after this patch is a strict superset of those that had it before the patch. It does this by finding for every component the basic blocks where that component is not needed on some path from the entry block (it reuses head_components to store this), and similarly the blocks where the component is not needed on some path to the exit block (or the exit can not be reached from that block) (stored in tail_components). Blocks that then are not in both of those two sets get the component active. * shrink-wrap.c (init_separate_shrink_wrap): Do not clear head_components and tail_components. (spread_components): New algorithm. (emit_common_tails_for_components): Clear head_components and tail_components. (insert_prologue_epilogue_for_components): Write extra output to the dump file for sibcalls and abnormal exits. From-SVN: r242948 --- gcc/ChangeLog | 10 +++ gcc/shrink-wrap.c | 181 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 156 insertions(+), 35 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 07bcc26b5b6..da6c6409700 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2016-11-28 Segher Boessenkool + + * shrink-wrap.c (init_separate_shrink_wrap): Do not clear + head_components and tail_components. + (spread_components): New algorithm. + (emit_common_tails_for_components): Clear head_components and + tail_components. + (insert_prologue_epilogue_for_components): Write extra output to the + dump file for sibcalls and abnormal exits. + 2016-11-28 Segher Boessenkool PR rtl-optimization/78342 diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c index 8803200fbe0..59feca15aea 100644 --- a/gcc/shrink-wrap.c +++ b/gcc/shrink-wrap.c @@ -1131,8 +1131,6 @@ init_separate_shrink_wrap (sbitmap components) SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components)); SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components)); bitmap_clear (SW (bb)->has_components); - bitmap_clear (SW (bb)->head_components); - bitmap_clear (SW (bb)->tail_components); } } @@ -1253,48 +1251,151 @@ place_prologue_for_one_component (unsigned int which, basic_block head) } } -/* Mark HAS_COMPONENTS for every block dominated by at least one block with - HAS_COMPONENTS set for the respective components, starting at HEAD. */ +/* Set HAS_COMPONENTS in every block to the maximum it can be set to without + setting it on any path from entry to exit where it was not already set + somewhere (or, for blocks that have no path to the exit, consider only + paths from the entry to the block itself). */ static void -spread_components (basic_block head) +spread_components (sbitmap components) { - basic_block bb = head; - bool first_visit = true; - /* This keeps a tally of all components active. */ - sbitmap components = SW (head)->has_components; + basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun); + basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun); - for (;;) + /* A stack of all blocks left to consider, and a bitmap of all blocks + on that stack. */ + vec todo; + todo.create (n_basic_blocks_for_fn (cfun)); + bitmap seen = BITMAP_ALLOC (NULL); + + sbitmap old = sbitmap_alloc (SBITMAP_SIZE (components)); + + /* Find for every block the components that are *not* needed on some path + from the entry to that block. Do this with a flood fill from the entry + block. Every block can be visited at most as often as the number of + components (plus one), and usually much less often. */ + + if (dump_file) + fprintf (dump_file, "Spreading down...\n"); + + basic_block bb; + FOR_ALL_BB_FN (bb, cfun) + bitmap_clear (SW (bb)->head_components); + + bitmap_copy (SW (entry_block)->head_components, components); + + edge e; + edge_iterator ei; + + todo.quick_push (single_succ (entry_block)); + bitmap_set_bit (seen, single_succ (entry_block)->index); + while (!todo.is_empty ()) { - if (first_visit) - { - bitmap_ior (SW (bb)->has_components, SW (bb)->has_components, - components); + bb = todo.pop (); - if (first_dom_son (CDI_DOMINATORS, bb)) - { - components = SW (bb)->has_components; - bb = first_dom_son (CDI_DOMINATORS, bb); - continue; - } - } + bitmap_copy (old, SW (bb)->head_components); - components = SW (bb)->has_components; + FOR_EACH_EDGE (e, ei, bb->preds) + bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, + SW (e->src)->head_components); - if (next_dom_son (CDI_DOMINATORS, bb)) + bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components, + SW (bb)->has_components); + + if (!bitmap_equal_p (old, SW (bb)->head_components)) + FOR_EACH_EDGE (e, ei, bb->succs) + if (bitmap_set_bit (seen, e->dest->index)) + todo.quick_push (e->dest); + + bitmap_clear_bit (seen, bb->index); + } + + /* Find for every block the components that are *not* needed on some reverse + path from the exit to that block. */ + + if (dump_file) + fprintf (dump_file, "Spreading up...\n"); + + /* First, mark all blocks not reachable from the exit block as not needing + any component on any path to the exit. Mark everything, and then clear + again by a flood fill. */ + + FOR_ALL_BB_FN (bb, cfun) + bitmap_copy (SW (bb)->tail_components, components); + + FOR_EACH_EDGE (e, ei, exit_block->preds) + { + todo.quick_push (e->src); + bitmap_set_bit (seen, e->src->index); + } + + while (!todo.is_empty ()) + { + bb = todo.pop (); + + if (!bitmap_empty_p (SW (bb)->tail_components)) + FOR_EACH_EDGE (e, ei, bb->preds) + if (bitmap_set_bit (seen, e->src->index)) + todo.quick_push (e->src); + + bitmap_clear (SW (bb)->tail_components); + + bitmap_clear_bit (seen, bb->index); + } + + /* And then, flood fill backwards to find for every block the components + not needed on some path to the exit. */ + + bitmap_copy (SW (exit_block)->tail_components, components); + + FOR_EACH_EDGE (e, ei, exit_block->preds) + { + todo.quick_push (e->src); + bitmap_set_bit (seen, e->src->index); + } + + while (!todo.is_empty ()) + { + bb = todo.pop (); + + bitmap_copy (old, SW (bb)->tail_components); + + FOR_EACH_EDGE (e, ei, bb->succs) + bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, + SW (e->dest)->tail_components); + + bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components, + SW (bb)->has_components); + + if (!bitmap_equal_p (old, SW (bb)->tail_components)) + FOR_EACH_EDGE (e, ei, bb->preds) + if (bitmap_set_bit (seen, e->src->index)) + todo.quick_push (e->src); + + bitmap_clear_bit (seen, bb->index); + } + + /* Finally, mark everything not not needed both forwards and backwards. */ + + FOR_EACH_BB_FN (bb, cfun) + { + bitmap_and (SW (bb)->head_components, SW (bb)->head_components, + SW (bb)->tail_components); + bitmap_and_compl (SW (bb)->has_components, components, + SW (bb)->head_components); + } + + FOR_ALL_BB_FN (bb, cfun) + { + if (dump_file) { - bb = next_dom_son (CDI_DOMINATORS, bb); - basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb); - components = SW (parent)->has_components; - first_visit = true; - } - else - { - if (bb == head) - return; - bb = get_immediate_dominator (CDI_DOMINATORS, bb); - first_visit = false; + fprintf (dump_file, "bb %d components:", bb->index); + dump_components ("has", SW (bb)->has_components); + fprintf (dump_file, "\n"); } } + + sbitmap_free (old); + BITMAP_FREE (seen); } /* If we cannot handle placing some component's prologues or epilogues where @@ -1384,6 +1485,9 @@ emit_common_heads_for_components (sbitmap components) sbitmap tmp = sbitmap_alloc (SBITMAP_SIZE (components)); basic_block bb; + FOR_ALL_BB_FN (bb, cfun) + bitmap_clear (SW (bb)->head_components); + FOR_EACH_BB_FN (bb, cfun) { /* Find which prologue resp. epilogue components are needed for all @@ -1470,6 +1574,9 @@ emit_common_tails_for_components (sbitmap components) sbitmap tmp = sbitmap_alloc (SBITMAP_SIZE (components)); basic_block bb; + FOR_ALL_BB_FN (bb, cfun) + bitmap_clear (SW (bb)->tail_components); + FOR_EACH_BB_FN (bb, cfun) { /* Find which prologue resp. epilogue components are needed for all @@ -1608,6 +1715,10 @@ insert_prologue_epilogue_for_components (sbitmap components) e->dest->index); dump_components ("epi", epi); dump_components ("pro", pro); + if (e->flags & EDGE_SIBCALL) + fprintf (dump_file, " (SIBCALL)"); + else if (e->flags & EDGE_ABNORMAL) + fprintf (dump_file, " (ABNORMAL)"); fprintf (dump_file, "\n"); } @@ -1702,7 +1813,7 @@ try_shrink_wrapping_separate (basic_block first_bb) EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi) place_prologue_for_one_component (j, first_bb); - spread_components (first_bb); + spread_components (components); disqualify_problematic_components (components);