diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 50fddfe3ee3..abbb5b63161 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,175 @@ +2004-04-09 Caroline Tice + + * basic-block.h (struct edge_def): Add new field, crossing_edge. + (struct basic_block_def): Add new field, partition. + (UNPARTITIONED, HOT_PARTITION, COLD_PARTITION): New constant macro + definitions. + (partition_hot_cold_basic_blocks): Add extern function + declaration. + * bb-reorder.c (function.h, obstack.h, expr.h, regs.h): Add four new + include statements. + (N_ROUNDS): Increase the maximum number of rounds by 1. + (branch_threshold): Add array value for new round. + (exec_threshold): Add array value for new round. + (push_to_next_round_p): New function. + (add_unlikely_executed_notes): New function. + (find_rarely_executed_basic_blocks_and_crossing_edges): New function. + (mark_bb_for_unlikely_executed_section): New function. + (add_labels_and_missing_jumps): New function. + (add_reg_crossing_jump_notes): New function. + (fix_up_fall_thru_edges): New function. + (find_jump_block): New function. + (fix_crossing_conditional_branches): New function. + (fix_crossing_unconditional_branches): New function. + (fix_edges_for_rarely_executed_code): New function. + (partition_hot_cold_basic_blocks): New function. + (find_traces): Add an extra round for partitioning hot/cold + basic blocks. + (find_traces_1_round): Add a parameter. Modify to push all cold blocks, + and only cold blocks, into the last (extra) round of collecting traces. + (better_edge_p): Add a parameter. Modify to favor non-crossing edges + over crossing edges. + (bb_to_key): Add code to correctly identify cold blocks when + doing partitioning. + (connect_traces): Modify to connect all the non-cold traces first, then + go back and connect up all the cold traces. + (reorder_basic_blocks): Add call to add_unlikely_executed_notes. + * cfg.c (entry_exit_blocks): Add initialization for partition field in + entry and exit blocks. + * cfgbuild.c (make_edges): Update current_function_has_computed_jump + if we are doing hot/cold partitioning. + * cfgcleanup.c (cfglayout.h): Add new include statement. + (try_simplify_condjump): Modify to not attempt on blocks with jumps + that cross section boundaries. + (try_forward_edges): Likewise. + (merge_blocks_move_predecessor_nojumps): Likewise. + (merge_blocks_move_successor_nojumps): Likewise. + (merge_blocks_move): Likewise. + (try_crossjump_to_edge): Modify to not attempt after we have done + the block partitioning. + (try_crossjump_bb): Modify to not attempt on blocks with jumps that + cross section boundaries. + (try_optimize_cfg): Likewise. + * cfghooks.c (tidy_fallthru_edges): Modify to not remove indirect + jumps that cross section boundaries. + * cfglayout.c (flags.h): Add new include statement. + (update_unlikely_executed_notes): New function. + (fixup_reorder_chain): Add code so when a new jumping basic block is + added, it's UNLIKELY_EXECUTED_CODE and REG_CROSSING_JUMP notes are + updated appropriately. + (duplicate_insn_chain): Add code to duplicate the new NOTE insn + introduced by this optimization. + * cfglayout.h (scan_ahead_for_unlikely_executed_note): Add new + extern function declaration. + * cfgrtl.c (can_delete_note_p): Add NOTE_INSN_UNLIKELY_EXECUTED_CODE to + list of notes that can be deleted. + (create_basic_block_structure): Add initialization for partition field. + (rtl_can_merge_blocks): Modify to test blocks for jumps that cross + section boundaries. 
+ (try_redirect_by_replacing_jump): Modify to not attempt on jumps that + cross section boundaries. + (commit_one_edge_insertion): Add code so newly created basic block + ends up in correct (hot or cold) section. Modify to disallow + insertions before NOTE_INSN_UNLIKELY_EXECUTED_CODE notes. + (rtl_verify_flow_info_1): Add code to verify that no fall_thru edge + crosses section boundaries. + (cfg_layout_can_merge_blocks_p): Modify to test blocks for jumps that + cross section boundaries. + (force_nonfallthru_and_redirect): Modify to make sure new basic block + ends up in correct section, with correct notes attached. + * common.opt (freorder-blocks-and-partition): Add new flag for this + optimization. + * dbxout.c (dbx_function_end): Add code to make sure scope labels at + the end of functions are written into the correct (hot or cold) + section. + (dbx_source_file): Add code so writing debug file information + doesn't incorrectly change sections. + * defaults.h (NORMAL_TEXT_SECTION_NAME): New constant macro, for use + in partitioning hot/cold basic blocks into separate sections. + (SECTION_FORMAT_STRING): New constant macro, for linux/i386 hot/cold + section partitioning. + (HAS_LONG_COND_BRANCH): New constant macro, indicating whether or not + conditional branches can span all of memory. + (HAS_LONG_UNCOND_BRANCH): New constant macro, indicationg whether or not + unconditional branches can span all of memory. + * final.c (scan_ahead_for_unlikely_executed_note): New function. + (final_scan_insn): Add code to check for NOTE instruction indicating + whether basic block belongs in hot or cold section, and to make sure + the current basic block is being written to the appropriate section. + Also added code to ensure that jump table basic blocks end up in the + correct section. + * flags.h (flag_reorder_blocks_and_partition): New flag. + * ifcvt.c (find_if_case_1): Modify to not attempt if conversion if + one of the branches has a jump that crosses between sections. + (find_if_case_2): Likewise. + (ifcvt): Modify to not attempt to mark loop exit edges after + hot/cold partitioning has occurred. + * opts.c (decode_options): Code to handle new flag, + flag_reorder_blocks_and_partition; also to turn it off if + flag_exceptions is on. + (common_handle_option): Code to handle new flag, + flag_reorder_blocks_and_partition. + * output.h (unlikely_text_section): New extern function declaration. + (in_unlikely_text_section): New extern function declaration. + * passes.c (rest_of_handle_stack_regs): Add + flag_reorder_blocks_and_partition as an 'or' condition for calling + reorder_basic_blocks. + (rest_of_handle_reorder_blocks): Add flag_reorder_blocks_and_partition + as an 'or' condition for calling reorder_basic_blocks. + (rest_of_compilation): Add call to partition_hot_cold_basic_blocks. + * print-rtl.c (print_rtx): Add code for handling new note, + NOTE_INSN_UNLIKELY_EXECUTED_CODE + * rtl.c (NOTE_INSN_UNLIKELY_EXECUTED_CODE): New note insn (see below). + (REG_CROSSING_JUMP): New kind of reg_note, to mark jumps that + cross between section boundaries. + * rtl.h (NOTE_INSN_UNLIKELY_EXECUTED_CODE): New note instruction, + indicating the basic block containing it belongs in the cold section. + (REG_CROSSING_JUMP): New type of reg_note, to mark jumps that cross + between hot and cold sections. + * toplev.c (flag_reorder_blocks_and_partition): Add code to + initialize this flag, and to tie it to the command-line option + freorder-blocks-and-partition. + * varasm.c (cfglayout.h): Add new include statement. 
+ (unlikely_section_label_printed): New global variable, used for + determining when to output section name labels for cold sections. + (in_section): Add in_unlikely_executed_text to enum data structure. + (text_section): Modify code to use SECTION_FORMAT_STRING and + NORMAL_TEXT_SECTION_NAME macros. + (unlikely_text_section): New function. + (in_unlikely_text_section): New function. + (function_section): Add code to make sure beginning of function is + written into correct section (hot or cold). + (assemble_start_function): Add code to make sure stuff is written to + the correct section. + (assemble_zeros): Add in_unlikely_text_section as an 'or' condition + to an if statement that was checking 'in_text_section'. + (assemble_variable): Add 'in_unlikely_text_section' as an 'or' + condition to an if statement that was checking 'in_text_section'. + (default_section_type_flags_1): Add check: if in cold section + flags = SECTION_CODE. + * config/darwin.c (darwin_asm_named_section): Modify to use + SECTION_FORMAT_STRING if we are partitioning hot/cold blocks. + * config/i386/i386.h (HAS_LONG_COND_BRANCH): Defined this macro + specifically for the i386. + (HAS_LONG_UNCOND_BRANCH): Defined this macro specifically for the i386. + * config/rs6000/darwin.h (UNLIKELY_EXECUTED_TEXT_SECTION_NAME): Change + text string to something more informative. + (NORMAL_TEXT_SECTION_NAME): Add new definition. + (SECTION_FORMAT_STRING): Add new definition. + * config/rs6000/rs6000.c (rs6000_assemble_integer): Add + '!in_unlikely_text_section' as an 'and' condition to an if statement + that was already checking '!in_text_section'. + * config/rs6000/sysv4.h (HOT_TEXT_SECTION_NAME,NORMAL_TEXT_SECTION_NAME, + UNLIKELY_EXECUTED_TEXT_SECTION_NAME,SECTION_FORMAT_STRING): Make + sure these are properly defined for linux on ppc. + * doc/invoke.texi (freorder-blocks-and-partition): Add documentation + for this new flag. + * doc/rtl.texi (REG_CROSSING_JUMP): Add documentation for new + reg_note. + * doc/tm.texi (NORMAL_TEXT_SECTION_NAME, SECTION_FORMAT_STRING, + HAS_LONG_COND_BRANCH, HAS_LONG_UNCOND_BRANCH): Add documentation for + these new macros. + 2004-04-08 Roger Sayle * function.c (gen_mem_addressof): When changing the RTX from a REG diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 05f41c01948..2c3ef581111 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -138,6 +138,8 @@ typedef struct edge_def { int probability; /* biased by REG_BR_PROB_BASE */ gcov_type count; /* Expected number of executions calculated in profile.c */ + bool crossing_edge; /* Crosses between hot and cold sections, when + we do partitioning. */ } *edge; #define EDGE_FALLTHRU 1 /* 'Straight line' flow */ @@ -246,6 +248,9 @@ typedef struct basic_block_def { /* Various flags. See BB_* below. */ int flags; + /* Which section block belongs in, when partitioning basic blocks. */ + int partition; + /* Additional data maintained by cfg_layout routines. */ struct reorder_block_def *rbi; } *basic_block; @@ -260,6 +265,12 @@ typedef struct basic_block_def { #define BB_IRREDUCIBLE_LOOP 16 #define BB_SUPERBLOCK 32 +/* Partitions, to be used when partitioning hot and cold basic blocks into + separate sections. */ +#define UNPARTITIONED 0 +#define HOT_PARTITION 1 +#define COLD_PARTITION 2 + /* Number of basic blocks in the current function. 
*/ extern int n_basic_blocks; @@ -611,6 +622,7 @@ extern bool control_flow_insn_p (rtx); /* In bb-reorder.c */ extern void reorder_basic_blocks (void); +extern void partition_hot_cold_basic_blocks (void); /* In dominance.c */ diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c index 74f93203f23..ae335c1d74b 100644 --- a/gcc/bb-reorder.c +++ b/gcc/bb-reorder.c @@ -77,15 +77,21 @@ #include "cfglayout.h" #include "fibheap.h" #include "target.h" +#include "function.h" +#include "obstack.h" +#include "expr.h" +#include "regs.h" -/* The number of rounds. */ -#define N_ROUNDS 4 +/* The number of rounds. In most cases there will only be 4 rounds, but + when partitioning hot and cold basic blocks into separate sections of + the .o file there will be an extra round.*/ +#define N_ROUNDS 5 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */ -static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0}; +static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0}; /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */ -static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0}; +static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0}; /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry block the edge destination is not duplicated while connecting traces. */ @@ -146,14 +152,58 @@ static void find_traces (int *, struct trace *); static basic_block rotate_loop (edge, struct trace *, int); static void mark_bb_visited (basic_block, int); static void find_traces_1_round (int, int, gcov_type, struct trace *, int *, - int, fibheap_t *); + int, fibheap_t *, int); static basic_block copy_bb (basic_block, edge, basic_block, int); static fibheapkey_t bb_to_key (basic_block); -static bool better_edge_p (basic_block, edge, int, int, int, int); +static bool better_edge_p (basic_block, edge, int, int, int, int, edge); static void connect_traces (int, struct trace *); static bool copy_bb_p (basic_block, int); static int get_uncond_jump_length (void); +static bool push_to_next_round_p (basic_block, int, int, int, gcov_type); +static void add_unlikely_executed_notes (void); +static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *, + int *, + int *); +static void mark_bb_for_unlikely_executed_section (basic_block); +static void add_labels_and_missing_jumps (edge *, int); +static void add_reg_crossing_jump_notes (void); +static void fix_up_fall_thru_edges (void); +static void fix_edges_for_rarely_executed_code (edge *, int); +static void fix_crossing_conditional_branches (void); +static void fix_crossing_unconditional_branches (void); +/* Check to see if bb should be pushed into the next round of trace + collections or not. Reasons for pushing the block forward are 1). + If the block is cold, we are doing partitioning, and there will be + another round (cold partition blocks are not supposed to be + collected into traces until the very last round); or 2). There will + be another round, and the basic block is not "hot enough" for the + current round of trace collection. 
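+   For example, when partitioning is enabled a COLD_PARTITION block is
+   pushed forward on every round except the last one, even if its
+   frequency and count would otherwise let it start a trace earlier.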
*/ + +static bool +push_to_next_round_p (basic_block bb, int round, int number_of_rounds, + int exec_th, gcov_type count_th) +{ + bool there_exists_another_round; + bool cold_block; + bool block_not_hot_enough; + + there_exists_another_round = round < number_of_rounds - 1; + + cold_block = (flag_reorder_blocks_and_partition + && bb->partition == COLD_PARTITION); + + block_not_hot_enough = (bb->frequency < exec_th + || bb->count < count_th + || probably_never_executed_bb_p (bb)); + + if (there_exists_another_round + && (cold_block || block_not_hot_enough)) + return true; + else + return false; +} + /* Find the traces for Software Trace Cache. Chain each trace through RBI()->next. Store the number of traces to N_TRACES and description of traces to TRACES. */ @@ -162,9 +212,18 @@ static void find_traces (int *n_traces, struct trace *traces) { int i; + int number_of_rounds; edge e; fibheap_t heap; + /* Add one extra round of trace collection when partitioning hot/cold + basic blocks into separate sections. The last round is for all the + cold blocks (and ONLY the cold blocks). */ + + number_of_rounds = N_ROUNDS - 1; + if (flag_reorder_blocks_and_partition) + number_of_rounds = N_ROUNDS; + /* Insert entry points of function into heap. */ heap = fibheap_new (); max_entry_frequency = 0; @@ -181,7 +240,7 @@ find_traces (int *n_traces, struct trace *traces) } /* Find the traces. */ - for (i = 0; i < N_ROUNDS; i++) + for (i = 0; i < number_of_rounds; i++) { gcov_type count_threshold; @@ -195,7 +254,8 @@ find_traces (int *n_traces, struct trace *traces) find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000, max_entry_frequency * exec_threshold[i] / 1000, - count_threshold, traces, n_traces, i, &heap); + count_threshold, traces, n_traces, i, &heap, + number_of_rounds); } fibheap_delete (heap); @@ -354,8 +414,13 @@ mark_bb_visited (basic_block bb, int trace) static void find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, struct trace *traces, int *n_traces, int round, - fibheap_t *heap) + fibheap_t *heap, int number_of_rounds) { + /* The following variable refers to the last round in which non-"cold" + blocks may be collected into a trace. */ + + int last_round = N_ROUNDS - 1; + /* Heap for discarded basic blocks which are possible starting points for the next round. */ fibheap_t new_heap = fibheap_new (); @@ -374,10 +439,13 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, if (dump_file) fprintf (dump_file, "Getting bb %d\n", bb->index); - /* If the BB's frequency is too low send BB to the next round. */ - if (round < N_ROUNDS - 1 - && (bb->frequency < exec_th || bb->count < count_th - || probably_never_executed_bb_p (bb))) + /* If the BB's frequency is too low send BB to the next round. When + partitioning hot/cold blocks into separate sections, make sure all + the cold blocks (and ONLY the cold blocks) go into the (extra) final + round. 
*/ + + if (push_to_next_round_p (bb, round, number_of_rounds, exec_th, + count_th)) { int key = bb_to_key (bb); bbd[bb->index].heap = new_heap; @@ -427,6 +495,10 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, && e->dest->rbi->visited != *n_traces) continue; + if (e->dest->partition == COLD_PARTITION + && round < last_round) + continue; + prob = e->probability; freq = EDGE_FREQUENCY (e); @@ -436,7 +508,11 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, || prob < branch_th || freq < exec_th || e->count < count_th) continue; - if (better_edge_p (bb, e, prob, freq, best_prob, best_freq)) + /* If partitioning hot/cold basic blocks, don't consider edges + that cross section boundaries. */ + + if (better_edge_p (bb, e, prob, freq, best_prob, best_freq, + best_edge)) { best_edge = e; best_prob = prob; @@ -490,7 +566,13 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, || prob < branch_th || freq < exec_th || e->count < count_th) { - if (round < N_ROUNDS - 1) + /* When partitioning hot/cold basic blocks, make sure + the cold blocks (and only the cold blocks) all get + pushed to the last round of trace collection. */ + + if (push_to_next_round_p (e->dest, round, + number_of_rounds, + exec_th, count_th)) which_heap = new_heap; } @@ -588,6 +670,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, && !(e->flags & EDGE_COMPLEX) && !e->dest->rbi->visited && !e->dest->pred->pred_next + && !e->crossing_edge && e->dest->succ && (e->dest->succ->flags & EDGE_CAN_FALLTHRU) && !(e->dest->succ->flags & EDGE_COMPLEX) @@ -707,7 +790,8 @@ bb_to_key (basic_block bb) int priority = 0; /* Do not start in probably never executed blocks. */ - if (probably_never_executed_bb_p (bb)) + + if (bb->partition == COLD_PARTITION || probably_never_executed_bb_p (bb)) return BB_FREQ_MAX; /* Prefer blocks whose predecessor is an end of some trace @@ -739,7 +823,7 @@ bb_to_key (basic_block bb) static bool better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob, - int best_freq) + int best_freq, edge cur_best_edge) { bool is_better_edge; @@ -770,6 +854,16 @@ better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob, else is_better_edge = false; + /* If we are doing hot/cold partitioning, make sure that we always favor + non-crossing edges over crossing edges. */ + + if (!is_better_edge + && flag_reorder_blocks_and_partition + && cur_best_edge + && cur_best_edge->crossing_edge + && !e->crossing_edge) + is_better_edge = true; + return is_better_edge; } @@ -779,7 +873,10 @@ static void connect_traces (int n_traces, struct trace *traces) { int i; + int unconnected_hot_trace_count = 0; + bool cold_connected = true; bool *connected; + bool *cold_traces; int last_trace; int freq_threshold; gcov_type count_threshold; @@ -792,17 +889,66 @@ connect_traces (int n_traces, struct trace *traces) connected = xcalloc (n_traces, sizeof (bool)); last_trace = -1; - for (i = 0; i < n_traces; i++) + + /* If we are partitioning hot/cold basic blocks, mark the cold + traces as already connnected, to remove them from consideration + for connection to the hot traces. After the hot traces have all + been connected (determined by "unconnected_hot_trace_count"), we + will go back and connect the cold traces. 
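+     The effect is that every hot trace is laid out before any cold
+     trace, so the cold blocks end up grouped together at the end of
+     the function.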
*/ + + cold_traces = xcalloc (n_traces, sizeof (bool)); + + if (flag_reorder_blocks_and_partition) + for (i = 0; i < n_traces; i++) + { + if (traces[i].first->partition == COLD_PARTITION) + { + connected[i] = true; + cold_traces[i] = true; + cold_connected = false; + } + else + unconnected_hot_trace_count++; + } + + for (i = 0; i < n_traces || !cold_connected ; i++) { int t = i; int t2; edge e, best; int best_len; + /* If we are partitioning hot/cold basic blocks, check to see + if all the hot traces have been connected. If so, go back + and mark the cold traces as unconnected so we can connect + them up too. Re-set "i" to the first (unconnected) cold + trace. Use flag "cold_connected" to make sure we don't do + this step more than once. */ + + if (flag_reorder_blocks_and_partition + && (i >= n_traces || unconnected_hot_trace_count <= 0) + && !cold_connected) + { + int j; + int first_cold_trace = -1; + + for (j = 0; j < n_traces; j++) + if (cold_traces[j]) + { + connected[j] = false; + if (first_cold_trace == -1) + first_cold_trace = j; + } + i = t = first_cold_trace; + cold_connected = true; + } + if (connected[t]) continue; connected[t] = true; + if (unconnected_hot_trace_count > 0) + unconnected_hot_trace_count--; /* Find the predecessor traces. */ for (t2 = t; t2 > 0;) @@ -832,6 +978,10 @@ connect_traces (int n_traces, struct trace *traces) best->src->rbi->next = best->dest; t2 = bbd[best->src->index].end_of_trace; connected[t2] = true; + + if (unconnected_hot_trace_count > 0) + unconnected_hot_trace_count--; + if (dump_file) { fprintf (dump_file, "Connection: %d %d\n", @@ -881,6 +1031,8 @@ connect_traces (int n_traces, struct trace *traces) t = bbd[best->dest->index].start_of_trace; traces[last_trace].last->rbi->next = traces[t].first; connected[t] = true; + if (unconnected_hot_trace_count > 0) + unconnected_hot_trace_count--; last_trace = t; } else @@ -940,6 +1092,9 @@ connect_traces (int n_traces, struct trace *traces) } } + if (flag_reorder_blocks_and_partition) + try_copy = false; + /* Copy tiny blocks always; copy larger blocks only when the edge is traversed frequently enough. */ if (try_copy @@ -969,6 +1124,8 @@ connect_traces (int n_traces, struct trace *traces) t = bbd[next_bb->index].start_of_trace; traces[last_trace].last->rbi->next = traces[t].first; connected[t] = true; + if (unconnected_hot_trace_count > 0) + unconnected_hot_trace_count--; last_trace = t; } else @@ -1063,6 +1220,682 @@ get_uncond_jump_length (void) return length; } +static void +add_unlikely_executed_notes (void) +{ + basic_block bb; + + FOR_EACH_BB (bb) + if (bb->partition == COLD_PARTITION) + mark_bb_for_unlikely_executed_section (bb); +} + +/* Find the basic blocks that are rarely executed and need to be moved to + a separate section of the .o file (to cut down on paging and improve + cache locality). */ + +static void +find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges, + int *n_crossing_edges, + int *max_idx) +{ + basic_block bb; + edge e; + int i; + + /* Mark which partition (hot/cold) each basic block belongs in. */ + + FOR_EACH_BB (bb) + { + if (probably_never_executed_bb_p (bb)) + bb->partition = COLD_PARTITION; + else + bb->partition = HOT_PARTITION; + } + + /* Mark every edge that crosses between sections. 
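+     That is, every edge (other than edges from the entry block or to the
+     exit block) whose source and destination were given different
+     partitions above.  These edges are collected into the CROSSING_EDGES
+     array so that fix_edges_for_rarely_executed_code can fix them up.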
*/ + + i = 0; + FOR_EACH_BB (bb) + for (e = bb->succ; e; e = e->succ_next) + { + if (e->src != ENTRY_BLOCK_PTR + && e->dest != EXIT_BLOCK_PTR + && e->src->partition != e->dest->partition) + { + e->crossing_edge = true; + if (i == *max_idx) + { + *max_idx *= 2; + crossing_edges = xrealloc (crossing_edges, + (*max_idx) * sizeof (edge)); + } + crossing_edges[i++] = e; + } + else + e->crossing_edge = false; + } + + *n_crossing_edges = i; +} + +/* Add NOTE_INSN_UNLIKELY_EXECUTED_CODE to top of basic block. This note + is later used to mark the basic block to be put in the + unlikely-to-be-executed section of the .o file. */ + +static void +mark_bb_for_unlikely_executed_section (basic_block bb) +{ + rtx cur_insn; + rtx insert_insn = NULL; + rtx new_note; + + /* Find first non-note instruction and insert new NOTE before it (as + long as new NOTE is not first instruction in basic block). */ + + for (cur_insn = BB_HEAD (bb); cur_insn != NEXT_INSN (BB_END (bb)); + cur_insn = NEXT_INSN (cur_insn)) + if (GET_CODE (cur_insn) != NOTE + && GET_CODE (cur_insn) != CODE_LABEL) + { + insert_insn = cur_insn; + break; + } + + /* Insert note and assign basic block number to it. */ + + if (insert_insn) + { + new_note = emit_note_before (NOTE_INSN_UNLIKELY_EXECUTED_CODE, + insert_insn); + NOTE_BASIC_BLOCK (new_note) = bb; + } + else + { + new_note = emit_note_after (NOTE_INSN_UNLIKELY_EXECUTED_CODE, + BB_END (bb)); + NOTE_BASIC_BLOCK (new_note) = bb; + } +} + +/* If any destination of a crossing edge does not have a label, add label; + Convert any fall-through crossing edges (for blocks that do not contain + a jump) to unconditional jumps. */ + +static void +add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges) +{ + int i; + basic_block src; + basic_block dest; + rtx label; + rtx barrier; + rtx new_jump; + + for (i=0; i < n_crossing_edges; i++) + { + if (crossing_edges[i]) + { + src = crossing_edges[i]->src; + dest = crossing_edges[i]->dest; + + /* Make sure dest has a label. */ + + if (dest && (dest != EXIT_BLOCK_PTR)) + { + label = block_label (dest); + + /* Make sure source block ends with a jump. */ + + if (src && (src != ENTRY_BLOCK_PTR)) + { + if (GET_CODE (BB_END (src)) != JUMP_INSN) + /* bb just falls through. */ + { + /* make sure there's only one successor */ + if (src->succ && (src->succ->succ_next == NULL)) + { + /* Find label in dest block. */ + label = block_label (dest); + + new_jump = emit_jump_insn_after (gen_jump (label), + BB_END (src)); + barrier = emit_barrier_after (new_jump); + JUMP_LABEL (new_jump) = label; + LABEL_NUSES (label) += 1; + src->rbi->footer = unlink_insn_chain (barrier, + barrier); + /* Mark edge as non-fallthru. */ + crossing_edges[i]->flags &= ~EDGE_FALLTHRU; + } + else + { + /* Basic block has two successors, but + doesn't end in a jump; something is wrong + here! */ + abort(); + } + } /* end: 'if (GET_CODE ... ' */ + } /* end: 'if (src && src->index...' */ + } /* end: 'if (dest && dest->index...' */ + } /* end: 'if (crossing_edges[i]...' */ + } /* end for loop */ +} + +/* Find any bb's where the fall-through edge is a crossing edge (note that + these bb's must also contain a conditional jump; we've already + dealt with fall-through edges for blocks that didn't have a + conditional jump in the call to add_labels_and_missing_jumps). + Convert the fall-through edge to non-crossing edge by inserting a + new bb to fall-through into. The new bb will contain an + unconditional jump (crossing edge) to the original fall through + destination. 
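+   For example, if hot block A used to fall through into cold block B, a
+   new block A' is inserted in the hot section; A now falls through into
+   A', and A' ends with an unconditional jump to B, so the only edge that
+   crosses the section boundary is an explicit jump.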
*/ + +static void +fix_up_fall_thru_edges (void) +{ + basic_block cur_bb; + basic_block new_bb; + edge succ1; + edge succ2; + edge fall_thru; + edge cond_jump; + edge e; + bool cond_jump_crosses; + int invert_worked; + rtx old_jump; + rtx fall_thru_label; + rtx barrier; + + FOR_EACH_BB (cur_bb) + { + fall_thru = NULL; + succ1 = cur_bb->succ; + if (succ1) + succ2 = succ1->succ_next; + else + succ2 = NULL; + + /* Find the fall-through edge. */ + + if (succ1 + && (succ1->flags & EDGE_FALLTHRU)) + { + fall_thru = succ1; + cond_jump = succ2; + } + else if (succ2 + && (succ2->flags & EDGE_FALLTHRU)) + { + fall_thru = succ2; + cond_jump = succ1; + } + + if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR)) + { + /* Check to see if the fall-thru edge is a crossing edge. */ + + if (fall_thru->crossing_edge) + { + /* The fall_thru edge crosses; now check the cond jump edge, if + it exists. */ + + cond_jump_crosses = true; + invert_worked = 0; + old_jump = BB_END (cur_bb); + + /* Find the jump instruction, if there is one. */ + + if (cond_jump) + { + if (!cond_jump->crossing_edge) + cond_jump_crosses = false; + + /* We know the fall-thru edge crosses; if the cond + jump edge does NOT cross, and its destination is the + next block in the bb order, invert the jump + (i.e. fix it so the fall thru does not cross and + the cond jump does). */ + + if (!cond_jump_crosses + && cur_bb->rbi->next == cond_jump->dest) + { + /* Find label in fall_thru block. We've already added + any missing labels, so there must be one. */ + + fall_thru_label = block_label (fall_thru->dest); + + if (old_jump && fall_thru_label) + invert_worked = invert_jump (old_jump, + fall_thru_label,0); + if (invert_worked) + { + fall_thru->flags &= ~EDGE_FALLTHRU; + cond_jump->flags |= EDGE_FALLTHRU; + update_br_prob_note (cur_bb); + e = fall_thru; + fall_thru = cond_jump; + cond_jump = e; + cond_jump->crossing_edge = true; + fall_thru->crossing_edge = false; + } + } + } + + if (cond_jump_crosses || !invert_worked) + { + /* This is the case where both edges out of the basic + block are crossing edges. Here we will fix up the + fall through edge. The jump edge will be taken care + of later. */ + + new_bb = force_nonfallthru (fall_thru); + + if (new_bb) + { + new_bb->rbi->next = cur_bb->rbi->next; + cur_bb->rbi->next = new_bb; + + /* Make sure new fall-through bb is in same + partition as bb it's falling through from. */ + + new_bb->partition = cur_bb->partition; + new_bb->succ->crossing_edge = true; + } + + /* Add barrier after new jump */ + + if (new_bb) + { + barrier = emit_barrier_after (BB_END (new_bb)); + new_bb->rbi->footer = unlink_insn_chain (barrier, + barrier); + } + else + { + barrier = emit_barrier_after (BB_END (cur_bb)); + cur_bb->rbi->footer = unlink_insn_chain (barrier, + barrier); + } + } + } + } + } +} + +/* This function checks the destination blockof a "crossing jump" to + see if it has any crossing predecessors that begin with a code label + and end with an unconditional jump. If so, it returns that predecessor + block. (This is to avoid creating lots of new basic blocks that all + contain unconditional jumps to the same destination). */ + +static basic_block +find_jump_block (basic_block jump_dest) +{ + basic_block source_bb = NULL; + edge e; + rtx insn; + + for (e = jump_dest->pred; e; e = e->pred_next) + if (e->crossing_edge) + { + basic_block src = e->src; + + /* Check each predecessor to see if it has a label, and contains + only one executable instruction, which is an unconditional jump. + If so, we can use it. 
*/ + + if (GET_CODE (BB_HEAD (src)) == CODE_LABEL) + for (insn = BB_HEAD (src); + !INSN_P (insn) && insn != NEXT_INSN (BB_END (src)); + insn = NEXT_INSN (insn)) + { + if (INSN_P (insn) + && insn == BB_END (src) + && GET_CODE (insn) == JUMP_INSN + && !any_condjump_p (insn)) + { + source_bb = src; + break; + } + } + + if (source_bb) + break; + } + + return source_bb; +} + +/* Find all BB's with conditional jumps that are crossing edges; + insert a new bb and make the conditional jump branch to the new + bb instead (make the new bb same color so conditional branch won't + be a 'crossing' edge). Insert an unconditional jump from the + new bb to the original destination of the conditional jump. */ + +static void +fix_crossing_conditional_branches (void) +{ + basic_block cur_bb; + basic_block new_bb; + basic_block last_bb; + basic_block dest; + basic_block prev_bb; + edge succ1; + edge succ2; + edge crossing_edge; + edge new_edge; + rtx old_jump; + rtx set_src; + rtx old_label = NULL_RTX; + rtx new_label; + rtx new_jump; + rtx barrier; + + last_bb = EXIT_BLOCK_PTR->prev_bb; + + FOR_EACH_BB (cur_bb) + { + crossing_edge = NULL; + succ1 = cur_bb->succ; + if (succ1) + succ2 = succ1->succ_next; + else + succ2 = NULL; + + /* We already took care of fall-through edges, so only one successor + can be a crossing edge. */ + + if (succ1 && succ1->crossing_edge) + crossing_edge = succ1; + else if (succ2 && succ2->crossing_edge) + crossing_edge = succ2; + + if (crossing_edge) + { + old_jump = BB_END (cur_bb); + + /* Check to make sure the jump instruction is a + conditional jump. */ + + set_src = NULL_RTX; + + if (any_condjump_p (old_jump)) + { + if (GET_CODE (PATTERN (old_jump)) == SET) + set_src = SET_SRC (PATTERN (old_jump)); + else if (GET_CODE (PATTERN (old_jump)) == PARALLEL) + { + set_src = XVECEXP (PATTERN (old_jump), 0,0); + if (GET_CODE (set_src) == SET) + set_src = SET_SRC (set_src); + else + set_src = NULL_RTX; + } + } + + if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE)) + { + if (GET_CODE (XEXP (set_src, 1)) == PC) + old_label = XEXP (set_src, 2); + else if (GET_CODE (XEXP (set_src, 2)) == PC) + old_label = XEXP (set_src, 1); + + /* Check to see if new bb for jumping to that dest has + already been created; if so, use it; if not, create + a new one. */ + + new_bb = find_jump_block (crossing_edge->dest); + + if (new_bb) + new_label = block_label (new_bb); + else + { + /* Create new basic block to be dest for + conditional jump. */ + + new_bb = create_basic_block (NULL, NULL, last_bb); + new_bb->rbi->next = last_bb->rbi->next; + last_bb->rbi->next = new_bb; + prev_bb = last_bb; + last_bb = new_bb; + + /* Update register liveness information. */ + + new_bb->global_live_at_start = + OBSTACK_ALLOC_REG_SET (&flow_obstack); + new_bb->global_live_at_end = + OBSTACK_ALLOC_REG_SET (&flow_obstack); + COPY_REG_SET (new_bb->global_live_at_end, + prev_bb->global_live_at_end); + COPY_REG_SET (new_bb->global_live_at_start, + prev_bb->global_live_at_end); + + /* Put appropriate instructions in new bb. 
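+          These are a new code label, which becomes the target of the
+          redirected conditional branch, followed by an unconditional
+          jump (or a return) to the original destination of the
+          conditional branch.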
*/ + + new_label = gen_label_rtx (); + emit_label_before (new_label, BB_HEAD (new_bb)); + BB_HEAD (new_bb) = new_label; + + if (GET_CODE (old_label) == LABEL_REF) + { + old_label = JUMP_LABEL (old_jump); + new_jump = emit_jump_insn_after (gen_jump + (old_label), + BB_END (new_bb)); + } + else if (GET_CODE (old_label) == RETURN) + new_jump = emit_jump_insn_after (gen_return (), + BB_END (new_bb)); + else + abort (); + + barrier = emit_barrier_after (new_jump); + JUMP_LABEL (new_jump) = old_label; + new_bb->rbi->footer = unlink_insn_chain (barrier, + barrier); + + /* Make sure new bb is in same partition as source + of conditional branch. */ + + new_bb->partition = cur_bb->partition; + } + + /* Make old jump branch to new bb. */ + + redirect_jump (old_jump, new_label, 0); + + /* Remove crossing_edge as predecessor of 'dest'. */ + + dest = crossing_edge->dest; + + redirect_edge_succ (crossing_edge, new_bb); + + /* Make a new edge from new_bb to old dest; new edge + will be a successor for new_bb and a predecessor + for 'dest'. */ + + if (!new_bb->succ) + new_edge = make_edge (new_bb, dest, 0); + else + new_edge = new_bb->succ; + + crossing_edge->crossing_edge = false; + new_edge->crossing_edge = true; + } + } + } +} + +/* Find any unconditional branches that cross between hot and cold + sections. Convert them into indirect jumps instead. */ + +static void +fix_crossing_unconditional_branches (void) +{ + basic_block cur_bb; + rtx last_insn; + rtx label; + rtx label_addr; + rtx indirect_jump_sequence; + rtx jump_insn = NULL_RTX; + rtx new_reg; + rtx cur_insn; + edge succ; + + FOR_EACH_BB (cur_bb) + { + last_insn = BB_END (cur_bb); + succ = cur_bb->succ; + + /* Check to see if bb ends in a crossing (unconditional) jump. At + this point, no crossing jumps should be conditional. */ + + if (GET_CODE (last_insn) == JUMP_INSN + && succ->crossing_edge) + { + rtx label2, table; + + if (any_condjump_p (last_insn)) + abort (); + + /* Make sure the jump is not already an indirect or table jump. */ + + else if (!computed_jump_p (last_insn) + && !tablejump_p (last_insn, &label2, &table)) + { + /* We have found a "crossing" unconditional branch. Now + we must convert it to an indirect jump. First create + reference of label, as target for jump. */ + + label = JUMP_LABEL (last_insn); + label_addr = gen_rtx_LABEL_REF (VOIDmode, label); + LABEL_NUSES (label) += 1; + + /* Get a register to use for the indirect jump. */ + + new_reg = gen_reg_rtx (Pmode); + + /* Generate indirect the jump sequence. */ + + start_sequence (); + emit_move_insn (new_reg, label_addr); + emit_indirect_jump (new_reg); + indirect_jump_sequence = get_insns (); + end_sequence (); + + /* Make sure every instruction in the new jump sequence has + its basic block set to be cur_bb. */ + + for (cur_insn = indirect_jump_sequence; cur_insn; + cur_insn = NEXT_INSN (cur_insn)) + { + BLOCK_FOR_INSN (cur_insn) = cur_bb; + if (GET_CODE (cur_insn) == JUMP_INSN) + jump_insn = cur_insn; + } + + /* Insert the new (indirect) jump sequence immediately before + the unconditional jump, then delete the unconditional jump. */ + + emit_insn_before (indirect_jump_sequence, last_insn); + delete_insn (last_insn); + + /* Make BB_END for cur_bb be the jump instruction (NOT the + barrier instruction at the end of the sequence...). */ + + BB_END (cur_bb) = jump_insn; + } + } + } +} + +/* Add REG_CROSSING_JUMP note to all crossing jump insns. 
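+   Later passes (for example the cfg cleanup and block merging code) check
+   for this note so that they do not redirect or merge jumps across the
+   hot/cold section boundary.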
*/ + +static void +add_reg_crossing_jump_notes (void) +{ + basic_block bb; + edge e; + + FOR_EACH_BB (bb) + for (e = bb->succ; e; e = e->succ_next) + if (e->crossing_edge + && GET_CODE (BB_END (e->src)) == JUMP_INSN) + REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP, + NULL_RTX, + REG_NOTES (BB_END + (e->src))); +} + +/* Basic blocks containing NOTE_INSN_UNLIKELY_EXECUTED_CODE will be + put in a separate section of the .o file, to reduce paging and + improve cache performance (hopefully). This can result in bits of + code from the same function being widely separated in the .o file. + However this is not obvious to the current bb structure. Therefore + we must take care to ensure that: 1). There are no fall_thru edges + that cross between sections; 2). For those architectures which + have "short" conditional branches, all conditional branches that + attempt to cross between sections are converted to unconditional + branches; and, 3). For those architectures which have "short" + unconditional branches, all unconditional branches that attempt + to cross between sections are converted to indirect jumps. + + The code for fixing up fall_thru edges that cross between hot and + cold basic blocks does so by creating new basic blocks containing + unconditional branches to the appropriate label in the "other" + section. The new basic block is then put in the same (hot or cold) + section as the original conditional branch, and the fall_thru edge + is modified to fall into the new basic block instead. By adding + this level of indirection we end up with only unconditional branches + crossing between hot and cold sections. + + Conditional branches are dealt with by adding a level of indirection. + A new basic block is added in the same (hot/cold) section as the + conditional branch, and the conditional branch is retargeted to the + new basic block. The new basic block contains an unconditional branch + to the original target of the conditional branch (in the other section). + + Unconditional branches are dealt with by converting them into + indirect jumps. */ + +static void +fix_edges_for_rarely_executed_code (edge *crossing_edges, + int n_crossing_edges) +{ + /* Make sure the source of any crossing edge ends in a jump and the + destination of any crossing edge has a label. */ + + add_labels_and_missing_jumps (crossing_edges, n_crossing_edges); + + /* Convert all crossing fall_thru edges to non-crossing fall + thrus to unconditional jumps (that jump to the original fall + thru dest). */ + + fix_up_fall_thru_edges (); + + /* If the architecture does not have conditional branches that can + span all of memory, convert crossing conditional branches into + crossing unconditional branches. */ + + if (!HAS_LONG_COND_BRANCH) + fix_crossing_conditional_branches (); + + /* If the architecture does not have unconditional branches that + can span all of memory, convert crossing unconditional branches + into indirect jumps. Since adding an indirect jump also adds + a new register usage, update the register usage information as + well. */ + + if (!HAS_LONG_UNCOND_BRANCH) + { + fix_crossing_unconditional_branches (); + reg_scan (get_insns(), max_reg_num (), 1); + } + + add_reg_crossing_jump_notes (); +} + /* Reorder basic blocks. The main entry point to this file. 
*/ void @@ -1111,7 +1944,64 @@ reorder_basic_blocks (void) if (dump_file) dump_flow_info (dump_file); + if (flag_reorder_blocks_and_partition) + add_unlikely_executed_notes (); + cfg_layout_finalize (); timevar_pop (TV_REORDER_BLOCKS); } + +/* This function is the main 'entrance' for the optimization that + partitions hot and cold basic blocks into separate sections of the + .o file (to improve performance and cache locality). Ideally it + would be called after all optimizations that rearrange the CFG have + been called. However part of this optimization may introduce new + register usage, so it must be called before register allocation has + occurred. This means that this optimization is actually called + well before the optimization that reorders basic blocks (see function + above). + + This optimization checks the feedback information to determine + which basic blocks are hot/cold and adds + NOTE_INSN_UNLIKELY_EXECUTED_CODE to non-hot basic blocks. The + presence or absence of this note is later used for writing out + sections in the .o file. This optimization must also modify the + CFG to make sure there are no fallthru edges between hot & cold + blocks, as those blocks will not necessarily be contiguous in the + .o (or assembly) file; and in those cases where the architecture + requires it, conditional and unconditional branches that cross + between sections are converted into unconditional or indirect + jumps, depending on what is appropriate. */ + +void +partition_hot_cold_basic_blocks (void) +{ + basic_block cur_bb; + edge *crossing_edges; + int n_crossing_edges; + int max_edges = 2 * last_basic_block; + + if (n_basic_blocks <= 1) + return; + + crossing_edges = xcalloc (max_edges, sizeof (edge)); + + cfg_layout_initialize (); + + FOR_EACH_BB (cur_bb) + if (cur_bb->index >= 0 + && cur_bb->next_bb->index >= 0) + cur_bb->rbi->next = cur_bb->next_bb; + + find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges, + &n_crossing_edges, + &max_edges); + + if (n_crossing_edges > 0) + fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges); + + free (crossing_edges); + + cfg_layout_finalize(); +} diff --git a/gcc/cfg.c b/gcc/cfg.c index ff3f3679136..5eb9c248c5b 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -115,6 +115,7 @@ struct basic_block_def entry_exit_blocks[2] 0, /* count */ 0, /* frequency */ 0, /* flags */ + 0, /* partition */ NULL /* rbi */ }, { @@ -138,6 +139,7 @@ struct basic_block_def entry_exit_blocks[2] 0, /* count */ 0, /* frequency */ 0, /* flags */ + 0, /* partition */ NULL /* rbi */ } }; diff --git a/gcc/cfgbuild.c b/gcc/cfgbuild.c index 8fcee25073b..cd936a27b9d 100644 --- a/gcc/cfgbuild.c +++ b/gcc/cfgbuild.c @@ -271,6 +271,12 @@ make_edges (rtx label_value_list, basic_block min, basic_block max, int update_p /* Assume no computed jump; revise as we create edges. */ current_function_has_computed_jump = 0; + /* If we are partitioning hot and cold basic blocks into separate + sections, we cannot assume there is no computed jump. */ + + if (flag_reorder_blocks_and_partition) + current_function_has_computed_jump = 1; + /* Heavy use of computed goto in machine-generated code can lead to nearly fully-connected CFGs. In that case we spend a significant amount of time searching the edge lists for duplicates. 
*/ diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index f40d2c1525b..51182e3f400 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -49,6 +49,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "tm_p.h" #include "target.h" #include "regs.h" +#include "cfglayout.h" #include "expr.h" /* cleanup_cfg maintains following flags for each basic block. */ @@ -150,6 +151,15 @@ try_simplify_condjump (basic_block cbranch_block) return false; jump_dest_block = jump_block->succ->dest; + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (jump_block->partition != jump_dest_block->partition + || cbranch_jump_edge->crossing_edge)) + return false; + /* The conditional branch must target the block after the unconditional branch. */ cbranch_dest_block = cbranch_jump_edge->dest; @@ -428,6 +438,14 @@ try_forward_edges (int mode, basic_block b) bool changed = false; edge e, next, *threaded_edges = NULL; + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && find_reg_note (BB_END (b), REG_CROSSING_JUMP, NULL_RTX)) + return false; + for (e = b->succ; e; e = next) { basic_block target, first; @@ -675,6 +693,15 @@ merge_blocks_move_predecessor_nojumps (basic_block a, basic_block b) { rtx barrier; + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (a->partition != b->partition + || find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX))) + return; + barrier = next_nonnote_insn (BB_END (a)); if (GET_CODE (barrier) != BARRIER) abort (); @@ -718,6 +745,15 @@ merge_blocks_move_successor_nojumps (basic_block a, basic_block b) rtx barrier, real_b_end; rtx label, table; + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || a->partition != b->partition)) + return; + real_b_end = BB_END (b); /* If there is a jump table following block B temporarily add the jump table @@ -782,6 +818,18 @@ merge_blocks_move (edge e, basic_block b, basic_block c, int mode) && tail_recursion_label_p (BB_HEAD (c))) return NULL; + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (find_reg_note (BB_END (b), REG_CROSSING_JUMP, NULL_RTX) + || find_reg_note (BB_END (c), REG_CROSSING_JUMP, NULL_RTX) + || b->partition != c->partition)) + return NULL; + + + /* If B has a fallthru edge to C, no need to move anything. */ if (e->flags & EDGE_FALLTHRU) { @@ -1453,6 +1501,12 @@ try_crossjump_to_edge (int mode, edge e1, edge e2) rtx newpos1, newpos2; edge s; + /* If we have partitioned hot/cold basic blocks, it is a bad idea + to try this optimization. */ + + if (flag_reorder_blocks_and_partition && no_new_pseudos) + return false; + /* Search backward through forwarder blocks. We don't need to worry about multiple entry or chained forwarders, as they will be optimized away. 
We do this to look past the unconditional jump following a @@ -1639,6 +1693,15 @@ try_crossjump_bb (int mode, basic_block bb) if (!bb->pred || !bb->pred->pred_next) return false; + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (bb->pred->src->partition != bb->pred->pred_next->src->partition + || bb->pred->crossing_edge)) + return false; + /* It is always cheapest to redirect a block that ends in a branch to a block that falls through into BB, as that adds no branches to the program. We'll try that combination first. */ @@ -1895,6 +1958,7 @@ try_optimize_cfg (int mode) && ! b->succ->succ_next && b->succ->dest != EXIT_BLOCK_PTR && onlyjump_p (BB_END (b)) + && !find_reg_note (BB_END (b), REG_CROSSING_JUMP, NULL_RTX) && try_redirect_by_replacing_jump (b->succ, b->succ->dest, (mode & CLEANUP_CFGLAYOUT) != 0)) { diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c index 9f70604c6f2..001dfe29bd4 100644 --- a/gcc/cfghooks.c +++ b/gcc/cfghooks.c @@ -600,7 +600,8 @@ tidy_fallthru_edges (void) if ((s = b->succ) != NULL && ! (s->flags & EDGE_COMPLEX) && s->succ_next == NULL - && s->dest == c) + && s->dest == c + && !find_reg_note (BB_END (b), REG_CROSSING_JUMP, NULL_RTX)) tidy_fallthru_edge (s); } } diff --git a/gcc/cfglayout.c b/gcc/cfglayout.c index aa79d6ec549..c5b89ac92ad 100644 --- a/gcc/cfglayout.c +++ b/gcc/cfglayout.c @@ -35,6 +35,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "target.h" #include "ggc.h" #include "alloc-pool.h" +#include "flags.h" /* The contents of the current function definition are allocated in this obstack, and all are freed at the end of the function. */ @@ -57,6 +58,7 @@ void verify_insn_chain (void); static void fixup_fallthru_exit_predecessor (void); static rtx duplicate_insn_chain (rtx, rtx); static tree insn_scope (rtx); +static void update_unlikely_executed_notes (basic_block); rtx unlink_insn_chain (rtx first, rtx last) @@ -635,6 +637,7 @@ fixup_reorder_chain (void) edge e_fall, e_taken, e; rtx bb_end_insn; basic_block nb; + basic_block old_bb; if (bb->succ == NULL) continue; @@ -711,6 +714,11 @@ fixup_reorder_chain (void) } } + /* If the "jumping" edge is a crossing edge, and the fall + through edge is non-crossing, leave things as they are. */ + else if (e_taken->crossing_edge && !e_fall->crossing_edge) + continue; + /* Otherwise we can try to invert the jump. This will basically never fail, however, keep up the pretense. */ else if (invert_jump (bb_end_insn, @@ -768,7 +776,34 @@ fixup_reorder_chain (void) nb->rbi->next = bb->rbi->next; bb->rbi->next = nb; /* Don't process this new block. */ + old_bb = bb; bb = nb; + + /* Make sure new bb is tagged for correct section (same as + fall-thru source). 
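+      That is, the new jump block inherits the partition of the block it
+      falls through from; if that partition is COLD_PARTITION it also gets
+      a NOTE_INSN_UNLIKELY_EXECUTED_CODE note, and if it ends in an
+      unconditional crossing jump that jump gets a REG_CROSSING_JUMP note.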
*/ + e_fall->src->partition = bb->pred->src->partition; + if (flag_reorder_blocks_and_partition) + { + if (bb->pred->src->partition == COLD_PARTITION) + { + rtx new_note; + rtx note = BB_HEAD (e_fall->src); + + while (!INSN_P (note) + && note != BB_END (e_fall->src)) + note = NEXT_INSN (note); + + new_note = emit_note_before + (NOTE_INSN_UNLIKELY_EXECUTED_CODE, + note); + NOTE_BASIC_BLOCK (new_note) = bb; + } + if (GET_CODE (BB_END (bb)) == JUMP_INSN + && !any_condjump_p (BB_END (bb)) + && bb->succ->crossing_edge ) + REG_NOTES (BB_END (bb)) = gen_rtx_EXPR_LIST + (REG_CROSSING_JUMP, NULL_RTX, REG_NOTES (BB_END (bb))); + } } } @@ -803,6 +838,8 @@ fixup_reorder_chain (void) bb->index = index; BASIC_BLOCK (index) = bb; + update_unlikely_executed_notes (bb); + bb->prev_bb = prev_bb; prev_bb->next_bb = bb; } @@ -820,6 +857,21 @@ fixup_reorder_chain (void) } } +/* Update the basic block number information in any + NOTE_INSN_UNLIKELY_EXECUTED_CODE notes within the basic block. */ + +static void +update_unlikely_executed_notes (basic_block bb) +{ + rtx cur_insn; + + for (cur_insn = BB_HEAD (bb); cur_insn != BB_END (bb); + cur_insn = NEXT_INSN (cur_insn)) + if (GET_CODE (cur_insn) == NOTE + && NOTE_LINE_NUMBER (cur_insn) == NOTE_INSN_UNLIKELY_EXECUTED_CODE) + NOTE_BASIC_BLOCK (cur_insn) = bb; +} + /* Perform sanity checks on the insn chain. 1. Check that next/prev pointers are consistent in both the forward and reverse direction. @@ -990,6 +1042,7 @@ duplicate_insn_chain (rtx from, rtx to) abort (); break; case NOTE_INSN_REPEATED_LINE_NUMBER: + case NOTE_INSN_UNLIKELY_EXECUTED_CODE: emit_note_copy (insn); break; diff --git a/gcc/cfglayout.h b/gcc/cfglayout.h index ca79e26c433..0361dc68e05 100644 --- a/gcc/cfglayout.h +++ b/gcc/cfglayout.h @@ -45,3 +45,4 @@ extern bool can_copy_bbs_p (basic_block *, unsigned); extern void copy_bbs (basic_block *, unsigned, basic_block *, edge *, unsigned, edge *, struct loop *); extern void cfg_layout_initialize_rbi (basic_block); +extern bool scan_ahead_for_unlikely_executed_note (rtx); diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c index c292c7ad974..2e09ba2055d 100644 --- a/gcc/cfgrtl.c +++ b/gcc/cfgrtl.c @@ -99,6 +99,7 @@ can_delete_note_p (rtx note) { return (NOTE_LINE_NUMBER (note) == NOTE_INSN_DELETED || NOTE_LINE_NUMBER (note) == NOTE_INSN_BASIC_BLOCK + || NOTE_LINE_NUMBER (note) == NOTE_INSN_UNLIKELY_EXECUTED_CODE || NOTE_LINE_NUMBER (note) == NOTE_INSN_PREDICTION); } @@ -319,6 +320,7 @@ create_basic_block_structure (rtx head, rtx end, rtx bb_note, basic_block after) link_block (bb, after); BASIC_BLOCK (bb->index) = bb; update_bb_for_insn (bb); + bb->partition = UNPARTITIONED; /* Tag the block so that we know it has been used when considering other basic block notes. */ @@ -620,11 +622,24 @@ rtl_merge_blocks (basic_block a, basic_block b) static bool rtl_can_merge_blocks (basic_block a,basic_block b) { + bool partitions_ok = true; + + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || find_reg_note (BB_END (b), REG_CROSSING_JUMP, NULL_RTX) + || a->partition != b->partition)) + partitions_ok = false; + /* There must be exactly one edge in between the blocks. */ return (a->succ && !a->succ->succ_next && a->succ->dest == b && !b->pred->pred_next && a != b /* Must be simple edge. 
*/ && !(a->succ->flags & EDGE_COMPLEX) + && partitions_ok && a->next_bb == b && a != ENTRY_BLOCK_PTR && b != EXIT_BLOCK_PTR /* If the jump insn has side effects, @@ -665,6 +680,15 @@ try_redirect_by_replacing_jump (edge e, basic_block target, bool in_cfglayout) rtx set; int fallthru = 0; + + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)) + return false; + /* Verify that all targets will be TARGET. */ for (tmp = src->succ; tmp; tmp = tmp->succ_next) if (tmp->dest != target && tmp != e) @@ -1066,6 +1090,33 @@ force_nonfallthru_and_redirect (edge e, basic_block target) target->global_live_at_start); } + /* Make sure new block ends up in correct hot/cold section. */ + + jump_block->partition = e->src->partition; + if (flag_reorder_blocks_and_partition) + { + if (e->src->partition == COLD_PARTITION) + { + rtx bb_note, new_note; + for (bb_note = BB_HEAD (jump_block); + bb_note && bb_note != NEXT_INSN (BB_END (jump_block)); + bb_note = NEXT_INSN (bb_note)) + if (GET_CODE (bb_note) == NOTE + && NOTE_LINE_NUMBER (bb_note) == NOTE_INSN_BASIC_BLOCK) + break; + new_note = emit_note_after (NOTE_INSN_UNLIKELY_EXECUTED_CODE, + bb_note); + NOTE_BASIC_BLOCK (new_note) = jump_block; + jump_block->partition = COLD_PARTITION; + } + if (GET_CODE (BB_END (jump_block)) == JUMP_INSN + && !any_condjump_p (BB_END (jump_block)) + && jump_block->succ->crossing_edge ) + REG_NOTES (BB_END (jump_block)) = gen_rtx_EXPR_LIST + (REG_CROSSING_JUMP, NULL_RTX, + REG_NOTES (BB_END (jump_block))); + } + /* Wire edge in. */ new_edge = make_edge (e->src, jump_block, EDGE_FALLTHRU); new_edge->probability = e->probability; @@ -1480,6 +1531,10 @@ commit_one_edge_insertion (edge e, int watch_calls) tmp = NEXT_INSN (tmp); if (NOTE_INSN_BASIC_BLOCK_P (tmp)) tmp = NEXT_INSN (tmp); + if (tmp + && GET_CODE (tmp) == NOTE + && NOTE_LINE_NUMBER (tmp) == NOTE_INSN_UNLIKELY_EXECUTED_CODE) + tmp = NEXT_INSN (tmp); if (tmp == BB_HEAD (bb)) before = tmp; else if (tmp) @@ -1522,6 +1577,38 @@ commit_one_edge_insertion (edge e, int watch_calls) { bb = split_edge (e); after = BB_END (bb); + + /* If we are partitioning hot/cold basic blocks, we must make sure + that the new basic block ends up in the correct section. 
*/ + + bb->partition = e->src->partition; + if (flag_reorder_blocks_and_partition + && e->src != ENTRY_BLOCK_PTR + && e->src->partition == COLD_PARTITION) + { + rtx bb_note, new_note, cur_insn; + + bb_note = NULL_RTX; + for (cur_insn = BB_HEAD (bb); cur_insn != NEXT_INSN (BB_END (bb)); + cur_insn = NEXT_INSN (cur_insn)) + if (GET_CODE (cur_insn) == NOTE + && NOTE_LINE_NUMBER (cur_insn) == NOTE_INSN_BASIC_BLOCK) + { + bb_note = cur_insn; + break; + } + + new_note = emit_note_after (NOTE_INSN_UNLIKELY_EXECUTED_CODE, + bb_note); + NOTE_BASIC_BLOCK (new_note) = bb; + if (GET_CODE (BB_END (bb)) == JUMP_INSN + && !any_condjump_p (BB_END (bb)) + && bb->succ->crossing_edge ) + REG_NOTES (BB_END (bb)) = gen_rtx_EXPR_LIST + (REG_CROSSING_JUMP, NULL_RTX, REG_NOTES (BB_END (bb))); + if (after == bb_note) + after = new_note; + } } } @@ -1791,6 +1878,7 @@ update_br_prob_note (basic_block bb) - tails of basic blocks (ensure that boundary is necessary) - scans body of the basic block for JUMP_INSN, CODE_LABEL and NOTE_INSN_BASIC_BLOCK + - verify that no fall_thru edge crosses hot/cold partition boundaries In future it can be extended check a lot of other stuff as well (reachability of basic blocks, life information, etc. etc.). */ @@ -1878,7 +1966,15 @@ rtl_verify_flow_info_1 (void) for (e = bb->succ; e; e = e->succ_next) { if (e->flags & EDGE_FALLTHRU) - n_fallthru++, fallthru = e; + { + n_fallthru++, fallthru = e; + if (e->crossing_edge) + { + error ("Fallthru edge crosses section boundary (bb %i)", + e->src->index); + err = 1; + } + } if ((e->flags & ~(EDGE_DFS_BACK | EDGE_CAN_FALLTHRU @@ -2553,11 +2649,24 @@ cfg_layout_delete_block (basic_block bb) static bool cfg_layout_can_merge_blocks_p (basic_block a, basic_block b) { + bool partitions_ok = true; + + /* If we are partitioning hot/cold basic blocks, we don't want to + mess up unconditional or indirect jumps that cross between hot + and cold sections. */ + + if (flag_reorder_blocks_and_partition + && (find_reg_note (BB_END (a), REG_CROSSING_JUMP, NULL_RTX) + || find_reg_note (BB_END (b), REG_CROSSING_JUMP, NULL_RTX) + || a->partition != b->partition)) + partitions_ok = false; + /* There must be exactly one edge in between the blocks. */ return (a->succ && !a->succ->succ_next && a->succ->dest == b && !b->pred->pred_next && a != b /* Must be simple edge. */ && !(a->succ->flags & EDGE_COMPLEX) + && partitions_ok && a != ENTRY_BLOCK_PTR && b != EXIT_BLOCK_PTR /* If the jump insn has side effects, we can't kill the edge. 
diff --git a/gcc/common.opt b/gcc/common.opt
index f1a045316e4..d7bed08d4ae 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -569,6 +569,10 @@ freorder-blocks
 Common
 Reorder basic blocks to improve code placement
 
+freorder-blocks-and-partition
+Common
+Reorder basic blocks and partition into hot and cold sections
+
 freorder-functions
 Common
 Reorder functions to improve code placement
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index fd4ff3234ee..a626045a9fc 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -1304,7 +1304,10 @@ darwin_globalize_label (FILE *stream, const char *name)
 void
 darwin_asm_named_section (const char *name, unsigned int flags ATTRIBUTE_UNUSED)
 {
-  fprintf (asm_out_file, ".section %s\n", name);
+  if (flag_reorder_blocks_and_partition)
+    fprintf (asm_out_file, SECTION_FORMAT_STRING, name);
+  else
+    fprintf (asm_out_file, ".section %s\n", name);
 }
 
 unsigned int
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index b537435f1cc..846b6e9dbae 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -207,6 +207,9 @@ extern int target_flags;
 #endif
 #endif
 
+#define HAS_LONG_COND_BRANCH 1
+#define HAS_LONG_UNCOND_BRANCH 1
+
 /* Avoid adding %gs:0 in TLS references; use %gs:address directly.  */
 #define TARGET_TLS_DIRECT_SEG_REFS (target_flags & MASK_TLS_DIRECT_SEG_REFS)
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index 6f193f739c3..993126f8489 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -145,8 +145,10 @@ do {                                                                   \
 /* These are used by -fbranch-probabilities */
 #define HOT_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions"
+#define NORMAL_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions"
 #define UNLIKELY_EXECUTED_TEXT_SECTION_NAME \
-  "__TEXT,__text2,regular,pure_instructions"
+  "__TEXT,__unlikely,regular,pure_instructions"
+#define SECTION_FORMAT_STRING ".section %s\n\t.align 2\n"
 
 /* Define cutoff for using external functions to save floating point.
    Currently on Darwin, always use inline stores.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index b140dd622fa..49132641729 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -9702,6 +9702,7 @@ rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
   if (TARGET_RELOCATABLE
       && !in_toc_section ()
       && !in_text_section ()
+      && !in_unlikely_text_section ()
       && !recurse
       && GET_CODE (x) != CONST_INT
       && GET_CODE (x) != CONST_DOUBLE
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index 1e0ac3707f6..39497e9736b 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -434,6 +434,11 @@ do {                                                                   \
 
 #define BSS_SECTION_ASM_OP "\t.section\t\".bss\""
 
+#define HOT_TEXT_SECTION_NAME ".text"
+#define NORMAL_TEXT_SECTION_NAME ".text"
+#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME ".text.unlikely"
+#define SECTION_FORMAT_STRING ".section\t\"%s\"\n\t.align 2\n"
+
 /* Override elfos.h definition.  */
 #undef INIT_SECTION_ASM_OP
 #define INIT_SECTION_ASM_OP "\t.section\t\".init\",\"ax\""
diff --git a/gcc/dbxout.c b/gcc/dbxout.c
index 1118a182690..a93946d2ec7 100644
--- a/gcc/dbxout.c
+++ b/gcc/dbxout.c
@@ -448,6 +448,9 @@ static void
 dbxout_function_end (void)
 {
   char lscope_label_name[100];
+
+  function_section (current_function_decl);
+
   /* Convert Ltext into the appropriate format for local labels in case
      the system doesn't insert underscores in front of user generated
      labels.  */
@@ -728,7 +731,10 @@ dbxout_source_file (FILE *file, const char *filename)
          && DECL_SECTION_NAME (current_function_decl) != NULL_TREE)
        ; /* Don't change section amid function.  */
       else
-       text_section ();
+       {
+         if (!in_text_section () && !in_unlikely_text_section ())
+           text_section ();
+       }
       targetm.asm_out.internal_label (file, "Ltext", source_label_number);
       source_label_number++;
       lastfile = filename;
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 9f261d0e0de..efd28af753a 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -623,10 +623,26 @@ You Lose!  You must define PREFERRED_DEBUGGING_TYPE!
 #define HOT_TEXT_SECTION_NAME "text.hot"
 #endif
 
+#ifndef NORMAL_TEXT_SECTION_NAME
+#define NORMAL_TEXT_SECTION_NAME ".text"
+#endif
+
 #ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
 #define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
 #endif
 
+#ifndef SECTION_FORMAT_STRING
+#define SECTION_FORMAT_STRING "\t.section\t%s\n\t.align 2\n"
+#endif
+
+#ifndef HAS_LONG_COND_BRANCH
+#define HAS_LONG_COND_BRANCH 0
+#endif
+
+#ifndef HAS_LONG_UNCOND_BRANCH
+#define HAS_LONG_UNCOND_BRANCH 0
+#endif
+
 #ifndef VECTOR_MODE_SUPPORTED_P
 #define VECTOR_MODE_SUPPORTED_P(MODE) 0
 #endif
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2812bf97c8a..83845f0ea40 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -284,7 +284,7 @@ in the following sections.
 -foptimize-sibling-calls -fprefetch-loop-arrays @gol
 -fprofile-generate -fprofile-use @gol
 -freduce-all-givs -fregmove -frename-registers @gol
--freorder-blocks -freorder-functions @gol
+-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
 -frerun-cse-after-loop -frerun-loop-opt @gol
 -frounding-math -fschedule-insns -fschedule-insns2 @gol
 -fno-sched-interblock -fno-sched-spec -fsched-spec-load @gol
@@ -3680,7 +3680,7 @@ optimizations designed to reduce code size.
 @option{-Os} disables the following optimization flags:
 @gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
--falign-labels -freorder-blocks -fprefetch-loop-arrays}
+-falign-labels -freorder-blocks -freorder-blocks-and-partition -fprefetch-loop-arrays}
 
 If you use multiple @option{-O} options, with or without level numbers,
 the last such option is the one that is effective.
@@ -4206,6 +4206,13 @@ taken branches and improve code locality.
 
 Enabled at levels @option{-O2}, @option{-O3}.
 
+@item -freorder-blocks-and-partition
+@opindex freorder-blocks-and-partition
+In addition to reordering basic blocks in the compiled function, in order
+to reduce the number of taken branches, this option partitions hot and cold
+basic blocks into separate sections of the assembly and .o files, to
+improve paging and cache locality performance.
+
 @item -freorder-functions
 @opindex freorder-functions
 Reorder basic blocks in the compiled function in order to reduce number of
diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index ecc18521294..746bca587bd 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -3288,6 +3288,16 @@ This insn uses @var{op}, a @code{code_label} or a @code{note} of type
 be held in a register.  The presence of this note allows jump
 optimization to be aware that @var{op} is, in fact, being used, and flow
 optimization to build an accurate flow graph.
+
+@findex REG_CROSSING_JUMP
+@item REG_CROSSING_JUMP
+This insn is a branching instruction (either an unconditional jump or
+an indirect jump) which crosses between hot and cold sections, which
+could potentially be very far apart in the executable.  The presence
+of this note indicates to other optimizations that this branching
+instruction should not be ``collapsed'' into a simpler branching
+construct.  It is used when the optimization to partition basic blocks
+into hot and cold sections is turned on.
 @end table
 
 The following notes describe attributes of outputs of an insn:
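
For reference (an illustrative sketch, not part of the patch), the note documented above is attached to a jump by prepending an EXPR_LIST to its REG_NOTES, exactly as the cfgrtl.c hunks earlier in this patch do; the helper name mark_crossing_jump is invented here:

/* Sketch: mark JUMP as crossing between the hot and cold sections,
   using the same idiom as force_nonfallthru_and_redirect above.  */

static void
mark_crossing_jump (rtx jump)
{
  if (!find_reg_note (jump, REG_CROSSING_JUMP, NULL_RTX))
    REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP, NULL_RTX,
                                          REG_NOTES (jump));
}
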
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 230ee524643..1d413e0dad5 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5802,6 +5802,13 @@ frequently executed functions of the program.
 If not defined, GCC will provide a default definition if the target
 supports named sections.
 @end defmac
 
+@defmac NORMAL_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing code
+that does not go into the ``unlikely executed text'' partition.  This is used
+as part of the optimization that partitions hot and cold basic blocks into
+separate sections in the .o and executable files.
+@end defmac
+
 @defmac UNLIKELY_EXECUTED_TEXT_SECTION_NAME
 If defined, a C string constant for the name of the section containing
 unlikely executed functions in the program.
@@ -5853,6 +5860,12 @@ finalization code.
 
 If not defined, GCC will assume such a section does not exist.
 @end defmac
 
+@defmac SECTION_FORMAT_STRING
+If defined, the format string used by @code{fprintf} to write out the
+text section headers for the hot and cold sections of the assembly
+file when hot and cold partitioning is being performed.
+@end defmac
+
 @defmac CRT_CALL_STATIC_FUNCTION (@var{section_op}, @var{function})
 If defined, an ASM statement that switches to a different section
 via @var{section_op}, calls @var{function}, and switches back to
@@ -8454,6 +8467,24 @@ For each predicate function named in @code{PREDICATE_CODES}, a
 declaration will be generated in @file{insn-codes.h}.
 @end defmac
 
+@defmac HAS_LONG_COND_BRANCH
+Define this boolean macro to indicate whether or not your architecture
+has conditional branches that can span all of memory.  It is used in
+conjunction with an optimization that partitions hot and cold basic
+blocks into separate sections of the executable.  If this macro is
+set to false, GCC will convert any conditional branches that attempt
+to cross between sections into unconditional branches or indirect jumps.
+@end defmac
+
+@defmac HAS_LONG_UNCOND_BRANCH
+Define this boolean macro to indicate whether or not your architecture
+has unconditional branches that can span all of memory.  It is used in
+conjunction with an optimization that partitions hot and cold basic
+blocks into separate sections of the executable.  If this macro is
+set to false, GCC will convert any unconditional branches that attempt
+to cross between sections into indirect jumps.
+@end defmac
+
 @defmac SPECIAL_MODE_PREDICATES
 Define this if you have special predicates that know special things
 about modes.  Genrecog will warn about certain forms of
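
As a concrete illustration of the macros documented above (a sketch only; the values shown are assumptions for a hypothetical ELF target, not part of the patch), a target header could provide definitions along the lines of the sysv4.h and i386.h hunks in this patch:

/* Hypothetical target header fragment, for illustration only.
   Section names and format string mirror the rs6000/sysv4.h hunk;
   a target whose branches can reach all of memory sets both
   HAS_LONG_* macros to 1, as i386.h does above.  */

#define HOT_TEXT_SECTION_NAME ".text"
#define NORMAL_TEXT_SECTION_NAME ".text"
#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME ".text.unlikely"
#define SECTION_FORMAT_STRING "\t.section\t%s\n\t.align 2\n"

#define HAS_LONG_COND_BRANCH 1
#define HAS_LONG_UNCOND_BRANCH 1
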
diff --git a/gcc/final.c b/gcc/final.c
index b53e9a8e931..9aa4657b068 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1623,6 +1623,35 @@ output_alternate_entry_point (FILE *file, rtx insn)
     }
 }
 
+/* Return a boolean indicating if there is a NOTE_INSN_UNLIKELY_EXECUTED_CODE
+   note in the instruction chain (going forward) between the current
+   instruction and the next 'executable' instruction.  */
+
+bool
+scan_ahead_for_unlikely_executed_note (rtx insn)
+{
+  rtx temp;
+  int bb_note_count = 0;
+
+  for (temp = insn; temp; temp = NEXT_INSN (temp))
+    {
+      if (GET_CODE (temp) == NOTE
+          && NOTE_LINE_NUMBER (temp) == NOTE_INSN_UNLIKELY_EXECUTED_CODE)
+        return true;
+      if (GET_CODE (temp) == NOTE
+          && NOTE_LINE_NUMBER (temp) == NOTE_INSN_BASIC_BLOCK)
+        {
+          bb_note_count++;
+          if (bb_note_count > 1)
+            return false;
+        }
+      if (INSN_P (temp))
+        return false;
+    }
+
+  return false;
+}
+
 /* The final scan for one insn, INSN.
    Args are same as in `final', except that INSN
    is the insn being scanned.
@@ -1672,7 +1701,31 @@ final_scan_insn (rtx insn, FILE *file, int optimize ATTRIBUTE_UNUSED,
        case NOTE_INSN_EXPECTED_VALUE:
          break;
 
+       case NOTE_INSN_UNLIKELY_EXECUTED_CODE:
+
+         /* The presence of this note indicates that this basic block
+            belongs in the "cold" section of the .o file.  If we are
+            not already writing to the cold section we need to change
+            to it.  */
+
+         unlikely_text_section ();
+         break;
+
        case NOTE_INSN_BASIC_BLOCK:
+
+         /* If we are performing the optimization that partitions
+            basic blocks into hot & cold sections of the .o file,
+            then at the start of each new basic block, before
+            beginning to write code for the basic block, we need to
+            check to see whether the basic block belongs in the hot
+            or cold section of the .o file, and change the section we
+            are writing to appropriately.  */
+
+         if (flag_reorder_blocks_and_partition
+             && in_unlikely_text_section ()
+             && !scan_ahead_for_unlikely_executed_note (insn))
+           text_section ();
+
 #ifdef IA64_UNWIND_INFO
          IA64_UNWIND_EMIT (asm_out_file, insn);
 #endif
@@ -1859,6 +1912,27 @@ final_scan_insn (rtx insn, FILE *file, int optimize ATTRIBUTE_UNUSED,
       if (LABEL_NAME (insn))
        (*debug_hooks->label) (insn);
 
+      /* If we are doing the optimization that partitions hot & cold
+        basic blocks into separate sections of the .o file, we need
+        to ensure the jump table ends up in the correct section...  */
+
+      if (flag_reorder_blocks_and_partition)
+       {
+         rtx tmp_table, tmp_label;
+         if (GET_CODE (insn) == CODE_LABEL
+             && tablejump_p (NEXT_INSN (insn), &tmp_label, &tmp_table))
+           {
+             /* Do nothing; do NOT change the current section.  */
+           }
+         else if (scan_ahead_for_unlikely_executed_note (insn))
+           unlikely_text_section ();
+         else
+           {
+             if (in_unlikely_text_section ())
+               text_section ();
+           }
+       }
+
       if (app_on)
        {
          fputs (ASM_APP_OFF, file);
diff --git a/gcc/flags.h b/gcc/flags.h
index 8a70fc31b78..472df5426d6 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -210,6 +210,11 @@ extern int flag_branch_probabilities;
 
 extern int flag_reorder_blocks;
 
+/* Nonzero if basic blocks should be partitioned into hot and cold
+   sections of the .o file, in addition to being reordered.  */
+
+extern int flag_reorder_blocks_and_partition;
+
 /* Nonzero if functions should be reordered.  */
 
 extern int flag_reorder_functions;
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 651f2a691a5..f0802372f7b 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -2851,6 +2851,18 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge)
   edge then_succ = then_bb->succ;
   int then_bb_index;
 
+  /* If we are partitioning hot/cold basic blocks, we don't want to
+     mess up unconditional or indirect jumps that cross between hot
+     and cold sections.  */
+
+  if (flag_reorder_blocks_and_partition
+      && ((BB_END (then_bb)
+           && find_reg_note (BB_END (then_bb), REG_CROSSING_JUMP, NULL_RTX))
+          || (BB_END (else_bb)
+              && find_reg_note (BB_END (else_bb), REG_CROSSING_JUMP,
+                                NULL_RTX))))
+    return FALSE;
+
   /* THEN has one successor.  */
   if (!then_succ || then_succ->succ_next != NULL)
     return FALSE;
@@ -2919,6 +2931,18 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
   edge else_succ = else_bb->succ;
   rtx note;
 
+  /* If we are partitioning hot/cold basic blocks, we don't want to
+     mess up unconditional or indirect jumps that cross between hot
+     and cold sections.  */
+
+  if (flag_reorder_blocks_and_partition
+      && ((BB_END (then_bb)
+           && find_reg_note (BB_END (then_bb), REG_CROSSING_JUMP, NULL_RTX))
+          || (BB_END (else_bb)
+              && find_reg_note (BB_END (else_bb), REG_CROSSING_JUMP,
+                                NULL_RTX))))
+    return FALSE;
+
   /* ELSE has one successor.  */
   if (!else_succ || else_succ->succ_next != NULL)
     return FALSE;
@@ -3263,7 +3287,8 @@ if_convert (int x_life_data_ok)
   num_true_changes = 0;
   life_data_ok = (x_life_data_ok != 0);
 
-  if (! targetm.cannot_modify_jumps_p ())
+  if ((! targetm.cannot_modify_jumps_p ())
+      && (!flag_reorder_blocks_and_partition || !no_new_pseudos))
     mark_loop_exit_edges ();
 
   /* Free up basic_block_for_insn so that we don't have to keep it
diff --git a/gcc/opts.c b/gcc/opts.c
index b5cb3d99c0a..a2df41a489a 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -592,6 +592,7 @@ decode_options (unsigned int argc, const char **argv)
         or less automatically remove extra jumps, but would also try to
         use more short jumps instead of long jumps.  */
       flag_reorder_blocks = 0;
+      flag_reorder_blocks_and_partition = 0;
     }
 
   if (optimize_size)
@@ -657,6 +658,19 @@ decode_options (unsigned int argc, const char **argv)
 
   if (flag_really_no_inline == 2)
     flag_really_no_inline = flag_no_inline;
+
+  /* The optimization to partition hot and cold basic blocks into separate
+     sections of the .o and executable files does not work (currently)
+     with exception handling.  If flag_exceptions is turned on we need to
+     turn off the partitioning optimization.  */
+
+  if (flag_exceptions && flag_reorder_blocks_and_partition)
+    {
+      warning
+       ("-freorder-blocks-and-partition does not work with exceptions");
+      flag_reorder_blocks_and_partition = 0;
+      flag_reorder_blocks = 1;
+    }
 }
 
 /* Handle target- and language-independent options.  Return zero to
@@ -1268,6 +1282,10 @@ common_handle_option (size_t scode, const char *arg,
       flag_reorder_blocks = value;
       break;
 
+    case OPT_freorder_blocks_and_partition:
+      flag_reorder_blocks_and_partition = value;
+      break;
+
     case OPT_freorder_functions:
       flag_reorder_functions = value;
       break;
diff --git a/gcc/output.h b/gcc/output.h
index 36dab8cca82..9040327879c 100644
--- a/gcc/output.h
+++ b/gcc/output.h
@@ -162,6 +162,9 @@ extern void check_function_return_warnings (void);
 /* Tell assembler to switch to text section.  */
 extern void text_section (void);
 
+/* Tell assembler to switch to unlikely-to-be-executed text section.  */
+extern void unlikely_text_section (void);
+
 /* Tell assembler to switch to data section.  */
 extern void data_section (void);
 
@@ -172,6 +175,9 @@ extern void readonly_data_section (void);
 /* Determine if we're in the text section.  */
 extern int in_text_section (void);
 
+/* Determine if we're in the unlikely-to-be-executed text section.  */
+extern int in_unlikely_text_section (void);
+
 #ifdef CTORS_SECTION_ASM_OP
 extern void ctors_section (void);
 #endif
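
The two entry points declared above are used together with scan_ahead_for_unlikely_executed_note in final.c and varasm.c. An illustrative helper (not part of the patch; the name switch_to_section_for_insn is invented here) showing the intended calling pattern:

/* Sketch only: choose the output section for the code starting at
   INSN, following the pattern of final_scan_insn above.  */

static void
switch_to_section_for_insn (rtx insn)
{
  if (!flag_reorder_blocks_and_partition)
    return;
  if (scan_ahead_for_unlikely_executed_note (insn))
    unlikely_text_section ();
  else if (in_unlikely_text_section ())
    text_section ();
}
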
diff --git a/gcc/passes.c b/gcc/passes.c
index d0ab392e36c..2453c748dc3 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -522,7 +522,7 @@ rest_of_handle_stack_regs (tree decl, rtx insns)
     {
       if (cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_POST_REGSTACK
                       | (flag_crossjumping ? CLEANUP_CROSSJUMP : 0))
-         && flag_reorder_blocks)
+         && (flag_reorder_blocks || flag_reorder_blocks_and_partition))
        {
          reorder_basic_blocks ();
          cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_POST_REGSTACK);
@@ -718,9 +718,9 @@ rest_of_handle_reorder_blocks (tree decl, rtx insns)
   if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
     tracer ();
-  if (flag_reorder_blocks)
+  if (flag_reorder_blocks || flag_reorder_blocks_and_partition)
     reorder_basic_blocks ();
-  if (flag_reorder_blocks
+  if (flag_reorder_blocks || flag_reorder_blocks_and_partition
       || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
     changed |= cleanup_cfg (CLEANUP_EXPENSIVE
                            | (!HAVE_conditional_execution
@@ -1806,6 +1806,20 @@ rest_of_compilation (tree decl)
   if (flag_if_conversion)
     rest_of_handle_if_after_combine (decl, insns);
 
+  /* The optimization to partition hot/cold basic blocks into separate
+     sections of the .o file does not work well with exception handling.
+     Don't call it if there are exceptions.  */
+
+  if (flag_reorder_blocks_and_partition && !flag_exceptions)
+    {
+      no_new_pseudos = 0;
+      partition_hot_cold_basic_blocks ();
+      allocate_reg_life_data ();
+      update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
+                       PROP_LOG_LINKS | PROP_REG_INFO | PROP_DEATH_NOTES);
+      no_new_pseudos = 1;
+    }
+
   if (optimize > 0 && (flag_regmove || flag_expensive_optimizations))
     rest_of_handle_regmove (decl, insns);
 
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index f1496c2463b..6562afcc524 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -291,6 +291,14 @@ print_rtx (rtx in_rtx)
            fprintf (outfile, " [ ERROR ]");
          break;
 
+       case NOTE_INSN_UNLIKELY_EXECUTED_CODE:
+         {
+           basic_block bb = NOTE_BASIC_BLOCK (in_rtx);
+           if (bb != 0)
+             fprintf (outfile, " [bb %d]", bb->index);
+           break;
+         }
+
        case NOTE_INSN_VAR_LOCATION:
          fprintf (outfile, " (");
          print_mem_expr (outfile, NOTE_VAR_LOCATION_DECL (in_rtx));
diff --git a/gcc/rtl.c b/gcc/rtl.c
index 9f545d83623..59d77999c2b 100644
--- a/gcc/rtl.c
+++ b/gcc/rtl.c
@@ -122,7 +122,9 @@ const char * const note_insn_name[NOTE_INSN_MAX - NOTE_INSN_BIAS] =
   "NOTE_INSN_EH_REGION_BEG", "NOTE_INSN_EH_REGION_END",
   "NOTE_INSN_REPEATED_LINE_NUMBER", "NOTE_INSN_BASIC_BLOCK",
   "NOTE_INSN_EXPECTED_VALUE",
-  "NOTE_INSN_PREDICTION", "NOTE_INSN_VAR_LOCATION"
+  "NOTE_INSN_PREDICTION",
+  "NOTE_INSN_UNLIKELY_EXECUTED_CODE",
+  "NOTE_INSN_VAR_LOCATION"
 };
 
 const char * const reg_note_name[] =
@@ -134,7 +136,7 @@ const char * const reg_note_name[] =
   "REG_VALUE_PROFILE", "REG_NOALIAS", "REG_SAVE_AREA", "REG_BR_PRED",
   "REG_FRAME_RELATED_EXPR", "REG_EH_CONTEXT", "REG_EH_REGION",
   "REG_SAVE_NOTE", "REG_MAYBE_DEAD", "REG_NORETURN",
-  "REG_NON_LOCAL_GOTO", "REG_SETJMP", "REG_ALWAYS_RETURN",
+  "REG_NON_LOCAL_GOTO", "REG_CROSSING_JUMP", "REG_SETJMP", "REG_ALWAYS_RETURN",
   "REG_VTABLE_REF"
 };
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index d17325c2b9d..f6ebe34caa4 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -857,6 +857,11 @@ enum reg_note
      computed goto.  */
   REG_NON_LOCAL_GOTO,
 
+  /* Indicates that a jump crosses between hot and cold sections
+     in a (partitioned) assembly or .o file, and therefore should not be
+     reduced to a simpler jump by optimizations.  */
+  REG_CROSSING_JUMP,
+
   /* This kind of note is generated at each to `setjmp',
      and similar functions that can return twice.  */
   REG_SETJMP,
@@ -1018,6 +1023,10 @@ enum insn_note
   /* Record a prediction.  Uses NOTE_PREDICTION.  */
   NOTE_INSN_PREDICTION,
 
+  /* Record that the current basic block is unlikely to be executed and
+     should be moved to the UNLIKELY_EXECUTED_TEXT_SECTION.  */
+  NOTE_INSN_UNLIKELY_EXECUTED_CODE,
+
   /* The location of a variable.  */
   NOTE_INSN_VAR_LOCATION,
 
diff --git a/gcc/toplev.c b/gcc/toplev.c
index fcfb99d2e66..39b2ddf1f6f 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -249,6 +249,11 @@ int flag_branch_probabilities = 0;
 
 int flag_reorder_blocks = 0;
 
+/* Nonzero if blocks should be partitioned into hot and cold sections in
+   addition to being reordered.  */
+
+int flag_reorder_blocks_and_partition = 0;
+
 /* Nonzero if functions should be reordered.  */
 
 int flag_reorder_functions = 0;
@@ -955,6 +960,7 @@ static const lang_independent_options f_options[] =
   {"branch-probabilities", &flag_branch_probabilities, 1 },
   {"profile", &profile_flag, 1 },
   {"reorder-blocks", &flag_reorder_blocks, 1 },
+  {"reorder-blocks-and-partition", &flag_reorder_blocks_and_partition, 1},
   {"reorder-functions", &flag_reorder_functions, 1 },
   {"rename-registers", &flag_rename_registers, 1 },
   {"cprop-registers", &flag_cprop_registers, 1 },
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 707ad4bbe65..c35c9b7773e 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -49,6 +49,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "debug.h"
 #include "target.h"
 #include "cgraph.h"
+#include "cfglayout.h"
 
 #ifdef XCOFF_DEBUGGING_INFO
 #include "xcoffout.h"          /* Needed for external data
@@ -100,6 +101,14 @@ int size_directive_output;
 
 tree last_assemble_variable_decl;
 
+/* The following global variable indicates if the section label for the
+   "cold" section of code has been output yet to the assembler.  The
+   label is useful when running gdb.  This is part of the optimization that
+   partitions hot and cold basic blocks into separate sections of the .o
+   file.  */
+
+bool unlikely_section_label_printed = false;
+
 /* RTX_UNCHANGING_P in a MEM can mean it is stored into, for initialization.
    So giving constant the alias set for the type will allow such
    initializations to appear to conflict with the load of the constant.  We
@@ -145,7 +154,8 @@ static bool asm_emit_uninitialised (tree, const char*,
                                    unsigned HOST_WIDE_INT);
 static void mark_weak (tree);
 
-enum in_section { no_section, in_text, in_data, in_named
+enum in_section { no_section, in_text, in_unlikely_executed_text, in_data,
+                 in_named
 #ifdef BSS_SECTION_ASM_OP
   , in_bss
 #endif
@@ -198,7 +208,34 @@ text_section (void)
   if (in_section != in_text)
     {
       in_section = in_text;
-      fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
+      fprintf (asm_out_file, SECTION_FORMAT_STRING, NORMAL_TEXT_SECTION_NAME);
+    }
+}
+
+/* Tell assembler to switch to unlikely-to-be-executed text section.  */
+
+void
+unlikely_text_section (void)
+{
+  if ((in_section != in_unlikely_executed_text)
+      && (in_section != in_named
+         || strcmp (in_named_name, UNLIKELY_EXECUTED_TEXT_SECTION_NAME) != 0))
+    {
+#ifdef TARGET_ASM_NAMED_SECTION
+
+      named_section (NULL_TREE, UNLIKELY_EXECUTED_TEXT_SECTION_NAME, 0);
+
+#else
+      in_section = in_unlikely_executed_text;
+      fprintf (asm_out_file, SECTION_FORMAT_STRING,
+              UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
+#endif /* ifdef TARGET_ASM_NAMED_SECTION */
+      if (!unlikely_section_label_printed)
+       {
+         fprintf (asm_out_file, "__%s_unlikely_section:\n",
+                  current_function_name ());
+         unlikely_section_label_printed = true;
+       }
+    }
 }
 
@@ -244,6 +281,14 @@ in_text_section (void)
   return in_section == in_text;
 }
 
+/* Determine if we're in the unlikely-to-be-executed text section.  */
+
+int
+in_unlikely_text_section (void)
+{
+  return in_section == in_unlikely_executed_text;
+}
+
 /* Determine if we're in the data section.  */
 
 int
@@ -483,11 +528,16 @@ asm_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
 void
 function_section (tree decl)
 {
-  if (decl != NULL_TREE
-      && DECL_SECTION_NAME (decl) != NULL_TREE)
-    named_section (decl, (char *) 0, 0);
+  if (scan_ahead_for_unlikely_executed_note (get_insns ()))
+    unlikely_text_section ();
   else
-    text_section ();
+    {
+      if (decl != NULL_TREE
+         && DECL_SECTION_NAME (decl) != NULL_TREE)
+       named_section (decl, (char *) 0, 0);
+      else
+       text_section ();
+    }
 }
 
 /* Switch to section for variable DECL.  RELOC is the same as the
@@ -1030,6 +1080,8 @@ assemble_start_function (tree decl, const char *fnname)
 {
   int align;
 
+  unlikely_section_label_printed = false;
+
   /* The following code does not need preprocessing in the assembler.  */
   app_disable ();
 
@@ -1117,7 +1169,8 @@ assemble_zeros (unsigned HOST_WIDE_INT size)
 #ifdef ASM_NO_SKIP_IN_TEXT
   /* The `space' pseudo in the text section outputs nop insns rather than 0s,
      so we must output 0s explicitly in the text section.  */
-  if (ASM_NO_SKIP_IN_TEXT && in_text_section ())
+  if ((ASM_NO_SKIP_IN_TEXT && in_text_section ())
+      || (ASM_NO_SKIP_IN_TEXT && in_unlikely_text_section ()))
     {
       unsigned HOST_WIDE_INT i;
       for (i = 0; i < size; i++)
@@ -1479,7 +1532,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED,
       variable_section (decl, reloc);
 
       /* dbxout.c needs to know this.  */
-      if (in_text_section ())
+      if (in_text_section () || in_unlikely_text_section ())
        DECL_IN_TEXT_SECTION (decl) = 1;
 
       /* Output the alignment of this data.  */
@@ -4328,6 +4381,8 @@ default_section_type_flags_1 (tree decl, const char *name, int reloc,
     flags = SECTION_CODE;
   else if (decl && decl_readonly_section_1 (decl, reloc, shlib))
     flags = 0;
+  else if (strcmp (name, UNLIKELY_EXECUTED_TEXT_SECTION_NAME) == 0)
+    flags = SECTION_CODE;
   else
     flags = SECTION_WRITE;