diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b233e9496e4..edacc43e1a5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2003-10-11 Jan Hubicka + + * Makefile.in (web.o): New. + * web.c: New file. + * rtl.h (web_main): Declare. + * timevar.def (TV_WEB): New. + * toplev.c (dump_file_index, dump_file_info): Add DFI_web. + (rest_of_handle_web): New. + (flag_web): New variable. + (lang_independent_options): Add "web". + (rest_of_compilation): Call rest_of_handle_web. + * invoke.texi (-fweb): Document. + * common.opt (fweb): New. + * flags.h (flag_web): New. + * opts.c (decode_options): Set flag_web at -O3. + + * passes.texi (web construction): Document. + * invoke.texi (-O3): Document that -fweb is enabled. + + * regrename.c (regrename_optimize): Deal better with situation when + replacement failed. + + * sched-ebb.c: Include params.h and profile.h. + (schedule_ebbs): Use tracer parameters to discover superblocks. + * Makefile.in (sched-ebb.o): Add dependencies. + 2003-10-11 Roger Sayle * fold-const.c (negate_mathfn_p): New function to determine whether diff --git a/gcc/Makefile.in b/gcc/Makefile.in index aafd1b536f7..3c5cf664f9a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -847,7 +847,7 @@ OBJS-common = \ sibcall.o simplify-rtx.o sreal.o ssa.o ssa-ccp.o ssa-dce.o stmt.o \ stor-layout.o stringpool.o targhooks.o timevar.o toplev.o tracer.o tree.o tree-dump.o \ unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \ - alloc-pool.o et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) + alloc-pool.o et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o OBJS-md = $(out_object_file) OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) hashtable.o tree-inline.o \ @@ -1622,6 +1622,8 @@ cse.o : cse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ hard-reg-set.h flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h \ output.h function.h $(BASIC_BLOCK_H) $(GGC_H) $(TM_P_H) $(TIMEVAR_H) \ except.h $(TARGET_H) 
$(PARAMS_H) +web.o : web.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ + hard-reg-set.h flags.h $(BASIC_BLOCK_H) function.h output.h toplev.h df.h gcse.o : gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ hard-reg-set.h flags.h real.h insn-config.h $(GGC_H) $(RECOG_H) $(EXPR_H) \ $(BASIC_BLOCK_H) function.h output.h toplev.h $(TM_P_H) $(PARAMS_H) except.h gt-gcse.h diff --git a/gcc/common.opt b/gcc/common.opt index ab1a69dcf4b..26af2806a24 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -715,6 +715,10 @@ fverbose-asm Common Add extra commentary to assembler output +fweb +Common +Construct webs and split unrelated uses of single variable + fwrapv Common Assume signed arithmetic overflow wraps around diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 20ef67f7586..67d61c84734 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -3680,13 +3680,14 @@ also turns on the following optimization flags: -fgcse -fgcse-lm -fgcse-sm @gol -fdelete-null-pointer-checks @gol -fexpensive-optimizations @gol -fregmove @gol -fschedule-insns -fschedule-insns2 @gol -fsched-interblock -fsched-spec @gol -fcaller-saves @gol -fpeephole2 @gol -freorder-blocks -freorder-functions @gol -fstrict-aliasing @gol +-funit-at-a-time @gol -falign-functions -falign-jumps @gol -falign-loops -falign-labels} @@ -3697,7 +3698,7 @@ invoking @option{-O2} on programs that use computed gotos. @opindex O3 Optimize yet more. @option{-O3} turns on all optimizations specified by @option{-O2} and also turns on the @option{-finline-functions}, -@option{-funit-at-a-time} and @option{-frename-registers} options. +@option{-fweb} and @option{-frename-registers} options. @item -O0 @opindex O0 diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index ed32827238a..ad07f60841e 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -366,6 +366,18 @@ The option @option{-dG} causes a debugging dump of the RTL code after this pass. 
This dump file's name is made by appending @samp{.bypass} to the input file name. +@cindex web construction +@item +Simple optimization pass that splits independent uses of each pseudo, +increasing the effect of other optimizations. This can improve the effect of +other transformations, such as CSE or register allocation. +Its source file is @file{web.c}. + +@opindex dZ +The option @option{-dZ} causes a debugging dump of the RTL code after +this pass. This dump file's name is made by appending @samp{.web} to +the input file name. + @item @opindex frerun-cse-after-loop If @option{-frerun-cse-after-loop} was enabled, a second common diff --git a/gcc/flags.h b/gcc/flags.h index f345b497197..93600fbd15f 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -709,6 +709,8 @@ extern int flag_signaling_nans; extern int flag_unit_at_a_time; +extern int flag_web; + /* A string that's used when a random name is required. NULL means to make it really random. */ diff --git a/gcc/opts.c b/gcc/opts.c index dc489e19b31..9da64d543bb 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -564,6 +564,7 @@ decode_options (unsigned int argc, const char **argv) flag_inline_functions = 1; flag_rename_registers = 1; flag_unswitch_loops = 1; + flag_web = 1; } if (optimize < 2 || optimize_size) diff --git a/gcc/regrename.c b/gcc/regrename.c index c1ff6255316..4e93fab796b 100644 --- a/gcc/regrename.c +++ b/gcc/regrename.c @@ -230,7 +230,7 @@ regrename_optimize (void) CLEAR_HARD_REG_SET (regs_seen); while (all_chains) { - int new_reg, best_new_reg = -1; + int new_reg, best_new_reg; int n_uses; struct du_chain *this = all_chains; struct du_chain *tmp, *last; @@ -240,6 +240,8 @@ regrename_optimize (void) all_chains = this->next_chain; + best_new_reg = reg; + #if 0 /* This just disables optimization opportunities. */ /* Only rename once we've seen the reg more than once. */ if (! TEST_HARD_REG_BIT (regs_seen, reg)) @@ -320,8 +322,7 @@ regrename_optimize (void) break; if (! 
tmp) { - if (best_new_reg == -1 - || tick[best_new_reg] > tick[new_reg]) + if (tick[best_new_reg] > tick[new_reg]) best_new_reg = new_reg; } } @@ -334,15 +335,16 @@ regrename_optimize (void) fprintf (rtl_dump_file, " crosses a call"); } - if (best_new_reg == -1) + if (best_new_reg == reg) { + tick[reg] = ++this_tick; if (rtl_dump_file) - fprintf (rtl_dump_file, "; no available registers\n"); + fprintf (rtl_dump_file, "; no available better choice\n"); continue; } do_replace (this, best_new_reg); - tick[best_new_reg] = this_tick++; + tick[best_new_reg] = ++this_tick; if (rtl_dump_file) fprintf (rtl_dump_file, ", renamed as %s\n", reg_names[best_new_reg]); diff --git a/gcc/rtl.h b/gcc/rtl.h index fb1922a3b9d..ddbda985e02 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2055,6 +2055,8 @@ extern rtx remove_death (unsigned int, rtx); extern void dump_combine_stats (FILE *); extern void dump_combine_total_stats (FILE *); #endif +/* In web.c */ +extern void web_main PARAMS ((void)); /* In sched.c. */ #ifdef BUFSIZ diff --git a/gcc/sched-ebb.c b/gcc/sched-ebb.c index 06637d75593..e005f74160a 100644 --- a/gcc/sched-ebb.c +++ b/gcc/sched-ebb.c @@ -39,6 +39,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "toplev.h" #include "recog.h" #include "cfglayout.h" +#include "params.h" #include "sched-int.h" #include "target.h" @@ -561,6 +562,13 @@ void schedule_ebbs (FILE *dump_file) { basic_block bb; + int probability_cutoff; + + if (profile_info && flag_branch_probabilities) + probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); + else + probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); + probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; /* Taking care of this degenerate case makes the rest of this code simpler. */ @@ -592,7 +600,7 @@ schedule_ebbs (FILE *dump_file) break; if (! 
e) break; - if (e->probability < REG_BR_PROB_BASE / 2) + if (e->probability <= probability_cutoff) break; bb = bb->next_bb; } diff --git a/gcc/timevar.def b/gcc/timevar.def index 22d3097cf6a..99a0b09d1e7 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -69,6 +69,7 @@ DEFTIMEVAR (TV_GCSE , "global CSE") DEFTIMEVAR (TV_LOOP , "loop analysis") DEFTIMEVAR (TV_BYPASS , "bypass jumps") DEFTIMEVAR (TV_TRACER , "tracer") +DEFTIMEVAR (TV_WEB , "web") DEFTIMEVAR (TV_CSE2 , "CSE 2") DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction") DEFTIMEVAR (TV_FLOW , "flow analysis") diff --git a/gcc/toplev.c b/gcc/toplev.c index bc96c649879..797a9908360 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -266,6 +266,7 @@ enum dump_file_index DFI_bp, DFI_ce1, DFI_tracer, + DFI_web, DFI_loop2, DFI_cse2, DFI_life, @@ -295,7 +296,7 @@ enum dump_file_index Remaining -d letters: " m q " - " JK O Q V YZ" + " JK O Q V Y " */ static struct dump_file_info dump_file[DFI_MAX] = @@ -319,6 +320,7 @@ static struct dump_file_info dump_file[DFI_MAX] = { "bp", 'b', 1, 0, 0 }, { "ce1", 'C', 1, 0, 0 }, { "tracer", 'T', 1, 0, 0 }, + { "web", 'Z', 0, 0, 0 }, { "loop2", 'L', 1, 0, 0 }, { "cse2", 't', 1, 0, 0 }, { "life", 'f', 1, 0, 0 }, /* Yes, duplicate enable switch. */ @@ -654,6 +656,10 @@ int flag_complex_divide_method = 0; int flag_syntax_only = 0; +/* Nonzero means perform the web construction pass. */ + +int flag_web; + /* Nonzero means perform loop optimizer. 
*/ int flag_loop_optimize; @@ -1065,6 +1071,7 @@ static const lang_independent_options f_options[] = {"pcc-struct-return", &flag_pcc_struct_return, 1 }, {"reg-struct-return", &flag_pcc_struct_return, 0 }, {"delayed-branch", &flag_delayed_branch, 1 }, + {"web", &flag_web, 1}, {"gcse", &flag_gcse, 1 }, {"gcse-lm", &flag_gcse_lm, 1 }, {"gcse-sm", &flag_gcse_sm, 1 }, @@ -2427,6 +2434,20 @@ rest_of_handle_if_after_combine (tree decl, rtx insns) timevar_pop (TV_IFCVT); } +static void +rest_of_handle_web (tree decl, rtx insns) +{ + open_dump_file (DFI_web, decl); + timevar_push (TV_WEB); + web_main (); /* Split independent uses of each pseudo (see web.c). */ + delete_trivially_dead_insns (insns, max_reg_num ()); + cleanup_cfg (CLEANUP_EXPENSIVE); + + timevar_pop (TV_WEB); + close_dump_file (DFI_web, print_rtl_with_bb, insns); + reg_scan (get_insns (), max_reg_num (), 0); /* NOTE(review): presumably rescans because web_main can create new pseudos; confirm. */ +} + /* Do branch profiling and static profile estimation passes. */ static void rest_of_handle_branch_prob (tree decl, rtx insns) @@ -3313,6 +3334,9 @@ rest_of_compilation (tree decl) rest_of_handle_cfg (decl, insns); + if (flag_web) + rest_of_handle_web (decl, insns); + if (optimize > 0 || profile_arc_flag || flag_test_coverage || flag_branch_probabilities) rest_of_handle_branch_prob (decl, insns); diff --git a/gcc/web.c b/gcc/web.c new file mode 100644 index 00000000000..e200453971a --- /dev/null +++ b/gcc/web.c @@ -0,0 +1,323 @@ +/* Web construction code for GNU compiler. + Contributed by Jan Hubicka + Copyright (C) 2001, 2002 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +/* Simple optimization pass that splits independent uses of each pseudo, + increasing the effectiveness of other optimizations. The optimization can + serve as an example of the use of the dataflow module. + + We would rather not split registers with REG_USERVAR set unless + -fmessy-debugging is used, as debug information about such split variables + is almost useless; that check is currently disabled in entry_register. + + TODO + - Add code to keep debugging information up-to-date after splitting of user + variable pseudos. This can be done by remembering all the pseudos used for + the variable and using life analysis information before reload to determine + which one of the possible choices is live and, in case more are live, + choosing the one with the latest definition. + + Some other optimization passes will benefit from the infrastructure + too. + + - We may use profile information and ignore infrequent uses for purposes + of web unifying, inserting the compensation code later to implement full + induction variable expansion for loops (currently we expand only if + induction is dead afterwards, which is often the case anyway). */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "toplev.h" + +#include "rtl.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "basic-block.h" +#include "output.h" +#include "df.h" +#include "function.h" + + +/* This entry is allocated for each reference in the insn stream. */ +struct web_entry +{ + /* Pointer to the parent in the union/find tree. */ + struct web_entry *pred; + /* Newly assigned register to the entry. Set only for roots. 
*/ + rtx reg; +}; + +static struct web_entry *unionfind_root PARAMS ((struct web_entry *)); +static void unionfind_union PARAMS ((struct web_entry *, + struct web_entry *)); +static void union_defs PARAMS ((struct df *, struct ref *, + struct web_entry *, + struct web_entry *)); +static rtx entry_register PARAMS ((struct web_entry *, + struct ref *, char *, char *)); +static void replace_ref PARAMS ((struct ref *, rtx)); +static int mark_addressof PARAMS ((rtx *, void *)); + +/* Find the root of the union-find tree holding ELEMENT, compressing the path. */ + +static struct web_entry * +unionfind_root (element) + struct web_entry *element; +{ + struct web_entry *element1 = element, *element2; + + while (element->pred) + element = element->pred; + while (element1->pred) + { + element2 = element1->pred; + element1->pred = element; + element1 = element2; + } + return element; +} + +/* Merge the sets of FIRST and SECOND; the root of FIRST becomes the new root. */ + +static void +unionfind_union (first, second) + struct web_entry *first, *second; +{ + first = unionfind_root (first); + second = unionfind_root (second); + if (first == second) + return; + second->pred = first; +} + +/* For each use, all possible defs reaching it must come in the same register, + so union them. */ + +static void +union_defs (df, use, def_entry, use_entry) + struct df *df; + struct ref *use; + struct web_entry *def_entry; + struct web_entry *use_entry; +{ + rtx insn = DF_REF_INSN (use); + struct df_link *link = DF_REF_CHAIN (use); + struct df_link *use_link = DF_INSN_USES (df, insn); + struct df_link *def_link = DF_INSN_DEFS (df, insn); + rtx set = single_set (insn); + + /* Some instructions may use match_dup for their operands. In case the + operands are dead, we would assign them different pseudos, creating an + invalid instruction, so union all uses of the same operand within each + insn. 
*/ + + while (use_link) + { + if (use != use_link->ref + && DF_REF_REAL_REG (use) == DF_REF_REAL_REG (use_link->ref)) + unionfind_union (use_entry + DF_REF_ID (use), + use_entry + DF_REF_ID (use_link->ref)); + use_link = use_link->next; + } + + /* Recognize trivial noop moves and attempt to keep them noop. + While most noop moves should be removed, we still keep some at + libcall boundaries and such. */ + + if (set + && SET_SRC (set) == DF_REF_REG (use) + && SET_SRC (set) == SET_DEST (set)) + { + while (def_link) + { + if (DF_REF_REAL_REG (use) == DF_REF_REAL_REG (def_link->ref)) + unionfind_union (use_entry + DF_REF_ID (use), + def_entry + DF_REF_ID (def_link->ref)); + def_link = def_link->next; + } + } + while (link) + { + unionfind_union (use_entry + DF_REF_ID (use), + def_entry + DF_REF_ID (link->ref)); + link = link->next; + } + + /* A READ_WRITE use requires the corresponding def to be in the same + register. Find it and union; the search assumes such a def exists. */ + if (use->flags & DF_REF_READ_WRITE) + { + struct df_link *link = DF_INSN_DEFS (df, DF_REF_INSN (use)); + + while (DF_REF_REAL_REG (link->ref) != DF_REF_REAL_REG (use)) + link = link->next; + + unionfind_union (use_entry + DF_REF_ID (use), + def_entry + DF_REF_ID (link->ref)); + } +} + +/* Return the register for the web containing ENTRY, assigning it if needed. */ + +static rtx +entry_register (entry, ref, used, use_addressof) + struct web_entry *entry; + struct ref *ref; + char *used; + char *use_addressof; +{ + struct web_entry *root; + rtx reg, newreg; + + /* Find corresponding web and see if it has been visited. */ + + root = unionfind_root (entry); + if (root->reg) + return root->reg; + + /* We are seeing this web for the first time; do the assignment. */ + + reg = DF_REF_REAL_REG (ref); + + /* In case the original register is already assigned, generate new one. 
*/ + if (!used[REGNO (reg)]) + newreg = reg, used[REGNO (reg)] = 1; + else if (REG_USERVAR_P (reg) && 0/*&& !flag_messy_debugging*/) + { + newreg = reg; /* Never reached: the && 0 above disables this branch. */ + if (rtl_dump_file) + fprintf (rtl_dump_file, + "New web forced to keep reg=%i (user variable)\n", + REGNO (reg)); + } + else if (use_addressof [REGNO (reg)]) + { + newreg = reg; + if (rtl_dump_file) + fprintf (rtl_dump_file, + "New web forced to keep reg=%i (address taken)\n", + REGNO (reg)); + } + else + { + newreg = gen_reg_rtx (GET_MODE (reg)); + REG_USERVAR_P (newreg) = REG_USERVAR_P (reg); + REG_POINTER (newreg) = REG_POINTER (reg); + REG_LOOP_TEST_P (newreg) = REG_LOOP_TEST_P (reg); + RTX_UNCHANGING_P (newreg) = RTX_UNCHANGING_P (reg); + REG_ATTRS (newreg) = REG_ATTRS (reg); + if (rtl_dump_file) + fprintf (rtl_dump_file, "Web oldreg=%i newreg=%i\n", REGNO (reg), + REGNO (newreg)); + } + + root->reg = newreg; + return newreg; +} + +/* Make the reference REF use register REG instead of its current one. */ + +static void +replace_ref (ref, reg) + struct ref *ref; + rtx reg; +{ + rtx oldreg = DF_REF_REAL_REG (ref); + rtx *loc = DF_REF_REAL_LOC (ref); + + if (oldreg == reg) + return; + if (rtl_dump_file) + fprintf (rtl_dump_file, "Updating insn %i (%i->%i)\n", + INSN_UID (DF_REF_INSN (ref)), REGNO (oldreg), REGNO (reg)); + *loc = reg; +} + +/* for_each_rtx callback: flag in DATA each register under an ADDRESSOF. */ + +static int +mark_addressof (rtl, data) + rtx *rtl; + void *data; +{ + if (!*rtl) + return 0; + if (GET_CODE (*rtl) == ADDRESSOF + && REG_P (XEXP (*rtl, 0))) + ((char *)data)[REGNO (XEXP (*rtl, 0))] = 1; + return 0; +} + +/* Main entry point of the web construction pass. 
*/ + +void +web_main () +{ + struct df *df; + struct web_entry *def_entry; + struct web_entry *use_entry; + unsigned int i; + int max = max_reg_num (); + char *used; + char *use_addressof; + rtx insn; + + df = df_init (); + df_analyse (df, 0, DF_UD_CHAIN | DF_EQUIV_NOTES); + + def_entry = + (struct web_entry *) xcalloc (df->n_defs, sizeof (struct web_entry)); + use_entry = + (struct web_entry *) xcalloc (df->n_uses, sizeof (struct web_entry)); + used = (char *) xcalloc (max, sizeof (char)); + use_addressof = (char *) xcalloc (max, sizeof (char)); + + if (rtl_dump_file) + df_dump (df, DF_UD_CHAIN | DF_DU_CHAIN, rtl_dump_file); + + /* Produce the webs: union each use with the defs that reach it. */ + for (i = 0; i < df->n_uses; i++) + union_defs (df, df->uses[i], def_entry, use_entry); + + /* We cannot safely rename registers whose address is taken. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + for_each_rtx (&PATTERN (insn), mark_addressof, use_addressof); + + /* Update the instruction stream, allocating new registers for split pseudos + as we go. NOTE(review): assumes DF_REF_ID of uses[i] and defs[i] equals i. */ + for (i = 0; i < df->n_uses; i++) + replace_ref (df->uses[i], entry_register (use_entry + i, df->uses[i], + used, use_addressof)); + for (i = 0; i < df->n_defs; i++) + replace_ref (df->defs[i], entry_register (def_entry + i, df->defs[i], + used, use_addressof)); + + /* Dataflow information is stale here, but it could easily be updated + by creating new entries for the new registers or by calling + df_insns_modify. */ + free (def_entry); + free (use_entry); + free (used); + free (use_addressof); + df_finish (df); +}