From b9422b69c4574ea53d1a5fdbc90de3935589e502 Mon Sep 17 00:00:00 2001
From: Jan Hubicka
Date: Thu, 20 Feb 2003 21:56:53 +0100
Subject: [PATCH] toplev.c (flag_sched2_use_superblocks, [...]): New global variables.

	* toplev.c (flag_sched2_use_superblocks, flag_sched2_use_traces): New global variables.
	(lang_independent_options): Add -fsched2-use-superblocks -fsched2-use-traces.
	(rest_of_compilation): Deal with them.
	* invoke.texi (-fsched2-use-traces, -fsched2-use-superblocks): Declare.
	* flags.h (flag_sched2_use_superblocks, flag_sched2_use_traces): Declare.
	* rtl.h (reg_to_stack): Update prototype.
	* reg-stack.c (reg_to_stack): Return when something has changed;
	update liveness when executing after superblock scheduling.

	* combine.c (simplify_shift_const): Simplify a few special cases
	into constants.

From-SVN: r63183
---
 gcc/ChangeLog       | 14 +++++++++++
 gcc/combine.c       | 10 ++++++++
 gcc/doc/invoke.texi | 27 +++++++++++++++++++-
 gcc/flags.h         |  2 ++
 gcc/reg-stack.c     | 13 +++++++---
 gcc/rtl.h           |  2 +-
 gcc/toplev.c        | 60 ++++++++++++++++++++++++++++++++++++++++++---
 7 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 98fce0af178..4d708ef4bdf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+Thu Feb 20 21:41:19 CET 2003  Jan Hubicka
+
+	* toplev.c (flag_sched2_use_superblocks, flag_sched2_use_traces): New global variables.
+	(lang_independent_options): Add -fsched2-use-superblocks -fsched2-use-traces.
+	(rest_of_compilation): Deal with them.
+	* invoke.texi (-fsched2-use-traces, -fsched2-use-superblocks): Declare.
+	* flags.h (flag_sched2_use_superblocks, flag_sched2_use_traces): Declare.
+	* rtl.h (reg_to_stack): Update prototype.
+	* reg-stack.c (reg_to_stack): Return when something has changed;
+	update liveness when executing after superblock scheduling.
+
+	* combine.c (simplify_shift_const): Simplify a few special cases
+	into constants.
+
 2003-02-20  David Edelsohn
 
 	* config/rs6000/rs6000.md: (attr "type"): Add fast_compare.
diff --git a/gcc/combine.c b/gcc/combine.c
index ce4d3aa35dc..7792537d34b 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -9431,6 +9431,16 @@ simplify_shift_const (x, code, result_mode, varop, orig_count)
 	      == 0))
 	code = LSHIFTRT;
 
+      if (code == LSHIFTRT
+	  && GET_MODE_BITSIZE (shift_mode) <= HOST_BITS_PER_WIDE_INT
+	  && !(nonzero_bits (varop, shift_mode) >> count))
+	return const0_rtx;
+      if (code == ASHIFT
+	  && GET_MODE_BITSIZE (shift_mode) <= HOST_BITS_PER_WIDE_INT
+	  && !((nonzero_bits (varop, shift_mode) << count)
+	       & GET_MODE_MASK (shift_mode)))
+	return const0_rtx;
+
       switch (GET_CODE (varop))
 	{
 	case SIGN_EXTEND:
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c4828d6176d..f47c51ec666 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -287,7 +287,8 @@ in the following sections.
 -frerun-cse-after-loop -frerun-loop-opt @gol
 -fschedule-insns -fschedule-insns2 @gol
 -fno-sched-interblock -fno-sched-spec -fsched-spec-load @gol
--fsched-spec-load-dangerous -fsignaling-nans @gol
+-fsched-spec-load-dangerous -fsched2-use-superblocks @gol
+-fsched2-use-traces -fsignaling-nans @gol
 -fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
 -fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
 -funit-at-a-time -funroll-all-loops -funroll-loops -funswitch-loops @gol
@@ -3895,6 +3896,30 @@ Allow speculative motion of more load instructions.  This only makes
 sense when scheduling before register allocation, i.e.@: with
 @option{-fschedule-insns} or at @option{-O2} or higher.
 
+@item -fsched2-use-superblocks
+@opindex fsched2-use-superblocks
+When scheduling after register allocation, use the superblock scheduling
+algorithm.  Superblock scheduling allows motion across basic block boundaries,
+resulting in faster schedules.  This option is experimental, as not all machine
+descriptions used by GCC model the CPU closely enough to avoid unreliable
+results from the algorithm.
+
+This only makes sense when scheduling after register allocation, i.e.@: with
+@option{-fschedule-insns2} or at @option{-O2} or higher.
+
+@item -fsched2-use-traces
+@opindex fsched2-use-traces
+Use the @option{-fsched2-use-superblocks} algorithm when scheduling after
+register allocation and additionally perform code duplication in order to
+increase the size of superblocks using the tracer pass.  See @option{-ftracer}
+for details on trace formation.
+
+This mode should produce faster but significantly larger programs.  Also,
+without @option{-fbranch-probabilities} the constructed traces may not match
+the actual execution profile and can hurt performance.  This only makes
+sense when scheduling after register allocation, i.e.@: with
+@option{-fschedule-insns2} or at @option{-O2} or higher.
+
 @item -fcaller-saves
 @opindex fcaller-saves
 Enable values to be allocated in registers that will be clobbered by
diff --git a/gcc/flags.h b/gcc/flags.h
index 18cffaa479b..559f70fd6b8 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -416,6 +416,8 @@ extern int flag_shared_data;
 extern int flag_schedule_insns;
 extern int flag_schedule_insns_after_reload;
 
+extern int flag_sched2_use_superblocks;
+extern int flag_sched2_use_traces;
 
 /* The following flags have effect only for scheduling before register
    allocation:
diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c
index 965aad0b24d..f8f4b3144bf 100644
--- a/gcc/reg-stack.c
+++ b/gcc/reg-stack.c
@@ -416,7 +416,7 @@ pop_stack (regstack, regno)
    code duplication created when the converter inserts pop insns on
    the edges.  */
 
-void
+bool
 reg_to_stack (first, file)
      rtx first;
      FILE *file;
@@ -437,11 +437,15 @@ reg_to_stack (first, file)
     if (regs_ever_live[i])
       break;
   if (i > LAST_STACK_REG)
-    return;
+    return false;
 
   /* Ok, floating point instructions exist.  If not optimizing,
-     build the CFG and run life analysis.  */
-  if (!optimize)
+     build the CFG and run life analysis.
+     We also need to rebuild life info when superblock scheduling has been
+     done, as it does not update liveness yet.  */
+  if (!optimize
+      || (flag_sched2_use_superblocks
+	  && flag_schedule_insns_after_reload))
     {
       count_or_remove_death_notes (NULL, 1);
       life_analysis (first, file, PROP_DEATH_NOTES);
@@ -498,6 +502,7 @@ reg_to_stack (first, file)
   convert_regs (file);
 
   free_aux_for_blocks ();
+  return true;
 }
 
 /* Check PAT, which is in INSN, for LABEL_REFs.  Add INSN to the
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 680550a6520..ff6c5810232 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2167,7 +2167,7 @@ extern void create_profiler PARAMS ((void));
 
 /* In reg-stack.c */
 #ifdef BUFSIZ
-extern void reg_to_stack PARAMS ((rtx, FILE *));
+extern bool reg_to_stack PARAMS ((rtx, FILE *));
 #endif
 
 /* In fold-const.c */
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 9322f4fecf3..ffd3760a6a6 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -254,9 +254,9 @@ enum dump_file_index
   DFI_peephole2,
   DFI_rnreg,
   DFI_ce3,
+  DFI_bbro,
   DFI_sched2,
   DFI_stack,
-  DFI_bbro,
   DFI_mach,
   DFI_dbr,
   DFI_MAX
@@ -305,9 +305,9 @@ static struct dump_file_info dump_file[DFI_MAX] =
   { "peephole2", 'z', 1, 0, 0 },
   { "rnreg", 'n', 1, 0, 0 },
   { "ce3", 'E', 1, 0, 0 },
+  { "bbro", 'B', 1, 0, 0 },
   { "sched2", 'R', 1, 0, 0 },
   { "stack", 'k', 1, 0, 0 },
-  { "bbro", 'B', 1, 0, 0 },
   { "mach", 'M', 1, 0, 0 },
   { "dbr", 'd', 0, 0, 0 },
 };
@@ -745,6 +745,13 @@ int flag_pedantic_errors = 0;
 int flag_schedule_insns = 0;
 int flag_schedule_insns_after_reload = 0;
 
+/* When flag_schedule_insns_after_reload is set, use the EBB scheduler.  */
+int flag_sched2_use_superblocks = 0;
+
+/* When flag_schedule_insns_after_reload is set, construct traces and run
+   the EBB scheduler on them.  */
+int flag_sched2_use_traces = 0;
+
 /* The following flags have effect only for scheduling before register
    allocation:
 
@@ -1079,6 +1086,10 @@ static const lang_independent_options f_options[] =
   N_("Allow speculative motion of some loads") },
   {"sched-spec-load-dangerous",&flag_schedule_speculative_load_dangerous, 1,
   N_("Allow speculative motion of more loads") },
+  {"sched2-use-superblocks", &flag_sched2_use_superblocks, 1,
+  N_("If scheduling post reload, do superblock scheduling") },
+  {"sched2-use-traces", &flag_sched2_use_traces, 1,
+  N_("If scheduling post reload, do trace scheduling") },
   {"branch-count-reg",&flag_branch_on_count_reg, 1,
   N_("Replace add,compare,branch with branch on count reg") },
   {"pic", &flag_pic, 1,
@@ -3490,6 +3501,28 @@ rest_of_compilation (decl)
       split_all_insns (1);
 #endif
 
+  if (optimize > 0)
+    {
+      timevar_push (TV_REORDER_BLOCKS);
+      open_dump_file (DFI_bbro, decl);
+
+      /* Last attempt to optimize the CFG, as scheduling, peepholing and insn
+	 splitting possibly introduced more crossjumping opportunities.  */
+      cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_UPDATE_LIFE
+		   | (flag_crossjumping ? CLEANUP_CROSSJUMP : 0));
+
+      if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
+	tracer ();
+      if (flag_reorder_blocks)
+	reorder_basic_blocks ();
+      if (flag_reorder_blocks
+	  || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
+	cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_UPDATE_LIFE);
+
+      close_dump_file (DFI_bbro, print_rtl_with_bb, insns);
+      timevar_pop (TV_REORDER_BLOCKS);
+    }
+
 #ifdef INSN_SCHEDULING
   if (optimize > 0 && flag_schedule_insns_after_reload)
     {
@@ -3501,7 +3534,16 @@ rest_of_compilation (decl)
       timevar_push (TV_SCHED2);
       open_dump_file (DFI_sched2, decl);
       split_all_insns (1);
-      schedule_insns (rtl_dump_file);
+      if (flag_sched2_use_superblocks || flag_sched2_use_traces)
+	{
+	  schedule_ebbs (rtl_dump_file);
+	  /* No liveness updating code yet, but it should be easy to do.
+	     reg-stack recomputes the liveness when needed for now.  */
+	  count_or_remove_death_notes (NULL, 1);
+	  cleanup_cfg (CLEANUP_EXPENSIVE);
+	}
+      else
+	schedule_insns (rtl_dump_file);
 
       close_dump_file (DFI_sched2, print_rtl_with_bb, insns);
       timevar_pop (TV_SCHED2);
@@ -3519,7 +3561,16 @@ rest_of_compilation (decl)
       timevar_push (TV_REG_STACK);
       open_dump_file (DFI_stack, decl);
 
-      reg_to_stack (insns, rtl_dump_file);
+      if (reg_to_stack (insns, rtl_dump_file) && optimize)
+	{
+	  if (cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_POST_REGSTACK
+			   | (flag_crossjumping ? CLEANUP_CROSSJUMP : 0))
+	      && flag_reorder_blocks)
+	    {
+	      reorder_basic_blocks ();
+	      cleanup_cfg (CLEANUP_EXPENSIVE);
+	    }
+	}
 
       close_dump_file (DFI_stack, print_rtl_with_bb, insns);
       timevar_pop (TV_REG_STACK);
@@ -4904,6 +4955,7 @@ parse_options_and_default_flags (argc, argv)
       flag_cse_skip_blocks = 1;
       flag_gcse = 1;
      flag_expensive_optimizations = 1;
+      flag_unit_at_a_time = 1;
      flag_strength_reduce = 1;
      flag_rerun_cse_after_loop = 1;
      flag_rerun_loop_opt = 1;
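
A note on the simplify_shift_const hunk above: the new tests use nonzero_bits to prove that every bit which could possibly be nonzero in the shifted operand is shifted out of the mode, in which case the whole shift collapses to the constant zero. The following standalone C program is only a sketch of that rule for illustration; the helper names and the simplified mask handling are made up here and are not GCC's nonzero_bits machinery.

/* Standalone illustration (not GCC code): a shift folds to 0 when the
   operand's possibly-nonzero bits are all shifted out of the mode.  */

#include <stdio.h>

typedef unsigned long long uwide;

/* Mask of all bits in a WIDTH-bit mode; stands in for GET_MODE_MASK.  */
static uwide
mode_mask (int width)
{
  return width >= 64 ? ~0ULL : (1ULL << width) - 1;
}

/* NONZERO plays the role of nonzero_bits (varop, shift_mode): a mask of
   the bits that may be nonzero in the shifted operand.  */

/* Logical right shift: nothing survives if NONZERO >> COUNT is zero.  */
static int
lshiftrt_is_zero (uwide nonzero, int count)
{
  return (nonzero >> count) == 0;
}

/* Left shift: nothing survives if every possibly-nonzero bit is pushed
   past the top of the WIDTH-bit mode.  */
static int
ashift_is_zero (uwide nonzero, int count, int width)
{
  return ((nonzero << count) & mode_mask (width)) == 0;
}

int
main (void)
{
  /* (x & 0xff) >> 8: only bits 0..7 can be set, so the result is 0.  */
  printf ("%d\n", lshiftrt_is_zero (0xff, 8));

  /* In a 32-bit mode, (x & 0xff000000) << 8 pushes every possibly-set
     bit past bit 31, so the result is 0 as well.  */
  printf ("%d\n", ashift_is_zero (0xff000000ULL, 8, 32));
  return 0;
}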