From 8c660648cab7bed53c82b8283dcf170db7e97983 Mon Sep 17 00:00:00 2001 From: Jeffrey A Law Date: Tue, 12 Aug 1997 04:07:19 +0000 Subject: [PATCH] * Integrate Haifa instruction scheduler. * Integrate regmove pass. See ChangeLog for deatils. From-SVN: r14770 --- gcc/ChangeLog | 73 + gcc/Makefile.in | 15 +- gcc/configure | 22 + gcc/configure.in | 20 + gcc/flags.h | 28 + gcc/flow.c | 3 +- gcc/genattrtab.c | 4 + gcc/haifa-sched.c | 8713 +++++++++++++++++++++++++++++++++++++++++++++ gcc/invoke.texi | 8 +- gcc/loop.c | 752 ++++ gcc/loop.h | 10 + gcc/regmove.c | 983 +++++ gcc/rtl.h | 1 + gcc/toplev.c | 92 + gcc/unroll.c | 23 +- 15 files changed, 10737 insertions(+), 10 deletions(-) create mode 100644 gcc/haifa-sched.c create mode 100644 gcc/regmove.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ff0aa85597e..608c3d73ba2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,76 @@ +Mon Aug 11 14:50:55 1997 Jeffrey A Law (law@cygnus.com) + + * Integrate Haifa instruction scheduler. + * Makefile.in (ALL_CFLAGS): Add SCHED_CFLAGS. Prefix all references + to sched with $(SCHED_CFLAGS. + * configure.in: Handle --enable-haifa. + * configure: Rebuilt. + * flags.h: Add new flags for haifa instruction scheduler. + * genattrtab.c (expand_units): For haifa, don't subtract one + when computing blockage. + * toplev.h (flag_schedule_interblock): Haifa scheduler flag. + (flag_schedule_speculative): Ditto. + (flag_schedule_speculative_load): Ditto. + (flag_schedule_speculative_load_dangerous): Ditto. + (flag_schedule_reverse_before_reload): Ditto. + (flag_schedule_reverse_after_reload): Ditto. + (flag_branch_on_count_reg): Ditto. + (f_options): Add Haifa switches. + (main): Turn off some Haifa options if appropriate macro is + defined. Process Haifa switches. + * unroll.c (iteration_info): No longer static, since Haifa + scheduler uses it. + (unroll_loop): Inform HAIFA scheduler about loop unrolling factor. + * unroll.c (unroll_loop): Set loop_unroll_iter, loop_start_value. + * loop.h (loop_unroll_factor, loop_number): Add HAIFA decls. + * loop.h (loop_initial_value,loop_unroll_iter): New globals. + * loop.c (loop_optimize): If HAIFA is defined, allocate additional + storage for the Haifa scheduler. + (mark_loop_jump): If HAIFA defined, set LABEL_OUTSIDE_LOOP_P and + LABEL_NEXTREF. + (strength_reduce): If HAIFA and HAVE_decrement_and_branch_on_count + are defined, call analyze_loop_iterations and insert_bct to use + countdown loops. + (record_giv): Refine test for jumps out of loops if HAIFA is + defined. + (analyze_loop_iterations): New function to identify if we can use + a countdown loop. + (insert_bct): Insert countdown loop. + (instrument_loop_bct): Low level code to insert countdown loop. + (loop_number): Calculate UID of loop. + (indirect_jump_in_function_p): Return true if an indirect jump is + in the function. + (is_power_of_2): Return true if value is a power of 2. + (is_conditional_branch): Return true if insn is a conditional + jump. + (fix_bct_param): Process -fbct-{min,max}-N switches. + (check_bct_param): Return true if loop should be instrumented. + * loop.c (loop_initial_value,loop_unroll_iter): New globals. + (loop_optimize): Initialize. + (get_condition_for_loop): Ditto. + * loop.c (strength_reduce): Inside of code that uses #ifdef + HAVE_decrement_and_branch_on_count code, test it to make sure the + condition is true. + (instrument_loop_bct): Ditto. + * haifa-sched.c: New file. + + + * Integrate regmove pass. + * Makefile.in (OBJS): Add regmove.o + (regmove.o): Add dependencies. + * flow.c (find_use_as_address): No longer static. + * rtl.h (find_use_as_address): Declare. + * toplev.c (regmove_dump, flag_regmove): Define. + (f_options): Add -fregmove. + (regmove_dump_file, regmove_time): Define. + (fatal_insn): Close the regmove dump file. + (compile_file): Initialize regmove_time; open/close the regmove dump + file as needed. Print regmove time as needed. + (rest_of_compilation): Run regmove pass if requested, dump + RTL after regmove if requested. + (main): If -O2 or more, turn on regmove. Handle dump switches. + * regmove.c: New file. + Mon Aug 11 14:15:02 1997 Jeffrey A Law (law@cygnus.com) * Integrate tlink patch from jason@cygnus.com diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 76d83967f48..adf5db49806 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -444,7 +444,7 @@ INTERNAL_CFLAGS = $(CROSS) -DIN_GCC @extra_c_flags@ # This is the variable actually used when we compile. ALL_CFLAGS = $(INTERNAL_CFLAGS) $(X_CFLAGS) $(T_CFLAGS) $(CFLAGS) $(XCFLAGS) \ - @DEFS@ + @DEFS@ $(SCHED_CFLAGS) # Likewise. ALL_CPPFLAGS = $(CPPFLAGS) $(X_CPPFLAGS) $(T_CPPFLAGS) @@ -548,14 +548,17 @@ BC_OBJS = bc-emit.o bc-optab.o # Bytecode header files constructed at build time; vmsconfig.com wants this. BC_ALL = bc-arity.h bc-opcode.h bc-opname.h +SCHED_PREFIX = @sched_prefix@ +SCHED_CFLAGS = @sched_cflags@ + # Language-independent object files. OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \ function.o stmt.o except.o expr.o calls.o expmed.o explow.o optabs.o \ - varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o \ - dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o \ + varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o regmove.o \ + dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o alias.o \ integrate.o jump.o cse.o loop.o unroll.o flow.o stupid.o combine.o \ regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o \ - insn-peep.o reorg.o alias.o sched.o final.o recog.o reg-stack.o \ + insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \ insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \ profile.o insn-attrtab.o $(out_object_file) getpwd.o convert.o $(EXTRA_OBJS) @@ -1326,7 +1329,9 @@ reorg.o : reorg.c $(CONFIG_H) $(RTL_H) conditions.h hard-reg-set.h \ flags.h output.h alias.o : alias.c $(CONFIG_H) $(RTL_H) flags.h hard-reg-set.h regs.h \ insn-codes.h -sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \ +regmove.o : regmove.c $(CONFIG_H) $(RTL_H) insn-config.h recog.h output.h \ + reload.h regs.h hard-reg-set.h flags.h expr.h insn-flags.h +$(SCHED_PREFIX)sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \ flags.h insn-config.h insn-attr.h final.o : final.c $(CONFIG_H) $(RTL_H) $(TREE_H) flags.h regs.h \ recog.h conditions.h insn-config.h insn-attr.h except.h real.h output.h \ diff --git a/gcc/configure b/gcc/configure index 64999d034ab..74dd86eeead 100755 --- a/gcc/configure +++ b/gcc/configure @@ -4361,6 +4361,26 @@ if [ ! -f Makefile.in ]; then echo "source ${srcdir}/.gdbinit" >> .gdbinit fi +# Override SCHED_OBJ and SCHED_CFLAGS to enable the Haifa scheduler. +sched_prefix= +sched_cflags= +if [[ x$enable_haifa = xyes ]]; then + echo "Using the Haifa scheduler." + sched_prefix=haifa- + sched_cflags=-DHAIFA +fi + + +if [[ x$enable_haifa != x ]]; then + # Explicitly remove files that need to be recompiled for the Haifa scheduler. + for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do + if [ -f $x ]; then + echo "Removing $x" + rm -f $x + fi + done +fi + # Process the language and host/target makefile fragments. ${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file" @@ -4602,6 +4622,8 @@ s%@CC@%$CC%g s%@SET_MAKE@%$SET_MAKE%g s%@CPP@%$CPP%g s%@manext@%$manext%g +s%@sched_prefix@%$sched_prefix%g +s%@sched_cflags@%$sched_cflags%g s%@objext@%$objext%g s%@subdirs@%$subdirs%g s%@all_languages@%$all_languages%g diff --git a/gcc/configure.in b/gcc/configure.in index da6bdb630ea..17ac2f41701 100644 --- a/gcc/configure.in +++ b/gcc/configure.in @@ -3027,6 +3027,26 @@ if [[ ! -f Makefile.in ]]; then echo "source ${srcdir}/.gdbinit" >> .gdbinit fi +# Override SCHED_OBJ and SCHED_CFLAGS to enable the Haifa scheduler. +sched_prefix= +sched_cflags= +if [[ x$enable_haifa = xyes ]]; then + echo "Using the Haifa scheduler." + sched_prefix=haifa- + sched_cflags=-DHAIFA +fi +AC_SUBST(sched_prefix) +AC_SUBST(sched_cflags) +if [[ x$enable_haifa != x ]]; then + # Explicitly remove files that need to be recompiled for the Haifa scheduler. + for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do + if [ -f $x ]; then + echo "Removing $x" + rm -f $x + fi + done +fi + # Process the language and host/target makefile fragments. ${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file" diff --git a/gcc/flags.h b/gcc/flags.h index b5c6d75446b..58f5bc0af1c 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -304,6 +304,34 @@ extern int flag_shared_data; extern int flag_schedule_insns; extern int flag_schedule_insns_after_reload; +#ifdef HAIFA +/* The following flags have effect only for scheduling before register + allocation: + + flag_schedule_interblock means schedule insns accross basic blocks. + flag_schedule_speculative means allow speculative motion of non-load insns. + flag_schedule_speculative_load means allow speculative motion of some + load insns. + flag_schedule_speculative_load_dangerous allows speculative motion of more + load insns. + flag_schedule_reverse_before_reload means try to reverse original order + of insns (S). + flag_schedule_reverse_after_reload means try to reverse original order + of insns (R). */ + +extern int flag_schedule_interblock; +extern int flag_schedule_speculative; +extern int flag_schedule_speculative_load; +extern int flag_schedule_speculative_load_dangerous; +extern int flag_schedule_reverse_before_reload; +extern int flag_schedule_reverse_after_reload; + +/* flag_on_branch_count_reg means try to replace add-1,compare,branch tupple + by a cheaper branch, on a count register. */ +extern int flag_branch_on_count_reg; +#endif /* HAIFA */ + + /* Nonzero means put things in delayed-branch slots if supported. */ extern int flag_delayed_branch; diff --git a/gcc/flow.c b/gcc/flow.c index 44f1748e6e1..3ca1dcf256f 100644 --- a/gcc/flow.c +++ b/gcc/flow.c @@ -268,7 +268,6 @@ static void find_auto_inc PROTO((regset, rtx, rtx)); static void mark_used_regs PROTO((regset, regset, rtx, int, rtx)); static int try_pre_increment_1 PROTO((rtx)); static int try_pre_increment PROTO((rtx, rtx, HOST_WIDE_INT)); -static rtx find_use_as_address PROTO((rtx, rtx, HOST_WIDE_INT)); void dump_flow_info PROTO((FILE *)); /* Find basic blocks of the current function and perform data flow analysis. @@ -2795,7 +2794,7 @@ try_pre_increment (insn, reg, amount) If REG appears more than once, or is used other than in such an address, return (rtx)1. */ -static rtx +rtx find_use_as_address (x, reg, plusconst) register rtx x; rtx reg; diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c index 14ecac1f1e5..bde3f5199a2 100644 --- a/gcc/genattrtab.c +++ b/gcc/genattrtab.c @@ -2003,6 +2003,9 @@ expand_units () for (op = unit->ops; op; op = op->next) { +#ifdef HAIFA + rtx blockage = op->issue_exp; +#else rtx blockage = operate_exp (POS_MINUS_OP, readycost, make_numeric_value (1)); @@ -2018,6 +2021,7 @@ expand_units () blockage); blockage = operate_exp (MAX_OP, blockage, op->issue_exp); +#endif blockage = simplify_knowing (blockage, unit->condexp); /* Add this op's contribution to MAX (BLOCKAGE (E,*)) and diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c new file mode 100644 index 00000000000..ced081416a6 --- /dev/null +++ b/gcc/haifa-sched.c @@ -0,0 +1,8713 @@ +/* Instruction scheduling pass. + Copyright (C) 1992, 1993, 1994, 1995, 1997 Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by, + and currently maintained by, Jim Wilson (wilson@cygnus.com) + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to the Free + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +/* Instruction scheduling pass. + + This pass implements list scheduling within basic blocks. It is + run twice: (1) after flow analysis, but before register allocation, + and (2) after register allocation. + + The first run performs interblock scheduling, moving insns between + different blocks in the same "region", and the second runs only + basic block scheduling. + + Interblock motions performed are useful motions and speculative + motions, including speculative loads. Motions requiring code + duplication are not supported. The identification of motion type + and the check for validity of speculative motions requires + construction and analysis of the function's control flow graph. + The scheduler works as follows: + + We compute insn priorities based on data dependencies. Flow + analysis only creates a fraction of the data-dependencies we must + observe: namely, only those dependencies which the combiner can be + expected to use. For this pass, we must therefore create the + remaining dependencies we need to observe: register dependencies, + memory dependencies, dependencies to keep function calls in order, + and the dependence between a conditional branch and the setting of + condition codes are all dealt with here. + + The scheduler first traverses the data flow graph, starting with + the last instruction, and proceeding to the first, assigning values + to insn_priority as it goes. This sorts the instructions + topologically by data dependence. + + Once priorities have been established, we order the insns using + list scheduling. This works as follows: starting with a list of + all the ready insns, and sorted according to priority number, we + schedule the insn from the end of the list by placing its + predecessors in the list according to their priority order. We + consider this insn scheduled by setting the pointer to the "end" of + the list to point to the previous insn. When an insn has no + predecessors, we either queue it until sufficient time has elapsed + or add it to the ready list. As the instructions are scheduled or + when stalls are introduced, the queue advances and dumps insns into + the ready list. When all insns down to the lowest priority have + been scheduled, the critical path of the basic block has been made + as short as possible. The remaining insns are then scheduled in + remaining slots. + + Function unit conflicts are resolved during forward list scheduling + by tracking the time when each insn is committed to the schedule + and from that, the time the function units it uses must be free. + As insns on the ready list are considered for scheduling, those + that would result in a blockage of the already committed insns are + queued until no blockage will result. + + The following list shows the order in which we want to break ties + among insns in the ready list: + + 1. choose insn with the longest path to end of bb, ties + broken by + 2. choose insn with least contribution to register pressure, + ties broken by + 3. prefer in-block upon interblock motion, ties broken by + 4. prefer useful upon speculative motion, ties broken by + 5. choose insn with largest control flow probability, ties + broken by + 6. choose insn with the least dependences upon the previously + scheduled insn, or finally + 7. choose insn with lowest UID. + + Memory references complicate matters. Only if we can be certain + that memory references are not part of the data dependency graph + (via true, anti, or output dependence), can we move operations past + memory references. To first approximation, reads can be done + independently, while writes introduce dependencies. Better + approximations will yield fewer dependencies. + + Before reload, an extended analysis of interblock data dependences + is required for interblock scheduling. This is performed in + compute_block_backward_dependences (). + + Dependencies set up by memory references are treated in exactly the + same way as other dependencies, by using LOG_LINKS backward + dependences. LOG_LINKS are translated into INSN_DEPEND forward + dependences for the purpose of forward list scheduling. + + Having optimized the critical path, we may have also unduly + extended the lifetimes of some registers. If an operation requires + that constants be loaded into registers, it is certainly desirable + to load those constants as early as necessary, but no earlier. + I.e., it will not do to load up a bunch of registers at the + beginning of a basic block only to use them at the end, if they + could be loaded later, since this may result in excessive register + utilization. + + Note that since branches are never in basic blocks, but only end + basic blocks, this pass will not move branches. But that is ok, + since we can use GNU's delayed branch scheduling pass to take care + of this case. + + Also note that no further optimizations based on algebraic + identities are performed, so this pass would be a good one to + perform instruction splitting, such as breaking up a multiply + instruction into shifts and adds where that is profitable. + + Given the memory aliasing analysis that this pass should perform, + it should be possible to remove redundant stores to memory, and to + load values from registers instead of hitting memory. + + Before reload, speculative insns are moved only if a 'proof' exists + that no exception will be caused by this, and if no live registers + exist that inhibit the motion (live registers constraints are not + represented by data dependence edges). + + This pass must update information that subsequent passes expect to + be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths, + reg_n_calls_crossed, and reg_live_length. Also, basic_block_head, + basic_block_end. + + The information in the line number notes is carefully retained by + this pass. Notes that refer to the starting and ending of + exception regions are also carefully retained by this pass. All + other NOTE insns are grouped in their same relative order at the + beginning of basic blocks and regions that have been scheduled. + + The main entry point for this pass is schedule_insns(), called for + each function. The work of the scheduler is organized in three + levels: (1) function level: insns are subject to splitting, + control-flow-graph is constructed, regions are computed (after + reload, each region is of one block), (2) region level: control + flow graph attributes required for interblock scheduling are + computed (dominators, reachability, etc.), data dependences and + priorities are computed, and (3) block level: insns in the block + are actually scheduled. */ + +#include +#include "config.h" +#include "rtl.h" +#include "basic-block.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "insn-config.h" +#include "insn-attr.h" +#include "except.h" + +extern char *reg_known_equiv_p; +extern rtx *reg_known_value; + +#ifdef INSN_SCHEDULING + +/* enable interblock scheduling code */ + +/* define INTERBLOCK_DEBUG for using the -fsched-max debugging facility */ +/* #define INTERBLOCK_DEBUG */ + +/* target_units bitmask has 1 for each unit in the cpu. It should be + possible to compute this variable from the machine description. + But currently it is computed by examinning the insn list. Since + this is only needed for visualization, it seems an acceptable + solution. (For understanding the mapping of bits to units, see + definition of function_units[] in "insn-attrtab.c") */ + +int target_units = 0; + +/* issue_rate is the number of insns that can be scheduled in the same + machine cycle. It can be defined in the config/mach/mach.h file, + otherwise we set it to 1. */ + +static int issue_rate; + +#ifndef MACHINE_issue_rate +#define get_issue_rate() (1) +#endif + +/* sched_debug_count is used for debugging the scheduler by limiting + the number of scheduled insns. It is controlled by the option + -fsched-max-N (N is a number). + + sched-verbose controls the amount of debugging output the + scheduler prints. It is controlled by -fsched-verbose-N: + N>0 and no -DSR : the output is directed to stderr. + N>=10 will direct the printouts to stderr (regardless of -dSR). + N=1: same as -dSR. + N=2: bb's probabilities, detailed ready list info, unit/insn info. + N=3: rtl at abort point, control-flow, regions info. + N=5: dependences info. + + max_rgn_blocks and max_region_insns limit region size for + interblock scheduling. They are controlled by + -fsched-interblock-max-blocks-N, -fsched-interblock-max-insns-N */ + +#define MAX_RGN_BLOCKS 10 +#define MAX_RGN_INSNS 100 + +static int sched_debug_count = -1; +static int sched_verbose_param = 0; +static int sched_verbose = 0; +static int max_rgn_blocks = MAX_RGN_BLOCKS; +static int max_rgn_insns = MAX_RGN_INSNS; + +/* nr_inter/spec counts interblock/speculative motion for the function */ +static int nr_inter, nr_spec; + + +/* debugging file. all printouts are sent to dump, which is always set, + either to stderr, or to the dump listing file (-dRS). */ +static FILE *dump = 0; + +/* fix_sched_param() is called from toplev.c upon detection + of the -fsched-***-N options. */ + +void +fix_sched_param (param, val) + char *param, *val; +{ + if (!strcmp (param, "max")) + sched_debug_count = ((sched_debug_count == -1) ? + atoi (val) : sched_debug_count); + else if (!strcmp (param, "verbose")) + sched_verbose_param = atoi (val); + else if (!strcmp (param, "interblock-max-blocks")) + max_rgn_blocks = atoi (val); + else if (!strcmp (param, "interblock-max-insns")) + max_rgn_insns = atoi (val); + else + warning ("fix_sched_param: unknown param: %s", param); +} + + +/* Arrays set up by scheduling for the same respective purposes as + similar-named arrays set up by flow analysis. We work with these + arrays during the scheduling pass so we can compare values against + unscheduled code. + + Values of these arrays are copied at the end of this pass into the + arrays set up by flow analysis. */ +static int *sched_reg_n_calls_crossed; +static int *sched_reg_live_length; +static int *sched_reg_basic_block; + +/* We need to know the current block number during the post scheduling + update of live register information so that we can also update + REG_BASIC_BLOCK if a register changes blocks. */ +static int current_block_num; + +/* Element N is the next insn that sets (hard or pseudo) register + N within the current basic block; or zero, if there is no + such insn. Needed for new registers which may be introduced + by splitting insns. */ +static rtx *reg_last_uses; +static rtx *reg_last_sets; +static regset reg_pending_sets; +static int reg_pending_sets_all; + +/* Vector indexed by INSN_UID giving the original ordering of the insns. */ +static int *insn_luid; +#define INSN_LUID(INSN) (insn_luid[INSN_UID (INSN)]) + +/* Vector indexed by INSN_UID giving each instruction a priority. */ +static int *insn_priority; +#define INSN_PRIORITY(INSN) (insn_priority[INSN_UID (INSN)]) + +static short *insn_costs; +#define INSN_COST(INSN) insn_costs[INSN_UID (INSN)] + +/* Vector indexed by INSN_UID giving an encoding of the function units + used. */ +static short *insn_units; +#define INSN_UNIT(INSN) insn_units[INSN_UID (INSN)] + +/* Vector indexed by INSN_UID giving each instruction a register-weight. + This weight is an estimation of the insn contribution to registers pressure. */ +static int *insn_reg_weight; +#define INSN_REG_WEIGHT(INSN) (insn_reg_weight[INSN_UID (INSN)]) + +/* Vector indexed by INSN_UID giving list of insns which + depend upon INSN. Unlike LOG_LINKS, it represents forward dependences. */ +static rtx *insn_depend; +#define INSN_DEPEND(INSN) insn_depend[INSN_UID (INSN)] + +/* Vector indexed by INSN_UID. Initialized to the number of incoming + edges in forward dependence graph (= number of LOG_LINKS). As + scheduling procedes, dependence counts are decreased. An + instruction moves to the ready list when its counter is zero. */ +static int *insn_dep_count; +#define INSN_DEP_COUNT(INSN) (insn_dep_count[INSN_UID (INSN)]) + +/* Vector indexed by INSN_UID giving an encoding of the blockage range + function. The unit and the range are encoded. */ +static unsigned int *insn_blockage; +#define INSN_BLOCKAGE(INSN) insn_blockage[INSN_UID (INSN)] +#define UNIT_BITS 5 +#define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1) +#define ENCODE_BLOCKAGE(U, R) \ +((((U) << UNIT_BITS) << BLOCKAGE_BITS \ + | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \ + | MAX_BLOCKAGE_COST (R)) +#define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS)) +#define BLOCKAGE_RANGE(B) \ + (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \ + | (B) & BLOCKAGE_MASK) + +/* Encodings of the `_unit_blockage_range' function. */ +#define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2)) +#define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1)) + +#define DONE_PRIORITY -1 +#define MAX_PRIORITY 0x7fffffff +#define TAIL_PRIORITY 0x7ffffffe +#define LAUNCH_PRIORITY 0x7f000001 +#define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0) +#define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0) + +/* Vector indexed by INSN_UID giving number of insns referring to this insn. */ +static int *insn_ref_count; +#define INSN_REF_COUNT(INSN) (insn_ref_count[INSN_UID (INSN)]) + +/* Vector indexed by INSN_UID giving line-number note in effect for each + insn. For line-number notes, this indicates whether the note may be + reused. */ +static rtx *line_note; +#define LINE_NOTE(INSN) (line_note[INSN_UID (INSN)]) + +/* Vector indexed by basic block number giving the starting line-number + for each basic block. */ +static rtx *line_note_head; + +/* List of important notes we must keep around. This is a pointer to the + last element in the list. */ +static rtx note_list; + +/* Regsets telling whether a given register is live or dead before the last + scheduled insn. Must scan the instructions once before scheduling to + determine what registers are live or dead at the end of the block. */ +static regset bb_live_regs; + +/* Regset telling whether a given register is live after the insn currently + being scheduled. Before processing an insn, this is equal to bb_live_regs + above. This is used so that we can find registers that are newly born/dead + after processing an insn. */ +static regset old_live_regs; + +/* The chain of REG_DEAD notes. REG_DEAD notes are removed from all insns + during the initial scan and reused later. If there are not exactly as + many REG_DEAD notes in the post scheduled code as there were in the + prescheduled code then we trigger an abort because this indicates a bug. */ +static rtx dead_notes; + +/* Queues, etc. */ + +/* An instruction is ready to be scheduled when all insns preceding it + have already been scheduled. It is important to ensure that all + insns which use its result will not be executed until its result + has been computed. An insn is maintained in one of four structures: + + (P) the "Pending" set of insns which cannot be scheduled until + their dependencies have been satisfied. + (Q) the "Queued" set of insns that can be scheduled when sufficient + time has passed. + (R) the "Ready" list of unscheduled, uncommitted insns. + (S) the "Scheduled" list of insns. + + Initially, all insns are either "Pending" or "Ready" depending on + whether their dependencies are satisfied. + + Insns move from the "Ready" list to the "Scheduled" list as they + are committed to the schedule. As this occurs, the insns in the + "Pending" list have their dependencies satisfied and move to either + the "Ready" list or the "Queued" set depending on whether + sufficient time has passed to make them ready. As time passes, + insns move from the "Queued" set to the "Ready" list. Insns may + move from the "Ready" list to the "Queued" set if they are blocked + due to a function unit conflict. + + The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled + insns, i.e., those that are ready, queued, and pending. + The "Queued" set (Q) is implemented by the variable `insn_queue'. + The "Ready" list (R) is implemented by the variables `ready' and + `n_ready'. + The "Scheduled" list (S) is the new insn chain built by this pass. + + The transition (R->S) is implemented in the scheduling loop in + `schedule_block' when the best insn to schedule is chosen. + The transition (R->Q) is implemented in `queue_insn' when an + insn is found to to have a function unit conflict with the already + committed insns. + The transitions (P->R and P->Q) are implemented in `schedule_insn' as + insns move from the ready list to the scheduled list. + The transition (Q->R) is implemented in 'queue_to_insn' as time + passes or stalls are introduced. */ + +/* Implement a circular buffer to delay instructions until sufficient + time has passed. INSN_QUEUE_SIZE is a power of two larger than + MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the + longest time an isnsn may be queued. */ +static rtx insn_queue[INSN_QUEUE_SIZE]; +static int q_ptr = 0; +static int q_size = 0; +#define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1)) +#define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1)) + +/* Vector indexed by INSN_UID giving the minimum clock tick at which + the insn becomes ready. This is used to note timing constraints for + insns in the pending list. */ +static int *insn_tick; +#define INSN_TICK(INSN) (insn_tick[INSN_UID (INSN)]) + +/* Data structure for keeping track of register information + during that register's life. */ + +struct sometimes + { + int regno; + int live_length; + int calls_crossed; + }; + +/* Forward declarations. */ +static void add_dependence PROTO ((rtx, rtx, enum reg_note)); +static void remove_dependence PROTO ((rtx, rtx)); +static rtx find_insn_list PROTO ((rtx, rtx)); +static int insn_unit PROTO ((rtx)); +static unsigned int blockage_range PROTO ((int, rtx)); +static void clear_units PROTO ((void)); +static int actual_hazard_this_instance PROTO ((int, int, rtx, int, int)); +static void schedule_unit PROTO ((int, rtx, int)); +static int actual_hazard PROTO ((int, rtx, int, int)); +static int potential_hazard PROTO ((int, rtx, int)); +static int insn_cost PROTO ((rtx, rtx, rtx)); +static int priority PROTO ((rtx)); +static void free_pending_lists PROTO ((void)); +static void add_insn_mem_dependence PROTO ((rtx *, rtx *, rtx, rtx)); +static void flush_pending_lists PROTO ((rtx, int)); +static void sched_analyze_1 PROTO ((rtx, rtx)); +static void sched_analyze_2 PROTO ((rtx, rtx)); +static void sched_analyze_insn PROTO ((rtx, rtx, rtx)); +static void sched_analyze PROTO ((rtx, rtx)); +static void sched_note_set PROTO ((int, rtx, int)); +static int rank_for_schedule PROTO ((rtx *, rtx *)); +static void swap_sort PROTO ((rtx *, int)); +static void queue_insn PROTO ((rtx, int)); +static int schedule_insn PROTO ((rtx, rtx *, int, int)); +static void create_reg_dead_note PROTO ((rtx, rtx)); +static void attach_deaths PROTO ((rtx, rtx, int)); +static void attach_deaths_insn PROTO ((rtx)); +static int new_sometimes_live PROTO ((struct sometimes *, int, int)); +static void finish_sometimes_live PROTO ((struct sometimes *, int)); +static int schedule_block PROTO ((int, int, int)); +static rtx regno_use_in PROTO ((int, rtx)); +static void split_hard_reg_notes PROTO ((rtx, rtx, rtx, rtx)); +static void new_insn_dead_notes PROTO ((rtx, rtx, rtx, rtx)); +static void update_n_sets PROTO ((rtx, int)); +static void update_flow_info PROTO ((rtx, rtx, rtx, rtx)); + +/* Main entry point of this file. */ +void schedule_insns PROTO ((FILE *)); + +/* Mapping of insns to their original block prior to scheduling. */ +static int *insn_orig_block; +#define INSN_BLOCK(insn) (insn_orig_block[INSN_UID (insn)]) + +/* Some insns (e.g. call) are not allowed to move across blocks. */ +static char *cant_move; +#define CANT_MOVE(insn) (cant_move[INSN_UID (insn)]) + +/* Control flow graph edges are kept in circular lists. */ +typedef struct + { + int from_block; + int to_block; + int next_in; + int next_out; + } +edge; +static edge *edge_table; + +#define NEXT_IN(edge) (edge_table[edge].next_in) +#define NEXT_OUT(edge) (edge_table[edge].next_out) +#define FROM_BLOCK(edge) (edge_table[edge].from_block) +#define TO_BLOCK(edge) (edge_table[edge].to_block) + +/* Number of edges in the control flow graph. (in fact larger than + that by 1, since edge 0 is unused.) */ +static int nr_edges; + +/* Circular list of incoming/outgoing edges of a block */ +static int *in_edges; +static int *out_edges; + +#define IN_EDGES(block) (in_edges[block]) +#define OUT_EDGES(block) (out_edges[block]) + +/* List of labels which cannot be deleted, needed for control + flow graph construction. */ +extern rtx forced_labels; + + +static char is_cfg_nonregular PROTO ((void)); +static int uses_reg_or_mem PROTO ((rtx)); +void debug_control_flow PROTO ((void)); +static void build_control_flow PROTO ((void)); +static void build_jmp_edges PROTO ((rtx, int)); +static void new_edge PROTO ((int, int)); + + +/* A region is the main entity for interblock scheduling: insns + are allowed to move between blocks in the same region, along + control flow graph edges, in the 'up' direction. */ +typedef struct + { + int rgn_nr_blocks; /* number of blocks in region */ + int rgn_blocks; /* blocks in the region (actually index in rgn_bb_table) */ + } +region; + +/* Number of regions in the procedure */ +static int nr_regions; + +/* Table of region descriptions */ +static region *rgn_table; + +/* Array of lists of regions' blocks */ +static int *rgn_bb_table; + +/* Topological order of blocks in the region (if b2 is reachable from + b1, block_to_bb[b2] > block_to_bb[b1]). + Note: A basic block is always referred to by either block or b, + while its topological order name (in the region) is refered to by + bb. + */ +static int *block_to_bb; + +/* The number of the region containing a block. */ +static int *containing_rgn; + +#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks) +#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks) +#define BLOCK_TO_BB(block) (block_to_bb[block]) +#define CONTAINING_RGN(block) (containing_rgn[block]) + +void debug_regions PROTO ((void)); +static void find_single_block_region PROTO ((void)); +static void find_rgns PROTO ((void)); +static int too_large PROTO ((int, int *, int *)); + +extern void debug_live PROTO ((int, int)); + +/* Blocks of the current region being scheduled. */ +static int current_nr_blocks; +static int current_blocks; + +/* The mapping from bb to block */ +#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)]) + + +/* Bit vectors and bitset operations are needed for computations on + the control flow graph. */ + +typedef unsigned HOST_WIDE_INT *bitset; +typedef struct + { + int *first_member; /* pointer to the list start in bitlst_table. */ + int nr_members; /* the number of members of the bit list. */ + } +bitlst; + +int bitlst_table_last; +int bitlst_table_size; +static int *bitlst_table; + +static char bitset_member PROTO ((bitset, int, int)); +static void extract_bitlst PROTO ((bitset, int, bitlst *)); + +/* target info declarations. + + The block currently being scheduled is referred to as the "target" block, + while other blocks in the region from which insns can be moved to the + target are called "source" blocks. The candidate structure holds info + about such sources: are they valid? Speculative? Etc. */ +typedef bitlst bblst; +typedef struct + { + char is_valid; + char is_speculative; + int src_prob; + bblst split_bbs; + bblst update_bbs; + } +candidate; + +static candidate *candidate_table; + +/* A speculative motion requires checking live information on the path + from 'source' to 'target'. The split blocks are those to be checked. + After a speculative motion, live information should be modified in + the 'update' blocks. + + Lists of split and update blocks for each candidate of the current + target are in array bblst_table */ +int *bblst_table, bblst_size, bblst_last; + +#define IS_VALID(src) ( candidate_table[src].is_valid ) +#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative ) +#define SRC_PROB(src) ( candidate_table[src].src_prob ) + +/* The bb being currently scheduled. */ +int target_bb; + +/* List of edges. */ +typedef bitlst edgelst; + +/* target info functions */ +static void split_edges PROTO ((int, int, edgelst *)); +static void compute_trg_info PROTO ((int)); +void debug_candidate PROTO ((int)); +void debug_candidates PROTO ((int)); + + +/* Bit-set of bbs, where bit 'i' stands for bb 'i'. */ +typedef bitset bbset; + +/* Number of words of the bbset. */ +int bbset_size; + +/* Dominators array: dom[i] contains the bbset of dominators of + bb i in the region. */ +bbset *dom; + +/* bb 0 is the only region entry */ +#define IS_RGN_ENTRY(bb) (!bb) + +/* Is bb_src dominated by bb_trg. */ +#define IS_DOMINATED(bb_src, bb_trg) \ +( bitset_member (dom[bb_src], bb_trg, bbset_size) ) + +/* Probability: Prob[i] is a float in [0, 1] which is the probability + of bb i relative to the region entry. */ +float *prob; + +/* The probability of bb_src, relative to bb_trg. Note, that while the + 'prob[bb]' is a float in [0, 1], this macro returns an integer + in [0, 100]. */ +#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \ + prob[bb_trg]))) + +/* Bit-set of edges, where bit i stands for edge i. */ +typedef bitset edgeset; + +/* Number of edges in the region. */ +int rgn_nr_edges; + +/* Array of size rgn_nr_edges. */ +int *rgn_edges; + +/* Number of words in an edgeset. */ +int edgeset_size; + +/* Mapping from each edge in the graph to its number in the rgn. */ +int *edge_to_bit; +#define EDGE_TO_BIT(edge) (edge_to_bit[edge]) + +/* The split edges of a source bb is different for each target + bb. In order to compute this efficiently, the 'potential-split edges' + are computed for each bb prior to scheduling a region. This is actually + the split edges of each bb relative to the region entry. + + pot_split[bb] is the set of potential split edges of bb. */ +edgeset *pot_split; + +/* For every bb, a set of its ancestor edges. */ +edgeset *ancestor_edges; + +static void compute_dom_prob_ps PROTO ((int)); + +#define ABS_VALUE(x) (((x)<0)?(-(x)):(x)) +#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (INSN_BLOCK (INSN)))) +#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (INSN_BLOCK (INSN)))) +#define INSN_BB(INSN) (BLOCK_TO_BB (INSN_BLOCK (INSN))) + +/* parameters affecting the decision of rank_for_schedule() */ +#define MIN_DIFF_PRIORITY 2 +#define MIN_PROBABILITY 40 +#define MIN_PROB_DIFF 10 + +/* speculative scheduling functions */ +static int check_live_1 PROTO ((int, rtx)); +static void update_live_1 PROTO ((int, rtx)); +static int check_live PROTO ((rtx, int, int)); +static void update_live PROTO ((rtx, int, int)); +static void set_spec_fed PROTO ((rtx)); +static int is_pfree PROTO ((rtx, int, int)); +static int find_conditional_protection PROTO ((rtx, int)); +static int is_conditionally_protected PROTO ((rtx, int, int)); +static int may_trap_exp PROTO ((rtx, int)); +static int classify_insn PROTO ((rtx)); +static int is_exception_free PROTO ((rtx, int, int)); + +static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx)); +static void compute_block_forward_dependences PROTO ((int)); +static void init_rgn_data_dependences PROTO ((int)); +static void add_branch_dependences PROTO ((rtx, rtx)); +static void compute_block_backward_dependences PROTO ((int)); +void debug_dependencies PROTO ((void)); + +/* Notes handling mechanism: + ========================= + Generally, NOTES are saved before scheduling and restored after scheduling. + The scheduler distinguishes between three types of notes: + + (1) LINE_NUMBER notes, generated and used for debugging. Here, + before scheduling a region, a pointer to the LINE_NUMBER note is + added to the insn following it (in save_line_notes()), and the note + is removed (in rm_line_notes() and unlink_line_notes()). After + scheduling the region, this pointer is used for regeneration of + the LINE_NUMBER note (in restore_line_notes()). + + (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes: + Before scheduling a region, a pointer to the note is added to the insn + that follows or precedes it. (This happens as part of the data dependence + computation). After scheduling an insn, the pointer contained in it is + used for regenerating the corresponding note (in reemit_notes). + + (3) All other notes (e.g. INSN_DELETED): Before scheduling a block, + these notes are put in a list (in rm_other_notes() and + unlink_other_notes ()). After scheduling the block, these notes are + inserted at the beginning of the block (in schedule_block()). */ + +static rtx unlink_other_notes PROTO ((rtx, rtx)); +static rtx unlink_line_notes PROTO ((rtx, rtx)); +static void rm_line_notes PROTO ((int)); +static void save_line_notes PROTO ((int)); +static void restore_line_notes PROTO ((int)); +static void rm_redundant_line_notes PROTO ((void)); +static void rm_other_notes PROTO ((rtx, rtx)); +static rtx reemit_notes PROTO ((rtx, rtx)); + +static void get_block_head_tail PROTO ((int, rtx *, rtx *)); + +static void find_pre_sched_live PROTO ((int)); +static void find_post_sched_live PROTO ((int)); +static void update_reg_usage PROTO ((void)); + +void debug_ready_list PROTO ((rtx[], int)); +static void init_target_units PROTO (()); +static void insn_print_units PROTO ((rtx)); +static int get_visual_tbl_length PROTO (()); +static void init_block_visualization PROTO (()); +static void print_block_visualization PROTO ((int, char *)); +static void visualize_scheduled_insns PROTO ((int, int)); +static void visualize_no_unit PROTO ((rtx)); +static void visualize_stall_cycles PROTO ((int, int)); +static void print_exp PROTO ((char *, rtx, int)); +static void print_value PROTO ((char *, rtx, int)); +static void print_pattern PROTO ((char *, rtx, int)); +static void print_insn PROTO ((char *, rtx, int)); +void debug_reg_vector PROTO ((regset)); + +static rtx move_insn1 PROTO ((rtx, rtx)); +static rtx move_insn PROTO ((rtx, rtx)); +static rtx group_leader PROTO ((rtx)); +static int set_priorities PROTO ((int)); +static void init_rtx_vector PROTO ((rtx **, rtx *, int, int)); +static void schedule_region PROTO ((int)); +static void split_block_insns PROTO ((int)); + +#endif /* INSN_SCHEDULING */ + +#define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X))) + +/* Helper functions for instruction scheduling. */ + +/* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the + LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type + of dependence that this link represents. */ + +static void +add_dependence (insn, elem, dep_type) + rtx insn; + rtx elem; + enum reg_note dep_type; +{ + rtx link, next; + + /* Don't depend an insn on itself. */ + if (insn == elem) + return; + + /* If elem is part of a sequence that must be scheduled together, then + make the dependence point to the last insn of the sequence. + When HAVE_cc0, it is possible for NOTEs to exist between users and + setters of the condition codes, so we must skip past notes here. + Otherwise, NOTEs are impossible here. */ + + next = NEXT_INSN (elem); + +#ifdef HAVE_cc0 + while (next && GET_CODE (next) == NOTE) + next = NEXT_INSN (next); +#endif + + if (next && SCHED_GROUP_P (next) + && GET_CODE (next) != CODE_LABEL) + { + /* Notes will never intervene here though, so don't bother checking + for them. */ + /* We must reject CODE_LABELs, so that we don't get confused by one + that has LABEL_PRESERVE_P set, which is represented by the same + bit in the rtl as SCHED_GROUP_P. A CODE_LABEL can never be + SCHED_GROUP_P. */ + while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next)) + && GET_CODE (NEXT_INSN (next)) != CODE_LABEL) + next = NEXT_INSN (next); + + /* Again, don't depend an insn on itself. */ + if (insn == next) + return; + + /* Make the dependence to NEXT, the last insn of the group, instead + of the original ELEM. */ + elem = next; + } + +#ifdef INSN_SCHEDULING + /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.) + No need for interblock dependences with calls, since + calls are not moved between blocks. Note: the edge where + elem is a CALL is still required. */ + if (GET_CODE (insn) == CALL_INSN + && (INSN_BB (elem) != INSN_BB (insn))) + return; + +#endif + + /* Check that we don't already have this dependence. */ + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + if (XEXP (link, 0) == elem) + { + /* If this is a more restrictive type of dependence than the existing + one, then change the existing dependence to this type. */ + if ((int) dep_type < (int) REG_NOTE_KIND (link)) + PUT_REG_NOTE_KIND (link, dep_type); + return; + } + /* Might want to check one level of transitivity to save conses. */ + + link = rtx_alloc (INSN_LIST); + /* Insn dependency, not data dependency. */ + PUT_REG_NOTE_KIND (link, dep_type); + XEXP (link, 0) = elem; + XEXP (link, 1) = LOG_LINKS (insn); + LOG_LINKS (insn) = link; +} + +/* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS + of INSN. Abort if not found. */ + +static void +remove_dependence (insn, elem) + rtx insn; + rtx elem; +{ + rtx prev, link; + int found = 0; + + for (prev = 0, link = LOG_LINKS (insn); link; + prev = link, link = XEXP (link, 1)) + { + if (XEXP (link, 0) == elem) + { + if (prev) + XEXP (prev, 1) = XEXP (link, 1); + else + LOG_LINKS (insn) = XEXP (link, 1); + found = 1; + } + } + + if (!found) + abort (); + return; +} + +#ifndef INSN_SCHEDULING +void +schedule_insns (dump_file) + FILE *dump_file; +{ +} +#else +#ifndef __GNUC__ +#define __inline +#endif + +/* Computation of memory dependencies. */ + +/* The *_insns and *_mems are paired lists. Each pending memory operation + will have a pointer to the MEM rtx on one list and a pointer to the + containing insn on the other list in the same place in the list. */ + +/* We can't use add_dependence like the old code did, because a single insn + may have multiple memory accesses, and hence needs to be on the list + once for each memory access. Add_dependence won't let you add an insn + to a list more than once. */ + +/* An INSN_LIST containing all insns with pending read operations. */ +static rtx pending_read_insns; + +/* An EXPR_LIST containing all MEM rtx's which are pending reads. */ +static rtx pending_read_mems; + +/* An INSN_LIST containing all insns with pending write operations. */ +static rtx pending_write_insns; + +/* An EXPR_LIST containing all MEM rtx's which are pending writes. */ +static rtx pending_write_mems; + +/* Indicates the combined length of the two pending lists. We must prevent + these lists from ever growing too large since the number of dependencies + produced is at least O(N*N), and execution time is at least O(4*N*N), as + a function of the length of these pending lists. */ + +static int pending_lists_length; + +/* An INSN_LIST containing all INSN_LISTs allocated but currently unused. */ + +static rtx unused_insn_list; + +/* An EXPR_LIST containing all EXPR_LISTs allocated but currently unused. */ + +static rtx unused_expr_list; + +/* The last insn upon which all memory references must depend. + This is an insn which flushed the pending lists, creating a dependency + between it and all previously pending memory references. This creates + a barrier (or a checkpoint) which no memory reference is allowed to cross. + + This includes all non constant CALL_INSNs. When we do interprocedural + alias analysis, this restriction can be relaxed. + This may also be an INSN that writes memory if the pending lists grow + too large. */ + +static rtx last_pending_memory_flush; + +/* The last function call we have seen. All hard regs, and, of course, + the last function call, must depend on this. */ + +static rtx last_function_call; + +/* The LOG_LINKS field of this is a list of insns which use a pseudo register + that does not already cross a call. We create dependencies between each + of those insn and the next call insn, to ensure that they won't cross a call + after scheduling is done. */ + +static rtx sched_before_next_call; + +/* Pointer to the last instruction scheduled. Used by rank_for_schedule, + so that insns independent of the last scheduled insn will be preferred + over dependent instructions. */ + +static rtx last_scheduled_insn; + +/* Data structures for the computation of data dependences in a regions. We + keep one copy of each of the declared above variables for each bb in the + region. Before analyzing the data dependences for a bb, its variables + are initialized as a function of the variables of its predecessors. When + the analysis for a bb completes, we save the contents of each variable X + to a corresponding bb_X[bb] variable. For example, pending_read_insns is + copied to bb_pending_read_insns[bb]. Another change is that few + variables are now a list of insns rather than a single insn: + last_pending_memory_flash, last_function_call, reg_last_sets. The + manipulation of these variables was changed appropriately. */ + +static rtx **bb_reg_last_uses; +static rtx **bb_reg_last_sets; + +static rtx *bb_pending_read_insns; +static rtx *bb_pending_read_mems; +static rtx *bb_pending_write_insns; +static rtx *bb_pending_write_mems; +static int *bb_pending_lists_length; + +static rtx *bb_last_pending_memory_flush; +static rtx *bb_last_function_call; +static rtx *bb_sched_before_next_call; + +/* functions for construction of the control flow graph. */ + +/* Return 1 if control flow graph should not be constructed, 0 otherwise. + Estimate in nr_edges the number of edges on the graph. + We decide not to build the control flow graph if there is possibly more + than one entry to the function, or if computed branches exist. */ + +static char +is_cfg_nonregular () +{ + int b; + rtx insn; + RTX_CODE code; + + rtx nonlocal_label_list = nonlocal_label_rtx_list (); + + /* check for non local labels */ + if (nonlocal_label_list) + { + return 1; + } + + /* check for labels which cannot be deleted */ + if (forced_labels) + { + return 1; + } + + /* check for labels which probably cannot be deleted */ + if (exception_handler_labels) + { + return 1; + } + + /* check for labels referred to other thn by jumps */ + for (b = 0; b < n_basic_blocks; b++) + for (insn = basic_block_head[b];; insn = NEXT_INSN (insn)) + { + code = GET_CODE (insn); + if (GET_RTX_CLASS (code) == 'i') + { + rtx note; + + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) + if (REG_NOTE_KIND (note) == REG_LABEL) + { + return 1; + } + } + + if (insn == basic_block_end[b]) + break; + } + + nr_edges = 0; + + /* check for computed branches */ + for (b = 0; b < n_basic_blocks; b++) + { + for (insn = basic_block_head[b];; insn = NEXT_INSN (insn)) + { + + if (GET_CODE (insn) == JUMP_INSN) + { + rtx pat = PATTERN (insn); + int i; + + if (GET_CODE (pat) == PARALLEL) + { + int len = XVECLEN (pat, 0); + int has_use_labelref = 0; + + for (i = len - 1; i >= 0; i--) + if (GET_CODE (XVECEXP (pat, 0, i)) == USE + && (GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) + == LABEL_REF)) + { + nr_edges++; + has_use_labelref = 1; + } + + if (!has_use_labelref) + for (i = len - 1; i >= 0; i--) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET + && SET_DEST (XVECEXP (pat, 0, i)) == pc_rtx + && uses_reg_or_mem (SET_SRC (XVECEXP (pat, 0, i)))) + { + return 1; + } + } + /* check for branch table */ + else if (GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC) + { + int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC; + int len = XVECLEN (pat, diff_vec_p); + + nr_edges += len; + } + else + { + /* check for computed branch */ + if (GET_CODE (pat) == SET + && SET_DEST (pat) == pc_rtx + && uses_reg_or_mem (SET_SRC (pat))) + { + return 1; + } + } + } + + if (insn == basic_block_end[b]) + break; + } + } + + /* count for the fallthrough edges */ + for (b = 0; b < n_basic_blocks; b++) + { + for (insn = PREV_INSN (basic_block_head[b]); + insn && GET_CODE (insn) == NOTE; insn = PREV_INSN (insn)) + ; + + if (!insn && b != 0) + nr_edges++; + else if (insn && GET_CODE (insn) != BARRIER) + nr_edges++; + } + + nr_edges++; + + return 0; +} + + +/* Returns 1 if x uses a reg or a mem (function was taken from flow.c). + x is a target of a jump. Used for the detection of computed + branches. For each label seen, updates the edges estimation + counter nr_edges. */ + +static int +uses_reg_or_mem (x) + rtx x; +{ + enum rtx_code code = GET_CODE (x); + int i, j; + char *fmt; + + if (code == REG) + return 1; + + if (code == MEM + && !(GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))) + return 1; + + if (code == IF_THEN_ELSE) + { + if (uses_reg_or_mem (XEXP (x, 1)) + || uses_reg_or_mem (XEXP (x, 2))) + return 1; + else + return 0; + } + + if (code == LABEL_REF) + { + nr_edges++; + + return 0; + } + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e' + && uses_reg_or_mem (XEXP (x, i))) + return 1; + + if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + if (uses_reg_or_mem (XVECEXP (x, i, j))) + return 1; + } + + return 0; +} + + +/* Print the control flow graph, for debugging purposes. + Callable from the debugger. */ + +void +debug_control_flow () +{ + int i, e, next; + + fprintf (dump, ";; --------- CONTROL FLOW GRAPH --------- \n\n"); + + for (i = 0; i < n_basic_blocks; i++) + { + fprintf (dump, ";;\tBasic block %d: first insn %d, last %d.\n", + i, + INSN_UID (basic_block_head[i]), + INSN_UID (basic_block_end[i])); + + fprintf (dump, ";;\tPredecessor blocks:"); + for (e = IN_EDGES (i); e; e = next) + { + fprintf (dump, " %d", FROM_BLOCK (e)); + + next = NEXT_IN (e); + + if (next == IN_EDGES (i)) + break; + } + + fprintf (dump, "\n;;\tSuccesor blocks:"); + for (e = OUT_EDGES (i); e; e = next) + { + fprintf (dump, " %d", TO_BLOCK (e)); + + next = NEXT_OUT (e); + + if (next == OUT_EDGES (i)) + break; + } + + fprintf (dump, " \n\n"); + + } +} + + +/* build the control flow graph. (also set nr_edges accurately) */ + +static void +build_control_flow () +{ + int i; + + nr_edges = 0; + for (i = 0; i < n_basic_blocks; i++) + { + rtx insn; + + insn = basic_block_end[i]; + if (GET_CODE (insn) == JUMP_INSN) + { + build_jmp_edges (PATTERN (insn), i); + } + + for (insn = PREV_INSN (basic_block_head[i]); + insn && GET_CODE (insn) == NOTE; insn = PREV_INSN (insn)) + ; + + /* build fallthrough edges */ + if (!insn && i != 0) + new_edge (i - 1, i); + else if (insn && GET_CODE (insn) != BARRIER) + new_edge (i - 1, i); + } + + /* increment by 1, since edge 0 is unused. */ + nr_edges++; + +} + + +/* construct edges in the control flow graph, from 'source' block, to + blocks refered to by 'pattern'. */ + +static +void +build_jmp_edges (pattern, source) + rtx pattern; + int source; +{ + register RTX_CODE code; + register int i; + register char *fmt; + + code = GET_CODE (pattern); + + if (code == LABEL_REF) + { + register rtx label = XEXP (pattern, 0); + register int target; + + /* This can happen as a result of a syntax error + and a diagnostic has already been printed. */ + if (INSN_UID (label) == 0) + return; + + target = INSN_BLOCK (label); + new_edge (source, target); + + return; + } + + /* proper handling of ADDR_DIFF_VEC: do not add a non-existing edge + from the block containing the branch-on-table, to itself. */ + if (code == ADDR_VEC + || code == ADDR_DIFF_VEC) + { + int diff_vec_p = GET_CODE (pattern) == ADDR_DIFF_VEC; + int len = XVECLEN (pattern, diff_vec_p); + int k; + + for (k = 0; k < len; k++) + { + rtx tem = XVECEXP (pattern, diff_vec_p, k); + + build_jmp_edges (tem, source); + } + } + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + build_jmp_edges (XEXP (pattern, i), source); + if (fmt[i] == 'E') + { + register int j; + for (j = 0; j < XVECLEN (pattern, i); j++) + build_jmp_edges (XVECEXP (pattern, i, j), source); + } + } +} + + +/* construct an edge in the control flow graph, from 'source' to 'target'. */ + +static void +new_edge (source, target) + int source, target; +{ + int e, next_edge; + int curr_edge, fst_edge; + + /* check for duplicates */ + fst_edge = curr_edge = OUT_EDGES (source); + while (curr_edge) + { + if (FROM_BLOCK (curr_edge) == source + && TO_BLOCK (curr_edge) == target) + { + return; + } + + curr_edge = NEXT_OUT (curr_edge); + + if (fst_edge == curr_edge) + break; + } + + e = ++nr_edges; + + FROM_BLOCK (e) = source; + TO_BLOCK (e) = target; + + if (OUT_EDGES (source)) + { + next_edge = NEXT_OUT (OUT_EDGES (source)); + NEXT_OUT (OUT_EDGES (source)) = e; + NEXT_OUT (e) = next_edge; + } + else + { + OUT_EDGES (source) = e; + NEXT_OUT (e) = e; + } + + if (IN_EDGES (target)) + { + next_edge = NEXT_IN (IN_EDGES (target)); + NEXT_IN (IN_EDGES (target)) = e; + NEXT_IN (e) = next_edge; + } + else + { + IN_EDGES (target) = e; + NEXT_IN (e) = e; + } +} + + +/* BITSET macros for operations on the control flow graph. */ + +/* Compute bitwise union of two bitsets. */ +#define BITSET_UNION(set1, set2, len) \ +do { register bitset tp = set1, sp = set2; \ + register int i; \ + for (i = 0; i < len; i++) \ + *(tp++) |= *(sp++); } while (0) + +/* Compute bitwise intersection of two bitsets. */ +#define BITSET_INTER(set1, set2, len) \ +do { register bitset tp = set1, sp = set2; \ + register int i; \ + for (i = 0; i < len; i++) \ + *(tp++) &= *(sp++); } while (0) + +/* Compute bitwise difference of two bitsets. */ +#define BITSET_DIFFER(set1, set2, len) \ +do { register bitset tp = set1, sp = set2; \ + register int i; \ + for (i = 0; i < len; i++) \ + *(tp++) &= ~*(sp++); } while (0) + +/* Inverts every bit of bitset 'set' */ +#define BITSET_INVERT(set, len) \ +do { register bitset tmpset = set; \ + register int i; \ + for (i = 0; i < len; i++, tmpset++) \ + *tmpset = ~*tmpset; } while (0) + +/* Turn on the index'th bit in bitset set. */ +#define BITSET_ADD(set, index, len) \ +{ \ + if (index >= HOST_BITS_PER_WIDE_INT * len) \ + abort (); \ + else \ + set[index/HOST_BITS_PER_WIDE_INT] |= \ + 1 << (index % HOST_BITS_PER_WIDE_INT); \ +} + +/* Turn off the index'th bit in set. */ +#define BITSET_REMOVE(set, index, len) \ +{ \ + if (index >= HOST_BITS_PER_WIDE_INT * len) \ + abort (); \ + else \ + set[index/HOST_BITS_PER_WIDE_INT] &= \ + ~(1 << (index%HOST_BITS_PER_WIDE_INT)); \ +} + + +/* Check if the index'th bit in bitset set is on. */ + +static char +bitset_member (set, index, len) + bitset set; + int index, len; +{ + if (index >= HOST_BITS_PER_WIDE_INT * len) + abort (); + return (set[index / HOST_BITS_PER_WIDE_INT] & + 1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0; +} + + +/* Translate a bit-set SET to a list BL of the bit-set members. */ + +static void +extract_bitlst (set, len, bl) + bitset set; + int len; + bitlst *bl; +{ + int i, j, offset; + unsigned HOST_WIDE_INT word; + + /* bblst table space is reused in each call to extract_bitlst */ + bitlst_table_last = 0; + + bl->first_member = &bitlst_table[bitlst_table_last]; + bl->nr_members = 0; + + for (i = 0; i < len; i++) + { + word = set[i]; + offset = i * HOST_BITS_PER_WIDE_INT; + for (j = 0; word; j++) + { + if (word & 1) + { + bitlst_table[bitlst_table_last++] = offset; + (bl->nr_members)++; + } + word >>= 1; + ++offset; + } + } + +} + + +/* functions for the construction of regions */ + +/* Print the regions, for debugging purposes. Callable from debugger. */ + +void +debug_regions () +{ + int rgn, bb; + + fprintf (dump, "\n;; ------------ REGIONS ----------\n\n"); + for (rgn = 0; rgn < nr_regions; rgn++) + { + fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn, + rgn_table[rgn].rgn_nr_blocks); + fprintf (dump, ";;\tbb/block: "); + + for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++) + { + current_blocks = RGN_BLOCKS (rgn); + + if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb))) + abort (); + + fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb)); + } + + fprintf (dump, "\n\n"); + } +} + + +/* Build a single block region for each basic block in the function. + This allows for using the same code for interblock and basic block + scheduling. */ + +static void +find_single_block_region () +{ + int i; + + for (i = 0; i < n_basic_blocks; i++) + { + rgn_bb_table[i] = i; + RGN_NR_BLOCKS (i) = 1; + RGN_BLOCKS (i) = i; + CONTAINING_RGN (i) = i; + BLOCK_TO_BB (i) = 0; + } + nr_regions = n_basic_blocks; +} + + +/* Update number of blocks and the estimate for number of insns + in the region. Return 1 if the region is "too large" for interblock + scheduling (compile time considerations), otherwise return 0. */ + +static int +too_large (block, num_bbs, num_insns) + int block, *num_bbs, *num_insns; +{ + (*num_bbs)++; + (*num_insns) += (INSN_LUID (basic_block_end[block]) - + INSN_LUID (basic_block_head[block])); + if ((*num_bbs > max_rgn_blocks) || (*num_insns > max_rgn_insns)) + return 1; + else + return 0; +} + + +/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk] + is still an inner loop. Put in max_hdr[blk] the header of the most inner + loop containing blk. */ +#define UPDATE_LOOP_RELATIONS(blk, hdr) \ +{ \ + if (max_hdr[blk] == -1) \ + max_hdr[blk] = hdr; \ + else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \ + inner[hdr] = 0; \ + else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \ + { \ + inner[max_hdr[blk]] = 0; \ + max_hdr[blk] = hdr; \ + } \ +} + + +/* Find regions for interblock scheduling: a loop-free procedure, a reducible + inner loop, or a basic block not contained in any other region. + The procedures control flow graph is traversed twice. + First traversal, a DFS, finds the headers of inner loops in the graph, + and verifies that there are no unreacable blocks. + Second traversal processes headers of inner loops, checking that the + loop is reducible. The loop blocks that form a region are put into the + region's blocks list in topological order. + + The following variables are changed by the function: rgn_nr, rgn_table, + rgn_bb_table, block_to_bb and containing_rgn. */ + +static void +find_rgns () +{ + int *max_hdr, *dfs_nr, *stack, *queue, *degree; + char *header, *inner, *passed, *in_stack, *in_queue, no_loops = 1; + int node, child, loop_head, i, j, fst_edge, head, tail; + int count = 0, sp, idx = 0, current_edge = out_edges[0]; + int num_bbs, num_insns; + int too_large_failure; + char *reachable; + + /* + The following data structures are computed by the first traversal and + are used by the second traversal: + header[i] - flag set if the block i is the header of a loop. + inner[i] - initially set. It is reset if the the block i is the header + of a non-inner loop. + max_hdr[i] - the header of the inner loop containing block i. + (for a block i not in an inner loop it may be -1 or the + header of the most inner loop containing the block). + + These data structures are used by the first traversal only: + stack - non-recursive DFS implementation which uses a stack of edges. + sp - top of the stack of edges + dfs_nr[i] - the DFS ordering of block i. + in_stack[i] - flag set if the block i is in the DFS stack. + + These data structures are used by the second traversal only: + queue - queue containing the blocks of the current region. + head and tail - queue boundaries. + in_queue[i] - flag set if the block i is in queue */ + + /* function's inner arrays allocation and initialization */ + max_hdr = (int *) alloca (n_basic_blocks * sizeof (int)); + dfs_nr = (int *) alloca (n_basic_blocks * sizeof (int)); + bzero ((int *) dfs_nr, n_basic_blocks * sizeof (int)); + stack = (int *) alloca (nr_edges * sizeof (int)); + queue = (int *) alloca (n_basic_blocks * sizeof (int)); + + inner = (char *) alloca (n_basic_blocks * sizeof (char)); + header = (char *) alloca (n_basic_blocks * sizeof (char)); + bzero ((char *) header, n_basic_blocks * sizeof (char)); + passed = (char *) alloca (nr_edges * sizeof (char)); + bzero ((char *) passed, nr_edges * sizeof (char)); + in_stack = (char *) alloca (nr_edges * sizeof (char)); + bzero ((char *) in_stack, nr_edges * sizeof (char)); + reachable = (char *) alloca (n_basic_blocks * sizeof (char)); + bzero ((char *) reachable, n_basic_blocks * sizeof (char)); + + in_queue = (char *) alloca (n_basic_blocks * sizeof (char)); + + for (i = 0; i < n_basic_blocks; i++) + { + inner[i] = 1; + max_hdr[i] = -1; + } + + /* First traversal: DFS, finds inner loops in control flow graph */ + + reachable[0] = 1; + sp = -1; + while (1) + { + if (current_edge == 0 || passed[current_edge]) + { + /* Here, if current_edge < 0, this is a leaf block. + Otherwise current_edge was already passed. Note that in + the latter case, not only current_edge but also all its + NEXT_OUT edges are also passed. We have to "climb up on + edges in the stack", looking for the first (already + passed) edge whose NEXT_OUT was not passed yet. */ + + while (sp >= 0 && (current_edge == 0 || passed[current_edge])) + { + current_edge = stack[sp--]; + node = FROM_BLOCK (current_edge); + child = TO_BLOCK (current_edge); + in_stack[child] = 0; + if (max_hdr[child] >= 0 && in_stack[max_hdr[child]]) + UPDATE_LOOP_RELATIONS (node, max_hdr[child]); + current_edge = NEXT_OUT (current_edge); + } + + /* stack empty - the whole graph is traversed. */ + if (sp < 0 && passed[current_edge]) + break; + continue; + } + + node = FROM_BLOCK (current_edge); + dfs_nr[node] = ++count; + in_stack[node] = 1; + child = TO_BLOCK (current_edge); + reachable[child] = 1; + + /* found a loop header */ + if (in_stack[child]) + { + no_loops = 0; + header[child] = 1; + max_hdr[child] = child; + UPDATE_LOOP_RELATIONS (node, child); + passed[current_edge] = 1; + current_edge = NEXT_OUT (current_edge); + continue; + } + + /* the child was already visited once, no need to go down from + it, everything is traversed there. */ + if (dfs_nr[child]) + { + if (max_hdr[child] >= 0 && in_stack[max_hdr[child]]) + UPDATE_LOOP_RELATIONS (node, max_hdr[child]); + passed[current_edge] = 1; + current_edge = NEXT_OUT (current_edge); + continue; + } + + /* this is a step down in the dfs traversal */ + stack[++sp] = current_edge; + passed[current_edge] = 1; + current_edge = OUT_EDGES (child); + } /* while (1); */ + + /* if there are unreachable blocks, or more than one entry to + the subroutine, give up on interblock scheduling */ + for (i = 1; i < n_basic_blocks; i++) + { + if (reachable[i] == 0) + { + find_single_block_region (); + if (sched_verbose >= 3) + fprintf (stderr, "sched: warning: found an unreachable block %d \n", i); + return; + } + } + + /* Second travsersal: find reducible inner loops, and sort + topologically the blocks of each region */ + degree = dfs_nr; /* reuse dfs_nr array - it is not needed anymore */ + bzero ((char *) in_queue, n_basic_blocks * sizeof (char)); + + if (no_loops) + header[0] = 1; + + /* compute the in-degree of every block in the graph */ + for (i = 0; i < n_basic_blocks; i++) + { + fst_edge = IN_EDGES (i); + if (fst_edge > 0) + { + degree[i] = 1; + current_edge = NEXT_IN (fst_edge); + while (fst_edge != current_edge) + { + ++degree[i]; + current_edge = NEXT_IN (current_edge); + } + } + else + degree[i] = 0; + } + + /* pass through all graph blocks, looking for headers of inner loops */ + for (i = 0; i < n_basic_blocks; i++) + { + + if (header[i] && inner[i]) + { + + /* i is a header of a potentially reducible inner loop, or + block 0 in a subroutine with no loops at all */ + head = tail = -1; + too_large_failure = 0; + loop_head = max_hdr[i]; + + /* decrease in_degree of all i's successors, (this is needed + for the topological ordering) */ + fst_edge = current_edge = OUT_EDGES (i); + if (fst_edge > 0) + { + do + { + --degree[TO_BLOCK (current_edge)]; + current_edge = NEXT_OUT (current_edge); + } + while (fst_edge != current_edge); + } + + /* estimate # insns, and count # blocks in the region. */ + num_bbs = 1; + num_insns = INSN_LUID (basic_block_end[i]) - INSN_LUID (basic_block_head[i]); + + + /* find all loop latches, if it is a true loop header, or + all leaves if the graph has no loops at all */ + if (no_loops) + { + for (j = 0; j < n_basic_blocks; j++) + if (out_edges[j] == 0) /* a leaf */ + { + queue[++tail] = j; + in_queue[j] = 1; + + if (too_large (j, &num_bbs, &num_insns)) + { + too_large_failure = 1; + break; + } + } + } + else + { + fst_edge = current_edge = IN_EDGES (i); + do + { + node = FROM_BLOCK (current_edge); + if (max_hdr[node] == loop_head && node != i) /* a latch */ + { + queue[++tail] = node; + in_queue[node] = 1; + + if (too_large (node, &num_bbs, &num_insns)) + { + too_large_failure = 1; + break; + } + } + current_edge = NEXT_IN (current_edge); + } + while (fst_edge != current_edge); + } + + /* Put in queue[] all blocks that belong to the loop. Check + that the loop is reducible, traversing back from the loop + latches up to the loop header. */ + while (head < tail && !too_large_failure) + { + child = queue[++head]; + fst_edge = current_edge = IN_EDGES (child); + do + { + node = FROM_BLOCK (current_edge); + + if (max_hdr[node] != loop_head) + { /* another entry to loop, it is irreducible */ + tail = -1; + break; + } + else if (!in_queue[node] && node != i) + { + queue[++tail] = node; + in_queue[node] = 1; + + if (too_large (node, &num_bbs, &num_insns)) + { + too_large_failure = 1; + break; + } + } + current_edge = NEXT_IN (current_edge); + } + while (fst_edge != current_edge); + } + + if (tail >= 0 && !too_large_failure) + { + /* Place the loop header into list of region blocks */ + degree[i] = -1; + rgn_bb_table[idx] = i; + RGN_NR_BLOCKS (nr_regions) = num_bbs; + RGN_BLOCKS (nr_regions) = idx++; + CONTAINING_RGN (i) = nr_regions; + BLOCK_TO_BB (i) = count = 0; + + /* remove blocks from queue[], (in topological order), when + their in_degree becomes 0. We scan the queue over and + over again until it is empty. Note: there may be a more + efficient way to do it. */ + while (tail >= 0) + { + if (head < 0) + head = tail; + child = queue[head]; + if (degree[child] == 0) + { + degree[child] = -1; + rgn_bb_table[idx++] = child; + BLOCK_TO_BB (child) = ++count; + CONTAINING_RGN (child) = nr_regions; + queue[head] = queue[tail--]; + fst_edge = current_edge = OUT_EDGES (child); + + if (fst_edge > 0) + { + do + { + --degree[TO_BLOCK (current_edge)]; + current_edge = NEXT_OUT (current_edge); + } + while (fst_edge != current_edge); + } + } + else + --head; + } + ++nr_regions; + } + } + } + + /* define each of all other blocks as a region itself */ + for (i = 0; i < n_basic_blocks; i++) + if (degree[i] >= 0) + { + rgn_bb_table[idx] = i; + RGN_NR_BLOCKS (nr_regions) = 1; + RGN_BLOCKS (nr_regions) = idx++; + CONTAINING_RGN (i) = nr_regions++; + BLOCK_TO_BB (i) = 0; + } + +} /* find_rgns */ + + +/* functions for regions scheduling information */ + +/* Compute dominators, probability, and potential-split-edges of bb. + Assume that these values were already computed for bb's predecessors. */ + +static void +compute_dom_prob_ps (bb) + int bb; +{ + int nxt_in_edge, fst_in_edge, pred; + int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges; + + prob[bb] = 0.0; + if (IS_RGN_ENTRY (bb)) + { + BITSET_ADD (dom[bb], 0, bbset_size); + prob[bb] = 1.0; + return; + } + + fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb)); + + /* intialize dom[bb] to '111..1' */ + BITSET_INVERT (dom[bb], bbset_size); + + do + { + pred = FROM_BLOCK (nxt_in_edge); + BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size); + + BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)], + edgeset_size); + + BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size); + + nr_out_edges = 1; + nr_rgn_out_edges = 0; + fst_out_edge = OUT_EDGES (pred); + nxt_out_edge = NEXT_OUT (fst_out_edge); + BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)], + edgeset_size); + + BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size); + + /* the successor doesn't belong the region? */ + if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) != + CONTAINING_RGN (BB_TO_BLOCK (bb))) + ++nr_rgn_out_edges; + + while (fst_out_edge != nxt_out_edge) + { + ++nr_out_edges; + /* the successor doesn't belong the region? */ + if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) != + CONTAINING_RGN (BB_TO_BLOCK (bb))) + ++nr_rgn_out_edges; + BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size); + nxt_out_edge = NEXT_OUT (nxt_out_edge); + + } + + /* now nr_rgn_out_edges is the number of region-exit edges from pred, + and nr_out_edges will be the number of pred out edges not leaving + the region. */ + nr_out_edges -= nr_rgn_out_edges; + if (nr_rgn_out_edges > 0) + prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges; + else + prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges; + nxt_in_edge = NEXT_IN (nxt_in_edge); + } + while (fst_in_edge != nxt_in_edge); + + BITSET_ADD (dom[bb], bb, bbset_size); + BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size); + + if (sched_verbose >= 2) + fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb])); +} /* compute_dom_prob_ps */ + +/* functions for target info */ + +/* Compute in BL the list of split-edges of bb_src relatively to bb_trg. + Note that bb_trg dominates bb_src. */ + +static void +split_edges (bb_src, bb_trg, bl) + int bb_src; + int bb_trg; + edgelst *bl; +{ + int es = edgeset_size; + edgeset src = (edgeset) alloca (es * sizeof (HOST_WIDE_INT)); + + while (es--) + src[es] = (pot_split[bb_src])[es]; + BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size); + extract_bitlst (src, edgeset_size, bl); +} + + +/* Find the valid candidate-source-blocks for the target block TRG, compute + their probability, and check if they are speculative or not. + For speculative sources, compute their update-blocks and split-blocks. */ + +static void +compute_trg_info (trg) + int trg; +{ + register candidate *sp; + edgelst el; + int check_block, update_idx; + int i, j, k, fst_edge, nxt_edge; + + /* define some of the fields for the target bb as well */ + sp = candidate_table + trg; + sp->is_valid = 1; + sp->is_speculative = 0; + sp->src_prob = 100; + + for (i = trg + 1; i < current_nr_blocks; i++) + { + sp = candidate_table + i; + + sp->is_valid = IS_DOMINATED (i, trg); + if (sp->is_valid) + { + sp->src_prob = GET_SRC_PROB (i, trg); + sp->is_valid = (sp->src_prob >= MIN_PROBABILITY); + } + + if (sp->is_valid) + { + split_edges (i, trg, &el); + sp->is_speculative = (el.nr_members) ? 1 : 0; + if (sp->is_speculative && !flag_schedule_speculative) + sp->is_valid = 0; + } + + if (sp->is_valid) + { + sp->split_bbs.first_member = &bblst_table[bblst_last]; + sp->split_bbs.nr_members = el.nr_members; + for (j = 0; j < el.nr_members; bblst_last++, j++) + bblst_table[bblst_last] = + TO_BLOCK (rgn_edges[el.first_member[j]]); + sp->update_bbs.first_member = &bblst_table[bblst_last]; + update_idx = 0; + for (j = 0; j < el.nr_members; j++) + { + check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]); + fst_edge = nxt_edge = OUT_EDGES (check_block); + do + { + for (k = 0; k < el.nr_members; k++) + if (EDGE_TO_BIT (nxt_edge) == el.first_member[k]) + break; + + if (k >= el.nr_members) + { + bblst_table[bblst_last++] = TO_BLOCK (nxt_edge); + update_idx++; + } + + nxt_edge = NEXT_OUT (nxt_edge); + } + while (fst_edge != nxt_edge); + } + sp->update_bbs.nr_members = update_idx; + + } + else + { + sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0; + + sp->is_speculative = 0; + sp->src_prob = 0; + } + } +} /* compute_trg_info */ + + +/* Print candidates info, for debugging purposes. Callable from debugger. */ + +void +debug_candidate (i) + int i; +{ + if (!candidate_table[i].is_valid) + return; + + if (candidate_table[i].is_speculative) + { + int j; + fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i); + + fprintf (dump, "split path: "); + for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++) + { + int b = candidate_table[i].split_bbs.first_member[j]; + + fprintf (dump, " %d ", b); + } + fprintf (dump, "\n"); + + fprintf (dump, "update path: "); + for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++) + { + int b = candidate_table[i].update_bbs.first_member[j]; + + fprintf (dump, " %d ", b); + } + fprintf (dump, "\n"); + } + else + { + fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i)); + } +} + + +/* Print candidates info, for debugging purposes. Callable from debugger. */ + +void +debug_candidates (trg) + int trg; +{ + int i; + + fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n", + BB_TO_BLOCK (trg), trg); + for (i = trg + 1; i < current_nr_blocks; i++) + debug_candidate (i); +} + + +/* functions for speculative scheduing */ + +/* Return 0 if x is a set of a register alive in the beginning of one + of the split-blocks of src, otherwise return 1. */ + +static int +check_live_1 (src, x) + int src; + rtx x; +{ + register i; + register int regno; + register rtx reg = SET_DEST (x); + + if (reg == 0) + return 1; + + while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT + || GET_CODE (reg) == SIGN_EXTRACT + || GET_CODE (reg) == STRICT_LOW_PART) + reg = XEXP (reg, 0); + + if (GET_CODE (reg) != REG) + return 1; + + regno = REGNO (reg); + + if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno]) + { + /* Global registers are assumed live */ + return 0; + } + else + { + if (regno < FIRST_PSEUDO_REGISTER) + { + /* check for hard registers */ + int j = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + while (--j >= 0) + { + for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) + { + int b = candidate_table[src].split_bbs.first_member[i]; + + if (REGNO_REG_SET_P (basic_block_live_at_start[b], regno + j)) + { + return 0; + } + } + } + } + else + { + /* check for psuedo registers */ + for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) + { + int b = candidate_table[src].split_bbs.first_member[i]; + + if (REGNO_REG_SET_P (basic_block_live_at_start[b], regno)) + { + return 0; + } + } + } + } + + return 1; +} + + +/* If x is a set of a register R, mark that R is alive in the beginning + of every update-block of src. */ + +static void +update_live_1 (src, x) + int src; + rtx x; +{ + register i; + register int regno; + register rtx reg = SET_DEST (x); + + if (reg == 0) + return; + + while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT + || GET_CODE (reg) == SIGN_EXTRACT + || GET_CODE (reg) == STRICT_LOW_PART) + reg = XEXP (reg, 0); + + if (GET_CODE (reg) != REG) + return; + + /* Global registers are always live, so the code below does not apply + to them. */ + + regno = REGNO (reg); + + if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno]) + { + if (regno < FIRST_PSEUDO_REGISTER) + { + int j = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + while (--j >= 0) + { + for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) + { + int b = candidate_table[src].update_bbs.first_member[i]; + + SET_REGNO_REG_SET (basic_block_live_at_start[b], regno + j); + } + } + } + else + { + for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) + { + int b = candidate_table[src].update_bbs.first_member[i]; + + SET_REGNO_REG_SET (basic_block_live_at_start[b], regno); + } + } + } +} + + +/* Return 1 if insn can be speculatively moved from block src to trg, + otherwise return 0. Called before first insertion of insn to + ready-list or before the scheduling. */ + +static int +check_live (insn, src, trg) + rtx insn; + int src; + int trg; +{ + /* find the registers set by instruction */ + if (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == CLOBBER) + return check_live_1 (src, PATTERN (insn)); + else if (GET_CODE (PATTERN (insn)) == PARALLEL) + { + int j; + for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) + if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET + || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) + && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j))) + return 0; + + return 1; + } + + return 1; +} + + +/* Update the live registers info after insn was moved speculatively from + block src to trg. */ + +static void +update_live (insn, src, trg) + rtx insn; + int src, trg; +{ + /* find the registers set by instruction */ + if (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == CLOBBER) + update_live_1 (src, PATTERN (insn)); + else if (GET_CODE (PATTERN (insn)) == PARALLEL) + { + int j; + for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) + if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET + || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) + update_live_1 (src, XVECEXP (PATTERN (insn), 0, j)); + } +} + +/* Exception Free Loads: + + We define five classes of speculative loads: IFREE, IRISKY, + PFREE, PRISKY, and MFREE. + + IFREE loads are loads that are proved to be exception-free, just + by examining the load insn. Examples for such loads are loads + from TOC and loads of global data. + + IRISKY loads are loads that are proved to be exception-risky, + just by examining the load insn. Examples for such loads are + volatile loads and loads from shared memory. + + PFREE loads are loads for which we can prove, by examining other + insns, that they are exception-free. Currently, this class consists + of loads for which we are able to find a "similar load", either in + the target block, or, if only one split-block exists, in that split + block. Load2 is similar to load1 if both have same single base + register. We identify only part of the similar loads, by finding + an insn upon which both load1 and load2 have a DEF-USE dependence. + + PRISKY loads are loads for which we can prove, by examining other + insns, that they are exception-risky. Currently we have two proofs for + such loads. The first proof detects loads that are probably guarded by a + test on the memory address. This proof is based on the + backward and forward data dependence information for the region. + Let load-insn be the examined load. + Load-insn is PRISKY iff ALL the following hold: + + - insn1 is not in the same block as load-insn + - there is a DEF-USE dependence chain (insn1, ..., load-insn) + - test-insn is either a compare or a branch, not in the same block as load-insn + - load-insn is reachable from test-insn + - there is a DEF-USE dependence chain (insn1, ..., test-insn) + + This proof might fail when the compare and the load are fed + by an insn not in the region. To solve this, we will add to this + group all loads that have no input DEF-USE dependence. + + The second proof detects loads that are directly or indirectly + fed by a speculative load. This proof is affected by the + scheduling process. We will use the flag fed_by_spec_load. + Initially, all insns have this flag reset. After a speculative + motion of an insn, if insn is either a load, or marked as + fed_by_spec_load, we will also mark as fed_by_spec_load every + insn1 for which a DEF-USE dependence (insn, insn1) exists. A + load which is fed_by_spec_load is also PRISKY. + + MFREE (maybe-free) loads are all the remaining loads. They may be + exception-free, but we cannot prove it. + + Now, all loads in IFREE and PFREE classes are considered + exception-free, while all loads in IRISKY and PRISKY classes are + considered exception-risky. As for loads in the MFREE class, + these are considered either exception-free or exception-risky, + depending on whether we are pessimistic or optimistic. We have + to take the pessimistic approach to assure the safety of + speculative scheduling, but we can take the optimistic approach + by invoking the -fsched_spec_load_dangerous option. */ + +enum INSN_TRAP_CLASS +{ + TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2, + PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5 +}; + +#define WORST_CLASS(class1, class2) \ +((class1 > class2) ? class1 : class2) + +/* Indexed by INSN_UID, and set if there's DEF-USE dependence between */ +/* some speculatively moved load insn and this one. */ +char *fed_by_spec_load; +char *is_load_insn; + +/* Non-zero if block bb_to is equal to, or reachable from block bb_from. */ +#define IS_REACHABLE(bb_from, bb_to) \ +(bb_from == bb_to \ + || IS_RGN_ENTRY (bb_from) \ + || (bitset_member (ancestor_edges[bb_to], \ + EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \ + edgeset_size))) +#define FED_BY_SPEC_LOAD(insn) (fed_by_spec_load[INSN_UID (insn)]) +#define IS_LOAD_INSN(insn) (is_load_insn[INSN_UID (insn)]) + +/* Non-zero iff the address is comprised from at most 1 register */ +#define CONST_BASED_ADDRESS_P(x) \ + (GET_CODE (x) == REG \ + || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \ + || (GET_CODE (x) == LO_SUM)) \ + && (GET_CODE (XEXP (x, 0)) == CONST_INT \ + || GET_CODE (XEXP (x, 1)) == CONST_INT))) + +/* Turns on the fed_by_spec_load flag for insns fed by load_insn. */ + +static void +set_spec_fed (load_insn) + rtx load_insn; +{ + rtx link; + + for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1)) + if (GET_MODE (link) == VOIDmode) + FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1; +} /* set_spec_fed */ + +/* On the path from the insn to load_insn_bb, find a conditional branch */ +/* depending on insn, that guards the speculative load. */ + +static int +find_conditional_protection (insn, load_insn_bb) + rtx insn; + int load_insn_bb; +{ + rtx link; + + /* iterate through DEF-USE forward dependences */ + for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) + { + rtx next = XEXP (link, 0); + if ((CONTAINING_RGN (INSN_BLOCK (next)) == + CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb))) + && IS_REACHABLE (INSN_BB (next), load_insn_bb) + && load_insn_bb != INSN_BB (next) + && GET_MODE (link) == VOIDmode + && (GET_CODE (next) == JUMP_INSN + || find_conditional_protection (next, load_insn_bb))) + return 1; + } + return 0; +} /* find_conditional_protection */ + +/* Returns 1 if the same insn1 that participates in the computation + of load_insn's address is feeding a conditional branch that is + guarding on load_insn. This is true if we find a the two DEF-USE + chains: + insn1 -> ... -> conditional-branch + insn1 -> ... -> load_insn, + and if a flow path exist: + insn1 -> ... -> conditional-branch -> ... -> load_insn, + and if insn1 is on the path + region-entry -> ... -> bb_trg -> ... load_insn. + + Locate insn1 by climbing on LOG_LINKS from load_insn. + Locate the branch by following INSN_DEPEND from insn1. */ + +static int +is_conditionally_protected (load_insn, bb_src, bb_trg) + rtx load_insn; + int bb_src, bb_trg; +{ + rtx link; + + for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1)) + { + rtx insn1 = XEXP (link, 0); + + /* must be a DEF-USE dependence upon non-branch */ + if (GET_MODE (link) != VOIDmode + || GET_CODE (insn1) == JUMP_INSN) + continue; + + /* must exist a path: region-entry -> ... -> bb_trg -> ... load_insn */ + if (INSN_BB (insn1) == bb_src + || (CONTAINING_RGN (INSN_BLOCK (insn1)) + != CONTAINING_RGN (BB_TO_BLOCK (bb_src))) + || (!IS_REACHABLE (bb_trg, INSN_BB (insn1)) + && !IS_REACHABLE (INSN_BB (insn1), bb_trg))) + continue; + + /* now search for the conditional-branch */ + if (find_conditional_protection (insn1, bb_src)) + return 1; + + /* recursive step: search another insn1, "above" current insn1. */ + return is_conditionally_protected (insn1, bb_src, bb_trg); + } + + /* the chain does not exsist */ + return 0; +} /* is_conditionally_protected */ + +/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence + load_insn can move speculatively from bb_src to bb_trg. All the + following must hold: + + (1) both loads have 1 base register (PFREE_CANDIDATEs). + (2) load_insn and load1 have a def-use dependence upon + the same insn 'insn1'. + (3) either load2 is in bb_trg, or: + - there's only one split-block, and + - load1 is on the escape path, and + + From all these we can conclude that the two loads access memory + addresses that differ at most by a constant, and hence if moving + load_insn would cause an exception, it would have been caused by + load2 anyhow. */ + +static int +is_pfree (load_insn, bb_src, bb_trg) + rtx load_insn; + int bb_src, bb_trg; +{ + rtx back_link; + register candidate *candp = candidate_table + bb_src; + + if (candp->split_bbs.nr_members != 1) + /* must have exactly one escape block */ + return 0; + + for (back_link = LOG_LINKS (load_insn); + back_link; back_link = XEXP (back_link, 1)) + { + rtx insn1 = XEXP (back_link, 0); + + if (GET_MODE (back_link) == VOIDmode) + { + /* found a DEF-USE dependence (insn1, load_insn) */ + rtx fore_link; + + for (fore_link = INSN_DEPEND (insn1); + fore_link; fore_link = XEXP (fore_link, 1)) + { + rtx insn2 = XEXP (fore_link, 0); + if (GET_MODE (fore_link) == VOIDmode) + { + /* found a DEF-USE dependence (insn1, insn2) */ + if (classify_insn (insn2) != PFREE_CANDIDATE) + /* insn2 not guaranteed to be a 1 base reg load */ + continue; + + if (INSN_BB (insn2) == bb_trg) + /* insn2 is the similar load, in the target block */ + return 1; + + if (*(candp->split_bbs.first_member) == INSN_BLOCK (insn2)) + /* insn2 is a similar load, in a split-block */ + return 1; + } + } + } + } + + /* couldn't find a similar load */ + return 0; +} /* is_pfree */ + +/* Returns a class that insn with GET_DEST(insn)=x may belong to, + as found by analyzing insn's expression. */ + +static int +may_trap_exp (x, is_store) + rtx x; + int is_store; +{ + enum rtx_code code; + + if (x == 0) + return TRAP_FREE; + code = GET_CODE (x); + if (is_store) + { + if (code == MEM) + return TRAP_RISKY; + else + return TRAP_FREE; + } + if (code == MEM) + { + /* The insn uses memory */ + /* a volatile load */ + if (MEM_VOLATILE_P (x)) + return IRISKY; + /* an exception-free load */ + if (!may_trap_p (x)) + return IFREE; + /* a load with 1 base register, to be further checked */ + if (CONST_BASED_ADDRESS_P (XEXP (x, 0))) + return PFREE_CANDIDATE; + /* no info on the load, to be further checked */ + return PRISKY_CANDIDATE; + } + else + { + char *fmt; + int i, insn_class = TRAP_FREE; + + /* neither store nor load, check if it may cause a trap */ + if (may_trap_p (x)) + return TRAP_RISKY; + /* recursive step: walk the insn... */ + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + int tmp_class = may_trap_exp (XEXP (x, i), is_store); + insn_class = WORST_CLASS (insn_class, tmp_class); + } + else if (fmt[i] == 'E') + { + int j; + for (j = 0; j < XVECLEN (x, i); j++) + { + int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store); + insn_class = WORST_CLASS (insn_class, tmp_class); + if (insn_class == TRAP_RISKY || insn_class == IRISKY) + break; + } + } + if (insn_class == TRAP_RISKY || insn_class == IRISKY) + break; + } + return insn_class; + } +} /* may_trap_exp */ + + +/* Classifies insn for the purpose of verifying that it can be + moved speculatively, by examining it's patterns, returning: + TRAP_RISKY: store, or risky non-load insn (e.g. division by variable). + TRAP_FREE: non-load insn. + IFREE: load from a globaly safe location. + IRISKY: volatile load. + PFREE_CANDIDATE, PRISKY_CANDIDATE: load that need to be checked for + being either PFREE or PRISKY. */ + +static int +classify_insn (insn) + rtx insn; +{ + rtx pat = PATTERN (insn); + int tmp_class = TRAP_FREE; + int insn_class = TRAP_FREE; + enum rtx_code code; + + if (GET_CODE (pat) == PARALLEL) + { + int i, len = XVECLEN (pat, 0); + + for (i = len - 1; i >= 0; i--) + { + code = GET_CODE (XVECEXP (pat, 0, i)); + switch (code) + { + case CLOBBER: + /* test if it is a 'store' */ + tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1); + break; + case SET: + /* test if it is a store */ + tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1); + if (tmp_class == TRAP_RISKY) + break; + /* test if it is a load */ + tmp_class = + WORST_CLASS (tmp_class, + may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0)); + default:; + } + insn_class = WORST_CLASS (insn_class, tmp_class); + if (insn_class == TRAP_RISKY || insn_class == IRISKY) + break; + } + } + else + { + code = GET_CODE (pat); + switch (code) + { + case CLOBBER: + /* test if it is a 'store' */ + tmp_class = may_trap_exp (XEXP (pat, 0), 1); + break; + case SET: + /* test if it is a store */ + tmp_class = may_trap_exp (SET_DEST (pat), 1); + if (tmp_class == TRAP_RISKY) + break; + /* test if it is a load */ + tmp_class = + WORST_CLASS (tmp_class, + may_trap_exp (SET_SRC (pat), 0)); + default:; + } + insn_class = tmp_class; + } + + return insn_class; + +} /* classify_insn */ + +/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by + a load moved speculatively, or if load_insn is protected by + a compare on load_insn's address). */ + +static int +is_prisky (load_insn, bb_src, bb_trg) + rtx load_insn; + int bb_src, bb_trg; +{ + if (FED_BY_SPEC_LOAD (load_insn)) + return 1; + + if (LOG_LINKS (load_insn) == NULL) + /* dependence may 'hide' out of the region. */ + return 1; + + if (is_conditionally_protected (load_insn, bb_src, bb_trg)) + return 1; + + return 0; +} /* is_prisky */ + +/* Insn is a candidate to be moved speculatively from bb_src to bb_trg. + Return 1 if insn is exception-free (and the motion is valid) + and 0 otherwise. */ + +static int +is_exception_free (insn, bb_src, bb_trg) + rtx insn; + int bb_src, bb_trg; +{ + int insn_class = classify_insn (insn); + + /* handle non-load insns */ + switch (insn_class) + { + case TRAP_FREE: + return 1; + case TRAP_RISKY: + return 0; + default:; + } + + /* handle loads */ + if (!flag_schedule_speculative_load) + return 0; + IS_LOAD_INSN (insn) = 1; + switch (insn_class) + { + case IFREE: + return (1); + case IRISKY: + return 0; + case PFREE_CANDIDATE: + if (is_pfree (insn, bb_src, bb_trg)) + return 1; + /* don't 'break' here: PFREE-candidate is also PRISKY-candidate */ + case PRISKY_CANDIDATE: + if (!flag_schedule_speculative_load_dangerous + || is_prisky (insn, bb_src, bb_trg)) + return 0; + break; + default:; + } + + return flag_schedule_speculative_load_dangerous; +} /* is_exception_free */ + + +/* Process an insn's memory dependencies. There are four kinds of + dependencies: + + (0) read dependence: read follows read + (1) true dependence: read follows write + (2) anti dependence: write follows read + (3) output dependence: write follows write + + We are careful to build only dependencies which actually exist, and + use transitivity to avoid building too many links. */ + +/* Return the INSN_LIST containing INSN in LIST, or NULL + if LIST does not contain INSN. */ + +__inline static rtx +find_insn_list (insn, list) + rtx insn; + rtx list; +{ + while (list) + { + if (XEXP (list, 0) == insn) + return list; + list = XEXP (list, 1); + } + return 0; +} + + +/* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0 otherwise. */ + +__inline static char +find_insn_mem_list (insn, x, list, list1) + rtx insn, x; + rtx list, list1; +{ + while (list) + { + if (XEXP (list, 0) == insn + && XEXP (list1, 0) == x) + return 1; + list = XEXP (list, 1); + list1 = XEXP (list1, 1); + } + return 0; +} + + +/* Compute the function units used by INSN. This caches the value + returned by function_units_used. A function unit is encoded as the + unit number if the value is non-negative and the compliment of a + mask if the value is negative. A function unit index is the + non-negative encoding. */ + +__inline static int +insn_unit (insn) + rtx insn; +{ + register int unit = INSN_UNIT (insn); + + if (unit == 0) + { + recog_memoized (insn); + + /* A USE insn, or something else we don't need to understand. + We can't pass these directly to function_units_used because it will + trigger a fatal error for unrecognizable insns. */ + if (INSN_CODE (insn) < 0) + unit = -1; + else + { + unit = function_units_used (insn); + /* Increment non-negative values so we can cache zero. */ + if (unit >= 0) + unit++; + } + /* We only cache 16 bits of the result, so if the value is out of + range, don't cache it. */ + if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT + || unit >= 0 + || (~unit & ((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0) + INSN_UNIT (insn) = unit; + } + return (unit > 0 ? unit - 1 : unit); +} + +/* Compute the blockage range for executing INSN on UNIT. This caches + the value returned by the blockage_range_function for the unit. + These values are encoded in an int where the upper half gives the + minimum value and the lower half gives the maximum value. */ + +__inline static unsigned int +blockage_range (unit, insn) + int unit; + rtx insn; +{ + unsigned int blockage = INSN_BLOCKAGE (insn); + unsigned int range; + + if (UNIT_BLOCKED (blockage) != unit + 1) + { + range = function_units[unit].blockage_range_function (insn); + /* We only cache the blockage range for one unit and then only if + the values fit. */ + if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS) + INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range); + } + else + range = BLOCKAGE_RANGE (blockage); + + return range; +} + +/* A vector indexed by function unit instance giving the last insn to use + the unit. The value of the function unit instance index for unit U + instance I is (U + I * FUNCTION_UNITS_SIZE). */ +static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY]; + +/* A vector indexed by function unit instance giving the minimum time when + the unit will unblock based on the maximum blockage cost. */ +static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY]; + +/* A vector indexed by function unit number giving the number of insns + that remain to use the unit. */ +static int unit_n_insns[FUNCTION_UNITS_SIZE]; + +/* Reset the function unit state to the null state. */ + +static void +clear_units () +{ + bzero ((char *) unit_last_insn, sizeof (unit_last_insn)); + bzero ((char *) unit_tick, sizeof (unit_tick)); + bzero ((char *) unit_n_insns, sizeof (unit_n_insns)); +} + +/* Return the issue-delay of an insn */ + +__inline static int +insn_issue_delay (insn) + rtx insn; +{ + rtx link; + int i, delay = 0; + int unit = insn_unit (insn); + + /* efficiency note: in fact, we are working 'hard' to compute a + value that was available in md file, and is not available in + function_units[] structure. It would be nice to have this + value there, too. */ + if (unit >= 0) + { + if (function_units[unit].blockage_range_function && + function_units[unit].blockage_function) + delay = function_units[unit].blockage_function (insn, insn); + } + else + for (i = 0, unit = ~unit; unit; i++, unit >>= 1) + if ((unit & 1) != 0 && function_units[i].blockage_range_function + && function_units[i].blockage_function) + delay = MAX (delay, function_units[i].blockage_function (insn, insn)); + + return delay; +} + +/* Return the actual hazard cost of executing INSN on the unit UNIT, + instance INSTANCE at time CLOCK if the previous actual hazard cost + was COST. */ + +__inline static int +actual_hazard_this_instance (unit, instance, insn, clock, cost) + int unit, instance, clock, cost; + rtx insn; +{ + int tick = unit_tick[instance]; /* issue time of the last issued insn */ + + if (tick - clock > cost) + { + /* The scheduler is operating forward, so unit's last insn is the + executing insn and INSN is the candidate insn. We want a + more exact measure of the blockage if we execute INSN at CLOCK + given when we committed the execution of the unit's last insn. + + The blockage value is given by either the unit's max blockage + constant, blockage range function, or blockage function. Use + the most exact form for the given unit. */ + + if (function_units[unit].blockage_range_function) + { + if (function_units[unit].blockage_function) + tick += (function_units[unit].blockage_function + (unit_last_insn[instance], insn) + - function_units[unit].max_blockage); + else + tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn)) + - function_units[unit].max_blockage); + } + if (tick - clock > cost) + cost = tick - clock; + } + return cost; +} + +/* Record INSN as having begun execution on the units encoded by UNIT at + time CLOCK. */ + +__inline static void +schedule_unit (unit, insn, clock) + int unit, clock; + rtx insn; +{ + int i; + + if (unit >= 0) + { + int instance = unit; +#if MAX_MULTIPLICITY > 1 + /* Find the first free instance of the function unit and use that + one. We assume that one is free. */ + for (i = function_units[unit].multiplicity - 1; i > 0; i--) + { + if (!actual_hazard_this_instance (unit, instance, insn, clock, 0)) + break; + instance += FUNCTION_UNITS_SIZE; + } +#endif + unit_last_insn[instance] = insn; + unit_tick[instance] = (clock + function_units[unit].max_blockage); + } + else + for (i = 0, unit = ~unit; unit; i++, unit >>= 1) + if ((unit & 1) != 0) + schedule_unit (i, insn, clock); +} + +/* Return the actual hazard cost of executing INSN on the units encoded by + UNIT at time CLOCK if the previous actual hazard cost was COST. */ + +__inline static int +actual_hazard (unit, insn, clock, cost) + int unit, clock, cost; + rtx insn; +{ + int i; + + if (unit >= 0) + { + /* Find the instance of the function unit with the minimum hazard. */ + int instance = unit; + int best_cost = actual_hazard_this_instance (unit, instance, insn, + clock, cost); + int this_cost; + +#if MAX_MULTIPLICITY > 1 + if (best_cost > cost) + { + for (i = function_units[unit].multiplicity - 1; i > 0; i--) + { + instance += FUNCTION_UNITS_SIZE; + this_cost = actual_hazard_this_instance (unit, instance, insn, + clock, cost); + if (this_cost < best_cost) + { + best_cost = this_cost; + if (this_cost <= cost) + break; + } + } + } +#endif + cost = MAX (cost, best_cost); + } + else + for (i = 0, unit = ~unit; unit; i++, unit >>= 1) + if ((unit & 1) != 0) + cost = actual_hazard (i, insn, clock, cost); + + return cost; +} + +/* Return the potential hazard cost of executing an instruction on the + units encoded by UNIT if the previous potential hazard cost was COST. + An insn with a large blockage time is chosen in preference to one + with a smaller time; an insn that uses a unit that is more likely + to be used is chosen in preference to one with a unit that is less + used. We are trying to minimize a subsequent actual hazard. */ + +__inline static int +potential_hazard (unit, insn, cost) + int unit, cost; + rtx insn; +{ + int i, ncost; + unsigned int minb, maxb; + + if (unit >= 0) + { + minb = maxb = function_units[unit].max_blockage; + if (maxb > 1) + { + if (function_units[unit].blockage_range_function) + { + maxb = minb = blockage_range (unit, insn); + maxb = MAX_BLOCKAGE_COST (maxb); + minb = MIN_BLOCKAGE_COST (minb); + } + + if (maxb > 1) + { + /* Make the number of instructions left dominate. Make the + minimum delay dominate the maximum delay. If all these + are the same, use the unit number to add an arbitrary + ordering. Other terms can be added. */ + ncost = minb * 0x40 + maxb; + ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit; + if (ncost > cost) + cost = ncost; + } + } + } + else + for (i = 0, unit = ~unit; unit; i++, unit >>= 1) + if ((unit & 1) != 0) + cost = potential_hazard (i, insn, cost); + + return cost; +} + +/* Compute cost of executing INSN given the dependence LINK on the insn USED. + This is the number of cycles between instruction issue and + instruction results. */ + +__inline static int +insn_cost (insn, link, used) + rtx insn, link, used; +{ + register int cost = INSN_COST (insn); + + if (cost == 0) + { + recog_memoized (insn); + + /* A USE insn, or something else we don't need to understand. + We can't pass these directly to result_ready_cost because it will + trigger a fatal error for unrecognizable insns. */ + if (INSN_CODE (insn) < 0) + { + INSN_COST (insn) = 1; + return 1; + } + else + { + cost = result_ready_cost (insn); + + if (cost < 1) + cost = 1; + + INSN_COST (insn) = cost; + } + } + + /* in this case estimate cost without caring how insn is used. */ + if (link == 0 && used == 0) + return cost; + + /* A USE insn should never require the value used to be computed. This + allows the computation of a function's result and parameter values to + overlap the return and call. */ + recog_memoized (used); + if (INSN_CODE (used) < 0) + LINK_COST_FREE (link) = 1; + + /* If some dependencies vary the cost, compute the adjustment. Most + commonly, the adjustment is complete: either the cost is ignored + (in the case of an output- or anti-dependence), or the cost is + unchanged. These values are cached in the link as LINK_COST_FREE + and LINK_COST_ZERO. */ + + if (LINK_COST_FREE (link)) + cost = 1; +#ifdef ADJUST_COST + else if (!LINK_COST_ZERO (link)) + { + int ncost = cost; + + ADJUST_COST (used, link, insn, ncost); + if (ncost <= 1) + LINK_COST_FREE (link) = ncost = 1; + if (cost == ncost) + LINK_COST_ZERO (link) = 1; + cost = ncost; + } +#endif + return cost; +} + +/* Compute the priority number for INSN. */ + +static int +priority (insn) + rtx insn; +{ + int this_priority; + rtx link; + + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + return 0; + + if ((this_priority = INSN_PRIORITY (insn)) == 0) + { + if (INSN_DEPEND (insn) == 0) + this_priority = insn_cost (insn, 0, 0); + else + for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) + { + rtx next; + int next_priority; + + next = XEXP (link, 0); + + /* critical path is meaningful in block boundaries only */ + if (INSN_BLOCK (next) != INSN_BLOCK (insn)) + continue; + + next_priority = insn_cost (insn, link, next) + priority (next); + if (next_priority > this_priority) + this_priority = next_priority; + } + INSN_PRIORITY (insn) = this_priority; + } + return this_priority; +} + + +/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add + them to the unused_*_list variables, so that they can be reused. */ + +__inline static void +free_pnd_lst (listp, unused_listp) + rtx *listp, *unused_listp; +{ + register rtx link, prev_link; + + if (*listp == 0) + return; + + prev_link = *listp; + link = XEXP (prev_link, 1); + + while (link) + { + prev_link = link; + link = XEXP (link, 1); + } + + XEXP (prev_link, 1) = *unused_listp; + *unused_listp = *listp; + *listp = 0; +} + +static void +free_pending_lists () +{ + + + if (current_nr_blocks <= 1) + { + free_pnd_lst (&pending_read_insns, &unused_insn_list); + free_pnd_lst (&pending_write_insns, &unused_insn_list); + free_pnd_lst (&pending_read_mems, &unused_expr_list); + free_pnd_lst (&pending_write_mems, &unused_expr_list); + } + else + { + /* interblock scheduling */ + int bb; + + for (bb = 0; bb < current_nr_blocks; bb++) + { + free_pnd_lst (&bb_pending_read_insns[bb], &unused_insn_list); + free_pnd_lst (&bb_pending_write_insns[bb], &unused_insn_list); + free_pnd_lst (&bb_pending_read_mems[bb], &unused_expr_list); + free_pnd_lst (&bb_pending_write_mems[bb], &unused_expr_list); + } + } +} + +/* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST. + The MEM is a memory reference contained within INSN, which we are saving + so that we can do memory aliasing on it. */ + +static void +add_insn_mem_dependence (insn_list, mem_list, insn, mem) + rtx *insn_list, *mem_list, insn, mem; +{ + register rtx link; + + if (unused_insn_list) + { + link = unused_insn_list; + unused_insn_list = XEXP (link, 1); + } + else + link = rtx_alloc (INSN_LIST); + XEXP (link, 0) = insn; + XEXP (link, 1) = *insn_list; + *insn_list = link; + + if (unused_expr_list) + { + link = unused_expr_list; + unused_expr_list = XEXP (link, 1); + } + else + link = rtx_alloc (EXPR_LIST); + XEXP (link, 0) = mem; + XEXP (link, 1) = *mem_list; + *mem_list = link; + + pending_lists_length++; +} + + +/* Make a dependency between every memory reference on the pending lists + and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush + the read list. */ + +static void +flush_pending_lists (insn, only_write) + rtx insn; + int only_write; +{ + rtx u; + rtx link; + + while (pending_read_insns && ! only_write) + { + add_dependence (insn, XEXP (pending_read_insns, 0), REG_DEP_ANTI); + + link = pending_read_insns; + pending_read_insns = XEXP (pending_read_insns, 1); + XEXP (link, 1) = unused_insn_list; + unused_insn_list = link; + + link = pending_read_mems; + pending_read_mems = XEXP (pending_read_mems, 1); + XEXP (link, 1) = unused_expr_list; + unused_expr_list = link; + } + while (pending_write_insns) + { + add_dependence (insn, XEXP (pending_write_insns, 0), REG_DEP_ANTI); + + link = pending_write_insns; + pending_write_insns = XEXP (pending_write_insns, 1); + XEXP (link, 1) = unused_insn_list; + unused_insn_list = link; + + link = pending_write_mems; + pending_write_mems = XEXP (pending_write_mems, 1); + XEXP (link, 1) = unused_expr_list; + unused_expr_list = link; + } + pending_lists_length = 0; + + /* last_pending_memory_flush is now a list of insns */ + for (u = last_pending_memory_flush; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + + last_pending_memory_flush = + gen_rtx (INSN_LIST, VOIDmode, insn, 0); +} + +/* Analyze a single SET or CLOBBER rtx, X, creating all dependencies generated + by the write to the destination of X, and reads of everything mentioned. */ + +static void +sched_analyze_1 (x, insn) + rtx x; + rtx insn; +{ + register int regno; + register rtx dest = SET_DEST (x); + + if (dest == 0) + return; + + while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG + || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT) + { + if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT) + { + /* The second and third arguments are values read by this insn. */ + sched_analyze_2 (XEXP (dest, 1), insn); + sched_analyze_2 (XEXP (dest, 2), insn); + } + dest = SUBREG_REG (dest); + } + + if (GET_CODE (dest) == REG) + { + register int i; + + regno = REGNO (dest); + + /* A hard reg in a wide mode may really be multiple registers. + If so, mark all of them just like the first. */ + if (regno < FIRST_PSEUDO_REGISTER) + { + i = HARD_REGNO_NREGS (regno, GET_MODE (dest)); + while (--i >= 0) + { + rtx u; + + for (u = reg_last_uses[regno + i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + reg_last_uses[regno + i] = 0; + + for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT); + + SET_REGNO_REG_SET (reg_pending_sets, regno + i); + + if ((call_used_regs[regno + i] || global_regs[regno + i])) + /* Function calls clobber all call_used regs. */ + for (u = last_function_call; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + } + } + else + { + rtx u; + + for (u = reg_last_uses[regno]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + reg_last_uses[regno] = 0; + + for (u = reg_last_sets[regno]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT); + + SET_REGNO_REG_SET (reg_pending_sets, regno); + + /* Pseudos that are REG_EQUIV to something may be replaced + by that during reloading. We need only add dependencies for + the address in the REG_EQUIV note. */ + if (!reload_completed + && reg_known_equiv_p[regno] + && GET_CODE (reg_known_value[regno]) == MEM) + sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn); + + /* Don't let it cross a call after scheduling if it doesn't + already cross one. */ + + if (REG_N_CALLS_CROSSED (regno) == 0) + for (u = last_function_call; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + } + } + else if (GET_CODE (dest) == MEM) + { + /* Writing memory. */ + + if (pending_lists_length > 32) + { + /* Flush all pending reads and writes to prevent the pending lists + from getting any larger. Insn scheduling runs too slowly when + these lists get long. The number 32 was chosen because it + seems like a reasonable number. When compiling GCC with itself, + this flush occurs 8 times for sparc, and 10 times for m88k using + the number 32. */ + flush_pending_lists (insn, 0); + } + else + { + rtx u; + rtx pending, pending_mem; + + pending = pending_read_insns; + pending_mem = pending_read_mems; + while (pending) + { + /* If a dependency already exists, don't create a new one. */ + if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn))) + if (anti_dependence (XEXP (pending_mem, 0), dest)) + add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI); + + pending = XEXP (pending, 1); + pending_mem = XEXP (pending_mem, 1); + } + + pending = pending_write_insns; + pending_mem = pending_write_mems; + while (pending) + { + /* If a dependency already exists, don't create a new one. */ + if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn))) + if (output_dependence (XEXP (pending_mem, 0), dest)) + add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT); + + pending = XEXP (pending, 1); + pending_mem = XEXP (pending_mem, 1); + } + + for (u = last_pending_memory_flush; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + + add_insn_mem_dependence (&pending_write_insns, &pending_write_mems, + insn, dest); + } + sched_analyze_2 (XEXP (dest, 0), insn); + } + + /* Analyze reads. */ + if (GET_CODE (x) == SET) + sched_analyze_2 (SET_SRC (x), insn); +} + +/* Analyze the uses of memory and registers in rtx X in INSN. */ + +static void +sched_analyze_2 (x, insn) + rtx x; + rtx insn; +{ + register int i; + register int j; + register enum rtx_code code; + register char *fmt; + + if (x == 0) + return; + + code = GET_CODE (x); + + switch (code) + { + case CONST_INT: + case CONST_DOUBLE: + case SYMBOL_REF: + case CONST: + case LABEL_REF: + /* Ignore constants. Note that we must handle CONST_DOUBLE here + because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but + this does not mean that this insn is using cc0. */ + return; + +#ifdef HAVE_cc0 + case CC0: + { + rtx link, prev; + + /* User of CC0 depends on immediately preceding insn. */ + SCHED_GROUP_P (insn) = 1; + + /* There may be a note before this insn now, but all notes will + be removed before we actually try to schedule the insns, so + it won't cause a problem later. We must avoid it here though. */ + prev = prev_nonnote_insn (insn); + + /* Make a copy of all dependencies on the immediately previous insn, + and add to this insn. This is so that all the dependencies will + apply to the group. Remove an explicit dependence on this insn + as SCHED_GROUP_P now represents it. */ + + if (find_insn_list (prev, LOG_LINKS (insn))) + remove_dependence (insn, prev); + + for (link = LOG_LINKS (prev); link; link = XEXP (link, 1)) + add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link)); + + return; + } +#endif + + case REG: + { + rtx u; + int regno = REGNO (x); + if (regno < FIRST_PSEUDO_REGISTER) + { + int i; + + i = HARD_REGNO_NREGS (regno, GET_MODE (x)); + while (--i >= 0) + { + reg_last_uses[regno + i] + = gen_rtx (INSN_LIST, VOIDmode, + insn, reg_last_uses[regno + i]); + + for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), 0); + + if ((call_used_regs[regno + i] || global_regs[regno + i])) + /* Function calls clobber all call_used regs. */ + for (u = last_function_call; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + } + } + else + { + reg_last_uses[regno] + = gen_rtx (INSN_LIST, VOIDmode, insn, reg_last_uses[regno]); + + for (u = reg_last_sets[regno]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), 0); + + /* Pseudos that are REG_EQUIV to something may be replaced + by that during reloading. We need only add dependencies for + the address in the REG_EQUIV note. */ + if (!reload_completed + && reg_known_equiv_p[regno] + && GET_CODE (reg_known_value[regno]) == MEM) + sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn); + + /* If the register does not already cross any calls, then add this + insn to the sched_before_next_call list so that it will still + not cross calls after scheduling. */ + if (REG_N_CALLS_CROSSED (regno) == 0) + add_dependence (sched_before_next_call, insn, REG_DEP_ANTI); + } + return; + } + + case MEM: + { + /* Reading memory. */ + rtx u; + rtx pending, pending_mem; + + pending = pending_read_insns; + pending_mem = pending_read_mems; + while (pending) + { + /* If a dependency already exists, don't create a new one. */ + if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn))) + if (read_dependence (XEXP (pending_mem, 0), x)) + add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI); + + pending = XEXP (pending, 1); + pending_mem = XEXP (pending_mem, 1); + } + + pending = pending_write_insns; + pending_mem = pending_write_mems; + while (pending) + { + /* If a dependency already exists, don't create a new one. */ + if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn))) + if (true_dependence (XEXP (pending_mem, 0), VOIDmode, + x, rtx_varies_p)) + add_dependence (insn, XEXP (pending, 0), 0); + + pending = XEXP (pending, 1); + pending_mem = XEXP (pending_mem, 1); + } + + for (u = last_pending_memory_flush; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + + /* Always add these dependencies to pending_reads, since + this insn may be followed by a write. */ + add_insn_mem_dependence (&pending_read_insns, &pending_read_mems, + insn, x); + + /* Take advantage of tail recursion here. */ + sched_analyze_2 (XEXP (x, 0), insn); + return; + } + + case ASM_OPERANDS: + case ASM_INPUT: + case UNSPEC_VOLATILE: + case TRAP_IF: + { + rtx u; + + /* Traditional and volatile asm instructions must be considered to use + and clobber all hard registers, all pseudo-registers and all of + memory. So must TRAP_IF and UNSPEC_VOLATILE operations. + + Consider for instance a volatile asm that changes the fpu rounding + mode. An insn should not be moved across this even if it only uses + pseudo-regs because it might give an incorrectly rounded result. */ + if (code != ASM_OPERANDS || MEM_VOLATILE_P (x)) + { + int max_reg = max_reg_num (); + for (i = 0; i < max_reg; i++) + { + for (u = reg_last_uses[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + reg_last_uses[i] = 0; + + /* reg_last_sets[r] is now a list of insns */ + for (u = reg_last_sets[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), 0); + } + reg_pending_sets_all = 1; + + flush_pending_lists (insn, 0); + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We can not just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms unlike their normal usage. */ + + if (code == ASM_OPERANDS) + { + for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++) + sched_analyze_2 (ASM_OPERANDS_INPUT (x, j), insn); + return; + } + break; + } + + case PRE_DEC: + case POST_DEC: + case PRE_INC: + case POST_INC: + /* These both read and modify the result. We must handle them as writes + to get proper dependencies for following instructions. We must handle + them as reads to get proper dependencies from this to previous + instructions. Thus we need to pass them to both sched_analyze_1 + and sched_analyze_2. We must call sched_analyze_2 first in order + to get the proper antecedent for the read. */ + sched_analyze_2 (XEXP (x, 0), insn); + sched_analyze_1 (x, insn); + return; + } + + /* Other cases: walk the insn. */ + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + sched_analyze_2 (XEXP (x, i), insn); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + sched_analyze_2 (XVECEXP (x, i, j), insn); + } +} + +/* Analyze an INSN with pattern X to find all dependencies. */ + +static void +sched_analyze_insn (x, insn, loop_notes) + rtx x, insn; + rtx loop_notes; +{ + register RTX_CODE code = GET_CODE (x); + rtx link; + int maxreg = max_reg_num (); + int i; + + if (code == SET || code == CLOBBER) + sched_analyze_1 (x, insn); + else if (code == PARALLEL) + { + register int i; + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + { + code = GET_CODE (XVECEXP (x, 0, i)); + if (code == SET || code == CLOBBER) + sched_analyze_1 (XVECEXP (x, 0, i), insn); + else + sched_analyze_2 (XVECEXP (x, 0, i), insn); + } + } + else + sched_analyze_2 (x, insn); + + /* Mark registers CLOBBERED or used by called function. */ + if (GET_CODE (insn) == CALL_INSN) + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + if (GET_CODE (XEXP (link, 0)) == CLOBBER) + sched_analyze_1 (XEXP (link, 0), insn); + else + sched_analyze_2 (XEXP (link, 0), insn); + } + + /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic block, then + we must be sure that no instructions are scheduled across it. + Otherwise, the reg_n_refs info (which depends on loop_depth) would + become incorrect. */ + + if (loop_notes) + { + int max_reg = max_reg_num (); + rtx link; + + for (i = 0; i < max_reg; i++) + { + rtx u; + for (u = reg_last_uses[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + reg_last_uses[i] = 0; + + /* reg_last_sets[r] is now a list of insns */ + for (u = reg_last_sets[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), 0); + } + reg_pending_sets_all = 1; + + flush_pending_lists (insn, 0); + + link = loop_notes; + while (XEXP (link, 1)) + link = XEXP (link, 1); + XEXP (link, 1) = REG_NOTES (insn); + REG_NOTES (insn) = loop_notes; + } + + /* After reload, it is possible for an instruction to have a REG_DEAD note + for a register that actually dies a few instructions earlier. For + example, this can happen with SECONDARY_MEMORY_NEEDED reloads. + In this case, we must consider the insn to use the register mentioned + in the REG_DEAD note. Otherwise, we may accidentally move this insn + after another insn that sets the register, thus getting obviously invalid + rtl. This confuses reorg which believes that REG_DEAD notes are still + meaningful. + + ??? We would get better code if we fixed reload to put the REG_DEAD + notes in the right places, but that may not be worth the effort. */ + + if (reload_completed) + { + rtx note; + + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) + if (REG_NOTE_KIND (note) == REG_DEAD) + sched_analyze_2 (XEXP (note, 0), insn); + } + + EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i, + { + /* reg_last_sets[r] is now a list of insns */ + reg_last_sets[i] + = gen_rtx (INSN_LIST, VOIDmode, insn, 0); + }); + CLEAR_REG_SET (reg_pending_sets); + + if (reg_pending_sets_all) + { + for (i = 0; i < maxreg; i++) + + /* reg_last_sets[r] is now a list of insns */ + reg_last_sets[i] + = gen_rtx (INSN_LIST, VOIDmode, insn, 0); + + reg_pending_sets_all = 0; + } + + /* Handle function calls and function returns created by the epilogue + threading code. */ + if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN) + { + rtx dep_insn; + rtx prev_dep_insn; + + /* When scheduling instructions, we make sure calls don't lose their + accompanying USE insns by depending them one on another in order. + + Also, we must do the same thing for returns created by the epilogue + threading code. Note this code works only in this special case, + because other passes make no guarantee that they will never emit + an instruction between a USE and a RETURN. There is such a guarantee + for USE instructions immediately before a call. */ + + prev_dep_insn = insn; + dep_insn = PREV_INSN (insn); + while (GET_CODE (dep_insn) == INSN + && GET_CODE (PATTERN (dep_insn)) == USE + && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == REG) + { + SCHED_GROUP_P (prev_dep_insn) = 1; + + /* Make a copy of all dependencies on dep_insn, and add to insn. + This is so that all of the dependencies will apply to the + group. */ + + for (link = LOG_LINKS (dep_insn); link; link = XEXP (link, 1)) + add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link)); + + prev_dep_insn = dep_insn; + dep_insn = PREV_INSN (dep_insn); + } + } +} + +/* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS + for every dependency. */ + +static void +sched_analyze (head, tail) + rtx head, tail; +{ + register rtx insn; + register rtx u; + rtx loop_notes = 0; + + for (insn = head;; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN) + { + sched_analyze_insn (PATTERN (insn), insn, loop_notes); + loop_notes = 0; + } + else if (GET_CODE (insn) == CALL_INSN) + { + rtx x; + register int i; + + CANT_MOVE (insn) = 1; + + /* Any instruction using a hard register which may get clobbered + by a call needs to be marked as dependent on this call. + This prevents a use of a hard return reg from being moved + past a void call (i.e. it does not explicitly set the hard + return reg). */ + + /* If this call is followed by a NOTE_INSN_SETJMP, then assume that + all registers, not just hard registers, may be clobbered by this + call. */ + + /* Insn, being a CALL_INSN, magically depends on + `last_function_call' already. */ + + if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE + && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP) + { + int max_reg = max_reg_num (); + for (i = 0; i < max_reg; i++) + { + for (u = reg_last_uses[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + + reg_last_uses[i] = 0; + + /* reg_last_sets[r] is now a list of insns */ + for (u = reg_last_sets[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), 0); + } + reg_pending_sets_all = 1; + + /* Add a pair of fake REG_NOTE which we will later + convert back into a NOTE_INSN_SETJMP note. See + reemit_notes for why we use a pair of NOTEs. */ + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_DEAD, + GEN_INT (0), + REG_NOTES (insn)); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_DEAD, + GEN_INT (NOTE_INSN_SETJMP), + REG_NOTES (insn)); + } + else + { + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (call_used_regs[i] || global_regs[i]) + { + for (u = reg_last_uses[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + reg_last_uses[i] = 0; + + /* reg_last_sets[r] is now a list of insns */ + for (u = reg_last_sets[i]; u; u = XEXP (u, 1)) + add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI); + + SET_REGNO_REG_SET (reg_pending_sets, i); + } + } + + /* For each insn which shouldn't cross a call, add a dependence + between that insn and this call insn. */ + x = LOG_LINKS (sched_before_next_call); + while (x) + { + add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI); + x = XEXP (x, 1); + } + LOG_LINKS (sched_before_next_call) = 0; + + sched_analyze_insn (PATTERN (insn), insn, loop_notes); + loop_notes = 0; + + /* In the absence of interprocedural alias analysis, we must flush + all pending reads and writes, and start new dependencies starting + from here. But only flush writes for constant calls (which may + be passed a pointer to something we haven't written yet). */ + flush_pending_lists (insn, CONST_CALL_P (insn)); + + /* Depend this function call (actually, the user of this + function call) on all hard register clobberage. */ + + /* last_function_call is now a list of insns */ + last_function_call + = gen_rtx (INSN_LIST, VOIDmode, insn, 0); + } + + /* See comments on reemit_notes as to why we do this. */ + else if (GET_CODE (insn) == NOTE + && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG + || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END + || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG + || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END + || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP + && GET_CODE (PREV_INSN (insn)) != CALL_INSN))) + { + loop_notes = gen_rtx (EXPR_LIST, REG_DEAD, + GEN_INT (NOTE_BLOCK_NUMBER (insn)), loop_notes); + loop_notes = gen_rtx (EXPR_LIST, REG_DEAD, + GEN_INT (NOTE_LINE_NUMBER (insn)), loop_notes); + CONST_CALL_P (loop_notes) = CONST_CALL_P (insn); + } + + if (insn == tail) + return; + } + abort (); +} + +/* Called when we see a set of a register. If death is true, then we are + scanning backwards. Mark that register as unborn. If nobody says + otherwise, that is how things will remain. If death is false, then we + are scanning forwards. Mark that register as being born. */ + +static void +sched_note_set (b, x, death) + int b; + rtx x; + int death; +{ + register int regno; + register rtx reg = SET_DEST (x); + int subreg_p = 0; + + if (reg == 0) + return; + + while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == STRICT_LOW_PART + || GET_CODE (reg) == SIGN_EXTRACT || GET_CODE (reg) == ZERO_EXTRACT) + { + /* Must treat modification of just one hardware register of a multi-reg + value or just a byte field of a register exactly the same way that + mark_set_1 in flow.c does, i.e. anything except a paradoxical subreg + does not kill the entire register. */ + if (GET_CODE (reg) != SUBREG + || REG_SIZE (SUBREG_REG (reg)) > REG_SIZE (reg)) + subreg_p = 1; + + reg = SUBREG_REG (reg); + } + + if (GET_CODE (reg) != REG) + return; + + /* Global registers are always live, so the code below does not apply + to them. */ + + regno = REGNO (reg); + if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno]) + { + if (death) + { + /* If we only set part of the register, then this set does not + kill it. */ + if (subreg_p) + return; + + /* Try killing this register. */ + if (regno < FIRST_PSEUDO_REGISTER) + { + int j = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + while (--j >= 0) + { + CLEAR_REGNO_REG_SET (bb_live_regs, regno + j); + } + } + else + { + /* Recompute REG_BASIC_BLOCK as we update all the other + dataflow information. */ + if (sched_reg_basic_block[regno] == REG_BLOCK_UNKNOWN) + sched_reg_basic_block[regno] = current_block_num; + else if (sched_reg_basic_block[regno] != current_block_num) + sched_reg_basic_block[regno] = REG_BLOCK_GLOBAL; + + CLEAR_REGNO_REG_SET (bb_live_regs, regno); + } + } + else + { + /* Make the register live again. */ + if (regno < FIRST_PSEUDO_REGISTER) + { + int j = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + while (--j >= 0) + { + SET_REGNO_REG_SET (bb_live_regs, regno + j); + } + } + else + { + SET_REGNO_REG_SET (bb_live_regs, regno); + } + } + } +} + +/* Macros and functions for keeping the priority queue sorted, and + dealing with queueing and dequeueing of instructions. */ + +#define SCHED_SORT(READY, N_READY) \ +do { if ((N_READY) == 2) \ + swap_sort (READY, N_READY); \ + else if ((N_READY) > 2) \ + qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \ +while (0) + +/* Returns a positive value if x is preferred; returns a negative value if + y is preferred. Should never return 0, since that will make the sort + unstable. */ + +static int +rank_for_schedule (x, y) + rtx *x, *y; +{ + rtx tmp = *y; + rtx tmp2 = *x; + rtx link; + int tmp_class, tmp2_class; + int val, priority_val, spec_val, prob_val, weight_val; + + + /* schedule reverse is a stress test of the scheduler correctness, + controlled by -fsched-reverse option. */ + if ((reload_completed && flag_schedule_reverse_after_reload) || + (!reload_completed && flag_schedule_reverse_before_reload)) + return INSN_LUID (tmp2) - INSN_LUID (tmp); + + /* prefer insn with higher priority */ + priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp); + if (priority_val) + return priority_val; + + /* prefer an insn with smaller contribution to registers-pressure */ + if (!reload_completed && + (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2))) + return (weight_val); + + /* some comparison make sense in interblock scheduling only */ + if (INSN_BB (tmp) != INSN_BB (tmp2)) + { + /* prefer an inblock motion on an interblock motion */ + if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb)) + return 1; + if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb)) + return -1; + + /* prefer a useful motion on a speculative one */ + if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2))) + return (spec_val); + + /* prefer a more probable (speculative) insn */ + prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp); + if (prob_val) + return (prob_val); + } + + /* compare insns based on their relation to the last-scheduled-insn */ + if (last_scheduled_insn) + { + /* Classify the instructions into three classes: + 1) Data dependent on last schedule insn. + 2) Anti/Output dependent on last scheduled insn. + 3) Independent of last scheduled insn, or has latency of one. + Choose the insn from the highest numbered class if different. */ + link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn)); + if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1) + tmp_class = 3; + else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */ + tmp_class = 1; + else + tmp_class = 2; + + link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn)); + if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1) + tmp2_class = 3; + else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */ + tmp2_class = 1; + else + tmp2_class = 2; + + if ((val = tmp2_class - tmp_class)) + return val; + } + + /* If insns are equally good, sort by INSN_LUID (original insn order), + so that we make the sort stable. This minimizes instruction movement, + thus minimizing sched's effect on debugging and cross-jumping. */ + return INSN_LUID (tmp) - INSN_LUID (tmp2); +} + +/* Resort the array A in which only element at index N may be out of order. */ + +__inline static void +swap_sort (a, n) + rtx *a; + int n; +{ + rtx insn = a[n - 1]; + int i = n - 2; + + while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0) + { + a[i + 1] = a[i]; + i -= 1; + } + a[i + 1] = insn; +} + +static int max_priority; + +/* Add INSN to the insn queue so that it can be executed at least + N_CYCLES after the currently executing insn. Preserve insns + chain for debugging purposes. */ + +__inline static void +queue_insn (insn, n_cycles) + rtx insn; + int n_cycles; +{ + int next_q = NEXT_Q_AFTER (q_ptr, n_cycles); + rtx link = rtx_alloc (INSN_LIST); + XEXP (link, 0) = insn; + XEXP (link, 1) = insn_queue[next_q]; + insn_queue[next_q] = link; + q_size += 1; + + if (sched_verbose >= 2) + { + fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn)); + + if (INSN_BB (insn) != target_bb) + fprintf (dump, "(b%d) ", INSN_BLOCK (insn)); + + fprintf (dump, "queued for %d cycles.\n", n_cycles); + } + +} + +/* Return nonzero if PAT is the pattern of an insn which makes a + register live. */ + +__inline static int +birthing_insn_p (pat) + rtx pat; +{ + int j; + + if (reload_completed == 1) + return 0; + + if (GET_CODE (pat) == SET + && GET_CODE (SET_DEST (pat)) == REG) + { + rtx dest = SET_DEST (pat); + int i = REGNO (dest); + + /* It would be more accurate to use refers_to_regno_p or + reg_mentioned_p to determine when the dest is not live before this + insn. */ + + if (REGNO_REG_SET_P (bb_live_regs, i)) + return (REG_N_SETS (i) == 1); + + return 0; + } + if (GET_CODE (pat) == PARALLEL) + { + for (j = 0; j < XVECLEN (pat, 0); j++) + if (birthing_insn_p (XVECEXP (pat, 0, j))) + return 1; + } + return 0; +} + +/* PREV is an insn that is ready to execute. Adjust its priority if that + will help shorten register lifetimes. */ + +__inline static void +adjust_priority (prev) + rtx prev; +{ + /* Trying to shorten register lives after reload has completed + is useless and wrong. It gives inaccurate schedules. */ + if (reload_completed == 0) + { + rtx note; + int n_deaths = 0; + + /* ??? This code has no effect, because REG_DEAD notes are removed + before we ever get here. */ + for (note = REG_NOTES (prev); note; note = XEXP (note, 1)) + if (REG_NOTE_KIND (note) == REG_DEAD) + n_deaths += 1; + + /* Defer scheduling insns which kill registers, since that + shortens register lives. Prefer scheduling insns which + make registers live for the same reason. */ + switch (n_deaths) + { + default: + INSN_PRIORITY (prev) >>= 3; + break; + case 3: + INSN_PRIORITY (prev) >>= 2; + break; + case 2: + case 1: + INSN_PRIORITY (prev) >>= 1; + break; + case 0: + if (birthing_insn_p (PATTERN (prev))) + { + int max = max_priority; + + if (max > INSN_PRIORITY (prev)) + INSN_PRIORITY (prev) = max; + } + break; + } +#ifdef ADJUST_PRIORITY + ADJUST_PRIORITY (prev); +#endif + } +} + +/* INSN is the "currently executing insn". Launch each insn which was + waiting on INSN. READY is a vector of insns which are ready to fire. + N_READY is the number of elements in READY. CLOCK is the current + cycle. */ + +static int +schedule_insn (insn, ready, n_ready, clock) + rtx insn; + rtx *ready; + int n_ready; + int clock; +{ + rtx link; + int unit; + + unit = insn_unit (insn); + + if (sched_verbose >= 2) + { + fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ", INSN_UID (insn)); + insn_print_units (insn); + fprintf (dump, "\n"); + } + + if (sched_verbose && unit == -1) + visualize_no_unit (insn); + + if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose) + schedule_unit (unit, insn, clock); + + if (INSN_DEPEND (insn) == 0) + return n_ready; + + /* This is used by the function adjust_priority above. */ + if (n_ready > 0) + max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn)); + else + max_priority = INSN_PRIORITY (insn); + + for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) + { + rtx next = XEXP (link, 0); + int cost = insn_cost (insn, link, next); + + INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost); + + if ((INSN_DEP_COUNT (next) -= 1) == 0) + { + int effective_cost = INSN_TICK (next) - clock; + + /* For speculative insns, before inserting to ready/queue, + check live, exception-free, and issue-delay */ + if (INSN_BB (next) != target_bb + && (!IS_VALID (INSN_BB (next)) + || CANT_MOVE (next) + || (IS_SPECULATIVE_INSN (next) + && (insn_issue_delay (next) > 3 + || !check_live (next, INSN_BB (next), target_bb) + || !is_exception_free (next, INSN_BB (next), target_bb))))) + continue; + + if (sched_verbose >= 2) + { + fprintf (dump, ";;\t\tdependences resolved: insn %d ", INSN_UID (next)); + + if (current_nr_blocks > 1 && INSN_BB (next) != target_bb) + fprintf (dump, "/b%d ", INSN_BLOCK (next)); + + if (effective_cost <= 1) + fprintf (dump, "into ready\n"); + else + fprintf (dump, "into queue with cost=%d\n", effective_cost); + } + + /* Adjust the priority of NEXT and either put it on the ready + list or queue it. */ + adjust_priority (next); + if (effective_cost <= 1) + ready[n_ready++] = next; + else + queue_insn (next, effective_cost); + } + } + + return n_ready; +} + + +/* Add a REG_DEAD note for REG to INSN, reusing a REG_DEAD note from the + dead_notes list. */ + +static void +create_reg_dead_note (reg, insn) + rtx reg, insn; +{ + rtx link; + + /* The number of registers killed after scheduling must be the same as the + number of registers killed before scheduling. The number of REG_DEAD + notes may not be conserved, i.e. two SImode hard register REG_DEAD notes + might become one DImode hard register REG_DEAD note, but the number of + registers killed will be conserved. + + We carefully remove REG_DEAD notes from the dead_notes list, so that + there will be none left at the end. If we run out early, then there + is a bug somewhere in flow, combine and/or sched. */ + + if (dead_notes == 0) + { + if (current_nr_blocks <= 1) + abort (); + else + { + link = rtx_alloc (EXPR_LIST); + PUT_REG_NOTE_KIND (link, REG_DEAD); + } + } + else + { + /* Number of regs killed by REG. */ + int regs_killed = (REGNO (reg) >= FIRST_PSEUDO_REGISTER ? 1 + : HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg))); + /* Number of regs killed by REG_DEAD notes taken off the list. */ + int reg_note_regs; + + link = dead_notes; + reg_note_regs = (REGNO (XEXP (link, 0)) >= FIRST_PSEUDO_REGISTER ? 1 + : HARD_REGNO_NREGS (REGNO (XEXP (link, 0)), + GET_MODE (XEXP (link, 0)))); + while (reg_note_regs < regs_killed) + { + link = XEXP (link, 1); + reg_note_regs += (REGNO (XEXP (link, 0)) >= FIRST_PSEUDO_REGISTER ? 1 + : HARD_REGNO_NREGS (REGNO (XEXP (link, 0)), + GET_MODE (XEXP (link, 0)))); + } + dead_notes = XEXP (link, 1); + + /* If we took too many regs kills off, put the extra ones back. */ + while (reg_note_regs > regs_killed) + { + rtx temp_reg, temp_link; + + temp_reg = gen_rtx (REG, word_mode, 0); + temp_link = rtx_alloc (EXPR_LIST); + PUT_REG_NOTE_KIND (temp_link, REG_DEAD); + XEXP (temp_link, 0) = temp_reg; + XEXP (temp_link, 1) = dead_notes; + dead_notes = temp_link; + reg_note_regs--; + } + } + + XEXP (link, 0) = reg; + XEXP (link, 1) = REG_NOTES (insn); + REG_NOTES (insn) = link; +} + +/* Subroutine on attach_deaths_insn--handles the recursive search + through INSN. If SET_P is true, then x is being modified by the insn. */ + +static void +attach_deaths (x, insn, set_p) + rtx x; + rtx insn; + int set_p; +{ + register int i; + register int j; + register enum rtx_code code; + register char *fmt; + + if (x == 0) + return; + + code = GET_CODE (x); + + switch (code) + { + case CONST_INT: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + case CONST: + case CODE_LABEL: + case PC: + case CC0: + /* Get rid of the easy cases first. */ + return; + + case REG: + { + /* If the register dies in this insn, queue that note, and mark + this register as needing to die. */ + /* This code is very similar to mark_used_1 (if set_p is false) + and mark_set_1 (if set_p is true) in flow.c. */ + + register int regno; + int some_needed; + int all_needed; + + if (set_p) + return; + + regno = REGNO (x); + all_needed = some_needed = REGNO_REG_SET_P (old_live_regs, regno); + if (regno < FIRST_PSEUDO_REGISTER) + { + int n; + + n = HARD_REGNO_NREGS (regno, GET_MODE (x)); + while (--n > 0) + { + int needed = (REGNO_REG_SET_P (old_live_regs, regno + n)); + some_needed |= needed; + all_needed &= needed; + } + } + + /* If it wasn't live before we started, then add a REG_DEAD note. + We must check the previous lifetime info not the current info, + because we may have to execute this code several times, e.g. + once for a clobber (which doesn't add a note) and later + for a use (which does add a note). + + Always make the register live. We must do this even if it was + live before, because this may be an insn which sets and uses + the same register, in which case the register has already been + killed, so we must make it live again. + + Global registers are always live, and should never have a REG_DEAD + note added for them, so none of the code below applies to them. */ + + if (regno >= FIRST_PSEUDO_REGISTER || ! global_regs[regno]) + { + /* Never add REG_DEAD notes for the FRAME_POINTER_REGNUM or the + STACK_POINTER_REGNUM, since these are always considered to be + live. Similarly for ARG_POINTER_REGNUM if it is fixed. */ + if (regno != FRAME_POINTER_REGNUM +#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM + && ! (regno == HARD_FRAME_POINTER_REGNUM) +#endif +#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM + && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno]) +#endif + && regno != STACK_POINTER_REGNUM) + { + /* ??? It is perhaps a dead_or_set_p bug that it does + not check for REG_UNUSED notes itself. This is necessary + for the case where the SET_DEST is a subreg of regno, as + dead_or_set_p handles subregs specially. */ + if (! all_needed && ! dead_or_set_p (insn, x) + && ! find_reg_note (insn, REG_UNUSED, x)) + { + /* Check for the case where the register dying partially + overlaps the register set by this insn. */ + if (regno < FIRST_PSEUDO_REGISTER + && HARD_REGNO_NREGS (regno, GET_MODE (x)) > 1) + { + int n = HARD_REGNO_NREGS (regno, GET_MODE (x)); + while (--n >= 0) + some_needed |= dead_or_set_regno_p (insn, regno + n); + } + + /* If none of the words in X is needed, make a REG_DEAD + note. Otherwise, we must make partial REG_DEAD + notes. */ + if (! some_needed) + create_reg_dead_note (x, insn); + else + { + int i; + + /* Don't make a REG_DEAD note for a part of a + register that is set in the insn. */ + for (i = HARD_REGNO_NREGS (regno, GET_MODE (x)) - 1; + i >= 0; i--) + if (! REGNO_REG_SET_P (old_live_regs, regno+i) + && ! dead_or_set_regno_p (insn, regno + i)) + create_reg_dead_note (gen_rtx (REG, + reg_raw_mode[regno + i], + regno + i), + insn); + } + } + } + + if (regno < FIRST_PSEUDO_REGISTER) + { + int j = HARD_REGNO_NREGS (regno, GET_MODE (x)); + while (--j >= 0) + { + SET_REGNO_REG_SET (bb_live_regs, regno + j); + } + } + else + { + /* Recompute REG_BASIC_BLOCK as we update all the other + dataflow information. */ + if (sched_reg_basic_block[regno] == REG_BLOCK_UNKNOWN) + sched_reg_basic_block[regno] = current_block_num; + else if (sched_reg_basic_block[regno] != current_block_num) + sched_reg_basic_block[regno] = REG_BLOCK_GLOBAL; + + SET_REGNO_REG_SET (bb_live_regs, regno); + } + } + return; + } + + case MEM: + /* Handle tail-recursive case. */ + attach_deaths (XEXP (x, 0), insn, 0); + return; + + case SUBREG: + case STRICT_LOW_PART: + /* These two cases preserve the value of SET_P, so handle them + separately. */ + attach_deaths (XEXP (x, 0), insn, set_p); + return; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + /* This case preserves the value of SET_P for the first operand, but + clears it for the other two. */ + attach_deaths (XEXP (x, 0), insn, set_p); + attach_deaths (XEXP (x, 1), insn, 0); + attach_deaths (XEXP (x, 2), insn, 0); + return; + + default: + /* Other cases: walk the insn. */ + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + attach_deaths (XEXP (x, i), insn, 0); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + attach_deaths (XVECEXP (x, i, j), insn, 0); + } + } +} + +/* After INSN has executed, add register death notes for each register + that is dead after INSN. */ + +static void +attach_deaths_insn (insn) + rtx insn; +{ + rtx x = PATTERN (insn); + register RTX_CODE code = GET_CODE (x); + rtx link; + + if (code == SET) + { + attach_deaths (SET_SRC (x), insn, 0); + + /* A register might die here even if it is the destination, e.g. + it is the target of a volatile read and is otherwise unused. + Hence we must always call attach_deaths for the SET_DEST. */ + attach_deaths (SET_DEST (x), insn, 1); + } + else if (code == PARALLEL) + { + register int i; + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + { + code = GET_CODE (XVECEXP (x, 0, i)); + if (code == SET) + { + attach_deaths (SET_SRC (XVECEXP (x, 0, i)), insn, 0); + + attach_deaths (SET_DEST (XVECEXP (x, 0, i)), insn, 1); + } + /* Flow does not add REG_DEAD notes to registers that die in + clobbers, so we can't either. */ + else if (code != CLOBBER) + attach_deaths (XVECEXP (x, 0, i), insn, 0); + } + } + /* If this is a CLOBBER, only add REG_DEAD notes to registers inside a + MEM being clobbered, just like flow. */ + else if (code == CLOBBER && GET_CODE (XEXP (x, 0)) == MEM) + attach_deaths (XEXP (XEXP (x, 0), 0), insn, 0); + /* Otherwise don't add a death note to things being clobbered. */ + else if (code != CLOBBER) + attach_deaths (x, insn, 0); + + /* Make death notes for things used in the called function. */ + if (GET_CODE (insn) == CALL_INSN) + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + attach_deaths (XEXP (XEXP (link, 0), 0), insn, + GET_CODE (XEXP (link, 0)) == CLOBBER); +} + +/* functions for handlnig of notes */ + +/* Delete notes beginning with INSN and put them in the chain + of notes ended by NOTE_LIST. + Returns the insn following the notes. */ + +static rtx +unlink_other_notes (insn, tail) + rtx insn, tail; +{ + rtx prev = PREV_INSN (insn); + + while (insn != tail && GET_CODE (insn) == NOTE) + { + rtx next = NEXT_INSN (insn); + /* Delete the note from its current position. */ + if (prev) + NEXT_INSN (prev) = next; + if (next) + PREV_INSN (next) = prev; + + /* Don't save away NOTE_INSN_SETJMPs, because they must remain + immediately after the call they follow. We use a fake + (REG_DEAD (const_int -1)) note to remember them. + Likewise with NOTE_INSN_{LOOP,EHREGION}_{BEG, END}. */ + if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END) + { + /* Insert the note at the end of the notes list. */ + PREV_INSN (insn) = note_list; + if (note_list) + NEXT_INSN (note_list) = insn; + note_list = insn; + } + + insn = next; + } + return insn; +} + +/* Delete line notes beginning with INSN. Record line-number notes so + they can be reused. Returns the insn following the notes. */ + +static rtx +unlink_line_notes (insn, tail) + rtx insn, tail; +{ + rtx prev = PREV_INSN (insn); + + while (insn != tail && GET_CODE (insn) == NOTE) + { + rtx next = NEXT_INSN (insn); + + if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0) + { + /* Delete the note from its current position. */ + if (prev) + NEXT_INSN (prev) = next; + if (next) + PREV_INSN (next) = prev; + + /* Record line-number notes so they can be reused. */ + LINE_NOTE (insn) = insn; + } + else + prev = insn; + + insn = next; + } + return insn; +} + +/* Return the head and tail pointers of BB. */ + +__inline static void +get_block_head_tail (bb, headp, tailp) + int bb; + rtx *headp; + rtx *tailp; +{ + + rtx head = *headp; + rtx tail = *tailp; + int b; + + b = BB_TO_BLOCK (bb); + + /* HEAD and TAIL delimit the basic block being scheduled. */ + head = basic_block_head[b]; + tail = basic_block_end[b]; + + /* Don't include any notes or labels at the beginning of the + basic block, or notes at the ends of basic blocks. */ + while (head != tail) + { + if (GET_CODE (head) == NOTE) + head = NEXT_INSN (head); + else if (GET_CODE (tail) == NOTE) + tail = PREV_INSN (tail); + else if (GET_CODE (head) == CODE_LABEL) + head = NEXT_INSN (head); + else + break; + } + + *headp = head; + *tailp = tail; +} + +/* Delete line notes from bb. Save them so they can be later restored + (in restore_line_notes ()). */ + +static void +rm_line_notes (bb) + int bb; +{ + rtx next_tail; + rtx tail; + rtx head; + rtx insn; + + get_block_head_tail (bb, &head, &tail); + + if (head == tail + && (GET_RTX_CLASS (GET_CODE (head)) != 'i')) + return; + + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + { + rtx prev; + + /* Farm out notes, and maybe save them in NOTE_LIST. + This is needed to keep the debugger from + getting completely deranged. */ + if (GET_CODE (insn) == NOTE) + { + prev = insn; + insn = unlink_line_notes (insn, next_tail); + + if (prev == tail) + abort (); + if (prev == head) + abort (); + if (insn == next_tail) + abort (); + } + } +} + +/* Save line number notes for each insn in bb. */ + +static void +save_line_notes (bb) + int bb; +{ + rtx head, tail; + rtx next_tail; + + /* We must use the true line number for the first insn in the block + that was computed and saved at the start of this pass. We can't + use the current line number, because scheduling of the previous + block may have changed the current line number. */ + + rtx line = line_note_head[BB_TO_BLOCK (bb)]; + rtx insn; + + get_block_head_tail (bb, &head, &tail); + next_tail = NEXT_INSN (tail); + + for (insn = basic_block_head[BB_TO_BLOCK (bb)]; + insn != next_tail; + insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0) + line = insn; + else + LINE_NOTE (insn) = line; +} + + +/* After bb was scheduled, insert line notes into the insns list. */ + +static void +restore_line_notes (bb) + int bb; +{ + rtx line, note, prev, new; + int added_notes = 0; + int b; + rtx head, next_tail, insn; + + b = BB_TO_BLOCK (bb); + + head = basic_block_head[b]; + next_tail = NEXT_INSN (basic_block_end[b]); + + /* Determine the current line-number. We want to know the current + line number of the first insn of the block here, in case it is + different from the true line number that was saved earlier. If + different, then we need a line number note before the first insn + of this block. If it happens to be the same, then we don't want to + emit another line number note here. */ + for (line = head; line; line = PREV_INSN (line)) + if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0) + break; + + /* Walk the insns keeping track of the current line-number and inserting + the line-number notes as needed. */ + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0) + line = insn; + /* This used to emit line number notes before every non-deleted note. + However, this confuses a debugger, because line notes not separated + by real instructions all end up at the same address. I can find no + use for line number notes before other notes, so none are emitted. */ + else if (GET_CODE (insn) != NOTE + && (note = LINE_NOTE (insn)) != 0 + && note != line + && (line == 0 + || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line) + || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line))) + { + line = note; + prev = PREV_INSN (insn); + if (LINE_NOTE (note)) + { + /* Re-use the original line-number note. */ + LINE_NOTE (note) = 0; + PREV_INSN (note) = prev; + NEXT_INSN (prev) = note; + PREV_INSN (insn) = note; + NEXT_INSN (note) = insn; + } + else + { + added_notes++; + new = emit_note_after (NOTE_LINE_NUMBER (note), prev); + NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note); + RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note); + } + } + if (sched_verbose && added_notes) + fprintf (dump, ";; added %d line-number notes\n", added_notes); +} + +/* After scheduling the function, delete redundant line notes from the + insns list. */ + +static void +rm_redundant_line_notes () +{ + rtx line = 0; + rtx insn = get_insns (); + int active_insn = 0; + int notes = 0; + + /* Walk the insns deleting redundant line-number notes. Many of these + are already present. The remainder tend to occur at basic + block boundaries. */ + for (insn = get_last_insn (); insn; insn = PREV_INSN (insn)) + if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0) + { + /* If there are no active insns following, INSN is redundant. */ + if (active_insn == 0) + { + notes++; + NOTE_SOURCE_FILE (insn) = 0; + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + } + /* If the line number is unchanged, LINE is redundant. */ + else if (line + && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn) + && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn)) + { + notes++; + NOTE_SOURCE_FILE (line) = 0; + NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED; + line = insn; + } + else + line = insn; + active_insn = 0; + } + else if (!((GET_CODE (insn) == NOTE + && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED) + || (GET_CODE (insn) == INSN + && (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER)))) + active_insn++; + + if (sched_verbose && notes) + fprintf (dump, ";; deleted %d line-number notes\n", notes); +} + +/* Delete notes between head and tail and put them in the chain + of notes ended by NOTE_LIST. */ + +static void +rm_other_notes (head, tail) + rtx head; + rtx tail; +{ + rtx next_tail; + rtx insn; + + if (head == tail + && (GET_RTX_CLASS (GET_CODE (head)) != 'i')) + return; + + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + { + rtx prev; + + /* Farm out notes, and maybe save them in NOTE_LIST. + This is needed to keep the debugger from + getting completely deranged. */ + if (GET_CODE (insn) == NOTE) + { + prev = insn; + + insn = unlink_other_notes (insn, next_tail); + + if (prev == tail) + abort (); + if (prev == head) + abort (); + if (insn == next_tail) + abort (); + } + } +} + +/* Constructor for `sometimes' data structure. */ + +static int +new_sometimes_live (regs_sometimes_live, regno, sometimes_max) + struct sometimes *regs_sometimes_live; + int regno; + int sometimes_max; +{ + register struct sometimes *p; + + /* There should never be a register greater than max_regno here. If there + is, it means that a define_split has created a new pseudo reg. This + is not allowed, since there will not be flow info available for any + new register, so catch the error here. */ + if (regno >= max_regno) + abort (); + + p = ®s_sometimes_live[sometimes_max]; + p->regno = regno; + p->live_length = 0; + p->calls_crossed = 0; + sometimes_max++; + return sometimes_max; +} + +/* Count lengths of all regs we are currently tracking, + and find new registers no longer live. */ + +static void +finish_sometimes_live (regs_sometimes_live, sometimes_max) + struct sometimes *regs_sometimes_live; + int sometimes_max; +{ + int i; + + for (i = 0; i < sometimes_max; i++) + { + register struct sometimes *p = ®s_sometimes_live[i]; + int regno = p->regno; + + sched_reg_live_length[regno] += p->live_length; + sched_reg_n_calls_crossed[regno] += p->calls_crossed; + } +} + +/* functions for computation of registers live/usage info */ + +/* It is assumed that prior to scheduling basic_block_live_at_start (b) + contains the registers that are alive at the entry to b. + + Two passes follow: The first pass is performed before the scheduling + of a region. It scans each block of the region forward, computing + the set of registers alive at the end of the basic block and + discard REG_DEAD notes (done by find_pre_sched_live ()). + + The second path is invoked after scheduling all region blocks. + It scans each block of the region backward, a block being traversed + only after its succesors in the region. When the set of registers + live at the end of a basic block may be changed by the scheduling + (this may happen for multiple blocks region), it is computed as + the union of the registers live at the start of its succesors. + The last-use information is updated by inserting REG_DEAD notes. + (done by find_post_sched_live ()) */ + +/* Scan all the insns to be scheduled, removing register death notes. + Register death notes end up in DEAD_NOTES. + Recreate the register life information for the end of this basic + block. */ + +static void +find_pre_sched_live (bb) + int bb; +{ + rtx insn, next_tail, head, tail; + int b = BB_TO_BLOCK (bb); + + get_block_head_tail (bb, &head, &tail); + COPY_REG_SET (bb_live_regs, basic_block_live_at_start[b]); + next_tail = NEXT_INSN (tail); + + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + { + rtx prev, next, link; + int reg_weight = 0; + + /* Handle register life information. */ + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + /* See if the register gets born here. */ + /* We must check for registers being born before we check for + registers dying. It is possible for a register to be born and + die in the same insn, e.g. reading from a volatile memory + location into an otherwise unused register. Such a register + must be marked as dead after this insn. */ + if (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == CLOBBER) + { + sched_note_set (b, PATTERN (insn), 0); + reg_weight++; + } + + else if (GET_CODE (PATTERN (insn)) == PARALLEL) + { + int j; + for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) + if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET + || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) + { + sched_note_set (b, XVECEXP (PATTERN (insn), 0, j), 0); + reg_weight++; + } + + /* ??? This code is obsolete and should be deleted. It + is harmless though, so we will leave it in for now. */ + for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) + if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == USE) + sched_note_set (b, XVECEXP (PATTERN (insn), 0, j), 0); + } + + /* Each call cobbers (makes live) all call-clobbered regs + that are not global or fixed. Note that the function-value + reg is a call_clobbered reg. */ + if (GET_CODE (insn) == CALL_INSN) + { + int j; + for (j = 0; j < FIRST_PSEUDO_REGISTER; j++) + if (call_used_regs[j] && !global_regs[j] + && ! fixed_regs[j]) + { + SET_REGNO_REG_SET (bb_live_regs, j); +#if 0 + CLEAR_REGNO_REG_SET (bb_dead_regs, j); +#endif + } + } + + /* Need to know what registers this insn kills. */ + for (prev = 0, link = REG_NOTES (insn); link; link = next) + { + next = XEXP (link, 1); + if ((REG_NOTE_KIND (link) == REG_DEAD + || REG_NOTE_KIND (link) == REG_UNUSED) + /* Verify that the REG_NOTE has a valid value. */ + && GET_CODE (XEXP (link, 0)) == REG) + { + register int regno = REGNO (XEXP (link, 0)); + + reg_weight--; + + /* Only unlink REG_DEAD notes; leave REG_UNUSED notes + alone. */ + if (REG_NOTE_KIND (link) == REG_DEAD) + { + if (prev) + XEXP (prev, 1) = next; + else + REG_NOTES (insn) = next; + XEXP (link, 1) = dead_notes; + dead_notes = link; + } + else + prev = link; + + if (regno < FIRST_PSEUDO_REGISTER) + { + int j = HARD_REGNO_NREGS (regno, + GET_MODE (XEXP (link, 0))); + while (--j >= 0) + { + CLEAR_REGNO_REG_SET (bb_live_regs, regno+j); + } + } + else + { + CLEAR_REGNO_REG_SET (bb_live_regs, regno); + } + } + else + prev = link; + } + } + + INSN_REG_WEIGHT (insn) = reg_weight; + } +} + +/* Update register life and usage information for block bb + after scheduling. Put register dead notes back in the code. */ + +static void +find_post_sched_live (bb) + int bb; +{ + int sometimes_max; + int j, i; + int b; + rtx insn; + rtx head, tail, prev_head, next_tail; + + register struct sometimes *regs_sometimes_live; + + b = BB_TO_BLOCK (bb); + + /* compute live regs at the end of bb as a function of its successors. */ + if (current_nr_blocks > 1) + { + int e; + int first_edge; + + first_edge = e = OUT_EDGES (b); + CLEAR_REG_SET (bb_live_regs); + + if (e) + do + { + int b_succ; + + b_succ = TO_BLOCK (e); + IOR_REG_SET (bb_live_regs, basic_block_live_at_start[b_succ]); + e = NEXT_OUT (e); + } + while (e != first_edge); + } + + get_block_head_tail (bb, &head, &tail); + next_tail = NEXT_INSN (tail); + prev_head = PREV_INSN (head); + + for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) + if (REGNO_REG_SET_P (bb_live_regs, i)) + sched_reg_basic_block[i] = REG_BLOCK_GLOBAL; + + /* if the block is empty, same regs are alive at its end and its start. + since this is not guaranteed after interblock scheduling, make sure they + are truly identical. */ + if (NEXT_INSN (prev_head) == tail + && (GET_RTX_CLASS (GET_CODE (tail)) != 'i')) + { + if (current_nr_blocks > 1) + COPY_REG_SET (basic_block_live_at_start[b], bb_live_regs); + + return; + } + + b = BB_TO_BLOCK (bb); + current_block_num = b; + + /* Keep track of register lives. */ + old_live_regs = ALLOCA_REG_SET (); + regs_sometimes_live + = (struct sometimes *) alloca (max_regno * sizeof (struct sometimes)); + sometimes_max = 0; + + /* initiate "sometimes" data, starting with registers live at end */ + sometimes_max = 0; + COPY_REG_SET (old_live_regs, bb_live_regs); + EXECUTE_IF_SET_IN_REG_SET (bb_live_regs, 0, j, + { + sometimes_max + = new_sometimes_live (regs_sometimes_live, + j, sometimes_max); + }); + + /* scan insns back, computing regs live info */ + for (insn = tail; insn != prev_head; insn = PREV_INSN (insn)) + { + /* First we kill registers set by this insn, and then we + make registers used by this insn live. This is the opposite + order used above because we are traversing the instructions + backwards. */ + + /* Strictly speaking, we should scan REG_UNUSED notes and make + every register mentioned there live, however, we will just + kill them again immediately below, so there doesn't seem to + be any reason why we bother to do this. */ + + /* See if this is the last notice we must take of a register. */ + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + + if (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == CLOBBER) + sched_note_set (b, PATTERN (insn), 1); + else if (GET_CODE (PATTERN (insn)) == PARALLEL) + { + for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) + if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET + || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) + sched_note_set (b, XVECEXP (PATTERN (insn), 0, j), 1); + } + + /* This code keeps life analysis information up to date. */ + if (GET_CODE (insn) == CALL_INSN) + { + register struct sometimes *p; + + /* A call kills all call used registers that are not + global or fixed, except for those mentioned in the call + pattern which will be made live again later. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (call_used_regs[i] && ! global_regs[i] + && ! fixed_regs[i]) + { + CLEAR_REGNO_REG_SET (bb_live_regs, i); +#if 0 + SET_REGNO_REG_SET (bb_dead_regs, i); +#endif + } + + /* Regs live at the time of a call instruction must not + go in a register clobbered by calls. Record this for + all regs now live. Note that insns which are born or + die in a call do not cross a call, so this must be done + after the killings (above) and before the births + (below). */ + p = regs_sometimes_live; + for (i = 0; i < sometimes_max; i++, p++) + if (REGNO_REG_SET_P (bb_live_regs, p->regno)) + p->calls_crossed += 1; + } + + /* Make every register used live, and add REG_DEAD notes for + registers which were not live before we started. */ + attach_deaths_insn (insn); + + /* Find registers now made live by that instruction. */ + EXECUTE_IF_AND_COMPL_IN_REG_SET (bb_live_regs, old_live_regs, 0, j, + { + sometimes_max + = new_sometimes_live (regs_sometimes_live, + j, sometimes_max); + }); + IOR_REG_SET (old_live_regs, bb_live_regs); + + /* Count lengths of all regs we are worrying about now, + and handle registers no longer live. */ + + for (i = 0; i < sometimes_max; i++) + { + register struct sometimes *p = ®s_sometimes_live[i]; + int regno = p->regno; + + p->live_length += 1; + + if (!REGNO_REG_SET_P (bb_live_regs, regno)) + { + /* This is the end of one of this register's lifetime + segments. Save the lifetime info collected so far, + and clear its bit in the old_live_regs entry. */ + sched_reg_live_length[regno] += p->live_length; + sched_reg_n_calls_crossed[regno] += p->calls_crossed; + CLEAR_REGNO_REG_SET (old_live_regs, p->regno); + + /* Delete the reg_sometimes_live entry for this reg by + copying the last entry over top of it. */ + *p = regs_sometimes_live[--sometimes_max]; + /* ...and decrement i so that this newly copied entry + will be processed. */ + i--; + } + } + } + + finish_sometimes_live (regs_sometimes_live, sometimes_max); + + /* In interblock scheduling, basic_block_live_at_start may have changed. */ + if (current_nr_blocks > 1) + COPY_REG_SET (basic_block_live_at_start[b], bb_live_regs); + +} /* find_post_sched_live */ + +/* After scheduling the subroutine, restore information about uses of + registers. */ + +static void +update_reg_usage () +{ + int regno; + + if (n_basic_blocks > 0) + for (regno = FIRST_PSEUDO_REGISTER; regno < max_regno; regno++) + if (REGNO_REG_SET_P (basic_block_live_at_start[0], regno)) + sched_reg_basic_block[regno] = REG_BLOCK_GLOBAL; + + for (regno = 0; regno < max_regno; regno++) + if (sched_reg_live_length[regno]) + { + if (sched_verbose) + { + if (REG_LIVE_LENGTH (regno) > sched_reg_live_length[regno]) + fprintf (dump, + ";; register %d life shortened from %d to %d\n", + regno, REG_LIVE_LENGTH (regno), + sched_reg_live_length[regno]); + /* Negative values are special; don't overwrite the current + reg_live_length value if it is negative. */ + else if (REG_LIVE_LENGTH (regno) < sched_reg_live_length[regno] + && REG_LIVE_LENGTH (regno) >= 0) + fprintf (dump, + ";; register %d life extended from %d to %d\n", + regno, REG_LIVE_LENGTH (regno), + sched_reg_live_length[regno]); + + if (!REG_N_CALLS_CROSSED (regno) + && sched_reg_n_calls_crossed[regno]) + fprintf (dump, + ";; register %d now crosses calls\n", regno); + else if (REG_N_CALLS_CROSSED (regno) + && !sched_reg_n_calls_crossed[regno] + && REG_BASIC_BLOCK (regno) != REG_BLOCK_GLOBAL) + fprintf (dump, + ";; register %d no longer crosses calls\n", regno); + + if (REG_BASIC_BLOCK (regno) != sched_reg_basic_block[regno] + && sched_reg_basic_block[regno] != REG_BLOCK_UNKNOWN + && REG_BASIC_BLOCK(regno) != REG_BLOCK_UNKNOWN) + fprintf (dump, + ";; register %d changed basic block from %d to %d\n", + regno, REG_BASIC_BLOCK(regno), + sched_reg_basic_block[regno]); + + } + /* Negative values are special; don't overwrite the current + reg_live_length value if it is negative. */ + if (REG_LIVE_LENGTH (regno) >= 0) + REG_LIVE_LENGTH (regno) = sched_reg_live_length[regno]; + + if (sched_reg_basic_block[regno] != REG_BLOCK_UNKNOWN + && REG_BASIC_BLOCK(regno) != REG_BLOCK_UNKNOWN) + REG_BASIC_BLOCK(regno) = sched_reg_basic_block[regno]; + + /* We can't change the value of reg_n_calls_crossed to zero for + pseudos which are live in more than one block. + + This is because combine might have made an optimization which + invalidated basic_block_live_at_start and reg_n_calls_crossed, + but it does not update them. If we update reg_n_calls_crossed + here, the two variables are now inconsistent, and this might + confuse the caller-save code into saving a register that doesn't + need to be saved. This is only a problem when we zero calls + crossed for a pseudo live in multiple basic blocks. + + Alternatively, we could try to correctly update basic block live + at start here in sched, but that seems complicated. + + Note: it is possible that a global register became local, as result + of interblock motion, but will remain marked as a global register. */ + if (sched_reg_n_calls_crossed[regno] + || REG_BASIC_BLOCK (regno) != REG_BLOCK_GLOBAL) + REG_N_CALLS_CROSSED (regno) = sched_reg_n_calls_crossed[regno]; + + } +} + +/* Scheduling clock, modified in schedule_block() and queue_to_ready () */ +static int clock_var; + +/* Move insns that became ready to fire from queue to ready list. */ + +static int +queue_to_ready (ready, n_ready) + rtx ready[]; + int n_ready; +{ + rtx insn; + rtx link; + + q_ptr = NEXT_Q (q_ptr); + + /* Add all pending insns that can be scheduled without stalls to the + ready list. */ + for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1)) + { + + insn = XEXP (link, 0); + q_size -= 1; + + if (sched_verbose >= 2) + fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn)); + + if (sched_verbose >= 2 && INSN_BB (insn) != target_bb) + fprintf (dump, "(b%d) ", INSN_BLOCK (insn)); + + ready[n_ready++] = insn; + if (sched_verbose >= 2) + fprintf (dump, "moving to ready without stalls\n"); + } + insn_queue[q_ptr] = 0; + + /* If there are no ready insns, stall until one is ready and add all + of the pending insns at that point to the ready list. */ + if (n_ready == 0) + { + register int stalls; + + for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++) + { + if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)])) + { + for (; link; link = XEXP (link, 1)) + { + insn = XEXP (link, 0); + q_size -= 1; + + if (sched_verbose >= 2) + fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn)); + + if (sched_verbose >= 2 && INSN_BB (insn) != target_bb) + fprintf (dump, "(b%d) ", INSN_BLOCK (insn)); + + ready[n_ready++] = insn; + if (sched_verbose >= 2) + fprintf (dump, "moving to ready with %d stalls\n", stalls); + } + insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0; + + if (n_ready) + break; + } + } + + if (sched_verbose && stalls) + visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls); + q_ptr = NEXT_Q_AFTER (q_ptr, stalls); + clock_var += stalls; + } + return n_ready; +} + +/* Print the ready list for debugging purposes. Callable from debugger. */ + +extern void +debug_ready_list (ready, n_ready) + rtx ready[]; + int n_ready; +{ + int i; + + for (i = 0; i < n_ready; i++) + { + fprintf (dump, " %d", INSN_UID (ready[i])); + if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb) + fprintf (dump, "/b%d", INSN_BLOCK (ready[i])); + } + fprintf (dump, "\n"); +} + +/* Print names of units on which insn can/should execute, for debugging. */ + +static void +insn_print_units (insn) + rtx insn; +{ + int i; + int unit = insn_unit (insn); + + if (unit == -1) + fprintf (dump, "none"); + else if (unit >= 0) + fprintf (dump, "%s", function_units[unit].name); + else + { + fprintf (dump, "["); + for (i = 0, unit = ~unit; unit; i++, unit >>= 1) + if (unit & 1) + { + fprintf (dump, "%s", function_units[i].name); + if (unit != 1) + fprintf (dump, " "); + } + fprintf (dump, "]"); + } +} + +/* MAX_VISUAL_LINES is the maximum number of lines in visualization table + of a basic block. If more lines are needed, table is splitted to two. + n_visual_lines is the number of lines printed so far for a block. + visual_tbl contains the block visualization info. + vis_no_unit holds insns in a cycle that are not mapped to any unit. */ +#define MAX_VISUAL_LINES 100 +#define INSN_LEN 30 +int n_visual_lines; +char *visual_tbl; +int n_vis_no_unit; +rtx vis_no_unit[10]; + +/* Finds units that are in use in this fuction. Required only + for visualization. */ + +static void +init_target_units () +{ + rtx insn; + int unit; + + for (insn = get_last_insn (); insn; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + + unit = insn_unit (insn); + + if (unit < 0) + target_units |= ~unit; + else + target_units |= (1 << unit); + } +} + +/* Return the length of the visualization table */ + +static int +get_visual_tbl_length () +{ + int unit, i; + int n, n1; + char *s; + + /* compute length of one field in line */ + s = (char *) alloca (INSN_LEN + 5); + sprintf (s, " %33s", "uname"); + n1 = strlen (s); + + /* compute length of one line */ + n = strlen (";; "); + n += n1; + for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++) + if (function_units[unit].bitmask & target_units) + for (i = 0; i < function_units[unit].multiplicity; i++) + n += n1; + n += n1; + n += strlen ("\n") + 2; + + /* compute length of visualization string */ + return (MAX_VISUAL_LINES * n); +} + +/* Init block visualization debugging info */ + +static void +init_block_visualization () +{ + strcpy (visual_tbl, ""); + n_visual_lines = 0; + n_vis_no_unit = 0; +} + +#define BUF_LEN 256 + +/* This recognizes rtx, I classified as expressions. These are always */ +/* represent some action on values or results of other expression, */ +/* that may be stored in objects representing values. */ + +static void +print_exp (buf, x, verbose) + char *buf; + rtx x; + int verbose; +{ + char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN]; + + switch (GET_CODE (x)) + { + case PLUS: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s+%s", t1, t2); + break; + case LO_SUM: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%sl+%s", t1, t2); + break; + case MINUS: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s-%s", t1, t2); + break; + case COMPARE: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s??%s", t1, t2); + break; + case NEG: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "-%s", t1); + break; + case MULT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s*%s", t1, t2); + break; + case DIV: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s/%s", t1, t2); + break; + case UDIV: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%su/%s", t1, t2); + break; + case MOD: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s%%%s", t1, t2); + break; + case UMOD: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%su%%%s", t1, t2); + break; + case SMIN: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "smin (%s, %s)", t1, t2); + break; + case SMAX: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "smax(%s,%s)", t1, t2); + break; + case UMIN: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "umin (%s, %s)", t1, t2); + break; + case UMAX: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "umax(%s,%s)", t1, t2); + break; + case NOT: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "!%s", t1); + break; + case AND: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s&%s", t1, t2); + break; + case IOR: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s|%s", t1, t2); + break; + case XOR: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s^%s", t1, t2); + break; + case ASHIFT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s<<%s", t1, t2); + break; + case LSHIFTRT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s0>%s", t1, t2); + break; + case ASHIFTRT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s>>%s", t1, t2); + break; + case ROTATE: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s<-<%s", t1, t2); + break; + case ROTATERT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s>->%s", t1, t2); + break; + case ABS: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "abs(%s)", t1); + break; + case SQRT: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "sqrt(%s)", t1); + break; + case FFS: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "ffs(%s)", t1); + break; + case EQ: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s == %s", t1, t2); + break; + case NE: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s!=%s", t1, t2); + break; + case GT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s>%s", t1, t2); + break; + case GTU: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s>u%s", t1, t2); + break; + case LT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s<%s", t1, t2); + break; + case LTU: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s=%s", t1, t2); + break; + case GEU: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s>=u%s", t1, t2); + break; + case LE: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s<=%s", t1, t2); + break; + case LEU: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "%s<=u%s", t1, t2); + break; + case SIGN_EXTRACT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + print_value (t3, XEXP (x, 2), verbose); + if (verbose) + sprintf (buf, "sign_extract(%s,%s,%s)", t1, t2, t3); + else + sprintf (buf, "sxt(%s,%s,%s)", t1, t2, t3); + break; + case ZERO_EXTRACT: + print_value (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + print_value (t3, XEXP (x, 2), verbose); + if (verbose) + sprintf (buf, "zero_extract(%s,%s,%s)", t1, t2, t3); + else + sprintf (buf, "zxt(%s,%s,%s)", t1, t2, t3); + break; + case SIGN_EXTEND: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "sign_extend(%s)", t1); + else + sprintf (buf, "sxn(%s)", t1); + break; + case ZERO_EXTEND: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "zero_extend(%s)", t1); + else + sprintf (buf, "zxn(%s)", t1); + break; + case FLOAT_EXTEND: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "float_extend(%s)", t1); + else + sprintf (buf, "fxn(%s)", t1); + break; + case TRUNCATE: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "trunc(%s)", t1); + else + sprintf (buf, "trn(%s)", t1); + break; + case FLOAT_TRUNCATE: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "float_trunc(%s)", t1); + else + sprintf (buf, "ftr(%s)", t1); + break; + case FLOAT: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "float(%s)", t1); + else + sprintf (buf, "flt(%s)", t1); + break; + case UNSIGNED_FLOAT: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "uns_float(%s)", t1); + else + sprintf (buf, "ufl(%s)", t1); + break; + case FIX: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "fix(%s)", t1); + break; + case UNSIGNED_FIX: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + sprintf (buf, "uns_fix(%s)", t1); + else + sprintf (buf, "ufx(%s)", t1); + break; + case PRE_DEC: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "--%s", t1); + break; + case PRE_INC: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "++%s", t1); + break; + case POST_DEC: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "%s--", t1); + break; + case POST_INC: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "%s++", t1); + break; + case CALL: + print_value (t1, XEXP (x, 0), verbose); + if (verbose) + { + print_value (t2, XEXP (x, 1), verbose); + sprintf (buf, "call %s argc:%s", t1, t2); + } + else + sprintf (buf, "call %s", t1); + break; + case IF_THEN_ELSE: + print_exp (t1, XEXP (x, 0), verbose); + print_value (t2, XEXP (x, 1), verbose); + print_value (t3, XEXP (x, 2), verbose); + sprintf (buf, "{(%s)?%s:%s}", t1, t2, t3); + break; + case TRAP_IF: + print_value (t1, TRAP_CONDITION (x), verbose); + sprintf (buf, "trap_if %s", t1); + break; + case UNSPEC: + { + int i; + + sprintf (t1, "unspec{"); + for (i = 0; i < XVECLEN (x, 0); i++) + { + print_pattern (t2, XVECEXP (x, 0, i), verbose); + sprintf (t3, "%s%s;", t1, t2); + strcpy (t1, t3); + } + sprintf (buf, "%s}", t1); + } + break; + case UNSPEC_VOLATILE: + { + int i; + + sprintf (t1, "unspec/v{"); + for (i = 0; i < XVECLEN (x, 0); i++) + { + print_pattern (t2, XVECEXP (x, 0, i), verbose); + sprintf (t3, "%s%s;", t1, t2); + strcpy (t1, t3); + } + sprintf (buf, "%s}", t1); + } + break; + default: +/* if (verbose) debug_rtx (x); else sprintf (buf, "$$$"); */ + sprintf (buf, "$$$"); + } +} /* print_exp */ + +/* Prints rtxes, i customly classified as values. They're constants, */ +/* registers, labels, symbols and memory accesses. */ + +static void +print_value (buf, x, verbose) + char *buf; + rtx x; + int verbose; +{ + char t[BUF_LEN]; + + switch (GET_CODE (x)) + { + case CONST_INT: + sprintf (buf, "%Xh", INTVAL (x)); + break; + case CONST_DOUBLE: + print_value (t, XEXP (x, 0), verbose); + sprintf (buf, "<%s>", t); + break; + case CONST_STRING: + sprintf (buf, "\"%s\"", (char *) XEXP (x, 0)); + break; + case SYMBOL_REF: + sprintf (buf, "`%s'", (char *) XEXP (x, 0)); + break; + case LABEL_REF: + sprintf (buf, "L%d", INSN_UID (XEXP (x, 0))); + break; + case CONST: + print_value (buf, XEXP (x, 0), verbose); + break; + case HIGH: + print_value (buf, XEXP (x, 0), verbose); + break; + case REG: + if (GET_MODE (x) == SFmode + || GET_MODE (x) == DFmode + || GET_MODE (x) == XFmode + || GET_MODE (x) == TFmode) + strcpy (t, "fr"); + else + strcpy (t, "r"); + sprintf (buf, "%s%d", t, (int) XEXP (x, 0)); + break; + case SUBREG: + print_value (t, XEXP (x, 0), verbose); + sprintf (buf, "%s#%d", t, (int) XEXP (x, 1)); + break; + case SCRATCH: + sprintf (buf, "scratch"); + break; + case CC0: + sprintf (buf, "cc0"); + break; + case PC: + sprintf (buf, "pc"); + break; + case MEM: + print_value (t, XEXP (x, 0), verbose); + sprintf (buf, "[%s]", t); + break; + default: + print_exp (buf, x, verbose); + } +} /* print_value */ + +/* The next step in insn detalization, its pattern recognition */ + +static void +print_pattern (buf, x, verbose) + char *buf; + rtx x; + int verbose; +{ + char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN]; + + switch (GET_CODE (x)) + { + case SET: + print_value (t1, SET_DEST (x), verbose); + print_value (t2, SET_SRC (x), verbose); + sprintf (buf, "%s=%s", t1, t2); + break; + case RETURN: + sprintf (buf, "return"); + break; + case CALL: + print_exp (buf, x, verbose); + break; + case CLOBBER: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "clobber %s", t1); + break; + case USE: + print_value (t1, XEXP (x, 0), verbose); + sprintf (buf, "use %s", t1); + break; + case PARALLEL: + { + int i; + + sprintf (t1, "{"); + for (i = 0; i < XVECLEN (x, 0); i++) + { + print_pattern (t2, XVECEXP (x, 0, i), verbose); + sprintf (t3, "%s%s;", t1, t2); + strcpy (t1, t3); + } + sprintf (buf, "%s}", t1); + } + break; + case SEQUENCE: + { + int i; + + sprintf (t1, "%%{"); + for (i = 0; i < XVECLEN (x, 0); i++) + { + print_insn (t2, XVECEXP (x, 0, i), verbose); + sprintf (t3, "%s%s;", t1, t2); + strcpy (t1, t3); + } + sprintf (buf, "%s%%}", t1); + } + break; + case ASM_INPUT: + sprintf (buf, "asm {%s}", XEXP (x, 0)); + break; + case ADDR_VEC: + break; + case ADDR_DIFF_VEC: + print_value (buf, XEXP (x, 0), verbose); + break; + case TRAP_IF: + print_value (t1, TRAP_CONDITION (x), verbose); + sprintf (buf, "trap_if %s", t1); + break; + case UNSPEC: + { + int i; + + sprintf (t1, "unspec{"); + for (i = 0; i < XVECLEN (x, 0); i++) + { + print_pattern (t2, XVECEXP (x, 0, i), verbose); + sprintf (t3, "%s%s;", t1, t2); + strcpy (t1, t3); + } + sprintf (buf, "%s}", t1); + } + break; + case UNSPEC_VOLATILE: + { + int i; + + sprintf (t1, "unspec/v{"); + for (i = 0; i < XVECLEN (x, 0); i++) + { + print_pattern (t2, XVECEXP (x, 0, i), verbose); + sprintf (t3, "%s%s;", t1, t2); + strcpy (t1, t3); + } + sprintf (buf, "%s}", t1); + } + break; + default: + print_value (buf, x, verbose); + } +} /* print_pattern */ + +/* This is the main function in rtl visualization mechanism. It + accepts an rtx and tries to recognize it as an insn, then prints it + properly in human readable form, resembling assembler mnemonics. */ +/* For every insn it prints its UID and BB the insn belongs */ +/* too. (probably the last "option" should be extended somehow, since */ +/* it depends now on sched.c inner variables ...) */ + +static void +print_insn (buf, x, verbose) + char *buf; + rtx x; + int verbose; +{ + char t[BUF_LEN]; + rtx insn = x; + + switch (GET_CODE (x)) + { + case INSN: + print_pattern (t, PATTERN (x), verbose); + if (verbose) + sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x), + INSN_UID (x), t); + else + sprintf (buf, "%-4d %s", INSN_UID (x), t); + break; + case JUMP_INSN: + print_pattern (t, PATTERN (x), verbose); + if (verbose) + sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x), + INSN_UID (x), t); + else + sprintf (buf, "%-4d %s", INSN_UID (x), t); + break; + case CALL_INSN: + x = PATTERN (insn); + if (GET_CODE (x) == PARALLEL) + { + x = XVECEXP (x, 0, 0); + print_pattern (t, x, verbose); + } + else + strcpy (t, "call <...>"); + if (verbose) + sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn), + INSN_UID (insn), t); + else + sprintf (buf, "%-4d %s", INSN_UID (insn), t); + break; + case CODE_LABEL: + sprintf (buf, "L%d:", INSN_UID (x)); + break; + case BARRIER: + sprintf (buf, "i% 4d: barrier", INSN_UID (x)); + break; + case NOTE: + if (NOTE_LINE_NUMBER (x) > 0) + sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x), + NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x)); + else + sprintf (buf, "%4d %s", INSN_UID (x), + GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x))); + break; + default: + if (verbose) + { + sprintf (buf, "Not an INSN at all\n"); + debug_rtx (x); + } + else + sprintf (buf, "i%-4d ", INSN_UID (x)); + } +} /* print_insn */ + +void +print_insn_chain (rtx_first) + rtx rtx_first; +{ + register rtx tmp_rtx; + char str[BUF_LEN]; + + strcpy (str, "(nil)\n"); + if (rtx_first != 0) + switch (GET_CODE (rtx_first)) + { + case INSN: + case JUMP_INSN: + case CALL_INSN: + case NOTE: + case CODE_LABEL: + case BARRIER: + for (tmp_rtx = rtx_first; tmp_rtx != NULL; + tmp_rtx = NEXT_INSN (tmp_rtx)) + { + print_insn (str, tmp_rtx, 0); + printf ("%s\n", str); + } + break; + default: + print_insn (str, rtx_first, 0); + printf ("%s\n", str); + } +} /* print_insn_chain */ + +/* Print visualization debugging info */ + +static void +print_block_visualization (b, s) + int b; + char *s; +{ + int unit, i; + char *names; /* names of units */ + char *delim; /* separation line */ + + /* print header */ + fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s); + + /* Print names of units */ + names = (char *) alloca (256); + delim = (char *) alloca (256); + sprintf (names, ";; %-8s", "clock"); + sprintf (delim, ";; %-8s", "====="); + for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++) + if (function_units[unit].bitmask & target_units) + for (i = 0; i < function_units[unit].multiplicity; i++) + { + sprintf (names + strlen (names), " %-33s", function_units[unit].name); + sprintf (delim + strlen (delim), " %-33s", "=============================="); + } + sprintf (names + strlen (names), " %-8s", "no-unit"); + sprintf (delim + strlen (delim), " %-8s", "======="); + fprintf (dump, "\n%s\n%s\n", names, delim); + + /* Print insns in each cycle */ + fprintf (dump, "%s\n", visual_tbl); +} + +/* Print insns in the 'no_unit' column of visualization */ + +static void +visualize_no_unit (insn) + rtx insn; +{ + vis_no_unit[n_vis_no_unit] = insn; + n_vis_no_unit++; +} + +/* Print insns scheduled in clock, for visualization. */ + +static void +visualize_scheduled_insns (b, clock) + int b, clock; +{ + int i, unit; + + /* if no more room, split table into two */ + if (n_visual_lines >= MAX_VISUAL_LINES) + { + print_block_visualization (b, "(incomplete)"); + init_block_visualization (); + } + + n_visual_lines++; + + sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock); + for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++) + if (function_units[unit].bitmask & target_units) + for (i = 0; i < function_units[unit].multiplicity; i++) + { + int instance = unit + i * FUNCTION_UNITS_SIZE; + rtx insn = unit_last_insn[instance]; + + /* print insns that still keep the unit busy */ + if (insn && + actual_hazard_this_instance (unit, instance, insn, clock, 0)) + { + char str[BUF_LEN]; + print_insn (str, insn, 0); + str[INSN_LEN] = '\0'; + sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str); + } + else + sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------"); + } + + /* print insns that are not assigned to any unit */ + for (i = 0; i < n_vis_no_unit; i++) + sprintf (visual_tbl + strlen (visual_tbl), " %-8d", + INSN_UID (vis_no_unit[i])); + n_vis_no_unit = 0; + + sprintf (visual_tbl + strlen (visual_tbl), "\n"); +} + +/* Print stalled cycles */ + +static void +visualize_stall_cycles (b, stalls) + int b, stalls; +{ + int i; + + /* if no more room, split table into two */ + if (n_visual_lines >= MAX_VISUAL_LINES) + { + print_block_visualization (b, "(incomplete)"); + init_block_visualization (); + } + + n_visual_lines++; + + sprintf (visual_tbl + strlen (visual_tbl), ";; "); + for (i = 0; i < stalls; i++) + sprintf (visual_tbl + strlen (visual_tbl), "."); + sprintf (visual_tbl + strlen (visual_tbl), "\n"); +} + +/* move_insn1: Remove INSN from insn chain, and link it after LAST insn */ + +static rtx +move_insn1 (insn, last) + rtx insn, last; +{ + NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn); + PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn); + + NEXT_INSN (insn) = NEXT_INSN (last); + PREV_INSN (NEXT_INSN (last)) = insn; + + NEXT_INSN (last) = insn; + PREV_INSN (insn) = last; + + return insn; +} + +/* Search INSN for fake REG_DEAD note pairs for NOTE_INSN_SETJMP, + NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into + NOTEs. The REG_DEAD note following first one is contains the saved + value for NOTE_BLOCK_NUMBER which is useful for + NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction + output by the instruction scheduler. Return the new value of LAST. */ + +static rtx +reemit_notes (insn, last) + rtx insn; + rtx last; +{ + rtx note, retval; + + retval = last; + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) + { + if (REG_NOTE_KIND (note) == REG_DEAD + && GET_CODE (XEXP (note, 0)) == CONST_INT) + { + if (INTVAL (XEXP (note, 0)) == NOTE_INSN_SETJMP) + { + retval = emit_note_after (INTVAL (XEXP (note, 0)), insn); + CONST_CALL_P (retval) = CONST_CALL_P (note); + remove_note (insn, note); + note = XEXP (note, 1); + } + else + { + last = emit_note_before (INTVAL (XEXP (note, 0)), last); + remove_note (insn, note); + note = XEXP (note, 1); + NOTE_BLOCK_NUMBER (last) = INTVAL (XEXP (note, 0)); + } + remove_note (insn, note); + } + } + return retval; +} + +/* Move INSN, and all insns which should be issued before it, + due to SCHED_GROUP_P flag. Reemit notes if needed. */ + +static rtx +move_insn (insn, last) + rtx insn, last; +{ + rtx new_last = insn; + + while (SCHED_GROUP_P (insn)) + { + rtx prev = PREV_INSN (insn); + move_insn1 (insn, last); + insn = prev; + } + + move_insn1 (insn, last); + return reemit_notes (new_last, new_last); +} + +/* Return an insn which represents a SCHED_GROUP, which is + the last insn in the group. */ + +static rtx +group_leader (insn) + rtx insn; +{ + rtx prev; + + do + { + prev = insn; + insn = next_nonnote_insn (insn); + } + while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL)); + + return prev; +} + +/* Use forward list scheduling to rearrange insns of block BB in region RGN, + possibly bringing insns from subsequent blocks in the same region. + Return number of insns scheduled. */ + +static int +schedule_block (bb, rgn, rgn_n_insns) + int bb; + int rgn; + int rgn_n_insns; +{ + /* Local variables. */ + rtx insn, last; + rtx *ready; + int i; + int n_ready = 0; + int can_issue_more; + + /* flow block of this bb */ + int b = BB_TO_BLOCK (bb); + + /* target_n_insns == number of insns in b before scheduling starts. + sched_target_n_insns == how many of b's insns were scheduled. + sched_n_insns == how many insns were scheduled in b */ + int target_n_insns = 0; + int sched_target_n_insns = 0; + int sched_n_insns = 0; + +#define NEED_NOTHING 0 +#define NEED_HEAD 1 +#define NEED_TAIL 2 + int new_needs; + + /* head/tail info for this block */ + rtx prev_head; + rtx next_tail; + rtx head; + rtx tail; + int bb_src; + + /* At the start of a function, before reload has run, don't delay getting + parameters from hard registers into pseudo registers. */ + if (reload_completed == 0 && b == 0) + { + head = basic_block_head[b]; + tail = basic_block_end[b]; + + while (head != tail + && GET_CODE (head) == NOTE + && NOTE_LINE_NUMBER (head) != NOTE_INSN_FUNCTION_BEG) + head = NEXT_INSN (head); + + while (head != tail + && GET_CODE (head) == INSN + && GET_CODE (PATTERN (head)) == SET) + { + rtx link; + rtx src = SET_SRC (PATTERN (head)); + while (GET_CODE (src) == SUBREG + || GET_CODE (src) == SIGN_EXTEND + || GET_CODE (src) == ZERO_EXTEND + || GET_CODE (src) == SIGN_EXTRACT + || GET_CODE (src) == ZERO_EXTRACT) + src = XEXP (src, 0); + if (GET_CODE (src) != REG + || REGNO (src) >= FIRST_PSEUDO_REGISTER) + break; + + for (link = INSN_DEPEND (head); link != 0; link = XEXP (link, 1)) + INSN_DEP_COUNT (XEXP (link, 0)) -= 1; + + if (GET_CODE (head) != NOTE) + sched_n_insns++; + + head = NEXT_INSN (head); + } + + /* Don't include any notes or labels at the beginning of the + basic block, or notes at the ends of basic blocks. */ + while (head != tail) + { + if (GET_CODE (head) == NOTE) + head = NEXT_INSN (head); + else if (GET_CODE (tail) == NOTE) + tail = PREV_INSN (tail); + else if (GET_CODE (head) == CODE_LABEL) + head = NEXT_INSN (head); + else + break; + } + } + else + get_block_head_tail (bb, &head, &tail); + + next_tail = NEXT_INSN (tail); + prev_head = PREV_INSN (head); + + /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need + to schedule this block. */ + if (head == tail + && (GET_RTX_CLASS (GET_CODE (head)) != 'i')) + return (sched_n_insns); + + /* debug info */ + if (sched_verbose) + { + fprintf (dump, ";; ======================================================\n"); + fprintf (dump, + ";; -- basic block %d from %d to %d -- %s reload\n", + b, INSN_UID (basic_block_head[b]), + INSN_UID (basic_block_end[b]), + (reload_completed ? "after" : "before")); + fprintf (dump, ";; ======================================================\n"); + if (sched_debug_count >= 0) + fprintf (dump, ";;\t -- sched_debug_count=%d\n", sched_debug_count); + fprintf (dump, "\n"); + + visual_tbl = (char *) alloca (get_visual_tbl_length ()); + init_block_visualization (); + } + + /* remove remaining note insns from the block, save them in + note_list. These notes are restored at the end of + schedule_block (). */ + note_list = 0; + rm_other_notes (head, tail); + + target_bb = bb; + + /* prepare current target block info */ + if (current_nr_blocks > 1) + { + candidate_table = (candidate *) alloca (current_nr_blocks * sizeof (candidate)); + + bblst_last = 0; + /* ??? It is not clear why bblst_size is computed this way. The original + number was clearly too small as it resulted in compiler failures. + Multiplying by the original number by 2 (to account for update_bbs + members) seems to be a reasonable solution. */ + /* ??? Or perhaps there is a bug somewhere else in this file? */ + bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2; + bblst_table = (int *) alloca (bblst_size * sizeof (int)); + + bitlst_table_last = 0; + bitlst_table_size = rgn_nr_edges; + bitlst_table = (int *) alloca (rgn_nr_edges * sizeof (int)); + + compute_trg_info (bb); + } + + clear_units (); + + /* Allocate the ready list */ + ready = (rtx *) alloca ((rgn_n_insns + 1) * sizeof (rtx)); + + /* Print debugging information. */ + if (sched_verbose >= 5) + debug_dependencies (); + + + /* Initialize ready list with all 'ready' insns in target block. + Count number of insns in the target block being scheduled. */ + n_ready = 0; + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + { + rtx next; + + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + next = NEXT_INSN (insn); + + if (INSN_DEP_COUNT (insn) == 0 + && (SCHED_GROUP_P (next) == 0 || GET_RTX_CLASS (GET_CODE (next)) != 'i')) + ready[n_ready++] = insn; + if (!(SCHED_GROUP_P (insn))) + target_n_insns++; + } + + /* Add to ready list all 'ready' insns in valid source blocks. + For speculative insns, check-live, exception-free, and + issue-delay. */ + for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++) + if (IS_VALID (bb_src)) + { + rtx src_head; + rtx src_next_tail; + rtx tail, head; + + get_block_head_tail (bb_src, &head, &tail); + src_next_tail = NEXT_INSN (tail); + src_head = head; + + if (head == tail + && (GET_RTX_CLASS (GET_CODE (head)) != 'i')) + continue; + + for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + + if (!CANT_MOVE (insn) + && (!IS_SPECULATIVE_INSN (insn) + || (insn_issue_delay (insn) <= 3 + && check_live (insn, bb_src, target_bb) + && is_exception_free (insn, bb_src, target_bb)))) + + { + rtx next; + + next = NEXT_INSN (insn); + if (INSN_DEP_COUNT (insn) == 0 + && (SCHED_GROUP_P (next) == 0 + || GET_RTX_CLASS (GET_CODE (next)) != 'i')) + ready[n_ready++] = insn; + } + } + } + + /* no insns scheduled in this block yet */ + last_scheduled_insn = 0; + + /* Sort the ready list */ + SCHED_SORT (ready, n_ready); + + if (sched_verbose >= 2) + { + fprintf (dump, ";;\t\tReady list initially: "); + debug_ready_list (ready, n_ready); + } + + /* Q_SIZE is the total number of insns in the queue. */ + q_ptr = 0; + q_size = 0; + clock_var = 0; + bzero ((char *) insn_queue, sizeof (insn_queue)); + + /* We start inserting insns after PREV_HEAD. */ + last = prev_head; + + /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */ + new_needs = (NEXT_INSN (prev_head) == basic_block_head[b] + ? NEED_HEAD : NEED_NOTHING); + if (PREV_INSN (next_tail) == basic_block_end[b]) + new_needs |= NEED_TAIL; + + /* loop until all the insns in BB are scheduled. */ + while (sched_target_n_insns < target_n_insns) + { + int b1; + +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count == 0) + break; +#endif + clock_var++; + + /* Add to the ready list all pending insns that can be issued now. + If there are no ready insns, increment clock until one + is ready and add all pending insns at that point to the ready + list. */ + n_ready = queue_to_ready (ready, n_ready); + + if (n_ready == 0) + abort (); + + if (sched_verbose >= 2) + { + fprintf (dump, ";;\t\tReady list after queue_to_ready: "); + debug_ready_list (ready, n_ready); + } + + /* Sort the ready list. */ + SCHED_SORT (ready, n_ready); + + if (sched_verbose) + { + fprintf (dump, ";;\tReady list (t =%3d): ", clock_var); + debug_ready_list (ready, n_ready); + } + + /* Issue insns from ready list. + It is important to count down from n_ready, because n_ready may change + as insns are issued. */ + can_issue_more = issue_rate; + for (i = n_ready - 1; i >= 0 && can_issue_more; i--) + { + rtx insn = ready[i]; + int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0); + + if (cost > 1) + { + queue_insn (insn, cost); + ready[i] = ready[--n_ready]; /* remove insn from ready list */ + } + else if (cost == 0) + { +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count == 0) + break; +#endif + + /* an interblock motion? */ + if (INSN_BB (insn) != target_bb) + { + if (IS_SPECULATIVE_INSN (insn)) + { + + if (!check_live (insn, INSN_BB (insn), target_bb)) + { + /* speculative motion, live check failed, remove + insn from ready list */ + ready[i] = ready[--n_ready]; + continue; + } + update_live (insn, INSN_BB (insn), target_bb); + + /* for speculative load, mark insns fed by it. */ + if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn)) + set_spec_fed (insn); + + nr_spec++; + } + nr_inter++; + + /* update source block boundaries */ + b1 = INSN_BLOCK (insn); + if (insn == basic_block_head[b1] + && insn == basic_block_end[b1]) + { + emit_note_after (NOTE_INSN_DELETED, basic_block_head[b1]); + basic_block_end[b1] = basic_block_head[b1] = NEXT_INSN (insn); + } + else if (insn == basic_block_end[b1]) + { + basic_block_end[b1] = PREV_INSN (insn); + } + else if (insn == basic_block_head[b1]) + { + basic_block_head[b1] = NEXT_INSN (insn); + } + } + else + { + /* in block motion */ + sched_target_n_insns++; + } + + last_scheduled_insn = insn; + last = move_insn (insn, last); + sched_n_insns++; + + can_issue_more--; + +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count > 0) + sched_debug_count--; +#endif + + n_ready = schedule_insn (insn, ready, n_ready, clock_var); + + /* remove insn from ready list */ + ready[i] = ready[--n_ready]; + + /* close this block after scheduling its jump */ + if (GET_CODE (last_scheduled_insn) == JUMP_INSN) + break; + } + } + + /* debug info */ + if (sched_verbose) + { + visualize_scheduled_insns (b, clock_var); +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count == 0) + fprintf (dump, "........ sched_debug_count == 0 .................\n"); +#endif + } + } + + /* debug info */ + if (sched_verbose) + { + fprintf (dump, ";;\tReady list (final): "); + debug_ready_list (ready, n_ready); + print_block_visualization (b, ""); + } + + /* Sanity check -- queue must be empty now. Meaningless if region has + multiple bbs, or if scheduling stopped by sched_debug_count. */ + if (current_nr_blocks > 1) +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count != 0) +#endif + if (!flag_schedule_interblock && q_size != 0) + abort (); + + /* update head/tail boundaries. */ + head = NEXT_INSN (prev_head); + tail = last; + +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count == 0) + /* compensate for stopping scheduling prematurely */ + for (i = sched_target_n_insns; i < target_n_insns; i++) + tail = move_insn (group_leader (NEXT_INSN (tail)), tail); +#endif + + /* Restore-other-notes: NOTE_LIST is the end of a chain of notes + previously found among the insns. Insert them at the beginning + of the insns. */ + if (note_list != 0) + { + rtx note_head = note_list; + + while (PREV_INSN (note_head)) + { + note_head = PREV_INSN (note_head); + } + + PREV_INSN (note_head) = PREV_INSN (head); + NEXT_INSN (PREV_INSN (head)) = note_head; + PREV_INSN (head) = note_list; + NEXT_INSN (note_list) = head; + head = note_head; + } + + /* update target block boundaries. */ + if (new_needs & NEED_HEAD) + basic_block_head[b] = head; + + if (new_needs & NEED_TAIL) + basic_block_end[b] = tail; + + /* debugging */ + if (sched_verbose) + { + fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n", + clock_var, INSN_UID (basic_block_head[b])); + fprintf (dump, ";; new basic block end = %d\n\n", + INSN_UID (basic_block_end[b])); + } + + return (sched_n_insns); +} /* schedule_block () */ + + +/* print the bit-set of registers, S. callable from debugger */ + +extern void +debug_reg_vector (s) + regset s; +{ + int regno; + + EXECUTE_IF_SET_IN_REG_SET (s, 0, regno, + { + fprintf (dump, " %d", regno); + }); + + fprintf (dump, "\n"); +} + +/* Use the backward dependences from LOG_LINKS to build + forward dependences in INSN_DEPEND. */ + +static void +compute_block_forward_dependences (bb) + int bb; +{ + rtx insn, link; + rtx tail, head; + rtx next_tail; + enum reg_note dep_type; + + get_block_head_tail (bb, &head, &tail); + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + + insn = group_leader (insn); + + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + { + rtx x = group_leader (XEXP (link, 0)); + rtx new_link; + + if (x != XEXP (link, 0)) + continue; + + /* Ignore dependences upon deleted insn */ + if (GET_CODE (x) == NOTE || INSN_DELETED_P (x)) + continue; + if (find_insn_list (insn, INSN_DEPEND (x))) + continue; + + new_link = rtx_alloc (INSN_LIST); + + dep_type = REG_NOTE_KIND (link); + PUT_REG_NOTE_KIND (new_link, dep_type); + + XEXP (new_link, 0) = insn; + XEXP (new_link, 1) = INSN_DEPEND (x); + + INSN_DEPEND (x) = new_link; + INSN_DEP_COUNT (insn) += 1; + } + } +} + +/* Initialize variables for region data dependence analysis. + n_bbs is the number of region blocks */ + +__inline static void +init_rgn_data_dependences (n_bbs) + int n_bbs; +{ + int bb; + + /* variables for which one copy exists for each block */ + bzero ((char *) bb_pending_read_insns, n_bbs * sizeof (rtx)); + bzero ((char *) bb_pending_read_mems, n_bbs * sizeof (rtx)); + bzero ((char *) bb_pending_write_insns, n_bbs * sizeof (rtx)); + bzero ((char *) bb_pending_write_mems, n_bbs * sizeof (rtx)); + bzero ((char *) bb_pending_lists_length, n_bbs * sizeof (rtx)); + bzero ((char *) bb_last_pending_memory_flush, n_bbs * sizeof (rtx)); + bzero ((char *) bb_last_function_call, n_bbs * sizeof (rtx)); + bzero ((char *) bb_sched_before_next_call, n_bbs * sizeof (rtx)); + + /* Create an insn here so that we can hang dependencies off of it later. */ + for (bb = 0; bb < n_bbs; bb++) + { + bb_sched_before_next_call[bb] = + gen_rtx (INSN, VOIDmode, 0, NULL_RTX, NULL_RTX, + NULL_RTX, 0, NULL_RTX, 0); + LOG_LINKS (bb_sched_before_next_call[bb]) = 0; + } +} + +/* Add dependences so that branches are scheduled to run last in their block */ + +static void +add_branch_dependences (head, tail) + rtx head, tail; +{ + + rtx insn, last; + + /* For all branches, calls, uses, and cc0 setters, force them to remain + in order at the end of the block by adding dependencies and giving + the last a high priority. There may be notes present, and prev_head + may also be a note. + + Branches must obviously remain at the end. Calls should remain at the + end since moving them results in worse register allocation. Uses remain + at the end to ensure proper register allocation. cc0 setters remaim + at the end because they can't be moved away from their cc0 user. */ + insn = tail; + last = 0; + while (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN + || (GET_CODE (insn) == INSN + && (GET_CODE (PATTERN (insn)) == USE +#ifdef HAVE_cc0 + || sets_cc0_p (PATTERN (insn)) +#endif + )) + || GET_CODE (insn) == NOTE) + { + if (GET_CODE (insn) != NOTE) + { + if (last != 0 + && !find_insn_list (insn, LOG_LINKS (last))) + { + add_dependence (last, insn, REG_DEP_ANTI); + INSN_REF_COUNT (insn)++; + } + + CANT_MOVE (insn) = 1; + + last = insn; + /* Skip over insns that are part of a group. */ + while (SCHED_GROUP_P (insn)) + insn = prev_nonnote_insn (insn); + } + + /* Don't overrun the bounds of the basic block. */ + if (insn == head) + break; + + insn = PREV_INSN (insn); + } + + /* make sure these insns are scheduled last in their block */ + insn = last; + if (insn != 0) + while (insn != head) + { + insn = prev_nonnote_insn (insn); + + if (INSN_REF_COUNT (insn) != 0) + continue; + + if (!find_insn_list (last, LOG_LINKS (insn))) + add_dependence (last, insn, REG_DEP_ANTI); + INSN_REF_COUNT (insn) = 1; + + /* Skip over insns that are part of a group. */ + while (SCHED_GROUP_P (insn)) + insn = prev_nonnote_insn (insn); + } +} + +/* Compute bacward dependences inside BB. In a multiple blocks region: + (1) a bb is analyzed after its predecessors, and (2) the lists in + effect at the end of bb (after analyzing for bb) are inherited by + bb's successrs. + + Specifically for reg-reg data dependences, the block insns are + scanned by sched_analyze () top-to-bottom. Two lists are + naintained by sched_analyze (): reg_last_defs[] for register DEFs, + and reg_last_uses[] for register USEs. + + When analysis is completed for bb, we update for its successors: + ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb]) + ; - USES[succ] = Union (USES [succ], DEFS [bb]) + + The mechanism for computing mem-mem data dependence is very + similar, and the result is interblock dependences in the region. */ + +static void +compute_block_backward_dependences (bb) + int bb; +{ + int b; + rtx x; + rtx head, tail; + int max_reg = max_reg_num (); + + b = BB_TO_BLOCK (bb); + + if (current_nr_blocks == 1) + { + reg_last_uses = (rtx *) alloca (max_reg * sizeof (rtx)); + reg_last_sets = (rtx *) alloca (max_reg * sizeof (rtx)); + + bzero ((char *) reg_last_uses, max_reg * sizeof (rtx)); + bzero ((char *) reg_last_sets, max_reg * sizeof (rtx)); + + pending_read_insns = 0; + pending_read_mems = 0; + pending_write_insns = 0; + pending_write_mems = 0; + pending_lists_length = 0; + last_function_call = 0; + last_pending_memory_flush = 0; + sched_before_next_call + = gen_rtx (INSN, VOIDmode, 0, NULL_RTX, NULL_RTX, + NULL_RTX, 0, NULL_RTX, 0); + LOG_LINKS (sched_before_next_call) = 0; + } + else + { + reg_last_uses = bb_reg_last_uses[bb]; + reg_last_sets = bb_reg_last_sets[bb]; + + pending_read_insns = bb_pending_read_insns[bb]; + pending_read_mems = bb_pending_read_mems[bb]; + pending_write_insns = bb_pending_write_insns[bb]; + pending_write_mems = bb_pending_write_mems[bb]; + pending_lists_length = bb_pending_lists_length[bb]; + last_function_call = bb_last_function_call[bb]; + last_pending_memory_flush = bb_last_pending_memory_flush[bb]; + + sched_before_next_call = bb_sched_before_next_call[bb]; + } + + /* do the analysis for this block */ + get_block_head_tail (bb, &head, &tail); + sched_analyze (head, tail); + add_branch_dependences (head, tail); + + if (current_nr_blocks > 1) + { + int e, first_edge; + int b_succ, bb_succ; + int reg; + rtx link_insn, link_mem; + rtx u; + + /* these lists should point to the right place, for correct freeing later. */ + bb_pending_read_insns[bb] = pending_read_insns; + bb_pending_read_mems[bb] = pending_read_mems; + bb_pending_write_insns[bb] = pending_write_insns; + bb_pending_write_mems[bb] = pending_write_mems; + + /* bb's structures are inherited by it's successors */ + first_edge = e = OUT_EDGES (b); + if (e > 0) + do + { + b_succ = TO_BLOCK (e); + bb_succ = BLOCK_TO_BB (b_succ); + + /* only bbs "below" bb, in the same region, are interesting */ + if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ) + || bb_succ <= bb) + { + e = NEXT_OUT (e); + continue; + } + + for (reg = 0; reg < max_reg; reg++) + { + + /* reg-last-uses lists are inherited by bb_succ */ + for (u = reg_last_uses[reg]; u; u = XEXP (u, 1)) + { + if (find_insn_list (XEXP (u, 0), (bb_reg_last_uses[bb_succ])[reg])) + continue; + + (bb_reg_last_uses[bb_succ])[reg] + = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0), + (bb_reg_last_uses[bb_succ])[reg]); + } + + /* reg-last-defs lists are inherited by bb_succ */ + for (u = reg_last_sets[reg]; u; u = XEXP (u, 1)) + { + if (find_insn_list (XEXP (u, 0), (bb_reg_last_sets[bb_succ])[reg])) + continue; + + (bb_reg_last_sets[bb_succ])[reg] + = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0), + (bb_reg_last_sets[bb_succ])[reg]); + } + } + + /* mem read/write lists are inherited by bb_succ */ + link_insn = pending_read_insns; + link_mem = pending_read_mems; + while (link_insn) + { + if (!(find_insn_mem_list (XEXP (link_insn, 0), XEXP (link_mem, 0), + bb_pending_read_insns[bb_succ], + bb_pending_read_mems[bb_succ]))) + add_insn_mem_dependence (&bb_pending_read_insns[bb_succ], + &bb_pending_read_mems[bb_succ], + XEXP (link_insn, 0), XEXP (link_mem, 0)); + link_insn = XEXP (link_insn, 1); + link_mem = XEXP (link_mem, 1); + } + + link_insn = pending_write_insns; + link_mem = pending_write_mems; + while (link_insn) + { + if (!(find_insn_mem_list (XEXP (link_insn, 0), XEXP (link_mem, 0), + bb_pending_write_insns[bb_succ], + bb_pending_write_mems[bb_succ]))) + add_insn_mem_dependence (&bb_pending_write_insns[bb_succ], + &bb_pending_write_mems[bb_succ], + XEXP (link_insn, 0), XEXP (link_mem, 0)); + + link_insn = XEXP (link_insn, 1); + link_mem = XEXP (link_mem, 1); + } + + /* last_function_call is inherited by bb_succ */ + for (u = last_function_call; u; u = XEXP (u, 1)) + { + if (find_insn_list (XEXP (u, 0), bb_last_function_call[bb_succ])) + continue; + + bb_last_function_call[bb_succ] + = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0), + bb_last_function_call[bb_succ]); + } + + /* last_pending_memory_flush is inherited by bb_succ */ + for (u = last_pending_memory_flush; u; u = XEXP (u, 1)) + { + if (find_insn_list (XEXP (u, 0), bb_last_pending_memory_flush[bb_succ])) + continue; + + bb_last_pending_memory_flush[bb_succ] + = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0), + bb_last_pending_memory_flush[bb_succ]); + } + + /* sched_before_next_call is inherited by bb_succ */ + x = LOG_LINKS (sched_before_next_call); + for (; x; x = XEXP (x, 1)) + add_dependence (bb_sched_before_next_call[bb_succ], + XEXP (x, 0), REG_DEP_ANTI); + + e = NEXT_OUT (e); + } + while (e != first_edge); + } +} + +/* Print dependences for debugging, callable from debugger */ + +void +debug_dependencies () +{ + int bb; + + fprintf (dump, ";; --------------- forward dependences: ------------ \n"); + for (bb = 0; bb < current_nr_blocks; bb++) + { + if (1) + { + rtx head, tail; + rtx next_tail; + rtx insn; + + get_block_head_tail (bb, &head, &tail); + next_tail = NEXT_INSN (tail); + fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n", + BB_TO_BLOCK (bb), bb); + + fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n", + "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units"); + fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n", + "----", "----", "--", "---", "----", "----", "--------", "-----"); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + { + rtx link; + int unit, range; + + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + { + int n; + fprintf (dump, ";; %6d ", INSN_UID (insn)); + if (GET_CODE (insn) == NOTE) + switch (n = NOTE_LINE_NUMBER (insn)) + { + case NOTE_INSN_DELETED: + fprintf (dump, "NOTE_INSN_DELETED"); + break; + case NOTE_INSN_BLOCK_BEG: + fprintf (dump, "NOTE_INSN_BLOCK_BEG"); + break; + case NOTE_INSN_BLOCK_END: + fprintf (dump, "NOTE_INSN_BLOCK_END"); + break; + case NOTE_INSN_LOOP_BEG: + fprintf (dump, "NOTE_INSN_LOOP_BEG"); + break; + case NOTE_INSN_LOOP_END: + fprintf (dump, "NOTE_INSN_LOOP_END"); + break; + case NOTE_INSN_LOOP_CONT: + fprintf (dump, "NOTE_INSN_LOOP_CONT"); + break; + case NOTE_INSN_LOOP_VTOP: + fprintf (dump, "NOTE_INSN_LOOP_VTOP"); + break; + case NOTE_INSN_FUNCTION_BEG: + fprintf (dump, "NOTE_INSN_FUNCTION_BEG"); + break; + case NOTE_INSN_FUNCTION_END: + fprintf (dump, "NOTE_INSN_FUNCTION_END"); + break; + case NOTE_INSN_EH_REGION_BEG: + fprintf (dump, "NOTE_INSN_EH_REGION_BEG"); + break; + case NOTE_INSN_EH_REGION_END: + fprintf (dump, "NOTE_INSN_EH_REGION_END"); + break; + case NOTE_INSN_SETJMP: + fprintf (dump, "NOTE_INSN_SETJMP"); + break; + default: + if (n > 0) + fprintf (dump, "NOTE_LINE_NUMBER %d", n); + else + fprintf (dump, "??? UNRECOGNIZED NOTE %d", n); + } + fprintf (dump, "\n"); + continue; + } + + unit = insn_unit (insn); + range = (unit < 0 + || function_units[unit].blockage_range_function == 0) ? 0 : + function_units[unit].blockage_range_function (insn); + fprintf (dump, + ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ", + (SCHED_GROUP_P (insn) ? "+" : " "), + INSN_UID (insn), + INSN_CODE (insn), + INSN_BB (insn), + INSN_DEP_COUNT (insn), + INSN_PRIORITY (insn), + insn_cost (insn, 0, 0), + (int) MIN_BLOCKAGE_COST (range), + (int) MAX_BLOCKAGE_COST (range)); + insn_print_units (insn); + fprintf (dump, "\t: "); + for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) + fprintf (dump, "%d ", INSN_UID (XEXP (link, 0))); + fprintf (dump, "\n"); + } + } + } + fprintf (dump, "\n"); +} + +/* Set_priorities: compute priority of each insn in the block */ + +static int +set_priorities (bb) + int bb; +{ + rtx insn; + int n_insn; + + rtx tail; + rtx prev_head; + rtx head; + + get_block_head_tail (bb, &head, &tail); + prev_head = PREV_INSN (head); + + if (head == tail + && (GET_RTX_CLASS (GET_CODE (head)) != 'i')) + return 0; + + n_insn = 0; + for (insn = tail; insn != prev_head; insn = PREV_INSN (insn)) + { + + if (GET_CODE (insn) == NOTE) + continue; + + if (!(SCHED_GROUP_P (insn))) + n_insn++; + (void) priority (insn); + } + + return n_insn; +} + +/* Make each element of VECTOR point at an rtx-vector, + taking the space for all those rtx-vectors from SPACE. + SPACE is of type (rtx *), but it is really as long as NELTS rtx-vectors. + BYTES_PER_ELT is the number of bytes in one rtx-vector. + (this is the same as init_regset_vector () in flow.c) */ + +static void +init_rtx_vector (vector, space, nelts, bytes_per_elt) + rtx **vector; + rtx *space; + int nelts; + int bytes_per_elt; +{ + register int i; + register rtx *p = space; + + for (i = 0; i < nelts; i++) + { + vector[i] = p; + p += bytes_per_elt / sizeof (*p); + } +} + +/* Schedule a region. A region is either an inner loop, a loop-free + subroutine, or a single basic block. Each bb in the region is + scheduled after its flow predecessors. */ + +static void +schedule_region (rgn) + int rgn; +{ + int bb; + int rgn_n_insns = 0; + int sched_rgn_n_insns = 0; + + /* set variables for the current region */ + current_nr_blocks = RGN_NR_BLOCKS (rgn); + current_blocks = RGN_BLOCKS (rgn); + + reg_pending_sets = ALLOCA_REG_SET (); + reg_pending_sets_all = 0; + + /* initializations for region data dependence analyisis */ + if (current_nr_blocks > 1) + { + rtx *space; + int maxreg = max_reg_num (); + + bb_reg_last_uses = (rtx **) alloca (current_nr_blocks * sizeof (rtx *)); + space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx)); + bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx)); + init_rtx_vector (bb_reg_last_uses, space, current_nr_blocks, maxreg * sizeof (rtx *)); + + bb_reg_last_sets = (rtx **) alloca (current_nr_blocks * sizeof (rtx *)); + space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx)); + bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx)); + init_rtx_vector (bb_reg_last_sets, space, current_nr_blocks, maxreg * sizeof (rtx *)); + + bb_pending_read_insns = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + bb_pending_read_mems = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + bb_pending_write_insns = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + bb_pending_write_mems = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + bb_pending_lists_length = (int *) alloca (current_nr_blocks * sizeof (int)); + bb_last_pending_memory_flush = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + bb_last_function_call = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + bb_sched_before_next_call = (rtx *) alloca (current_nr_blocks * sizeof (rtx)); + + init_rgn_data_dependences (current_nr_blocks); + } + + /* compute LOG_LINKS */ + for (bb = 0; bb < current_nr_blocks; bb++) + compute_block_backward_dependences (bb); + + /* compute INSN_DEPEND */ + for (bb = current_nr_blocks - 1; bb >= 0; bb--) + compute_block_forward_dependences (bb); + + /* Delete line notes, compute live-regs at block end, and set priorities. */ + dead_notes = 0; + for (bb = 0; bb < current_nr_blocks; bb++) + { + if (reload_completed == 0) + find_pre_sched_live (bb); + + if (write_symbols != NO_DEBUG) + { + save_line_notes (bb); + rm_line_notes (bb); + } + + rgn_n_insns += set_priorities (bb); + } + + /* compute interblock info: probabilities, split-edges, dominators, etc. */ + if (current_nr_blocks > 1) + { + int i; + + prob = (float *) alloca ((current_nr_blocks) * sizeof (float)); + + bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1; + dom = (bbset *) alloca (current_nr_blocks * sizeof (bbset)); + for (i = 0; i < current_nr_blocks; i++) + { + dom[i] = (bbset) alloca (bbset_size * sizeof (HOST_WIDE_INT)); + bzero ((char *) dom[i], bbset_size * sizeof (HOST_WIDE_INT)); + } + + /* edge to bit */ + rgn_nr_edges = 0; + edge_to_bit = (int *) alloca (nr_edges * sizeof (int)); + for (i = 1; i < nr_edges; i++) + if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn) + EDGE_TO_BIT (i) = rgn_nr_edges++; + rgn_edges = (int *) alloca (rgn_nr_edges * sizeof (int)); + + rgn_nr_edges = 0; + for (i = 1; i < nr_edges; i++) + if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn)) + rgn_edges[rgn_nr_edges++] = i; + + /* split edges */ + edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1; + pot_split = (edgeset *) alloca (current_nr_blocks * sizeof (edgeset)); + ancestor_edges = (edgeset *) alloca (current_nr_blocks * sizeof (edgeset)); + for (i = 0; i < current_nr_blocks; i++) + { + pot_split[i] = + (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT)); + bzero ((char *) pot_split[i], + edgeset_size * sizeof (HOST_WIDE_INT)); + ancestor_edges[i] = + (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT)); + bzero ((char *) ancestor_edges[i], + edgeset_size * sizeof (HOST_WIDE_INT)); + } + + /* compute probabilities, dominators, split_edges */ + for (bb = 0; bb < current_nr_blocks; bb++) + compute_dom_prob_ps (bb); + } + + /* now we can schedule all blocks */ + for (bb = 0; bb < current_nr_blocks; bb++) + { + sched_rgn_n_insns += schedule_block (bb, rgn, rgn_n_insns); + +#ifdef USE_C_ALLOCA + alloca (0); +#endif + } + +#ifdef INTERBLOCK_DEBUG + if (sched_debug_count != 0) +#endif + /* sanity check: verify that all region insns were scheduled */ + if (sched_rgn_n_insns != rgn_n_insns) + abort (); + + /* update register life and usage information */ + if (reload_completed == 0) + { + for (bb = current_nr_blocks - 1; bb >= 0; bb--) + find_post_sched_live (bb); + + if (current_nr_blocks <= 1) + /* Sanity check. There should be no REG_DEAD notes leftover at the end. + In practice, this can occur as the result of bugs in flow, combine.c, + and/or sched.c. The values of the REG_DEAD notes remaining are + meaningless, because dead_notes is just used as a free list. */ + if (dead_notes != 0) + abort (); + } + + /* restore line notes. */ + if (write_symbols != NO_DEBUG) + { + for (bb = 0; bb < current_nr_blocks; bb++) + restore_line_notes (bb); + } + + /* Done with this region */ + free_pending_lists (); +} + +/* Subroutine of split_hard_reg_notes. Searches X for any reference to + REGNO, returning the rtx of the reference found if any. Otherwise, + returns 0. */ + +static rtx +regno_use_in (regno, x) + int regno; + rtx x; +{ + register char *fmt; + int i, j; + rtx tem; + + if (GET_CODE (x) == REG && REGNO (x) == regno) + return x; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if ((tem = regno_use_in (regno, XEXP (x, i)))) + return tem; + } + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if ((tem = regno_use_in (regno, XVECEXP (x, i, j)))) + return tem; + } + + return 0; +} + +/* Subroutine of update_flow_info. Determines whether any new REG_NOTEs are + needed for the hard register mentioned in the note. This can happen + if the reference to the hard register in the original insn was split into + several smaller hard register references in the split insns. */ + +static void +split_hard_reg_notes (note, first, last, orig_insn) + rtx note, first, last, orig_insn; +{ + rtx reg, temp, link; + int n_regs, i, new_reg; + rtx insn; + + /* Assume that this is a REG_DEAD note. */ + if (REG_NOTE_KIND (note) != REG_DEAD) + abort (); + + reg = XEXP (note, 0); + + n_regs = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); + + for (i = 0; i < n_regs; i++) + { + new_reg = REGNO (reg) + i; + + /* Check for references to new_reg in the split insns. */ + for (insn = last;; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && (temp = regno_use_in (new_reg, PATTERN (insn)))) + { + /* Create a new reg dead note ere. */ + link = rtx_alloc (EXPR_LIST); + PUT_REG_NOTE_KIND (link, REG_DEAD); + XEXP (link, 0) = temp; + XEXP (link, 1) = REG_NOTES (insn); + REG_NOTES (insn) = link; + + /* If killed multiple registers here, then add in the excess. */ + i += HARD_REGNO_NREGS (REGNO (temp), GET_MODE (temp)) - 1; + + break; + } + /* It isn't mentioned anywhere, so no new reg note is needed for + this register. */ + if (insn == first) + break; + } + } +} + +/* Subroutine of update_flow_info. Determines whether a SET or CLOBBER in an + insn created by splitting needs a REG_DEAD or REG_UNUSED note added. */ + +static void +new_insn_dead_notes (pat, insn, last, orig_insn) + rtx pat, insn, last, orig_insn; +{ + rtx dest, tem, set; + + /* PAT is either a CLOBBER or a SET here. */ + dest = XEXP (pat, 0); + + while (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SUBREG + || GET_CODE (dest) == STRICT_LOW_PART + || GET_CODE (dest) == SIGN_EXTRACT) + dest = XEXP (dest, 0); + + if (GET_CODE (dest) == REG) + { + for (tem = last; tem != insn; tem = PREV_INSN (tem)) + { + if (GET_RTX_CLASS (GET_CODE (tem)) == 'i' + && reg_overlap_mentioned_p (dest, PATTERN (tem)) + && (set = single_set (tem))) + { + rtx tem_dest = SET_DEST (set); + + while (GET_CODE (tem_dest) == ZERO_EXTRACT + || GET_CODE (tem_dest) == SUBREG + || GET_CODE (tem_dest) == STRICT_LOW_PART + || GET_CODE (tem_dest) == SIGN_EXTRACT) + tem_dest = XEXP (tem_dest, 0); + + if (!rtx_equal_p (tem_dest, dest)) + { + /* Use the same scheme as combine.c, don't put both REG_DEAD + and REG_UNUSED notes on the same insn. */ + if (!find_regno_note (tem, REG_UNUSED, REGNO (dest)) + && !find_regno_note (tem, REG_DEAD, REGNO (dest))) + { + rtx note = rtx_alloc (EXPR_LIST); + PUT_REG_NOTE_KIND (note, REG_DEAD); + XEXP (note, 0) = dest; + XEXP (note, 1) = REG_NOTES (tem); + REG_NOTES (tem) = note; + } + /* The reg only dies in one insn, the last one that uses + it. */ + break; + } + else if (reg_overlap_mentioned_p (dest, SET_SRC (set))) + /* We found an instruction that both uses the register, + and sets it, so no new REG_NOTE is needed for this set. */ + break; + } + } + /* If this is a set, it must die somewhere, unless it is the dest of + the original insn, and hence is live after the original insn. Abort + if it isn't supposed to be live after the original insn. + + If this is a clobber, then just add a REG_UNUSED note. */ + if (tem == insn) + { + int live_after_orig_insn = 0; + rtx pattern = PATTERN (orig_insn); + int i; + + if (GET_CODE (pat) == CLOBBER) + { + rtx note = rtx_alloc (EXPR_LIST); + PUT_REG_NOTE_KIND (note, REG_UNUSED); + XEXP (note, 0) = dest; + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + return; + } + + /* The original insn could have multiple sets, so search the + insn for all sets. */ + if (GET_CODE (pattern) == SET) + { + if (reg_overlap_mentioned_p (dest, SET_DEST (pattern))) + live_after_orig_insn = 1; + } + else if (GET_CODE (pattern) == PARALLEL) + { + for (i = 0; i < XVECLEN (pattern, 0); i++) + if (GET_CODE (XVECEXP (pattern, 0, i)) == SET + && reg_overlap_mentioned_p (dest, + SET_DEST (XVECEXP (pattern, + 0, i)))) + live_after_orig_insn = 1; + } + + if (!live_after_orig_insn) + abort (); + } + } +} + +/* Subroutine of update_flow_info. Update the value of reg_n_sets for all + registers modified by X. INC is -1 if the containing insn is being deleted, + and is 1 if the containing insn is a newly generated insn. */ + +static void +update_n_sets (x, inc) + rtx x; + int inc; +{ + rtx dest = SET_DEST (x); + + while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG + || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT) + dest = SUBREG_REG (dest); + + if (GET_CODE (dest) == REG) + { + int regno = REGNO (dest); + + if (regno < FIRST_PSEUDO_REGISTER) + { + register int i; + int endregno = regno + HARD_REGNO_NREGS (regno, GET_MODE (dest)); + + for (i = regno; i < endregno; i++) + REG_N_SETS (i) += inc; + } + else + REG_N_SETS (regno) += inc; + } +} + +/* Updates all flow-analysis related quantities (including REG_NOTES) for + the insns from FIRST to LAST inclusive that were created by splitting + ORIG_INSN. NOTES are the original REG_NOTES. */ + +static void +update_flow_info (notes, first, last, orig_insn) + rtx notes; + rtx first, last; + rtx orig_insn; +{ + rtx insn, note; + rtx next; + rtx orig_dest, temp; + rtx set; + + /* Get and save the destination set by the original insn. */ + + orig_dest = single_set (orig_insn); + if (orig_dest) + orig_dest = SET_DEST (orig_dest); + + /* Move REG_NOTES from the original insn to where they now belong. */ + + for (note = notes; note; note = next) + { + next = XEXP (note, 1); + switch (REG_NOTE_KIND (note)) + { + case REG_DEAD: + case REG_UNUSED: + /* Move these notes from the original insn to the last new insn where + the register is now set. */ + + for (insn = last;; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && reg_mentioned_p (XEXP (note, 0), PATTERN (insn))) + { + /* If this note refers to a multiple word hard register, it + may have been split into several smaller hard register + references, so handle it specially. */ + temp = XEXP (note, 0); + if (REG_NOTE_KIND (note) == REG_DEAD + && GET_CODE (temp) == REG + && REGNO (temp) < FIRST_PSEUDO_REGISTER + && HARD_REGNO_NREGS (REGNO (temp), GET_MODE (temp)) > 1) + split_hard_reg_notes (note, first, last, orig_insn); + else + { + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + } + + /* Sometimes need to convert REG_UNUSED notes to REG_DEAD + notes. */ + /* ??? This won't handle multiple word registers correctly, + but should be good enough for now. */ + if (REG_NOTE_KIND (note) == REG_UNUSED + && !dead_or_set_p (insn, XEXP (note, 0))) + PUT_REG_NOTE_KIND (note, REG_DEAD); + + /* The reg only dies in one insn, the last one that uses + it. */ + break; + } + /* It must die somewhere, fail it we couldn't find where it died. + + If this is a REG_UNUSED note, then it must be a temporary + register that was not needed by this instantiation of the + pattern, so we can safely ignore it. */ + if (insn == first) + { + /* After reload, REG_DEAD notes come sometimes an + instruction after the register actually dies. */ + if (reload_completed && REG_NOTE_KIND (note) == REG_DEAD) + { + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + break; + } + + if (REG_NOTE_KIND (note) != REG_UNUSED) + abort (); + + break; + } + } + break; + + case REG_WAS_0: + /* This note applies to the dest of the original insn. Find the + first new insn that now has the same dest, and move the note + there. */ + + if (!orig_dest) + abort (); + + for (insn = first;; insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && (temp = single_set (insn)) + && rtx_equal_p (SET_DEST (temp), orig_dest)) + { + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + /* The reg is only zero before one insn, the first that + uses it. */ + break; + } + /* If this note refers to a multiple word hard + register, it may have been split into several smaller + hard register references. We could split the notes, + but simply dropping them is good enough. */ + if (GET_CODE (orig_dest) == REG + && REGNO (orig_dest) < FIRST_PSEUDO_REGISTER + && HARD_REGNO_NREGS (REGNO (orig_dest), + GET_MODE (orig_dest)) > 1) + break; + /* It must be set somewhere, fail if we couldn't find where it + was set. */ + if (insn == last) + abort (); + } + break; + + case REG_EQUAL: + case REG_EQUIV: + /* A REG_EQUIV or REG_EQUAL note on an insn with more than one + set is meaningless. Just drop the note. */ + if (!orig_dest) + break; + + case REG_NO_CONFLICT: + /* These notes apply to the dest of the original insn. Find the last + new insn that now has the same dest, and move the note there. */ + + if (!orig_dest) + abort (); + + for (insn = last;; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && (temp = single_set (insn)) + && rtx_equal_p (SET_DEST (temp), orig_dest)) + { + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + /* Only put this note on one of the new insns. */ + break; + } + + /* The original dest must still be set someplace. Abort if we + couldn't find it. */ + if (insn == first) + { + /* However, if this note refers to a multiple word hard + register, it may have been split into several smaller + hard register references. We could split the notes, + but simply dropping them is good enough. */ + if (GET_CODE (orig_dest) == REG + && REGNO (orig_dest) < FIRST_PSEUDO_REGISTER + && HARD_REGNO_NREGS (REGNO (orig_dest), + GET_MODE (orig_dest)) > 1) + break; + /* Likewise for multi-word memory references. */ + if (GET_CODE (orig_dest) == MEM + && SIZE_FOR_MODE (orig_dest) > MOVE_MAX) + break; + abort (); + } + } + break; + + case REG_LIBCALL: + /* Move a REG_LIBCALL note to the first insn created, and update + the corresponding REG_RETVAL note. */ + XEXP (note, 1) = REG_NOTES (first); + REG_NOTES (first) = note; + + insn = XEXP (note, 0); + note = find_reg_note (insn, REG_RETVAL, NULL_RTX); + if (note) + XEXP (note, 0) = first; + break; + + case REG_EXEC_COUNT: + /* Move a REG_EXEC_COUNT note to the first insn created. */ + XEXP (note, 1) = REG_NOTES (first); + REG_NOTES (first) = note; + break; + + case REG_RETVAL: + /* Move a REG_RETVAL note to the last insn created, and update + the corresponding REG_LIBCALL note. */ + XEXP (note, 1) = REG_NOTES (last); + REG_NOTES (last) = note; + + insn = XEXP (note, 0); + note = find_reg_note (insn, REG_LIBCALL, NULL_RTX); + if (note) + XEXP (note, 0) = last; + break; + + case REG_NONNEG: + case REG_BR_PROB: + /* This should be moved to whichever instruction is a JUMP_INSN. */ + + for (insn = last;; insn = PREV_INSN (insn)) + { + if (GET_CODE (insn) == JUMP_INSN) + { + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + /* Only put this note on one of the new insns. */ + break; + } + /* Fail if we couldn't find a JUMP_INSN. */ + if (insn == first) + abort (); + } + break; + + case REG_INC: + /* reload sometimes leaves obsolete REG_INC notes around. */ + if (reload_completed) + break; + /* This should be moved to whichever instruction now has the + increment operation. */ + abort (); + + case REG_LABEL: + /* Should be moved to the new insn(s) which use the label. */ + for (insn = first; insn != NEXT_INSN (last); insn = NEXT_INSN (insn)) + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && reg_mentioned_p (XEXP (note, 0), PATTERN (insn))) + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, + XEXP (note, 0), REG_NOTES (insn)); + break; + + case REG_CC_SETTER: + case REG_CC_USER: + /* These two notes will never appear until after reorg, so we don't + have to handle them here. */ + default: + abort (); + } + } + + /* Each new insn created, except the last, has a new set. If the destination + is a register, then this reg is now live across several insns, whereas + previously the dest reg was born and died within the same insn. To + reflect this, we now need a REG_DEAD note on the insn where this + dest reg dies. + + Similarly, the new insns may have clobbers that need REG_UNUSED notes. */ + + for (insn = first; insn != last; insn = NEXT_INSN (insn)) + { + rtx pat; + int i; + + pat = PATTERN (insn); + if (GET_CODE (pat) == SET || GET_CODE (pat) == CLOBBER) + new_insn_dead_notes (pat, insn, last, orig_insn); + else if (GET_CODE (pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET + || GET_CODE (XVECEXP (pat, 0, i)) == CLOBBER) + new_insn_dead_notes (XVECEXP (pat, 0, i), insn, last, orig_insn); + } + } + + /* If any insn, except the last, uses the register set by the last insn, + then we need a new REG_DEAD note on that insn. In this case, there + would not have been a REG_DEAD note for this register in the original + insn because it was used and set within one insn. */ + + set = single_set (last); + if (set) + { + rtx dest = SET_DEST (set); + + while (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SUBREG + || GET_CODE (dest) == STRICT_LOW_PART + || GET_CODE (dest) == SIGN_EXTRACT) + dest = XEXP (dest, 0); + + if (GET_CODE (dest) == REG + /* Global registers are always live, so the code below does not + apply to them. */ + && (REGNO (dest) >= FIRST_PSEUDO_REGISTER + || ! global_regs[REGNO (dest)])) + { + rtx stop_insn = PREV_INSN (first); + + /* If the last insn uses the register that it is setting, then + we don't want to put a REG_DEAD note there. Search backwards + to find the first insn that sets but does not use DEST. */ + + insn = last; + if (reg_overlap_mentioned_p (dest, SET_SRC (set))) + { + for (insn = PREV_INSN (insn); insn != first; + insn = PREV_INSN (insn)) + { + if ((set = single_set (insn)) + && reg_mentioned_p (dest, SET_DEST (set)) + && ! reg_overlap_mentioned_p (dest, SET_SRC (set))) + break; + } + } + + /* Now find the first insn that uses but does not set DEST. */ + + for (insn = PREV_INSN (insn); insn != stop_insn; + insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && reg_mentioned_p (dest, PATTERN (insn)) + && (set = single_set (insn))) + { + rtx insn_dest = SET_DEST (set); + + while (GET_CODE (insn_dest) == ZERO_EXTRACT + || GET_CODE (insn_dest) == SUBREG + || GET_CODE (insn_dest) == STRICT_LOW_PART + || GET_CODE (insn_dest) == SIGN_EXTRACT) + insn_dest = XEXP (insn_dest, 0); + + if (insn_dest != dest) + { + note = rtx_alloc (EXPR_LIST); + PUT_REG_NOTE_KIND (note, REG_DEAD); + XEXP (note, 0) = dest; + XEXP (note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = note; + /* The reg only dies in one insn, the last one + that uses it. */ + break; + } + } + } + } + } + + /* If the original dest is modifying a multiple register target, and the + original instruction was split such that the original dest is now set + by two or more SUBREG sets, then the split insns no longer kill the + destination of the original insn. + + In this case, if there exists an instruction in the same basic block, + before the split insn, which uses the original dest, and this use is + killed by the original insn, then we must remove the REG_DEAD note on + this insn, because it is now superfluous. + + This does not apply when a hard register gets split, because the code + knows how to handle overlapping hard registers properly. */ + if (orig_dest && GET_CODE (orig_dest) == REG) + { + int found_orig_dest = 0; + int found_split_dest = 0; + + for (insn = first;; insn = NEXT_INSN (insn)) + { + set = single_set (insn); + if (set) + { + if (GET_CODE (SET_DEST (set)) == REG + && REGNO (SET_DEST (set)) == REGNO (orig_dest)) + { + found_orig_dest = 1; + break; + } + else if (GET_CODE (SET_DEST (set)) == SUBREG + && SUBREG_REG (SET_DEST (set)) == orig_dest) + { + found_split_dest = 1; + break; + } + } + + if (insn == last) + break; + } + + if (found_split_dest) + { + /* Search backwards from FIRST, looking for the first insn that uses + the original dest. Stop if we pass a CODE_LABEL or a JUMP_INSN. + If we find an insn, and it has a REG_DEAD note, then delete the + note. */ + + for (insn = first; insn; insn = PREV_INSN (insn)) + { + if (GET_CODE (insn) == CODE_LABEL + || GET_CODE (insn) == JUMP_INSN) + break; + else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' + && reg_mentioned_p (orig_dest, insn)) + { + note = find_regno_note (insn, REG_DEAD, REGNO (orig_dest)); + if (note) + remove_note (insn, note); + } + } + } + else if (!found_orig_dest) + { + /* This should never happen. */ + abort (); + } + } + + /* Update reg_n_sets. This is necessary to prevent local alloc from + converting REG_EQUAL notes to REG_EQUIV when splitting has modified + a reg from set once to set multiple times. */ + + { + rtx x = PATTERN (orig_insn); + RTX_CODE code = GET_CODE (x); + + if (code == SET || code == CLOBBER) + update_n_sets (x, -1); + else if (code == PARALLEL) + { + int i; + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + { + code = GET_CODE (XVECEXP (x, 0, i)); + if (code == SET || code == CLOBBER) + update_n_sets (XVECEXP (x, 0, i), -1); + } + } + + for (insn = first;; insn = NEXT_INSN (insn)) + { + x = PATTERN (insn); + code = GET_CODE (x); + + if (code == SET || code == CLOBBER) + update_n_sets (x, 1); + else if (code == PARALLEL) + { + int i; + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + { + code = GET_CODE (XVECEXP (x, 0, i)); + if (code == SET || code == CLOBBER) + update_n_sets (XVECEXP (x, 0, i), 1); + } + } + + if (insn == last) + break; + } + } +} + +/* Do the splitting of insns in the block b. */ + +static void +split_block_insns (b) + int b; +{ + rtx insn, next; + + for (insn = basic_block_head[b];; insn = next) + { + rtx prev; + rtx set; + + /* Can't use `next_real_insn' because that + might go across CODE_LABELS and short-out basic blocks. */ + next = NEXT_INSN (insn); + if (GET_CODE (insn) != INSN) + { + if (insn == basic_block_end[b]) + break; + + continue; + } + + /* Don't split no-op move insns. These should silently disappear + later in final. Splitting such insns would break the code + that handles REG_NO_CONFLICT blocks. */ + set = single_set (insn); + if (set && rtx_equal_p (SET_SRC (set), SET_DEST (set))) + { + if (insn == basic_block_end[b]) + break; + + /* Nops get in the way while scheduling, so delete them now if + register allocation has already been done. It is too risky + to try to do this before register allocation, and there are + unlikely to be very many nops then anyways. */ + if (reload_completed) + { + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + } + + continue; + } + + /* Split insns here to get max fine-grain parallelism. */ + prev = PREV_INSN (insn); + /* It is probably not worthwhile to try to split again in + the second pass. However, if flag_schedule_insns is not set, + the first and only (if any) scheduling pass is after reload. */ + if (reload_completed == 0 || ! flag_schedule_insns) + { + rtx last, first = PREV_INSN (insn); + rtx notes = REG_NOTES (insn); + last = try_split (PATTERN (insn), insn, 1); + if (last != insn) + { + /* try_split returns the NOTE that INSN became. */ + first = NEXT_INSN (first); + update_flow_info (notes, first, last, insn); + + PUT_CODE (insn, NOTE); + NOTE_SOURCE_FILE (insn) = 0; + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + if (insn == basic_block_head[b]) + basic_block_head[b] = first; + if (insn == basic_block_end[b]) + { + basic_block_end[b] = last; + break; + } + } + } + + if (insn == basic_block_end[b]) + break; + } +} + +/* The one entry point in this file. DUMP_FILE is the dump file for + this pass. */ + +void +schedule_insns (dump_file) + FILE *dump_file; +{ + + int max_uid; + int b; + int i; + rtx insn; + int rgn; + + int luid; + + /* disable speculative loads in their presence if cc0 defined */ +#ifdef HAVE_cc0 + flag_schedule_speculative_load = 0; +#endif + + /* Taking care of this degenerate case makes the rest of + this code simpler. */ + if (n_basic_blocks == 0) + return; + + /* set dump and sched_verbose for the desired debugging output. If no + dump-file was specified, but -fsched-verbose-N (any N), print to stderr. + For -fsched-verbose-N, N>=10, print everything to stderr. */ + sched_verbose = sched_verbose_param; + if (sched_verbose_param == 0 && dump_file) + sched_verbose = 1; + dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file); + + nr_inter = 0; + nr_spec = 0; + + /* Initialize the unused_*_lists. We can't use the ones left over from + the previous function, because gcc has freed that memory. We can use + the ones left over from the first sched pass in the second pass however, + so only clear them on the first sched pass. The first pass is before + reload if flag_schedule_insns is set, otherwise it is afterwards. */ + + if (reload_completed == 0 || !flag_schedule_insns) + { + unused_insn_list = 0; + unused_expr_list = 0; + } + + /* initialize issue_rate */ + issue_rate = get_issue_rate (); + + /* do the splitting first for all blocks */ + for (b = 0; b < n_basic_blocks; b++) + split_block_insns (b); + + max_uid = (get_max_uid () + 1); + + cant_move = (char *) alloca (max_uid * sizeof (char)); + bzero ((char *) cant_move, max_uid * sizeof (char)); + + fed_by_spec_load = (char *) alloca (max_uid * sizeof (char)); + bzero ((char *) fed_by_spec_load, max_uid * sizeof (char)); + + is_load_insn = (char *) alloca (max_uid * sizeof (char)); + bzero ((char *) is_load_insn, max_uid * sizeof (char)); + + insn_orig_block = (int *) alloca (max_uid * sizeof (int)); + insn_luid = (int *) alloca (max_uid * sizeof (int)); + + luid = 0; + for (b = 0; b < n_basic_blocks; b++) + for (insn = basic_block_head[b];; insn = NEXT_INSN (insn)) + { + INSN_BLOCK (insn) = b; + INSN_LUID (insn) = luid++; + + if (insn == basic_block_end[b]) + break; + } + + /* after reload, remove inter-blocks dependences computed before reload. */ + if (reload_completed) + { + int b; + rtx insn; + + for (b = 0; b < n_basic_blocks; b++) + for (insn = basic_block_head[b];; insn = NEXT_INSN (insn)) + { + rtx link; + + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + { + rtx x = XEXP (link, 0); + + if (INSN_BLOCK (x) != b) + remove_dependence (insn, x); + } + } + + if (insn == basic_block_end[b]) + break; + } + } + + nr_regions = 0; + rgn_table = (region *) alloca ((n_basic_blocks) * sizeof (region)); + rgn_bb_table = (int *) alloca ((n_basic_blocks) * sizeof (int)); + block_to_bb = (int *) alloca ((n_basic_blocks) * sizeof (int)); + containing_rgn = (int *) alloca ((n_basic_blocks) * sizeof (int)); + + /* compute regions for scheduling */ + if (reload_completed + || n_basic_blocks == 1 + || !flag_schedule_interblock) + { + find_single_block_region (); + } + else + { + /* an estimation for nr_edges is computed in is_cfg_nonregular () */ + nr_edges = 0; + + /* verify that a 'good' control flow graph can be built */ + if (is_cfg_nonregular () + || nr_edges <= 1) + { + find_single_block_region (); + } + else + { + /* build control flow graph */ + in_edges = (int *) alloca (n_basic_blocks * sizeof (int)); + out_edges = (int *) alloca (n_basic_blocks * sizeof (int)); + bzero ((char *) in_edges, n_basic_blocks * sizeof (int)); + bzero ((char *) out_edges, n_basic_blocks * sizeof (int)); + + edge_table = + (edge *) alloca ((nr_edges) * sizeof (edge)); + bzero ((char *) edge_table, + ((nr_edges) * sizeof (edge))); + build_control_flow (); + + /* identify reducible inner loops and compute regions */ + find_rgns (); + + if (sched_verbose >= 3) + { + debug_control_flow (); + debug_regions (); + } + + } + } + + /* Allocate data for this pass. See comments, above, + for what these vectors do. */ + insn_priority = (int *) alloca (max_uid * sizeof (int)); + insn_reg_weight = (int *) alloca (max_uid * sizeof (int)); + insn_tick = (int *) alloca (max_uid * sizeof (int)); + insn_costs = (short *) alloca (max_uid * sizeof (short)); + insn_units = (short *) alloca (max_uid * sizeof (short)); + insn_blockage = (unsigned int *) alloca (max_uid * sizeof (unsigned int)); + insn_ref_count = (int *) alloca (max_uid * sizeof (int)); + + /* Allocate for forward dependencies */ + insn_dep_count = (int *) alloca (max_uid * sizeof (int)); + insn_depend = (rtx *) alloca (max_uid * sizeof (rtx)); + + if (reload_completed == 0) + { + int i; + + sched_reg_n_calls_crossed = (int *) alloca (max_regno * sizeof (int)); + sched_reg_live_length = (int *) alloca (max_regno * sizeof (int)); + sched_reg_basic_block = (int *) alloca (max_regno * sizeof (int)); + bb_live_regs = ALLOCA_REG_SET (); + bzero ((char *) sched_reg_n_calls_crossed, max_regno * sizeof (int)); + bzero ((char *) sched_reg_live_length, max_regno * sizeof (int)); + + for (i = 0; i < max_regno; i++) + sched_reg_basic_block[i] = REG_BLOCK_UNKNOWN; + } + else + { + sched_reg_n_calls_crossed = 0; + sched_reg_live_length = 0; + bb_live_regs = 0; + } + init_alias_analysis (); + + if (write_symbols != NO_DEBUG) + { + rtx line; + + line_note = (rtx *) alloca (max_uid * sizeof (rtx)); + bzero ((char *) line_note, max_uid * sizeof (rtx)); + line_note_head = (rtx *) alloca (n_basic_blocks * sizeof (rtx)); + bzero ((char *) line_note_head, n_basic_blocks * sizeof (rtx)); + + /* Save-line-note-head: + Determine the line-number at the start of each basic block. + This must be computed and saved now, because after a basic block's + predecessor has been scheduled, it is impossible to accurately + determine the correct line number for the first insn of the block. */ + + for (b = 0; b < n_basic_blocks; b++) + for (line = basic_block_head[b]; line; line = PREV_INSN (line)) + if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0) + { + line_note_head[b] = line; + break; + } + } + + bzero ((char *) insn_priority, max_uid * sizeof (int)); + bzero ((char *) insn_reg_weight, max_uid * sizeof (int)); + bzero ((char *) insn_tick, max_uid * sizeof (int)); + bzero ((char *) insn_costs, max_uid * sizeof (short)); + bzero ((char *) insn_units, max_uid * sizeof (short)); + bzero ((char *) insn_blockage, max_uid * sizeof (unsigned int)); + bzero ((char *) insn_ref_count, max_uid * sizeof (int)); + + /* Initialize for forward dependencies */ + bzero ((char *) insn_depend, max_uid * sizeof (rtx)); + bzero ((char *) insn_dep_count, max_uid * sizeof (int)); + + /* Find units used in this fuction, for visualization */ + if (sched_verbose) + init_target_units (); + + /* ??? Add a NOTE after the last insn of the last basic block. It is not + known why this is done. */ + + insn = basic_block_end[n_basic_blocks - 1]; + if (NEXT_INSN (insn) == 0 + || (GET_CODE (insn) != NOTE + && GET_CODE (insn) != CODE_LABEL + /* Don't emit a NOTE if it would end up between an unconditional + jump and a BARRIER. */ + && !(GET_CODE (insn) == JUMP_INSN + && GET_CODE (NEXT_INSN (insn)) == BARRIER))) + emit_note_after (NOTE_INSN_DELETED, basic_block_end[n_basic_blocks - 1]); + + /* Schedule every region in the subroutine */ + for (rgn = 0; rgn < nr_regions; rgn++) + { + schedule_region (rgn); + +#ifdef USE_C_ALLOCA + alloca (0); +#endif + } + + /* Reposition the prologue and epilogue notes in case we moved the + prologue/epilogue insns. */ + if (reload_completed) + reposition_prologue_and_epilogue_notes (get_insns ()); + + /* delete redundant line notes. */ + if (write_symbols != NO_DEBUG) + rm_redundant_line_notes (); + + /* Update information about uses of registers in the subroutine. */ + if (reload_completed == 0) + update_reg_usage (); + + if (sched_verbose) + { + if (reload_completed == 0 && flag_schedule_interblock) + { + fprintf (dump, "\n;; Procedure interblock/speculative motions == %d/%d \n", + nr_inter, nr_spec); + } + else + { + if (nr_inter > 0) + abort (); + } + fprintf (dump, "\n\n"); + } +} +#endif /* INSN_SCHEDULING */ diff --git a/gcc/invoke.texi b/gcc/invoke.texi index 7a643d60aa1..2c7ae54b993 100644 --- a/gcc/invoke.texi +++ b/gcc/invoke.texi @@ -147,7 +147,7 @@ in the following sections. -ffunction-sections -finline-functions -fkeep-inline-functions -fno-default-inline -fno-defer-pop -fno-function-cse --fno-inline -fno-peephole -fomit-frame-pointer +-fno-inline -fno-peephole -fomit-frame-pointer -fregmove -frerun-cse-after-loop -fschedule-insns -fschedule-insns2 -fstrength-reduce -fthread-jumps -funroll-all-loops -funroll-loops @@ -2195,6 +2195,12 @@ used in one place: in @file{reorg.c}, instead of guessing which path a branch is mostly to take, the @samp{REG_BR_PROB} values are used to exactly determine which path is taken more often. @end ifset + +@item -fregmove +Some machines only support 2 operands per instruction. On such +machines, GNU CC might have to do extra copies. The @samp{-fregmove} +option overrides the default for the machine to do the copy before +register allocation. @end table @node Preprocessor Options diff --git a/gcc/loop.c b/gcc/loop.c index b54e677b928..2c4092d0c85 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -81,6 +81,42 @@ static rtx *loop_number_loop_starts, *loop_number_loop_ends; int *loop_outer_loop; +#ifdef HAIFA +/* The main output of analyze_loop_iterations is placed here */ + +int *loop_can_insert_bct; + +/* For each loop, determines whether some of its inner loops has used + count register */ + +int *loop_used_count_register; + +/* For each loop, remember its unrolling factor (if at all). + contents of the array: + 0/1: not unrolled. + -1: completely unrolled - no further instrumentation is needed. + >1: holds the exact amount of unrolling. */ + +int *loop_unroll_factor; +int *loop_unroll_iter; + +/* loop parameters for arithmetic loops. These loops have a loop variable + which is initialized to loop_start_value, incremented in each iteration + by "loop_increment". At the end of the iteration the loop variable is + compared to the loop_comparison_value (using loop_comparison_code). */ + +rtx *loop_increment; +rtx *loop_comparison_value; +rtx *loop_start_value; +enum rtx_code *loop_comparison_code; + +/* for debugging: selects sub-range of loops for which the bct optimization + is invoked. The numbering is per compilation-unit. */ +int dbg_bct_min = -1; +int dbg_bct_max = -1; +#endif /* HAIFA */ + + /* Indexed by loop number, contains a nonzero value if the "loop" isn't really a loop (an insn outside the loop branches into it). */ @@ -286,6 +322,32 @@ static int maybe_eliminate_biv_1 (); static int last_use_this_basic_block (); static void record_initial (); static void update_reg_last_use (); + +#ifdef HAIFA +/* This is extern from unroll.c */ +void iteration_info (); + +/* Two main functions for implementing bct: + first - to be called before loop unrolling, and the second - after */ +static void analyze_loop_iterations (); +static void insert_bct (); + +/* Auxiliary function that inserts the bct pattern into the loop */ +static void instrument_loop_bct (); + +/* Indirect_jump_in_function is computed once per function. */ +int indirect_jump_in_function = 0; +static int indirect_jump_in_function_p (); + +int loop_number (); +static int is_power_of_2(); +static int is_conditional_branch (); + +/* Debugging functions. */ +int fix_bct_param (); +static int check_bct_param (); +#endif /* HAIFA */ + /* Relative gain of eliminating various kinds of operations. */ int add_cost; @@ -379,6 +441,32 @@ loop_optimize (f, dumpfile) loop_number_exit_labels = (rtx *) alloca (max_loop_num * sizeof (rtx)); loop_number_exit_count = (int *) alloca (max_loop_num * sizeof (int)); +#ifdef HAIFA + /* Allocate for BCT optimization */ + loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int)); + bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int)); + + loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int)); + bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int)); + + loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int)); + bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int)); + + loop_unroll_iter = (int *) alloca (max_loop_num *sizeof (int)); + bzero ((char *) loop_unroll_iter, max_loop_num * sizeof (int)); + + loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx)); + loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx)); + loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx)); + bzero ((char *) loop_increment, max_loop_num * sizeof (rtx)); + bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx)); + bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx)); + + loop_comparison_code + = (enum rtx_code *) alloca (max_loop_num * sizeof (enum rtx_code)); + bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code)); +#endif /* HAIFA */ + /* Find and process each loop. First, find them, and record them in order of their beginnings. */ find_and_verify_loops (f); @@ -430,6 +518,12 @@ loop_optimize (f, dumpfile) if (flag_unroll_loops && write_symbols != NO_DEBUG) find_loop_tree_blocks (); +#ifdef HAIFA + /* determine if the function has indirect jump. If it does, + we cannot instrument loops in this function with bct */ + indirect_jump_in_function = indirect_jump_in_function_p (f); +#endif /* HAIFA */ + /* Now scan the loops, last ones first, since this means inner ones are done before outer ones. */ for (i = max_loop_num-1; i >= 0; i--) @@ -2639,6 +2733,11 @@ mark_loop_jump (x, loop_num) if (loop_num != -1) { +#ifdef HAIFA + LABEL_OUTSIDE_LOOP_P (x) = 1; + LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num]; +#endif /* HAIFA */ + loop_number_exit_labels[loop_num] = x; for (outer_loop = loop_num; outer_loop != -1; @@ -3755,6 +3854,16 @@ strength_reduce (scan_start, end, loop_top, insn_count, so that "decrement and branch until zero" insn can be used. */ check_dbra_loop (loop_end, insn_count, loop_start); +#ifdef HAIFA + /* record loop-variables relevant for BCT optimization before unrolling + the loop. Unrolling may update part of this information, and the + correct data will be used for generating the BCT. */ +#ifdef HAVE_decrement_and_branch_on_count + if (HAVE_decrement_and_branch_on_count) + analyze_loop_iterations (loop_start, loop_end); +#endif +#endif /* HAIFA */ + /* Create reg_map to hold substitutions for replaceable giv regs. */ reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx)); bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx)); @@ -4247,6 +4356,14 @@ strength_reduce (scan_start, end, loop_top, insn_count, if (flag_unroll_loops) unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1); +#ifdef HAIFA + /* instrument the loop with bct insn */ +#ifdef HAVE_decrement_and_branch_on_count + if (HAVE_decrement_and_branch_on_count) + insert_bct (loop_start, loop_end); +#endif +#endif /* HAIFA */ + if (loop_dump_stream) fprintf (loop_dump_stream, "\n"); } @@ -6932,3 +7049,638 @@ get_condition_for_loop (x) return gen_rtx (swap_condition (GET_CODE (comparison)), VOIDmode, XEXP (comparison, 1), XEXP (comparison, 0)); } + +#ifdef HAIFA +/* Analyze a loop in order to instrument it with the use of count register. + loop_start and loop_end are the first and last insns of the loop. + This function works in cooperation with insert_bct (). + loop_can_insert_bct[loop_num] is set according to whether the optimization + is applicable to the loop. When it is applicable, the following variables + are also set: + loop_start_value[loop_num] + loop_comparison_value[loop_num] + loop_increment[loop_num] + loop_comparison_code[loop_num] */ + +static +void analyze_loop_iterations (loop_start, loop_end) + rtx loop_start, loop_end; +{ + rtx comparison, comparison_value; + rtx iteration_var, initial_value, increment; + enum rtx_code comparison_code; + + rtx last_loop_insn; + rtx insn; + int i; + + /* loop_variable mode */ + enum machine_mode original_mode; + + /* find the number of the loop */ + int loop_num = loop_number (loop_start, loop_end); + + /* we change our mind only when we are sure that loop will be instrumented */ + loop_can_insert_bct[loop_num] = 0; + + /* debugging: do we wish to instrument this loop? */ + if ( !check_bct_param () ) + return; + + /* is the optimization suppressed. */ + if ( !flag_branch_on_count_reg ) + return; + + /* make sure that count-reg is not in use */ + if (loop_used_count_register[loop_num]){ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n", + loop_num); + return; + } + + /* make sure that the function has no indirect jumps. */ + if (indirect_jump_in_function){ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n", + loop_num); + return; + } + + /* make sure that the last loop insn is a conditional jump */ + last_loop_insn = PREV_INSN (loop_end); + if (!is_conditional_branch (last_loop_insn)) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n", + loop_num); + return; + } + + /* First find the iteration variable. If the last insn is a conditional + branch, and the insn preceding it tests a register value, make that + register the iteration variable. */ + + /* We used to use prev_nonnote_insn here, but that fails because it might + accidentally get the branch for a contained loop if the branch for this + loop was deleted. We can only trust branches immediately before the + loop_end. */ + + comparison = get_condition_for_loop (last_loop_insn); + /* ??? Get_condition may switch position of induction variable and + invariant register when it canonicalizes the comparison. */ + + if (comparison == 0) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n", + loop_num); + return; + } + + comparison_code = GET_CODE (comparison); + iteration_var = XEXP (comparison, 0); + comparison_value = XEXP (comparison, 1); + + original_mode = GET_MODE (iteration_var); + if (GET_MODE_CLASS (original_mode) != MODE_INT + || GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n", + loop_num); + return; + } + + /* get info about loop bounds and increment */ + iteration_info (iteration_var, &initial_value, &increment, + loop_start, loop_end); + + /* make sure that all required loop data were found */ + if (!(initial_value && increment && comparison_value + && invariant_p (comparison_value) && invariant_p (increment) + && ! indirect_jump_in_function)) + { + if (loop_dump_stream) { + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num); + if (!(initial_value && increment && comparison_value)) { + fprintf (loop_dump_stream, "\tbounds not available: "); + if ( ! initial_value ) + fprintf (loop_dump_stream, "initial "); + if ( ! increment ) + fprintf (loop_dump_stream, "increment "); + if ( ! comparison_value ) + fprintf (loop_dump_stream, "comparison "); + fprintf (loop_dump_stream, "\n"); + } + if (!invariant_p (comparison_value) || !invariant_p (increment)) + fprintf (loop_dump_stream, "\tloop bounds not invariant\n"); + } + return; + } + + /* make sure that the increment is constant */ + if (GET_CODE (increment) != CONST_INT) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n", + loop_num); + return; + } + + /* make sure that the loop contains neither function call, nor jump on table. + (the count register might be altered by the called function, and might + be used for a branch on table). */ + for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) { + if (GET_CODE (insn) == CALL_INSN){ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n", + loop_num); + return; + } + + if (GET_CODE (insn) == JUMP_INSN + && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + || GET_CODE (PATTERN (insn)) == ADDR_VEC)){ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n", + loop_num); + return; + } + } + + /* At this point, we are sure that the loop can be instrumented with BCT. + Some of the loops, however, will not be instrumented - the final decision + is taken by insert_bct () */ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n", + loop_num); + + /* mark all enclosing loops that they cannot use count register */ + /* ???: In fact, since insert_bct may decide not to instrument this loop, + marking here may prevent instrumenting an enclosing loop that could + actually be instrumented. But since this is rare, it is safer to mark + here in case the order of calling (analyze/insert)_bct would be changed. */ + for (i=loop_num; i != -1; i = loop_outer_loop[i]) + loop_used_count_register[i] = 1; + + /* Set data structures which will be used by the instrumentation phase */ + loop_start_value[loop_num] = initial_value; + loop_comparison_value[loop_num] = comparison_value; + loop_increment[loop_num] = increment; + loop_comparison_code[loop_num] = comparison_code; + loop_can_insert_bct[loop_num] = 1; +} + + +/* instrument loop for insertion of bct instruction. We distinguish between + loops with compile-time bounds, to those with run-time bounds. The loop + behaviour is analized according to the following characteristics/variables: + ; Input variables: + ; comparison-value: the value to which the iteration counter is compared. + ; initial-value: iteration-counter initial value. + ; increment: iteration-counter increment. + ; Computed variables: + ; increment-direction: the sign of the increment. + ; compare-direction: '1' for GT, GTE, '-1' for LT, LTE, '0' for NE. + ; range-direction: sign (comparison-value - initial-value) + We give up on the following cases: + ; loop variable overflow. + ; run-time loop bounds with comparison code NE. + */ + +static void +insert_bct (loop_start, loop_end) + rtx loop_start, loop_end; +{ + rtx initial_value, comparison_value, increment; + enum rtx_code comparison_code; + + int increment_direction, compare_direction; + int unsigned_p = 0; + + /* if the loop condition is <= or >=, the number of iteration + is 1 more than the range of the bounds of the loop */ + int add_iteration = 0; + + /* the only machine mode we work with - is the integer of the size that the + machine has */ + enum machine_mode loop_var_mode = SImode; + + int loop_num = loop_number (loop_start, loop_end); + + /* get loop-variables. No need to check that these are valid - already + checked in analyze_loop_iterations (). */ + comparison_code = loop_comparison_code[loop_num]; + initial_value = loop_start_value[loop_num]; + comparison_value = loop_comparison_value[loop_num]; + increment = loop_increment[loop_num]; + + /* check analyze_loop_iterations decision for this loop. */ + if (! loop_can_insert_bct[loop_num]){ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n", + loop_num); + return; + } + + /* make sure that the loop was not fully unrolled. */ + if (loop_unroll_factor[loop_num] == -1){ + if (loop_dump_stream) + fprintf (loop_dump_stream, "insert_bct %d: was completely unrolled\n", loop_num); + return; + } + + /* make sure that the last loop insn is a conditional jump . + This check is repeated from analyze_loop_iterations (), + because unrolling might have changed that. */ + if (!is_conditional_branch (PREV_INSN (loop_end))){ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: not instrumenting BCT because of invalid branch\n"); + return; + } + + /* fix increment in case loop was unrolled. */ + if (loop_unroll_factor[loop_num] > 1) + increment = GEN_INT ( INTVAL (increment) * loop_unroll_factor[loop_num] ); + + /* determine properties and directions of the loop */ + increment_direction = (INTVAL (increment) > 0) ? 1:-1; + switch ( comparison_code ) { + case LEU: + unsigned_p = 1; + /* fallthrough */ + case LE: + compare_direction = 1; + add_iteration = 1; + break; + case GEU: + unsigned_p = 1; + /* fallthrough */ + case GE: + compare_direction = -1; + add_iteration = 1; + break; + case EQ: + /* in this case we cannot know the number of iterations */ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: %d: loop cannot be instrumented: == in condition\n", + loop_num); + return; + case LTU: + unsigned_p = 1; + /* fallthrough */ + case LT: + compare_direction = 1; + break; + case GTU: + unsigned_p = 1; + /* fallthrough */ + case GT: + compare_direction = -1; + break; + case NE: + compare_direction = 0; + break; + default: + abort (); + } + + + /* make sure that the loop does not end by an overflow */ + if (compare_direction != increment_direction) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: %d: loop cannot be instrumented: terminated by overflow\n", + loop_num); + return; + } + + /* try to instrument the loop. */ + + /* Handle the simpler case, where the bounds are known at compile time. */ + if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT) + { + int n_iterations; + int increment_value_abs = INTVAL (increment) * increment_direction; + + /* check the relation between compare-val and initial-val */ + int difference = INTVAL (comparison_value) - INTVAL (initial_value); + int range_direction = (difference > 0) ? 1 : -1; + + /* make sure the loop executes enough iterations to gain from BCT */ + if (difference > -3 && difference < 3) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: loop %d not BCT instrumented: too small iteration count.\n", + loop_num); + return; + } + + /* make sure that the loop executes at least once */ + if ((range_direction == 1 && compare_direction == -1) + || (range_direction == -1 && compare_direction == 1)) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: loop %d: does not iterate even once. Not instrumenting.\n", + loop_num); + return; + } + + /* make sure that the loop does not end by an overflow (in compile time + bounds we must have an additional check for overflow, because here + we also support the compare code of 'NE'. */ + if (comparison_code == NE + && increment_direction != range_direction) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n", + loop_num); + return; + } + + /* Determine the number of iterations by: + ; + ; compare-val - initial-val + (increment -1) + additional-iteration + ; num_iterations = ----------------------------------------------------------------- + ; increment + */ + difference = (range_direction > 0) ? difference : -difference; +#if 0 + fprintf (stderr, "difference is: %d\n", difference); /* @*/ + fprintf (stderr, "increment_value_abs is: %d\n", increment_value_abs); /* @*/ + fprintf (stderr, "add_iteration is: %d\n", add_iteration); /* @*/ + fprintf (stderr, "INTVAL (comparison_value) is: %d\n", INTVAL (comparison_value)); /* @*/ + fprintf (stderr, "INTVAL (initial_value) is: %d\n", INTVAL (initial_value)); /* @*/ +#endif + + if (increment_value_abs == 0) { + fprintf (stderr, "insert_bct: error: increment == 0 !!!\n"); + abort (); + } + n_iterations = (difference + increment_value_abs - 1 + add_iteration) + / increment_value_abs; + +#if 0 + fprintf (stderr, "number of iterations is: %d\n", n_iterations); /* @*/ +#endif + instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations)); + + /* Done with this loop. */ + return; + } + + /* Handle the more complex case, that the bounds are NOT known at compile time. */ + /* In this case we generate run_time calculation of the number of iterations */ + + /* With runtime bounds, if the compare is of the form '!=' we give up */ + if (comparison_code == NE) { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: fail for loop %d: runtime bounds with != comparison\n", + loop_num); + return; + } + + else { + /* We rely on the existence of run-time guard to ensure that the + loop executes at least once. */ + rtx sequence; + rtx iterations_num_reg; + + int increment_value_abs = INTVAL (increment) * increment_direction; + + /* make sure that the increment is a power of two, otherwise (an + expensive) divide is needed. */ + if ( !is_power_of_2(increment_value_abs) ) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: not instrumenting BCT because the increment is not power of 2\n"); + return; + } + + /* compute the number of iterations */ + start_sequence (); + { + /* CYGNUS LOCAL: HAIFA bug fix */ + rtx temp_reg; + + /* Again, the number of iterations is calculated by: + ; + ; compare-val - initial-val + (increment -1) + additional-iteration + ; num_iterations = ----------------------------------------------------------------- + ; increment + */ + /* ??? Do we have to call copy_rtx here before passing rtx to + expand_binop? */ + if (compare_direction > 0) { + /* <, <= :the loop variable is increasing */ + temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value, + initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN); + } + else { + temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value, + comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN); + } + + if (increment_value_abs - 1 + add_iteration != 0) + temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg, + GEN_INT (increment_value_abs - 1 + add_iteration), + NULL_RTX, 0, OPTAB_LIB_WIDEN); + + if (increment_value_abs != 1) + { + /* ??? This will generate an expensive divide instruction for + most targets. The original authors apparently expected this + to be a shift, since they test for power-of-2 divisors above, + but just naively generating a divide instruction will not give + a shift. It happens to work for the PowerPC target because + the rs6000.md file has a divide pattern that emits shifts. + It will probably not work for any other target. */ + iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab, + temp_reg, + GEN_INT (increment_value_abs), + NULL_RTX, 0, OPTAB_LIB_WIDEN); + } + else + iterations_num_reg = temp_reg; + /* END CYGNUS LOCAL: HAIFA bug fix */ + } + sequence = gen_sequence (); + end_sequence (); + emit_insn_before (sequence, loop_start); + instrument_loop_bct (loop_start, loop_end, iterations_num_reg); + } +} + +/* instrument loop by inserting a bct in it. This is done in the following way: + 1. A new register is created and assigned the hard register number of the count + register. + 2. In the head of the loop the new variable is initialized by the value passed in the + loop_num_iterations parameter. + 3. At the end of the loop, comparison of the register with 0 is generated. + The created comparison follows the pattern defined for the + decrement_and_branch_on_count insn, so this insn will be generated in assembly + generation phase. + 4. The compare&branch on the old variable is deleted. So, if the loop-variable was + not used elsewhere, it will be eliminated by data-flow analisys. */ + +static void +instrument_loop_bct (loop_start, loop_end, loop_num_iterations) + rtx loop_start, loop_end; + rtx loop_num_iterations; +{ + rtx temp_reg1, temp_reg2; + rtx start_label; + + rtx sequence; + enum machine_mode loop_var_mode = SImode; + +#ifdef HAVE_decrement_and_branch_on_count + if (HAVE_decrement_and_branch_on_count) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, "Loop: Inserting BCT\n"); + + /* eliminate the check on the old variable */ + delete_insn (PREV_INSN (loop_end)); + delete_insn (PREV_INSN (loop_end)); + + /* insert the label which will delimit the start of the loop */ + start_label = gen_label_rtx (); + emit_label_after (start_label, loop_start); + + /* insert initialization of the count register into the loop header */ + start_sequence (); + temp_reg1 = gen_reg_rtx (loop_var_mode); + emit_insn (gen_move_insn (temp_reg1, loop_num_iterations)); + + /* this will be count register */ + temp_reg2 = gen_rtx (REG, loop_var_mode, COUNT_REGISTER_REGNUM); + /* we have to move the value to the count register from an GPR + because rtx pointed to by loop_num_iterations could contain + expression which cannot be moved into count register */ + emit_insn (gen_move_insn (temp_reg2, temp_reg1)); + + sequence = gen_sequence (); + end_sequence (); + emit_insn_after (sequence, loop_start); + + /* insert new comparison on the count register instead of the + old one, generating the needed BCT pattern (that will be + later recognized by assembly generation phase). */ + emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label), + loop_end); + LABEL_NUSES (start_label)++; + } + +#endif /* HAVE_decrement_and_branch_on_count */ +} + +/* calculate the uid of the given loop */ +int +loop_number (loop_start, loop_end) + rtx loop_start, loop_end; +{ + int loop_num = -1; + + /* assume that this insn contains the LOOP_START + note, so it will not be changed by the loop unrolling */ + loop_num = uid_loop_num[INSN_UID (loop_start)]; + /* sanity check - should never happen */ + if (loop_num == -1) + abort (); + + return loop_num; +} + +/* scan the function and determine whether it has indirect (computed) jump */ +static int +indirect_jump_in_function_p (start) + rtx start; +{ + rtx insn; + int is_indirect_jump = 0; + + for (insn = start; insn; insn = NEXT_INSN (insn)) { + if (GET_CODE (insn) == JUMP_INSN) { + if (GET_CODE (PATTERN (insn)) == SET) { + rtx insn_work_code = XEXP (PATTERN (insn), 1); + + if (GET_CODE (insn_work_code) == LABEL_REF) + continue; + if (GET_CODE (insn_work_code) == IF_THEN_ELSE) { + rtx jump_target = XEXP (insn_work_code, 1); + + if (jump_target == pc_rtx + || (GET_CODE (jump_target) == (enum rtx_code)LABEL_REF)) + continue; + } + } + is_indirect_jump = 1; + } + } + return is_indirect_jump; +} + +/* return 1 iff n is a power of 2 */ +static int +is_power_of_2(n) + int n; +{ + return (n & (n-1)) == 0; +} + +/* return 1 iff insn is a conditional jump */ +is_conditional_branch (insn) + rtx insn; +{ + rtx work_code; + if (GET_CODE (insn) != JUMP_INSN) + return 0; + work_code = PATTERN (insn); + if (GET_CODE (work_code) != SET) + return 0; + if (GET_CODE (XEXP (work_code, 1)) != IF_THEN_ELSE) + return 0; + return 1; +} + +/* debugging: fix_bct_param () is called from toplev.c upon detection + of the -fbct-***-N options. */ +int +fix_bct_param (param, val) + char *param, *val; +{ + if ( !strcmp (param, "max") ) + dbg_bct_max = atoi (val); + else if ( !strcmp (param, "min") ) + dbg_bct_min = atoi (val); +} + +/* debugging: return 1 if the loop should be instrumented, + according to bct-min/max. */ +static int +check_bct_param () +{ + static int dbg_bct_num = 0; + + dbg_bct_num++; + if (dbg_bct_num > dbg_bct_min || dbg_bct_min == -1) + if (dbg_bct_num <= dbg_bct_max || dbg_bct_max == -1) + return 1; + return 0; +} +#endif /* HAIFA */ +/* END CYGNUS LOCAL haifa */ diff --git a/gcc/loop.h b/gcc/loop.h index 3536fa1e20d..d8c83cdcab6 100644 --- a/gcc/loop.h +++ b/gcc/loop.h @@ -181,3 +181,13 @@ rtx final_biv_value PROTO((struct iv_class *, rtx, rtx)); rtx final_giv_value PROTO((struct induction *, rtx, rtx)); void emit_unrolled_add PROTO((rtx, rtx, rtx)); int back_branch_in_range_p PROTO((rtx, rtx, rtx)); + +#ifdef HAIFA +/* variables for interaction between unroll.c and loop.c, for + the insertion of branch-on-count instruction. */ +extern int *loop_unroll_factor; +extern rtx *loop_start_value; +extern int *loop_unroll_iter; +extern int loop_number(); +#endif /* HAIFA */ + diff --git a/gcc/regmove.c b/gcc/regmove.c new file mode 100644 index 00000000000..639059b3686 --- /dev/null +++ b/gcc/regmove.c @@ -0,0 +1,983 @@ +/* Move registers around to reduce number of move instructions needed. + Copyright (C) 1987, 88, 89, 92-5, 1996, 1997 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +/* This module looks for cases where matching constraints would force + an instruction to need a reload, and this reload would be a register + to register move. It then attempts to change the registers used by the + instruction to avoid the move instruction. */ + +#include "config.h" +#ifdef __STDC__ +#include +#else +#include +#endif + +/* Must precede rtl.h for FFS. */ +#include + +#include "rtl.h" +#include "insn-config.h" +#include "recog.h" +#include "output.h" +#include "reload.h" +#include "regs.h" + +static int stable_but_for_p PROTO((rtx, rtx, rtx)); + +#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) \ + || defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) + +/* INC_INSN is an instruction that adds INCREMENT to REG. + Try to fold INC_INSN as a post/pre in/decrement into INSN. + Iff INC_INSN_SET is nonzero, inc_insn has a destination different from src. + Return nonzero for success. */ +static int +try_auto_increment (insn, inc_insn, inc_insn_set, reg, increment, pre) + rtx reg, insn, inc_insn ,inc_insn_set; + HOST_WIDE_INT increment; + int pre; +{ + enum rtx_code inc_code; + + rtx pset = single_set (insn); + if (pset) + { + /* Can't use the size of SET_SRC, we might have something like + (sign_extend:SI (mem:QI ... */ + rtx use = find_use_as_address (pset, reg, 0); + if (use != 0 && use != (rtx) 1) + { + int size = GET_MODE_SIZE (GET_MODE (use)); + if (0 +#ifdef HAVE_POST_INCREMENT + || (pre == 0 && (inc_code = POST_INC, increment == size)) +#endif +#ifdef HAVE_PRE_INCREMENT + || (pre == 1 && (inc_code = PRE_INC, increment == size)) +#endif +#ifdef HAVE_POST_DECREMENT + || (pre == 0 && (inc_code = POST_DEC, increment == -size)) +#endif +#ifdef HAVE_PRE_DECREMENT + || (pre == 1 && (inc_code = PRE_DEC, increment == -size)) +#endif + ) + { + if (inc_insn_set) + validate_change + (inc_insn, + &SET_SRC (inc_insn_set), + XEXP (SET_SRC (inc_insn_set), 0), 1); + validate_change (insn, &XEXP (use, 0), + gen_rtx (inc_code, + Pmode, + reg), 1); + if (apply_change_group ()) + { + REG_NOTES (insn) + = gen_rtx (EXPR_LIST, REG_INC, + reg, REG_NOTES (insn)); + if (! inc_insn_set) + { + PUT_CODE (inc_insn, NOTE); + NOTE_LINE_NUMBER (inc_insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (inc_insn) = 0; + } + return 1; + } + } + } + } + return 0; +} +#endif /* defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) */ + +void +regmove_optimize (f, nregs, regmove_dump_file) + rtx f; + int nregs; + FILE *regmove_dump_file; +{ +#ifdef REGISTER_CONSTRAINTS + rtx insn; + int matches[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS]; + int modified[MAX_RECOG_OPERANDS]; + int early_clobber[MAX_RECOG_OPERANDS]; + int commutative; + int pass; + + /* A forward/backward pass. Replace output operands with input operands. */ + + for (pass = 0; pass < 2; pass++) + { + if (regmove_dump_file) + fprintf (regmove_dump_file, "Starting %s pass...\n", + pass ? "backward" : "forward"); + + for (insn = pass ? get_last_insn () : f; insn; + insn = pass ? PREV_INSN (insn) : NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + int insn_code_number = recog_memoized (insn); + int operand_number, match_number; + + if (insn_code_number < 0) + continue; + + insn_extract (insn); + if (! constrain_operands (insn_code_number, 0)) + continue; + + commutative = -1; + + /* Must initialize this before the loop, because the code for + the commutative case may set matches for operands other than + the current one. */ + bzero (matches, sizeof (matches)); + + for (operand_number = 0; + operand_number < insn_n_operands[insn_code_number]; + operand_number++) + { + int output_operand = 0; + int matching_operand = operand_number; + char *p, c; + int i = 0; + + modified[operand_number] = 0; + early_clobber[operand_number] = 0; + + p = insn_operand_constraint[insn_code_number][operand_number]; + + if (*p == '=') + modified[operand_number] = 2; + else if (*p == '+') + modified[operand_number] = 1; + + for (;*p && i < which_alternative; p++) + if (*p == ',') + i++; + + while ((c = *p++) != '\0' && c != ',') + switch (c) + { + case '=': + break; + case '+': + break; + case '&': + early_clobber[operand_number] = 1; + break; + case '%': + commutative = operand_number; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c -= '0'; + matches[operand_number][c] = 1; + if (commutative >= 0) + { + if (c == commutative || c == commutative + 1) + { + int other = c + (c == commutative ? 1 : -1); + matches[operand_number][other] = 1; + } + if (operand_number == commutative + || operand_number == commutative + 1) + { + int other = (operand_number + + (operand_number == commutative + ? 1 : -1)); + matches[other][c] = 1; + } + } + break; + } + } + + /* Now scan through the operands looking for a source operand + which is supposed to match the destination operand. + Then scan forward for an instruction which uses the dest + operand. + If it dies there, then replace the dest in both operands with + the source operand. */ + + for (operand_number = 0; + operand_number < insn_n_operands[insn_code_number]; + operand_number++) + { + for (match_number = 0; + match_number < insn_n_operands[insn_code_number]; + match_number++) + { + rtx set, p, src, dst, src_subreg; + rtx post_inc = 0, post_inc_set = 0, search_end = 0; + rtx src_note, dst_note; + int success = 0; + int num_calls = 0; + enum rtx_code code = NOTE; + HOST_WIDE_INT insn_const, newconst; + rtx overlap = 0; /* need to move insn ? */ + + /* Nothing to do if the two operands aren't supposed to + match. */ + if (matches[operand_number][match_number] == 0) + continue; + + src = recog_operand[operand_number]; + dst = recog_operand[match_number]; + + if (GET_CODE (src) != REG + || REGNO (src) < FIRST_PSEUDO_REGISTER) + continue; + + src_subreg = src; + if (GET_CODE (dst) == SUBREG + && GET_MODE_SIZE (GET_MODE (dst)) + >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (dst)))) + { + src_subreg + = gen_rtx(SUBREG, GET_MODE (SUBREG_REG (dst)), + src, SUBREG_WORD (dst)); + dst = SUBREG_REG (dst); + } + if (GET_CODE (dst) != REG + || REGNO (dst) < FIRST_PSEUDO_REGISTER) + continue; + + /* If the operands already match, then there is nothing + to do. */ + if (operands_match_p (src, dst)) + continue; + + set = single_set (insn); + if (! set) + continue; + + /* operand_number/src must be a read-only operand, and + match_operand/dst must be a write-only operand. */ + if (modified[match_number] != 2) + continue; + + if (early_clobber[match_number] == 1) + continue; + + if (modified[operand_number] != 0) + continue; + + /* Make sure match_operand is the destination. */ + if (recog_operand[match_number] != SET_DEST (set)) + continue; + + src_note = find_reg_note (insn, REG_DEAD, src); + + if (! src_note) + { + /* Look for (set (regX) (op regA constX)) + (set (regY) (op regA constY)) + and change that to + (set (regA) (op regA constX)). + (set (regY) (op regA constY-constX)). + This works for add and shift operations, if + regA is dead after or set by the second insn. */ + + code = GET_CODE (SET_SRC (set)); + if ((code == PLUS || code == LSHIFTRT + || code == ASHIFT || code == ASHIFTRT) + && XEXP (SET_SRC (set), 0) == src + && (GET_CODE (XEXP (SET_SRC (set), 1)) + == CONST_INT)) + insn_const = INTVAL (XEXP (SET_SRC (set), 1)); + else if (! stable_but_for_p (SET_SRC (set), src, dst)) + continue; + else + /* We might find a src_note while scanning. */ + code = NOTE; + } + + if (regmove_dump_file) + fprintf (regmove_dump_file, + "Could fix operand %d of insn %d matching operand %d.\n", + operand_number, INSN_UID (insn), match_number); + + /* ??? If src is set once, and is set equal to a + constant, then do not use it for this optimization, + as this would make it no longer equivalent to a + constant? */ + + /* Scan forward to find the next instruction that + uses the output operand. If the operand dies here, + then replace it in both instructions with + operand_number. */ + + for (p = NEXT_INSN (insn); p; p = NEXT_INSN (p)) + { + if (GET_CODE (p) == CODE_LABEL + || GET_CODE (p) == JUMP_INSN + || (GET_CODE (p) == NOTE + && ((NOTE_LINE_NUMBER (p) + == NOTE_INSN_LOOP_BEG) + || (NOTE_LINE_NUMBER (p) + == NOTE_INSN_LOOP_END)))) + break; + + if (GET_RTX_CLASS (GET_CODE (p)) != 'i') + continue; + + if (reg_set_p (src, p) || reg_set_p (dst, p) + || (GET_CODE (PATTERN (p)) == USE + && reg_overlap_mentioned_p (src, + XEXP (PATTERN (p), + 0)))) + break; + + /* See if all of DST dies in P. This test is + slightly more conservative than it needs to be. */ + if ((dst_note + = find_regno_note (p, REG_DEAD, REGNO (dst))) + && (GET_MODE (XEXP (dst_note, 0)) + == GET_MODE (dst))) + { + if (! src_note) + { + rtx q; + rtx set2; + + /* If an optimization is done, the value + of SRC while P is executed will be + changed. Check that this is OK. */ + if (reg_overlap_mentioned_p (src, + PATTERN (p))) + break; + for (q = p; q; q = NEXT_INSN (q)) + { + if (GET_CODE (q) == CODE_LABEL + || GET_CODE (q) == JUMP_INSN + || (GET_CODE (q) == NOTE + && ((NOTE_LINE_NUMBER (q) + == NOTE_INSN_LOOP_BEG) + || (NOTE_LINE_NUMBER (q) + == NOTE_INSN_LOOP_END)))) + { + q = 0; + break; + } + if (GET_RTX_CLASS (GET_CODE (q)) != 'i') + continue; + if (reg_overlap_mentioned_p (src, + PATTERN (q)) + || reg_set_p (src, q)) + break; + } + if (q) + set2 = single_set (q); + if (! q || ! set2 + || GET_CODE (SET_SRC (set2)) != code + || XEXP (SET_SRC (set2), 0) != src + || (GET_CODE (XEXP (SET_SRC (set2), 1)) + != CONST_INT) + || (SET_DEST (set2) != src + && !find_reg_note (q, REG_DEAD, src))) + { + /* If this is a PLUS, we can still save + a register by doing + src += insn_const; + P; + src -= insn_const; . + This also gives opportunities for + subsequent optimizations in the + backward pass, so do it there. */ + if (code == PLUS && pass == 1 +#ifdef HAVE_cc0 + /* We man not emit an insn directly + after P if the latter sets CC0. */ + && ! sets_cc0_p (PATTERN (p)) +#endif + ) + + { + search_end = q; + q = insn; + set2 = set; + newconst = -insn_const; + code = MINUS; + } + else + break; + } + else + { + newconst + = (INTVAL (XEXP (SET_SRC (set2), 1)) + - insn_const); + /* Reject out of range shifts. */ + if (code != PLUS + && (newconst < 0 + || (newconst + >= GET_MODE_BITSIZE (GET_MODE (SET_SRC (set2)))))) + break; + if (code == PLUS) + { + post_inc = q; + if (SET_DEST (set2) != src) + post_inc_set = set2; + } + } + /* We use 1 as last argument to + validate_change so that all changes + are accepted or rejected together by + apply_change_group when it is called + by validate_replace_rtx . */ + validate_change (q, &XEXP (SET_SRC (set2), 1), + GEN_INT (newconst), 1); + } + validate_change (insn, + recog_operand_loc[match_number], + src, 1); + if (validate_replace_rtx (dst, src_subreg, p)) + success = 1; + break; + } + + if (reg_overlap_mentioned_p (dst, PATTERN (p))) + break; + if (! src_note + && reg_overlap_mentioned_p (src, PATTERN (p))) + { + /* INSN was already checked to be movable when + we found no REG_DEAD note for src on it. */ + overlap = p; + src_note = find_reg_note (p, REG_DEAD, src); + } + + /* If we have passed a call instruction, and the + pseudo-reg SRC is not already live across a call, + then don't perform the optimization. */ + if (GET_CODE (p) == CALL_INSN) + { + num_calls++; + + if (REG_N_CALLS_CROSSED (REGNO (src)) == 0) + break; + } + } + + if (success) + { + /* Remove the death note for DST from P. */ + remove_note (p, dst_note); + if (code == MINUS) + { + post_inc + = emit_insn_after (copy_rtx (PATTERN (insn)), + p); +#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) + if (search_end + && try_auto_increment (search_end, post_inc, + 0, src, newconst, 1)) + post_inc = 0; +#endif + validate_change (insn, &XEXP (SET_SRC (set), 1), + GEN_INT (insn_const), 0); + REG_N_SETS (REGNO (src))++; + } + if (overlap) + { + /* The lifetime of src and dest overlap, + but we can change this by moving insn. */ + rtx pat = PATTERN (insn); + if (src_note) + remove_note (overlap, src_note); +#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) + if (code == PLUS + && try_auto_increment (overlap, insn, 0, + src, insn_const, 0)) + insn = overlap; + else +#endif + { + emit_insn_after_with_line_notes + (pat, PREV_INSN (p), insn); + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + /* emit_insn_after_with_line_notes + has no return value, so search + for the new insn. */ + for (insn = p; PATTERN (insn) != pat; ) + insn = PREV_INSN (insn); + } + } + /* Sometimes we'd generate src = const; src += n; + if so, replace the instruction that set src + in the first place. */ + + if (! overlap && (code == PLUS || code == MINUS)) + { + rtx note + = find_reg_note (insn, REG_EQUAL, NULL_RTX); + rtx q, set2; + int num_calls2 = 0; + + if (note && CONSTANT_P (XEXP (note, 0))) + { + for (q = PREV_INSN (insn); q; + q = PREV_INSN(q)) + { + if (GET_CODE (q) == JUMP_INSN) + { + q = 0; + break; + } + if (GET_RTX_CLASS (GET_CODE (q)) != 'i') + continue; + if (reg_set_p (src, q)) + { + set2 = single_set (q); + break; + } + if (reg_overlap_mentioned_p (src, + PATTERN (q))) + { + q = 0; + break; + } + if (GET_CODE (p) == CALL_INSN) + num_calls2++; + } + if (q && set2 && SET_DEST (set2) == src + && CONSTANT_P (SET_SRC (set2)) + && validate_change (insn, &SET_SRC (set), + XEXP (note, 0), 0)) + { + PUT_CODE (q, NOTE); + NOTE_LINE_NUMBER (q) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (q) = 0; + REG_N_SETS (REGNO (src))--; + REG_N_CALLS_CROSSED (REGNO (src)) + -= num_calls2; + insn_const = 0; + } + } + } + if (0) ; +#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) + else if ((code == PLUS || code == MINUS) + && insn_const + && try_auto_increment (p, insn, 0, + src, insn_const, 1)) + insn = p; +#endif +#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) + else if (post_inc + && try_auto_increment (p, post_inc, + post_inc_set, src, + newconst, 0)) + post_inc = 0; +#endif +#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) + /* If post_inc still prevails, try to find an + insn where it can be used as a pre-in/decrement. + If code is MINUS, this was already tried. */ + if (post_inc && code == PLUS + /* Check that newconst is likely to be usable + in a pre-in/decrement before starting the + search. */ + && (0 +#if defined (HAVE_PRE_INCREMENT) + || (newconst > 0 && newconst <= MOVE_MAX) +#endif +#if defined (HAVE_PRE_DECREMENT) + || (newconst < 0 && newconst >= -MOVE_MAX) +#endif + ) && exact_log2 (newconst)) + { + rtx q, inc_dest; + + inc_dest + = post_inc_set ? SET_DEST (post_inc_set) : src; + for (q = post_inc; q = NEXT_INSN (q); ) + { + if (GET_CODE (q) == CODE_LABEL + || GET_CODE (q) == JUMP_INSN + || (GET_CODE (q) == NOTE + && ((NOTE_LINE_NUMBER (q) + == NOTE_INSN_LOOP_BEG) + || (NOTE_LINE_NUMBER (q) + == NOTE_INSN_LOOP_END)))) + break; + if (GET_RTX_CLASS (GET_CODE (q)) != 'i') + continue; + if (src != inc_dest + && (reg_overlap_mentioned_p (src, + PATTERN (q)) + || reg_set_p (src, q))) + break; + if (reg_set_p (inc_dest, q)) + break; + if (reg_overlap_mentioned_p (inc_dest, + PATTERN (q))) + { + try_auto_increment (q, post_inc, + post_inc_set, + inc_dest, + newconst, 1); + break; + } + } + } +#endif /* defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) */ + /* Move the death note for DST to INSN if it is used + there. */ + if (reg_overlap_mentioned_p (dst, PATTERN (insn))) + { + XEXP (dst_note, 1) = REG_NOTES (insn); + REG_NOTES (insn) = dst_note; + } + + if (src_note) + { + /* Move the death note for SRC from INSN to P. */ + if (! overlap) + remove_note (insn, src_note); + XEXP (src_note, 1) = REG_NOTES (p); + REG_NOTES (p) = src_note; + + REG_N_CALLS_CROSSED (REGNO (src)) += num_calls; + } + + REG_N_SETS (REGNO (src))++; + REG_N_SETS (REGNO (dst))--; + + REG_N_CALLS_CROSSED (REGNO (dst)) -= num_calls; + + /* ??? Must adjust reg_live_length, and reg_n_refs for + both registers. Must keep track of loop_depth in + order to get reg_n_refs adjustment correct. */ + + if (regmove_dump_file) + fprintf (regmove_dump_file, + "Fixed operand %d of insn %d matching operand %d.\n", + operand_number, INSN_UID (insn), + match_number); + + goto done_forwards; + } + } + } + done_forwards: + ; + } + } + } + + /* A backward pass. Replace input operands with output operands. */ + + if (regmove_dump_file) + fprintf (regmove_dump_file, "Starting backward pass...\n"); + + for (insn = get_last_insn (); insn; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + int insn_code_number = recog_memoized (insn); + int operand_number, match_number; + + if (insn_code_number < 0) + continue; + + insn_extract (insn); + if (! constrain_operands (insn_code_number, 0)) + continue; + + commutative = -1; + + /* Must initialize this before the loop, because the code for + the commutative case may set matches for operands other than + the current one. */ + bzero (matches, sizeof (matches)); + + for (operand_number = 0; + operand_number < insn_n_operands[insn_code_number]; + operand_number++) + { + int output_operand = 0; + int matching_operand = operand_number; + char *p, c; + int i = 0; + + modified[operand_number] = 0; + early_clobber[operand_number] = 0; + + p = insn_operand_constraint[insn_code_number][operand_number]; + + if (*p == '=') + modified[operand_number] = 2; + else if (*p == '+') + modified[operand_number] = 1; + + for (; *p && i < which_alternative; p++) + if (*p == ',') + i++; + + while ((c = *p++) != '\0' && c != ',') + switch (c) + { + case '=': + break; + case '+': + break; + case '&': + early_clobber[operand_number] = 1; + break; + case '%': + commutative = operand_number; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c -= '0'; + matches[c][operand_number] = 1; + if (commutative >= 0) + { + if (c == commutative || c == commutative + 1) + { + int other = c + (c == commutative ? 1 : -1); + matches[other][operand_number] = 1; + } + if (operand_number == commutative + || operand_number == commutative + 1) + { + int other = (operand_number + + (operand_number == commutative + ? 1 : -1)); + matches[c][other] = 1; + } + } + break; + } + } + + /* Now scan through the operands looking for a destination operand + which is supposed to match a source operand. + Then scan backward for an instruction which sets the source + operand. If safe, then replace the source operand with the + dest operand in both instructions. */ + + for (operand_number = 0; + operand_number < insn_n_operands[insn_code_number]; + operand_number++) + { + for (match_number = 0; + match_number < insn_n_operands[insn_code_number]; + match_number++) + { + rtx set, p, src, dst; + rtx src_note, dst_note; + int success = 0; + int num_calls = 0; + + /* Nothing to do if the two operands aren't supposed to + match. */ + if (matches[operand_number][match_number] == 0) + continue; + + dst = recog_operand[operand_number]; + src = recog_operand[match_number]; + + if (GET_CODE (src) != REG + || REGNO (src) < FIRST_PSEUDO_REGISTER) + continue; + + if (GET_CODE (dst) != REG + || REGNO (dst) < FIRST_PSEUDO_REGISTER) + continue; + + /* If the operands already match, then there is nothing + to do. */ + if (operands_match_p (src, dst)) + continue; + + set = single_set (insn); + if (! set) + continue; + + /* operand_number/dst must be a write-only operand, and + match_operand/src must be a read-only operand. */ + if (modified[match_number] != 0) + continue; + + if (early_clobber[operand_number] == 1) + continue; + + if (modified[operand_number] != 2) + continue; + + /* Make sure operand_number is the destination. */ + if (recog_operand[operand_number] != SET_DEST (set)) + continue; + + if (! (src_note = find_reg_note (insn, REG_DEAD, src))) + continue; + + /* Can not modify an earlier insn to set dst if this insn + uses an old value in the source. */ + if (reg_overlap_mentioned_p (dst, SET_SRC (set))) + continue; + + if (regmove_dump_file) + fprintf (regmove_dump_file, + "Could fix operand %d of insn %d matching operand %d.\n", + operand_number, INSN_UID (insn), match_number); + + /* ??? If src is set once, and is set equal to a constant, + then do not use it for this optimization, as this would + make it no longer equivalent to a constant? */ + + /* Scan backward to find the first instruction that uses + the input operand. If the operand is set here, then + replace it in both instructions with operand_number. */ + + for (p = PREV_INSN (insn); p; p = PREV_INSN (p)) + { + rtx pset; + + if (GET_CODE (p) == CODE_LABEL + || GET_CODE (p) == JUMP_INSN + || (GET_CODE (p) == NOTE + && (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG + || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END))) + break; + + if (GET_RTX_CLASS (GET_CODE (p)) != 'i') + continue; + + /* ??? See if all of SRC is set in P. This test is much + more conservative than it needs to be. */ + pset = single_set (p); + if (pset && SET_DEST (pset) == src) + { + /* We use validate_replace_rtx, in case there + are multiple identical source operands. All of + them have to be changed at the same time. */ + if (validate_replace_rtx (src, dst, insn)) + { + if (validate_change (p, &SET_DEST (pset), + dst, 0)) + success = 1; + else + { + /* Change all source operands back. + This modifies the dst as a side-effect. */ + validate_replace_rtx (dst, src, insn); + /* Now make sure the dst is right. */ + validate_change (insn, + recog_operand_loc[operand_number], + dst, 0); + } + } + break; + } + + if (reg_overlap_mentioned_p (src, PATTERN (p)) + || reg_overlap_mentioned_p (dst, PATTERN (p))) + break; + + /* If we have passed a call instruction, and the + pseudo-reg DST is not already live across a call, + then don't perform the optimization. */ + if (GET_CODE (p) == CALL_INSN) + { + num_calls++; + + if (REG_N_CALLS_CROSSED (REGNO (dst)) == 0) + break; + } + } + + if (success) + { + /* Remove the death note for SRC from INSN. */ + remove_note (insn, src_note); + /* Move the death note for SRC to P if it is used + there. */ + if (reg_overlap_mentioned_p (src, PATTERN (p))) + { + XEXP (src_note, 1) = REG_NOTES (p); + REG_NOTES (p) = src_note; + } + /* If there is a REG_DEAD note for DST on P, then remove + it, because DST is now set there. */ + if (dst_note = find_reg_note (p, REG_DEAD, dst)) + remove_note (p, dst_note); + + REG_N_SETS (REGNO (dst))++; + REG_N_SETS (REGNO (src))--; + + REG_N_CALLS_CROSSED (REGNO (dst)) += num_calls; + REG_N_CALLS_CROSSED (REGNO (src)) -= num_calls; + + /* ??? Must adjust reg_live_length, and reg_n_refs for + both registers. Must keep track of loop_depth in + order to get reg_n_refs adjustment correct. */ + + if (regmove_dump_file) + fprintf (regmove_dump_file, + "Fixed operand %d of insn %d matching operand %d.\n", + operand_number, INSN_UID (insn), match_number); + + goto done_backwards; + } + } + } + done_backwards: + ; + } + } +#endif /* REGISTER_CONSTRAINTS */ +} + +/* return nonzero if X is stable but for mentioning SRC or mentioning / + changing DST . If in doubt, presume it is unstable. */ +static int +stable_but_for_p (x, src, dst) + rtx x, src, dst; +{ + RTX_CODE code = GET_CODE (x); + switch (GET_RTX_CLASS (code)) + { + case '<': case '1': case 'c': case '2': case 'b': case '3': + { + int i; + char *fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + if (fmt[i] == 'e' && ! stable_but_for_p (XEXP (x, i), src, dst)) + return 0; + return 1; + } + case 'o': + if (x == src || x == dst) + return 1; + /* fall through */ + default: + return ! rtx_unstable_p (x); + } +} diff --git a/gcc/rtl.h b/gcc/rtl.h index de9386df210..a5bdfca61d5 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -812,6 +812,7 @@ extern void remove_note PROTO((rtx, rtx)); extern void note_stores PROTO((rtx, void (*)())); extern int refers_to_regno_p PROTO((int, int, rtx, rtx *)); extern int reg_overlap_mentioned_p PROTO((rtx, rtx)); +extern rtx find_use_as_address PROTO((rtx, rtx, HOST_WIDE_INT)); /* Maximum number of parallel sets and clobbers in any insn in this fn. diff --git a/gcc/toplev.c b/gcc/toplev.c index 43be973bcbe..790a51b8fd8 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -245,6 +245,7 @@ int cse2_dump = 0; int branch_prob_dump = 0; int flow_dump = 0; int combine_dump = 0; +int regmove_dump = 0; int sched_dump = 0; int local_reg_dump = 0; int global_reg_dump = 0; @@ -566,6 +567,35 @@ int flag_pedantic_errors = 0; int flag_schedule_insns = 0; int flag_schedule_insns_after_reload = 0; +#ifdef HAIFA +/* The following flags have effect only for scheduling before register + allocation: + + flag_schedule_interblock means schedule insns accross basic blocks. + flag_schedule_speculative means allow speculative motion of non-load insns. + flag_schedule_speculative_load means allow speculative motion of some + load insns. + flag_schedule_speculative_load_dangerous allows speculative motion of more + load insns. + flag_schedule_reverse_before_reload means try to reverse original order + of insns (S). + flag_schedule_reverse_after_reload means try to reverse original order + of insns (R). */ + +int flag_schedule_interblock = 1; +int flag_schedule_speculative = 1; +int flag_schedule_speculative_load = 0; +int flag_schedule_speculative_load_dangerous = 0; +int flag_schedule_reverse_before_reload = 0; +int flag_schedule_reverse_after_reload = 0; + + +/* flag_on_branch_count_reg means try to replace add-1,compare,branch tupple + by a cheaper branch, on a count register. */ +int flag_branch_on_count_reg; +#endif /* HAIFA */ + + /* -finhibit-size-directive inhibits output of .size for ELF. This is used only for compiling crtstuff.c, and it may be extended to other effects @@ -616,6 +646,8 @@ int flag_check_memory_usage = 0; -fcheck-memory-usage. */ int flag_prefix_function_name = 0; +int flag_regmove = 0; + /* 1 if alias checking is on (by default, when -O). */ int flag_alias_check = 0; @@ -666,6 +698,15 @@ struct { char *string; int *variable; int on_value;} f_options[] = {"pretend-float", &flag_pretend_float, 1}, {"schedule-insns", &flag_schedule_insns, 1}, {"schedule-insns2", &flag_schedule_insns_after_reload, 1}, +#ifdef HAIFA + {"sched-interblock",&flag_schedule_interblock, 1}, + {"sched-spec",&flag_schedule_speculative, 1}, + {"sched-spec-load",&flag_schedule_speculative_load, 1}, + {"sched-spec-load-dangerous",&flag_schedule_speculative_load_dangerous, 1}, + {"sched-reverse-S",&flag_schedule_reverse_before_reload, 1}, + {"sched-reverse-R",&flag_schedule_reverse_after_reload, 1}, + {"branch-count-reg",&flag_branch_on_count_reg, 1}, +#endif /* HAIFA */ {"pic", &flag_pic, 1}, {"PIC", &flag_pic, 2}, {"exceptions", &flag_exceptions, 1}, @@ -680,6 +721,7 @@ struct { char *string; int *variable; int on_value;} f_options[] = {"function-sections", &flag_function_sections, 1}, {"verbose-asm", &flag_verbose_asm, 1}, {"gnu-linker", &flag_gnu_linker, 1}, + {"regmove", &flag_regmove, 1}, {"pack-struct", &flag_pack_struct, 1}, {"stack-check", &flag_stack_check, 1}, {"bytecode", &output_bytecode, 1}, @@ -885,6 +927,7 @@ FILE *cse2_dump_file; FILE *branch_prob_dump_file; FILE *flow_dump_file; FILE *combine_dump_file; +FILE *regmove_dump_file; FILE *sched_dump_file; FILE *local_reg_dump_file; FILE *global_reg_dump_file; @@ -905,6 +948,7 @@ int cse2_time; int branch_prob_time; int flow_time; int combine_time; +int regmove_time; int sched_time; int local_alloc_time; int global_alloc_time; @@ -1053,6 +1097,8 @@ fatal_insn (message, insn) fflush (flow_dump_file); if (combine_dump_file) fflush (combine_dump_file); + if (regmove_dump_file) + fflush (regmove_dump_file); if (sched_dump_file) fflush (sched_dump_file); if (local_reg_dump_file) @@ -2131,6 +2177,7 @@ compile_file (name) branch_prob_time = 0; flow_time = 0; combine_time = 0; + regmove_time = 0; sched_time = 0; local_alloc_time = 0; global_alloc_time = 0; @@ -2229,6 +2276,10 @@ compile_file (name) if (combine_dump) combine_dump_file = open_dump_file (dump_base_name, ".combine"); + /* If regmove dump desired, open the output file. */ + if (regmove_dump) + regmove_dump_file = open_dump_file (dump_base_name, ".regmove"); + /* If scheduling dump desired, open the output file. */ if (sched_dump) sched_dump_file = open_dump_file (dump_base_name, ".sched"); @@ -2713,6 +2764,9 @@ compile_file (name) fclose (combine_dump_file); } + if (regmove_dump) + fclose (regmove_dump_file); + if (sched_dump) fclose (sched_dump_file); @@ -2765,6 +2819,7 @@ compile_file (name) print_time ("branch-prob", branch_prob_time); print_time ("flow", flow_time); print_time ("combine", combine_time); + print_time ("regmove", regmove_time); print_time ("sched", sched_time); print_time ("local-alloc", local_alloc_time); print_time ("global-alloc", global_alloc_time); @@ -3304,6 +3359,26 @@ rest_of_compilation (decl) fflush (combine_dump_file); }); + if (regmove_dump) + TIMEVAR (dump_time, + { + fprintf (regmove_dump_file, "\n;; Function %s\n\n", + (*decl_printable_name) (decl, 2)); + }); + + /* Register allocation pre-pass, to reduce number of moves + necessary for two-address machines. */ + if (optimize > 0 && flag_regmove) + TIMEVAR (regmove_time, regmove_optimize (insns, max_reg_num (), + regmove_dump_file)); + + if (regmove_dump) + TIMEVAR (dump_time, + { + print_rtl (regmove_dump_file, insns); + fflush (regmove_dump_file); + }); + /* Print function header into sched dump now because doing the sched analysis makes some of the dump. */ @@ -3703,6 +3778,7 @@ main (argc, argv, envp) flag_schedule_insns = 1; flag_schedule_insns_after_reload = 1; #endif + flag_regmove = 1; } if (optimize >= 3) @@ -3764,6 +3840,7 @@ main (argc, argv, envp) jump2_opt_dump = 1; local_reg_dump = 1; loop_dump = 1; + regmove_dump = 1; rtl_dump = 1; cse_dump = 1, cse2_dump = 1; sched_dump = 1; @@ -3815,6 +3892,9 @@ main (argc, argv, envp) case 't': cse2_dump = 1; break; + case 'N': + regmove_dump = 1; + break; case 'S': sched_dump = 1; break; @@ -3862,6 +3942,18 @@ main (argc, argv, envp) if (found) ; +#ifdef HAIFA +#ifdef INSN_SCHEDULING + else if (!strncmp (p, "sched-verbose-",14)) + fix_sched_param("verbose",&p[14]); + else if (!strncmp (p, "sched-max-",10)) + fix_sched_param("max",&p[10]); + else if (!strncmp (p, "sched-inter-max-b-",18)) + fix_sched_param("interblock-max-blocks",&p[18]); + else if (!strncmp (p, "sched-inter-max-i-",18)) + fix_sched_param("interblock-max-insns",&p[18]); +#endif +#endif /* HAIFA */ else if (!strncmp (p, "fixed-", 6)) fix_register (&p[6], 1, 1); else if (!strncmp (p, "call-used-", 10)) diff --git a/gcc/unroll.c b/gcc/unroll.c index 46339988ede..f1864d398b4 100644 --- a/gcc/unroll.c +++ b/gcc/unroll.c @@ -202,7 +202,7 @@ static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *)); static void final_reg_note_copy PROTO((rtx, struct inline_remap *)); static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int, enum unroll_types, rtx, rtx, rtx, rtx)); -static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx)); +void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx)); static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *)); static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int)); static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types, @@ -1094,6 +1094,16 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before, /* Set unroll type to MODULO now. */ unroll_type = UNROLL_MODULO; loop_preconditioned = 1; +#ifdef HAIFA + if (loop_n_iterations > 0) + loop_unroll_iter[ loop_number(loop_start, loop_end) ] + = (loop_n_iterations + - loop_n_iterations % (abs_inc * unroll_number)); + else + /* inform loop.c about the new initial value */ + loop_start_value[loop_number(loop_start, loop_end)] = initial_value; +#endif + } } @@ -1108,6 +1118,15 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before, /* At this point, we are guaranteed to unroll the loop. */ +#ifdef HAIFA + /* inform loop.c about the factor of unrolling */ + if (unroll_type == UNROLL_COMPLETELY) + loop_unroll_factor[ loop_number(loop_start, loop_end) ] = -1; + else + loop_unroll_factor[ loop_number(loop_start, loop_end) ] = unroll_number; +#endif /* HAIFA */ + + /* For each biv and giv, determine whether it can be safely split into a different variable for each unrolled copy of the loop body. We precalculate and save this info here, since computing it is @@ -2263,7 +2282,7 @@ biv_total_increment (bl, loop_start, loop_end) Initial_value and/or increment are set to zero if their values could not be calculated. */ -static void +void iteration_info (iteration_var, initial_value, increment, loop_start, loop_end) rtx iteration_var, *initial_value, *increment; rtx loop_start, loop_end;