* Integrate Haifa instruction scheduler.
* Integrate regmove pass. See ChangeLog for details. From-SVN: r14770
This commit is contained in:
parent
aa32d84158
commit
8c660648ca
|
@ -1,3 +1,76 @@
|
|||
Mon Aug 11 14:50:55 1997 Jeffrey A Law (law@cygnus.com)
|
||||
|
||||
* Integrate Haifa instruction scheduler.
|
||||
* Makefile.in (ALL_CFLAGS): Add SCHED_CFLAGS. Prefix all references
|
||||
to sched with $(SCHED_PREFIX).
|
||||
* configure.in: Handle --enable-haifa.
|
||||
* configure: Rebuilt.
|
||||
* flags.h: Add new flags for haifa instruction scheduler.
|
||||
* genattrtab.c (expand_units): For haifa, don't subtract one
|
||||
when computing blockage.
|
||||
* toplev.h (flag_schedule_interblock): Haifa scheduler flag.
|
||||
(flag_schedule_speculative): Ditto.
|
||||
(flag_schedule_speculative_load): Ditto.
|
||||
(flag_schedule_speculative_load_dangerous): Ditto.
|
||||
(flag_schedule_reverse_before_reload): Ditto.
|
||||
(flag_schedule_reverse_after_reload): Ditto.
|
||||
(flag_branch_on_count_reg): Ditto.
|
||||
(f_options): Add Haifa switches.
|
||||
(main): Turn off some Haifa options if appropriate macro is
|
||||
defined. Process Haifa switches.
|
||||
* unroll.c (iteration_info): No longer static, since Haifa
|
||||
scheduler uses it.
|
||||
(unroll_loop): Inform HAIFA scheduler about loop unrolling factor.
|
||||
* unroll.c (unroll_loop): Set loop_unroll_iter, loop_start_value.
|
||||
* loop.h (loop_unroll_factor, loop_number): Add HAIFA decls.
|
||||
* loop.h (loop_initial_value,loop_unroll_iter): New globals.
|
||||
* loop.c (loop_optimize): If HAIFA is defined, allocate additional
|
||||
storage for the Haifa scheduler.
|
||||
(mark_loop_jump): If HAIFA defined, set LABEL_OUTSIDE_LOOP_P and
|
||||
LABEL_NEXTREF.
|
||||
(strength_reduce): If HAIFA and HAVE_decrement_and_branch_on_count
|
||||
are defined, call analyze_loop_iterations and insert_bct to use
|
||||
countdown loops.
|
||||
(record_giv): Refine test for jumps out of loops if HAIFA is
|
||||
defined.
|
||||
(analyze_loop_iterations): New function to identify if we can use
|
||||
a countdown loop.
|
||||
(insert_bct): Insert countdown loop.
|
||||
(instrument_loop_bct): Low level code to insert countdown loop.
|
||||
(loop_number): Calculate UID of loop.
|
||||
(indirect_jump_in_function_p): Return true if an indirect jump is
|
||||
in the function.
|
||||
(is_power_of_2): Return true if value is a power of 2.
|
||||
(is_conditional_branch): Return true if insn is a conditional
|
||||
jump.
|
||||
(fix_bct_param): Process -fbct-{min,max}-N switches.
|
||||
(check_bct_param): Return true if loop should be instrumented.
|
||||
* loop.c (loop_initial_value,loop_unroll_iter): New globals.
|
||||
(loop_optimize): Initialize.
|
||||
(get_condition_for_loop): Ditto.
|
||||
* loop.c (strength_reduce): Inside of code that uses #ifdef
|
||||
HAVE_decrement_and_branch_on_count code, test it to make sure the
|
||||
condition is true.
|
||||
(instrument_loop_bct): Ditto.
|
||||
* haifa-sched.c: New file.
|
||||
|
||||
|
||||
* Integrate regmove pass.
|
||||
* Makefile.in (OBJS): Add regmove.o
|
||||
(regmove.o): Add dependencies.
|
||||
* flow.c (find_use_as_address): No longer static.
|
||||
* rtl.h (find_use_as_address): Declare.
|
||||
* toplev.c (regmove_dump, flag_regmove): Define.
|
||||
(f_options): Add -fregmove.
|
||||
(regmove_dump_file, regmove_time): Define.
|
||||
(fatal_insn): Close the regmove dump file.
|
||||
(compile_file): Initialize regmove_time; open/close the regmove dump
|
||||
file as needed. Print regmove time as needed.
|
||||
(rest_of_compilation): Run regmove pass if requested, dump
|
||||
RTL after regmove if requested.
|
||||
(main): If -O2 or more, turn on regmove. Handle dump switches.
|
||||
* regmove.c: New file.
|
||||
|
||||
Mon Aug 11 14:15:02 1997 Jeffrey A Law (law@cygnus.com)
|
||||
|
||||
* Integrate tlink patch from jason@cygnus.com
|
||||
|
|
|
@ -444,7 +444,7 @@ INTERNAL_CFLAGS = $(CROSS) -DIN_GCC @extra_c_flags@
|
|||
|
||||
# This is the variable actually used when we compile.
|
||||
ALL_CFLAGS = $(INTERNAL_CFLAGS) $(X_CFLAGS) $(T_CFLAGS) $(CFLAGS) $(XCFLAGS) \
|
||||
@DEFS@
|
||||
@DEFS@ $(SCHED_CFLAGS)
|
||||
|
||||
# Likewise.
|
||||
ALL_CPPFLAGS = $(CPPFLAGS) $(X_CPPFLAGS) $(T_CPPFLAGS)
|
||||
|
@ -548,14 +548,17 @@ BC_OBJS = bc-emit.o bc-optab.o
|
|||
# Bytecode header files constructed at build time; vmsconfig.com wants this.
|
||||
BC_ALL = bc-arity.h bc-opcode.h bc-opname.h
|
||||
|
||||
SCHED_PREFIX = @sched_prefix@
|
||||
SCHED_CFLAGS = @sched_cflags@
|
||||
|
||||
# Language-independent object files.
|
||||
OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \
|
||||
function.o stmt.o except.o expr.o calls.o expmed.o explow.o optabs.o \
|
||||
varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o \
|
||||
dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o \
|
||||
varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o regmove.o \
|
||||
dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o alias.o \
|
||||
integrate.o jump.o cse.o loop.o unroll.o flow.o stupid.o combine.o \
|
||||
regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o \
|
||||
insn-peep.o reorg.o alias.o sched.o final.o recog.o reg-stack.o \
|
||||
insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \
|
||||
insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \
|
||||
profile.o insn-attrtab.o $(out_object_file) getpwd.o convert.o $(EXTRA_OBJS)
|
||||
|
||||
|
@ -1326,7 +1329,9 @@ reorg.o : reorg.c $(CONFIG_H) $(RTL_H) conditions.h hard-reg-set.h \
|
|||
flags.h output.h
|
||||
alias.o : alias.c $(CONFIG_H) $(RTL_H) flags.h hard-reg-set.h regs.h \
|
||||
insn-codes.h
|
||||
sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \
|
||||
regmove.o : regmove.c $(CONFIG_H) $(RTL_H) insn-config.h recog.h output.h \
|
||||
reload.h regs.h hard-reg-set.h flags.h expr.h insn-flags.h
|
||||
$(SCHED_PREFIX)sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \
|
||||
flags.h insn-config.h insn-attr.h
|
||||
final.o : final.c $(CONFIG_H) $(RTL_H) $(TREE_H) flags.h regs.h \
|
||||
recog.h conditions.h insn-config.h insn-attr.h except.h real.h output.h \
|
||||
|
|
|
@ -4361,6 +4361,26 @@ if [ ! -f Makefile.in ]; then
|
|||
echo "source ${srcdir}/.gdbinit" >> .gdbinit
|
||||
fi
|
||||
|
||||
# Select the instruction scheduler.  sched_prefix and sched_cflags are
# substituted for @sched_prefix@ and @sched_cflags@ (the Makefile's
# SCHED_PREFIX and SCHED_CFLAGS); both stay empty unless --enable-haifa
# was given.
#
# This is the generated script, run by plain /bin/sh, so the tests must
# use the portable single-bracket `[ ... ]' form rather than `[[ ... ]]'.
sched_prefix=
sched_cflags=
if [ x$enable_haifa = xyes ]; then
    echo "Using the Haifa scheduler."
    sched_prefix=haifa-
    sched_cflags=-DHAIFA
fi

if [ x$enable_haifa != x ]; then
    # Explicitly remove files that need to be recompiled for the Haifa scheduler.
    # This runs whenever the option was given at all (yes or no), so objects
    # built for the other scheduler are not reused.
    for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do
	if [ -f $x ]; then
	    echo "Removing $x"
	    rm -f $x
	fi
    done
fi
|
||||
|
||||
# Process the language and host/target makefile fragments.
|
||||
${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file"
|
||||
|
||||
|
@ -4602,6 +4622,8 @@ s%@CC@%$CC%g
|
|||
s%@SET_MAKE@%$SET_MAKE%g
|
||||
s%@CPP@%$CPP%g
|
||||
s%@manext@%$manext%g
|
||||
s%@sched_prefix@%$sched_prefix%g
|
||||
s%@sched_cflags@%$sched_cflags%g
|
||||
s%@objext@%$objext%g
|
||||
s%@subdirs@%$subdirs%g
|
||||
s%@all_languages@%$all_languages%g
|
||||
|
|
|
@ -3027,6 +3027,26 @@ if [[ ! -f Makefile.in ]]; then
|
|||
echo "source ${srcdir}/.gdbinit" >> .gdbinit
|
||||
fi
|
||||
|
||||
# Override SCHED_OBJ and SCHED_CFLAGS to enable the Haifa scheduler.
|
||||
sched_prefix=
|
||||
sched_cflags=
|
||||
if [[ x$enable_haifa = xyes ]]; then
|
||||
echo "Using the Haifa scheduler."
|
||||
sched_prefix=haifa-
|
||||
sched_cflags=-DHAIFA
|
||||
fi
|
||||
AC_SUBST(sched_prefix)
|
||||
AC_SUBST(sched_cflags)
|
||||
if [[ x$enable_haifa != x ]]; then
|
||||
# Explicitly remove files that need to be recompiled for the Haifa scheduler.
|
||||
for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do
|
||||
if [ -f $x ]; then
|
||||
echo "Removing $x"
|
||||
rm -f $x
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Process the language and host/target makefile fragments.
|
||||
${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file"
|
||||
|
||||
|
|
28
gcc/flags.h
28
gcc/flags.h
|
@ -304,6 +304,34 @@ extern int flag_shared_data;
|
|||
extern int flag_schedule_insns;
|
||||
extern int flag_schedule_insns_after_reload;
|
||||
|
||||
#ifdef HAIFA
|
||||
/* The following flags have effect only for scheduling before register
|
||||
allocation:
|
||||
|
||||
flag_schedule_interblock means schedule insns across basic blocks.
|
||||
flag_schedule_speculative means allow speculative motion of non-load insns.
|
||||
flag_schedule_speculative_load means allow speculative motion of some
|
||||
load insns.
|
||||
flag_schedule_speculative_load_dangerous allows speculative motion of more
|
||||
load insns.
|
||||
flag_schedule_reverse_before_reload means try to reverse original order
|
||||
of insns (S).
|
||||
flag_schedule_reverse_after_reload means try to reverse original order
|
||||
of insns (R). */
|
||||
|
||||
extern int flag_schedule_interblock;
|
||||
extern int flag_schedule_speculative;
|
||||
extern int flag_schedule_speculative_load;
|
||||
extern int flag_schedule_speculative_load_dangerous;
|
||||
extern int flag_schedule_reverse_before_reload;
|
||||
extern int flag_schedule_reverse_after_reload;
|
||||
|
||||
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
|
||||
by a cheaper branch, on a count register. */
|
||||
extern int flag_branch_on_count_reg;
|
||||
#endif /* HAIFA */
|
||||
|
||||
|
||||
/* Nonzero means put things in delayed-branch slots if supported. */
|
||||
|
||||
extern int flag_delayed_branch;
|
||||
|
|
|
@ -268,7 +268,6 @@ static void find_auto_inc PROTO((regset, rtx, rtx));
|
|||
static void mark_used_regs PROTO((regset, regset, rtx, int, rtx));
|
||||
static int try_pre_increment_1 PROTO((rtx));
|
||||
static int try_pre_increment PROTO((rtx, rtx, HOST_WIDE_INT));
|
||||
static rtx find_use_as_address PROTO((rtx, rtx, HOST_WIDE_INT));
|
||||
void dump_flow_info PROTO((FILE *));
|
||||
|
||||
/* Find basic blocks of the current function and perform data flow analysis.
|
||||
|
@ -2795,7 +2794,7 @@ try_pre_increment (insn, reg, amount)
|
|||
If REG appears more than once, or is used other than in such an address,
|
||||
return (rtx)1. */
|
||||
|
||||
static rtx
|
||||
rtx
|
||||
find_use_as_address (x, reg, plusconst)
|
||||
register rtx x;
|
||||
rtx reg;
|
||||
|
|
|
@ -2003,6 +2003,9 @@ expand_units ()
|
|||
|
||||
for (op = unit->ops; op; op = op->next)
|
||||
{
|
||||
#ifdef HAIFA
|
||||
rtx blockage = op->issue_exp;
|
||||
#else
|
||||
rtx blockage = operate_exp (POS_MINUS_OP, readycost,
|
||||
make_numeric_value (1));
|
||||
|
||||
|
@ -2018,6 +2021,7 @@ expand_units ()
|
|||
blockage);
|
||||
|
||||
blockage = operate_exp (MAX_OP, blockage, op->issue_exp);
|
||||
#endif
|
||||
blockage = simplify_knowing (blockage, unit->condexp);
|
||||
|
||||
/* Add this op's contribution to MAX (BLOCKAGE (E,*)) and
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -147,7 +147,7 @@ in the following sections.
|
|||
-ffunction-sections -finline-functions
|
||||
-fkeep-inline-functions -fno-default-inline
|
||||
-fno-defer-pop -fno-function-cse
|
||||
-fno-inline -fno-peephole -fomit-frame-pointer
|
||||
-fno-inline -fno-peephole -fomit-frame-pointer -fregmove
|
||||
-frerun-cse-after-loop -fschedule-insns
|
||||
-fschedule-insns2 -fstrength-reduce -fthread-jumps
|
||||
-funroll-all-loops -funroll-loops
|
||||
|
@ -2195,6 +2195,12 @@ used in one place: in @file{reorg.c}, instead of guessing which path a
|
|||
branch is mostly to take, the @samp{REG_BR_PROB} values are used to
|
||||
exactly determine which path is taken more often.
|
||||
@end ifset
|
||||
|
||||
@item -fregmove
|
||||
Some machines only support 2 operands per instruction. On such
|
||||
machines, GNU CC might have to do extra copies. The @samp{-fregmove}
|
||||
option overrides the default for the machine to do the copy before
|
||||
register allocation.
|
||||
@end table
|
||||
|
||||
@node Preprocessor Options
|
||||
|
|
752
gcc/loop.c
752
gcc/loop.c
|
@ -81,6 +81,42 @@ static rtx *loop_number_loop_starts, *loop_number_loop_ends;
|
|||
|
||||
int *loop_outer_loop;
|
||||
|
||||
#ifdef HAIFA
|
||||
/* The main output of analyze_loop_iterations is placed here */
|
||||
|
||||
int *loop_can_insert_bct;
|
||||
|
||||
/* For each loop, nonzero if the loop or any of its inner loops has used the
|
||||
count register */
|
||||
|
||||
int *loop_used_count_register;
|
||||
|
||||
/* For each loop, remember its unrolling factor (if at all).
|
||||
contents of the array:
|
||||
0/1: not unrolled.
|
||||
-1: completely unrolled - no further instrumentation is needed.
|
||||
>1: holds the exact amount of unrolling. */
|
||||
|
||||
int *loop_unroll_factor;
|
||||
int *loop_unroll_iter;
|
||||
|
||||
/* loop parameters for arithmetic loops. These loops have a loop variable
|
||||
which is initialized to loop_start_value, incremented in each iteration
|
||||
by "loop_increment". At the end of the iteration the loop variable is
|
||||
compared to the loop_comparison_value (using loop_comparison_code). */
|
||||
|
||||
rtx *loop_increment;
|
||||
rtx *loop_comparison_value;
|
||||
rtx *loop_start_value;
|
||||
enum rtx_code *loop_comparison_code;
|
||||
|
||||
/* for debugging: selects sub-range of loops for which the bct optimization
|
||||
is invoked. The numbering is per compilation-unit. */
|
||||
int dbg_bct_min = -1;
|
||||
int dbg_bct_max = -1;
|
||||
#endif /* HAIFA */
|
||||
|
||||
|
||||
/* Indexed by loop number, contains a nonzero value if the "loop" isn't
|
||||
really a loop (an insn outside the loop branches into it). */
|
||||
|
||||
|
@ -286,6 +322,32 @@ static int maybe_eliminate_biv_1 ();
|
|||
static int last_use_this_basic_block ();
|
||||
static void record_initial ();
|
||||
static void update_reg_last_use ();
|
||||
|
||||
#ifdef HAIFA
|
||||
/* This is extern from unroll.c */
|
||||
void iteration_info ();
|
||||
|
||||
/* Two main functions for implementing bct:
|
||||
first - to be called before loop unrolling, and the second - after */
|
||||
static void analyze_loop_iterations ();
|
||||
static void insert_bct ();
|
||||
|
||||
/* Auxiliary function that inserts the bct pattern into the loop */
|
||||
static void instrument_loop_bct ();
|
||||
|
||||
/* Indirect_jump_in_function is computed once per function. */
|
||||
int indirect_jump_in_function = 0;
|
||||
static int indirect_jump_in_function_p ();
|
||||
|
||||
int loop_number ();
|
||||
static int is_power_of_2();
|
||||
static int is_conditional_branch ();
|
||||
|
||||
/* Debugging functions. */
|
||||
int fix_bct_param ();
|
||||
static int check_bct_param ();
|
||||
#endif /* HAIFA */
|
||||
|
||||
|
||||
/* Relative gain of eliminating various kinds of operations. */
|
||||
int add_cost;
|
||||
|
@ -379,6 +441,32 @@ loop_optimize (f, dumpfile)
|
|||
loop_number_exit_labels = (rtx *) alloca (max_loop_num * sizeof (rtx));
|
||||
loop_number_exit_count = (int *) alloca (max_loop_num * sizeof (int));
|
||||
|
||||
#ifdef HAIFA
|
||||
/* Allocate for BCT optimization */
|
||||
loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int));
|
||||
bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int));
|
||||
|
||||
loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int));
|
||||
bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int));
|
||||
|
||||
loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int));
|
||||
bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int));
|
||||
|
||||
loop_unroll_iter = (int *) alloca (max_loop_num *sizeof (int));
|
||||
bzero ((char *) loop_unroll_iter, max_loop_num * sizeof (int));
|
||||
|
||||
loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx));
|
||||
loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
|
||||
loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
|
||||
bzero ((char *) loop_increment, max_loop_num * sizeof (rtx));
|
||||
bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx));
|
||||
bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx));
|
||||
|
||||
loop_comparison_code
|
||||
= (enum rtx_code *) alloca (max_loop_num * sizeof (enum rtx_code));
|
||||
bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code));
|
||||
#endif /* HAIFA */
|
||||
|
||||
/* Find and process each loop.
|
||||
First, find them, and record them in order of their beginnings. */
|
||||
find_and_verify_loops (f);
|
||||
|
@ -430,6 +518,12 @@ loop_optimize (f, dumpfile)
|
|||
if (flag_unroll_loops && write_symbols != NO_DEBUG)
|
||||
find_loop_tree_blocks ();
|
||||
|
||||
#ifdef HAIFA
|
||||
/* determine if the function has indirect jump. If it does,
|
||||
we cannot instrument loops in this function with bct */
|
||||
indirect_jump_in_function = indirect_jump_in_function_p (f);
|
||||
#endif /* HAIFA */
|
||||
|
||||
/* Now scan the loops, last ones first, since this means inner ones are done
|
||||
before outer ones. */
|
||||
for (i = max_loop_num-1; i >= 0; i--)
|
||||
|
@ -2639,6 +2733,11 @@ mark_loop_jump (x, loop_num)
|
|||
|
||||
if (loop_num != -1)
|
||||
{
|
||||
#ifdef HAIFA
|
||||
LABEL_OUTSIDE_LOOP_P (x) = 1;
|
||||
LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num];
|
||||
#endif /* HAIFA */
|
||||
|
||||
loop_number_exit_labels[loop_num] = x;
|
||||
|
||||
for (outer_loop = loop_num; outer_loop != -1;
|
||||
|
@ -3755,6 +3854,16 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
|||
so that "decrement and branch until zero" insn can be used. */
|
||||
check_dbra_loop (loop_end, insn_count, loop_start);
|
||||
|
||||
#ifdef HAIFA
|
||||
/* record loop-variables relevant for BCT optimization before unrolling
|
||||
the loop. Unrolling may update part of this information, and the
|
||||
correct data will be used for generating the BCT. */
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
analyze_loop_iterations (loop_start, loop_end);
|
||||
#endif
|
||||
#endif /* HAIFA */
|
||||
|
||||
/* Create reg_map to hold substitutions for replaceable giv regs. */
|
||||
reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx));
|
||||
bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx));
|
||||
|
@ -4247,6 +4356,14 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
|||
if (flag_unroll_loops)
|
||||
unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1);
|
||||
|
||||
#ifdef HAIFA
|
||||
/* instrument the loop with bct insn */
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
insert_bct (loop_start, loop_end);
|
||||
#endif
|
||||
#endif /* HAIFA */
|
||||
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "\n");
|
||||
}
|
||||
|
@ -6932,3 +7049,638 @@ get_condition_for_loop (x)
|
|||
return gen_rtx (swap_condition (GET_CODE (comparison)), VOIDmode,
|
||||
XEXP (comparison, 1), XEXP (comparison, 0));
|
||||
}
|
||||
|
||||
#ifdef HAIFA
|
||||
/* Analyze a loop in order to instrument it with the use of count register.
|
||||
loop_start and loop_end are the first and last insns of the loop.
|
||||
This function works in cooperation with insert_bct ().
|
||||
loop_can_insert_bct[loop_num] is set according to whether the optimization
|
||||
is applicable to the loop. When it is applicable, the following variables
|
||||
are also set:
|
||||
loop_start_value[loop_num]
|
||||
loop_comparison_value[loop_num]
|
||||
loop_increment[loop_num]
|
||||
loop_comparison_code[loop_num] */
|
||||
|
||||
static
|
||||
void analyze_loop_iterations (loop_start, loop_end)
|
||||
rtx loop_start, loop_end;
|
||||
{
|
||||
rtx comparison, comparison_value;
|
||||
rtx iteration_var, initial_value, increment;
|
||||
enum rtx_code comparison_code;
|
||||
|
||||
rtx last_loop_insn;
|
||||
rtx insn;
|
||||
int i;
|
||||
|
||||
/* loop_variable mode */
|
||||
enum machine_mode original_mode;
|
||||
|
||||
/* find the number of the loop */
|
||||
int loop_num = loop_number (loop_start, loop_end);
|
||||
|
||||
/* we change our mind only when we are sure that loop will be instrumented */
|
||||
loop_can_insert_bct[loop_num] = 0;
|
||||
|
||||
/* debugging: do we wish to instrument this loop? */
|
||||
if ( !check_bct_param () )
|
||||
return;
|
||||
|
||||
/* is the optimization suppressed. */
|
||||
if ( !flag_branch_on_count_reg )
|
||||
return;
|
||||
|
||||
/* make sure that count-reg is not in use */
|
||||
if (loop_used_count_register[loop_num]){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the function has no indirect jumps. */
|
||||
if (indirect_jump_in_function){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the last loop insn is a conditional jump */
|
||||
last_loop_insn = PREV_INSN (loop_end);
|
||||
if (!is_conditional_branch (last_loop_insn)) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* First find the iteration variable. If the last insn is a conditional
|
||||
branch, and the insn preceding it tests a register value, make that
|
||||
register the iteration variable. */
|
||||
|
||||
/* We used to use prev_nonnote_insn here, but that fails because it might
|
||||
accidentally get the branch for a contained loop if the branch for this
|
||||
loop was deleted. We can only trust branches immediately before the
|
||||
loop_end. */
|
||||
|
||||
comparison = get_condition_for_loop (last_loop_insn);
|
||||
/* ??? Get_condition may switch position of induction variable and
|
||||
invariant register when it canonicalizes the comparison. */
|
||||
|
||||
if (comparison == 0) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
comparison_code = GET_CODE (comparison);
|
||||
iteration_var = XEXP (comparison, 0);
|
||||
comparison_value = XEXP (comparison, 1);
|
||||
|
||||
original_mode = GET_MODE (iteration_var);
|
||||
if (GET_MODE_CLASS (original_mode) != MODE_INT
|
||||
|| GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* get info about loop bounds and increment */
|
||||
iteration_info (iteration_var, &initial_value, &increment,
|
||||
loop_start, loop_end);
|
||||
|
||||
/* make sure that all required loop data were found */
|
||||
if (!(initial_value && increment && comparison_value
|
||||
&& invariant_p (comparison_value) && invariant_p (increment)
|
||||
&& ! indirect_jump_in_function))
|
||||
{
|
||||
if (loop_dump_stream) {
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num);
|
||||
if (!(initial_value && increment && comparison_value)) {
|
||||
fprintf (loop_dump_stream, "\tbounds not available: ");
|
||||
if ( ! initial_value )
|
||||
fprintf (loop_dump_stream, "initial ");
|
||||
if ( ! increment )
|
||||
fprintf (loop_dump_stream, "increment ");
|
||||
if ( ! comparison_value )
|
||||
fprintf (loop_dump_stream, "comparison ");
|
||||
fprintf (loop_dump_stream, "\n");
|
||||
}
|
||||
if (!invariant_p (comparison_value) || !invariant_p (increment))
|
||||
fprintf (loop_dump_stream, "\tloop bounds not invariant\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the increment is constant */
|
||||
if (GET_CODE (increment) != CONST_INT) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the loop contains neither function call, nor jump on table.
|
||||
(the count register might be altered by the called function, and might
|
||||
be used for a branch on table). */
|
||||
for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) {
|
||||
if (GET_CODE (insn) == CALL_INSN){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
if (GET_CODE (insn) == JUMP_INSN
|
||||
&& (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
|
||||
|| GET_CODE (PATTERN (insn)) == ADDR_VEC)){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* At this point, we are sure that the loop can be instrumented with BCT.
|
||||
Some of the loops, however, will not be instrumented - the final decision
|
||||
is taken by insert_bct () */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n",
|
||||
loop_num);
|
||||
|
||||
/* mark all enclosing loops that they cannot use count register */
|
||||
/* ???: In fact, since insert_bct may decide not to instrument this loop,
|
||||
marking here may prevent instrumenting an enclosing loop that could
|
||||
actually be instrumented. But since this is rare, it is safer to mark
|
||||
here in case the order of calling (analyze/insert)_bct would be changed. */
|
||||
for (i=loop_num; i != -1; i = loop_outer_loop[i])
|
||||
loop_used_count_register[i] = 1;
|
||||
|
||||
/* Set data structures which will be used by the instrumentation phase */
|
||||
loop_start_value[loop_num] = initial_value;
|
||||
loop_comparison_value[loop_num] = comparison_value;
|
||||
loop_increment[loop_num] = increment;
|
||||
loop_comparison_code[loop_num] = comparison_code;
|
||||
loop_can_insert_bct[loop_num] = 1;
|
||||
}
|
||||
|
||||
|
||||
/* instrument loop for insertion of bct instruction. We distinguish between
|
||||
loops with compile-time bounds, to those with run-time bounds. The loop
|
||||
behaviour is analyzed according to the following characteristics/variables:
|
||||
; Input variables:
|
||||
; comparison-value: the value to which the iteration counter is compared.
|
||||
; initial-value: iteration-counter initial value.
|
||||
; increment: iteration-counter increment.
|
||||
; Computed variables:
|
||||
; increment-direction: the sign of the increment.
|
||||
; compare-direction: '1' for LT, LE (and LTU, LEU), '-1' for GT, GE (and GTU, GEU), '0' for NE.
|
||||
; range-direction: sign (comparison-value - initial-value)
|
||||
We give up on the following cases:
|
||||
; loop variable overflow.
|
||||
; run-time loop bounds with comparison code NE.
|
||||
*/
|
||||
|
||||
static void
|
||||
insert_bct (loop_start, loop_end)
|
||||
rtx loop_start, loop_end;
|
||||
{
|
||||
rtx initial_value, comparison_value, increment;
|
||||
enum rtx_code comparison_code;
|
||||
|
||||
int increment_direction, compare_direction;
|
||||
int unsigned_p = 0;
|
||||
|
||||
/* if the loop condition is <= or >=, the number of iteration
|
||||
is 1 more than the range of the bounds of the loop */
|
||||
int add_iteration = 0;
|
||||
|
||||
/* the only machine mode we work with - is the integer of the size that the
|
||||
machine has */
|
||||
enum machine_mode loop_var_mode = SImode;
|
||||
|
||||
int loop_num = loop_number (loop_start, loop_end);
|
||||
|
||||
/* get loop-variables. No need to check that these are valid - already
|
||||
checked in analyze_loop_iterations (). */
|
||||
comparison_code = loop_comparison_code[loop_num];
|
||||
initial_value = loop_start_value[loop_num];
|
||||
comparison_value = loop_comparison_value[loop_num];
|
||||
increment = loop_increment[loop_num];
|
||||
|
||||
/* check analyze_loop_iterations decision for this loop. */
|
||||
if (! loop_can_insert_bct[loop_num]){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the loop was not fully unrolled. */
|
||||
if (loop_unroll_factor[loop_num] == -1){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "insert_bct %d: was completely unrolled\n", loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the last loop insn is a conditional jump .
|
||||
This check is repeated from analyze_loop_iterations (),
|
||||
because unrolling might have changed that. */
|
||||
if (!is_conditional_branch (PREV_INSN (loop_end))){
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: not instrumenting BCT because of invalid branch\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* fix increment in case loop was unrolled. */
|
||||
if (loop_unroll_factor[loop_num] > 1)
|
||||
increment = GEN_INT ( INTVAL (increment) * loop_unroll_factor[loop_num] );
|
||||
|
||||
/* determine properties and directions of the loop */
|
||||
increment_direction = (INTVAL (increment) > 0) ? 1:-1;
|
||||
switch ( comparison_code ) {
|
||||
case LEU:
|
||||
unsigned_p = 1;
|
||||
/* fallthrough */
|
||||
case LE:
|
||||
compare_direction = 1;
|
||||
add_iteration = 1;
|
||||
break;
|
||||
case GEU:
|
||||
unsigned_p = 1;
|
||||
/* fallthrough */
|
||||
case GE:
|
||||
compare_direction = -1;
|
||||
add_iteration = 1;
|
||||
break;
|
||||
case EQ:
|
||||
/* in this case we cannot know the number of iterations */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: %d: loop cannot be instrumented: == in condition\n",
|
||||
loop_num);
|
||||
return;
|
||||
case LTU:
|
||||
unsigned_p = 1;
|
||||
/* fallthrough */
|
||||
case LT:
|
||||
compare_direction = 1;
|
||||
break;
|
||||
case GTU:
|
||||
unsigned_p = 1;
|
||||
/* fallthrough */
|
||||
case GT:
|
||||
compare_direction = -1;
|
||||
break;
|
||||
case NE:
|
||||
compare_direction = 0;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
/* make sure that the loop does not end by an overflow */
|
||||
if (compare_direction != increment_direction) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: %d: loop cannot be instrumented: terminated by overflow\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* try to instrument the loop. */
|
||||
|
||||
/* Handle the simpler case, where the bounds are known at compile time. */
|
||||
if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT)
|
||||
{
|
||||
int n_iterations;
|
||||
int increment_value_abs = INTVAL (increment) * increment_direction;
|
||||
|
||||
/* check the relation between compare-val and initial-val */
|
||||
int difference = INTVAL (comparison_value) - INTVAL (initial_value);
|
||||
int range_direction = (difference > 0) ? 1 : -1;
|
||||
|
||||
/* make sure the loop executes enough iterations to gain from BCT */
|
||||
if (difference > -3 && difference < 3) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: loop %d not BCT instrumented: too small iteration count.\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the loop executes at least once */
|
||||
if ((range_direction == 1 && compare_direction == -1)
|
||||
|| (range_direction == -1 && compare_direction == 1))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: loop %d: does not iterate even once. Not instrumenting.\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* make sure that the loop does not end by an overflow (in compile time
|
||||
bounds we must have an additional check for overflow, because here
|
||||
we also support the compare code of 'NE'. */
|
||||
if (comparison_code == NE
|
||||
&& increment_direction != range_direction) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Determine the number of iterations by:
|
||||
;
|
||||
; compare-val - initial-val + (increment -1) + additional-iteration
|
||||
; num_iterations = -----------------------------------------------------------------
|
||||
; increment
|
||||
*/
|
||||
difference = (range_direction > 0) ? difference : -difference;
|
||||
#if 0
|
||||
fprintf (stderr, "difference is: %d\n", difference); /* @*/
|
||||
fprintf (stderr, "increment_value_abs is: %d\n", increment_value_abs); /* @*/
|
||||
fprintf (stderr, "add_iteration is: %d\n", add_iteration); /* @*/
|
||||
fprintf (stderr, "INTVAL (comparison_value) is: %d\n", INTVAL (comparison_value)); /* @*/
|
||||
fprintf (stderr, "INTVAL (initial_value) is: %d\n", INTVAL (initial_value)); /* @*/
|
||||
#endif
|
||||
|
||||
if (increment_value_abs == 0) {
|
||||
fprintf (stderr, "insert_bct: error: increment == 0 !!!\n");
|
||||
abort ();
|
||||
}
|
||||
n_iterations = (difference + increment_value_abs - 1 + add_iteration)
|
||||
/ increment_value_abs;
|
||||
|
||||
#if 0
|
||||
fprintf (stderr, "number of iterations is: %d\n", n_iterations); /* @*/
|
||||
#endif
|
||||
instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
|
||||
|
||||
/* Done with this loop. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* Handle the more complex case, that the bounds are NOT known at compile time. */
|
||||
/* In this case we generate run_time calculation of the number of iterations */
|
||||
|
||||
/* With runtime bounds, if the compare is of the form '!=' we give up */
|
||||
if (comparison_code == NE) {
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: fail for loop %d: runtime bounds with != comparison\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
else {
|
||||
/* We rely on the existence of run-time guard to ensure that the
|
||||
loop executes at least once. */
|
||||
rtx sequence;
|
||||
rtx iterations_num_reg;
|
||||
|
||||
int increment_value_abs = INTVAL (increment) * increment_direction;
|
||||
|
||||
/* make sure that the increment is a power of two, otherwise (an
|
||||
expensive) divide is needed. */
|
||||
if ( !is_power_of_2(increment_value_abs) )
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: not instrumenting BCT because the increment is not power of 2\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* compute the number of iterations */
|
||||
start_sequence ();
|
||||
{
|
||||
/* CYGNUS LOCAL: HAIFA bug fix */
|
||||
rtx temp_reg;
|
||||
|
||||
/* Again, the number of iterations is calculated by:
|
||||
;
|
||||
; compare-val - initial-val + (increment -1) + additional-iteration
|
||||
; num_iterations = -----------------------------------------------------------------
|
||||
; increment
|
||||
*/
|
||||
/* ??? Do we have to call copy_rtx here before passing rtx to
|
||||
expand_binop? */
|
||||
if (compare_direction > 0) {
|
||||
/* <, <= :the loop variable is increasing */
|
||||
temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value,
|
||||
initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
else {
|
||||
temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value,
|
||||
comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
|
||||
if (increment_value_abs - 1 + add_iteration != 0)
|
||||
temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
|
||||
GEN_INT (increment_value_abs - 1 + add_iteration),
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
|
||||
if (increment_value_abs != 1)
|
||||
{
|
||||
/* ??? This will generate an expensive divide instruction for
|
||||
most targets. The original authors apparently expected this
|
||||
to be a shift, since they test for power-of-2 divisors above,
|
||||
but just naively generating a divide instruction will not give
|
||||
a shift. It happens to work for the PowerPC target because
|
||||
the rs6000.md file has a divide pattern that emits shifts.
|
||||
It will probably not work for any other target. */
|
||||
iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
|
||||
temp_reg,
|
||||
GEN_INT (increment_value_abs),
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
else
|
||||
iterations_num_reg = temp_reg;
|
||||
/* END CYGNUS LOCAL: HAIFA bug fix */
|
||||
}
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop_start);
|
||||
instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
|
||||
}
|
||||
}
|
||||
|
||||
/* instrument loop by inserting a bct in it. This is done in the following way:
|
||||
1. A new register is created and assigned the hard register number of the count
|
||||
register.
|
||||
2. In the head of the loop the new variable is initialized by the value passed in the
|
||||
loop_num_iterations parameter.
|
||||
3. At the end of the loop, comparison of the register with 0 is generated.
|
||||
The created comparison follows the pattern defined for the
|
||||
decrement_and_branch_on_count insn, so this insn will be generated in assembly
|
||||
generation phase.
|
||||
4. The compare&branch on the old variable is deleted. So, if the loop-variable was
|
||||
not used elsewhere, it will be eliminated by data-flow analisys. */
|
||||
|
||||
static void
|
||||
instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
|
||||
rtx loop_start, loop_end;
|
||||
rtx loop_num_iterations;
|
||||
{
|
||||
rtx temp_reg1, temp_reg2;
|
||||
rtx start_label;
|
||||
|
||||
rtx sequence;
|
||||
enum machine_mode loop_var_mode = SImode;
|
||||
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
|
||||
|
||||
/* eliminate the check on the old variable */
|
||||
delete_insn (PREV_INSN (loop_end));
|
||||
delete_insn (PREV_INSN (loop_end));
|
||||
|
||||
/* insert the label which will delimit the start of the loop */
|
||||
start_label = gen_label_rtx ();
|
||||
emit_label_after (start_label, loop_start);
|
||||
|
||||
/* insert initialization of the count register into the loop header */
|
||||
start_sequence ();
|
||||
temp_reg1 = gen_reg_rtx (loop_var_mode);
|
||||
emit_insn (gen_move_insn (temp_reg1, loop_num_iterations));
|
||||
|
||||
/* this will be count register */
|
||||
temp_reg2 = gen_rtx (REG, loop_var_mode, COUNT_REGISTER_REGNUM);
|
||||
/* we have to move the value to the count register from an GPR
|
||||
because rtx pointed to by loop_num_iterations could contain
|
||||
expression which cannot be moved into count register */
|
||||
emit_insn (gen_move_insn (temp_reg2, temp_reg1));
|
||||
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, loop_start);
|
||||
|
||||
/* insert new comparison on the count register instead of the
|
||||
old one, generating the needed BCT pattern (that will be
|
||||
later recognized by assembly generation phase). */
|
||||
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label),
|
||||
loop_end);
|
||||
LABEL_NUSES (start_label)++;
|
||||
}
|
||||
|
||||
#endif /* HAVE_decrement_and_branch_on_count */
|
||||
}
|
||||
|
||||
/* calculate the uid of the given loop */
|
||||
int
|
||||
loop_number (loop_start, loop_end)
|
||||
rtx loop_start, loop_end;
|
||||
{
|
||||
int loop_num = -1;
|
||||
|
||||
/* assume that this insn contains the LOOP_START
|
||||
note, so it will not be changed by the loop unrolling */
|
||||
loop_num = uid_loop_num[INSN_UID (loop_start)];
|
||||
/* sanity check - should never happen */
|
||||
if (loop_num == -1)
|
||||
abort ();
|
||||
|
||||
return loop_num;
|
||||
}
|
||||
|
||||
/* scan the function and determine whether it has indirect (computed) jump */
|
||||
static int
|
||||
indirect_jump_in_function_p (start)
|
||||
rtx start;
|
||||
{
|
||||
rtx insn;
|
||||
int is_indirect_jump = 0;
|
||||
|
||||
for (insn = start; insn; insn = NEXT_INSN (insn)) {
|
||||
if (GET_CODE (insn) == JUMP_INSN) {
|
||||
if (GET_CODE (PATTERN (insn)) == SET) {
|
||||
rtx insn_work_code = XEXP (PATTERN (insn), 1);
|
||||
|
||||
if (GET_CODE (insn_work_code) == LABEL_REF)
|
||||
continue;
|
||||
if (GET_CODE (insn_work_code) == IF_THEN_ELSE) {
|
||||
rtx jump_target = XEXP (insn_work_code, 1);
|
||||
|
||||
if (jump_target == pc_rtx
|
||||
|| (GET_CODE (jump_target) == (enum rtx_code)LABEL_REF))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
is_indirect_jump = 1;
|
||||
}
|
||||
}
|
||||
return is_indirect_jump;
|
||||
}
|
||||
|
||||
/* Return 1 iff N is a power of 2 (1, 2, 4, ...), and 0 otherwise.

   Zero and negative values are rejected.  The N > 0 test must come
   first: it keeps 0 from passing the bit test below, and it avoids
   evaluating N - 1 for the most negative int, which would overflow.  */

static int
is_power_of_2(n)
     int n;
{
  /* A positive power of 2 has exactly one bit set, so clearing its
     lowest set bit with N & (N - 1) leaves zero.  */
  return n > 0 && (n & (n - 1)) == 0;
}
|
||||
|
||||
/* return 1 iff insn is a conditional jump */
|
||||
is_conditional_branch (insn)
|
||||
rtx insn;
|
||||
{
|
||||
rtx work_code;
|
||||
if (GET_CODE (insn) != JUMP_INSN)
|
||||
return 0;
|
||||
work_code = PATTERN (insn);
|
||||
if (GET_CODE (work_code) != SET)
|
||||
return 0;
|
||||
if (GET_CODE (XEXP (work_code, 1)) != IF_THEN_ELSE)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* debugging: fix_bct_param () is called from toplev.c upon detection
|
||||
of the -fbct-***-N options. */
|
||||
int
|
||||
fix_bct_param (param, val)
|
||||
char *param, *val;
|
||||
{
|
||||
if ( !strcmp (param, "max") )
|
||||
dbg_bct_max = atoi (val);
|
||||
else if ( !strcmp (param, "min") )
|
||||
dbg_bct_min = atoi (val);
|
||||
}
|
||||
|
||||
/* debugging: return 1 if the loop should be instrumented,
|
||||
according to bct-min/max. */
|
||||
static int
|
||||
check_bct_param ()
|
||||
{
|
||||
static int dbg_bct_num = 0;
|
||||
|
||||
dbg_bct_num++;
|
||||
if (dbg_bct_num > dbg_bct_min || dbg_bct_min == -1)
|
||||
if (dbg_bct_num <= dbg_bct_max || dbg_bct_max == -1)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif /* HAIFA */
|
||||
/* END CYGNUS LOCAL haifa */
|
||||
|
|
10
gcc/loop.h
10
gcc/loop.h
|
@ -181,3 +181,13 @@ rtx final_biv_value PROTO((struct iv_class *, rtx, rtx));
|
|||
rtx final_giv_value PROTO((struct induction *, rtx, rtx));
|
||||
void emit_unrolled_add PROTO((rtx, rtx, rtx));
|
||||
int back_branch_in_range_p PROTO((rtx, rtx, rtx));
|
||||
|
||||
#ifdef HAIFA
|
||||
/* variables for interaction between unroll.c and loop.c, for
|
||||
the insertion of branch-on-count instruction. */
|
||||
extern int *loop_unroll_factor;
|
||||
extern rtx *loop_start_value;
|
||||
extern int *loop_unroll_iter;
|
||||
extern int loop_number();
|
||||
#endif /* HAIFA */
|
||||
|
||||
|
|
|
@ -0,0 +1,983 @@
|
|||
/* Move registers around to reduce number of move instructions needed.
|
||||
Copyright (C) 1987, 88, 89, 92-5, 1996, 1997 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU CC.
|
||||
|
||||
GNU CC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU CC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU CC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
|
||||
/* This module looks for cases where matching constraints would force
|
||||
an instruction to need a reload, and this reload would be a register
|
||||
to register move. It then attempts to change the registers used by the
|
||||
instruction to avoid the move instruction. */
|
||||
|
||||
#include "config.h"
|
||||
#ifdef __STDC__
|
||||
#include <stdarg.h>
|
||||
#else
|
||||
#include <varargs.h>
|
||||
#endif
|
||||
|
||||
/* Must precede rtl.h for FFS. */
|
||||
#include <stdio.h>
|
||||
|
||||
#include "rtl.h"
|
||||
#include "insn-config.h"
|
||||
#include "recog.h"
|
||||
#include "output.h"
|
||||
#include "reload.h"
|
||||
#include "regs.h"
|
||||
|
||||
static int stable_but_for_p PROTO((rtx, rtx, rtx));
|
||||
|
||||
#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) \
|
||||
|| defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
|
||||
|
||||
/* INC_INSN is an instruction that adds INCREMENT to REG.
|
||||
Try to fold INC_INSN as a post/pre in/decrement into INSN.
|
||||
Iff INC_INSN_SET is nonzero, inc_insn has a destination different from src.
|
||||
Return nonzero for success. */
|
||||
static int
|
||||
try_auto_increment (insn, inc_insn, inc_insn_set, reg, increment, pre)
|
||||
rtx reg, insn, inc_insn ,inc_insn_set;
|
||||
HOST_WIDE_INT increment;
|
||||
int pre;
|
||||
{
|
||||
enum rtx_code inc_code;
|
||||
|
||||
rtx pset = single_set (insn);
|
||||
if (pset)
|
||||
{
|
||||
/* Can't use the size of SET_SRC, we might have something like
|
||||
(sign_extend:SI (mem:QI ... */
|
||||
rtx use = find_use_as_address (pset, reg, 0);
|
||||
if (use != 0 && use != (rtx) 1)
|
||||
{
|
||||
int size = GET_MODE_SIZE (GET_MODE (use));
|
||||
if (0
|
||||
#ifdef HAVE_POST_INCREMENT
|
||||
|| (pre == 0 && (inc_code = POST_INC, increment == size))
|
||||
#endif
|
||||
#ifdef HAVE_PRE_INCREMENT
|
||||
|| (pre == 1 && (inc_code = PRE_INC, increment == size))
|
||||
#endif
|
||||
#ifdef HAVE_POST_DECREMENT
|
||||
|| (pre == 0 && (inc_code = POST_DEC, increment == -size))
|
||||
#endif
|
||||
#ifdef HAVE_PRE_DECREMENT
|
||||
|| (pre == 1 && (inc_code = PRE_DEC, increment == -size))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
if (inc_insn_set)
|
||||
validate_change
|
||||
(inc_insn,
|
||||
&SET_SRC (inc_insn_set),
|
||||
XEXP (SET_SRC (inc_insn_set), 0), 1);
|
||||
validate_change (insn, &XEXP (use, 0),
|
||||
gen_rtx (inc_code,
|
||||
Pmode,
|
||||
reg), 1);
|
||||
if (apply_change_group ())
|
||||
{
|
||||
REG_NOTES (insn)
|
||||
= gen_rtx (EXPR_LIST, REG_INC,
|
||||
reg, REG_NOTES (insn));
|
||||
if (! inc_insn_set)
|
||||
{
|
||||
PUT_CODE (inc_insn, NOTE);
|
||||
NOTE_LINE_NUMBER (inc_insn) = NOTE_INSN_DELETED;
|
||||
NOTE_SOURCE_FILE (inc_insn) = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif /* defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) */
|
||||
|
||||
void
|
||||
regmove_optimize (f, nregs, regmove_dump_file)
|
||||
rtx f;
|
||||
int nregs;
|
||||
FILE *regmove_dump_file;
|
||||
{
|
||||
#ifdef REGISTER_CONSTRAINTS
|
||||
rtx insn;
|
||||
int matches[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS];
|
||||
int modified[MAX_RECOG_OPERANDS];
|
||||
int early_clobber[MAX_RECOG_OPERANDS];
|
||||
int commutative;
|
||||
int pass;
|
||||
|
||||
/* A forward/backward pass. Replace output operands with input operands. */
|
||||
|
||||
for (pass = 0; pass < 2; pass++)
|
||||
{
|
||||
if (regmove_dump_file)
|
||||
fprintf (regmove_dump_file, "Starting %s pass...\n",
|
||||
pass ? "backward" : "forward");
|
||||
|
||||
for (insn = pass ? get_last_insn () : f; insn;
|
||||
insn = pass ? PREV_INSN (insn) : NEXT_INSN (insn))
|
||||
{
|
||||
if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
|
||||
{
|
||||
int insn_code_number = recog_memoized (insn);
|
||||
int operand_number, match_number;
|
||||
|
||||
if (insn_code_number < 0)
|
||||
continue;
|
||||
|
||||
insn_extract (insn);
|
||||
if (! constrain_operands (insn_code_number, 0))
|
||||
continue;
|
||||
|
||||
commutative = -1;
|
||||
|
||||
/* Must initialize this before the loop, because the code for
|
||||
the commutative case may set matches for operands other than
|
||||
the current one. */
|
||||
bzero (matches, sizeof (matches));
|
||||
|
||||
for (operand_number = 0;
|
||||
operand_number < insn_n_operands[insn_code_number];
|
||||
operand_number++)
|
||||
{
|
||||
int output_operand = 0;
|
||||
int matching_operand = operand_number;
|
||||
char *p, c;
|
||||
int i = 0;
|
||||
|
||||
modified[operand_number] = 0;
|
||||
early_clobber[operand_number] = 0;
|
||||
|
||||
p = insn_operand_constraint[insn_code_number][operand_number];
|
||||
|
||||
if (*p == '=')
|
||||
modified[operand_number] = 2;
|
||||
else if (*p == '+')
|
||||
modified[operand_number] = 1;
|
||||
|
||||
for (;*p && i < which_alternative; p++)
|
||||
if (*p == ',')
|
||||
i++;
|
||||
|
||||
while ((c = *p++) != '\0' && c != ',')
|
||||
switch (c)
|
||||
{
|
||||
case '=':
|
||||
break;
|
||||
case '+':
|
||||
break;
|
||||
case '&':
|
||||
early_clobber[operand_number] = 1;
|
||||
break;
|
||||
case '%':
|
||||
commutative = operand_number;
|
||||
break;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
c -= '0';
|
||||
matches[operand_number][c] = 1;
|
||||
if (commutative >= 0)
|
||||
{
|
||||
if (c == commutative || c == commutative + 1)
|
||||
{
|
||||
int other = c + (c == commutative ? 1 : -1);
|
||||
matches[operand_number][other] = 1;
|
||||
}
|
||||
if (operand_number == commutative
|
||||
|| operand_number == commutative + 1)
|
||||
{
|
||||
int other = (operand_number
|
||||
+ (operand_number == commutative
|
||||
? 1 : -1));
|
||||
matches[other][c] = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now scan through the operands looking for a source operand
|
||||
which is supposed to match the destination operand.
|
||||
Then scan forward for an instruction which uses the dest
|
||||
operand.
|
||||
If it dies there, then replace the dest in both operands with
|
||||
the source operand. */
|
||||
|
||||
for (operand_number = 0;
|
||||
operand_number < insn_n_operands[insn_code_number];
|
||||
operand_number++)
|
||||
{
|
||||
for (match_number = 0;
|
||||
match_number < insn_n_operands[insn_code_number];
|
||||
match_number++)
|
||||
{
|
||||
rtx set, p, src, dst, src_subreg;
|
||||
rtx post_inc = 0, post_inc_set = 0, search_end = 0;
|
||||
rtx src_note, dst_note;
|
||||
int success = 0;
|
||||
int num_calls = 0;
|
||||
enum rtx_code code = NOTE;
|
||||
HOST_WIDE_INT insn_const, newconst;
|
||||
rtx overlap = 0; /* need to move insn ? */
|
||||
|
||||
/* Nothing to do if the two operands aren't supposed to
|
||||
match. */
|
||||
if (matches[operand_number][match_number] == 0)
|
||||
continue;
|
||||
|
||||
src = recog_operand[operand_number];
|
||||
dst = recog_operand[match_number];
|
||||
|
||||
if (GET_CODE (src) != REG
|
||||
|| REGNO (src) < FIRST_PSEUDO_REGISTER)
|
||||
continue;
|
||||
|
||||
src_subreg = src;
|
||||
if (GET_CODE (dst) == SUBREG
|
||||
&& GET_MODE_SIZE (GET_MODE (dst))
|
||||
>= GET_MODE_SIZE (GET_MODE (SUBREG_REG (dst))))
|
||||
{
|
||||
src_subreg
|
||||
= gen_rtx(SUBREG, GET_MODE (SUBREG_REG (dst)),
|
||||
src, SUBREG_WORD (dst));
|
||||
dst = SUBREG_REG (dst);
|
||||
}
|
||||
if (GET_CODE (dst) != REG
|
||||
|| REGNO (dst) < FIRST_PSEUDO_REGISTER)
|
||||
continue;
|
||||
|
||||
/* If the operands already match, then there is nothing
|
||||
to do. */
|
||||
if (operands_match_p (src, dst))
|
||||
continue;
|
||||
|
||||
set = single_set (insn);
|
||||
if (! set)
|
||||
continue;
|
||||
|
||||
/* operand_number/src must be a read-only operand, and
|
||||
match_operand/dst must be a write-only operand. */
|
||||
if (modified[match_number] != 2)
|
||||
continue;
|
||||
|
||||
if (early_clobber[match_number] == 1)
|
||||
continue;
|
||||
|
||||
if (modified[operand_number] != 0)
|
||||
continue;
|
||||
|
||||
/* Make sure match_operand is the destination. */
|
||||
if (recog_operand[match_number] != SET_DEST (set))
|
||||
continue;
|
||||
|
||||
src_note = find_reg_note (insn, REG_DEAD, src);
|
||||
|
||||
if (! src_note)
|
||||
{
|
||||
/* Look for (set (regX) (op regA constX))
|
||||
(set (regY) (op regA constY))
|
||||
and change that to
|
||||
(set (regA) (op regA constX)).
|
||||
(set (regY) (op regA constY-constX)).
|
||||
This works for add and shift operations, if
|
||||
regA is dead after or set by the second insn. */
|
||||
|
||||
code = GET_CODE (SET_SRC (set));
|
||||
if ((code == PLUS || code == LSHIFTRT
|
||||
|| code == ASHIFT || code == ASHIFTRT)
|
||||
&& XEXP (SET_SRC (set), 0) == src
|
||||
&& (GET_CODE (XEXP (SET_SRC (set), 1))
|
||||
== CONST_INT))
|
||||
insn_const = INTVAL (XEXP (SET_SRC (set), 1));
|
||||
else if (! stable_but_for_p (SET_SRC (set), src, dst))
|
||||
continue;
|
||||
else
|
||||
/* We might find a src_note while scanning. */
|
||||
code = NOTE;
|
||||
}
|
||||
|
||||
if (regmove_dump_file)
|
||||
fprintf (regmove_dump_file,
|
||||
"Could fix operand %d of insn %d matching operand %d.\n",
|
||||
operand_number, INSN_UID (insn), match_number);
|
||||
|
||||
/* ??? If src is set once, and is set equal to a
|
||||
constant, then do not use it for this optimization,
|
||||
as this would make it no longer equivalent to a
|
||||
constant? */
|
||||
|
||||
/* Scan forward to find the next instruction that
|
||||
uses the output operand. If the operand dies here,
|
||||
then replace it in both instructions with
|
||||
operand_number. */
|
||||
|
||||
for (p = NEXT_INSN (insn); p; p = NEXT_INSN (p))
|
||||
{
|
||||
if (GET_CODE (p) == CODE_LABEL
|
||||
|| GET_CODE (p) == JUMP_INSN
|
||||
|| (GET_CODE (p) == NOTE
|
||||
&& ((NOTE_LINE_NUMBER (p)
|
||||
== NOTE_INSN_LOOP_BEG)
|
||||
|| (NOTE_LINE_NUMBER (p)
|
||||
== NOTE_INSN_LOOP_END))))
|
||||
break;
|
||||
|
||||
if (GET_RTX_CLASS (GET_CODE (p)) != 'i')
|
||||
continue;
|
||||
|
||||
if (reg_set_p (src, p) || reg_set_p (dst, p)
|
||||
|| (GET_CODE (PATTERN (p)) == USE
|
||||
&& reg_overlap_mentioned_p (src,
|
||||
XEXP (PATTERN (p),
|
||||
0))))
|
||||
break;
|
||||
|
||||
/* See if all of DST dies in P. This test is
|
||||
slightly more conservative than it needs to be. */
|
||||
if ((dst_note
|
||||
= find_regno_note (p, REG_DEAD, REGNO (dst)))
|
||||
&& (GET_MODE (XEXP (dst_note, 0))
|
||||
== GET_MODE (dst)))
|
||||
{
|
||||
if (! src_note)
|
||||
{
|
||||
rtx q;
|
||||
rtx set2;
|
||||
|
||||
/* If an optimization is done, the value
|
||||
of SRC while P is executed will be
|
||||
changed. Check that this is OK. */
|
||||
if (reg_overlap_mentioned_p (src,
|
||||
PATTERN (p)))
|
||||
break;
|
||||
for (q = p; q; q = NEXT_INSN (q))
|
||||
{
|
||||
if (GET_CODE (q) == CODE_LABEL
|
||||
|| GET_CODE (q) == JUMP_INSN
|
||||
|| (GET_CODE (q) == NOTE
|
||||
&& ((NOTE_LINE_NUMBER (q)
|
||||
== NOTE_INSN_LOOP_BEG)
|
||||
|| (NOTE_LINE_NUMBER (q)
|
||||
== NOTE_INSN_LOOP_END))))
|
||||
{
|
||||
q = 0;
|
||||
break;
|
||||
}
|
||||
if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
|
||||
continue;
|
||||
if (reg_overlap_mentioned_p (src,
|
||||
PATTERN (q))
|
||||
|| reg_set_p (src, q))
|
||||
break;
|
||||
}
|
||||
if (q)
|
||||
set2 = single_set (q);
|
||||
if (! q || ! set2
|
||||
|| GET_CODE (SET_SRC (set2)) != code
|
||||
|| XEXP (SET_SRC (set2), 0) != src
|
||||
|| (GET_CODE (XEXP (SET_SRC (set2), 1))
|
||||
!= CONST_INT)
|
||||
|| (SET_DEST (set2) != src
|
||||
&& !find_reg_note (q, REG_DEAD, src)))
|
||||
{
|
||||
/* If this is a PLUS, we can still save
|
||||
a register by doing
|
||||
src += insn_const;
|
||||
P;
|
||||
src -= insn_const; .
|
||||
This also gives opportunities for
|
||||
subsequent optimizations in the
|
||||
backward pass, so do it there. */
|
||||
if (code == PLUS && pass == 1
|
||||
#ifdef HAVE_cc0
|
||||
/* We may not emit an insn directly
|
||||
after P if the latter sets CC0. */
|
||||
&& ! sets_cc0_p (PATTERN (p))
|
||||
#endif
|
||||
)
|
||||
|
||||
{
|
||||
search_end = q;
|
||||
q = insn;
|
||||
set2 = set;
|
||||
newconst = -insn_const;
|
||||
code = MINUS;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
newconst
|
||||
= (INTVAL (XEXP (SET_SRC (set2), 1))
|
||||
- insn_const);
|
||||
/* Reject out of range shifts. */
|
||||
if (code != PLUS
|
||||
&& (newconst < 0
|
||||
|| (newconst
|
||||
>= GET_MODE_BITSIZE (GET_MODE (SET_SRC (set2))))))
|
||||
break;
|
||||
if (code == PLUS)
|
||||
{
|
||||
post_inc = q;
|
||||
if (SET_DEST (set2) != src)
|
||||
post_inc_set = set2;
|
||||
}
|
||||
}
|
||||
/* We use 1 as last argument to
|
||||
validate_change so that all changes
|
||||
are accepted or rejected together by
|
||||
apply_change_group when it is called
|
||||
by validate_replace_rtx . */
|
||||
validate_change (q, &XEXP (SET_SRC (set2), 1),
|
||||
GEN_INT (newconst), 1);
|
||||
}
|
||||
validate_change (insn,
|
||||
recog_operand_loc[match_number],
|
||||
src, 1);
|
||||
if (validate_replace_rtx (dst, src_subreg, p))
|
||||
success = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (reg_overlap_mentioned_p (dst, PATTERN (p)))
|
||||
break;
|
||||
if (! src_note
|
||||
&& reg_overlap_mentioned_p (src, PATTERN (p)))
|
||||
{
|
||||
/* INSN was already checked to be movable when
|
||||
we found no REG_DEAD note for src on it. */
|
||||
overlap = p;
|
||||
src_note = find_reg_note (p, REG_DEAD, src);
|
||||
}
|
||||
|
||||
/* If we have passed a call instruction, and the
|
||||
pseudo-reg SRC is not already live across a call,
|
||||
then don't perform the optimization. */
|
||||
if (GET_CODE (p) == CALL_INSN)
|
||||
{
|
||||
num_calls++;
|
||||
|
||||
if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (success)
|
||||
{
|
||||
/* Remove the death note for DST from P. */
|
||||
remove_note (p, dst_note);
|
||||
if (code == MINUS)
|
||||
{
|
||||
post_inc
|
||||
= emit_insn_after (copy_rtx (PATTERN (insn)),
|
||||
p);
|
||||
#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
|
||||
if (search_end
|
||||
&& try_auto_increment (search_end, post_inc,
|
||||
0, src, newconst, 1))
|
||||
post_inc = 0;
|
||||
#endif
|
||||
validate_change (insn, &XEXP (SET_SRC (set), 1),
|
||||
GEN_INT (insn_const), 0);
|
||||
REG_N_SETS (REGNO (src))++;
|
||||
}
|
||||
if (overlap)
|
||||
{
|
||||
/* The lifetime of src and dest overlap,
|
||||
but we can change this by moving insn. */
|
||||
rtx pat = PATTERN (insn);
|
||||
if (src_note)
|
||||
remove_note (overlap, src_note);
|
||||
#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT)
|
||||
if (code == PLUS
|
||||
&& try_auto_increment (overlap, insn, 0,
|
||||
src, insn_const, 0))
|
||||
insn = overlap;
|
||||
else
|
||||
#endif
|
||||
{
|
||||
emit_insn_after_with_line_notes
|
||||
(pat, PREV_INSN (p), insn);
|
||||
PUT_CODE (insn, NOTE);
|
||||
NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
|
||||
NOTE_SOURCE_FILE (insn) = 0;
|
||||
/* emit_insn_after_with_line_notes
|
||||
has no return value, so search
|
||||
for the new insn. */
|
||||
for (insn = p; PATTERN (insn) != pat; )
|
||||
insn = PREV_INSN (insn);
|
||||
}
|
||||
}
|
||||
/* Sometimes we'd generate src = const; src += n;
|
||||
if so, replace the instruction that set src
|
||||
in the first place. */
|
||||
|
||||
if (! overlap && (code == PLUS || code == MINUS))
|
||||
{
|
||||
rtx note
|
||||
= find_reg_note (insn, REG_EQUAL, NULL_RTX);
|
||||
rtx q, set2;
|
||||
int num_calls2 = 0;
|
||||
|
||||
if (note && CONSTANT_P (XEXP (note, 0)))
|
||||
{
|
||||
for (q = PREV_INSN (insn); q;
|
||||
q = PREV_INSN(q))
|
||||
{
|
||||
if (GET_CODE (q) == JUMP_INSN)
|
||||
{
|
||||
q = 0;
|
||||
break;
|
||||
}
|
||||
if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
|
||||
continue;
|
||||
if (reg_set_p (src, q))
|
||||
{
|
||||
set2 = single_set (q);
|
||||
break;
|
||||
}
|
||||
if (reg_overlap_mentioned_p (src,
|
||||
PATTERN (q)))
|
||||
{
|
||||
q = 0;
|
||||
break;
|
||||
}
|
||||
if (GET_CODE (p) == CALL_INSN)
|
||||
num_calls2++;
|
||||
}
|
||||
if (q && set2 && SET_DEST (set2) == src
|
||||
&& CONSTANT_P (SET_SRC (set2))
|
||||
&& validate_change (insn, &SET_SRC (set),
|
||||
XEXP (note, 0), 0))
|
||||
{
|
||||
PUT_CODE (q, NOTE);
|
||||
NOTE_LINE_NUMBER (q) = NOTE_INSN_DELETED;
|
||||
NOTE_SOURCE_FILE (q) = 0;
|
||||
REG_N_SETS (REGNO (src))--;
|
||||
REG_N_CALLS_CROSSED (REGNO (src))
|
||||
-= num_calls2;
|
||||
insn_const = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (0) ;
|
||||
#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
|
||||
else if ((code == PLUS || code == MINUS)
|
||||
&& insn_const
|
||||
&& try_auto_increment (p, insn, 0,
|
||||
src, insn_const, 1))
|
||||
insn = p;
|
||||
#endif
|
||||
#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT)
|
||||
else if (post_inc
|
||||
&& try_auto_increment (p, post_inc,
|
||||
post_inc_set, src,
|
||||
newconst, 0))
|
||||
post_inc = 0;
|
||||
#endif
|
||||
#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
|
||||
/* If post_inc still prevails, try to find an
|
||||
insn where it can be used as a pre-in/decrement.
|
||||
If code is MINUS, this was already tried. */
|
||||
if (post_inc && code == PLUS
|
||||
/* Check that newconst is likely to be usable
|
||||
in a pre-in/decrement before starting the
|
||||
search. */
|
||||
&& (0
|
||||
#if defined (HAVE_PRE_INCREMENT)
|
||||
|| (newconst > 0 && newconst <= MOVE_MAX)
|
||||
#endif
|
||||
#if defined (HAVE_PRE_DECREMENT)
|
||||
|| (newconst < 0 && newconst >= -MOVE_MAX)
|
||||
#endif
|
||||
) && exact_log2 (newconst))
|
||||
{
|
||||
rtx q, inc_dest;
|
||||
|
||||
inc_dest
|
||||
= post_inc_set ? SET_DEST (post_inc_set) : src;
|
||||
for (q = post_inc; q = NEXT_INSN (q); )
|
||||
{
|
||||
if (GET_CODE (q) == CODE_LABEL
|
||||
|| GET_CODE (q) == JUMP_INSN
|
||||
|| (GET_CODE (q) == NOTE
|
||||
&& ((NOTE_LINE_NUMBER (q)
|
||||
== NOTE_INSN_LOOP_BEG)
|
||||
|| (NOTE_LINE_NUMBER (q)
|
||||
== NOTE_INSN_LOOP_END))))
|
||||
break;
|
||||
if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
|
||||
continue;
|
||||
if (src != inc_dest
|
||||
&& (reg_overlap_mentioned_p (src,
|
||||
PATTERN (q))
|
||||
|| reg_set_p (src, q)))
|
||||
break;
|
||||
if (reg_set_p (inc_dest, q))
|
||||
break;
|
||||
if (reg_overlap_mentioned_p (inc_dest,
|
||||
PATTERN (q)))
|
||||
{
|
||||
try_auto_increment (q, post_inc,
|
||||
post_inc_set,
|
||||
inc_dest,
|
||||
newconst, 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) */
|
||||
/* Move the death note for DST to INSN if it is used
|
||||
there. */
|
||||
if (reg_overlap_mentioned_p (dst, PATTERN (insn)))
|
||||
{
|
||||
XEXP (dst_note, 1) = REG_NOTES (insn);
|
||||
REG_NOTES (insn) = dst_note;
|
||||
}
|
||||
|
||||
if (src_note)
|
||||
{
|
||||
/* Move the death note for SRC from INSN to P. */
|
||||
if (! overlap)
|
||||
remove_note (insn, src_note);
|
||||
XEXP (src_note, 1) = REG_NOTES (p);
|
||||
REG_NOTES (p) = src_note;
|
||||
|
||||
REG_N_CALLS_CROSSED (REGNO (src)) += num_calls;
|
||||
}
|
||||
|
||||
REG_N_SETS (REGNO (src))++;
|
||||
REG_N_SETS (REGNO (dst))--;
|
||||
|
||||
REG_N_CALLS_CROSSED (REGNO (dst)) -= num_calls;
|
||||
|
||||
/* ??? Must adjust reg_live_length, and reg_n_refs for
|
||||
both registers. Must keep track of loop_depth in
|
||||
order to get reg_n_refs adjustment correct. */
|
||||
|
||||
if (regmove_dump_file)
|
||||
fprintf (regmove_dump_file,
|
||||
"Fixed operand %d of insn %d matching operand %d.\n",
|
||||
operand_number, INSN_UID (insn),
|
||||
match_number);
|
||||
|
||||
goto done_forwards;
|
||||
}
|
||||
}
|
||||
}
|
||||
done_forwards:
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* A backward pass. Replace input operands with output operands. */
|
||||
|
||||
if (regmove_dump_file)
|
||||
fprintf (regmove_dump_file, "Starting backward pass...\n");
|
||||
|
||||
for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
|
||||
{
|
||||
if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
|
||||
{
|
||||
int insn_code_number = recog_memoized (insn);
|
||||
int operand_number, match_number;
|
||||
|
||||
if (insn_code_number < 0)
|
||||
continue;
|
||||
|
||||
insn_extract (insn);
|
||||
if (! constrain_operands (insn_code_number, 0))
|
||||
continue;
|
||||
|
||||
commutative = -1;
|
||||
|
||||
/* Must initialize this before the loop, because the code for
|
||||
the commutative case may set matches for operands other than
|
||||
the current one. */
|
||||
bzero (matches, sizeof (matches));
|
||||
|
||||
for (operand_number = 0;
|
||||
operand_number < insn_n_operands[insn_code_number];
|
||||
operand_number++)
|
||||
{
|
||||
int output_operand = 0;
|
||||
int matching_operand = operand_number;
|
||||
char *p, c;
|
||||
int i = 0;
|
||||
|
||||
modified[operand_number] = 0;
|
||||
early_clobber[operand_number] = 0;
|
||||
|
||||
p = insn_operand_constraint[insn_code_number][operand_number];
|
||||
|
||||
if (*p == '=')
|
||||
modified[operand_number] = 2;
|
||||
else if (*p == '+')
|
||||
modified[operand_number] = 1;
|
||||
|
||||
for (; *p && i < which_alternative; p++)
|
||||
if (*p == ',')
|
||||
i++;
|
||||
|
||||
while ((c = *p++) != '\0' && c != ',')
|
||||
switch (c)
|
||||
{
|
||||
case '=':
|
||||
break;
|
||||
case '+':
|
||||
break;
|
||||
case '&':
|
||||
early_clobber[operand_number] = 1;
|
||||
break;
|
||||
case '%':
|
||||
commutative = operand_number;
|
||||
break;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
c -= '0';
|
||||
matches[c][operand_number] = 1;
|
||||
if (commutative >= 0)
|
||||
{
|
||||
if (c == commutative || c == commutative + 1)
|
||||
{
|
||||
int other = c + (c == commutative ? 1 : -1);
|
||||
matches[other][operand_number] = 1;
|
||||
}
|
||||
if (operand_number == commutative
|
||||
|| operand_number == commutative + 1)
|
||||
{
|
||||
int other = (operand_number
|
||||
+ (operand_number == commutative
|
||||
? 1 : -1));
|
||||
matches[c][other] = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now scan through the operands looking for a destination operand
|
||||
which is supposed to match a source operand.
|
||||
Then scan backward for an instruction which sets the source
|
||||
operand. If safe, then replace the source operand with the
|
||||
dest operand in both instructions. */
|
||||
|
||||
for (operand_number = 0;
|
||||
operand_number < insn_n_operands[insn_code_number];
|
||||
operand_number++)
|
||||
{
|
||||
for (match_number = 0;
|
||||
match_number < insn_n_operands[insn_code_number];
|
||||
match_number++)
|
||||
{
|
||||
rtx set, p, src, dst;
|
||||
rtx src_note, dst_note;
|
||||
int success = 0;
|
||||
int num_calls = 0;
|
||||
|
||||
/* Nothing to do if the two operands aren't supposed to
|
||||
match. */
|
||||
if (matches[operand_number][match_number] == 0)
|
||||
continue;
|
||||
|
||||
dst = recog_operand[operand_number];
|
||||
src = recog_operand[match_number];
|
||||
|
||||
if (GET_CODE (src) != REG
|
||||
|| REGNO (src) < FIRST_PSEUDO_REGISTER)
|
||||
continue;
|
||||
|
||||
if (GET_CODE (dst) != REG
|
||||
|| REGNO (dst) < FIRST_PSEUDO_REGISTER)
|
||||
continue;
|
||||
|
||||
/* If the operands already match, then there is nothing
|
||||
to do. */
|
||||
if (operands_match_p (src, dst))
|
||||
continue;
|
||||
|
||||
set = single_set (insn);
|
||||
if (! set)
|
||||
continue;
|
||||
|
||||
/* operand_number/dst must be a write-only operand, and
|
||||
match_number/src must be a read-only operand. */
|
||||
if (modified[match_number] != 0)
|
||||
continue;
|
||||
|
||||
if (early_clobber[operand_number] == 1)
|
||||
continue;
|
||||
|
||||
if (modified[operand_number] != 2)
|
||||
continue;
|
||||
|
||||
/* Make sure operand_number is the destination. */
|
||||
if (recog_operand[operand_number] != SET_DEST (set))
|
||||
continue;
|
||||
|
||||
if (! (src_note = find_reg_note (insn, REG_DEAD, src)))
|
||||
continue;
|
||||
|
||||
/* Can not modify an earlier insn to set dst if this insn
|
||||
uses an old value in the source. */
|
||||
if (reg_overlap_mentioned_p (dst, SET_SRC (set)))
|
||||
continue;
|
||||
|
||||
if (regmove_dump_file)
|
||||
fprintf (regmove_dump_file,
|
||||
"Could fix operand %d of insn %d matching operand %d.\n",
|
||||
operand_number, INSN_UID (insn), match_number);
|
||||
|
||||
/* ??? If src is set once, and is set equal to a constant,
|
||||
then do not use it for this optimization, as this would
|
||||
make it no longer equivalent to a constant? */
|
||||
|
||||
/* Scan backward to find the first instruction that uses
|
||||
the input operand. If the operand is set here, then
|
||||
replace it in both instructions with operand_number. */
|
||||
|
||||
for (p = PREV_INSN (insn); p; p = PREV_INSN (p))
|
||||
{
|
||||
rtx pset;
|
||||
|
||||
if (GET_CODE (p) == CODE_LABEL
|
||||
|| GET_CODE (p) == JUMP_INSN
|
||||
|| (GET_CODE (p) == NOTE
|
||||
&& (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG
|
||||
|| NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)))
|
||||
break;
|
||||
|
||||
if (GET_RTX_CLASS (GET_CODE (p)) != 'i')
|
||||
continue;
|
||||
|
||||
/* ??? See if all of SRC is set in P. This test is much
|
||||
more conservative than it needs to be. */
|
||||
pset = single_set (p);
|
||||
if (pset && SET_DEST (pset) == src)
|
||||
{
|
||||
/* We use validate_replace_rtx, in case there
|
||||
are multiple identical source operands. All of
|
||||
them have to be changed at the same time. */
|
||||
if (validate_replace_rtx (src, dst, insn))
|
||||
{
|
||||
if (validate_change (p, &SET_DEST (pset),
|
||||
dst, 0))
|
||||
success = 1;
|
||||
else
|
||||
{
|
||||
/* Change all source operands back.
|
||||
This modifies the dst as a side-effect. */
|
||||
validate_replace_rtx (dst, src, insn);
|
||||
/* Now make sure the dst is right. */
|
||||
validate_change (insn,
|
||||
recog_operand_loc[operand_number],
|
||||
dst, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (reg_overlap_mentioned_p (src, PATTERN (p))
|
||||
|| reg_overlap_mentioned_p (dst, PATTERN (p)))
|
||||
break;
|
||||
|
||||
/* If we have passed a call instruction, and the
|
||||
pseudo-reg DST is not already live across a call,
|
||||
then don't perform the optimization. */
|
||||
if (GET_CODE (p) == CALL_INSN)
|
||||
{
|
||||
num_calls++;
|
||||
|
||||
if (REG_N_CALLS_CROSSED (REGNO (dst)) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (success)
|
||||
{
|
||||
/* Remove the death note for SRC from INSN. */
|
||||
remove_note (insn, src_note);
|
||||
/* Move the death note for SRC to P if it is used
|
||||
there. */
|
||||
if (reg_overlap_mentioned_p (src, PATTERN (p)))
|
||||
{
|
||||
XEXP (src_note, 1) = REG_NOTES (p);
|
||||
REG_NOTES (p) = src_note;
|
||||
}
|
||||
/* If there is a REG_DEAD note for DST on P, then remove
|
||||
it, because DST is now set there. */
|
||||
if (dst_note = find_reg_note (p, REG_DEAD, dst))
|
||||
remove_note (p, dst_note);
|
||||
|
||||
REG_N_SETS (REGNO (dst))++;
|
||||
REG_N_SETS (REGNO (src))--;
|
||||
|
||||
REG_N_CALLS_CROSSED (REGNO (dst)) += num_calls;
|
||||
REG_N_CALLS_CROSSED (REGNO (src)) -= num_calls;
|
||||
|
||||
/* ??? Must adjust reg_live_length, and reg_n_refs for
|
||||
both registers. Must keep track of loop_depth in
|
||||
order to get reg_n_refs adjustment correct. */
|
||||
|
||||
if (regmove_dump_file)
|
||||
fprintf (regmove_dump_file,
|
||||
"Fixed operand %d of insn %d matching operand %d.\n",
|
||||
operand_number, INSN_UID (insn), match_number);
|
||||
|
||||
goto done_backwards;
|
||||
}
|
||||
}
|
||||
}
|
||||
done_backwards:
|
||||
;
|
||||
}
|
||||
}
|
||||
#endif /* REGISTER_CONSTRAINTS */
|
||||
}
|
||||
|
||||
/* Return nonzero if X is stable apart from mentioning SRC or mentioning /
|
||||
changing DST.  For rtx classes '<', '1', 'c', '2', 'b' and '3' each operand whose format letter is 'e' is checked recursively; an rtx of class 'o' that is SRC or DST itself counts as stable; everything else is stable only when rtx_unstable_p returns zero for it.  If in doubt, presume it is unstable.  */
|
||||
static int
|
||||
stable_but_for_p (x, src, dst)
|
||||
rtx x, src, dst;
|
||||
{
|
||||
RTX_CODE code = GET_CODE (x);
|
||||
switch (GET_RTX_CLASS (code))
|
||||
{
|
||||
case '<': case '1': case 'c': case '2': case 'b': case '3': /* These classes are judged by their operands.  */
|
||||
{
|
||||
int i;
|
||||
char *fmt = GET_RTX_FORMAT (code);
|
||||
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) /* Walk the operand slots from last to first.  */
|
||||
if (fmt[i] == 'e' && ! stable_but_for_p (XEXP (x, i), src, dst)) /* Slots with any other format letter are ignored.  */
|
||||
return 0; /* One unstable operand makes X unstable.  */
|
||||
return 1;
|
||||
}
|
||||
case 'o':
|
||||
if (x == src || x == dst) /* Compared with ==, so only SRC / DST themselves are exempted.  */
|
||||
return 1;
|
||||
/* fall through */
|
||||
default:
|
||||
return ! rtx_unstable_p (x); /* Anything not handled above defers to rtx_unstable_p.  */
|
||||
}
|
||||
}
|
|
@ -812,6 +812,7 @@ extern void remove_note PROTO((rtx, rtx));
|
|||
extern void note_stores PROTO((rtx, void (*)()));
|
||||
extern int refers_to_regno_p PROTO((int, int, rtx, rtx *));
|
||||
extern int reg_overlap_mentioned_p PROTO((rtx, rtx));
|
||||
extern rtx find_use_as_address PROTO((rtx, rtx, HOST_WIDE_INT));
|
||||
|
||||
|
||||
/* Maximum number of parallel sets and clobbers in any insn in this fn.
|
||||
|
|
92
gcc/toplev.c
92
gcc/toplev.c
|
@ -245,6 +245,7 @@ int cse2_dump = 0;
|
|||
int branch_prob_dump = 0;
|
||||
int flow_dump = 0;
|
||||
int combine_dump = 0;
|
||||
int regmove_dump = 0;
|
||||
int sched_dump = 0;
|
||||
int local_reg_dump = 0;
|
||||
int global_reg_dump = 0;
|
||||
|
@ -566,6 +567,35 @@ int flag_pedantic_errors = 0;
|
|||
int flag_schedule_insns = 0;
|
||||
int flag_schedule_insns_after_reload = 0;
|
||||
|
||||
#ifdef HAIFA
|
||||
/* The following flags have effect only for scheduling before register
|
||||
allocation:
|
||||
|
||||
flag_schedule_interblock means schedule insns across basic blocks.
|
||||
flag_schedule_speculative means allow speculative motion of non-load insns.
|
||||
flag_schedule_speculative_load means allow speculative motion of some
|
||||
load insns.
|
||||
flag_schedule_speculative_load_dangerous allows speculative motion of more
|
||||
load insns.
|
||||
flag_schedule_reverse_before_reload means try to reverse original order
|
||||
of insns (S).
|
||||
flag_schedule_reverse_after_reload means try to reverse original order
|
||||
of insns (R). */
|
||||
|
||||
int flag_schedule_interblock = 1;
|
||||
int flag_schedule_speculative = 1;
|
||||
int flag_schedule_speculative_load = 0;
|
||||
int flag_schedule_speculative_load_dangerous = 0;
|
||||
int flag_schedule_reverse_before_reload = 0;
|
||||
int flag_schedule_reverse_after_reload = 0;
|
||||
|
||||
|
||||
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
|
||||
by a cheaper branch, on a count register. */
|
||||
int flag_branch_on_count_reg;
|
||||
#endif /* HAIFA */
|
||||
|
||||
|
||||
/* -finhibit-size-directive inhibits output of .size for ELF.
|
||||
This is used only for compiling crtstuff.c,
|
||||
and it may be extended to other effects
|
||||
|
@ -616,6 +646,8 @@ int flag_check_memory_usage = 0;
|
|||
-fcheck-memory-usage. */
|
||||
int flag_prefix_function_name = 0;
|
||||
|
||||
int flag_regmove = 0;
|
||||
|
||||
/* 1 if alias checking is on (by default, when -O). */
|
||||
int flag_alias_check = 0;
|
||||
|
||||
|
@ -666,6 +698,15 @@ struct { char *string; int *variable; int on_value;} f_options[] =
|
|||
{"pretend-float", &flag_pretend_float, 1},
|
||||
{"schedule-insns", &flag_schedule_insns, 1},
|
||||
{"schedule-insns2", &flag_schedule_insns_after_reload, 1},
|
||||
#ifdef HAIFA
|
||||
{"sched-interblock",&flag_schedule_interblock, 1},
|
||||
{"sched-spec",&flag_schedule_speculative, 1},
|
||||
{"sched-spec-load",&flag_schedule_speculative_load, 1},
|
||||
{"sched-spec-load-dangerous",&flag_schedule_speculative_load_dangerous, 1},
|
||||
{"sched-reverse-S",&flag_schedule_reverse_before_reload, 1},
|
||||
{"sched-reverse-R",&flag_schedule_reverse_after_reload, 1},
|
||||
{"branch-count-reg",&flag_branch_on_count_reg, 1},
|
||||
#endif /* HAIFA */
|
||||
{"pic", &flag_pic, 1},
|
||||
{"PIC", &flag_pic, 2},
|
||||
{"exceptions", &flag_exceptions, 1},
|
||||
|
@ -680,6 +721,7 @@ struct { char *string; int *variable; int on_value;} f_options[] =
|
|||
{"function-sections", &flag_function_sections, 1},
|
||||
{"verbose-asm", &flag_verbose_asm, 1},
|
||||
{"gnu-linker", &flag_gnu_linker, 1},
|
||||
{"regmove", &flag_regmove, 1},
|
||||
{"pack-struct", &flag_pack_struct, 1},
|
||||
{"stack-check", &flag_stack_check, 1},
|
||||
{"bytecode", &output_bytecode, 1},
|
||||
|
@ -885,6 +927,7 @@ FILE *cse2_dump_file;
|
|||
FILE *branch_prob_dump_file;
|
||||
FILE *flow_dump_file;
|
||||
FILE *combine_dump_file;
|
||||
FILE *regmove_dump_file;
|
||||
FILE *sched_dump_file;
|
||||
FILE *local_reg_dump_file;
|
||||
FILE *global_reg_dump_file;
|
||||
|
@ -905,6 +948,7 @@ int cse2_time;
|
|||
int branch_prob_time;
|
||||
int flow_time;
|
||||
int combine_time;
|
||||
int regmove_time;
|
||||
int sched_time;
|
||||
int local_alloc_time;
|
||||
int global_alloc_time;
|
||||
|
@ -1053,6 +1097,8 @@ fatal_insn (message, insn)
|
|||
fflush (flow_dump_file);
|
||||
if (combine_dump_file)
|
||||
fflush (combine_dump_file);
|
||||
if (regmove_dump_file)
|
||||
fflush (regmove_dump_file);
|
||||
if (sched_dump_file)
|
||||
fflush (sched_dump_file);
|
||||
if (local_reg_dump_file)
|
||||
|
@ -2131,6 +2177,7 @@ compile_file (name)
|
|||
branch_prob_time = 0;
|
||||
flow_time = 0;
|
||||
combine_time = 0;
|
||||
regmove_time = 0;
|
||||
sched_time = 0;
|
||||
local_alloc_time = 0;
|
||||
global_alloc_time = 0;
|
||||
|
@ -2229,6 +2276,10 @@ compile_file (name)
|
|||
if (combine_dump)
|
||||
combine_dump_file = open_dump_file (dump_base_name, ".combine");
|
||||
|
||||
/* If regmove dump desired, open the output file. */
|
||||
if (regmove_dump)
|
||||
regmove_dump_file = open_dump_file (dump_base_name, ".regmove");
|
||||
|
||||
/* If scheduling dump desired, open the output file. */
|
||||
if (sched_dump)
|
||||
sched_dump_file = open_dump_file (dump_base_name, ".sched");
|
||||
|
@ -2713,6 +2764,9 @@ compile_file (name)
|
|||
fclose (combine_dump_file);
|
||||
}
|
||||
|
||||
if (regmove_dump)
|
||||
fclose (regmove_dump_file);
|
||||
|
||||
if (sched_dump)
|
||||
fclose (sched_dump_file);
|
||||
|
||||
|
@ -2765,6 +2819,7 @@ compile_file (name)
|
|||
print_time ("branch-prob", branch_prob_time);
|
||||
print_time ("flow", flow_time);
|
||||
print_time ("combine", combine_time);
|
||||
print_time ("regmove", regmove_time);
|
||||
print_time ("sched", sched_time);
|
||||
print_time ("local-alloc", local_alloc_time);
|
||||
print_time ("global-alloc", global_alloc_time);
|
||||
|
@ -3304,6 +3359,26 @@ rest_of_compilation (decl)
|
|||
fflush (combine_dump_file);
|
||||
});
|
||||
|
||||
if (regmove_dump)
|
||||
TIMEVAR (dump_time,
|
||||
{
|
||||
fprintf (regmove_dump_file, "\n;; Function %s\n\n",
|
||||
(*decl_printable_name) (decl, 2));
|
||||
});
|
||||
|
||||
/* Register allocation pre-pass, to reduce number of moves
|
||||
necessary for two-address machines. */
|
||||
if (optimize > 0 && flag_regmove)
|
||||
TIMEVAR (regmove_time, regmove_optimize (insns, max_reg_num (),
|
||||
regmove_dump_file));
|
||||
|
||||
if (regmove_dump)
|
||||
TIMEVAR (dump_time,
|
||||
{
|
||||
print_rtl (regmove_dump_file, insns);
|
||||
fflush (regmove_dump_file);
|
||||
});
|
||||
|
||||
/* Print function header into sched dump now
|
||||
because doing the sched analysis makes some of the dump. */
|
||||
|
||||
|
@ -3703,6 +3778,7 @@ main (argc, argv, envp)
|
|||
flag_schedule_insns = 1;
|
||||
flag_schedule_insns_after_reload = 1;
|
||||
#endif
|
||||
flag_regmove = 1;
|
||||
}
|
||||
|
||||
if (optimize >= 3)
|
||||
|
@ -3764,6 +3840,7 @@ main (argc, argv, envp)
|
|||
jump2_opt_dump = 1;
|
||||
local_reg_dump = 1;
|
||||
loop_dump = 1;
|
||||
regmove_dump = 1;
|
||||
rtl_dump = 1;
|
||||
cse_dump = 1, cse2_dump = 1;
|
||||
sched_dump = 1;
|
||||
|
@ -3815,6 +3892,9 @@ main (argc, argv, envp)
|
|||
case 't':
|
||||
cse2_dump = 1;
|
||||
break;
|
||||
case 'N':
|
||||
regmove_dump = 1;
|
||||
break;
|
||||
case 'S':
|
||||
sched_dump = 1;
|
||||
break;
|
||||
|
@ -3862,6 +3942,18 @@ main (argc, argv, envp)
|
|||
|
||||
if (found)
|
||||
;
|
||||
#ifdef HAIFA
|
||||
#ifdef INSN_SCHEDULING
|
||||
else if (!strncmp (p, "sched-verbose-",14))
|
||||
fix_sched_param("verbose",&p[14]);
|
||||
else if (!strncmp (p, "sched-max-",10))
|
||||
fix_sched_param("max",&p[10]);
|
||||
else if (!strncmp (p, "sched-inter-max-b-",18))
|
||||
fix_sched_param("interblock-max-blocks",&p[18]);
|
||||
else if (!strncmp (p, "sched-inter-max-i-",18))
|
||||
fix_sched_param("interblock-max-insns",&p[18]);
|
||||
#endif
|
||||
#endif /* HAIFA */
|
||||
else if (!strncmp (p, "fixed-", 6))
|
||||
fix_register (&p[6], 1, 1);
|
||||
else if (!strncmp (p, "call-used-", 10))
|
||||
|
|
23
gcc/unroll.c
23
gcc/unroll.c
|
@ -202,7 +202,7 @@ static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
|
|||
static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
|
||||
static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
|
||||
enum unroll_types, rtx, rtx, rtx, rtx));
|
||||
static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
|
||||
void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
|
||||
static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
|
||||
static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
|
||||
static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
|
||||
|
@ -1094,6 +1094,16 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
|
|||
/* Set unroll type to MODULO now. */
|
||||
unroll_type = UNROLL_MODULO;
|
||||
loop_preconditioned = 1;
|
||||
#ifdef HAIFA
|
||||
if (loop_n_iterations > 0)
|
||||
loop_unroll_iter[ loop_number(loop_start, loop_end) ]
|
||||
= (loop_n_iterations
|
||||
- loop_n_iterations % (abs_inc * unroll_number));
|
||||
else
|
||||
/* inform loop.c about the new initial value */
|
||||
loop_start_value[loop_number(loop_start, loop_end)] = initial_value;
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1108,6 +1118,15 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
|
|||
|
||||
/* At this point, we are guaranteed to unroll the loop. */
|
||||
|
||||
#ifdef HAIFA
|
||||
/* inform loop.c about the factor of unrolling */
|
||||
if (unroll_type == UNROLL_COMPLETELY)
|
||||
loop_unroll_factor[ loop_number(loop_start, loop_end) ] = -1;
|
||||
else
|
||||
loop_unroll_factor[ loop_number(loop_start, loop_end) ] = unroll_number;
|
||||
#endif /* HAIFA */
|
||||
|
||||
|
||||
/* For each biv and giv, determine whether it can be safely split into
|
||||
a different variable for each unrolled copy of the loop body.
|
||||
We precalculate and save this info here, since computing it is
|
||||
|
@ -2263,7 +2282,7 @@ biv_total_increment (bl, loop_start, loop_end)
|
|||
Initial_value and/or increment are set to zero if their values could not
|
||||
be calculated. */
|
||||
|
||||
static void
|
||||
void
|
||||
iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
|
||||
rtx iteration_var, *initial_value, *increment;
|
||||
rtx loop_start, loop_end;
|
||||
|
|
Loading…
Reference in New Issue