* Integrate Haifa instruction scheduler.

* Integrate regmove pass.
See ChangeLog for details.

From-SVN: r14770
This commit is contained in:
Jeffrey A Law 1997-08-12 04:07:19 +00:00 committed by Jeff Law
parent aa32d84158
commit 8c660648ca
15 changed files with 10737 additions and 10 deletions

View File

@ -1,3 +1,76 @@
Mon Aug 11 14:50:55 1997 Jeffrey A Law (law@cygnus.com)
* Integrate Haifa instruction scheduler.
* Makefile.in (ALL_CFLAGS): Add SCHED_CFLAGS. Prefix all references
to sched with $(SCHED_PREFIX).
* configure.in: Handle --enable-haifa.
* configure: Rebuilt.
* flags.h: Add new flags for haifa instruction scheduler.
* genattrtab.c (expand_units): For haifa, don't subtract one
when computing blockage.
* toplev.h (flag_schedule_interblock): Haifa scheduler flag.
(flag_schedule_speculative): Ditto.
(flag_schedule_speculative_load): Ditto.
(flag_schedule_speculative_load_dangerous): Ditto.
(flag_schedule_reverse_before_reload): Ditto.
(flag_schedule_reverse_after_reload): Ditto.
(flag_branch_on_count_reg): Ditto.
(f_options): Add Haifa switches.
(main): Turn off some Haifa options if appropriate macro is
defined. Process Haifa switches.
* unroll.c (iteration_info): No longer static, since Haifa
scheduler uses it.
(unroll_loop): Inform HAIFA scheduler about loop unrolling factor.
* unroll.c (unroll_loop): Set loop_unroll_iter, loop_start_value.
* loop.h (loop_unroll_factor, loop_number): Add HAIFA decls.
* loop.h (loop_initial_value,loop_unroll_iter): New globals.
* loop.c (loop_optimize): If HAIFA is defined, allocate additional
storage for the Haifa scheduler.
(mark_loop_jump): If HAIFA defined, set LABEL_OUTSIDE_LOOP_P and
LABEL_NEXTREF.
(strength_reduce): If HAIFA and HAVE_decrement_and_branch_on_count
are defined, call analyze_loop_iterations and insert_bct to use
countdown loops.
(record_giv): Refine test for jumps out of loops if HAIFA is
defined.
(analyze_loop_iterations): New function to identify if we can use
a countdown loop.
(insert_bct): Insert countdown loop.
(instrument_loop_bct): Low level code to insert countdown loop.
(loop_number): Calculate UID of loop.
(indirect_jump_in_function_p): Return true if an indirect jump is
in the function.
(is_power_of_2): Return true if value is a power of 2.
(is_conditional_branch): Return true if insn is a conditional
jump.
(fix_bct_param): Process -fbct-{min,max}-N switches.
(check_bct_param): Return true if loop should be instrumented.
* loop.c (loop_initial_value,loop_unroll_iter): New globals.
(loop_optimize): Initialize.
(get_condition_for_loop): Ditto.
* loop.c (strength_reduce): Inside of code that uses #ifdef
HAVE_decrement_and_branch_on_count code, test it to make sure the
condition is true.
(instrument_loop_bct): Ditto.
* haifa-sched.c: New file.
* Integrate regmove pass.
* Makefile.in (OBJS): Add regmove.o
(regmove.o): Add dependencies.
* flow.c (find_use_as_address): No longer static.
* rtl.h (find_use_as_address): Declare.
* toplev.c (regmove_dump, flag_regmove): Define.
(f_options): Add -fregmove.
(regmove_dump_file, regmove_time): Define.
(fatal_insn): Close the regmove dump file.
(compile_file): Initialize regmove_time; open/close the regmove dump
file as needed. Print regmove time as needed.
(rest_of_compilation): Run regmove pass if requested, dump
RTL after regmove if requested.
(main): If -O2 or more, turn on regmove. Handle dump switches.
* regmove.c: New file.
Mon Aug 11 14:15:02 1997 Jeffrey A Law (law@cygnus.com)
* Integrate tlink patch from jason@cygnus.com

View File

@ -444,7 +444,7 @@ INTERNAL_CFLAGS = $(CROSS) -DIN_GCC @extra_c_flags@
# This is the variable actually used when we compile.
ALL_CFLAGS = $(INTERNAL_CFLAGS) $(X_CFLAGS) $(T_CFLAGS) $(CFLAGS) $(XCFLAGS) \
@DEFS@
@DEFS@ $(SCHED_CFLAGS)
# Likewise.
ALL_CPPFLAGS = $(CPPFLAGS) $(X_CPPFLAGS) $(T_CPPFLAGS)
@ -548,14 +548,17 @@ BC_OBJS = bc-emit.o bc-optab.o
# Bytecode header files constructed at build time; vmsconfig.com wants this.
BC_ALL = bc-arity.h bc-opcode.h bc-opname.h
SCHED_PREFIX = @sched_prefix@
SCHED_CFLAGS = @sched_cflags@
# Language-independent object files.
OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \
function.o stmt.o except.o expr.o calls.o expmed.o explow.o optabs.o \
varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o \
dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o \
varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o regmove.o \
dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o alias.o \
integrate.o jump.o cse.o loop.o unroll.o flow.o stupid.o combine.o \
regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o \
insn-peep.o reorg.o alias.o sched.o final.o recog.o reg-stack.o \
insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \
insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \
profile.o insn-attrtab.o $(out_object_file) getpwd.o convert.o $(EXTRA_OBJS)
@ -1326,7 +1329,9 @@ reorg.o : reorg.c $(CONFIG_H) $(RTL_H) conditions.h hard-reg-set.h \
flags.h output.h
alias.o : alias.c $(CONFIG_H) $(RTL_H) flags.h hard-reg-set.h regs.h \
insn-codes.h
sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \
regmove.o : regmove.c $(CONFIG_H) $(RTL_H) insn-config.h recog.h output.h \
reload.h regs.h hard-reg-set.h flags.h expr.h insn-flags.h
$(SCHED_PREFIX)sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \
flags.h insn-config.h insn-attr.h
final.o : final.c $(CONFIG_H) $(RTL_H) $(TREE_H) flags.h regs.h \
recog.h conditions.h insn-config.h insn-attr.h except.h real.h output.h \

22
gcc/configure vendored
View File

@ -4361,6 +4361,26 @@ if [ ! -f Makefile.in ]; then
echo "source ${srcdir}/.gdbinit" >> .gdbinit
fi
# Override SCHED_PREFIX and SCHED_CFLAGS to enable the Haifa scheduler.
sched_prefix=
sched_cflags=
if [[ x$enable_haifa = xyes ]]; then
echo "Using the Haifa scheduler."
sched_prefix=haifa-
sched_cflags=-DHAIFA
fi
if [[ x$enable_haifa != x ]]; then
# Explicitly remove files that need to be recompiled for the Haifa scheduler.
for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do
if [ -f $x ]; then
echo "Removing $x"
rm -f $x
fi
done
fi
# Process the language and host/target makefile fragments.
${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file"
@ -4602,6 +4622,8 @@ s%@CC@%$CC%g
s%@SET_MAKE@%$SET_MAKE%g
s%@CPP@%$CPP%g
s%@manext@%$manext%g
s%@sched_prefix@%$sched_prefix%g
s%@sched_cflags@%$sched_cflags%g
s%@objext@%$objext%g
s%@subdirs@%$subdirs%g
s%@all_languages@%$all_languages%g

View File

@ -3027,6 +3027,26 @@ if [[ ! -f Makefile.in ]]; then
echo "source ${srcdir}/.gdbinit" >> .gdbinit
fi
# Override SCHED_PREFIX and SCHED_CFLAGS to enable the Haifa scheduler.
sched_prefix=
sched_cflags=
if [[ x$enable_haifa = xyes ]]; then
echo "Using the Haifa scheduler."
sched_prefix=haifa-
sched_cflags=-DHAIFA
fi
AC_SUBST(sched_prefix)
AC_SUBST(sched_cflags)
if [[ x$enable_haifa != x ]]; then
# Explicitly remove files that need to be recompiled for the Haifa scheduler.
for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do
if [ -f $x ]; then
echo "Removing $x"
rm -f $x
fi
done
fi
# Process the language and host/target makefile fragments.
${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file"

View File

@ -304,6 +304,34 @@ extern int flag_shared_data;
extern int flag_schedule_insns;
extern int flag_schedule_insns_after_reload;
#ifdef HAIFA
/* The following flags have effect only for scheduling before register
allocation:
flag_schedule_interblock means schedule insns across basic blocks.
flag_schedule_speculative means allow speculative motion of non-load insns.
flag_schedule_speculative_load means allow speculative motion of some
load insns.
flag_schedule_speculative_load_dangerous allows speculative motion of more
load insns.
flag_schedule_reverse_before_reload means try to reverse original order
of insns (S).
flag_schedule_reverse_after_reload means try to reverse original order
of insns (R). */
extern int flag_schedule_interblock;
extern int flag_schedule_speculative;
extern int flag_schedule_speculative_load;
extern int flag_schedule_speculative_load_dangerous;
extern int flag_schedule_reverse_before_reload;
extern int flag_schedule_reverse_after_reload;
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
by a cheaper branch, on a count register. */
extern int flag_branch_on_count_reg;
#endif /* HAIFA */
/* Nonzero means put things in delayed-branch slots if supported. */
extern int flag_delayed_branch;

View File

@ -268,7 +268,6 @@ static void find_auto_inc PROTO((regset, rtx, rtx));
static void mark_used_regs PROTO((regset, regset, rtx, int, rtx));
static int try_pre_increment_1 PROTO((rtx));
static int try_pre_increment PROTO((rtx, rtx, HOST_WIDE_INT));
static rtx find_use_as_address PROTO((rtx, rtx, HOST_WIDE_INT));
void dump_flow_info PROTO((FILE *));
/* Find basic blocks of the current function and perform data flow analysis.
@ -2795,7 +2794,7 @@ try_pre_increment (insn, reg, amount)
If REG appears more than once, or is used other than in such an address,
return (rtx)1. */
static rtx
rtx
find_use_as_address (x, reg, plusconst)
register rtx x;
rtx reg;

View File

@ -2003,6 +2003,9 @@ expand_units ()
for (op = unit->ops; op; op = op->next)
{
#ifdef HAIFA
rtx blockage = op->issue_exp;
#else
rtx blockage = operate_exp (POS_MINUS_OP, readycost,
make_numeric_value (1));
@ -2018,6 +2021,7 @@ expand_units ()
blockage);
blockage = operate_exp (MAX_OP, blockage, op->issue_exp);
#endif
blockage = simplify_knowing (blockage, unit->condexp);
/* Add this op's contribution to MAX (BLOCKAGE (E,*)) and

8713
gcc/haifa-sched.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -147,7 +147,7 @@ in the following sections.
-ffunction-sections -finline-functions
-fkeep-inline-functions -fno-default-inline
-fno-defer-pop -fno-function-cse
-fno-inline -fno-peephole -fomit-frame-pointer
-fno-inline -fno-peephole -fomit-frame-pointer -fregmove
-frerun-cse-after-loop -fschedule-insns
-fschedule-insns2 -fstrength-reduce -fthread-jumps
-funroll-all-loops -funroll-loops
@ -2195,6 +2195,12 @@ used in one place: in @file{reorg.c}, instead of guessing which path a
branch is mostly to take, the @samp{REG_BR_PROB} values are used to
exactly determine which path is taken more often.
@end ifset
@item -fregmove
Some machines only support 2 operands per instruction. On such
machines, GNU CC might have to do extra copies. The @samp{-fregmove}
option overrides the default for the machine to do the copy before
register allocation.
@end table
@node Preprocessor Options

View File

@ -81,6 +81,42 @@ static rtx *loop_number_loop_starts, *loop_number_loop_ends;
int *loop_outer_loop;
#ifdef HAIFA
/* The main output of analyze_loop_iterations is placed here */
int *loop_can_insert_bct;
/* For each loop, records whether any of its inner loops has used the
count register */
int *loop_used_count_register;
/* For each loop, remember its unrolling factor (if at all).
contents of the array:
0/1: not unrolled.
-1: completely unrolled - no further instrumentation is needed.
>1: holds the exact amount of unrolling. */
int *loop_unroll_factor;
int *loop_unroll_iter;
/* loop parameters for arithmetic loops. These loops have a loop variable
which is initialized to loop_start_value, incremented in each iteration
by "loop_increment". At the end of the iteration the loop variable is
compared to the loop_comparison_value (using loop_comparison_code). */
rtx *loop_increment;
rtx *loop_comparison_value;
rtx *loop_start_value;
enum rtx_code *loop_comparison_code;
/* for debugging: selects sub-range of loops for which the bct optimization
is invoked. The numbering is per compilation-unit. */
int dbg_bct_min = -1;
int dbg_bct_max = -1;
#endif /* HAIFA */
/* Indexed by loop number, contains a nonzero value if the "loop" isn't
really a loop (an insn outside the loop branches into it). */
@ -286,6 +322,32 @@ static int maybe_eliminate_biv_1 ();
static int last_use_this_basic_block ();
static void record_initial ();
static void update_reg_last_use ();
#ifdef HAIFA
/* This is extern from unroll.c */
void iteration_info ();
/* Two main functions for implementing bct:
first - to be called before loop unrolling, and the second - after */
static void analyze_loop_iterations ();
static void insert_bct ();
/* Auxiliary function that inserts the bct pattern into the loop */
static void instrument_loop_bct ();
/* Indirect_jump_in_function is computed once per function. */
int indirect_jump_in_function = 0;
static int indirect_jump_in_function_p ();
int loop_number ();
static int is_power_of_2();
static int is_conditional_branch ();
/* Debugging functions. */
int fix_bct_param ();
static int check_bct_param ();
#endif /* HAIFA */
/* Relative gain of eliminating various kinds of operations. */
int add_cost;
@ -379,6 +441,32 @@ loop_optimize (f, dumpfile)
loop_number_exit_labels = (rtx *) alloca (max_loop_num * sizeof (rtx));
loop_number_exit_count = (int *) alloca (max_loop_num * sizeof (int));
#ifdef HAIFA
/* Allocate for BCT optimization */
loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int));
bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int));
loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int));
bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int));
loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int));
bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int));
loop_unroll_iter = (int *) alloca (max_loop_num *sizeof (int));
bzero ((char *) loop_unroll_iter, max_loop_num * sizeof (int));
loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx));
loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
bzero ((char *) loop_increment, max_loop_num * sizeof (rtx));
bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx));
bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx));
loop_comparison_code
= (enum rtx_code *) alloca (max_loop_num * sizeof (enum rtx_code));
bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code));
#endif /* HAIFA */
/* Find and process each loop.
First, find them, and record them in order of their beginnings. */
find_and_verify_loops (f);
@ -430,6 +518,12 @@ loop_optimize (f, dumpfile)
if (flag_unroll_loops && write_symbols != NO_DEBUG)
find_loop_tree_blocks ();
#ifdef HAIFA
/* determine if the function has indirect jump. If it does,
we cannot instrument loops in this function with bct */
indirect_jump_in_function = indirect_jump_in_function_p (f);
#endif /* HAIFA */
/* Now scan the loops, last ones first, since this means inner ones are done
before outer ones. */
for (i = max_loop_num-1; i >= 0; i--)
@ -2639,6 +2733,11 @@ mark_loop_jump (x, loop_num)
if (loop_num != -1)
{
#ifdef HAIFA
LABEL_OUTSIDE_LOOP_P (x) = 1;
LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num];
#endif /* HAIFA */
loop_number_exit_labels[loop_num] = x;
for (outer_loop = loop_num; outer_loop != -1;
@ -3755,6 +3854,16 @@ strength_reduce (scan_start, end, loop_top, insn_count,
so that "decrement and branch until zero" insn can be used. */
check_dbra_loop (loop_end, insn_count, loop_start);
#ifdef HAIFA
/* record loop-variables relevant for BCT optimization before unrolling
the loop. Unrolling may update part of this information, and the
correct data will be used for generating the BCT. */
#ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count)
analyze_loop_iterations (loop_start, loop_end);
#endif
#endif /* HAIFA */
/* Create reg_map to hold substitutions for replaceable giv regs. */
reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx));
bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx));
@ -4247,6 +4356,14 @@ strength_reduce (scan_start, end, loop_top, insn_count,
if (flag_unroll_loops)
unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1);
#ifdef HAIFA
/* instrument the loop with bct insn */
#ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count)
insert_bct (loop_start, loop_end);
#endif
#endif /* HAIFA */
if (loop_dump_stream)
fprintf (loop_dump_stream, "\n");
}
@ -6932,3 +7049,638 @@ get_condition_for_loop (x)
return gen_rtx (swap_condition (GET_CODE (comparison)), VOIDmode,
XEXP (comparison, 1), XEXP (comparison, 0));
}
#ifdef HAIFA
/* Analyze a loop in order to instrument it with the use of count register.
loop_start and loop_end are the first and last insns of the loop.
This function works in cooperation with insert_bct ().
loop_can_insert_bct[loop_num] is set according to whether the optimization
is applicable to the loop. When it is applicable, the following variables
are also set:
loop_start_value[loop_num]
loop_comparison_value[loop_num]
loop_increment[loop_num]
loop_comparison_code[loop_num] */
static
void analyze_loop_iterations (loop_start, loop_end)
rtx loop_start, loop_end;
{
rtx comparison, comparison_value;
rtx iteration_var, initial_value, increment;
enum rtx_code comparison_code;
rtx last_loop_insn;
rtx insn;
int i;
/* loop_variable mode */
enum machine_mode original_mode;
/* find the number of the loop */
int loop_num = loop_number (loop_start, loop_end);
/* we change our mind only when we are sure that loop will be instrumented */
loop_can_insert_bct[loop_num] = 0;
/* debugging: do we wish to instrument this loop? */
if ( !check_bct_param () )
return;
/* is the optimization suppressed. */
if ( !flag_branch_on_count_reg )
return;
/* make sure that count-reg is not in use */
if (loop_used_count_register[loop_num]){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n",
loop_num);
return;
}
/* make sure that the function has no indirect jumps. */
if (indirect_jump_in_function){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n",
loop_num);
return;
}
/* make sure that the last loop insn is a conditional jump */
last_loop_insn = PREV_INSN (loop_end);
if (!is_conditional_branch (last_loop_insn)) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n",
loop_num);
return;
}
/* First find the iteration variable. If the last insn is a conditional
branch, and the insn preceding it tests a register value, make that
register the iteration variable. */
/* We used to use prev_nonnote_insn here, but that fails because it might
accidentally get the branch for a contained loop if the branch for this
loop was deleted. We can only trust branches immediately before the
loop_end. */
comparison = get_condition_for_loop (last_loop_insn);
/* ??? Get_condition may switch position of induction variable and
invariant register when it canonicalizes the comparison. */
if (comparison == 0) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n",
loop_num);
return;
}
comparison_code = GET_CODE (comparison);
iteration_var = XEXP (comparison, 0);
comparison_value = XEXP (comparison, 1);
original_mode = GET_MODE (iteration_var);
if (GET_MODE_CLASS (original_mode) != MODE_INT
|| GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n",
loop_num);
return;
}
/* get info about loop bounds and increment */
iteration_info (iteration_var, &initial_value, &increment,
loop_start, loop_end);
/* make sure that all required loop data were found */
if (!(initial_value && increment && comparison_value
&& invariant_p (comparison_value) && invariant_p (increment)
&& ! indirect_jump_in_function))
{
if (loop_dump_stream) {
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num);
if (!(initial_value && increment && comparison_value)) {
fprintf (loop_dump_stream, "\tbounds not available: ");
if ( ! initial_value )
fprintf (loop_dump_stream, "initial ");
if ( ! increment )
fprintf (loop_dump_stream, "increment ");
if ( ! comparison_value )
fprintf (loop_dump_stream, "comparison ");
fprintf (loop_dump_stream, "\n");
}
if (!invariant_p (comparison_value) || !invariant_p (increment))
fprintf (loop_dump_stream, "\tloop bounds not invariant\n");
}
return;
}
/* make sure that the increment is constant */
if (GET_CODE (increment) != CONST_INT) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n",
loop_num);
return;
}
/* make sure that the loop contains neither function call, nor jump on table.
(the count register might be altered by the called function, and might
be used for a branch on table). */
for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) {
if (GET_CODE (insn) == CALL_INSN){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n",
loop_num);
return;
}
if (GET_CODE (insn) == JUMP_INSN
&& (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
|| GET_CODE (PATTERN (insn)) == ADDR_VEC)){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n",
loop_num);
return;
}
}
/* At this point, we are sure that the loop can be instrumented with BCT.
Some of the loops, however, will not be instrumented - the final decision
is taken by insert_bct () */
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n",
loop_num);
/* mark all enclosing loops that they cannot use count register */
/* ???: In fact, since insert_bct may decide not to instrument this loop,
marking here may prevent instrumenting an enclosing loop that could
actually be instrumented. But since this is rare, it is safer to mark
here in case the order of calling (analyze/insert)_bct would be changed. */
for (i=loop_num; i != -1; i = loop_outer_loop[i])
loop_used_count_register[i] = 1;
/* Set data structures which will be used by the instrumentation phase */
loop_start_value[loop_num] = initial_value;
loop_comparison_value[loop_num] = comparison_value;
loop_increment[loop_num] = increment;
loop_comparison_code[loop_num] = comparison_code;
loop_can_insert_bct[loop_num] = 1;
}
/* Instrument a loop with a bct instruction, using the data recorded by
   analyze_loop_iterations ().  LOOP_START and LOOP_END are the first and
   last insns of the loop.  We distinguish between loops with compile-time
   bounds and those with run-time bounds.  The loop behaviour is analyzed
   according to the following characteristics/variables:
   ; Input variables:
   ;   comparison-value: the value to which the iteration counter is compared.
   ;   initial-value: iteration-counter initial value.
   ;   increment: iteration-counter increment.
   ; Computed variables:
   ;   increment-direction: the sign of the increment.
   ;   compare-direction: '1' for LT, LE, '-1' for GT, GE, '0' for NE.
   ;   range-direction: sign (comparison-value - initial-value)
   We give up on the following cases:
   ; loop variable overflow.
   ; run-time loop bounds with comparison code NE.
   ; compile-time loop bounds with comparison code NE when the range is
   ;   not an exact multiple of the increment (the counter would step
   ;   over the comparison value instead of hitting it).
   Nothing is returned; on success the insns of the loop are rewritten
   through instrument_loop_bct ().  */
static void
insert_bct (loop_start, loop_end)
     rtx loop_start, loop_end;
{
  rtx initial_value, comparison_value, increment;
  enum rtx_code comparison_code;
  int increment_direction, compare_direction;

  /* Set for the unsigned comparison codes.  NOTE(review): nothing in this
     function reads it, so signed and unsigned loops are treated alike -
     confirm that this is intended.  */
  int unsigned_p = 0;

  /* If the loop condition is <= or >=, the number of iterations
     is 1 more than the range of the bounds of the loop.  */
  int add_iteration = 0;

  /* The only machine mode we work with.  NOTE(review):
     analyze_loop_iterations () accepts any integer mode that is
     UNITS_PER_WORD bytes wide; presumably that is SImode on the targets
     this was written for - confirm for other targets.  */
  enum machine_mode loop_var_mode = SImode;

  int loop_num = loop_number (loop_start, loop_end);

  /* Get loop-variables.  No need to check that these are valid - already
     checked in analyze_loop_iterations ().  */
  comparison_code = loop_comparison_code[loop_num];
  initial_value = loop_start_value[loop_num];
  comparison_value = loop_comparison_value[loop_num];
  increment = loop_increment[loop_num];

  /* Check analyze_loop_iterations decision for this loop.  */
  if (! loop_can_insert_bct[loop_num])
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream,
                 "insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n",
                 loop_num);
      return;
    }

  /* Make sure that the loop was not fully unrolled.  */
  if (loop_unroll_factor[loop_num] == -1)
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream, "insert_bct %d: was completely unrolled\n", loop_num);
      return;
    }

  /* Make sure that the last loop insn is a conditional jump.
     This check is repeated from analyze_loop_iterations (),
     because unrolling might have changed that.  */
  if (! is_conditional_branch (PREV_INSN (loop_end)))
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream,
                 "insert_bct: not instrumenting BCT because of invalid branch\n");
      return;
    }

  /* Fix increment in case loop was unrolled.  */
  if (loop_unroll_factor[loop_num] > 1)
    increment = GEN_INT (INTVAL (increment) * loop_unroll_factor[loop_num]);

  /* Determine properties and directions of the loop.  */
  increment_direction = (INTVAL (increment) > 0) ? 1 : -1;
  switch (comparison_code)
    {
    case LEU:
      unsigned_p = 1;
      /* fallthrough */
    case LE:
      compare_direction = 1;
      add_iteration = 1;
      break;

    case GEU:
      unsigned_p = 1;
      /* fallthrough */
    case GE:
      compare_direction = -1;
      add_iteration = 1;
      break;

    case EQ:
      /* In this case we cannot know the number of iterations.  */
      if (loop_dump_stream)
        fprintf (loop_dump_stream,
                 "insert_bct: %d: loop cannot be instrumented: == in condition\n",
                 loop_num);
      return;

    case LTU:
      unsigned_p = 1;
      /* fallthrough */
    case LT:
      compare_direction = 1;
      break;

    case GTU:
      unsigned_p = 1;
      /* fallthrough */
    case GT:
      compare_direction = -1;
      break;

    case NE:
      compare_direction = 0;
      break;

    default:
      abort ();
    }

  /* Make sure that the loop does not end by an overflow.  NE has no
     direction of its own (compare_direction is 0), so it can never match
     increment_direction, which is always 1 or -1.  It must be let through
     here; otherwise the NE handling for compile-time bounds further down
     could never be reached.  NE is screened separately below, against the
     direction of the range.  */
  if (compare_direction != 0 && compare_direction != increment_direction)
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream,
                 "insert_bct: %d: loop cannot be instrumented: terminated by overflow\n",
                 loop_num);
      return;
    }

  /* Try to instrument the loop.  */

  /* Handle the simpler case, where the bounds are known at compile time.  */
  if (GET_CODE (initial_value) == CONST_INT
      && GET_CODE (comparison_value) == CONST_INT)
    {
      int n_iterations;
      int increment_value_abs = INTVAL (increment) * increment_direction;

      /* Check the relation between compare-val and initial-val.  */
      int difference = INTVAL (comparison_value) - INTVAL (initial_value);
      int range_direction = (difference > 0) ? 1 : -1;

      /* Make sure the loop executes enough iterations to gain from BCT.  */
      if (difference > -3 && difference < 3)
        {
          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "insert_bct: loop %d not BCT instrumented: too small iteration count.\n",
                     loop_num);
          return;
        }

      /* Make sure that the loop executes at least once.  */
      if ((range_direction == 1 && compare_direction == -1)
          || (range_direction == -1 && compare_direction == 1))
        {
          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "insert_bct: loop %d: does not iterate even once. Not instrumenting.\n",
                     loop_num);
          return;
        }

      /* Make sure that the loop does not end by an overflow (in compile time
         bounds we must have an additional check for overflow, because here
         we also support the compare code of 'NE').  */
      if (comparison_code == NE
          && increment_direction != range_direction)
        {
          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n",
                     loop_num);
          return;
        }

      /* Determine the number of iterations by:
         ;
         ;                  compare-val - initial-val + (increment -1) + additional-iteration
         ; num_iterations = -----------------------------------------------------------------
         ;                                           increment
      */
      difference = (range_direction > 0) ? difference : -difference;

      if (increment_value_abs == 0)
        {
          fprintf (stderr, "insert_bct: error: increment == 0 !!!\n");
          abort ();
        }

      /* A != loop stops at the computed count only if the counter lands
         exactly on the comparison value.  When the range is not a multiple
         of the step the counter jumps over it, so the rounded-up quotient
         below would be wrong; leave such loops alone.  */
      if (comparison_code == NE && difference % increment_value_abs != 0)
        {
          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "insert_bct (compile time bounds): %d: loop not instrumented: != bound is not reached exactly\n",
                     loop_num);
          return;
        }

      n_iterations = (difference + increment_value_abs - 1 + add_iteration)
                     / increment_value_abs;

      instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));

      /* Done with this loop.  */
      return;
    }

  /* Handle the more complex case, that the bounds are NOT known at compile
     time.  In this case we generate run_time calculation of the number of
     iterations.  */

  /* With runtime bounds, if the compare is of the form '!=' we give up.  */
  if (comparison_code == NE)
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream,
                 "insert_bct: fail for loop %d: runtime bounds with != comparison\n",
                 loop_num);
      return;
    }
  else
    {
      /* We rely on the existence of run-time guard to ensure that the
         loop executes at least once.  */
      rtx sequence;
      rtx iterations_num_reg;

      int increment_value_abs = INTVAL (increment) * increment_direction;

      /* Make sure that the increment is a power of two, otherwise (an
         expensive) divide is needed.  */
      if (! is_power_of_2 (increment_value_abs))
        {
          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "insert_bct: not instrumenting BCT because the increment is not power of 2\n");
          return;
        }

      /* Compute the number of iterations.  The insns are collected in a
         sequence and emitted in front of the loop afterwards.  */
      start_sequence ();
      {
        /* CYGNUS LOCAL: HAIFA bug fix */
        rtx temp_reg;

        /* Again, the number of iterations is calculated by:
           ;
           ;                  compare-val - initial-val + (increment -1) + additional-iteration
           ; num_iterations = -----------------------------------------------------------------
           ;                                           increment
        */
        /* ??? Do we have to call copy_rtx here before passing rtx to
           expand_binop?  */
        if (compare_direction > 0)
          {
            /* <, <= :the loop variable is increasing */
            temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value,
                                     initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
          }
        else
          {
            /* >, >= :the loop variable is decreasing */
            temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value,
                                     comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
          }

        /* Add the rounding term, unless it is zero.  */
        if (increment_value_abs - 1 + add_iteration != 0)
          temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
                                   GEN_INT (increment_value_abs - 1 + add_iteration),
                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);

        if (increment_value_abs != 1)
          {
            /* ??? This will generate an expensive divide instruction for
               most targets.  The original authors apparently expected this
               to be a shift, since they test for power-of-2 divisors above,
               but just naively generating a divide instruction will not give
               a shift.  It happens to work for the PowerPC target because
               the rs6000.md file has a divide pattern that emits shifts.
               It will probably not work for any other target.  */
            iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
                                               temp_reg,
                                               GEN_INT (increment_value_abs),
                                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          }
        else
          iterations_num_reg = temp_reg;
        /* END CYGNUS LOCAL: HAIFA bug fix */
      }
      sequence = gen_sequence ();
      end_sequence ();
      emit_insn_before (sequence, loop_start);
      instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
    }
}
/* Rewrite a loop so that it is controlled by a bct insn.  LOOP_START and
   LOOP_END delimit the loop; LOOP_NUM_ITERATIONS is an rtx holding the
   iteration count.  The steps are:
   1. The two insns in front of LOOP_END (the compare and branch on the
      old loop variable) are deleted.  If the loop variable has no other
      use it becomes dead after that.
   2. A fresh label is placed after LOOP_START to mark the top of the loop.
   3. The iteration count is copied into a new pseudo register and from
      there into a REG rtx for hard register COUNT_REGISTER_REGNUM; both
      moves are emitted after LOOP_START.
   4. A jump built by gen_decrement_and_branch_on_count, targeting the new
      label, is emitted before LOOP_END.
   Nothing is done unless HAVE_decrement_and_branch_on_count is defined
   and nonzero.  */
static void
instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
     rtx loop_start, loop_end;
     rtx loop_num_iterations;
{
  rtx count_pseudo, count_hard_reg;
  rtx loop_top_label;
  rtx init_insns;
  enum machine_mode loop_var_mode = SImode;

#ifdef HAVE_decrement_and_branch_on_count
  if (! HAVE_decrement_and_branch_on_count)
    return;

  if (loop_dump_stream)
    fprintf (loop_dump_stream, "Loop: Inserting BCT\n");

  /* Drop the test on the old loop variable: two insns before LOOP_END.  */
  delete_insn (PREV_INSN (loop_end));
  delete_insn (PREV_INSN (loop_end));

  /* Mark the top of the loop with a label for the new branch.  */
  loop_top_label = gen_label_rtx ();
  emit_label_after (loop_top_label, loop_start);

  /* Build the count register initialization as a separate sequence.  */
  start_sequence ();
  count_pseudo = gen_reg_rtx (loop_var_mode);
  emit_insn (gen_move_insn (count_pseudo, loop_num_iterations));

  /* This rtx stands for the count register itself.  */
  count_hard_reg = gen_rtx (REG, loop_var_mode, COUNT_REGISTER_REGNUM);

  /* Go through the pseudo rather than moving LOOP_NUM_ITERATIONS
     directly: it may be an expression that cannot be moved into the
     count register.  */
  emit_insn (gen_move_insn (count_hard_reg, count_pseudo));
  init_insns = gen_sequence ();
  end_sequence ();
  emit_insn_after (init_insns, loop_start);

  /* Put the BCT pattern on the count register where the old comparison
     used to be; it is recognized later, when assembly is generated.  */
  emit_jump_insn_before (gen_decrement_and_branch_on_count (count_hard_reg,
                                                            loop_top_label),
                         loop_end);
  LABEL_NUSES (loop_top_label)++;
#endif /* HAVE_decrement_and_branch_on_count */
}
/* calculate the uid of the given loop */
int
loop_number (loop_start, loop_end)
rtx loop_start, loop_end;
{
int loop_num = -1;
/* assume that this insn contains the LOOP_START
note, so it will not be changed by the loop unrolling */
loop_num = uid_loop_num[INSN_UID (loop_start)];
/* sanity check - should never happen */
if (loop_num == -1)
abort ();
return loop_num;
}
/* Scan the insn chain beginning at START and return 1 if it contains an
   indirect (computed) jump, 0 otherwise.

   A JUMP_INSN is accepted as direct only when its pattern is a SET whose
   source is either a LABEL_REF, or an IF_THEN_ELSE whose first arm is
   pc_rtx or a LABEL_REF.  Any other JUMP_INSN counts as indirect.  */
static int
indirect_jump_in_function_p (start)
     rtx start;
{
  rtx insn;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      rtx pat;

      if (GET_CODE (insn) != JUMP_INSN)
	continue;

      pat = PATTERN (insn);
      if (GET_CODE (pat) == SET)
	{
	  rtx src = XEXP (pat, 1);

	  /* Unconditional jump to a label.  */
	  if (GET_CODE (src) == LABEL_REF)
	    continue;

	  /* Conditional jump whose first arm is pc or a label.  */
	  if (GET_CODE (src) == IF_THEN_ELSE
	      && (XEXP (src, 1) == pc_rtx
		  || GET_CODE (XEXP (src, 1)) == (enum rtx_code) LABEL_REF))
	    continue;
	}

      /* Anything else is treated as an indirect jump.  */
      return 1;
    }

  return 0;
}
/* Return 1 iff N is a power of 2 (1, 2, 4, ...), and 0 otherwise.
   Zero and negative values are never powers of 2.  */
static int
is_power_of_2 (n)
     int n;
{
  /* The N > 0 guard rejects 0, which the bit test alone would accept,
     and keeps N - 1 from overflowing for the most negative int.  */
  return n > 0 && (n & (n - 1)) == 0;
}
/* Return 1 iff INSN is a conditional jump, i.e. a JUMP_INSN whose pattern
   is a SET with an IF_THEN_ELSE as its source; return 0 otherwise.  */
int
is_conditional_branch (insn)
     rtx insn;
{
  rtx work_code;

  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

  work_code = PATTERN (insn);
  if (GET_CODE (work_code) != SET)
    return 0;

  /* Operand 1 of the SET is its source.  */
  if (GET_CODE (XEXP (work_code, 1)) != IF_THEN_ELSE)
    return 0;

  return 1;
}
/* debugging: fix_bct_param () is called from toplev.c upon detection
   of the -fbct-***-N options.

   PARAM selects the variable to set: "max" for dbg_bct_max, "min" for
   dbg_bct_min.  VAL is the text of the new value, converted with atoi.
   Return 1 if PARAM was recognized, 0 otherwise (nothing is changed in
   that case).  */
int
fix_bct_param (param, val)
     char *param, *val;
{
  if (!strcmp (param, "max"))
    dbg_bct_max = atoi (val);
  else if (!strcmp (param, "min"))
    dbg_bct_min = atoi (val);
  else
    return 0;

  return 1;
}
/* debugging: return 1 if the loop should be instrumented,
according to bct-min/max. */
static int
check_bct_param ()
{
static int dbg_bct_num = 0;
dbg_bct_num++;
if (dbg_bct_num > dbg_bct_min || dbg_bct_min == -1)
if (dbg_bct_num <= dbg_bct_max || dbg_bct_max == -1)
return 1;
return 0;
}
#endif /* HAIFA */
/* END CYGNUS LOCAL haifa */

View File

@ -181,3 +181,13 @@ rtx final_biv_value PROTO((struct iv_class *, rtx, rtx));
rtx final_giv_value PROTO((struct induction *, rtx, rtx));
void emit_unrolled_add PROTO((rtx, rtx, rtx));
int back_branch_in_range_p PROTO((rtx, rtx, rtx));
#ifdef HAIFA
/* variables for interaction between unroll.c and loop.c, for
the insertion of branch-on-count instruction. */
extern int *loop_unroll_factor;
extern rtx *loop_start_value;
/* unroll.c stores into this array at the index returned by loop_number
for the loop being unrolled. */
extern int *loop_unroll_iter;
/* Declared without a prototype; the definition in loop.c takes the loop's
start and end insns (rtx loop_start, rtx loop_end). */
extern int loop_number();
#endif /* HAIFA */

983
gcc/regmove.c Normal file
View File

@ -0,0 +1,983 @@
/* Move registers around to reduce number of move instructions needed.
Copyright (C) 1987, 88, 89, 92-5, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU CC.
GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* This module looks for cases where matching constraints would force
an instruction to need a reload, and this reload would be a register
to register move. It then attempts to change the registers used by the
instruction to avoid the move instruction. */
#include "config.h"
#ifdef __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif
/* Must precede rtl.h for FFS. */
#include <stdio.h>
#include "rtl.h"
#include "insn-config.h"
#include "recog.h"
#include "output.h"
#include "reload.h"
#include "regs.h"
static int stable_but_for_p PROTO((rtx, rtx, rtx));
#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) \
|| defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
/* INC_INSN is an instruction that adds INCREMENT to REG.
Try to fold INC_INSN as a post/pre in/decrement into INSN.
Iff INC_INSN_SET is nonzero, inc_insn has a destination different from src.
PRE is 1 to try a pre-in/decrement and 0 to try a post-in/decrement.
Nothing is attempted unless INSN is a single set in which
find_use_as_address finds exactly one use of REG, and INCREMENT equals
plus or minus the size of the mode of that use.
On success a REG_INC note for REG is added to INSN, and INC_INSN is either
turned into a deleted note (INC_INSN_SET zero) or has the source of
INC_INSN_SET replaced by that source's first operand (INC_INSN_SET nonzero).
Return nonzero for success. */
static int
try_auto_increment (insn, inc_insn, inc_insn_set, reg, increment, pre)
rtx reg, insn, inc_insn ,inc_insn_set;
HOST_WIDE_INT increment;
int pre;
{
enum rtx_code inc_code;
rtx pset = single_set (insn);
if (pset)
{
/* Can't use the size of SET_SRC, we might have something like
(sign_extend:SI (mem:QI ... */
rtx use = find_use_as_address (pset, reg, 0);
/* Both 0 and (rtx) 1 are rejected here; presumably they are the
"no use" and "not a unique address use" results of
find_use_as_address -- confirm against its definition. */
if (use != 0 && use != (rtx) 1)
{
int size = GET_MODE_SIZE (GET_MODE (use));
/* Each enabled alternative below first stores its candidate code into
inc_code via the comma operator and then tests INCREMENT, so when the
whole condition is true inc_code holds the code of the alternative
that matched. */
if (0
#ifdef HAVE_POST_INCREMENT
|| (pre == 0 && (inc_code = POST_INC, increment == size))
#endif
#ifdef HAVE_PRE_INCREMENT
|| (pre == 1 && (inc_code = PRE_INC, increment == size))
#endif
#ifdef HAVE_POST_DECREMENT
|| (pre == 0 && (inc_code = POST_DEC, increment == -size))
#endif
#ifdef HAVE_PRE_DECREMENT
|| (pre == 1 && (inc_code = PRE_DEC, increment == -size))
#endif
)
{
/* Both changes are queued with a nonzero last argument, so they are
accepted or rejected together by apply_change_group below. */
if (inc_insn_set)
validate_change
(inc_insn,
&SET_SRC (inc_insn_set),
XEXP (SET_SRC (inc_insn_set), 0), 1);
/* Replace operand 0 of the use with (INC_CODE:Pmode REG). */
validate_change (insn, &XEXP (use, 0),
gen_rtx (inc_code,
Pmode,
reg), 1);
if (apply_change_group ())
{
/* Record the auto-increment of REG on INSN. */
REG_NOTES (insn)
= gen_rtx (EXPR_LIST, REG_INC,
reg, REG_NOTES (insn));
if (! inc_insn_set)
{
/* INC_INSN is no longer needed: turn it into a deleted note. */
PUT_CODE (inc_insn, NOTE);
NOTE_LINE_NUMBER (inc_insn) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (inc_insn) = 0;
}
return 1;
}
}
}
}
return 0;
}
#endif /* defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) */
/* Register-move optimization over the insn chain that starts at F.

For every recognized insn whose constraints require one operand to match
another (a digit constraint), try to rename registers so that the two
operands are already the same register and no reload move is needed.
Only pseudo registers are considered.

The work is done in two stages:
1. A loop run twice, first walking the chain forward and then backward,
that replaces a write-only output operand (DST) by the read-only input
operand (SRC) it must match, in the insn and in the later insn where
DST dies.  When SRC does not die in the insn, this stage can also adjust
constants of add/shift insns, move the insn, or create auto-increment
addresses via try_auto_increment.
2. A backward walk that replaces the input operand (SRC), which must die
in the insn, by the output operand (DST), in the insn and in the earlier
insn that sets SRC.

REG_DEAD notes, REG_N_SETS and REG_N_CALLS_CROSSED are updated for the
registers involved.

NREGS is not used.  If REGMOVE_DUMP_FILE is nonzero, trace messages are
written to it.  The whole body is compiled only when REGISTER_CONSTRAINTS
is defined. */
void
regmove_optimize (f, nregs, regmove_dump_file)
rtx f;
int nregs;
FILE *regmove_dump_file;
{
#ifdef REGISTER_CONSTRAINTS
rtx insn;
/* matches[i][j] is nonzero if operand i is to be paired with operand j;
which index is the input and which the output differs between the two
stages (see where it is filled in). */
int matches[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS];
/* modified[i] is 2 if the constraint string of operand i starts with '=',
1 if it starts with '+', and 0 otherwise. */
int modified[MAX_RECOG_OPERANDS];
/* early_clobber[i] is 1 if the selected alternative of operand i
contains '&'. */
int early_clobber[MAX_RECOG_OPERANDS];
/* Number of the operand carrying '%', or -1 if none seen yet. */
int commutative;
int pass;
/* A forward/backward pass. Replace output operands with input operands. */
for (pass = 0; pass < 2; pass++)
{
if (regmove_dump_file)
fprintf (regmove_dump_file, "Starting %s pass...\n",
pass ? "backward" : "forward");
for (insn = pass ? get_last_insn () : f; insn;
insn = pass ? PREV_INSN (insn) : NEXT_INSN (insn))
{
if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
{
int insn_code_number = recog_memoized (insn);
int operand_number, match_number;
if (insn_code_number < 0)
continue;
insn_extract (insn);
if (! constrain_operands (insn_code_number, 0))
continue;
commutative = -1;
/* Must initialize this before the loop, because the code for
the commutative case may set matches for operands other than
the current one. */
bzero (matches, sizeof (matches));
for (operand_number = 0;
operand_number < insn_n_operands[insn_code_number];
operand_number++)
{
char *p, c;
int i = 0;
modified[operand_number] = 0;
early_clobber[operand_number] = 0;
p = insn_operand_constraint[insn_code_number][operand_number];
if (*p == '=')
modified[operand_number] = 2;
else if (*p == '+')
modified[operand_number] = 1;
/* Skip to the alternative selected by constrain_operands. */
for (;*p && i < which_alternative; p++)
if (*p == ',')
i++;
while ((c = *p++) != '\0' && c != ',')
switch (c)
{
case '=':
break;
case '+':
break;
case '&':
early_clobber[operand_number] = 1;
break;
case '%':
commutative = operand_number;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c -= '0';
/* Here the first index is the operand with the digit constraint and
the second index is the operand it must match. */
matches[operand_number][c] = 1;
if (commutative >= 0)
{
if (c == commutative || c == commutative + 1)
{
int other = c + (c == commutative ? 1 : -1);
matches[operand_number][other] = 1;
}
if (operand_number == commutative
|| operand_number == commutative + 1)
{
int other = (operand_number
+ (operand_number == commutative
? 1 : -1));
matches[other][c] = 1;
}
}
break;
}
}
/* Now scan through the operands looking for a source operand
which is supposed to match the destination operand.
Then scan forward for an instruction which uses the dest
operand.
If it dies there, then replace the dest in both operands with
the source operand. */
for (operand_number = 0;
operand_number < insn_n_operands[insn_code_number];
operand_number++)
{
for (match_number = 0;
match_number < insn_n_operands[insn_code_number];
match_number++)
{
rtx set, p, src, dst, src_subreg;
rtx post_inc = 0, post_inc_set = 0, search_end = 0;
rtx src_note, dst_note;
int success = 0;
int num_calls = 0;
enum rtx_code code = NOTE;
HOST_WIDE_INT insn_const, newconst;
rtx overlap = 0; /* need to move insn ? */
/* Nothing to do if the two operands aren't supposed to
match. */
if (matches[operand_number][match_number] == 0)
continue;
src = recog_operand[operand_number];
dst = recog_operand[match_number];
if (GET_CODE (src) != REG
|| REGNO (src) < FIRST_PSEUDO_REGISTER)
continue;
src_subreg = src;
if (GET_CODE (dst) == SUBREG
&& GET_MODE_SIZE (GET_MODE (dst))
>= GET_MODE_SIZE (GET_MODE (SUBREG_REG (dst))))
{
src_subreg
= gen_rtx(SUBREG, GET_MODE (SUBREG_REG (dst)),
src, SUBREG_WORD (dst));
dst = SUBREG_REG (dst);
}
if (GET_CODE (dst) != REG
|| REGNO (dst) < FIRST_PSEUDO_REGISTER)
continue;
/* If the operands already match, then there is nothing
to do. */
if (operands_match_p (src, dst))
continue;
set = single_set (insn);
if (! set)
continue;
/* operand_number/src must be a read-only operand, and
match_operand/dst must be a write-only operand. */
if (modified[match_number] != 2)
continue;
if (early_clobber[match_number] == 1)
continue;
if (modified[operand_number] != 0)
continue;
/* Make sure match_operand is the destination. */
if (recog_operand[match_number] != SET_DEST (set))
continue;
src_note = find_reg_note (insn, REG_DEAD, src);
if (! src_note)
{
/* Look for (set (regX) (op regA constX))
(set (regY) (op regA constY))
and change that to
(set (regA) (op regA constX)).
(set (regY) (op regA constY-constX)).
This works for add and shift operations, if
regA is dead after or set by the second insn. */
code = GET_CODE (SET_SRC (set));
if ((code == PLUS || code == LSHIFTRT
|| code == ASHIFT || code == ASHIFTRT)
&& XEXP (SET_SRC (set), 0) == src
&& (GET_CODE (XEXP (SET_SRC (set), 1))
== CONST_INT))
insn_const = INTVAL (XEXP (SET_SRC (set), 1));
else if (! stable_but_for_p (SET_SRC (set), src, dst))
continue;
else
/* We might find a src_note while scanning. */
code = NOTE;
}
if (regmove_dump_file)
fprintf (regmove_dump_file,
"Could fix operand %d of insn %d matching operand %d.\n",
operand_number, INSN_UID (insn), match_number);
/* ??? If src is set once, and is set equal to a
constant, then do not use it for this optimization,
as this would make it no longer equivalent to a
constant? */
/* Scan forward to find the next instruction that
uses the output operand. If the operand dies here,
then replace it in both instructions with
operand_number. */
for (p = NEXT_INSN (insn); p; p = NEXT_INSN (p))
{
if (GET_CODE (p) == CODE_LABEL
|| GET_CODE (p) == JUMP_INSN
|| (GET_CODE (p) == NOTE
&& ((NOTE_LINE_NUMBER (p)
== NOTE_INSN_LOOP_BEG)
|| (NOTE_LINE_NUMBER (p)
== NOTE_INSN_LOOP_END))))
break;
if (GET_RTX_CLASS (GET_CODE (p)) != 'i')
continue;
if (reg_set_p (src, p) || reg_set_p (dst, p)
|| (GET_CODE (PATTERN (p)) == USE
&& reg_overlap_mentioned_p (src,
XEXP (PATTERN (p),
0))))
break;
/* See if all of DST dies in P. This test is
slightly more conservative than it needs to be. */
if ((dst_note
= find_regno_note (p, REG_DEAD, REGNO (dst)))
&& (GET_MODE (XEXP (dst_note, 0))
== GET_MODE (dst)))
{
if (! src_note)
{
rtx q;
rtx set2;
/* If an optimization is done, the value
of SRC while P is executed will be
changed. Check that this is OK. */
if (reg_overlap_mentioned_p (src,
PATTERN (p)))
break;
for (q = p; q; q = NEXT_INSN (q))
{
if (GET_CODE (q) == CODE_LABEL
|| GET_CODE (q) == JUMP_INSN
|| (GET_CODE (q) == NOTE
&& ((NOTE_LINE_NUMBER (q)
== NOTE_INSN_LOOP_BEG)
|| (NOTE_LINE_NUMBER (q)
== NOTE_INSN_LOOP_END))))
{
q = 0;
break;
}
if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
continue;
if (reg_overlap_mentioned_p (src,
PATTERN (q))
|| reg_set_p (src, q))
break;
}
if (q)
set2 = single_set (q);
if (! q || ! set2
|| GET_CODE (SET_SRC (set2)) != code
|| XEXP (SET_SRC (set2), 0) != src
|| (GET_CODE (XEXP (SET_SRC (set2), 1))
!= CONST_INT)
|| (SET_DEST (set2) != src
&& !find_reg_note (q, REG_DEAD, src)))
{
/* If this is a PLUS, we can still save
a register by doing
src += insn_const;
P;
src -= insn_const; .
This also gives opportunities for
subsequent optimizations in the
backward pass, so do it there. */
if (code == PLUS && pass == 1
#ifdef HAVE_cc0
/* We may not emit an insn directly
after P if the latter sets CC0. */
&& ! sets_cc0_p (PATTERN (p))
#endif
)
{
search_end = q;
q = insn;
set2 = set;
newconst = -insn_const;
code = MINUS;
}
else
break;
}
else
{
newconst
= (INTVAL (XEXP (SET_SRC (set2), 1))
- insn_const);
/* Reject out of range shifts. */
if (code != PLUS
&& (newconst < 0
|| (newconst
>= GET_MODE_BITSIZE (GET_MODE (SET_SRC (set2))))))
break;
if (code == PLUS)
{
post_inc = q;
if (SET_DEST (set2) != src)
post_inc_set = set2;
}
}
/* We use 1 as last argument to
validate_change so that all changes
are accepted or rejected together by
apply_change_group when it is called
by validate_replace_rtx . */
validate_change (q, &XEXP (SET_SRC (set2), 1),
GEN_INT (newconst), 1);
}
validate_change (insn,
recog_operand_loc[match_number],
src, 1);
if (validate_replace_rtx (dst, src_subreg, p))
success = 1;
break;
}
if (reg_overlap_mentioned_p (dst, PATTERN (p)))
break;
if (! src_note
&& reg_overlap_mentioned_p (src, PATTERN (p)))
{
/* INSN was already checked to be movable when
we found no REG_DEAD note for src on it. */
overlap = p;
src_note = find_reg_note (p, REG_DEAD, src);
}
/* If we have passed a call instruction, and the
pseudo-reg SRC is not already live across a call,
then don't perform the optimization. */
if (GET_CODE (p) == CALL_INSN)
{
num_calls++;
if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
break;
}
}
if (success)
{
/* Remove the death note for DST from P. */
remove_note (p, dst_note);
if (code == MINUS)
{
post_inc
= emit_insn_after (copy_rtx (PATTERN (insn)),
p);
#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
if (search_end
&& try_auto_increment (search_end, post_inc,
0, src, newconst, 1))
post_inc = 0;
#endif
validate_change (insn, &XEXP (SET_SRC (set), 1),
GEN_INT (insn_const), 0);
REG_N_SETS (REGNO (src))++;
}
if (overlap)
{
/* The lifetime of src and dest overlap,
but we can change this by moving insn. */
rtx pat = PATTERN (insn);
if (src_note)
remove_note (overlap, src_note);
#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT)
if (code == PLUS
&& try_auto_increment (overlap, insn, 0,
src, insn_const, 0))
insn = overlap;
else
#endif
{
emit_insn_after_with_line_notes
(pat, PREV_INSN (p), insn);
PUT_CODE (insn, NOTE);
NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (insn) = 0;
/* emit_insn_after_with_line_notes
has no return value, so search
for the new insn. */
for (insn = p; PATTERN (insn) != pat; )
insn = PREV_INSN (insn);
}
}
/* Sometimes we'd generate src = const; src += n;
if so, replace the instruction that set src
in the first place. */
if (! overlap && (code == PLUS || code == MINUS))
{
rtx note
= find_reg_note (insn, REG_EQUAL, NULL_RTX);
rtx q, set2;
int num_calls2 = 0;
if (note && CONSTANT_P (XEXP (note, 0)))
{
for (q = PREV_INSN (insn); q;
q = PREV_INSN(q))
{
if (GET_CODE (q) == JUMP_INSN)
{
q = 0;
break;
}
if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
continue;
if (reg_set_p (src, q))
{
set2 = single_set (q);
break;
}
if (reg_overlap_mentioned_p (src,
PATTERN (q)))
{
q = 0;
break;
}
/* Count the calls between the insn that sets SRC and INSN.
The insn being scanned is Q; P does not change during this
scan, so it must not be the one tested here. */
if (GET_CODE (q) == CALL_INSN)
num_calls2++;
}
if (q && set2 && SET_DEST (set2) == src
&& CONSTANT_P (SET_SRC (set2))
&& validate_change (insn, &SET_SRC (set),
XEXP (note, 0), 0))
{
PUT_CODE (q, NOTE);
NOTE_LINE_NUMBER (q) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (q) = 0;
REG_N_SETS (REGNO (src))--;
REG_N_CALLS_CROSSED (REGNO (src))
-= num_calls2;
insn_const = 0;
}
}
}
if (0) ;
#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
else if ((code == PLUS || code == MINUS)
&& insn_const
&& try_auto_increment (p, insn, 0,
src, insn_const, 1))
insn = p;
#endif
#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT)
else if (post_inc
&& try_auto_increment (p, post_inc,
post_inc_set, src,
newconst, 0))
post_inc = 0;
#endif
#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
/* If post_inc still prevails, try to find an
insn where it can be used as a pre-in/decrement.
If code is MINUS, this was already tried. */
if (post_inc && code == PLUS
/* Check that newconst is likely to be usable
in a pre-in/decrement before starting the
search. */
&& (0
#if defined (HAVE_PRE_INCREMENT)
|| (newconst > 0 && newconst <= MOVE_MAX)
#endif
#if defined (HAVE_PRE_DECREMENT)
|| (newconst < 0 && newconst >= -MOVE_MAX)
#endif
) && exact_log2 (newconst))
{
rtx q, inc_dest;
inc_dest
= post_inc_set ? SET_DEST (post_inc_set) : src;
for (q = post_inc; (q = NEXT_INSN (q)) != 0; )
{
if (GET_CODE (q) == CODE_LABEL
|| GET_CODE (q) == JUMP_INSN
|| (GET_CODE (q) == NOTE
&& ((NOTE_LINE_NUMBER (q)
== NOTE_INSN_LOOP_BEG)
|| (NOTE_LINE_NUMBER (q)
== NOTE_INSN_LOOP_END))))
break;
if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
continue;
if (src != inc_dest
&& (reg_overlap_mentioned_p (src,
PATTERN (q))
|| reg_set_p (src, q)))
break;
if (reg_set_p (inc_dest, q))
break;
if (reg_overlap_mentioned_p (inc_dest,
PATTERN (q)))
{
try_auto_increment (q, post_inc,
post_inc_set,
inc_dest,
newconst, 1);
break;
}
}
}
#endif /* defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) */
/* Move the death note for DST to INSN if it is used
there. */
if (reg_overlap_mentioned_p (dst, PATTERN (insn)))
{
XEXP (dst_note, 1) = REG_NOTES (insn);
REG_NOTES (insn) = dst_note;
}
if (src_note)
{
/* Move the death note for SRC from INSN to P. */
if (! overlap)
remove_note (insn, src_note);
XEXP (src_note, 1) = REG_NOTES (p);
REG_NOTES (p) = src_note;
REG_N_CALLS_CROSSED (REGNO (src)) += num_calls;
}
REG_N_SETS (REGNO (src))++;
REG_N_SETS (REGNO (dst))--;
REG_N_CALLS_CROSSED (REGNO (dst)) -= num_calls;
/* ??? Must adjust reg_live_length, and reg_n_refs for
both registers. Must keep track of loop_depth in
order to get reg_n_refs adjustment correct. */
if (regmove_dump_file)
fprintf (regmove_dump_file,
"Fixed operand %d of insn %d matching operand %d.\n",
operand_number, INSN_UID (insn),
match_number);
goto done_forwards;
}
}
}
done_forwards:
;
}
}
}
/* A backward pass. Replace input operands with output operands. */
if (regmove_dump_file)
fprintf (regmove_dump_file, "Starting backward pass...\n");
for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
{
if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
{
int insn_code_number = recog_memoized (insn);
int operand_number, match_number;
if (insn_code_number < 0)
continue;
insn_extract (insn);
if (! constrain_operands (insn_code_number, 0))
continue;
commutative = -1;
/* Must initialize this before the loop, because the code for
the commutative case may set matches for operands other than
the current one. */
bzero (matches, sizeof (matches));
for (operand_number = 0;
operand_number < insn_n_operands[insn_code_number];
operand_number++)
{
char *p, c;
int i = 0;
modified[operand_number] = 0;
early_clobber[operand_number] = 0;
p = insn_operand_constraint[insn_code_number][operand_number];
if (*p == '=')
modified[operand_number] = 2;
else if (*p == '+')
modified[operand_number] = 1;
/* Skip to the alternative selected by constrain_operands. */
for (; *p && i < which_alternative; p++)
if (*p == ',')
i++;
while ((c = *p++) != '\0' && c != ',')
switch (c)
{
case '=':
break;
case '+':
break;
case '&':
early_clobber[operand_number] = 1;
break;
case '%':
commutative = operand_number;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c -= '0';
/* The indices are swapped with respect to the first stage: the first
index is the matched operand and the second index is the operand
with the digit constraint. */
matches[c][operand_number] = 1;
if (commutative >= 0)
{
if (c == commutative || c == commutative + 1)
{
int other = c + (c == commutative ? 1 : -1);
matches[other][operand_number] = 1;
}
if (operand_number == commutative
|| operand_number == commutative + 1)
{
int other = (operand_number
+ (operand_number == commutative
? 1 : -1));
matches[c][other] = 1;
}
}
break;
}
}
/* Now scan through the operands looking for a destination operand
which is supposed to match a source operand.
Then scan backward for an instruction which sets the source
operand. If safe, then replace the source operand with the
dest operand in both instructions. */
for (operand_number = 0;
operand_number < insn_n_operands[insn_code_number];
operand_number++)
{
for (match_number = 0;
match_number < insn_n_operands[insn_code_number];
match_number++)
{
rtx set, p, src, dst;
rtx src_note, dst_note;
int success = 0;
int num_calls = 0;
/* Nothing to do if the two operands aren't supposed to
match. */
if (matches[operand_number][match_number] == 0)
continue;
dst = recog_operand[operand_number];
src = recog_operand[match_number];
if (GET_CODE (src) != REG
|| REGNO (src) < FIRST_PSEUDO_REGISTER)
continue;
if (GET_CODE (dst) != REG
|| REGNO (dst) < FIRST_PSEUDO_REGISTER)
continue;
/* If the operands already match, then there is nothing
to do. */
if (operands_match_p (src, dst))
continue;
set = single_set (insn);
if (! set)
continue;
/* operand_number/dst must be a write-only operand, and
match_operand/src must be a read-only operand. */
if (modified[match_number] != 0)
continue;
if (early_clobber[operand_number] == 1)
continue;
if (modified[operand_number] != 2)
continue;
/* Make sure operand_number is the destination. */
if (recog_operand[operand_number] != SET_DEST (set))
continue;
if (! (src_note = find_reg_note (insn, REG_DEAD, src)))
continue;
/* Can not modify an earlier insn to set dst if this insn
uses an old value in the source. */
if (reg_overlap_mentioned_p (dst, SET_SRC (set)))
continue;
if (regmove_dump_file)
fprintf (regmove_dump_file,
"Could fix operand %d of insn %d matching operand %d.\n",
operand_number, INSN_UID (insn), match_number);
/* ??? If src is set once, and is set equal to a constant,
then do not use it for this optimization, as this would
make it no longer equivalent to a constant? */
/* Scan backward to find the first instruction that uses
the input operand. If the operand is set here, then
replace it in both instructions with operand_number. */
for (p = PREV_INSN (insn); p; p = PREV_INSN (p))
{
rtx pset;
if (GET_CODE (p) == CODE_LABEL
|| GET_CODE (p) == JUMP_INSN
|| (GET_CODE (p) == NOTE
&& (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG
|| NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)))
break;
if (GET_RTX_CLASS (GET_CODE (p)) != 'i')
continue;
/* ??? See if all of SRC is set in P. This test is much
more conservative than it needs to be. */
pset = single_set (p);
if (pset && SET_DEST (pset) == src)
{
/* We use validate_replace_rtx, in case there
are multiple identical source operands. All of
them have to be changed at the same time. */
if (validate_replace_rtx (src, dst, insn))
{
if (validate_change (p, &SET_DEST (pset),
dst, 0))
success = 1;
else
{
/* Change all source operands back.
This modifies the dst as a side-effect. */
validate_replace_rtx (dst, src, insn);
/* Now make sure the dst is right. */
validate_change (insn,
recog_operand_loc[operand_number],
dst, 0);
}
}
break;
}
if (reg_overlap_mentioned_p (src, PATTERN (p))
|| reg_overlap_mentioned_p (dst, PATTERN (p)))
break;
/* If we have passed a call instruction, and the
pseudo-reg DST is not already live across a call,
then don't perform the optimization. */
if (GET_CODE (p) == CALL_INSN)
{
num_calls++;
if (REG_N_CALLS_CROSSED (REGNO (dst)) == 0)
break;
}
}
if (success)
{
/* Remove the death note for SRC from INSN. */
remove_note (insn, src_note);
/* Move the death note for SRC to P if it is used
there. */
if (reg_overlap_mentioned_p (src, PATTERN (p)))
{
XEXP (src_note, 1) = REG_NOTES (p);
REG_NOTES (p) = src_note;
}
/* If there is a REG_DEAD note for DST on P, then remove
it, because DST is now set there. */
if ((dst_note = find_reg_note (p, REG_DEAD, dst)) != 0)
remove_note (p, dst_note);
REG_N_SETS (REGNO (dst))++;
REG_N_SETS (REGNO (src))--;
REG_N_CALLS_CROSSED (REGNO (dst)) += num_calls;
REG_N_CALLS_CROSSED (REGNO (src)) -= num_calls;
/* ??? Must adjust reg_live_length, and reg_n_refs for
both registers. Must keep track of loop_depth in
order to get reg_n_refs adjustment correct. */
if (regmove_dump_file)
fprintf (regmove_dump_file,
"Fixed operand %d of insn %d matching operand %d.\n",
operand_number, INSN_UID (insn), match_number);
goto done_backwards;
}
}
}
done_backwards:
;
}
}
#endif /* REGISTER_CONSTRAINTS */
}
/* Return nonzero if X is stable apart from mentioning SRC or mentioning /
   changing DST.  When in doubt, X is presumed unstable.

   Expressions of rtx class '<', '1', 'c', '2', 'b' or '3' are stable when
   every operand with format letter 'e' is.  An rtx of class 'o' that is
   SRC or DST itself is accepted; everything else is judged by
   rtx_unstable_p.  */
static int
stable_but_for_p (x, src, dst)
     rtx x, src, dst;
{
  RTX_CODE code = GET_CODE (x);
  char rtx_class = GET_RTX_CLASS (code);

  if (rtx_class == '<' || rtx_class == '1' || rtx_class == 'c'
      || rtx_class == '2' || rtx_class == 'b' || rtx_class == '3')
    {
      char *format = GET_RTX_FORMAT (code);
      int n_operands = GET_RTX_LENGTH (code);
      int idx;

      /* Every sub-expression has to pass the same test.  */
      for (idx = 0; idx < n_operands; idx++)
	{
	  if (format[idx] != 'e')
	    continue;
	  if (! stable_but_for_p (XEXP (x, idx), src, dst))
	    return 0;
	}
      return 1;
    }

  /* SRC and DST themselves are the permitted exceptions.  */
  if (rtx_class == 'o' && (x == src || x == dst))
    return 1;

  return ! rtx_unstable_p (x);
}

View File

@ -812,6 +812,7 @@ extern void remove_note PROTO((rtx, rtx));
extern void note_stores PROTO((rtx, void (*)()));
extern int refers_to_regno_p PROTO((int, int, rtx, rtx *));
extern int reg_overlap_mentioned_p PROTO((rtx, rtx));
extern rtx find_use_as_address PROTO((rtx, rtx, HOST_WIDE_INT));
/* Maximum number of parallel sets and clobbers in any insn in this fn.

View File

@ -245,6 +245,7 @@ int cse2_dump = 0;
int branch_prob_dump = 0;
int flow_dump = 0;
int combine_dump = 0;
int regmove_dump = 0;
int sched_dump = 0;
int local_reg_dump = 0;
int global_reg_dump = 0;
@ -566,6 +567,35 @@ int flag_pedantic_errors = 0;
int flag_schedule_insns = 0;
int flag_schedule_insns_after_reload = 0;
#ifdef HAIFA
/* The following flags have effect only for scheduling before register
allocation:
flag_schedule_interblock means schedule insns across basic blocks.
flag_schedule_speculative means allow speculative motion of non-load insns.
flag_schedule_speculative_load means allow speculative motion of some
load insns.
flag_schedule_speculative_load_dangerous allows speculative motion of more
load insns.
flag_schedule_reverse_before_reload means try to reverse original order
of insns (S).
flag_schedule_reverse_after_reload means try to reverse original order
of insns (R). */
int flag_schedule_interblock = 1;
int flag_schedule_speculative = 1;
int flag_schedule_speculative_load = 0;
int flag_schedule_speculative_load_dangerous = 0;
int flag_schedule_reverse_before_reload = 0;
int flag_schedule_reverse_after_reload = 0;
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
by a cheaper branch, on a count register. */
int flag_branch_on_count_reg;
#endif /* HAIFA */
/* -finhibit-size-directive inhibits output of .size for ELF.
This is used only for compiling crtstuff.c,
and it may be extended to other effects
@ -616,6 +646,8 @@ int flag_check_memory_usage = 0;
-fcheck-memory-usage. */
int flag_prefix_function_name = 0;
int flag_regmove = 0;
/* 1 if alias checking is on (by default, when -O). */
int flag_alias_check = 0;
@ -666,6 +698,15 @@ struct { char *string; int *variable; int on_value;} f_options[] =
{"pretend-float", &flag_pretend_float, 1},
{"schedule-insns", &flag_schedule_insns, 1},
{"schedule-insns2", &flag_schedule_insns_after_reload, 1},
#ifdef HAIFA
{"sched-interblock",&flag_schedule_interblock, 1},
{"sched-spec",&flag_schedule_speculative, 1},
{"sched-spec-load",&flag_schedule_speculative_load, 1},
{"sched-spec-load-dangerous",&flag_schedule_speculative_load_dangerous, 1},
{"sched-reverse-S",&flag_schedule_reverse_before_reload, 1},
{"sched-reverse-R",&flag_schedule_reverse_after_reload, 1},
{"branch-count-reg",&flag_branch_on_count_reg, 1},
#endif /* HAIFA */
{"pic", &flag_pic, 1},
{"PIC", &flag_pic, 2},
{"exceptions", &flag_exceptions, 1},
@ -680,6 +721,7 @@ struct { char *string; int *variable; int on_value;} f_options[] =
{"function-sections", &flag_function_sections, 1},
{"verbose-asm", &flag_verbose_asm, 1},
{"gnu-linker", &flag_gnu_linker, 1},
{"regmove", &flag_regmove, 1},
{"pack-struct", &flag_pack_struct, 1},
{"stack-check", &flag_stack_check, 1},
{"bytecode", &output_bytecode, 1},
@ -885,6 +927,7 @@ FILE *cse2_dump_file;
FILE *branch_prob_dump_file;
FILE *flow_dump_file;
FILE *combine_dump_file;
FILE *regmove_dump_file;
FILE *sched_dump_file;
FILE *local_reg_dump_file;
FILE *global_reg_dump_file;
@ -905,6 +948,7 @@ int cse2_time;
int branch_prob_time;
int flow_time;
int combine_time;
int regmove_time;
int sched_time;
int local_alloc_time;
int global_alloc_time;
@ -1053,6 +1097,8 @@ fatal_insn (message, insn)
fflush (flow_dump_file);
if (combine_dump_file)
fflush (combine_dump_file);
if (regmove_dump_file)
fflush (regmove_dump_file);
if (sched_dump_file)
fflush (sched_dump_file);
if (local_reg_dump_file)
@ -2131,6 +2177,7 @@ compile_file (name)
branch_prob_time = 0;
flow_time = 0;
combine_time = 0;
regmove_time = 0;
sched_time = 0;
local_alloc_time = 0;
global_alloc_time = 0;
@ -2229,6 +2276,10 @@ compile_file (name)
if (combine_dump)
combine_dump_file = open_dump_file (dump_base_name, ".combine");
/* If regmove dump desired, open the output file. */
if (regmove_dump)
regmove_dump_file = open_dump_file (dump_base_name, ".regmove");
/* If scheduling dump desired, open the output file. */
if (sched_dump)
sched_dump_file = open_dump_file (dump_base_name, ".sched");
@ -2713,6 +2764,9 @@ compile_file (name)
fclose (combine_dump_file);
}
if (regmove_dump)
fclose (regmove_dump_file);
if (sched_dump)
fclose (sched_dump_file);
@ -2765,6 +2819,7 @@ compile_file (name)
print_time ("branch-prob", branch_prob_time);
print_time ("flow", flow_time);
print_time ("combine", combine_time);
print_time ("regmove", regmove_time);
print_time ("sched", sched_time);
print_time ("local-alloc", local_alloc_time);
print_time ("global-alloc", global_alloc_time);
@ -3304,6 +3359,26 @@ rest_of_compilation (decl)
fflush (combine_dump_file);
});
if (regmove_dump)
TIMEVAR (dump_time,
{
fprintf (regmove_dump_file, "\n;; Function %s\n\n",
(*decl_printable_name) (decl, 2));
});
/* Register allocation pre-pass, to reduce number of moves
necessary for two-address machines. */
if (optimize > 0 && flag_regmove)
TIMEVAR (regmove_time, regmove_optimize (insns, max_reg_num (),
regmove_dump_file));
if (regmove_dump)
TIMEVAR (dump_time,
{
print_rtl (regmove_dump_file, insns);
fflush (regmove_dump_file);
});
/* Print function header into sched dump now
because doing the sched analysis makes some of the dump. */
@ -3703,6 +3778,7 @@ main (argc, argv, envp)
flag_schedule_insns = 1;
flag_schedule_insns_after_reload = 1;
#endif
flag_regmove = 1;
}
if (optimize >= 3)
@ -3764,6 +3840,7 @@ main (argc, argv, envp)
jump2_opt_dump = 1;
local_reg_dump = 1;
loop_dump = 1;
regmove_dump = 1;
rtl_dump = 1;
cse_dump = 1, cse2_dump = 1;
sched_dump = 1;
@ -3815,6 +3892,9 @@ main (argc, argv, envp)
case 't':
cse2_dump = 1;
break;
case 'N':
regmove_dump = 1;
break;
case 'S':
sched_dump = 1;
break;
@ -3862,6 +3942,18 @@ main (argc, argv, envp)
if (found)
;
/* Haifa scheduler tuning switches, recognized only when both HAIFA and
   INSN_SCHEDULING are defined.  Each branch matches a fixed option prefix
   (the strncmp length is the length of that prefix) and hands the text
   following the prefix to fix_sched_param under the given parameter name.
   NOTE(review): how fix_sched_param interprets that text is not visible
   here -- confirm against its definition.  */
#ifdef HAIFA
#ifdef INSN_SCHEDULING
else if (!strncmp (p, "sched-verbose-",14))
fix_sched_param("verbose",&p[14]);
else if (!strncmp (p, "sched-max-",10))
fix_sched_param("max",&p[10]);
else if (!strncmp (p, "sched-inter-max-b-",18))
fix_sched_param("interblock-max-blocks",&p[18]);
else if (!strncmp (p, "sched-inter-max-i-",18))
fix_sched_param("interblock-max-insns",&p[18]);
#endif
#endif /* HAIFA */
else if (!strncmp (p, "fixed-", 6))
fix_register (&p[6], 1, 1);
else if (!strncmp (p, "call-used-", 10))

View File

@ -202,7 +202,7 @@ static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
enum unroll_types, rtx, rtx, rtx, rtx));
static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
@ -1094,6 +1094,16 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
/* Set unroll type to MODULO now. */
unroll_type = UNROLL_MODULO;
loop_preconditioned = 1;
#ifdef HAIFA
/* Record information about this loop for the Haifa code.  Both arrays
   are indexed by loop_number (loop_start, loop_end).  */
if (loop_n_iterations > 0)
/* Store the iteration count rounded down to a multiple of
   abs_inc * unroll_number, i.e. with the remainder taken off.  */
loop_unroll_iter[ loop_number(loop_start, loop_end) ]
= (loop_n_iterations
- loop_n_iterations % (abs_inc * unroll_number));
else
/* Otherwise, inform loop.c about the new initial value instead.  */
loop_start_value[loop_number(loop_start, loop_end)] = initial_value;
#endif
}
}
@ -1108,6 +1118,15 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
/* At this point, we are guaranteed to unroll the loop. */
#ifdef HAIFA
/* Inform loop.c about the unrolling factor of this loop, stored in
   loop_unroll_factor at index loop_number (loop_start, loop_end):
   -1 when unroll_type is UNROLL_COMPLETELY, otherwise unroll_number.  */
if (unroll_type == UNROLL_COMPLETELY)
loop_unroll_factor[ loop_number(loop_start, loop_end) ] = -1;
else
loop_unroll_factor[ loop_number(loop_start, loop_end) ] = unroll_number;
#endif /* HAIFA */
/* For each biv and giv, determine whether it can be safely split into
a different variable for each unrolled copy of the loop body.
We precalculate and save this info here, since computing it is
@ -2263,7 +2282,7 @@ biv_total_increment (bl, loop_start, loop_end)
Initial_value and/or increment are set to zero if their values could not
be calculated. */
static void
void
iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
rtx iteration_var, *initial_value, *increment;
rtx loop_start, loop_end;