rtl.h (rtx_function): New type.
* rtl.h (rtx_function): New type. (for_each_rtx): New function. * rtlanal.c (for_each_rtx): Define it. * recog.c (change_t): New type. (change_objects, change_old_codes, change_locs, change_olds): Replace with ... (changes): New variable. (validate_change): Dynamically allocate room for more changes, if necessary. Uses changes array instead of change_objects, etc. (apply_change_group): Use changes array instead of change_objects, etc. * loop.c (loop_mem_info): New type. (loop_mems): New variable. (loop_mems_idx): Likewise. (looop_mems_allocated): Likewise. (scan_loop): Remove nregs parameter. (next_insn_in_loop): New function. (load_mems_and_recount_loop_regs_set): Likewise. (load_mems): Likewise. (insert_loop_mem): Likewise. (replace_loop_mem): Likewise. (replace_label): Likewise. (INSN_IN_RANGE_P): New macro. (loop_optimize): Don't pass max_reg_num() to scan_loop. (scan_loop): Remove nregs parameter, compute it after any new registers are created by load_mems. Use INSN_IN_RANGE_P and next_insn_in_loop rather than expanding them inline. Call load_mems to load memory into pseudos, if appropriate. (prescan_loop): Figure out whether or not there are jumps from the loop to targets other than the label immediately following the loop. Call insert_loop_mem to notice all the MEMs used in the loop, if it could be safe to pull MEMs into REGs for the duration of the loop. (strength_reduce): Use next_insn_in_loop. Tweak comments. From-SVN: r21845
This commit is contained in:
parent
031fec0070
commit
41a972a956
@ -1,3 +1,42 @@
|
||||
Wed Aug 19 13:28:41 1998 Mark Mitchell <mark@markmitchell.com>
|
||||
|
||||
* rtl.h (rtx_function): New type.
|
||||
(for_each_rtx): New function.
|
||||
* rtlanal.c (for_each_rtx): Define it.
|
||||
|
||||
* recog.c (change_t): New type.
|
||||
(change_objects, change_old_codes, change_locs, change_olds):
|
||||
Replace with ...
|
||||
(changes): New variable.
|
||||
(validate_change): Dynamically allocate room for more changes, if
|
||||
necessary. Uses changes array instead of change_objects, etc.
|
||||
(apply_change_group): Use changes array instead of
|
||||
change_objects, etc.
|
||||
|
||||
* loop.c (loop_mem_info): New type.
|
||||
(loop_mems): New variable.
|
||||
(loop_mems_idx): Likewise.
|
||||
(looop_mems_allocated): Likewise.
|
||||
(scan_loop): Remove nregs parameter.
|
||||
(next_insn_in_loop): New function.
|
||||
(load_mems_and_recount_loop_regs_set): Likewise.
|
||||
(load_mems): Likewise.
|
||||
(insert_loop_mem): Likewise.
|
||||
(replace_loop_mem): Likewise.
|
||||
(replace_label): Likewise.
|
||||
(INSN_IN_RANGE_P): New macro.
|
||||
(loop_optimize): Don't pass max_reg_num() to scan_loop.
|
||||
(scan_loop): Remove nregs parameter, compute it after any new
|
||||
registers are created by load_mems. Use INSN_IN_RANGE_P and
|
||||
next_insn_in_loop rather than expanding them inline. Call
|
||||
load_mems to load memory into pseudos, if appropriate.
|
||||
(prescan_loop): Figure out whether or not there are jumps from the
|
||||
loop to targets other than the label immediately following the
|
||||
loop. Call insert_loop_mem to notice all the MEMs used in the
|
||||
loop, if it could be safe to pull MEMs into REGs for the duration
|
||||
of the loop.
|
||||
(strength_reduce): Use next_insn_in_loop. Tweak comments.
|
||||
|
||||
Wed Aug 19 08:29:44 1998 Richard Earnshaw (rearnsha@arm.com)
|
||||
|
||||
* arm.c (arm_override_options): Remove lie about ignoring PIC flag.
|
||||
|
631
gcc/loop.c
631
gcc/loop.c
@ -195,6 +195,27 @@ static rtx loop_store_mems[NUM_STORES];
|
||||
/* Index of first available slot in above array. */
|
||||
static int loop_store_mems_idx;
|
||||
|
||||
typedef struct loop_mem_info {
|
||||
rtx mem; /* The MEM itself. */
|
||||
rtx reg; /* Corresponding pseudo, if any. */
|
||||
int optimize; /* Nonzero if we can optimize access to this MEM. */
|
||||
} loop_mem_info;
|
||||
|
||||
/* Array of MEMs that are used (read or written) in this loop, but
|
||||
cannot be aliased by anything in this loop, except perhaps
|
||||
themselves. In other words, if loop_mems[i] is altered during the
|
||||
loop, it is altered by an expression that is rtx_equal_p to it. */
|
||||
|
||||
static loop_mem_info *loop_mems;
|
||||
|
||||
/* The index of the next available slot in LOOP_MEMS. */
|
||||
|
||||
static int loop_mems_idx;
|
||||
|
||||
/* The number of elements allocated in LOOP_MEMs. */
|
||||
|
||||
static int loop_mems_allocated;
|
||||
|
||||
/* Nonzero if we don't know what MEMs were changed in the current loop.
|
||||
This happens if the loop contains a call (in which case `loop_has_call'
|
||||
will also be set) or if we store into more than NUM_STORES MEMs. */
|
||||
@ -288,7 +309,7 @@ static int labels_in_range_p PROTO((rtx, int));
|
||||
static void count_loop_regs_set PROTO((rtx, rtx, char *, rtx *, int *, int));
|
||||
static void note_addr_stored PROTO((rtx, rtx));
|
||||
static int loop_reg_used_before_p PROTO((rtx, rtx, rtx, rtx, rtx));
|
||||
static void scan_loop PROTO((rtx, rtx, int, int));
|
||||
static void scan_loop PROTO((rtx, rtx, int));
|
||||
#if 0
|
||||
static void replace_call_address PROTO((rtx, rtx, rtx));
|
||||
#endif
|
||||
@ -325,6 +346,29 @@ static int maybe_eliminate_biv_1 PROTO((rtx, rtx, struct iv_class *, int, rtx));
|
||||
static int last_use_this_basic_block PROTO((rtx, rtx));
|
||||
static void record_initial PROTO((rtx, rtx));
|
||||
static void update_reg_last_use PROTO((rtx, rtx));
|
||||
static rtx next_insn_in_loop PROTO((rtx, rtx, rtx, rtx));
|
||||
static void load_mems_and_recount_loop_regs_set PROTO((rtx, rtx, rtx,
|
||||
rtx, rtx *, int *));
|
||||
static void load_mems PROTO((rtx, rtx, rtx, rtx));
|
||||
static int insert_loop_mem PROTO((rtx *, void *));
|
||||
static int replace_loop_mem PROTO((rtx *, void *));
|
||||
static int replace_label PROTO((rtx *, void *));
|
||||
|
||||
typedef struct rtx_and_int {
|
||||
rtx r;
|
||||
int i;
|
||||
} rtx_and_int;
|
||||
|
||||
typedef struct rtx_pair {
|
||||
rtx r1;
|
||||
rtx r2;
|
||||
} rtx_pair;
|
||||
|
||||
/* Nonzero iff INSN is between START and END, inclusive. */
|
||||
#define INSN_IN_RANGE_P(INSN, START, END) \
|
||||
(INSN_UID (INSN) < max_uid_for_loop \
|
||||
&& INSN_LUID (INSN) >= INSN_LUID (START) \
|
||||
&& INSN_LUID (INSN) <= INSN_LUID (END))
|
||||
|
||||
#ifdef HAIFA
|
||||
/* This is extern from unroll.c */
|
||||
@ -543,7 +587,7 @@ loop_optimize (f, dumpfile, unroll_p)
|
||||
for (i = max_loop_num-1; i >= 0; i--)
|
||||
if (! loop_invalid[i] && loop_number_loop_ends[i])
|
||||
scan_loop (loop_number_loop_starts[i], loop_number_loop_ends[i],
|
||||
max_reg_num (), unroll_p);
|
||||
unroll_p);
|
||||
|
||||
/* If debugging and unrolling loops, we must replicate the tree nodes
|
||||
corresponding to the blocks inside the loop, so that the original one
|
||||
@ -554,6 +598,38 @@ loop_optimize (f, dumpfile, unroll_p)
|
||||
end_alias_analysis ();
|
||||
}
|
||||
|
||||
/* Returns the next insn, in execution order, after INSN. START and
|
||||
END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
|
||||
respectively. LOOP_TOP, if non-NULL, is the top of the loop in the
|
||||
insn-stream; it is used with loops that are entered near the
|
||||
bottom. */
|
||||
|
||||
static rtx
|
||||
next_insn_in_loop (insn, start, end, loop_top)
|
||||
rtx insn;
|
||||
rtx start;
|
||||
rtx end;
|
||||
rtx loop_top;
|
||||
{
|
||||
insn = NEXT_INSN (insn);
|
||||
|
||||
if (insn == end)
|
||||
{
|
||||
if (loop_top)
|
||||
/* Go to the top of the loop, and continue there. */
|
||||
insn = loop_top;
|
||||
else
|
||||
/* We're done. */
|
||||
insn = NULL_RTX;
|
||||
}
|
||||
|
||||
if (insn == start)
|
||||
/* We're done. */
|
||||
insn = NULL_RTX;
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
/* Optimize one loop whose start is LOOP_START and end is END.
|
||||
LOOP_START is the NOTE_INSN_LOOP_BEG and END is the matching
|
||||
NOTE_INSN_LOOP_END. */
|
||||
@ -565,13 +641,12 @@ loop_optimize (f, dumpfile, unroll_p)
|
||||
write, then we can also mark the memory read as invariant. */
|
||||
|
||||
static void
|
||||
scan_loop (loop_start, end, nregs, unroll_p)
|
||||
scan_loop (loop_start, end, unroll_p)
|
||||
rtx loop_start, end;
|
||||
int nregs;
|
||||
int unroll_p;
|
||||
{
|
||||
register int i;
|
||||
register rtx p;
|
||||
rtx p;
|
||||
/* 1 if we are scanning insns that could be executed zero times. */
|
||||
int maybe_never = 0;
|
||||
/* 1 if we are scanning insns that might never be executed
|
||||
@ -606,10 +681,7 @@ scan_loop (loop_start, end, nregs, unroll_p)
|
||||
rtx *reg_single_usage = 0;
|
||||
/* Nonzero if we are scanning instructions in a sub-loop. */
|
||||
int loop_depth = 0;
|
||||
|
||||
n_times_set = (int *) alloca (nregs * sizeof (int));
|
||||
n_times_used = (int *) alloca (nregs * sizeof (int));
|
||||
may_not_optimize = (char *) alloca (nregs);
|
||||
int nregs;
|
||||
|
||||
/* Determine whether this loop starts with a jump down to a test at
|
||||
the end. This will occur for a small number of loops with a test
|
||||
@ -660,9 +732,7 @@ scan_loop (loop_start, end, nregs, unroll_p)
|
||||
do {..} while (0). If this label was generated previously
|
||||
by loop, we can't tell anything about it and have to reject
|
||||
the loop. */
|
||||
&& INSN_UID (JUMP_LABEL (p)) < max_uid_for_loop
|
||||
&& INSN_LUID (JUMP_LABEL (p)) >= INSN_LUID (loop_start)
|
||||
&& INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (end))
|
||||
&& INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, end))
|
||||
{
|
||||
loop_top = next_label (scan_start);
|
||||
scan_start = JUMP_LABEL (p);
|
||||
@ -691,6 +761,12 @@ scan_loop (loop_start, end, nregs, unroll_p)
|
||||
the setting of register I. If this loop has calls, set
|
||||
reg_single_usage[I]. */
|
||||
|
||||
/* Allocate extra space for REGS that might be created by
|
||||
load_mems. */
|
||||
nregs = max_reg_num () + loop_mems_idx;
|
||||
n_times_set = (int *) alloca (nregs * sizeof (int));
|
||||
n_times_used = (int *) alloca (nregs * sizeof (int));
|
||||
may_not_optimize = (char *) alloca (nregs);
|
||||
bzero ((char *) n_times_set, nregs * sizeof (int));
|
||||
bzero (may_not_optimize, nregs);
|
||||
|
||||
@ -729,24 +805,10 @@ scan_loop (loop_start, end, nregs, unroll_p)
|
||||
When MAYBE_NEVER is 0, all insns will be executed at least once
|
||||
so that is not a problem. */
|
||||
|
||||
p = scan_start;
|
||||
while (1)
|
||||
for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top);
|
||||
p != NULL_RTX;
|
||||
p = next_insn_in_loop (p, scan_start, end, loop_top))
|
||||
{
|
||||
p = NEXT_INSN (p);
|
||||
/* At end of a straight-in loop, we are done.
|
||||
At end of a loop entered at the bottom, scan the top. */
|
||||
if (p == scan_start)
|
||||
break;
|
||||
if (p == end)
|
||||
{
|
||||
if (loop_top != 0)
|
||||
p = loop_top;
|
||||
else
|
||||
break;
|
||||
if (p == scan_start)
|
||||
break;
|
||||
}
|
||||
|
||||
if (GET_RTX_CLASS (GET_CODE (p)) == 'i'
|
||||
&& find_reg_note (p, REG_LIBCALL, NULL_RTX))
|
||||
in_libcall = 1;
|
||||
@ -1093,6 +1155,13 @@ scan_loop (loop_start, end, nregs, unroll_p)
|
||||
if (n_times_set[i] < 0)
|
||||
n_times_set[i] = n_times_used[i];
|
||||
|
||||
/* Now that we've moved some things out of the loop, we able to
|
||||
hoist even more memory references. There's no need to pass
|
||||
reg_single_usage this time, since we're done with it. */
|
||||
load_mems_and_recount_loop_regs_set (scan_start, end, loop_top,
|
||||
loop_start, 0,
|
||||
&insn_count);
|
||||
|
||||
if (flag_strength_reduce)
|
||||
{
|
||||
the_movables = movables;
|
||||
@ -2293,20 +2362,29 @@ constant_high_bytes (p, loop_start)
|
||||
|
||||
/* Scan a loop setting the variables `unknown_address_altered',
|
||||
`num_mem_sets', `loop_continue', loops_enclosed', `loop_has_call',
|
||||
and `loop_has_volatile'.
|
||||
Also, fill in the array `loop_store_mems'. */
|
||||
and `loop_has_volatile'. Also, fill in the arrays `loop_mems' and
|
||||
`loop_store_mems'. */
|
||||
|
||||
static void
|
||||
prescan_loop (start, end)
|
||||
rtx start, end;
|
||||
{
|
||||
register int level = 1;
|
||||
register rtx insn;
|
||||
rtx insn;
|
||||
int loop_has_multiple_exit_targets = 0;
|
||||
/* The label after END. Jumping here is just like falling off the
|
||||
end of the loop. We use next_nonnote_insn instead of next_label
|
||||
as a hedge against the (pathological) case where some actual insn
|
||||
might end up between the two. */
|
||||
rtx exit_target = next_nonnote_insn (end);
|
||||
if (exit_target == NULL_RTX || GET_CODE (exit_target) != CODE_LABEL)
|
||||
loop_has_multiple_exit_targets = 1;
|
||||
|
||||
unknown_address_altered = 0;
|
||||
loop_has_call = 0;
|
||||
loop_has_volatile = 0;
|
||||
loop_store_mems_idx = 0;
|
||||
loop_mems_idx = 0;
|
||||
|
||||
num_mem_sets = 0;
|
||||
loops_enclosed = 1;
|
||||
@ -2344,17 +2422,75 @@ prescan_loop (start, end)
|
||||
unknown_address_altered = 1;
|
||||
loop_has_call = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
|
||||
else if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
|
||||
{
|
||||
rtx label1 = NULL_RTX;
|
||||
rtx label2 = NULL_RTX;
|
||||
|
||||
if (volatile_refs_p (PATTERN (insn)))
|
||||
loop_has_volatile = 1;
|
||||
|
||||
note_stores (PATTERN (insn), note_addr_stored);
|
||||
|
||||
if (!loop_has_multiple_exit_targets
|
||||
&& GET_CODE (insn) == JUMP_INSN
|
||||
&& GET_CODE (PATTERN (insn)) == SET
|
||||
&& SET_DEST (PATTERN (insn)) == pc_rtx)
|
||||
{
|
||||
if (GET_CODE (SET_SRC (PATTERN (insn))) == IF_THEN_ELSE)
|
||||
{
|
||||
label1 = XEXP (SET_SRC (PATTERN (insn)), 1);
|
||||
label2 = XEXP (SET_SRC (PATTERN (insn)), 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
label1 = SET_SRC (PATTERN (insn));
|
||||
}
|
||||
|
||||
do {
|
||||
if (label1 && label1 != pc_rtx)
|
||||
{
|
||||
if (GET_CODE (label1) != LABEL_REF)
|
||||
{
|
||||
/* Something tricky. */
|
||||
loop_has_multiple_exit_targets = 1;
|
||||
break;
|
||||
}
|
||||
else if (XEXP (label1, 0) != exit_target
|
||||
&& LABEL_OUTSIDE_LOOP_P (label1))
|
||||
{
|
||||
/* A jump outside the current loop. */
|
||||
loop_has_multiple_exit_targets = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
label1 = label2;
|
||||
label2 = NULL_RTX;
|
||||
} while (label1);
|
||||
}
|
||||
}
|
||||
else if (GET_CODE (insn) == RETURN)
|
||||
loop_has_multiple_exit_targets = 1;
|
||||
}
|
||||
|
||||
/* Now, rescan the loop, setting up the LOOP_MEMS array. */
|
||||
if (/* We can't tell what MEMs are aliased by what. */
|
||||
!unknown_address_altered
|
||||
/* An exception thrown by a called function might land us
|
||||
anywhere. */
|
||||
&& !loop_has_call
|
||||
/* We don't want loads for MEMs moved to a location before the
|
||||
one at which their stack memory becomes allocated. (Note
|
||||
that this is not a problem for malloc, etc., since those
|
||||
require actual function calls. */
|
||||
&& !current_function_calls_alloca
|
||||
/* There are ways to leave the loop other than falling off the
|
||||
end. */
|
||||
&& !loop_has_multiple_exit_targets)
|
||||
for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
|
||||
insn = NEXT_INSN (insn))
|
||||
for_each_rtx (&insn, insert_loop_mem, 0);
|
||||
}
|
||||
|
||||
/* Scan the function looking for loops. Record the start and end of each loop.
|
||||
@ -3360,18 +3496,6 @@ static rtx addr_placeholder;
|
||||
/* ??? Unfinished optimizations, and possible future optimizations,
|
||||
for the strength reduction code. */
|
||||
|
||||
/* ??? There is one more optimization you might be interested in doing: to
|
||||
allocate pseudo registers for frequently-accessed memory locations.
|
||||
If the same memory location is referenced each time around, it might
|
||||
be possible to copy it into a register before and out after.
|
||||
This is especially useful when the memory location is a variable which
|
||||
is in a stack slot because somewhere its address is taken. If the
|
||||
loop doesn't contain a function call and the variable isn't volatile,
|
||||
it is safe to keep the value in a register for the duration of the
|
||||
loop. One tricky thing is that the copying of the value back from the
|
||||
register has to be done on all exits from the loop. You need to check that
|
||||
all the exits from the loop go to the same place. */
|
||||
|
||||
/* ??? The interaction of biv elimination, and recognition of 'constant'
|
||||
bivs, may cause problems. */
|
||||
|
||||
@ -3396,13 +3520,18 @@ static rtx addr_placeholder;
|
||||
was rerun in loop_optimize whenever a register was added or moved.
|
||||
Also, some of the optimizations could be a little less conservative. */
|
||||
|
||||
/* Perform strength reduction and induction variable elimination. */
|
||||
/* Perform strength reduction and induction variable elimination.
|
||||
|
||||
/* Pseudo registers created during this function will be beyond the last
|
||||
Pseudo registers created during this function will be beyond the last
|
||||
valid index in several tables including n_times_set and regno_last_uid.
|
||||
This does not cause a problem here, because the added registers cannot be
|
||||
givs outside of their loop, and hence will never be reconsidered.
|
||||
But scan_loop must check regnos to make sure they are in bounds. */
|
||||
But scan_loop must check regnos to make sure they are in bounds.
|
||||
|
||||
SCAN_START is the first instruction in the loop, as the loop would
|
||||
actually be executed. END is the NOTE_INSN_LOOP_END. LOOP_TOP is
|
||||
the first instruction in the loop, as it is layed out in the
|
||||
instruction stream. LOOP_START is the NOTE_INSN_LOOP_BEG. */
|
||||
|
||||
static void
|
||||
strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
@ -3470,24 +3599,10 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
|
||||
/* Scan through loop to find all possible bivs. */
|
||||
|
||||
p = scan_start;
|
||||
while (1)
|
||||
for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top);
|
||||
p != NULL_RTX;
|
||||
p = next_insn_in_loop (p, scan_start, end, loop_top))
|
||||
{
|
||||
p = NEXT_INSN (p);
|
||||
/* At end of a straight-in loop, we are done.
|
||||
At end of a loop entered at the bottom, scan the top. */
|
||||
if (p == scan_start)
|
||||
break;
|
||||
if (p == end)
|
||||
{
|
||||
if (loop_top != 0)
|
||||
p = loop_top;
|
||||
else
|
||||
break;
|
||||
if (p == scan_start)
|
||||
break;
|
||||
}
|
||||
|
||||
if (GET_CODE (p) == INSN
|
||||
&& (set = single_set (p))
|
||||
&& GET_CODE (SET_DEST (set)) == REG)
|
||||
@ -8208,3 +8323,385 @@ indirect_jump_in_function_p (start)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Add MEM to the LOOP_MEMS array, if appropriate. See the
|
||||
documentation for LOOP_MEMS for the definition of `appropriate'.
|
||||
This function is called from prescan_loop via for_each_rtx. */
|
||||
|
||||
static int
|
||||
insert_loop_mem (mem, data)
|
||||
rtx *mem;
|
||||
void *data;
|
||||
{
|
||||
int i;
|
||||
rtx m = *mem;
|
||||
|
||||
if (m == NULL_RTX)
|
||||
return 0;
|
||||
|
||||
switch (GET_CODE (m))
|
||||
{
|
||||
case MEM:
|
||||
break;
|
||||
|
||||
case CONST_DOUBLE:
|
||||
/* We're not interested in the MEM associated with a
|
||||
CONST_DOUBLE, so there's no need to traverse into this. */
|
||||
return -1;
|
||||
|
||||
default:
|
||||
/* This is not a MEM. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* See if we've already seen this MEM. */
|
||||
for (i = 0; i < loop_mems_idx; ++i)
|
||||
if (rtx_equal_p (m, loop_mems[i].mem))
|
||||
{
|
||||
if (GET_MODE (m) != GET_MODE (loop_mems[i].mem))
|
||||
/* The modes of the two memory accesses are different. If
|
||||
this happens, something tricky is going on, and we just
|
||||
don't optimize accesses to this MEM. */
|
||||
loop_mems[i].optimize = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Resize the array, if necessary. */
|
||||
if (loop_mems_idx == loop_mems_allocated)
|
||||
{
|
||||
if (loop_mems_allocated != 0)
|
||||
loop_mems_allocated *= 2;
|
||||
else
|
||||
loop_mems_allocated = 32;
|
||||
|
||||
loop_mems = (loop_mem_info*)
|
||||
xrealloc (loop_mems,
|
||||
loop_mems_allocated * sizeof (loop_mem_info));
|
||||
}
|
||||
|
||||
/* Actually insert the MEM. */
|
||||
loop_mems[loop_mems_idx].mem = m;
|
||||
/* We can't hoist this MEM out of the loop if it's a BLKmode MEM
|
||||
because we can't put it in a register. We still store it in the
|
||||
table, though, so that if we see the same address later, but in a
|
||||
non-BLK mode, we'll not think we can optimize it at that point. */
|
||||
loop_mems[loop_mems_idx].optimize = (GET_MODE (m) != BLKmode);
|
||||
loop_mems[loop_mems_idx].reg = NULL_RTX;
|
||||
++loop_mems_idx;
|
||||
}
|
||||
|
||||
/* Like load_mems, but also ensures that N_TIMES_SET,
|
||||
MAY_NOT_OPTIMIZE, REG_SINGLE_USAGE, and INSN_COUNT have the correct
|
||||
values after load_mems. */
|
||||
|
||||
static void
|
||||
load_mems_and_recount_loop_regs_set (scan_start, end, loop_top, start,
|
||||
reg_single_usage, insn_count)
|
||||
rtx scan_start;
|
||||
rtx end;
|
||||
rtx loop_top;
|
||||
rtx start;
|
||||
rtx *reg_single_usage;
|
||||
int *insn_count;
|
||||
{
|
||||
int nregs = max_reg_num ();
|
||||
|
||||
load_mems (scan_start, end, loop_top, start);
|
||||
|
||||
/* Recalculate n_times_set and friends since load_mems may have
|
||||
created new registers. */
|
||||
if (max_reg_num () > nregs)
|
||||
{
|
||||
int i;
|
||||
int old_nregs;
|
||||
|
||||
old_nregs = nregs;
|
||||
nregs = max_reg_num ();
|
||||
|
||||
/* Note that we assume here that enough room was allocated in
|
||||
the various arrays to accomodate the extra registers created
|
||||
by load_mems. */
|
||||
bzero ((char *) n_times_set, nregs * sizeof (int));
|
||||
bzero (may_not_optimize, nregs);
|
||||
if (loop_has_call && reg_single_usage)
|
||||
bzero ((char *) reg_single_usage, nregs * sizeof (rtx));
|
||||
|
||||
count_loop_regs_set (loop_top ? loop_top : start, end,
|
||||
may_not_optimize, reg_single_usage,
|
||||
insn_count, nregs);
|
||||
|
||||
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
||||
may_not_optimize[i] = 1, n_times_set[i] = 1;
|
||||
|
||||
/* Set n_times_used for the new registers. */
|
||||
bcopy ((char *) (n_times_set + old_nregs),
|
||||
(char *) (n_times_used + old_nregs),
|
||||
(nregs - old_nregs) * sizeof (int));
|
||||
}
|
||||
}
|
||||
|
||||
/* Move MEMs into registers for the duration of the loop. SCAN_START
|
||||
is the first instruction in the loop (as it is executed). The
|
||||
other parameters are as for next_insn_in_loop. */
|
||||
|
||||
static void
|
||||
load_mems (scan_start, end, loop_top, start)
|
||||
rtx scan_start;
|
||||
rtx end;
|
||||
rtx loop_top;
|
||||
rtx start;
|
||||
{
|
||||
int maybe_never = 0;
|
||||
int i;
|
||||
rtx p;
|
||||
rtx label = NULL_RTX;
|
||||
rtx end_label;
|
||||
|
||||
if (loop_mems_idx > 0)
|
||||
{
|
||||
/* Nonzero if the next instruction may never be executed. */
|
||||
int next_maybe_never = 0;
|
||||
|
||||
/* Check to see if it's possible that some instructions in the
|
||||
loop are never executed. */
|
||||
for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top);
|
||||
p != NULL_RTX && !maybe_never;
|
||||
p = next_insn_in_loop (p, scan_start, end, loop_top))
|
||||
{
|
||||
if (GET_CODE (p) == CODE_LABEL)
|
||||
maybe_never = 1;
|
||||
else if (GET_CODE (p) == JUMP_INSN
|
||||
/* If we enter the loop in the middle, and scan
|
||||
around to the beginning, don't set maybe_never
|
||||
for that. This must be an unconditional jump,
|
||||
otherwise the code at the top of the loop might
|
||||
never be executed. Unconditional jumps are
|
||||
followed a by barrier then loop end. */
|
||||
&& ! (GET_CODE (p) == JUMP_INSN
|
||||
&& JUMP_LABEL (p) == loop_top
|
||||
&& NEXT_INSN (NEXT_INSN (p)) == end
|
||||
&& simplejump_p (p)))
|
||||
{
|
||||
if (!condjump_p (p))
|
||||
/* Something complicated. */
|
||||
maybe_never = 1;
|
||||
else
|
||||
/* If there are any more instructions in the loop, they
|
||||
might not be reached. */
|
||||
next_maybe_never = 1;
|
||||
}
|
||||
else if (next_maybe_never)
|
||||
maybe_never = 1;
|
||||
}
|
||||
|
||||
/* Actually move the MEMs. */
|
||||
for (i = 0; i < loop_mems_idx; ++i)
|
||||
{
|
||||
int j;
|
||||
int written = 0;
|
||||
rtx reg;
|
||||
rtx mem = loop_mems[i].mem;
|
||||
|
||||
if (MEM_VOLATILE_P (mem)
|
||||
|| invariant_p (XEXP (mem, 0)) != 1)
|
||||
/* There's no telling whether or not MEM is modified. */
|
||||
loop_mems[i].optimize = 0;
|
||||
|
||||
/* Go through the MEMs written to in the loop to see if this
|
||||
one is aliased by one of them. */
|
||||
for (j = 0; j < loop_store_mems_idx; ++j)
|
||||
{
|
||||
if (rtx_equal_p (mem, loop_store_mems[j]))
|
||||
written = 1;
|
||||
else if (true_dependence (loop_store_mems[j], VOIDmode,
|
||||
mem, rtx_varies_p))
|
||||
{
|
||||
/* MEM is indeed aliased by this store. */
|
||||
loop_mems[i].optimize = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If this MEM is written to, we must be sure that there
|
||||
are no reads from another MEM that aliases this one. */
|
||||
if (loop_mems[i].optimize && written)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = 0; j < loop_mems_idx; ++j)
|
||||
{
|
||||
if (j == i)
|
||||
continue;
|
||||
else if (true_dependence (mem,
|
||||
VOIDmode,
|
||||
loop_mems[j].mem,
|
||||
rtx_varies_p))
|
||||
{
|
||||
/* It's not safe to hoist loop_mems[i] out of
|
||||
the loop because writes to it might not be
|
||||
seen by reads from loop_mems[j]. */
|
||||
loop_mems[i].optimize = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (maybe_never && may_trap_p (mem))
|
||||
/* We can't access the MEM outside the loop; it might
|
||||
cause a trap that wouldn't have happened otherwise. */
|
||||
loop_mems[i].optimize = 0;
|
||||
|
||||
if (!loop_mems[i].optimize)
|
||||
/* We thought we were going to lift this MEM out of the
|
||||
loop, but later discovered that we could not. */
|
||||
continue;
|
||||
|
||||
/* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
|
||||
order to keep scan_loop from moving stores to this MEM
|
||||
out of the loop just because this REG is neither a
|
||||
user-variable nor used in the loop test. */
|
||||
reg = gen_reg_rtx (GET_MODE (mem));
|
||||
REG_USERVAR_P (reg) = 1;
|
||||
loop_mems[i].reg = reg;
|
||||
|
||||
/* Now, replace all references to the MEM with the
|
||||
corresponding pesudos. */
|
||||
for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top);
|
||||
p != NULL_RTX;
|
||||
p = next_insn_in_loop (p, scan_start, end, loop_top))
|
||||
{
|
||||
rtx_and_int ri = { p, i };
|
||||
for_each_rtx (&p, replace_loop_mem, &ri);
|
||||
}
|
||||
|
||||
if (!apply_change_group ())
|
||||
/* We couldn't replace all occurrences of the MEM. */
|
||||
loop_mems[i].optimize = 0;
|
||||
else
|
||||
{
|
||||
rtx set;
|
||||
|
||||
/* Load the memory immediately before START, which is
|
||||
the NOTE_LOOP_BEG. */
|
||||
set = gen_rtx_SET (GET_MODE (reg), reg, mem);
|
||||
emit_insn_before (set, start);
|
||||
|
||||
if (written)
|
||||
{
|
||||
if (label == NULL_RTX)
|
||||
{
|
||||
/* We must compute the former
|
||||
right-after-the-end label before we insert
|
||||
the new one. */
|
||||
end_label = next_label (end);
|
||||
label = gen_label_rtx ();
|
||||
emit_label_after (label, end);
|
||||
}
|
||||
|
||||
/* Store the memory immediately after END, which is
|
||||
the NOTE_LOOP_END. */
|
||||
set = gen_rtx_SET (GET_MODE (reg), mem, reg);
|
||||
emit_insn_after (set, label);
|
||||
}
|
||||
|
||||
if (loop_dump_stream)
|
||||
{
|
||||
fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
|
||||
REGNO (reg), (written ? "r/w" : "r/o"));
|
||||
print_rtl (loop_dump_stream, mem);
|
||||
fputc ('\n', loop_dump_stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (label != NULL_RTX)
|
||||
{
|
||||
/* Now, we need to replace all references to the previous exit
|
||||
label with the new one. */
|
||||
rtx_pair rr = { end_label, label };
|
||||
|
||||
for (p = start; p != end; p = NEXT_INSN (p))
|
||||
for_each_rtx (&p, replace_label, &rr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace MEM with its associated pseudo register. This function is
|
||||
called from load_mems via for_each_rtx. DATA is actually an
|
||||
rtx_and_int * describing the instruction currently being scanned
|
||||
and the MEM we are currently replacing. */
|
||||
|
||||
static int
|
||||
replace_loop_mem (mem, data)
|
||||
rtx *mem;
|
||||
void *data;
|
||||
{
|
||||
rtx_and_int *ri;
|
||||
rtx insn;
|
||||
int i;
|
||||
rtx m = *mem;
|
||||
|
||||
if (m == NULL_RTX)
|
||||
return 0;
|
||||
|
||||
switch (GET_CODE (m))
|
||||
{
|
||||
case MEM:
|
||||
break;
|
||||
|
||||
case CONST_DOUBLE:
|
||||
/* We're not interested in the MEM associated with a
|
||||
CONST_DOUBLE, so there's no need to traverse into one. */
|
||||
return -1;
|
||||
|
||||
default:
|
||||
/* This is not a MEM. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
ri = (rtx_and_int*) data;
|
||||
i = ri->i;
|
||||
|
||||
if (!rtx_equal_p (loop_mems[i].mem, m))
|
||||
/* This is not the MEM we are currently replacing. */
|
||||
return 0;
|
||||
|
||||
insn = ri->r;
|
||||
|
||||
/* Actually replace the MEM. */
|
||||
validate_change (insn, mem, loop_mems[i].reg, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Replace occurrences of the old exit label for the loop with the new
|
||||
one. DATA is an rtx_pair containing the old and new labels,
|
||||
respectively. */
|
||||
|
||||
static int
|
||||
replace_label (x, data)
|
||||
rtx *x;
|
||||
void *data;
|
||||
{
|
||||
rtx l = *x;
|
||||
rtx old_label = ((rtx_pair*) data)->r1;
|
||||
rtx new_label = ((rtx_pair*) data)->r2;
|
||||
|
||||
if (l == NULL_RTX)
|
||||
return 0;
|
||||
|
||||
if (GET_CODE (l) != LABEL_REF)
|
||||
return 0;
|
||||
|
||||
if (XEXP (l, 0) != old_label)
|
||||
return 0;
|
||||
|
||||
XEXP (l, 0) = new_label;
|
||||
++LABEL_NUSES (new_label);
|
||||
--LABEL_NUSES (old_label);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
54
gcc/recog.c
54
gcc/recog.c
@ -128,19 +128,18 @@ check_asm_operands (x)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Static data for the next two routines.
|
||||
/* Static data for the next two routines. */
|
||||
|
||||
The maximum number of changes supported is defined as the maximum
|
||||
number of operands times 5. This allows for repeated substitutions
|
||||
inside complex indexed address, or, alternatively, changes in up
|
||||
to 5 insns. */
|
||||
typedef struct change_t
|
||||
{
|
||||
rtx object;
|
||||
int old_code;
|
||||
rtx *loc;
|
||||
rtx old;
|
||||
} change_t;
|
||||
|
||||
#define MAX_CHANGE_LOCS (MAX_RECOG_OPERANDS * 5)
|
||||
|
||||
static rtx change_objects[MAX_CHANGE_LOCS];
|
||||
static int change_old_codes[MAX_CHANGE_LOCS];
|
||||
static rtx *change_locs[MAX_CHANGE_LOCS];
|
||||
static rtx change_olds[MAX_CHANGE_LOCS];
|
||||
static change_t *changes;
|
||||
static int changes_allocated;
|
||||
|
||||
static int num_changes = 0;
|
||||
|
||||
@ -174,22 +173,35 @@ validate_change (object, loc, new, in_group)
|
||||
if (old == new || rtx_equal_p (old, new))
|
||||
return 1;
|
||||
|
||||
if (num_changes >= MAX_CHANGE_LOCS
|
||||
|| (in_group == 0 && num_changes != 0))
|
||||
if (in_group == 0 && num_changes != 0)
|
||||
abort ();
|
||||
|
||||
*loc = new;
|
||||
|
||||
/* Save the information describing this change. */
|
||||
change_objects[num_changes] = object;
|
||||
change_locs[num_changes] = loc;
|
||||
change_olds[num_changes] = old;
|
||||
if (num_changes >= changes_allocated)
|
||||
{
|
||||
if (changes_allocated == 0)
|
||||
/* This value allows for repeated substitutions inside complex
|
||||
indexed addresses, or changes in up to 5 insns. */
|
||||
changes_allocated = MAX_RECOG_OPERANDS * 5;
|
||||
else
|
||||
changes_allocated *= 2;
|
||||
|
||||
changes =
|
||||
(change_t*) xrealloc (changes,
|
||||
sizeof (change_t) * changes_allocated);
|
||||
}
|
||||
|
||||
changes[num_changes].object = object;
|
||||
changes[num_changes].loc = loc;
|
||||
changes[num_changes].old = old;
|
||||
|
||||
if (object && GET_CODE (object) != MEM)
|
||||
{
|
||||
/* Set INSN_CODE to force rerecognition of insn. Save old code in
|
||||
case invalid. */
|
||||
change_old_codes[num_changes] = INSN_CODE (object);
|
||||
changes[num_changes].old_code = INSN_CODE (object);
|
||||
INSN_CODE (object) = -1;
|
||||
}
|
||||
|
||||
@ -224,7 +236,7 @@ apply_change_group ()
|
||||
|
||||
for (i = 0; i < num_changes; i++)
|
||||
{
|
||||
rtx object = change_objects[i];
|
||||
rtx object = changes[i].object;
|
||||
|
||||
if (object == 0)
|
||||
continue;
|
||||
@ -319,9 +331,9 @@ cancel_changes (num)
|
||||
they were made. */
|
||||
for (i = num_changes - 1; i >= num; i--)
|
||||
{
|
||||
*change_locs[i] = change_olds[i];
|
||||
if (change_objects[i] && GET_CODE (change_objects[i]) != MEM)
|
||||
INSN_CODE (change_objects[i]) = change_old_codes[i];
|
||||
*changes[i].loc = changes[i].old;
|
||||
if (changes[i].object && GET_CODE (changes[i].object) != MEM)
|
||||
INSN_CODE (changes[i].object) = changes[i].old_code;
|
||||
}
|
||||
num_changes = num;
|
||||
}
|
||||
|
@ -1000,6 +1000,8 @@ extern int inequality_comparison_p PROTO((rtx));
|
||||
extern rtx replace_rtx PROTO((rtx, rtx, rtx));
|
||||
extern rtx replace_regs PROTO((rtx, rtx *, int, int));
|
||||
extern int computed_jump_p PROTO((rtx));
|
||||
typedef int (*rtx_function) PROTO((rtx *, void *));
|
||||
extern int for_each_rtx PROTO((rtx *, rtx_function, void *));
|
||||
|
||||
/* Maximum number of parallel sets and clobbers in any insn in this fn.
|
||||
Always at least 3, since the combiner could put that many togetherm
|
||||
|
Loading…
Reference in New Issue
Block a user