toplev.c (rest_of_compilation): Set bct_p on second call to loop_optimize.
* toplev.c (rest_of_compilation): Set bct_p on second call to loop_optimize. * loop.c (loop_optimize, scan_loop, strength_reduce): New argument bct_p. (strength_reduce): Only call analyze_loop_iterations and insert_bct if bct_p set. (check_dbra_loop): Fix typo. (insert_bct): Use word_mode instead of SImode. (instrument_loop_bct): Likewise. Do not delete iteration count condition code generation insn. Initialize iteration count before loop start. * rtl.h (loop_optimize): Update prototype. * ginclude/va-ppc.h (va_arg): longlong types in overflow area are not doubleword aligned. * rs6000.c (optimization_options): New function. (secondary_reload_class): Only call true_regnum for PSEUDO_REGs. * rs6000.h (OPTIMIZATION_OPTIONS): Define. (REG_ALLOC_ORDER): Allocate highest numbered condition registers first; cr1 can be used for FP record condition insns. From-SVN: r22471
This commit is contained in:
parent
735955227e
commit
5accd82209
@ -1,3 +1,27 @@
|
||||
Fri Sep 18 23:50:56 1998 David Edelsohn <edelsohn@mhpcc.edu>
|
||||
|
||||
* toplev.c (rest_of_compilation): Set bct_p on second call to
|
||||
loop_optimize.
|
||||
* loop.c (loop_optimize, scan_loop, strength_reduce): New argument
|
||||
bct_p.
|
||||
(strength_reduce): Only call analyze_loop_iterations and
|
||||
insert_bct if bct_p set.
|
||||
(check_dbra_loop): Fix typo.
|
||||
(insert_bct): Use word_mode instead of SImode.
|
||||
(instrument_loop_bct): Likewise. Do not delete iteration count
|
||||
condition code generation insn. Initialize iteration count before
|
||||
loop start.
|
||||
* rtl.h (loop_optimize): Update prototype.
|
||||
|
||||
* ginclude/va-ppc.h (va_arg): longlong types in overflow area are
|
||||
not doubleword aligned.
|
||||
|
||||
* rs6000.c (optimization_options): New function.
|
||||
(secondary_reload_class): Only call true_regnum for PSEUDO_REGs.
|
||||
* rs6000.h (OPTIMIZATION_OPTIONS): Define.
|
||||
(REG_ALLOC_ORDER): Allocate highest numbered condition registers
|
||||
first; cr1 can be used for FP record condition insns.
|
||||
|
||||
Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com>
|
||||
|
||||
* config/m32r/m32r.h (m32r_block_immediate_operand): Add to
|
||||
|
@ -347,6 +347,20 @@ rs6000_override_options (default_cpu)
|
||||
SUBTARGET_OVERRIDE_OPTIONS;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
optimization_options (level, size)
|
||||
int level;
|
||||
int size ATTRIBUTE_UNUSED;
|
||||
{
|
||||
#if 0
|
||||
#ifdef HAIFA
|
||||
/* When optimizing, enable use of BCT instruction. */
|
||||
if (level >= 1)
|
||||
flag_branch_on_count_reg = 1;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Do anything needed at the start of the asm file. */
|
||||
|
||||
@ -1305,14 +1319,18 @@ function_arg_padding (mode, type)
|
||||
|
||||
Windows NT wants anything >= 8 bytes to be double word aligned.
|
||||
|
||||
V.4 wants long longs to be double word aligned. */
|
||||
V.4 wants long longs to be double word aligned.
|
||||
|
||||
FP emulation: double precision passed, returned, and same alignment
|
||||
as long long. */
|
||||
|
||||
int
|
||||
function_arg_boundary (mode, type)
|
||||
enum machine_mode mode;
|
||||
tree type;
|
||||
{
|
||||
if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) && mode == DImode)
|
||||
if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
|
||||
&& ((mode == DImode) || (TARGET_SOFT_FLOAT && mode == DFmode)))
|
||||
return 64;
|
||||
|
||||
if (DEFAULT_ABI != ABI_NT || TARGET_64BIT)
|
||||
@ -2188,10 +2206,7 @@ secondary_reload_class (class, mode, in)
|
||||
enum machine_mode mode ATTRIBUTE_UNUSED;
|
||||
rtx in;
|
||||
{
|
||||
int regno = true_regnum (in);
|
||||
|
||||
if (regno >= FIRST_PSEUDO_REGISTER)
|
||||
regno = -1;
|
||||
int regno;
|
||||
|
||||
/* We can not copy a symbolic operand directly into anything other than
|
||||
BASE_REGS for TARGET_ELF. So indicate that a register from BASE_REGS
|
||||
@ -2203,6 +2218,25 @@ secondary_reload_class (class, mode, in)
|
||||
|| GET_CODE (in) == CONST))
|
||||
return BASE_REGS;
|
||||
|
||||
if (GET_CODE (in) == REG)
|
||||
{
|
||||
regno = REGNO (in);
|
||||
if (regno >= FIRST_PSEUDO_REGISTER)
|
||||
{
|
||||
regno = true_regnum (in);
|
||||
if (regno >= FIRST_PSEUDO_REGISTER)
|
||||
regno = -1;
|
||||
}
|
||||
}
|
||||
else if (GET_CODE (in) == SUBREG)
|
||||
{
|
||||
regno = true_regnum (in);
|
||||
if (regno >= FIRST_PSEUDO_REGISTER)
|
||||
regno = -1;
|
||||
}
|
||||
else
|
||||
regno = -1;
|
||||
|
||||
/* We can place anything into GENERAL_REGS and can put GENERAL_REGS
|
||||
into anything. */
|
||||
if (class == GENERAL_REGS || class == BASE_REGS
|
||||
|
@ -473,10 +473,17 @@ extern int rs6000_debug_arg; /* debug argument handling */
|
||||
defined, is executed once just after all the command options have
|
||||
been parsed.
|
||||
|
||||
Don't use this macro to turn on various extra optimizations for
|
||||
`-O'. That is what `OPTIMIZATION_OPTIONS' is for.
|
||||
|
||||
On the RS/6000 this is used to define the target cpu type. */
|
||||
|
||||
#define OVERRIDE_OPTIONS rs6000_override_options (TARGET_CPU_DEFAULT)
|
||||
|
||||
/* Define this to change the optimizations performed by default. */
|
||||
#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) optimization_options(LEVEL,SIZE)
|
||||
|
||||
|
||||
/* Show we can debug even without a frame pointer. */
|
||||
#define CAN_DEBUG_WITHOUT_FP
|
||||
|
||||
@ -712,9 +719,10 @@ extern int rs6000_debug_arg; /* debug argument handling */
|
||||
fp13 - fp2 (not saved; incoming fp arg registers)
|
||||
fp1 (not saved; return value)
|
||||
fp31 - fp14 (saved; order given to save least number)
|
||||
cr1, cr6, cr7 (not saved or special)
|
||||
cr7, cr6 (not saved or special)
|
||||
cr1 (not saved, but used for FP operations)
|
||||
cr0 (not saved, but used for arithmetic operations)
|
||||
cr2, cr3, cr4 (saved)
|
||||
cr4, cr3, cr2 (saved)
|
||||
r0 (not saved; cannot be base reg)
|
||||
r9 (not saved; best for TImode)
|
||||
r11, r10, r8-r4 (not saved; highest used first to make less conflict)
|
||||
@ -732,7 +740,7 @@ extern int rs6000_debug_arg; /* debug argument handling */
|
||||
33, \
|
||||
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
|
||||
50, 49, 48, 47, 46, \
|
||||
69, 74, 75, 68, 70, 71, 72, \
|
||||
75, 74, 69, 68, 72, 71, 70, \
|
||||
0, \
|
||||
9, 11, 10, 8, 7, 6, 5, 4, \
|
||||
3, \
|
||||
|
@ -158,9 +158,6 @@ __extension__ (*({ \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if (__va_longlong_p(TYPE) && ((long)__va_overflow(AP) & 4) != 0) \
|
||||
__va_overflow(AP) += 4; \
|
||||
\
|
||||
__ptr = (TYPE *) (void *) (__va_overflow(AP)); \
|
||||
__va_overflow(AP) += __va_size (TYPE) * sizeof (long); \
|
||||
} \
|
||||
|
43
gcc/loop.c
43
gcc/loop.c
@ -310,7 +310,7 @@ static void count_loop_regs_set PROTO((rtx, rtx, varray_type, varray_type,
|
||||
int *, int));
|
||||
static void note_addr_stored PROTO((rtx, rtx));
|
||||
static int loop_reg_used_before_p PROTO((rtx, rtx, rtx, rtx, rtx));
|
||||
static void scan_loop PROTO((rtx, rtx, int));
|
||||
static void scan_loop PROTO((rtx, rtx, int, int));
|
||||
#if 0
|
||||
static void replace_call_address PROTO((rtx, rtx, rtx));
|
||||
#endif
|
||||
@ -324,7 +324,7 @@ static int rtx_equal_for_loop_p PROTO((rtx, rtx, struct movable *));
|
||||
static void add_label_notes PROTO((rtx, rtx));
|
||||
static void move_movables PROTO((struct movable *, int, int, rtx, rtx, int));
|
||||
static int count_nonfixed_reads PROTO((rtx));
|
||||
static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int));
|
||||
static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int, int));
|
||||
static void find_single_use_in_loop PROTO((rtx, rtx, varray_type));
|
||||
static int valid_initial_value_p PROTO((rtx, rtx, int, rtx));
|
||||
static void find_mem_givs PROTO((rtx, rtx, int, rtx, rtx));
|
||||
@ -440,11 +440,11 @@ init_loop ()
|
||||
(or 0 if none should be output). */
|
||||
|
||||
void
|
||||
loop_optimize (f, dumpfile, unroll_p)
|
||||
loop_optimize (f, dumpfile, unroll_p, bct_p)
|
||||
/* f is the first instruction of a chain of insns for one function */
|
||||
rtx f;
|
||||
FILE *dumpfile;
|
||||
int unroll_p;
|
||||
int unroll_p, bct_p;
|
||||
{
|
||||
register rtx insn;
|
||||
register int i;
|
||||
@ -589,7 +589,7 @@ loop_optimize (f, dumpfile, unroll_p)
|
||||
for (i = max_loop_num-1; i >= 0; i--)
|
||||
if (! loop_invalid[i] && loop_number_loop_ends[i])
|
||||
scan_loop (loop_number_loop_starts[i], loop_number_loop_ends[i],
|
||||
unroll_p);
|
||||
unroll_p, bct_p);
|
||||
|
||||
/* If debugging and unrolling loops, we must replicate the tree nodes
|
||||
corresponding to the blocks inside the loop, so that the original one
|
||||
@ -643,9 +643,9 @@ next_insn_in_loop (insn, start, end, loop_top)
|
||||
write, then we can also mark the memory read as invariant. */
|
||||
|
||||
static void
|
||||
scan_loop (loop_start, end, unroll_p)
|
||||
scan_loop (loop_start, end, unroll_p, bct_p)
|
||||
rtx loop_start, end;
|
||||
int unroll_p;
|
||||
int unroll_p, bct_p;
|
||||
{
|
||||
register int i;
|
||||
rtx p;
|
||||
@ -1185,7 +1185,7 @@ scan_loop (loop_start, end, unroll_p)
|
||||
{
|
||||
the_movables = movables;
|
||||
strength_reduce (scan_start, end, loop_top,
|
||||
insn_count, loop_start, end, unroll_p);
|
||||
insn_count, loop_start, end, unroll_p, bct_p);
|
||||
}
|
||||
|
||||
VARRAY_FREE (n_times_set);
|
||||
@ -3579,14 +3579,14 @@ static rtx addr_placeholder;
|
||||
|
||||
static void
|
||||
strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
loop_start, loop_end, unroll_p)
|
||||
loop_start, loop_end, unroll_p, bct_p)
|
||||
rtx scan_start;
|
||||
rtx end;
|
||||
rtx loop_top;
|
||||
int insn_count;
|
||||
rtx loop_start;
|
||||
rtx loop_end;
|
||||
int unroll_p;
|
||||
int unroll_p, bct_p;
|
||||
{
|
||||
rtx p;
|
||||
rtx set;
|
||||
@ -4106,7 +4106,7 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
the loop. Unrolling may update part of this information, and the
|
||||
correct data will be used for generating the BCT. */
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
if (HAVE_decrement_and_branch_on_count && bct_p)
|
||||
analyze_loop_iterations (loop_start, loop_end);
|
||||
#endif
|
||||
#endif /* HAIFA */
|
||||
@ -4613,7 +4613,7 @@ strength_reduce (scan_start, end, loop_top, insn_count,
|
||||
#ifdef HAIFA
|
||||
/* instrument the loop with bct insn */
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
if (HAVE_decrement_and_branch_on_count && bct_p)
|
||||
insert_bct (loop_start, loop_end);
|
||||
#endif
|
||||
#endif /* HAIFA */
|
||||
@ -6981,7 +6981,7 @@ check_dbra_loop (loop_end, insn_count, loop_start)
|
||||
/* If we have a decrement_and_branch_on_count, prefer
|
||||
the NE test, since this will allow that instruction to
|
||||
be generated. */
|
||||
#if ! defined (HAVE_decrement_and_branch_on_zero) && defined (HAVE_decrement_and_branch_on_count)
|
||||
#if ! defined (HAVE_decrement_and_branch_until_zero) && defined (HAVE_decrement_and_branch_on_count)
|
||||
&& (add_val != 1 || ! vtop)
|
||||
#endif
|
||||
&& GET_CODE (comparison_value) == CONST_INT
|
||||
@ -8189,7 +8189,7 @@ insert_bct (loop_start, loop_end)
|
||||
|
||||
/* the only machine mode we work with - is the integer of the size that the
|
||||
machine has */
|
||||
enum machine_mode loop_var_mode = SImode;
|
||||
enum machine_mode loop_var_mode = word_mode;
|
||||
|
||||
int loop_num = uid_loop_num [INSN_UID (loop_start)];
|
||||
|
||||
@ -8284,7 +8284,8 @@ insert_bct (loop_start, loop_end)
|
||||
/* try to instrument the loop. */
|
||||
|
||||
/* Handle the simpler case, where the bounds are known at compile time. */
|
||||
if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT)
|
||||
if (GET_CODE (initial_value) == CONST_INT
|
||||
&& GET_CODE (comparison_value) == CONST_INT)
|
||||
{
|
||||
int n_iterations;
|
||||
int increment_value_abs = INTVAL (increment) * increment_direction;
|
||||
@ -8459,15 +8460,15 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
|
||||
rtx start_label;
|
||||
|
||||
rtx sequence;
|
||||
enum machine_mode loop_var_mode = SImode;
|
||||
enum machine_mode loop_var_mode = word_mode;
|
||||
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
|
||||
|
||||
/* eliminate the check on the old variable */
|
||||
delete_insn (PREV_INSN (loop_end));
|
||||
/* Discard original jump to continue loop. Original compare result
|
||||
may still be live, so it cannot be discarded explicitly. */
|
||||
delete_insn (PREV_INSN (loop_end));
|
||||
|
||||
/* insert the label which will delimit the start of the loop */
|
||||
@ -8488,12 +8489,13 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
|
||||
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, loop_start);
|
||||
emit_insn_before (sequence, loop_start);
|
||||
|
||||
/* insert new comparison on the count register instead of the
|
||||
old one, generating the needed BCT pattern (that will be
|
||||
later recognized by assembly generation phase). */
|
||||
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label),
|
||||
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2,
|
||||
start_label),
|
||||
loop_end);
|
||||
LABEL_NUSES (start_label)++;
|
||||
}
|
||||
@ -8935,4 +8937,3 @@ replace_label (x, data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1350,7 +1350,7 @@ extern void print_inline_rtx PROTO ((FILE *, rtx, int));
|
||||
/* In loop.c */
|
||||
extern void init_loop PROTO ((void));
|
||||
#ifdef BUFSIZ
|
||||
extern void loop_optimize PROTO ((rtx, FILE *, int));
|
||||
extern void loop_optimize PROTO ((rtx, FILE *, int, int));
|
||||
#endif
|
||||
extern void record_excess_regs PROTO ((rtx, rtx, rtx *));
|
||||
|
||||
|
@ -3519,7 +3519,7 @@ rest_of_compilation (decl)
|
||||
{
|
||||
/* We only want to perform unrolling once. */
|
||||
|
||||
loop_optimize (insns, rtl_dump_file, 0);
|
||||
loop_optimize (insns, rtl_dump_file, 0, 0);
|
||||
|
||||
|
||||
/* The first call to loop_optimize makes some instructions
|
||||
@ -3532,7 +3532,7 @@ rest_of_compilation (decl)
|
||||
analysis code depends on this information. */
|
||||
reg_scan (insns, max_reg_num (), 1);
|
||||
}
|
||||
loop_optimize (insns, rtl_dump_file, flag_unroll_loops);
|
||||
loop_optimize (insns, rtl_dump_file, flag_unroll_loops, 1);
|
||||
});
|
||||
|
||||
/* Dump rtl code after loop opt, if we are doing that. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user