toplev.c (rest_of_compilation): Set bct_p on second call to loop_optimize.

* toplev.c (rest_of_compilation): Set bct_p on second call to
        loop_optimize.
        * loop.c (loop_optimize, scan_loop, strength_reduce): New argument
        bct_p.
        (strength_reduce): Only call analyze_loop_iterations and
        insert_bct if bct_p set.
        (check_dbra_loop): Fix typo.
        (insert_bct): Use word_mode instead of SImode.
        (instrument_loop_bct): Likewise.  Do not delete iteration count
        condition code generation insn.  Initialize iteration count before
        loop start.
        * rtl.h (loop_optimize): Update prototype.
        * ginclude/va-ppc.h (va_arg): longlong types in overflow area are
        not doubleword aligned.
        * rs6000.c (optimization_options): New function.
        (secondary_reload_class): Only call true_regnum for PSEUDO_REGs.
        * rs6000.h (OPTIMIZATION_OPTIONS): Define.
        (REG_ALLOC_ORDER): Allocate highest numbered condition registers
        first; cr1 can be used for FP record condition insns.

From-SVN: r22471
This commit is contained in:
David Edelsohn 1998-09-18 20:55:00 +00:00 committed by David Edelsohn
parent 735955227e
commit 5accd82209
7 changed files with 100 additions and 36 deletions

View File

@ -1,3 +1,27 @@
Fri Sep 18 23:50:56 1998 David Edelsohn <edelsohn@mhpcc.edu>
* toplev.c (rest_of_compilation): Set bct_p on second call to
loop_optimize.
* loop.c (loop_optimize, scan_loop, strength_reduce): New argument
bct_p.
(strength_reduce): Only call analyze_loop_iterations and
insert_bct if bct_p set.
(check_dbra_loop): Fix typo.
(insert_bct): Use word_mode instead of SImode.
(instrument_loop_bct): Likewise. Do not delete iteration count
condition code generation insn. Initialize iteration count before
loop start.
* rtl.h (loop_optimize): Update prototype.
* ginclude/va-ppc.h (va_arg): longlong types in overflow area are
not doubleword aligned.
* rs6000.c (optimization_options): New function.
(secondary_reload_class): Only call true_regnum for PSEUDO_REGs.
* rs6000.h (OPTIMIZATION_OPTIONS): Define.
(REG_ALLOC_ORDER): Allocate highest numbered condition registers
first; cr1 can be used for FP record condition insns.
Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com>
* config/m32r/m32r.h (m32r_block_immediate_operand): Add to

View File

@ -347,6 +347,20 @@ rs6000_override_options (default_cpu)
SUBTARGET_OVERRIDE_OPTIONS;
#endif
}
/* Adjust default optimization settings for the given optimization LEVEL.
   rs6000.h maps OPTIMIZATION_OPTIONS (LEVEL, SIZE) onto this function.
   SIZE is accepted but not used.  With the body compiled out below,
   the function currently has no effect.  */
void
optimization_options (level, size)
int level;
int size ATTRIBUTE_UNUSED;
{
/* NOTE(review): the entire body is disabled by "#if 0", so LEVEL is
   effectively unused as well.  Presumably this is a temporary measure;
   confirm before re-enabling.  Even without the "#if 0", the code would
   only be compiled when HAIFA is defined.  */
#if 0
#ifdef HAIFA
/* When optimizing, enable use of BCT instruction. */
if (level >= 1)
flag_branch_on_count_reg = 1;
#endif
#endif
}
/* Do anything needed at the start of the asm file. */
@ -1305,14 +1319,18 @@ function_arg_padding (mode, type)
Windows NT wants anything >= 8 bytes to be double word aligned.
V.4 wants long longs to be double word aligned. */
V.4 wants long longs to be double word aligned.
FP emulation: double precision passed, returned, and same alignment
as long long. */
int
function_arg_boundary (mode, type)
enum machine_mode mode;
tree type;
{
if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) && mode == DImode)
if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
&& ((mode == DImode) || (TARGET_SOFT_FLOAT && mode == DFmode)))
return 64;
if (DEFAULT_ABI != ABI_NT || TARGET_64BIT)
@ -2188,10 +2206,7 @@ secondary_reload_class (class, mode, in)
enum machine_mode mode ATTRIBUTE_UNUSED;
rtx in;
{
int regno = true_regnum (in);
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
int regno;
/* We can not copy a symbolic operand directly into anything other than
BASE_REGS for TARGET_ELF. So indicate that a register from BASE_REGS
@ -2203,6 +2218,25 @@ secondary_reload_class (class, mode, in)
|| GET_CODE (in) == CONST))
return BASE_REGS;
if (GET_CODE (in) == REG)
{
regno = REGNO (in);
if (regno >= FIRST_PSEUDO_REGISTER)
{
regno = true_regnum (in);
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
}
}
else if (GET_CODE (in) == SUBREG)
{
regno = true_regnum (in);
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
}
else
regno = -1;
/* We can place anything into GENERAL_REGS and can put GENERAL_REGS
into anything. */
if (class == GENERAL_REGS || class == BASE_REGS

View File

@ -473,10 +473,17 @@ extern int rs6000_debug_arg; /* debug argument handling */
defined, is executed once just after all the command options have
been parsed.
Don't use this macro to turn on various extra optimizations for
`-O'. That is what `OPTIMIZATION_OPTIONS' is for.
On the RS/6000 this is used to define the target cpu type. */
#define OVERRIDE_OPTIONS rs6000_override_options (TARGET_CPU_DEFAULT)
/* Define this to change the optimizations performed by default. */
#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) optimization_options(LEVEL,SIZE)
/* Show we can debug even without a frame pointer. */
#define CAN_DEBUG_WITHOUT_FP
@ -712,9 +719,10 @@ extern int rs6000_debug_arg; /* debug argument handling */
fp13 - fp2 (not saved; incoming fp arg registers)
fp1 (not saved; return value)
fp31 - fp14 (saved; order given to save least number)
cr1, cr6, cr7 (not saved or special)
cr7, cr6 (not saved or special)
cr1 (not saved, but used for FP operations)
cr0 (not saved, but used for arithmetic operations)
cr2, cr3, cr4 (saved)
cr4, cr3, cr2 (saved)
r0 (not saved; cannot be base reg)
r9 (not saved; best for TImode)
r11, r10, r8-r4 (not saved; highest used first to make less conflict)
@ -732,7 +740,7 @@ extern int rs6000_debug_arg; /* debug argument handling */
33, \
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
50, 49, 48, 47, 46, \
69, 74, 75, 68, 70, 71, 72, \
75, 74, 69, 68, 72, 71, 70, \
0, \
9, 11, 10, 8, 7, 6, 5, 4, \
3, \

View File

@ -158,9 +158,6 @@ __extension__ (*({ \
} \
else \
{ \
if (__va_longlong_p(TYPE) && ((long)__va_overflow(AP) & 4) != 0) \
__va_overflow(AP) += 4; \
\
__ptr = (TYPE *) (void *) (__va_overflow(AP)); \
__va_overflow(AP) += __va_size (TYPE) * sizeof (long); \
} \

View File

@ -310,7 +310,7 @@ static void count_loop_regs_set PROTO((rtx, rtx, varray_type, varray_type,
int *, int));
static void note_addr_stored PROTO((rtx, rtx));
static int loop_reg_used_before_p PROTO((rtx, rtx, rtx, rtx, rtx));
static void scan_loop PROTO((rtx, rtx, int));
static void scan_loop PROTO((rtx, rtx, int, int));
#if 0
static void replace_call_address PROTO((rtx, rtx, rtx));
#endif
@ -324,7 +324,7 @@ static int rtx_equal_for_loop_p PROTO((rtx, rtx, struct movable *));
static void add_label_notes PROTO((rtx, rtx));
static void move_movables PROTO((struct movable *, int, int, rtx, rtx, int));
static int count_nonfixed_reads PROTO((rtx));
static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int));
static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int, int));
static void find_single_use_in_loop PROTO((rtx, rtx, varray_type));
static int valid_initial_value_p PROTO((rtx, rtx, int, rtx));
static void find_mem_givs PROTO((rtx, rtx, int, rtx, rtx));
@ -440,11 +440,11 @@ init_loop ()
(or 0 if none should be output). */
void
loop_optimize (f, dumpfile, unroll_p)
loop_optimize (f, dumpfile, unroll_p, bct_p)
/* f is the first instruction of a chain of insns for one function */
rtx f;
FILE *dumpfile;
int unroll_p;
int unroll_p, bct_p;
{
register rtx insn;
register int i;
@ -589,7 +589,7 @@ loop_optimize (f, dumpfile, unroll_p)
for (i = max_loop_num-1; i >= 0; i--)
if (! loop_invalid[i] && loop_number_loop_ends[i])
scan_loop (loop_number_loop_starts[i], loop_number_loop_ends[i],
unroll_p);
unroll_p, bct_p);
/* If debugging and unrolling loops, we must replicate the tree nodes
corresponding to the blocks inside the loop, so that the original one
@ -643,9 +643,9 @@ next_insn_in_loop (insn, start, end, loop_top)
write, then we can also mark the memory read as invariant. */
static void
scan_loop (loop_start, end, unroll_p)
scan_loop (loop_start, end, unroll_p, bct_p)
rtx loop_start, end;
int unroll_p;
int unroll_p, bct_p;
{
register int i;
rtx p;
@ -1185,7 +1185,7 @@ scan_loop (loop_start, end, unroll_p)
{
the_movables = movables;
strength_reduce (scan_start, end, loop_top,
insn_count, loop_start, end, unroll_p);
insn_count, loop_start, end, unroll_p, bct_p);
}
VARRAY_FREE (n_times_set);
@ -3579,14 +3579,14 @@ static rtx addr_placeholder;
static void
strength_reduce (scan_start, end, loop_top, insn_count,
loop_start, loop_end, unroll_p)
loop_start, loop_end, unroll_p, bct_p)
rtx scan_start;
rtx end;
rtx loop_top;
int insn_count;
rtx loop_start;
rtx loop_end;
int unroll_p;
int unroll_p, bct_p;
{
rtx p;
rtx set;
@ -4106,7 +4106,7 @@ strength_reduce (scan_start, end, loop_top, insn_count,
the loop. Unrolling may update part of this information, and the
correct data will be used for generating the BCT. */
#ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count)
if (HAVE_decrement_and_branch_on_count && bct_p)
analyze_loop_iterations (loop_start, loop_end);
#endif
#endif /* HAIFA */
@ -4613,7 +4613,7 @@ strength_reduce (scan_start, end, loop_top, insn_count,
#ifdef HAIFA
/* instrument the loop with bct insn */
#ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count)
if (HAVE_decrement_and_branch_on_count && bct_p)
insert_bct (loop_start, loop_end);
#endif
#endif /* HAIFA */
@ -6981,7 +6981,7 @@ check_dbra_loop (loop_end, insn_count, loop_start)
/* If we have a decrement_and_branch_on_count, prefer
the NE test, since this will allow that instruction to
be generated. */
#if ! defined (HAVE_decrement_and_branch_on_zero) && defined (HAVE_decrement_and_branch_on_count)
#if ! defined (HAVE_decrement_and_branch_until_zero) && defined (HAVE_decrement_and_branch_on_count)
&& (add_val != 1 || ! vtop)
#endif
&& GET_CODE (comparison_value) == CONST_INT
@ -8189,7 +8189,7 @@ insert_bct (loop_start, loop_end)
/* the only machine mode we work with - is the integer of the size that the
machine has */
enum machine_mode loop_var_mode = SImode;
enum machine_mode loop_var_mode = word_mode;
int loop_num = uid_loop_num [INSN_UID (loop_start)];
@ -8284,7 +8284,8 @@ insert_bct (loop_start, loop_end)
/* try to instrument the loop. */
/* Handle the simpler case, where the bounds are known at compile time. */
if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT)
if (GET_CODE (initial_value) == CONST_INT
&& GET_CODE (comparison_value) == CONST_INT)
{
int n_iterations;
int increment_value_abs = INTVAL (increment) * increment_direction;
@ -8459,15 +8460,15 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
rtx start_label;
rtx sequence;
enum machine_mode loop_var_mode = SImode;
enum machine_mode loop_var_mode = word_mode;
if (HAVE_decrement_and_branch_on_count)
{
if (loop_dump_stream)
fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
/* eliminate the check on the old variable */
delete_insn (PREV_INSN (loop_end));
/* Discard original jump to continue loop. Original compare result
may still be live, so it cannot be discarded explicitly. */
delete_insn (PREV_INSN (loop_end));
/* insert the label which will delimit the start of the loop */
@ -8488,12 +8489,13 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
sequence = gen_sequence ();
end_sequence ();
emit_insn_after (sequence, loop_start);
emit_insn_before (sequence, loop_start);
/* insert new comparison on the count register instead of the
old one, generating the needed BCT pattern (that will be
later recognized by assembly generation phase). */
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label),
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2,
start_label),
loop_end);
LABEL_NUSES (start_label)++;
}
@ -8935,4 +8937,3 @@ replace_label (x, data)
return 0;
}

View File

@ -1350,7 +1350,7 @@ extern void print_inline_rtx PROTO ((FILE *, rtx, int));
/* In loop.c */
extern void init_loop PROTO ((void));
#ifdef BUFSIZ
extern void loop_optimize PROTO ((rtx, FILE *, int));
extern void loop_optimize PROTO ((rtx, FILE *, int, int));
#endif
extern void record_excess_regs PROTO ((rtx, rtx, rtx *));

View File

@ -3519,7 +3519,7 @@ rest_of_compilation (decl)
{
/* We only want to perform unrolling once. */
loop_optimize (insns, rtl_dump_file, 0);
loop_optimize (insns, rtl_dump_file, 0, 0);
/* The first call to loop_optimize makes some instructions
@ -3532,7 +3532,7 @@ rest_of_compilation (decl)
analysis code depends on this information. */
reg_scan (insns, max_reg_num (), 1);
}
loop_optimize (insns, rtl_dump_file, flag_unroll_loops);
loop_optimize (insns, rtl_dump_file, flag_unroll_loops, 1);
});
/* Dump rtl code after loop opt, if we are doing that. */