unroll.c (loop_iteration_var, [...]): No longer static.

* unroll.c (loop_iteration_var, loop_initial_value, loop_increment,
        loop_final_value, loop_comparison_code): No longer static.
        (unroll_loop): Delete loop_start_value update.
        * loop.h (loop_iteration_var, loop_initial_value, loop_increment,
        loop_final_value, loop_comparison_code): Extern.
        (loop_start_value): Delete extern.
        * loop.c (loop_can_insert_bct, loop_increment, loop_start_value,
        loop_comparison_value, loop_comparison_code): Delete.
        (loop_optimize): Remove initialization for deleted variables.
        (strength_reduce): Delete analyze_loop_iterations call.  Only call
        insert_bct if flag_branch_on_count_reg set.
        (analyze_loop_iterations): Delete.
        (insert_bct): Remove iteration count calculation.  Move checks for
        viable BCT optimization to here.  Obtain iteration count from
        loop_iterations and correct for unrolling.  Check for enough
        iterations to be beneficial.  Comment out runtime iteration count
        case.
        (insert_bct): Print iteration count in dump file.  Remove
        loop_var_mode and use word_mode directly.
        * rs6000.h (processor_type): Add PROCESSOR_PPC604e.
        * rs6000.c (rs6000_override_options): Use it.
        (optimization_options): Enable use of flag_branch_on_count_reg.
        * rs6000.md (define_function_unit): Describe 604e.

From-SVN: r22852
This commit is contained in:
David Edelsohn 1998-10-05 22:03:25 +00:00 committed by David Edelsohn
parent d64db93fbc
commit cac8ce95a1
7 changed files with 293 additions and 565 deletions

View File

@ -1,3 +1,30 @@
Mon Oct 5 22:43:36 1998 David Edelsohn <edelsohn@mhpcc.edu>
* unroll.c (loop_iteration_var, loop_initial_value, loop_increment,
loop_final_value, loop_comparison_code): No longer static.
(unroll_loop): Delete loop_start_value update.
* loop.h (loop_iteration_var, loop_initial_value, loop_increment,
loop_final_value, loop_comparison_code): Extern.
(loop_start_value): Delete extern.
* loop.c (loop_can_insert_bct, loop_increment, loop_start_value,
loop_comparison_value, loop_comparison_code): Delete.
(loop_optimize): Remove initialization for deleted variables.
(strength_reduce): Delete analyze_loop_iterations call. Only call
insert_bct if flag_branch_on_count_reg set.
(analyze_loop_iterations): Delete.
(insert_bct): Remove iteration count calculation. Move checks for
viable BCT optimization to here. Obtain iteration count from
loop_iterations and correct for unrolling. Check for enough
iterations to be beneficial. Comment out runtime iteration count
case.
(insert_bct): Print iteration count in dump file. Remove
loop_var_mode and use word_mode directly.
* rs6000.h (processor_type): Add PROCESSOR_PPC604e.
* rs6000.c (rs6000_override_options): Use it.
(optimization_options): Enable use of flag_branch_on_count_reg.
* rs6000.md (define_function_unit): Describe 604e.
1998-10-05 Herman A.J. ten Brugge <Haj.Ten.Brugge@net.HCC.nl>
* loop.c (move_movables): Corrected threshold calculation for

View File

@ -228,7 +228,7 @@ rs6000_override_options (default_cpu)
{"604", PROCESSOR_PPC604,
MASK_POWERPC | MASK_PPC_GFXOPT | MASK_NEW_MNEMONICS,
POWER_MASKS | MASK_PPC_GPOPT | MASK_POWERPC64},
{"604e", PROCESSOR_PPC604,
{"604e", PROCESSOR_PPC604e,
MASK_POWERPC | MASK_PPC_GFXOPT | MASK_NEW_MNEMONICS,
POWER_MASKS | MASK_PPC_GPOPT | MASK_POWERPC64},
{"620", PROCESSOR_PPC620,
@ -353,13 +353,11 @@ optimization_options (level, size)
int level;
int size ATTRIBUTE_UNUSED;
{
#if 0
#ifdef HAIFA
/* When optimizing, enable use of BCT instruction. */
if (level >= 1)
flag_branch_on_count_reg = 1;
#endif
#endif
}
/* Do anything needed at the start of the asm file. */

View File

@ -389,7 +389,7 @@ extern int target_flags;
#define TARGET_DEFAULT (MASK_POWER | MASK_MULTIPLE | MASK_STRING)
/* Processor type. */
/* Processor type. Order must match cpu attribute in MD file. */
enum processor_type
{PROCESSOR_RIOS1,
PROCESSOR_RIOS2,
@ -398,6 +398,7 @@ enum processor_type
PROCESSOR_PPC601,
PROCESSOR_PPC603,
PROCESSOR_PPC604,
PROCESSOR_PPC604e,
PROCESSOR_PPC620};
extern enum processor_type rs6000_cpu;

View File

@ -40,7 +40,7 @@
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in rs6000.h.
(define_attr "cpu" "rios1,rios2,mpccore,ppc403,ppc601,ppc603,ppc604,ppc620"
(define_attr "cpu" "rios1,rios2,mpccore,ppc403,ppc601,ppc603,ppc604,ppc604e,ppc620"
(const (symbol_ref "rs6000_cpu_attr")))
; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
@ -50,12 +50,12 @@
; (POWER and 601 use Integer Unit)
(define_function_unit "lsu" 1 0
(and (eq_attr "type" "load")
(eq_attr "cpu" "mpccore,ppc603,ppc604,ppc620"))
(eq_attr "cpu" "mpccore,ppc603,ppc604,ppc604e,ppc620"))
2 1)
(define_function_unit "lsu" 1 0
(and (eq_attr "type" "store,fpstore")
(eq_attr "cpu" "mpccore,ppc603,ppc604,ppc620"))
(eq_attr "cpu" "mpccore,ppc603,ppc604,ppc604e,ppc620"))
1 1)
(define_function_unit "lsu" 1 0
@ -65,7 +65,7 @@
(define_function_unit "lsu" 1 0
(and (eq_attr "type" "fpload")
(eq_attr "cpu" "ppc604,ppc620"))
(eq_attr "cpu" "ppc604,ppc604e,ppc620"))
3 1)
(define_function_unit "iu" 1 0
@ -181,12 +181,12 @@
(eq_attr "cpu" "mpccore"))
6 6)
; PPC604 has two units that perform integer operations
; PPC604{,e} has two units that perform integer operations
; and one unit for divide/multiply operations (and move
; from/to spr).
(define_function_unit "iu2" 2 0
(and (eq_attr "type" "integer")
(eq_attr "cpu" "ppc604,ppc620"))
(eq_attr "cpu" "ppc604,ppc604e,ppc620"))
1 1)
(define_function_unit "imuldiv" 1 0
@ -194,9 +194,14 @@
(eq_attr "cpu" "ppc604,ppc620"))
4 2)
(define_function_unit "imuldiv" 1 0
(and (eq_attr "type" "imul")
(eq_attr "cpu" "ppc604e"))
2 1)
(define_function_unit "imuldiv" 1 0
(and (eq_attr "type" "idiv")
(eq_attr "cpu" "ppc604,ppc620"))
(eq_attr "cpu" "ppc604,ppc604e,ppc620"))
20 19)
; compare is done on integer unit, but feeds insns which
@ -213,7 +218,7 @@
(define_function_unit "iu" 1 0
(and (eq_attr "type" "compare,delayed_compare")
(eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc620"))
(eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc604e,ppc620"))
3 1)
(define_function_unit "iu2" 2 0
@ -223,7 +228,7 @@
(define_function_unit "iu2" 2 0
(and (eq_attr "type" "compare,delayed_compare")
(eq_attr "cpu" "ppc604,ppc620"))
(eq_attr "cpu" "ppc604,ppc604e,ppc620"))
1 1)
; fp compare uses fp unit
@ -250,7 +255,7 @@
; fp compare uses fp unit
(define_function_unit "fpu" 1 0
(and (eq_attr "type" "fpcompare")
(eq_attr "cpu" "ppc601,ppc603,ppc604,ppc620"))
(eq_attr "cpu" "ppc601,ppc603,ppc604,ppc604e,ppc620"))
5 1)
(define_function_unit "fpu" 1 0
@ -265,7 +270,7 @@
(define_function_unit "bpu" 1 0
(and (eq_attr "type" "mtjmpr")
(eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc620"))
(eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc604e,ppc620"))
4 1)
; all jumps/branches are executing on the bpu, in 1 cycle, for all machines.
@ -295,7 +300,7 @@
(define_function_unit "fpu" 1 0
(and (eq_attr "type" "fp")
(eq_attr "cpu" "ppc603,ppc604,ppc620"))
(eq_attr "cpu" "ppc603,ppc604,ppc604e,ppc620"))
3 1)
(define_function_unit "fpu" 1 0
@ -316,7 +321,7 @@
(define_function_unit "fpu" 1 0
(and (eq_attr "type" "dmul")
(eq_attr "cpu" "ppc604,ppc620"))
(eq_attr "cpu" "ppc604,ppc604e,ppc620"))
3 1)
(define_function_unit "fpu" 1 0
@ -336,7 +341,7 @@
(define_function_unit "fpu" 1 0
(and (eq_attr "type" "sdiv")
(eq_attr "cpu" "ppc603,ppc604,ppc620"))
(eq_attr "cpu" "ppc603,ppc604,ppc604e,ppc620"))
18 18)
(define_function_unit "fpu" 1 0
@ -346,7 +351,7 @@
(define_function_unit "fpu" 1 0
(and (eq_attr "type" "ddiv")
(eq_attr "cpu" "ppc601,ppc604,ppc620"))
(eq_attr "cpu" "ppc601,ppc604,ppc604e,ppc620"))
31 31)
(define_function_unit "fpu" 1 0

View File

@ -82,26 +82,11 @@ static rtx *loop_number_loop_starts, *loop_number_loop_ends;
int *loop_outer_loop;
#ifdef HAIFA
/* The main output of analyze_loop_iterations is placed here */
int *loop_can_insert_bct;
/* For each loop, determines whether some of its inner loops has used
count register */
#ifdef HAVE_decrement_and_branch_on_count
/* Records whether resource in use by inner loop. */
int *loop_used_count_register;
/* loop parameters for arithmetic loops. These loops have a loop variable
which is initialized to loop_start_value, incremented in each iteration
by "loop_increment". At the end of the iteration the loop variable is
compared to the loop_comparison_value (using loop_comparison_code). */
rtx *loop_increment;
rtx *loop_comparison_value;
rtx *loop_start_value;
enum rtx_code *loop_comparison_code;
#endif /* HAIFA */
#endif /* HAVE_decrement_and_branch_on_count */
/* For each loop, keep track of its unrolling factor.
Potential values:
@ -372,20 +357,13 @@ typedef struct rtx_pair {
&& INSN_LUID (INSN) >= INSN_LUID (START) \
&& INSN_LUID (INSN) <= INSN_LUID (END))
#ifdef HAIFA
/* This is extern from unroll.c */
extern void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
/* Two main functions for implementing bct:
first - to be called before loop unrolling, and the second - after */
#ifdef HAVE_decrement_and_branch_on_count
static void analyze_loop_iterations PROTO((rtx, rtx));
/* Test whether BCT applicable and safe. */
static void insert_bct PROTO((rtx, rtx));
/* Auxiliary function that inserts the bct pattern into the loop */
/* Auxiliary function that inserts the BCT pattern into the loop. */
static void instrument_loop_bct PROTO((rtx, rtx, rtx));
#endif /* HAVE_decrement_and_branch_on_count */
#endif /* HAIFA */
/* Indirect_jump_in_function is computed once per function. */
int indirect_jump_in_function = 0;
@ -500,25 +478,11 @@ loop_optimize (f, dumpfile, unroll_p, bct_p)
loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int));
bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int));
#ifdef HAIFA
#ifdef HAVE_decrement_and_branch_on_count
/* Allocate for BCT optimization */
loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int));
bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int));
loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int));
bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int));
loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx));
loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
bzero ((char *) loop_increment, max_loop_num * sizeof (rtx));
bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx));
bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx));
loop_comparison_code
= (enum rtx_code *) alloca (max_loop_num * sizeof (enum rtx_code));
bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code));
#endif /* HAIFA */
#endif /* HAVE_decrement_and_branch_on_count */
/* Find and process each loop.
First, find them, and record them in order of their beginnings. */
@ -2989,10 +2953,10 @@ mark_loop_jump (x, loop_num)
if (loop_num != -1)
{
#ifdef HAIFA
#ifdef HAVE_decrement_and_branch_on_count
LABEL_OUTSIDE_LOOP_P (x) = 1;
LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num];
#endif /* HAIFA */
#endif /* HAVE_decrement_and_branch_on_count */
loop_number_exit_labels[loop_num] = x;
@ -4109,16 +4073,6 @@ strength_reduce (scan_start, end, loop_top, insn_count,
so that "decrement and branch until zero" insn can be used. */
check_dbra_loop (loop_end, insn_count, loop_start);
#ifdef HAIFA
/* record loop-variables relevant for BCT optimization before unrolling
the loop. Unrolling may update part of this information, and the
correct data will be used for generating the BCT. */
#ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count && bct_p)
analyze_loop_iterations (loop_start, loop_end);
#endif
#endif /* HAIFA */
/* Create reg_map to hold substitutions for replaceable giv regs. */
reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx));
bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx));
@ -4618,13 +4572,12 @@ strength_reduce (scan_start, end, loop_top, insn_count,
if (unroll_p)
unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1);
#ifdef HAIFA
/* instrument the loop with bct insn */
#ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count && bct_p)
/* Instrument the loop with BCT insn. */
if (HAVE_decrement_and_branch_on_count && bct_p
&& flag_branch_on_count_reg)
insert_bct (loop_start, loop_end);
#endif
#endif /* HAIFA */
#endif /* HAVE_decrement_and_branch_on_count */
if (loop_dump_stream)
fprintf (loop_dump_stream, "\n");
@ -7985,530 +7938,280 @@ get_condition_for_loop (x)
XEXP (comparison, 1), XEXP (comparison, 0));
}
#ifdef HAIFA
/* Analyze a loop in order to instrument it with the use of count register.
loop_start and loop_end are the first and last insns of the loop.
This function works in cooperation with insert_bct ().
loop_can_insert_bct[loop_num] is set according to whether the optimization
is applicable to the loop. When it is applicable, the following variables
are also set:
loop_start_value[loop_num]
loop_comparison_value[loop_num]
loop_increment[loop_num]
loop_comparison_code[loop_num] */
#ifdef HAVE_decrement_and_branch_on_count
static void
analyze_loop_iterations (loop_start, loop_end)
rtx loop_start, loop_end;
{
rtx comparison, comparison_value;
rtx iteration_var, initial_value, increment;
enum rtx_code comparison_code;
rtx last_loop_insn;
rtx insn;
int i;
/* loop_variable mode */
enum machine_mode original_mode;
/* find the number of the loop */
int loop_num = uid_loop_num [INSN_UID (loop_start)];
/* we change our mind only when we are sure that loop will be instrumented */
loop_can_insert_bct[loop_num] = 0;
/* is the optimization suppressed. */
if ( !flag_branch_on_count_reg )
return;
/* make sure that count-reg is not in use */
if (loop_used_count_register[loop_num]){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n",
loop_num);
return;
}
/* make sure that the function has no indirect jumps. */
if (indirect_jump_in_function){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n",
loop_num);
return;
}
/* make sure that the last loop insn is a conditional jump */
last_loop_insn = PREV_INSN (loop_end);
if (GET_CODE (last_loop_insn) != JUMP_INSN || !condjump_p (last_loop_insn)) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n",
loop_num);
return;
}
/* First find the iteration variable. If the last insn is a conditional
branch, and the insn preceding it tests a register value, make that
register the iteration variable. */
/* We used to use prev_nonnote_insn here, but that fails because it might
accidentally get the branch for a contained loop if the branch for this
loop was deleted. We can only trust branches immediately before the
loop_end. */
comparison = get_condition_for_loop (last_loop_insn);
/* ??? Get_condition may switch position of induction variable and
invariant register when it canonicalizes the comparison. */
if (comparison == 0) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n",
loop_num);
return;
}
comparison_code = GET_CODE (comparison);
iteration_var = XEXP (comparison, 0);
comparison_value = XEXP (comparison, 1);
original_mode = GET_MODE (iteration_var);
if (GET_MODE_CLASS (original_mode) != MODE_INT
|| GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n",
loop_num);
return;
}
/* get info about loop bounds and increment */
iteration_info (iteration_var, &initial_value, &increment,
loop_start, loop_end);
/* make sure that all required loop data were found */
if (!(initial_value && increment && comparison_value
&& invariant_p (comparison_value) && invariant_p (increment)
&& ! indirect_jump_in_function))
{
if (loop_dump_stream) {
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num);
if (!(initial_value && increment && comparison_value)) {
fprintf (loop_dump_stream, "\tbounds not available: ");
if ( ! initial_value )
fprintf (loop_dump_stream, "initial ");
if ( ! increment )
fprintf (loop_dump_stream, "increment ");
if ( ! comparison_value )
fprintf (loop_dump_stream, "comparison ");
fprintf (loop_dump_stream, "\n");
}
if (!invariant_p (comparison_value) || !invariant_p (increment))
fprintf (loop_dump_stream, "\tloop bounds not invariant\n");
}
return;
}
/* make sure that the increment is constant */
if (GET_CODE (increment) != CONST_INT) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n",
loop_num);
return;
}
/* make sure that the loop contains neither function call, nor jump on table.
(the count register might be altered by the called function, and might
be used for a branch on table). */
for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) {
if (GET_CODE (insn) == CALL_INSN){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n",
loop_num);
return;
}
if (GET_CODE (insn) == JUMP_INSN
&& (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
|| GET_CODE (PATTERN (insn)) == ADDR_VEC)){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n",
loop_num);
return;
}
}
/* At this point, we are sure that the loop can be instrumented with BCT.
Some of the loops, however, will not be instrumented - the final decision
is taken by insert_bct () */
if (loop_dump_stream)
fprintf (loop_dump_stream,
"analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n",
loop_num);
/* mark all enclosing loops that they cannot use count register */
/* ???: In fact, since insert_bct may decide not to instrument this loop,
marking here may prevent instrumenting an enclosing loop that could
actually be instrumented. But since this is rare, it is safer to mark
here in case the order of calling (analyze/insert)_bct would be changed. */
for (i=loop_num; i != -1; i = loop_outer_loop[i])
loop_used_count_register[i] = 1;
/* Set data structures which will be used by the instrumentation phase */
loop_start_value[loop_num] = initial_value;
loop_comparison_value[loop_num] = comparison_value;
loop_increment[loop_num] = increment;
loop_comparison_code[loop_num] = comparison_code;
loop_can_insert_bct[loop_num] = 1;
}
/* instrument loop for insertion of bct instruction. We distinguish between
loops with compile-time bounds, to those with run-time bounds. The loop
behaviour is analized according to the following characteristics/variables:
; Input variables:
; comparison-value: the value to which the iteration counter is compared.
; initial-value: iteration-counter initial value.
; increment: iteration-counter increment.
; Computed variables:
; increment-direction: the sign of the increment.
; compare-direction: '1' for GT, GTE, '-1' for LT, LTE, '0' for NE.
; range-direction: sign (comparison-value - initial-value)
We give up on the following cases:
; loop variable overflow.
; run-time loop bounds with comparison code NE.
/* Instrument loop for insertion of bct instruction. We distinguish between
loops with compile-time bounds and those with run-time bounds.
Information from loop_iterations() is used to compute compile-time bounds.
Run-time bounds should use loop preconditioning, but are currently ignored.
*/
static void
insert_bct (loop_start, loop_end)
rtx loop_start, loop_end;
{
rtx initial_value, comparison_value, increment;
enum rtx_code comparison_code;
int i;
unsigned HOST_WIDE_INT n_iterations;
rtx insn;
int increment_direction, compare_direction;
int unsigned_p = 0;
/* if the loop condition is <= or >=, the number of iteration
is 1 more than the range of the bounds of the loop */
/* If the loop condition is <= or >=, the number of iterations
is 1 more than the range of the bounds of the loop. */
int add_iteration = 0;
/* the only machine mode we work with - is the integer of the size that the
machine has */
enum machine_mode loop_var_mode = word_mode;
int loop_num = uid_loop_num [INSN_UID (loop_start)];
/* get loop-variables. No need to check that these are valid - already
checked in analyze_loop_iterations (). */
comparison_code = loop_comparison_code[loop_num];
initial_value = loop_start_value[loop_num];
comparison_value = loop_comparison_value[loop_num];
increment = loop_increment[loop_num];
/* check analyze_loop_iterations decision for this loop. */
if (! loop_can_insert_bct[loop_num]){
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n",
loop_num);
return;
}
/* It's impossible to instrument a completely unrolled loop. */
if (loop_unroll_factor [loop_num] == -1)
return;
/* make sure that the last loop insn is a conditional jump .
This check is repeated from analyze_loop_iterations (),
because unrolling might have changed that. */
if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN
|| !condjump_p (PREV_INSN (loop_end))) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: not instrumenting BCT because of invalid branch\n");
return;
}
/* fix increment in case loop was unrolled. */
if (loop_unroll_factor [loop_num] > 1)
increment = GEN_INT ( INTVAL (increment) * loop_unroll_factor [loop_num] );
/* determine properties and directions of the loop */
increment_direction = (INTVAL (increment) > 0) ? 1:-1;
switch ( comparison_code ) {
case LEU:
unsigned_p = 1;
/* fallthrough */
case LE:
compare_direction = 1;
add_iteration = 1;
break;
case GEU:
unsigned_p = 1;
/* fallthrough */
case GE:
compare_direction = -1;
add_iteration = 1;
break;
case EQ:
/* in this case we cannot know the number of iterations */
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: %d: loop cannot be instrumented: == in condition\n",
loop_num);
return;
case LTU:
unsigned_p = 1;
/* fallthrough */
case LT:
compare_direction = 1;
break;
case GTU:
unsigned_p = 1;
/* fallthrough */
case GT:
compare_direction = -1;
break;
case NE:
compare_direction = 0;
break;
default:
abort ();
}
/* make sure that the loop does not end by an overflow */
if (compare_direction != increment_direction) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: %d: loop cannot be instrumented: terminated by overflow\n",
loop_num);
return;
}
/* try to instrument the loop. */
/* Handle the simpler case, where the bounds are known at compile time. */
if (GET_CODE (initial_value) == CONST_INT
&& GET_CODE (comparison_value) == CONST_INT)
/* Make sure that the count register is not in use. */
if (loop_used_count_register [loop_num])
{
int n_iterations;
int increment_value_abs = INTVAL (increment) * increment_direction;
/* check the relation between compare-val and initial-val */
int difference = INTVAL (comparison_value) - INTVAL (initial_value);
int range_direction = (difference > 0) ? 1 : -1;
/* make sure the loop executes enough iterations to gain from BCT */
if (difference > -3 && difference < 3) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: loop %d not BCT instrumented: too small iteration count.\n",
loop_num);
return;
}
/* make sure that the loop executes at least once */
if ((range_direction == 1 && compare_direction == -1)
|| (range_direction == -1 && compare_direction == 1))
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: loop %d: does not iterate even once. Not instrumenting.\n",
loop_num);
return;
}
/* make sure that the loop does not end by an overflow (in compile time
bounds we must have an additional check for overflow, because here
we also support the compare code of 'NE'. */
if (comparison_code == NE
&& increment_direction != range_direction) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n",
loop_num);
return;
}
/* Determine the number of iterations by:
;
; compare-val - initial-val + (increment -1) + additional-iteration
; num_iterations = -----------------------------------------------------------------
; increment
*/
difference = (range_direction > 0) ? difference : -difference;
#if 0
fprintf (stderr, "difference is: %d\n", difference); /* @*/
fprintf (stderr, "increment_value_abs is: %d\n", increment_value_abs); /* @*/
fprintf (stderr, "add_iteration is: %d\n", add_iteration); /* @*/
fprintf (stderr, "INTVAL (comparison_value) is: %d\n", INTVAL (comparison_value)); /* @*/
fprintf (stderr, "INTVAL (initial_value) is: %d\n", INTVAL (initial_value)); /* @*/
#endif
if (increment_value_abs == 0) {
fprintf (stderr, "insert_bct: error: increment == 0 !!!\n");
abort ();
}
n_iterations = (difference + increment_value_abs - 1 + add_iteration)
/ increment_value_abs;
#if 0
fprintf (stderr, "number of iterations is: %d\n", n_iterations); /* @*/
#endif
instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
/* Done with this loop. */
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: BCT instrumentation failed: count register already in use\n",
loop_num);
return;
}
/* Handle the more complex case, that the bounds are NOT known at compile time. */
/* In this case we generate run_time calculation of the number of iterations */
/* Make sure that the function has no indirect jumps. */
if (indirect_jump_in_function)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: BCT instrumentation failed: indirect jump in function\n",
loop_num);
return;
}
/* Make sure that the last loop insn is a conditional jump. */
if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN
|| ! condjump_p (PREV_INSN (loop_end))
|| simplejump_p (PREV_INSN (loop_end)))
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: BCT instrumentation failed: invalid jump at loop end\n",
loop_num);
return;
}
/* Make sure that the loop does not contain a function call
(the count register might be altered by the called function). */
if (loop_has_call)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: BCT instrumentation failed: function call in loop\n",
loop_num);
return;
}
/* Make sure that the loop does not jump via a table.
(the count register might be used to perform the branch on table). */
for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn))
{
if (GET_CODE (insn) == JUMP_INSN
&& (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
|| GET_CODE (PATTERN (insn)) == ADDR_VEC))
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: BCT instrumentation failed: computed branch in the loop\n",
loop_num);
return;
}
}
/* Account for loop unrolling in instrumented iteration count. */
if (loop_unroll_factor [loop_num] > 1)
n_iterations = loop_n_iterations / loop_unroll_factor [loop_num];
else
n_iterations = loop_n_iterations;
if (n_iterations != 0 && n_iterations < 3)
{
/* Allow an enclosing outer loop to benefit if possible. */
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: Too few iterations to benefit from BCT optimization\n",
loop_num);
return;
}
/* Try to instrument the loop. */
/* Handle the simpler case, where the bounds are known at compile time. */
if (n_iterations > 0)
{
/* Mark all enclosing loops that they cannot use count register. */
for (i=loop_num; i != -1; i = loop_outer_loop[i])
loop_used_count_register[i] = 1;
instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
return;
}
/* Handle the more complex case, that the bounds are NOT known
at compile time. In this case we generate run_time calculation
of the number of iterations. */
if (GET_MODE_CLASS (GET_MODE (loop_iteration_var)) != MODE_INT
|| GET_MODE_SIZE (GET_MODE (loop_iteration_var)) != UNITS_PER_WORD)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: BCT Instrumentation failed: loop variable not integer\n",
loop_num);
return;
}
/* With runtime bounds, if the compare is of the form '!=' we give up */
if (comparison_code == NE) {
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: fail for loop %d: runtime bounds with != comparison\n",
loop_num);
return;
}
else {
/* We rely on the existence of run-time guard to ensure that the
loop executes at least once. */
rtx sequence;
rtx iterations_num_reg;
int increment_value_abs = INTVAL (increment) * increment_direction;
/* make sure that the increment is a power of two, otherwise (an
expensive) divide is needed. */
if (exact_log2 (increment_value_abs) == -1)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: not instrumenting BCT because the increment is not power of 2\n");
return;
}
/* compute the number of iterations */
start_sequence ();
if (loop_comparison_code == NE)
{
rtx temp_reg;
/* Again, the number of iterations is calculated by:
;
; compare-val - initial-val + (increment -1) + additional-iteration
; num_iterations = -----------------------------------------------------------------
; increment
*/
/* ??? Do we have to call copy_rtx here before passing rtx to
expand_binop? */
if (compare_direction > 0) {
/* <, <= :the loop variable is increasing */
temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value,
initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
else {
temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value,
comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
if (increment_value_abs - 1 + add_iteration != 0)
temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
GEN_INT (increment_value_abs - 1 + add_iteration),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
if (increment_value_abs != 1)
{
/* ??? This will generate an expensive divide instruction for
most targets. The original authors apparently expected this
to be a shift, since they test for power-of-2 divisors above,
but just naively generating a divide instruction will not give
a shift. It happens to work for the PowerPC target because
the rs6000.md file has a divide pattern that emits shifts.
It will probably not work for any other target. */
iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
temp_reg,
GEN_INT (increment_value_abs),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
else
iterations_num_reg = temp_reg;
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct %d: runtime bounds with != comparison\n",
loop_num);
return;
}
sequence = gen_sequence ();
end_sequence ();
emit_insn_before (sequence, loop_start);
instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
}
/* Use common loop preconditioning code instead. */
#if 0
else
{
/* We rely on the existence of run-time guard to ensure that the
loop executes at least once. */
rtx sequence;
rtx iterations_num_reg;
unsigned HOST_WIDE_INT increment_value_abs
= INTVAL (increment) * increment_direction;
/* make sure that the increment is a power of two, otherwise (an
expensive) divide is needed. */
if (exact_log2 (increment_value_abs) == -1)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
"insert_bct: not instrumenting BCT because the increment is not power of 2\n");
return;
}
/* compute the number of iterations */
start_sequence ();
{
rtx temp_reg;
/* Again, the number of iterations is calculated by:
;
; compare-val - initial-val + (increment -1) + additional-iteration
; num_iterations = -----------------------------------------------------------------
; increment
*/
/* ??? Do we have to call copy_rtx here before passing rtx to
expand_binop? */
if (compare_direction > 0)
{
/* <, <= :the loop variable is increasing */
temp_reg = expand_binop (loop_var_mode, sub_optab,
comparison_value, initial_value,
NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
else
{
temp_reg = expand_binop (loop_var_mode, sub_optab,
initial_value, comparison_value,
NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
if (increment_value_abs - 1 + add_iteration != 0)
temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
GEN_INT (increment_value_abs - 1
+ add_iteration),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
if (increment_value_abs != 1)
{
/* ??? This will generate an expensive divide instruction for
most targets. The original authors apparently expected this
to be a shift, since they test for power-of-2 divisors above,
but just naively generating a divide instruction will not give
a shift. It happens to work for the PowerPC target because
the rs6000.md file has a divide pattern that emits shifts.
It will probably not work for any other target. */
iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
temp_reg,
GEN_INT (increment_value_abs),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
else
iterations_num_reg = temp_reg;
}
sequence = gen_sequence ();
end_sequence ();
emit_insn_before (sequence, loop_start);
instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
}
return;
#endif /* Complex case */
}
/* instrument loop by inserting a bct in it. This is done in the following way:
1. A new register is created and assigned the hard register number of the count
register.
2. In the head of the loop the new variable is initialized by the value passed in the
loop_num_iterations parameter.
/* Instrument loop by inserting a bct in it as follows:
1. A new counter register is created.
2. In the head of the loop the new variable is initialized to the value
passed in the loop_num_iterations parameter.
3. At the end of the loop, comparison of the register with 0 is generated.
The created comparison follows the pattern defined for the
decrement_and_branch_on_count insn, so this insn will be generated in assembly
generation phase.
4. The compare&branch on the old variable is deleted. So, if the loop-variable was
not used elsewhere, it will be eliminated by data-flow analisys. */
The created comparison follows the pattern defined for the
decrement_and_branch_on_count insn, so this insn will be generated.
4. The branch on the old variable is deleted. The compare must remain
because it might be used elsewhere. If the loop-variable or condition
register are not used elsewhere, they will be eliminated by flow. */
static void
instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
rtx loop_start, loop_end;
rtx loop_num_iterations;
{
rtx temp_reg1, temp_reg2;
rtx counter_reg;
rtx start_label;
rtx sequence;
enum machine_mode loop_var_mode = word_mode;
if (HAVE_decrement_and_branch_on_count)
{
if (loop_dump_stream)
fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
{
fputs ("instrument_bct: Inserting BCT (", loop_dump_stream);
if (GET_CODE (loop_num_iterations) == CONST_INT)
fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
INTVAL (loop_num_iterations));
else
fputs ("runtime", loop_dump_stream);
fputs (" iterations)", loop_dump_stream);
}
/* Discard original jump to continue loop. Original compare result
may still be live, so it cannot be discarded explicitly. */
delete_insn (PREV_INSN (loop_end));
/* insert the label which will delimit the start of the loop */
/* Insert the label which will delimit the start of the loop. */
start_label = gen_label_rtx ();
emit_label_after (start_label, loop_start);
/* insert initialization of the count register into the loop header */
/* Insert initialization of the count register into the loop header. */
start_sequence ();
temp_reg1 = gen_reg_rtx (loop_var_mode);
emit_insn (gen_move_insn (temp_reg1, loop_num_iterations));
/* this will be count register */
temp_reg2 = gen_rtx_REG (loop_var_mode, COUNT_REGISTER_REGNUM);
/* we have to move the value to the count register from an GPR
because rtx pointed to by loop_num_iterations could contain
expression which cannot be moved into count register */
emit_insn (gen_move_insn (temp_reg2, temp_reg1));
counter_reg = gen_reg_rtx (word_mode);
emit_insn (gen_move_insn (counter_reg, loop_num_iterations));
sequence = gen_sequence ();
end_sequence ();
emit_insn_before (sequence, loop_start);
/* insert new comparison on the count register instead of the
/* Insert new comparison on the count register instead of the
old one, generating the needed BCT pattern (that will be
later recognized by assembly generation phase). */
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2,
emit_jump_insn_before (gen_decrement_and_branch_on_count (counter_reg,
start_label),
loop_end);
LABEL_NUSES (start_label)++;
@ -8517,8 +8220,6 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
}
#endif /* HAVE_decrement_and_branch_on_count */
#endif /* HAIFA */
/* Scan the function and determine whether it has indirect (computed) jumps.
This is taken mostly from flow.c; similar code exists elsewhere

View File

@ -1,5 +1,5 @@
/* Loop optimization definitions for GNU C-Compiler
Copyright (C) 1991, 1995 Free Software Foundation, Inc.
Copyright (C) 1991, 1995, 1998 Free Software Foundation, Inc.
This file is part of GNU CC.
@ -184,9 +184,12 @@ void emit_unrolled_add PROTO((rtx, rtx, rtx));
int back_branch_in_range_p PROTO((rtx, rtx, rtx));
extern int *loop_unroll_factor;
#ifdef HAIFA
/* variables for interaction between unroll.c and loop.c, for
the insertion of branch-on-count instruction. */
extern rtx *loop_start_value;
#endif /* HAIFA */
#ifdef HAVE_decrement_and_branch_on_count
extern rtx loop_iteration_var;
extern rtx loop_initial_value;
extern rtx loop_increment;
extern rtx loop_final_value;
extern enum rtx_code loop_comparison_code;
#endif /* HAVE_decrement_and_branch_on_count */

View File

@ -189,11 +189,11 @@ static int *splittable_regs_updates;
/* Values describing the current loop's iteration variable. These are set up
by loop_iterations, and used by precondition_loop_p. */
static rtx loop_iteration_var;
static rtx loop_initial_value;
static rtx loop_increment;
static rtx loop_final_value;
static enum rtx_code loop_comparison_code;
rtx loop_iteration_var;
rtx loop_initial_value;
rtx loop_increment;
rtx loop_final_value;
enum rtx_code loop_comparison_code;
/* Forward declarations. */
@ -1127,13 +1127,6 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
/* Set unroll type to MODULO now. */
unroll_type = UNROLL_MODULO;
loop_preconditioned = 1;
#ifdef HAIFA
/* Fix the initial value for the loop as needed. */
if (loop_n_iterations <= 0)
loop_start_value [uid_loop_num [INSN_UID (loop_start)]]
= initial_value;
#endif
}
}