Makefile.in (OBJS): Add doloop.o.
* Makefile.in (OBJS): Add doloop.o. * doloop.c: New file. * final.c (insn_current_reference_address): Return 0 before final. * flags.h (flag_branch_on_count_reg): Fix typos in commentary. * jump.c (any_uncondjump_p): Likewise. * loop.c (indirect_jump_in_function): Make static. (strength_reduce): Call doloop_optimize. (insert_bct, instrument_loop_bct): Remove. * loop.h (doloop_optimize): Prototype. * recog.c (split_all_insns): Split all INSN_P. * toplev.c (flag_branch_on_count_reg): Default on. * config/c4x/c4x.c (c4x_optimization_options): Don't set flag_branch_on_count_reg. * config/i386/i386.c (override_options): Likewise. * config/rs6000/rs6000.c (optimization_options): Likewise. * config/i386/i386.md (decrement_and_branch_on_count): Remove. (doloop_end): New. (dbra_ge): Remove, as well as all it's splitters. * config/rs6000/rs6000.md (decrement_and_branch_on_count): Remove. (doloop_end): New. * config/ia64/ia64-protos.h (ar_lc_reg_operand): Declare. (ia64_register_move_cost): Declare. * config/ia64/ia64.c (ar_lc_reg_operand): New. (struct ia64_frame_info): Add ar_size. (ia64_compute_frame_size): Set it. (save_restore_insns): Save and restore ar.lc. (ia64_register_move_cost): New, moved from header file. Handle application registers. (REG_AR_PFS, REG_AR_EC): Remove. Replace with AR_*_REGNUM numbers. (emit_insn_group_barriers): Special case doloop_end_internal. (ia64_epilogue_uses): Mark ar.lc live at end. * config/ia64/ia64.h (AR_CCV_REGNUM, AR_LC_REGNUM): New registers. (AR_EC_REGNUM, AR_PFS_REGNUM): New registers. (FIRST_PSEUDO_REGISTER): Make room. (AR_M_REGNO_P, AR_I_REGNO_P, AR_REGNO_P): New. (FIXED_REGISTERS, CALL_USED_REGISTERS): Update. (REG_ALLOC_ORDER): Update. (HARD_REGNO_MODE_OK): Update. (REGISTER_NAMES): Update. (enum reg_class): Add AR_M_REGS and AR_I_REGS. (REG_CLASS_NAMES, REG_CLASS_CONTENTS): Update. (REGNO_REG_CLASS): Update. (LEGITIMATE_ADDRESS_DISP): Displacement range is 9 bits, not 10. (REGISTER_MOVE_COST): Move out of line. 
(PREDICATE_CODES): Update. * config/ia64/ia64.md (movdi patterns): Handle ar register classes. (addsi3_plus1_alt, adddi3_plus1_alt): New. (shladd_elim splitter): Allow constants in the predicate. (doloop_end, doloop_end_internal): New. From-SVN: r35358
This commit is contained in:
parent
1cf0acdd19
commit
5527bf14a5
@ -1,3 +1,84 @@
|
||||
2000-07-30 Michael Hayes <mhayes@cygnus.com>
|
||||
Richard Henderson <rth@cygnus.com>
|
||||
|
||||
* Makefile.in (OBJS): Add doloop.o.
|
||||
* doloop.c: New file.
|
||||
|
||||
* final.c (insn_current_reference_address): Return 0 before final.
|
||||
* flags.h (flag_branch_on_count_reg): Fix typos in commentary.
|
||||
* jump.c (any_uncondjump_p): Likewise.
|
||||
* loop.c (indirect_jump_in_function): Make static.
|
||||
(strength_reduce): Call doloop_optimize.
|
||||
(insert_bct, instrument_loop_bct): Remove.
|
||||
* loop.h (doloop_optimize): Prototype.
|
||||
* recog.c (split_all_insns): Split all INSN_P.
|
||||
* toplev.c (flag_branch_on_count_reg): Default on.
|
||||
|
||||
* config/c4x/c4x.c (c4x_optimization_options): Don't set
|
||||
flag_branch_on_count_reg.
|
||||
* config/i386/i386.c (override_options): Likewise.
|
||||
* config/rs6000/rs6000.c (optimization_options): Likewise.
|
||||
|
||||
* config/i386/i386.md (decrement_and_branch_on_count): Remove.
|
||||
(doloop_end): New.
|
||||
(dbra_ge): Remove, as well as all its splitters.
|
||||
|
||||
* config/rs6000/rs6000.md (decrement_and_branch_on_count): Remove.
|
||||
(doloop_end): New.
|
||||
|
||||
* config/ia64/ia64-protos.h (ar_lc_reg_operand): Declare.
|
||||
(ia64_register_move_cost): Declare.
|
||||
* config/ia64/ia64.c (ar_lc_reg_operand): New.
|
||||
(struct ia64_frame_info): Add ar_size.
|
||||
(ia64_compute_frame_size): Set it.
|
||||
(save_restore_insns): Save and restore ar.lc.
|
||||
(ia64_register_move_cost): New, moved from header file. Handle
|
||||
application registers.
|
||||
(REG_AR_PFS, REG_AR_EC): Remove. Replace with AR_*_REGNUM numbers.
|
||||
(emit_insn_group_barriers): Special case doloop_end_internal.
|
||||
(ia64_epilogue_uses): Mark ar.lc live at end.
|
||||
* config/ia64/ia64.h (AR_CCV_REGNUM, AR_LC_REGNUM): New registers.
|
||||
(AR_EC_REGNUM, AR_PFS_REGNUM): New registers.
|
||||
(FIRST_PSEUDO_REGISTER): Make room.
|
||||
(AR_M_REGNO_P, AR_I_REGNO_P, AR_REGNO_P): New.
|
||||
(FIXED_REGISTERS, CALL_USED_REGISTERS): Update.
|
||||
(REG_ALLOC_ORDER): Update.
|
||||
(HARD_REGNO_MODE_OK): Update.
|
||||
(REGISTER_NAMES): Update.
|
||||
(enum reg_class): Add AR_M_REGS and AR_I_REGS.
|
||||
(REG_CLASS_NAMES, REG_CLASS_CONTENTS): Update.
|
||||
(REGNO_REG_CLASS): Update.
|
||||
(LEGITIMATE_ADDRESS_DISP): Displacement range is 9 bits, not 10.
|
||||
(REGISTER_MOVE_COST): Move out of line.
|
||||
(PREDICATE_CODES): Update.
|
||||
* config/ia64/ia64.md (movdi patterns): Handle ar register classes.
|
||||
(addsi3_plus1_alt, adddi3_plus1_alt): New.
|
||||
(shladd_elim splitter): Allow constants in the predicate.
|
||||
(doloop_end, doloop_end_internal): New.
|
||||
|
||||
2000-07-30 Richard Henderson <rth@cygnus.com>
|
||||
|
||||
* genattrtab.c (struct insn_def): Add lineno member.
|
||||
(struct insn_ent): Likewise.
|
||||
(struct attr_desc): Likewise.
|
||||
(struct delay_desc): Likewise.
|
||||
(struct function_unit_op): Likewise.
|
||||
(struct function_unit): Likewise.
|
||||
(check_attr_value): Use message_with_line.
|
||||
(check_defs): Likewise.
|
||||
(expand_units): Likewise.
|
||||
(check_attr_test): Take a lineno argument.
|
||||
(gen_attr): Likewise.
|
||||
(gen_insn): Likewise.
|
||||
(gen_delay): Likewise.
|
||||
(gen_unit): Likewise.
|
||||
(main): Give it to them.
|
||||
(convert_set_attr_alternative): Take an insn_def argument
|
||||
instead of num_alt and insn_index.
|
||||
(convert_set_attr): Likewise.
|
||||
(write_test_expr): Protect INSN_ADDRESSES load
|
||||
with INSN_ADDRESSES_SET_P.
|
||||
|
||||
2000-07-30 Richard Henderson <rth@cygnus.com>
|
||||
|
||||
* flow.c (init_propagate_block_info): Use pc_set.
|
||||
|
@ -689,7 +689,7 @@ OBJS = diagnostic.o \
|
||||
function.o stmt.o except.o expr.o calls.o expmed.o explow.o optabs.o real.o \
|
||||
builtins.o intl.o varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o genrtl.o \
|
||||
dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o alias.o gcse.o \
|
||||
integrate.o jump.o cse.o loop.o unroll.o flow.o combine.o varray.o \
|
||||
integrate.o jump.o cse.o loop.o doloop.o unroll.o flow.o combine.o varray.o \
|
||||
regclass.o regmove.o local-alloc.o global.o reload.o reload1.o caller-save.o \
|
||||
insn-peep.o reorg.o haifa-sched.o final.o recog.o reg-stack.o regrename.o \
|
||||
insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o lcm.o \
|
||||
@ -1338,6 +1338,8 @@ profile.o : profile.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) flags.h \
|
||||
loop.o : loop.c $(CONFIG_H) system.h $(RTL_H) flags.h $(LOOP_H) insn-config.h \
|
||||
insn-flags.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) real.h \
|
||||
$(BASIC_BLOCK_H) function.h toplev.h varray.h except.h cselib.h
|
||||
doloop.o : doloop.c $(CONFIG_H) system.h $(RTL_H) flags.h $(LOOP_H) \
|
||||
insn-flags.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H)
|
||||
unroll.o : unroll.c $(CONFIG_H) system.h $(RTL_H) insn-config.h function.h \
|
||||
$(INTEGRATE_H) $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) $(LOOP_H) toplev.h \
|
||||
hard-reg-set.h varray.h $(BASIC_BLOCK_H)
|
||||
|
@ -295,10 +295,6 @@ c4x_optimization_options (level, size)
|
||||
instructions. The benefit we gain we get by scheduling before
|
||||
register allocation is probably marginal anyhow. */
|
||||
flag_schedule_insns = 0;
|
||||
|
||||
/* When optimizing, enable use of RPTB instruction. */
|
||||
if (level >= 1)
|
||||
flag_branch_on_count_reg = 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -630,10 +630,6 @@ override_options ()
|
||||
if (flag_fast_math)
|
||||
target_flags &= ~MASK_IEEE_FP;
|
||||
|
||||
/* If we're planning on using `loop', use it. */
|
||||
if (TARGET_USE_LOOP && optimize)
|
||||
flag_branch_on_count_reg = 1;
|
||||
|
||||
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
|
||||
on by -msse. */
|
||||
if (TARGET_SSE)
|
||||
|
@ -8327,27 +8327,32 @@
|
||||
;; This is all complicated by the fact that since this is a jump insn
|
||||
;; we must handle our own reloads.
|
||||
|
||||
(define_expand "decrement_and_branch_on_count"
|
||||
[(parallel [(set (pc) (if_then_else
|
||||
(ne (match_operand:SI 0 "register_operand" "")
|
||||
(const_int 1))
|
||||
(label_ref (match_operand 1 "" ""))
|
||||
(pc)))
|
||||
(set (match_dup 0)
|
||||
(plus:SI (match_dup 0)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 2 ""))
|
||||
(clobber (reg:CC 17))])]
|
||||
(define_expand "doloop_end"
|
||||
[(use (match_operand 0 "" "")) ; loop pseudo
|
||||
(use (match_operand 1 "" "")) ; iterations; zero if unknown
|
||||
(use (match_operand 2 "" "")) ; max iterations
|
||||
(use (match_operand 3 "" "")) ; loop level
|
||||
(use (match_operand 4 "" ""))] ; label
|
||||
"TARGET_USE_LOOP"
|
||||
"")
|
||||
"
|
||||
{
|
||||
/* Only use cloop on innermost loops. */
|
||||
if (INTVAL (operands[3]) > 1)
|
||||
FAIL;
|
||||
if (GET_MODE (operands[0]) != SImode)
|
||||
FAIL;
|
||||
emit_jump_insn (gen_doloop_end_internal (operands[4], operands[0],
|
||||
operands[0]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_insn "*dbra_ne"
|
||||
(define_insn "doloop_end_internal"
|
||||
[(set (pc)
|
||||
(if_then_else (ne (match_operand:SI 1 "register_operand" "c,*r,*r")
|
||||
(if_then_else (ne (match_operand:SI 1 "register_operand" "c,?*r,?*r")
|
||||
(const_int 1))
|
||||
(label_ref (match_operand 0 "" ""))
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "register_operand" "=1,*r,*m*r")
|
||||
(set (match_operand:SI 2 "register_operand" "=1,1,*m*r")
|
||||
(plus:SI (match_dup 1)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 3 "=X,X,r"))
|
||||
@ -8372,55 +8377,24 @@
|
||||
(const_string "ibr")
|
||||
(const_string "multi")))])
|
||||
|
||||
(define_insn "*dbra_ge"
|
||||
[(set (pc)
|
||||
(if_then_else (ge (match_operand:SI 1 "register_operand" "c,*r,*r")
|
||||
(const_int 0))
|
||||
(label_ref (match_operand 0 "" ""))
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "register_operand" "=1,*r,*m*r")
|
||||
(plus:SI (match_dup 1)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 3 "=X,X,r"))
|
||||
(clobber (reg:CC 17))]
|
||||
"TARGET_USE_LOOP && find_reg_note (insn, REG_NONNEG, 0)"
|
||||
"*
|
||||
{
|
||||
if (which_alternative != 0)
|
||||
return \"#\";
|
||||
if (get_attr_length (insn) == 2)
|
||||
return \"loop\\t%l0\";
|
||||
else
|
||||
return \"dec{l}\\t%1\;jne\\t%l0\";
|
||||
}"
|
||||
[(set (attr "type")
|
||||
(if_then_else (and (eq_attr "alternative" "0")
|
||||
(and (ge (minus (match_dup 0) (pc))
|
||||
(const_int -128))
|
||||
(lt (minus (match_dup 0) (pc))
|
||||
(const_int 124))))
|
||||
(const_string "ibr")
|
||||
(const_string "multi")))
|
||||
(set_attr "ppro_uops" "many")])
|
||||
|
||||
(define_split
|
||||
[(set (pc)
|
||||
(if_then_else (ne (match_operand:SI 1 "register_operand" "")
|
||||
(const_int 1))
|
||||
(match_operand 0 "" "")
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "register_operand" "")
|
||||
(set (match_dup 1)
|
||||
(plus:SI (match_dup 1)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 3 ""))
|
||||
(clobber (match_scratch:SI 2 ""))
|
||||
(clobber (reg:CC 17))]
|
||||
"TARGET_USE_LOOP && reload_completed
|
||||
&& ! (REGNO (operands[1]) == 2 && rtx_equal_p (operands[1], operands[2]))"
|
||||
[(set (match_dup 2) (match_dup 1))
|
||||
(parallel [(set (reg:CCZ 17)
|
||||
(compare:CCZ (plus:SI (match_dup 2) (const_int -1))
|
||||
"TARGET_USE_LOOP
|
||||
&& reload_completed
|
||||
&& REGNO (operands[1]) != 2"
|
||||
[(parallel [(set (reg:CCZ 17)
|
||||
(compare:CCZ (plus:SI (match_dup 1) (const_int -1))
|
||||
(const_int 0)))
|
||||
(set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))])
|
||||
(set (match_dup 1) (plus:SI (match_dup 1) (const_int -1)))])
|
||||
(set (pc) (if_then_else (ne (reg:CCZ 17) (const_int 0))
|
||||
(match_dup 0)
|
||||
(pc)))]
|
||||
@ -8432,12 +8406,15 @@
|
||||
(const_int 1))
|
||||
(match_operand 0 "" "")
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "memory_operand" "")
|
||||
(set (match_operand:SI 2 "nonimmediate_operand" "")
|
||||
(plus:SI (match_dup 1)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 3 ""))
|
||||
(clobber (reg:CC 17))]
|
||||
"TARGET_USE_LOOP && reload_completed"
|
||||
"TARGET_USE_LOOP
|
||||
&& reload_completed
|
||||
&& (! REG_P (operands[2])
|
||||
|| ! rtx_equal_p (operands[1], operands[2]))"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
(parallel [(set (reg:CCZ 17)
|
||||
(compare:CCZ (plus:SI (match_dup 3) (const_int -1))
|
||||
@ -8448,52 +8425,6 @@
|
||||
(match_dup 0)
|
||||
(pc)))]
|
||||
"")
|
||||
|
||||
(define_split
|
||||
[(set (pc)
|
||||
(if_then_else (ge (match_operand:SI 1 "register_operand" "")
|
||||
(const_int 0))
|
||||
(match_operand 0 "" "")
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "register_operand" "")
|
||||
(plus:SI (match_dup 1)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 3 ""))
|
||||
(clobber (reg:CC 17))]
|
||||
"TARGET_USE_LOOP && reload_completed
|
||||
&& ! (REGNO (operands[1]) == 2 && rtx_equal_p (operands[1], operands[2]))"
|
||||
[(set (match_dup 2) (match_dup 1))
|
||||
(parallel [(set (reg:CCNO 17)
|
||||
(compare:CCNO (plus:SI (match_dup 2) (const_int -1))
|
||||
(const_int 0)))
|
||||
(set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))])
|
||||
(set (pc) (if_then_else (lt (reg:CCNO 17) (const_int 0))
|
||||
(match_dup 0)
|
||||
(pc)))]
|
||||
"")
|
||||
|
||||
(define_split
|
||||
[(set (pc)
|
||||
(if_then_else (ge (match_operand:SI 1 "register_operand" "")
|
||||
(const_int 0))
|
||||
(match_operand 0 "" "")
|
||||
(pc)))
|
||||
(set (match_operand:SI 2 "memory_operand" "")
|
||||
(plus:SI (match_dup 1)
|
||||
(const_int -1)))
|
||||
(clobber (match_scratch:SI 3 ""))
|
||||
(clobber (reg:CC 17))]
|
||||
"TARGET_USE_LOOP && reload_completed"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
(parallel [(set (reg:CCNO 17)
|
||||
(compare:CCNO (plus:SI (match_dup 3) (const_int -1))
|
||||
(const_int 0)))
|
||||
(set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))])
|
||||
(set (match_dup 2) (match_dup 3))
|
||||
(set (pc) (if_then_else (lt (reg:CCNO 17) (const_int 0))
|
||||
(match_dup 0)
|
||||
(pc)))]
|
||||
"")
|
||||
|
||||
;; Call instructions.
|
||||
|
||||
|
@ -59,6 +59,8 @@ extern void ia64_function_prologue PARAMS((FILE *, int));
|
||||
extern void ia64_funtion_epilogue PARAMS((FILE *, int));
|
||||
extern int ia64_direct_return PARAMS((void));
|
||||
extern int predicate_operator PARAMS((rtx, enum machine_mode));
|
||||
extern int ar_lc_reg_operand PARAMS((rtx, enum machine_mode));
|
||||
|
||||
extern int ia64_move_ok PARAMS((rtx, rtx));
|
||||
|
||||
extern void ia64_expand_load_address PARAMS((rtx, rtx));
|
||||
@ -102,6 +104,7 @@ extern int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree));
|
||||
extern void ia64_encode_section_info PARAMS((tree));
|
||||
#endif /* TREE_CODE */
|
||||
|
||||
extern int ia64_register_move_cost PARAMS((enum reg_class, enum reg_class));
|
||||
extern int ia64_epilogue_uses PARAMS((int));
|
||||
extern void ia64_file_start PARAMS((FILE *));
|
||||
extern void ia64_expand_prologue PARAMS((void));
|
||||
|
@ -559,6 +559,19 @@ predicate_operator (op, mode)
|
||||
return ((GET_MODE (op) == mode || mode == VOIDmode)
|
||||
&& (code == EQ || code == NE));
|
||||
}
|
||||
|
||||
/* Return 1 if this is the ar.lc register. */
|
||||
|
||||
int
|
||||
ar_lc_reg_operand (op, mode)
|
||||
register rtx op; /* Operand being tested. */
|
||||
enum machine_mode mode; /* Mode requested by the caller. */
|
||||
{
|
||||
return (GET_MODE (op) == DImode /* The operand itself must be DImode... */
|
||||
&& (mode == DImode || mode == VOIDmode) /* ...and the caller must ask for DImode or VOIDmode. */
|
||||
&& GET_CODE (op) == REG
|
||||
&& REGNO (op) == AR_LC_REGNUM); /* Only the single ar.lc register number matches. */
|
||||
}
|
||||
|
||||
/* Return 1 if the operands of a move are ok. */
|
||||
|
||||
@ -683,6 +696,7 @@ struct ia64_frame_info
|
||||
long fr_pad_size; /* # bytes needed to align FP save area. */
|
||||
long pr_size; /* # bytes needed to store predicate regs. */
|
||||
long br_size; /* # bytes needed to store branch regs. */
|
||||
long ar_size; /* # bytes needed to store AR regs. */
|
||||
HARD_REG_SET mask; /* mask of saved registers. */
|
||||
int initialized; /* != 0 is frame size already calculated. */
|
||||
};
|
||||
@ -713,6 +727,7 @@ ia64_compute_frame_size (size)
|
||||
int fr_pad_size = 0;
|
||||
int pr_size = 0;
|
||||
int br_size = 0;
|
||||
int ar_size = 0;
|
||||
int pretend_pad_size = 0;
|
||||
int tmp;
|
||||
int regno;
|
||||
@ -772,6 +787,13 @@ ia64_compute_frame_size (size)
|
||||
else
|
||||
fr_pad_size = 0;
|
||||
|
||||
/* AR.LC, for reasons unexplained, is call saved. */
|
||||
if (regs_ever_live[AR_LC_REGNUM])
|
||||
{
|
||||
SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
|
||||
ar_size = 8;
|
||||
}
|
||||
|
||||
/* If we have an odd number of words of pretend arguments written to the
|
||||
stack, then the FR save area will be unaligned. We pad below this area
|
||||
to keep things 16 byte aligned. This needs to be kept distinct, to
|
||||
@ -780,7 +802,7 @@ ia64_compute_frame_size (size)
|
||||
pretend_pad_size = current_function_pretend_args_size % 16;
|
||||
|
||||
/* The 16 bytes is for the scratch area. */
|
||||
tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size
|
||||
tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size + ar_size
|
||||
+ current_function_outgoing_args_size + 16);
|
||||
tmp += (current_function_pretend_args_size
|
||||
? current_function_pretend_args_size - 16
|
||||
@ -810,6 +832,7 @@ ia64_compute_frame_size (size)
|
||||
current_frame_info.fr_pad_size = fr_pad_size;
|
||||
current_frame_info.pr_size = pr_size;
|
||||
current_frame_info.br_size = br_size;
|
||||
current_frame_info.ar_size = ar_size;
|
||||
COPY_HARD_REG_SET (current_frame_info.mask, mask);
|
||||
current_frame_info.initialized = reload_completed;
|
||||
|
||||
@ -822,8 +845,11 @@ save_restore_insns (save_p)
|
||||
{
|
||||
rtx insn;
|
||||
|
||||
if (current_frame_info.gr_size + current_frame_info.fr_size
|
||||
+ current_frame_info.br_size + current_frame_info.pr_size)
|
||||
if (current_frame_info.gr_size
|
||||
+ current_frame_info.fr_size
|
||||
+ current_frame_info.br_size
|
||||
+ current_frame_info.pr_size
|
||||
+ current_frame_info.ar_size)
|
||||
{
|
||||
rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2));
|
||||
rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
|
||||
@ -833,6 +859,7 @@ save_restore_insns (save_p)
|
||||
+ current_frame_info.fr_pad_size
|
||||
+ current_frame_info.br_size
|
||||
+ current_frame_info.pr_size
|
||||
+ current_frame_info.ar_size
|
||||
+ current_frame_info.var_size
|
||||
+ current_frame_info.pretend_size
|
||||
+ current_frame_info.pretend_pad_size));
|
||||
@ -961,6 +988,29 @@ save_restore_insns (save_p)
|
||||
if (save_p)
|
||||
RTX_FRAME_RELATED_P (insn) = 1;
|
||||
}
|
||||
|
||||
if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
|
||||
{
|
||||
rtx src, dest;
|
||||
|
||||
if (save_p)
|
||||
{
|
||||
src = gen_rtx_REG (DImode, AR_LC_REGNUM);
|
||||
dest = gen_rtx_MEM (DImode, tmp_post_inc);
|
||||
}
|
||||
else
|
||||
{
|
||||
src = gen_rtx_MEM (DImode, tmp_post_inc);
|
||||
dest = gen_rtx_REG (DImode, AR_LC_REGNUM);
|
||||
}
|
||||
|
||||
insn = emit_insn (gen_movdi (tmp2_reg, src));
|
||||
if (save_p)
|
||||
RTX_FRAME_RELATED_P (insn) = 1;
|
||||
insn = emit_insn (gen_movdi (dest, tmp2_reg));
|
||||
if (save_p)
|
||||
RTX_FRAME_RELATED_P (insn) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2149,6 +2199,28 @@ ia64_print_operand (file, x, code)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate the cost of moving data from a register in class FROM to
|
||||
one in class TO. */
|
||||
|
||||
int
|
||||
ia64_register_move_cost (from, to)
|
||||
enum reg_class from, to;
|
||||
{
|
||||
int from_hard, to_hard; /* Nonzero when the class is BR_REGS, AR_M_REGS or AR_I_REGS. */
|
||||
int from_gr, to_gr; /* Nonzero when the class is exactly GENERAL_REGS. */
|
||||
|
||||
from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
|
||||
to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
|
||||
from_gr = (from == GENERAL_REGS);
|
||||
to_gr = (to == GENERAL_REGS);
|
||||
|
||||
if (from_hard && to_hard) /* Both ends are branch/application classes: most expensive. */
|
||||
return 8;
|
||||
else if ((from_hard && !to_gr) || (!from_gr && to_hard)) /* One such end, and the other end is not GENERAL_REGS. */
|
||||
return 6;
|
||||
|
||||
return 2; /* Every other combination gets the base cost. */
|
||||
}
|
||||
|
||||
/* This function returns the register class required for a secondary
|
||||
register when copying between one of the registers in CLASS, and X,
|
||||
@ -2382,14 +2454,11 @@ ia64_override_options ()
|
||||
complex). */
|
||||
#define REG_GP (GR_REG (1))
|
||||
#define REG_RP (BR_REG (0))
|
||||
#define REG_AR_PFS (FIRST_PSEUDO_REGISTER)
|
||||
#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
|
||||
/* ??? This will eventually need to be a hard register. */
|
||||
#define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2)
|
||||
/* This is used for volatile asms which may require a stop bit immediately
|
||||
before and after them. */
|
||||
#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3)
|
||||
#define NUM_REGS (FIRST_PSEUDO_REGISTER + 4)
|
||||
#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
|
||||
#define NUM_REGS (FIRST_PSEUDO_REGISTER + 3)
|
||||
|
||||
/* For each register, we keep track of how many times it has been
|
||||
written in the current instruction group. If a register is written
|
||||
@ -2521,15 +2590,13 @@ rws_access_reg (regno, flags, pred)
|
||||
/* Branches have several RAW exceptions that allow to avoid
|
||||
barriers. */
|
||||
|
||||
if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS)
|
||||
if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
|
||||
/* RAW dependencies on branch regs are permissible as long
|
||||
as the writer is a non-branch instruction. Since we
|
||||
never generate code that uses a branch register written
|
||||
by a branch instruction, handling this case is
|
||||
easy. */
|
||||
/* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop,
|
||||
br.wexit, br.wtop. This is true currently. */
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
if (REGNO_REG_CLASS (regno) == PR_REGS
|
||||
&& ! rws_sum[regno].written_by_fp)
|
||||
@ -2678,7 +2745,7 @@ rtx_needs_barrier (x, flags, pred)
|
||||
new_flags.is_write = 0;
|
||||
/* ??? Why is this here? It seems unnecessary. */
|
||||
need_barrier |= rws_access_reg (REG_GP, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (AR_EC_REGNUM, new_flags, pred);
|
||||
|
||||
/* Avoid multiple register writes, in case this is a pattern with
|
||||
multiple CALL rtx. This avoids an abort in rws_access_reg. */
|
||||
@ -2688,7 +2755,7 @@ rtx_needs_barrier (x, flags, pred)
|
||||
{
|
||||
new_flags.is_write = 1;
|
||||
need_barrier |= rws_access_reg (REG_RP, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (AR_PFS_REGNUM, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
|
||||
}
|
||||
break;
|
||||
@ -2877,7 +2944,7 @@ rtx_needs_barrier (x, flags, pred)
|
||||
/* Alloc must always be the first instruction. Currently, we
|
||||
only emit it at the function start, so we don't need to worry
|
||||
about emitting a stop bit before it. */
|
||||
need_barrier = rws_access_reg (REG_AR_PFS, flags, pred);
|
||||
need_barrier = rws_access_reg (AR_PFS_REGNUM, flags, pred);
|
||||
|
||||
new_flags.is_write = 1;
|
||||
need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
|
||||
@ -2892,7 +2959,7 @@ rtx_needs_barrier (x, flags, pred)
|
||||
|
||||
case 4: /* mov ar.pfs= */
|
||||
new_flags.is_write = 1;
|
||||
need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred);
|
||||
need_barrier = rws_access_reg (AR_PFS_REGNUM, new_flags, pred);
|
||||
break;
|
||||
|
||||
case 5: /* set_bsp */
|
||||
@ -2920,10 +2987,10 @@ rtx_needs_barrier (x, flags, pred)
|
||||
case RETURN:
|
||||
new_flags.is_write = 0;
|
||||
need_barrier = rws_access_reg (REG_RP, flags, pred);
|
||||
need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred);
|
||||
need_barrier |= rws_access_reg (AR_PFS_REGNUM, flags, pred);
|
||||
|
||||
new_flags.is_write = 1;
|
||||
need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (AR_EC_REGNUM, new_flags, pred);
|
||||
need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
|
||||
break;
|
||||
|
||||
@ -3042,6 +3109,12 @@ emit_insn_group_barriers (insns)
|
||||
if (INSN_CODE (insn) == CODE_FOR_epilogue_deallocate_stack)
|
||||
pat = XVECEXP (pat, 0, 0);
|
||||
|
||||
/* ??? Similarly, the pattern we use for br.cloop
|
||||
confuses the code above. The second element of the
|
||||
vector is representative. */
|
||||
else if (INSN_CODE (insn) == CODE_FOR_doloop_end_internal)
|
||||
pat = XVECEXP (pat, 0, 1);
|
||||
|
||||
memset (rws_insn, 0, sizeof (rws_insn));
|
||||
need_barrier |= rtx_needs_barrier (pat, flags, 0);
|
||||
|
||||
@ -3164,6 +3237,9 @@ ia64_epilogue_uses (regno)
|
||||
if (regno == R_BR (0))
|
||||
return 1;
|
||||
|
||||
if (regno == AR_LC_REGNUM)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -535,12 +535,11 @@ while (0)
|
||||
/* Register Basics */
|
||||
|
||||
/* Number of hardware registers known to the compiler.
|
||||
We have 128 general registers, 128 floating point registers, 64 predicate
|
||||
registers, 8 branch registers, and one frame pointer register. */
|
||||
We have 128 general registers, 128 floating point registers,
|
||||
64 predicate registers, 8 branch registers, one frame pointer,
|
||||
and several "application" registers. */
|
||||
|
||||
/* ??? Should add ar.lc, ar.ec and probably also ar.pfs. */
|
||||
|
||||
#define FIRST_PSEUDO_REGISTER 330
|
||||
#define FIRST_PSEUDO_REGISTER 334
|
||||
|
||||
/* Ranges for the various kinds of registers. */
|
||||
#define ADDL_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 3)
|
||||
@ -561,10 +560,23 @@ while (0)
|
||||
#define IN_REG(REGNO) ((REGNO) + 112)
|
||||
#define LOC_REG(REGNO) ((REGNO) + 32)
|
||||
|
||||
#define AR_CCV_REGNUM 330
|
||||
#define AR_LC_REGNUM 331
|
||||
#define AR_EC_REGNUM 332
|
||||
#define AR_PFS_REGNUM 333
|
||||
|
||||
#define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && (REGNO) <= IN_REG (7))
|
||||
#define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79))
|
||||
#define OUT_REGNO_P(REGNO) ((REGNO) >= OUT_REG (0) && (REGNO) <= OUT_REG (7))
|
||||
|
||||
#define AR_M_REGNO_P(REGNO) ((REGNO) == AR_CCV_REGNUM)
|
||||
#define AR_I_REGNO_P(REGNO) ((REGNO) >= AR_LC_REGNUM \
|
||||
&& (REGNO) < FIRST_PSEUDO_REGISTER)
|
||||
#define AR_REGNO_P(REGNO) ((REGNO) >= AR_CCV_REGNUM \
|
||||
&& (REGNO) < FIRST_PSEUDO_REGISTER)
|
||||
|
||||
|
||||
|
||||
/* ??? Don't really need two sets of macros. I like this one better because
|
||||
it is less typing. */
|
||||
#define R_GR(REGNO) GR_REG (REGNO)
|
||||
@ -619,14 +631,14 @@ while (0)
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/* Branch registers. */ \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/*FP RA*/ \
|
||||
1, 1, \
|
||||
/*FP RA CCV LC EC PFS */ \
|
||||
1, 1, 1, 1, 1, 1 \
|
||||
}
|
||||
|
||||
/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in
|
||||
general) by function calls as well as for fixed registers. This macro
|
||||
therefore identifies the registers that are not available for general
|
||||
allocation of values that must live across function calls. */
|
||||
/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered
|
||||
(in general) by function calls as well as for fixed registers. This
|
||||
macro therefore identifies the registers that are not available for
|
||||
general allocation of values that must live across function calls. */
|
||||
|
||||
#define CALL_USED_REGISTERS \
|
||||
{ /* General registers. */ \
|
||||
@ -654,8 +666,8 @@ while (0)
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/* Branch registers. */ \
|
||||
1, 0, 0, 0, 0, 0, 1, 1, \
|
||||
/*FP RA*/ \
|
||||
1, 1, \
|
||||
/*FP RA CCV LC EC PFS */ \
|
||||
1, 1, 1, 1, 1, 1 \
|
||||
}
|
||||
|
||||
/* Define this macro if the target machine has register windows. This C
|
||||
@ -787,10 +799,10 @@ while (0)
|
||||
R_PR (0), \
|
||||
/* Special branch registers. */ \
|
||||
R_BR (0), \
|
||||
/* Frame pointer. Return address. */ \
|
||||
/* Other fixed registers. */ \
|
||||
FRAME_POINTER_REGNUM, RETURN_ADDRESS_POINTER_REGNUM, \
|
||||
AR_CCV_REGNUM, AR_LC_REGNUM, AR_EC_REGNUM, AR_PFS_REGNUM \
|
||||
}
|
||||
|
||||
|
||||
/* How Values Fit in Registers */
|
||||
|
||||
@ -815,6 +827,7 @@ while (0)
|
||||
(FR_REGNO_P (REGNO) ? (MODE) != CCmode \
|
||||
: PR_REGNO_P (REGNO) ? (MODE) == CCmode \
|
||||
: GR_REGNO_P (REGNO) ? (MODE) != XFmode \
|
||||
: AR_REGNO_P (REGNO) ? (MODE) == DImode \
|
||||
: 1)
|
||||
|
||||
/* A C expression that is nonzero if it is desirable to choose register
|
||||
@ -877,6 +890,8 @@ enum reg_class
|
||||
GR_REGS,
|
||||
FR_REGS,
|
||||
GR_AND_FR_REGS,
|
||||
AR_M_REGS,
|
||||
AR_I_REGS,
|
||||
ALL_REGS,
|
||||
LIM_REG_CLASSES
|
||||
};
|
||||
@ -890,7 +905,8 @@ enum reg_class
|
||||
constants. These names are used in writing some of the debugging dumps. */
|
||||
#define REG_CLASS_NAMES \
|
||||
{ "NO_REGS", "PR_REGS", "BR_REGS", "ADDL_REGS", "GR_REGS", \
|
||||
"FR_REGS", "GR_AND_FR_REGS", "ALL_REGS" }
|
||||
"FR_REGS", "GR_AND_FR_REGS", "AR_M_REGS", "AR_I_REGS", \
|
||||
"ALL_REGS" }
|
||||
|
||||
/* An initializer containing the contents of the register classes, as integers
|
||||
which are bit masks. The Nth integer specifies the contents of class N.
|
||||
@ -901,35 +917,43 @@ enum reg_class
|
||||
/* NO_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x000 }, \
|
||||
0x00000000, 0x00000000, 0x0000 }, \
|
||||
/* PR_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x000 }, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x0000 }, \
|
||||
/* BR_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x0FF }, \
|
||||
0x00000000, 0x00000000, 0x00FF }, \
|
||||
/* ADDL_REGS. */ \
|
||||
{ 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x000 }, \
|
||||
0x00000000, 0x00000000, 0x0000 }, \
|
||||
/* GR_REGS. */ \
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x300 }, \
|
||||
0x00000000, 0x00000000, 0x0300 }, \
|
||||
/* FR_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
|
||||
0x00000000, 0x00000000, 0x000 }, \
|
||||
0x00000000, 0x00000000, 0x0000 }, \
|
||||
/* GR_AND_FR_REGS. */ \
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
|
||||
0x00000000, 0x00000000, 0x300 }, \
|
||||
0x00000000, 0x00000000, 0x0300 }, \
|
||||
/* AR_M_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x0400 }, \
|
||||
/* AR_I_REGS. */ \
|
||||
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
|
||||
0x00000000, 0x00000000, 0x3800 }, \
|
||||
/* ALL_REGS. */ \
|
||||
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x3FF }, \
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x3FFF }, \
|
||||
}
|
||||
|
||||
/* A C expression whose value is a register class containing hard register
|
||||
@ -944,6 +968,8 @@ enum reg_class
|
||||
: FR_REGNO_P (REGNO) ? FR_REGS \
|
||||
: PR_REGNO_P (REGNO) ? PR_REGS \
|
||||
: BR_REGNO_P (REGNO) ? BR_REGS \
|
||||
: AR_M_REGNO_P (REGNO) ? AR_I_REGS \
|
||||
: AR_I_REGNO_P (REGNO) ? AR_M_REGS \
|
||||
: NO_REGS)
|
||||
|
||||
/* A macro whose definition is the name of the class to which a valid base
|
||||
@ -968,6 +994,8 @@ enum reg_class
|
||||
: (CHAR) == 'a' ? ADDL_REGS \
|
||||
: (CHAR) == 'b' ? BR_REGS \
|
||||
: (CHAR) == 'c' ? PR_REGS \
|
||||
: (CHAR) == 'd' ? AR_M_REGS \
|
||||
: (CHAR) == 'e' ? AR_I_REGS \
|
||||
: NO_REGS)
|
||||
|
||||
/* A C expression which is nonzero if register number NUM is suitable for use
|
||||
@ -1816,8 +1844,8 @@ do { \
|
||||
&& rtx_equal_p (R, XEXP (X, 0)) \
|
||||
&& (GET_CODE (XEXP (X, 1)) == REG \
|
||||
|| (GET_CODE (XEXP (X, 1)) == CONST_INT \
|
||||
&& INTVAL (XEXP (X, 1)) >= -512 \
|
||||
&& INTVAL (XEXP (X, 1)) < 512)))
|
||||
&& INTVAL (XEXP (X, 1)) >= -256 \
|
||||
&& INTVAL (XEXP (X, 1)) < 256)))
|
||||
|
||||
#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
|
||||
do { \
|
||||
@ -1926,10 +1954,7 @@ do { \
|
||||
one in class TO. */
|
||||
|
||||
#define REGISTER_MOVE_COST(FROM, TO) \
|
||||
((FROM) == BR_REGS && (TO) == BR_REGS ? 8 \
|
||||
: (((FROM) == BR_REGS && (TO) != GENERAL_REGS) \
|
||||
|| ((TO) == BR_REGS && (FROM) != GENERAL_REGS)) ? 6 \
|
||||
: 2)
|
||||
ia64_register_move_cost((FROM), (TO))
|
||||
|
||||
/* A C expression for the cost of moving data of mode M between a register and
|
||||
memory. */
|
||||
@ -2363,7 +2388,7 @@ do { \
|
||||
/* Branch registers. */ \
|
||||
"b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \
|
||||
/* Frame pointer. Return address. */ \
|
||||
"sfp", "retaddr" \
|
||||
"sfp", "retaddr", "ar.ccv", "ar.lc", "ar.ec", "ar.pfs" \
|
||||
}
|
||||
|
||||
/* If defined, a C initializer for an array of structures containing a name and
|
||||
@ -2749,7 +2774,8 @@ do { \
|
||||
{ "normal_comparison_operator", {EQ, NE, GT, LE, GTU, LEU}}, \
|
||||
{ "adjusted_comparison_operator", {LT, GE, LTU, GEU}}, \
|
||||
{ "call_multiple_values_operation", {PARALLEL}}, \
|
||||
{ "predicate_operator", {NE, EQ}},
|
||||
{ "predicate_operator", {NE, EQ}}, \
|
||||
{ "ar_lc_reg_operand", {REG}},
|
||||
|
||||
/* An alias for a machine mode name. This is the machine mode that elements of
|
||||
a jump-table should have. */
|
||||
|
@ -368,10 +368,12 @@
|
||||
(define_insn ""
|
||||
[(cond_exec
|
||||
(match_operator 2 "predicate_operator"
|
||||
[(match_operand:CC 3 "register_operand" "c,c,c,c,c,c,c,c")
|
||||
[(match_operand:CC 3 "register_operand" "c,c,c,c,c,c,c,c,c,c")
|
||||
(const_int 0)])
|
||||
(set (match_operand:DI 0 "register_operand" "=r,r,r, r,*f,*f, r,*b")
|
||||
(match_operand:DI 1 "nonmemory_operand" "rO,J,i,*f,rO,*f,*b,rO")))]
|
||||
(set (match_operand:DI 0 "register_operand"
|
||||
"=r,r,r, r,*f,*f, r,*b*e, r,*d")
|
||||
(match_operand:DI 1 "nonmemory_operand"
|
||||
"rO,J,i,*f,rO,*f,*b*e, rO,*d,rO")))]
|
||||
"TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])"
|
||||
"*
|
||||
{
|
||||
@ -383,6 +385,8 @@
|
||||
\"(%J2) setf.sig %0 = %r1\",
|
||||
\"(%J2) mov %0 = %1\",
|
||||
\"(%J2) mov %0 = %1\",
|
||||
\"(%J2) mov %0 = %r1\",
|
||||
\"(%J2) mov %0 = %1\",
|
||||
\"(%J2) mov %0 = %r1\"
|
||||
};
|
||||
|
||||
@ -403,14 +407,14 @@
|
||||
|
||||
return alt[which_alternative];
|
||||
}"
|
||||
[(set_attr "type" "A,A,L,M,M,F,I,I")
|
||||
[(set_attr "type" "A,A,L,M,M,F,I,I,M,M")
|
||||
(set_attr "predicable" "no")])
|
||||
|
||||
(define_insn "*movdi_internal_astep"
|
||||
[(set (match_operand:DI 0 "destination_operand"
|
||||
"=r,r,r,r, m, r,*f,*f,*f, Q, r,*b")
|
||||
"=r,r,r,r, m, r,*f,*f,*f, Q, r,*b*e, r,*d")
|
||||
(match_operand:DI 1 "move_operand"
|
||||
"rO,J,i,m,rO,*f,rO,*f, Q,*f,*b,rO"))]
|
||||
"rO,J,i,m,rO,*f,rO,*f, Q,*f,*b*e, rO,*d,rO"))]
|
||||
"TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])"
|
||||
"*
|
||||
{
|
||||
@ -426,6 +430,8 @@
|
||||
\"ldf8 %0 = %1%P1\",
|
||||
\"stf8 %0 = %1%P0\",
|
||||
\"mov %0 = %1\",
|
||||
\"mov %0 = %r1\",
|
||||
\"mov %0 = %1\",
|
||||
\"mov %0 = %r1\"
|
||||
};
|
||||
|
||||
@ -435,14 +441,14 @@
|
||||
|
||||
return alt[which_alternative];
|
||||
}"
|
||||
[(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I")
|
||||
[(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I,M,M")
|
||||
(set_attr "predicable" "no")])
|
||||
|
||||
(define_insn "*movdi_internal"
|
||||
[(set (match_operand:DI 0 "destination_operand"
|
||||
"=r,r,r,r, m, r,*f,*f,*f, Q, r,*b")
|
||||
"=r,r,r,r, m, r,*f,*f,*f, Q, r,*b*e, r,*d")
|
||||
(match_operand:DI 1 "move_operand"
|
||||
"rO,J,i,m,rO,*f,rO,*f, Q,*f,*b,rO"))]
|
||||
"rO,J,i,m,rO,*f,rO,*f, Q,*f,*b*e, rO,*d,rO"))]
|
||||
"! TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])"
|
||||
"*
|
||||
{
|
||||
@ -458,6 +464,8 @@
|
||||
\"%,ldf8 %0 = %1%P1\",
|
||||
\"%,stf8 %0 = %1%P0\",
|
||||
\"%,mov %0 = %1\",
|
||||
\"%,mov %0 = %r1\",
|
||||
\"%,mov %0 = %1\",
|
||||
\"%,mov %0 = %r1\"
|
||||
};
|
||||
|
||||
@ -467,7 +475,7 @@
|
||||
|
||||
return alt[which_alternative];
|
||||
}"
|
||||
[(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I")])
|
||||
[(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I,M,M")])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
@ -1104,6 +1112,15 @@
|
||||
"add %0 = %1, %2, 1"
|
||||
[(set_attr "type" "A")])
|
||||
|
||||
;; SImode 2*x + 1: matched as (x * 2) + 1 and emitted as a single add whose
;; two register sources are both operand 1, using the "+ 1" form of add.
(define_insn "*addsi3_plus1_alt"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (mult:SI (match_operand:SI 1 "register_operand" "r") (const_int 2))
	  (const_int 1)))]
  ""
  "add %0 = %1, %1, 1"
  [(set_attr "type" "A")])
|
||||
|
||||
(define_expand "subsi3"
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
(minus:SI (match_operand:SI 1 "reg_or_8bit_operand" "")
|
||||
@ -1299,6 +1316,18 @@
|
||||
"add %0 = %1, %2, 1"
|
||||
[(set_attr "type" "A")])
|
||||
|
||||
;; This has some of the same problems as shladd. We let the shladd
|
||||
;; eliminator hack handle it, which results in the 1 being forced into
|
||||
;; a register, but not more ugliness here.
|
||||
;; DImode 2*x + 1: matched as (x * 2) + 1 and emitted as a single add whose
;; two register sources are both operand 1, using the "+ 1" form of add.
(define_insn "*adddi3_plus1_alt"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(plus:DI
	  (mult:DI (match_operand:DI 1 "register_operand" "r") (const_int 2))
	  (const_int 1)))]
  ""
  "add %0 = %1, %1, 1"
  [(set_attr "type" "A")])
|
||||
|
||||
(define_insn "subdi3"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(minus:DI (match_operand:DI 1 "reg_or_8bit_operand" "rK")
|
||||
@ -1862,31 +1891,20 @@
|
||||
;; doesn't succeed, then this remain a shladd pattern, and will be reloaded
|
||||
;; incorrectly.
|
||||
|
||||
(define_insn "*shladd_elim"
|
||||
(define_insn_and_split "*shladd_elim"
|
||||
[(set (match_operand:DI 0 "register_operand" "=&r")
|
||||
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r")
|
||||
(match_operand:DI 2 "shladd_operand" "n"))
|
||||
(match_operand:DI 3 "register_operand" "r"))
|
||||
(match_operand:DI 3 "nonmemory_operand" "r"))
|
||||
(match_operand:DI 4 "nonmemory_operand" "rI")))]
|
||||
"reload_in_progress"
|
||||
"#"
|
||||
[(set_attr "type" "unknown")])
|
||||
|
||||
;; ??? Need to emit an instruction group barrier here because this gets split
|
||||
;; after md_reorg.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
|
||||
(match_operand:DI 2 "shladd_operand" ""))
|
||||
(match_operand:DI 3 "register_operand" ""))
|
||||
(match_operand:DI 4 "reg_or_14bit_operand" "")))]
|
||||
"* abort ();"
|
||||
"reload_completed"
|
||||
[(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
|
||||
(match_dup 3)))
|
||||
(unspec_volatile [(const_int 0)] 2)
|
||||
(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
|
||||
"")
|
||||
""
|
||||
[(set_attr "type" "unknown")])
|
||||
|
||||
(define_insn "ashrdi3"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
@ -2465,6 +2483,42 @@
|
||||
|
||||
;; ::::::::::::::::::::
|
||||
;; ::
|
||||
;; :: Counted loop operations
|
||||
;; ::
|
||||
;; ::::::::::::::::::::
|
||||
|
||||
;; Expander for the end-of-loop decrement-and-branch.  The loop pseudo
;; (operand 0) is not used: the generated jump always counts in the ar.lc
;; hard register, in DImode.
(define_expand "doloop_end"
  [(use (match_operand 0 "" ""))	; loop pseudo
   (use (match_operand 1 "" ""))	; iterations; zero if unknown
   (use (match_operand 2 "" ""))	; max iterations
   (use (match_operand 3 "" ""))	; loop level
   (use (match_operand 4 "" ""))]	; label
  ""
  "
{
  rtx lc_reg;

  /* Only use cloop on innermost loops; reject any loop level above 1.  */
  if (INTVAL (operands[3]) > 1)
    FAIL;

  /* The counter is the ar.lc application register, not operands[0].  */
  lc_reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
  emit_jump_insn (gen_doloop_end_internal (lc_reg, operands[4]));
  DONE;
}")
|
||||
|
||||
;; Counted-loop branch on ar.lc.  Both sets of the parallel read the value
;; ar.lc had before the insn:
;;   - the branch to operand 1 is taken when ar.lc != 0;
;;   - ar.lc is decremented in exactly that same case and left unchanged
;;     when it is already zero (the fall-through, loop-exit case).
;; The arms of the second if_then_else must therefore be
;; (plus ar.lc -1) for "ne" and ar.lc itself otherwise; having them the
;; other way round would describe a counter that never decrements while
;; the loop is still running.
(define_insn "doloop_end_internal"
  [(set (pc) (if_then_else (ne (match_operand:DI 0 "ar_lc_reg_operand" "")
			       (const_int 0))
		(label_ref (match_operand 1 "" ""))
		(pc)))
   (set (match_dup 0) (if_then_else:DI (ne (match_dup 0) (const_int 0))
			 (plus:DI (match_dup 0) (const_int -1))
			 (match_dup 0)))]
  ""
  "br.cloop.sptk.few %l1"
  [(set_attr "type" "B")
   (set_attr "predicable" "no")])
|
||||
|
||||
;; ::::::::::::::::::::
|
||||
;; ::
|
||||
;; :: Set flag operations
|
||||
;; ::
|
||||
;; ::::::::::::::::::::
|
||||
@ -2706,32 +2760,32 @@
|
||||
;; Errata 72 workaround.
|
||||
(define_insn "*cmovdi_internal_astep"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand"
|
||||
"=r,*f,Q,*b,r,*f,Q,*b,r,*f,Q,*b")
|
||||
"=r,*f,Q,*b*d*e,r,*f,Q,*b*d*e,r,*f,Q,*b*d*e")
|
||||
(if_then_else:DI
|
||||
(match_operator:CC 4 "predicate_operator"
|
||||
[(match_operand:CC 1 "register_operand"
|
||||
"c,c,c,c,c,c,c,c,c,c,c,c")
|
||||
(const_int 0)])
|
||||
(match_operand:DI 2 "general_operand"
|
||||
"0,0,0,0,ri*f*b,rO,*f,r,ri*f*b,rO,*f,r")
|
||||
"0,0,0,0,ri*f*b*d*e,rO,*f,r,ri*f*b*d*e,rO,*f,r")
|
||||
(match_operand:DI 3 "general_operand"
|
||||
"ri*f*b,rO,*f,r,0,0,0,0,ri*f*b,rO,*f,r")))]
|
||||
"ri*f*b*d*e,rO,*f,r,0,0,0,0,ri*f*b*d*e,rO,*f,r")))]
|
||||
"TARGET_A_STEP"
|
||||
"* abort ();"
|
||||
[(set_attr "predicable" "no")])
|
||||
|
||||
(define_insn "*cmovdi_internal"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand"
|
||||
"=r,m,*f,Q,*b,r,m,*f,Q,*b,r,m,*f,Q,*b")
|
||||
"=r,m,*f,Q,*b*d*e,r,m,*f,Q,*b*d*e,r,m,*f,Q,*b*d*e")
|
||||
(if_then_else:DI
|
||||
(match_operator:CC 4 "predicate_operator"
|
||||
[(match_operand:CC 1 "register_operand"
|
||||
"c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")
|
||||
(const_int 0)])
|
||||
(match_operand:DI 2 "general_operand"
|
||||
"0,0,0,0,0,rim*f*b,rO,rOQ,*f,r,rim*f*b,rO,rOQ,*f,r")
|
||||
"0,0,0,0,0,rim*f*b*d*e,rO,rOQ,*f,r,rim*f*b*d*e,rO,rOQ,*f,r")
|
||||
(match_operand:DI 3 "general_operand"
|
||||
"rim*f*b,rO,rOQ,*f,r,0,0,0,0,0,rim*f*b,rO,rOQ,*f,r")))]
|
||||
"rim*f*b*d*e,rO,rOQ,*f,r,0,0,0,0,0,rim*f*b*d*e,rO,rOQ,*f,r")))]
|
||||
"! TARGET_A_STEP"
|
||||
"* abort ();"
|
||||
[(set_attr "predicable" "no")])
|
||||
|
@ -400,11 +400,6 @@ optimization_options (level, size)
|
||||
int level;
|
||||
int size ATTRIBUTE_UNUSED;
|
||||
{
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
/* When optimizing, enable use of BCT instruction. */
|
||||
if (level >= 1)
|
||||
flag_branch_on_count_reg = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Do anything needed at the start of the asm file. */
|
||||
|
@ -13291,16 +13291,30 @@ operands[2] = GEN_INT (INTVAL (operands[1]) >> 32);
|
||||
;; Define the subtract-one-and-jump insns, starting with the template
|
||||
;; so loop.c knows what to generate.
|
||||
|
||||
(define_expand "decrement_and_branch_on_count"
|
||||
[(use (match_operand 0 "register_operand" ""))
|
||||
(use (label_ref (match_operand 1 "" "")))]
|
||||
(define_expand "doloop_end"
|
||||
[(use (match_operand 0 "" "")) ; loop pseudo
|
||||
(use (match_operand 1 "" "")) ; iterations; zero if unknown
|
||||
(use (match_operand 2 "" "")) ; max iterations
|
||||
(use (match_operand 3 "" "")) ; loop level
|
||||
(use (match_operand 4 "" ""))] ; label
|
||||
""
|
||||
"
|
||||
{
|
||||
/* Only use this on innermost loops. */
|
||||
if (INTVAL (operands[3]) > 1)
|
||||
FAIL;
|
||||
if (TARGET_POWERPC64)
|
||||
emit_jump_insn (gen_ctrdi (operands[0], operands[1]));
|
||||
{
|
||||
if (GET_MODE (operands[0]) != DImode)
|
||||
FAIL;
|
||||
emit_jump_insn (gen_ctrdi (operands[0], operands[4]));
|
||||
}
|
||||
else
|
||||
emit_jump_insn (gen_ctrsi (operands[0], operands[1]));
|
||||
{
|
||||
if (GET_MODE (operands[0]) != SImode)
|
||||
FAIL;
|
||||
emit_jump_insn (gen_ctrsi (operands[0], operands[4]));
|
||||
}
|
||||
DONE;
|
||||
}")
|
||||
|
||||
|
846
gcc/doloop.c
Normal file
846
gcc/doloop.c
Normal file
@ -0,0 +1,846 @@
|
||||
/* Perform doloop optimizations
|
||||
Copyright (C) 1999, 2000 Free Software Foundation, Inc.
|
||||
Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
|
||||
|
||||
This file is part of GNU CC.
|
||||
|
||||
GNU CC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU CC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU CC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "rtl.h"
|
||||
#include "insn-flags.h"
|
||||
#include "flags.h"
|
||||
#include "expr.h"
|
||||
#include "loop.h"
|
||||
#include "hard-reg-set.h"
|
||||
#include "basic-block.h"
|
||||
#include "tm_p.h"
|
||||
|
||||
|
||||
/* This module is used to modify loops with a determinable number of
|
||||
iterations to use special low-overhead looping instructions.
|
||||
|
||||
It first validates whether the loop is well behaved and has a
|
||||
determinable number of iterations (either at compile or run-time).
|
||||
It then modifies the loop to use a low-overhead looping pattern as
|
||||
follows:
|
||||
|
||||
1. A pseudo register is allocated as the loop iteration counter.
|
||||
|
||||
2. The number of loop iterations is calculated and is stored
|
||||
in the loop counter.
|
||||
|
||||
3. At the end of the loop, the jump insn is replaced by the
|
||||
doloop_end pattern. The compare must remain because it might be
|
||||
used elsewhere. If the loop-variable or condition register are
|
||||
not used elsewhere, they will be eliminated by flow.
|
||||
|
||||
4. An optional doloop_begin pattern is inserted at the top of the
|
||||
loop.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_doloop_end
|
||||
|
||||
static rtx doloop_condition_get
|
||||
PARAMS ((rtx));
|
||||
static unsigned HOST_WIDE_INT doloop_iterations_max
|
||||
PARAMS ((const struct loop_info *, enum machine_mode, int));
|
||||
static int doloop_valid_p
|
||||
PARAMS ((const struct loop *, rtx));
|
||||
static int doloop_modify
|
||||
PARAMS ((const struct loop *, rtx, rtx, rtx, rtx, rtx));
|
||||
static int doloop_modify_runtime
|
||||
PARAMS ((const struct loop *, rtx, rtx, rtx, enum machine_mode, rtx));
|
||||
|
||||
|
||||
/* Return the loop termination condition for PATTERN or zero
|
||||
if it is not a decrement and branch jump insn. */
|
||||
static rtx
|
||||
doloop_condition_get (pattern)
|
||||
rtx pattern;
|
||||
{
|
||||
rtx cmp;
|
||||
rtx inc;
|
||||
rtx reg;
|
||||
rtx condition;
|
||||
|
||||
/* The canonical doloop pattern we expect is:
|
||||
|
||||
(parallel [(set (pc) (if_then_else (condition)
|
||||
(label_ref (label))
|
||||
(pc)))
|
||||
(set (reg) (plus (reg) (const_int -1)))
|
||||
(additional clobbers and uses)])
|
||||
|
||||
Some machines (IA-64) make the decrement conditional on
|
||||
the condition as well, so we don't bother verifying the
|
||||
actual decrement. In summary, the branch must be the
|
||||
first entry of the parallel (also required by jump.c),
|
||||
and the second entry of the parallel must be a set of
|
||||
the loop counter register. */
|
||||
|
||||
if (GET_CODE (pattern) != PARALLEL)
|
||||
return 0;
|
||||
|
||||
cmp = XVECEXP (pattern, 0, 0);
|
||||
inc = XVECEXP (pattern, 0, 1);
|
||||
|
||||
/* Check for (set (reg) (something)). */
|
||||
if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
|
||||
return 0;
|
||||
|
||||
/* Extract loop counter register. */
|
||||
reg = SET_DEST (inc);
|
||||
|
||||
/* Check for (set (pc) (if_then_else (condition)
|
||||
(label_ref (label))
|
||||
(pc))). */
|
||||
if (GET_CODE (cmp) != SET
|
||||
|| SET_DEST (cmp) != pc_rtx
|
||||
|| GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
|
||||
|| GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
|
||||
|| XEXP (SET_SRC (cmp), 2) != pc_rtx)
|
||||
return 0;
|
||||
|
||||
/* Extract loop termination condition. */
|
||||
condition = XEXP (SET_SRC (cmp), 0);
|
||||
|
||||
if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
|
||||
|| GET_CODE (XEXP (condition, 1)) != CONST_INT)
|
||||
return 0;
|
||||
|
||||
if (XEXP (condition, 0) == reg)
|
||||
return condition;
|
||||
|
||||
if (GET_CODE (XEXP (condition, 0)) == PLUS
|
||||
&& XEXP (XEXP (condition, 0), 0) == reg)
|
||||
return condition;
|
||||
|
||||
/* ??? If a machine uses a funny comparison, we could return a
|
||||
canonicalised form here. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Return an estimate of the maximum number of loop iterations for the
|
||||
loop specified by LOOP or zero if the loop is not normal.
|
||||
MODE is the mode of the iteration count and NONNEG is non-zero if
|
||||
the the iteration count has been proved to be non-negative. */
|
||||
static unsigned HOST_WIDE_INT
|
||||
doloop_iterations_max (loop_info, mode, nonneg)
|
||||
const struct loop_info *loop_info;
|
||||
enum machine_mode mode;
|
||||
int nonneg;
|
||||
{
|
||||
unsigned HOST_WIDE_INT n_iterations_max;
|
||||
enum rtx_code code;
|
||||
rtx min_value;
|
||||
rtx max_value;
|
||||
HOST_WIDE_INT abs_inc;
|
||||
int neg_inc;
|
||||
|
||||
neg_inc = 0;
|
||||
abs_inc = INTVAL (loop_info->increment);
|
||||
if (abs_inc < 0)
|
||||
{
|
||||
abs_inc = -abs_inc;
|
||||
neg_inc = 1;
|
||||
}
|
||||
|
||||
if (neg_inc)
|
||||
{
|
||||
code = swap_condition (loop_info->comparison_code);
|
||||
min_value = loop_info->final_equiv_value;
|
||||
max_value = loop_info->initial_equiv_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
code = loop_info->comparison_code;
|
||||
min_value = loop_info->initial_equiv_value;
|
||||
max_value = loop_info->final_equiv_value;
|
||||
}
|
||||
|
||||
/* Since the loop has a VTOP, we know that the initial test will be
|
||||
true and thus the value of max_value should be greater than the
|
||||
value of min_value. Thus the difference should always be positive
|
||||
and the code must be LT, LE, LTU, LEU, or NE. Otherwise the loop is
|
||||
not normal, e.g., `for (i = 0; i < 10; i--)'. */
|
||||
switch (code)
|
||||
{
|
||||
case LTU:
|
||||
case LEU:
|
||||
{
|
||||
unsigned HOST_WIDE_INT umax;
|
||||
unsigned HOST_WIDE_INT umin;
|
||||
|
||||
if (GET_CODE (min_value) == CONST_INT)
|
||||
umin = INTVAL (min_value);
|
||||
else
|
||||
umin = 0;
|
||||
|
||||
if (GET_CODE (max_value) == CONST_INT)
|
||||
umax = INTVAL (max_value);
|
||||
else
|
||||
umax = (2U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
|
||||
n_iterations_max = umax - umin;
|
||||
break;
|
||||
}
|
||||
|
||||
case LT:
|
||||
case LE:
|
||||
{
|
||||
HOST_WIDE_INT smax;
|
||||
HOST_WIDE_INT smin;
|
||||
|
||||
if (GET_CODE (min_value) == CONST_INT)
|
||||
smin = INTVAL (min_value);
|
||||
else
|
||||
smin = -(1U << (GET_MODE_BITSIZE (mode) - 1));
|
||||
|
||||
if (GET_CODE (max_value) == CONST_INT)
|
||||
smax = INTVAL (max_value);
|
||||
else
|
||||
smax = (1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
|
||||
n_iterations_max = smax - smin;
|
||||
break;
|
||||
}
|
||||
|
||||
case NE:
|
||||
if (GET_CODE (min_value) == CONST_INT
|
||||
&& GET_CODE (max_value) == CONST_INT)
|
||||
n_iterations_max = INTVAL (max_value) - INTVAL (min_value);
|
||||
else
|
||||
/* We need to conservatively assume that we might have the maximum
|
||||
number of iterations without any additional knowledge. */
|
||||
n_iterations_max = (2U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
n_iterations_max /= abs_inc;
|
||||
|
||||
/* If we know that the iteration count is non-negative then adjust
|
||||
n_iterations_max if it is so large that it appears negative. */
|
||||
if (nonneg && n_iterations_max > (1U << (GET_MODE_BITSIZE (mode) - 1)))
|
||||
n_iterations_max = (1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
|
||||
return n_iterations_max;
|
||||
}
|
||||
|
||||
|
||||
/* Return non-zero if the loop specified by LOOP is suitable for
|
||||
the use of special low-overhead looping instructions. */
|
||||
static int
|
||||
doloop_valid_p (loop, jump_insn)
|
||||
const struct loop *loop;
|
||||
rtx jump_insn;
|
||||
{
|
||||
const struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
|
||||
/* The loop must have a conditional jump at the end. */
|
||||
if (! any_condjump_p (jump_insn)
|
||||
|| ! onlyjump_p (jump_insn))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Invalid jump at loop end.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Give up if a loop has been completely unrolled. */
|
||||
if (loop_info->n_iterations == loop_info->unroll_number)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Loop completely unrolled.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The loop must have a single exit target. A break or return
|
||||
statement within a loop will generate multiple loop exits.
|
||||
Another example of a loop that currently generates multiple exit
|
||||
targets is for (i = 0; i < (foo ? 8 : 4); i++) { }. */
|
||||
if (loop_info->has_multiple_exit_targets)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Loop has multiple exit targets.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* An indirect jump may jump out of the loop. */
|
||||
if (loop_info->has_indirect_jump)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Indirect jump in function.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* A called function may clobber any special registers required for
|
||||
low-overhead looping. */
|
||||
if (loop_info->has_call)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Function call in loop.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Some targets (eg, PPC) use the count register for branch on table
|
||||
instructions. ??? This should be a target specific check. */
|
||||
if (loop_info->has_tablejump)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Computed branch in the loop.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (! loop_info->increment)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Could not determine iteration info.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (GET_CODE (loop_info->increment) != CONST_INT)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Increment not an integer constant.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* There is no guarantee that a NE loop will terminate if the
|
||||
absolute increment is not unity. ??? We could compute this
|
||||
condition at run-time and have a additional jump around the loop
|
||||
to ensure an infinite loop. */
|
||||
if (loop_info->comparison_code == NE
|
||||
&& INTVAL (loop_info->increment) != -1
|
||||
&& INTVAL (loop_info->increment) != 1)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: NE loop with non-unity increment.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check for loops that may not terminate under special conditions. */
|
||||
if (! loop_info->n_iterations
|
||||
&& ((loop_info->comparison_code == LEU
|
||||
&& INTVAL (loop_info->increment) > 0)
|
||||
|| (loop_info->comparison_code == GEU
|
||||
&& INTVAL (loop_info->increment) < 0)))
|
||||
{
|
||||
/* If the comparison is LEU and the comparison value is UINT_MAX
|
||||
then the loop will not terminate. Similarly, if the
|
||||
comparison code is GEU and the initial value is 0, the loop
|
||||
will not terminate.
|
||||
|
||||
Note that with LE and GE, the loop behaviour can be
|
||||
implementation dependent if an overflow occurs, say between
|
||||
INT_MAX and INT_MAX + 1. We thus don't have to worry about
|
||||
these two cases.
|
||||
|
||||
??? We could compute these conditions at run-time and have a
|
||||
additional jump around the loop to ensure an infinite loop.
|
||||
However, it is very unlikely that this is the intended
|
||||
behaviour of the loop and checking for these rare boundary
|
||||
conditions would pessimize all other code. */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Possible infinite iteration case ignored.\n");
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* Modify the loop to use the low-overhead looping insn where LOOP
|
||||
describes the loop, ITERATIONS is an RTX containing the desired
|
||||
number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying
|
||||
the maximum number of loop iterations, and DOLOOP_INSN is the
|
||||
low-overhead looping insn to emit at the end of the loop. This
|
||||
returns non-zero if it was successful. */
|
||||
static int
|
||||
doloop_modify (loop, iterations, iterations_max,
|
||||
doloop_seq, start_label, condition)
|
||||
const struct loop *loop;
|
||||
rtx iterations;
|
||||
rtx iterations_max;
|
||||
rtx doloop_seq;
|
||||
rtx start_label;
|
||||
rtx condition;
|
||||
{
|
||||
rtx counter_reg;
|
||||
rtx count;
|
||||
rtx sequence;
|
||||
rtx jump_insn;
|
||||
int nonneg = 0;
|
||||
int decrement_count;
|
||||
|
||||
jump_insn = prev_nonnote_insn (loop->end);
|
||||
|
||||
if (loop_dump_stream)
|
||||
{
|
||||
fprintf (loop_dump_stream, "Doloop: Inserting doloop pattern (");
|
||||
if (GET_CODE (iterations) == CONST_INT)
|
||||
fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
|
||||
INTVAL (iterations));
|
||||
else
|
||||
fputs ("runtime", loop_dump_stream);
|
||||
fputs (" iterations).", loop_dump_stream);
|
||||
}
|
||||
|
||||
/* Discard original jump to continue loop. The original compare
|
||||
result may still be live, so it cannot be discarded explicitly. */
|
||||
delete_insn (jump_insn);
|
||||
|
||||
/* Emit the label that will delimit the start of the loop. */
|
||||
emit_label_after (start_label, loop->start);
|
||||
LABEL_NUSES (start_label)++;
|
||||
|
||||
counter_reg = XEXP (condition, 0);
|
||||
if (GET_CODE (counter_reg) == PLUS)
|
||||
counter_reg = XEXP (counter_reg, 0);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
count = iterations;
|
||||
decrement_count = 0;
|
||||
switch (GET_CODE (condition))
|
||||
{
|
||||
case NE:
|
||||
/* Currently only NE tests against zero and one are supported. */
|
||||
if (XEXP (condition, 1) == const0_rtx)
|
||||
decrement_count = 1;
|
||||
else if (XEXP (condition, 1) != const1_rtx)
|
||||
abort ();
|
||||
break;
|
||||
|
||||
case GE:
|
||||
/* Currently only GE tests against zero are supported. */
|
||||
if (XEXP (condition, 1) != const0_rtx)
|
||||
abort ();
|
||||
|
||||
/* The iteration count needs decrementing for a GE test. */
|
||||
decrement_count = 1;
|
||||
|
||||
/* Determine if the iteration counter will be non-negative.
|
||||
Note that the maximum value loaded is iterations_max - 1. */
|
||||
if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max)
|
||||
<= (1U << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
|
||||
nonneg = 1;
|
||||
break;
|
||||
|
||||
/* Abort if an invalid doloop pattern has been generated. */
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
if (decrement_count)
|
||||
{
|
||||
if (GET_CODE (count) == CONST_INT)
|
||||
count = GEN_INT (INTVAL (count) - 1);
|
||||
else
|
||||
count = expand_binop (GET_MODE (counter_reg), sub_optab,
|
||||
count, GEN_INT (1),
|
||||
0, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
|
||||
/* Insert initialization of the count register into the loop header. */
|
||||
convert_move (counter_reg, count, 1);
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop->start);
|
||||
|
||||
/* Some targets (eg, C4x) need to initialize special looping
|
||||
registers. */
|
||||
#ifdef HAVE_doloop_begin
|
||||
{
|
||||
rtx init;
|
||||
|
||||
init = gen_doloop_begin (counter_reg,
|
||||
GET_CODE (iterations) == CONST_INT
|
||||
? iterations : const0_rtx, iterations_max,
|
||||
GEN_INT (loop->level));
|
||||
if (init)
|
||||
{
|
||||
start_sequence ();
|
||||
emit_insn (init);
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, loop->start);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Insert the new low-overhead looping insn. */
|
||||
emit_jump_insn_before (doloop_seq, loop->end);
|
||||
jump_insn = prev_nonnote_insn (loop->end);
|
||||
JUMP_LABEL (jump_insn) = start_label;
|
||||
|
||||
/* Add a REG_NONNEG note if the actual or estimated maximum number
|
||||
of iterations is non-negative. */
|
||||
if (nonneg)
|
||||
{
|
||||
REG_NOTES (jump_insn)
|
||||
= gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* Handle the more complex case, where the bounds are not known at
|
||||
compile time. In this case we generate a run_time calculation of
|
||||
the number of iterations. We rely on the existence of a run-time
|
||||
guard to ensure that the loop executes at least once, i.e.,
|
||||
initial_value obeys the loop comparison condition. If a guard is
|
||||
not present, we emit one. The loop to modify is described by LOOP.
|
||||
ITERATIONS_MAX is a CONST_INT specifying the estimated maximum
|
||||
number of loop iterations. DOLOOP_INSN is the low-overhead looping
|
||||
insn to insert. Returns non-zero if loop successfully modified. */
|
||||
static int
|
||||
doloop_modify_runtime (loop, iterations_max,
|
||||
doloop_seq, start_label, mode, condition)
|
||||
const struct loop *loop;
|
||||
rtx iterations_max;
|
||||
rtx doloop_seq;
|
||||
rtx start_label;
|
||||
enum machine_mode mode;
|
||||
rtx condition;
|
||||
{
|
||||
const struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
HOST_WIDE_INT abs_inc;
|
||||
int neg_inc;
|
||||
rtx diff;
|
||||
rtx sequence;
|
||||
rtx iterations;
|
||||
rtx initial_value;
|
||||
rtx final_value;
|
||||
rtx increment;
|
||||
int unsigned_p;
|
||||
enum rtx_code comparison_code;
|
||||
|
||||
increment = loop_info->increment;
|
||||
initial_value = loop_info->initial_value;
|
||||
final_value = loop_info->final_value;
|
||||
|
||||
neg_inc = 0;
|
||||
abs_inc = INTVAL (increment);
|
||||
if (abs_inc < 0)
|
||||
{
|
||||
abs_inc = -abs_inc;
|
||||
neg_inc = 1;
|
||||
}
|
||||
|
||||
comparison_code = loop_info->comparison_code;
|
||||
unsigned_p = (comparison_code == LTU
|
||||
|| comparison_code == LEU
|
||||
|| comparison_code == GTU
|
||||
|| comparison_code == GEU
|
||||
|| comparison_code == NE);
|
||||
|
||||
/* The number of iterations (prior to any loop unrolling) is given by:
|
||||
(abs (final - initial) + abs_inc - 1) / abs_inc.
|
||||
|
||||
However, it is possible for the summation to overflow, and a
|
||||
safer method is:
|
||||
|
||||
abs (final - initial) / abs_inc + (abs (final - initial) % abs_inc) != 0
|
||||
|
||||
If the loop has been unrolled, then the loop body has been
|
||||
preconditioned to iterate a multiple of unroll_number times.
|
||||
The number of iterations of the loop body is simply:
|
||||
abs (final - initial) / (abs_inc * unroll_number).
|
||||
|
||||
The division and modulo operations can be avoided by requiring
|
||||
that the increment is a power of 2 (precondition_loop_p enforces
|
||||
this requirement). Nevertheless, the RTX_COSTS should be checked
|
||||
to see if a fast divmod is available. */
|
||||
|
||||
start_sequence ();
|
||||
/* abs (final - initial) */
|
||||
diff = expand_binop (mode, sub_optab,
|
||||
copy_rtx (neg_inc ? initial_value : final_value),
|
||||
copy_rtx (neg_inc ? final_value : initial_value),
|
||||
NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN);
|
||||
|
||||
if (loop_info->unroll_number == 1)
|
||||
{
|
||||
if (abs_inc != 1)
|
||||
{
|
||||
int shift_count;
|
||||
rtx extra;
|
||||
rtx label;
|
||||
|
||||
shift_count = exact_log2 (abs_inc);
|
||||
if (shift_count < 0)
|
||||
abort ();
|
||||
|
||||
/* abs (final - initial) / abs_inc */
|
||||
iterations = expand_binop (GET_MODE (diff), lshr_optab,
|
||||
diff, GEN_INT (shift_count),
|
||||
NULL_RTX, 1,
|
||||
OPTAB_LIB_WIDEN);
|
||||
|
||||
/* abs (final - initial) % abs_inc */
|
||||
extra = expand_binop (GET_MODE (iterations), and_optab,
|
||||
diff, GEN_INT (abs_inc - 1),
|
||||
NULL_RTX, 1,
|
||||
OPTAB_LIB_WIDEN);
|
||||
|
||||
/* If (abs (final - initial) % abs_inc == 0) jump past
|
||||
following increment instruction. */
|
||||
label = gen_label_rtx();
|
||||
emit_cmp_and_jump_insns (extra, const0_rtx, EQ, NULL_RTX,
|
||||
GET_MODE (extra), 0, 0, label);
|
||||
JUMP_LABEL (get_last_insn ()) = label;
|
||||
LABEL_NUSES (label)++;
|
||||
|
||||
/* Increment the iteration count by one. */
|
||||
iterations = expand_binop (GET_MODE (iterations), add_optab,
|
||||
iterations, GEN_INT (1),
|
||||
iterations, 1,
|
||||
OPTAB_LIB_WIDEN);
|
||||
|
||||
emit_label (label);
|
||||
}
|
||||
else
|
||||
iterations = diff;
|
||||
}
|
||||
else
|
||||
{
|
||||
int shift_count;
|
||||
|
||||
/* precondition_loop_p has preconditioned the loop so that the
|
||||
iteration count of the loop body is always a power of 2.
|
||||
Since we won't get an overflow calculating the loop count,
|
||||
the code we emit is simpler. */
|
||||
shift_count = exact_log2 (loop_info->unroll_number * abs_inc);
|
||||
if (shift_count < 0)
|
||||
abort ();
|
||||
|
||||
iterations = expand_binop (GET_MODE (diff), lshr_optab,
|
||||
diff, GEN_INT (shift_count),
|
||||
NULL_RTX, 1,
|
||||
OPTAB_LIB_WIDEN);
|
||||
}
|
||||
|
||||
|
||||
/* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while'
|
||||
style loop, with a loop exit test at the start. Thus, we can
|
||||
assume that the loop condition was true when the loop was
|
||||
entered.
|
||||
|
||||
`do-while' loops require special treatment since the exit test is
|
||||
not executed before the start of the loop. We need to determine
|
||||
if the loop will terminate after the first pass and to limit the
|
||||
iteration count to one if necessary. */
|
||||
if (! loop->vtop)
|
||||
{
|
||||
rtx label;
|
||||
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "Doloop: Do-while loop.\n");
|
||||
|
||||
/* A `do-while' loop must iterate at least once. If the
|
||||
iteration count is bogus, we set the iteration count to 1.
|
||||
Note that if the loop has been unrolled, then the loop body
|
||||
is guaranteed to execute at least once. */
|
||||
if (loop_info->unroll_number == 1)
|
||||
{
|
||||
/* Emit insns to test if the loop will immediately
|
||||
terminate and to set the iteration count to 1 if true. */
|
||||
label = gen_label_rtx();
|
||||
emit_cmp_and_jump_insns (copy_rtx (initial_value),
|
||||
copy_rtx (loop_info->comparison_value),
|
||||
comparison_code, NULL_RTX, mode, 0, 0,
|
||||
label);
|
||||
JUMP_LABEL (get_last_insn ()) = label;
|
||||
LABEL_NUSES (label)++;
|
||||
emit_move_insn (iterations, const1_rtx);
|
||||
emit_label (label);
|
||||
}
|
||||
}
|
||||
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop->start);
|
||||
|
||||
return doloop_modify (loop, iterations, iterations_max, doloop_seq,
|
||||
start_label, condition);
|
||||
}
|
||||
|
||||
|
||||
/* This is the main entry point. Process loop described by LOOP
|
||||
validating that the loop is suitable for conversion to use a low
|
||||
overhead looping instruction, replacing the jump insn where
|
||||
suitable. We distinguish between loops with compile-time bounds
|
||||
and those with run-time bounds. Information from LOOP is used to
|
||||
compute the number of iterations and to determine whether the loop
|
||||
is a candidate for this optimization. Returns non-zero if loop
|
||||
successfully modified. */
|
||||
int
|
||||
doloop_optimize (loop)
|
||||
const struct loop *loop;
|
||||
{
|
||||
struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
rtx initial_value;
|
||||
rtx final_value;
|
||||
rtx increment;
|
||||
rtx jump_insn;
|
||||
enum machine_mode mode;
|
||||
unsigned HOST_WIDE_INT n_iterations;
|
||||
unsigned HOST_WIDE_INT n_iterations_max;
|
||||
rtx doloop_seq, doloop_pat, doloop_reg;
|
||||
rtx iterations;
|
||||
rtx iterations_max;
|
||||
rtx start_label;
|
||||
rtx condition;
|
||||
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Processing loop %d, enclosed levels %d.\n",
|
||||
loop->num, loop->level);
|
||||
|
||||
jump_insn = prev_nonnote_insn (loop->end);
|
||||
|
||||
/* Check that loop is a candidate for a low-overhead looping insn. */
|
||||
if (! doloop_valid_p (loop, jump_insn))
|
||||
return 0;
|
||||
|
||||
/* Determine if the loop can be safely, and profitably,
|
||||
preconditioned. While we don't precondition the loop in a loop
|
||||
unrolling sense, this test ensures that the loop is well behaved
|
||||
and that the increment is a constant integer. */
|
||||
if (! precondition_loop_p (loop, &initial_value, &final_value,
|
||||
&increment, &mode))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Cannot precondition loop.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Determine or estimate the maximum number of loop iterations. */
|
||||
n_iterations = loop_info->n_iterations;
|
||||
if (n_iterations)
|
||||
{
|
||||
/* This is the simple case where the initial and final loop
|
||||
values are constants. */
|
||||
n_iterations_max = n_iterations;
|
||||
}
|
||||
else
|
||||
{
|
||||
int nonneg = find_reg_note (jump_insn, REG_NONNEG, 0) != 0;
|
||||
|
||||
/* This is the harder case where the initial and final loop
|
||||
values may not be constants. */
|
||||
n_iterations_max = doloop_iterations_max (loop_info, mode, nonneg);
|
||||
|
||||
if (! n_iterations_max)
|
||||
{
|
||||
/* We have something like `for (i = 0; i < 10; i--)'. */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Not normal loop.\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Account for loop unrolling in the iteration count. This will
|
||||
have no effect if loop_iterations could not determine the number
|
||||
of iterations. */
|
||||
n_iterations /= loop_info->unroll_number;
|
||||
n_iterations_max /= loop_info->unroll_number;
|
||||
|
||||
if (n_iterations && n_iterations < 3)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Too few iterations (%ld) to be profitable.\n",
|
||||
(long int) n_iterations);
|
||||
return 0;
|
||||
}
|
||||
|
||||
iterations = GEN_INT (n_iterations);
|
||||
iterations_max = GEN_INT (n_iterations_max);
|
||||
|
||||
/* Generate looping insn. If the pattern FAILs then give up trying
|
||||
to modify the loop since there is some aspect the back-end does
|
||||
not like. */
|
||||
start_label = gen_label_rtx ();
|
||||
doloop_reg = gen_reg_rtx (mode);
|
||||
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
|
||||
GEN_INT (loop->level), start_label);
|
||||
if (! doloop_seq && mode != word_mode)
|
||||
{
|
||||
PUT_MODE (doloop_reg, word_mode);
|
||||
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
|
||||
GEN_INT (loop->level), start_label);
|
||||
}
|
||||
if (! doloop_seq)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Target unwilling to use doloop pattern!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* A raw define_insn may yield a plain pattern. If a sequence
|
||||
was involved, the last must be the jump instruction. */
|
||||
if (GET_CODE (doloop_seq) == SEQUENCE)
|
||||
{
|
||||
doloop_pat = XVECEXP (doloop_seq, 0, XVECLEN (doloop_seq, 0) - 1);
|
||||
if (GET_CODE (doloop_pat) == JUMP_INSN)
|
||||
doloop_pat = PATTERN (doloop_pat);
|
||||
else
|
||||
doloop_pat = NULL_RTX;
|
||||
}
|
||||
else
|
||||
doloop_pat = doloop_seq;
|
||||
|
||||
if (! doloop_pat
|
||||
|| ! (condition = doloop_condition_get (doloop_pat)))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Unrecognizable doloop pattern!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (n_iterations != 0)
|
||||
/* Handle the simpler case, where we know the iteration count at
|
||||
compile time. */
|
||||
return doloop_modify (loop, iterations, iterations_max, doloop_seq,
|
||||
start_label, condition);
|
||||
else
|
||||
/* Handle the harder case, where we must add additional runtime tests. */
|
||||
return doloop_modify_runtime (loop, iterations_max, doloop_seq,
|
||||
start_label, mode, condition);
|
||||
}
|
||||
|
||||
#endif /* HAVE_doloop_end */
|
12
gcc/final.c
12
gcc/final.c
@ -918,9 +918,14 @@ int
|
||||
insn_current_reference_address (branch)
|
||||
rtx branch;
|
||||
{
|
||||
rtx dest;
|
||||
rtx seq = NEXT_INSN (PREV_INSN (branch));
|
||||
int seq_uid = INSN_UID (seq);
|
||||
rtx dest, seq;
|
||||
int seq_uid;
|
||||
|
||||
if (! INSN_ADDRESSES_SET_P ())
|
||||
return 0;
|
||||
|
||||
seq = NEXT_INSN (PREV_INSN (branch));
|
||||
seq_uid = INSN_UID (seq);
|
||||
if (GET_CODE (branch) != JUMP_INSN)
|
||||
/* This can happen for example on the PA; the objective is to know the
|
||||
offset to address something in front of the start of the function.
|
||||
@ -929,6 +934,7 @@ insn_current_reference_address (branch)
|
||||
any alignment we'd encounter, so we skip the call to align_fuzz. */
|
||||
return insn_current_address;
|
||||
dest = JUMP_LABEL (branch);
|
||||
|
||||
/* BRANCH has no proper alignment chain set, so use SEQ. */
|
||||
if (INSN_SHUID (branch) < INSN_SHUID (dest))
|
||||
{
|
||||
|
@ -375,7 +375,7 @@ extern int flag_schedule_speculative;
|
||||
extern int flag_schedule_speculative_load;
|
||||
extern int flag_schedule_speculative_load_dangerous;
|
||||
|
||||
/* flag_on_branch_count_reg means try to replace add-1,compare,branch tupple
|
||||
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
|
||||
by a cheaper branch, on a count register. */
|
||||
extern int flag_branch_on_count_reg;
|
||||
|
||||
|
@ -2217,7 +2217,7 @@ any_uncondjump_p (insn)
|
||||
/* Return true when insn is a conditional jump. This function works for
|
||||
instructions containing PC sets in PARALLELs. The instruction may have
|
||||
various other effects so before removing the jump you must verify
|
||||
safe_to_remove_jump_p.
|
||||
onlyjump_p.
|
||||
|
||||
Note that unlike condjump_p it returns false for unconditional jumps. */
|
||||
|
||||
|
306
gcc/loop.c
306
gcc/loop.c
@ -327,16 +327,8 @@ typedef struct rtx_pair {
|
||||
&& INSN_LUID (INSN) >= INSN_LUID (START) \
|
||||
&& INSN_LUID (INSN) <= INSN_LUID (END))
|
||||
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
/* Test whether BCT applicable and safe. */
|
||||
static void insert_bct PARAMS ((struct loop *));
|
||||
|
||||
/* Auxiliary function that inserts the BCT pattern into the loop. */
|
||||
static void instrument_loop_bct PARAMS ((rtx, rtx, rtx));
|
||||
#endif /* HAVE_decrement_and_branch_on_count */
|
||||
|
||||
/* Indirect_jump_in_function is computed once per function. */
|
||||
int indirect_jump_in_function = 0;
|
||||
static int indirect_jump_in_function;
|
||||
static int indirect_jump_in_function_p PARAMS ((rtx));
|
||||
|
||||
static int compute_luids PARAMS ((rtx, rtx, int));
|
||||
@ -5025,12 +5017,10 @@ strength_reduce (loop, insn_count, flags)
|
||||
&& unrolled_insn_copies <= insn_count))
|
||||
unroll_loop (loop, insn_count, end_insert_before, 1);
|
||||
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
/* Instrument the loop with BCT insn. */
|
||||
if (HAVE_decrement_and_branch_on_count && (flags & LOOP_BCT)
|
||||
&& flag_branch_on_count_reg)
|
||||
insert_bct (loop);
|
||||
#endif /* HAVE_decrement_and_branch_on_count */
|
||||
#ifdef HAVE_doloop_end
|
||||
if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg)
|
||||
doloop_optimize (loop);
|
||||
#endif /* HAVE_doloop_end */
|
||||
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "\n");
|
||||
@ -9187,6 +9177,7 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
|
||||
return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
|
||||
}
|
||||
|
||||
|
||||
/* Given a jump insn JUMP, return the condition that will cause it to branch
|
||||
to its JUMP_LABEL. If the condition cannot be understood, or is an
|
||||
inequality floating-point comparison which needs to be reversed, 0 will
|
||||
@ -9242,291 +9233,6 @@ get_condition_for_loop (loop, x)
|
||||
XEXP (comparison, 1), XEXP (comparison, 0));
|
||||
}
|
||||
|
||||
#ifdef HAVE_decrement_and_branch_on_count
|
||||
/* Instrument loop for insertion of bct instruction. We distinguish between
|
||||
loops with compile-time bounds and those with run-time bounds.
|
||||
Information from loop_iterations() is used to compute compile-time bounds.
|
||||
Run-time bounds should use loop preconditioning, but currently ignored.
|
||||
*/
|
||||
|
||||
static void
|
||||
insert_bct (loop)
|
||||
struct loop *loop;
|
||||
{
|
||||
unsigned HOST_WIDE_INT n_iterations;
|
||||
rtx loop_start = loop->start;
|
||||
rtx loop_end = loop->end;
|
||||
struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
int loop_num = loop->num;
|
||||
|
||||
#if 0
|
||||
int increment_direction, compare_direction;
|
||||
/* If the loop condition is <= or >=, the number of iteration
|
||||
is 1 more than the range of the bounds of the loop. */
|
||||
int add_iteration = 0;
|
||||
enum machine_mode loop_var_mode = word_mode;
|
||||
#endif
|
||||
|
||||
/* It's impossible to instrument a competely unrolled loop. */
|
||||
if (loop_info->unroll_number == loop_info->n_iterations)
|
||||
return;
|
||||
|
||||
/* Make sure that the count register is not in use. */
|
||||
if (loop_info->used_count_register)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT instrumentation failed: count register already in use\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make sure that the function has no indirect jumps. */
|
||||
if (indirect_jump_in_function)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT instrumentation failed: indirect jump in function\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make sure that the last loop insn is a conditional jump. */
|
||||
if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN
|
||||
|| ! onlyjump_p (PREV_INSN (loop_end))
|
||||
|| ! any_condjump_p (PREV_INSN (loop_end)))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT instrumentation failed: invalid jump at loop end\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make sure that the loop does not contain a function call
|
||||
(the count register might be altered by the called function). */
|
||||
if (loop_info->has_call)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT instrumentation failed: function call in loop\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make sure that the loop does not jump via a table.
|
||||
(the count register might be used to perform the branch on table). */
|
||||
if (loop_info->has_tablejump)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT instrumentation failed: computed branch in the loop\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Account for loop unrolling in instrumented iteration count. */
|
||||
if (loop_info->unroll_number > 1)
|
||||
n_iterations = loop_info->n_iterations / loop_info->unroll_number;
|
||||
else
|
||||
n_iterations = loop_info->n_iterations;
|
||||
|
||||
if (n_iterations != 0 && n_iterations < 3)
|
||||
{
|
||||
/* Allow an enclosing outer loop to benefit if possible. */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: Too few iterations to benefit from BCT optimization\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Try to instrument the loop. */
|
||||
|
||||
/* Handle the simpler case, where the bounds are known at compile time. */
|
||||
if (n_iterations > 0)
|
||||
{
|
||||
struct loop *outer_loop;
|
||||
struct loop_info *outer_loop_info;
|
||||
|
||||
/* Mark all enclosing loops that they cannot use count register. */
|
||||
for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
|
||||
{
|
||||
outer_loop_info = LOOP_INFO (outer_loop);
|
||||
outer_loop_info->used_count_register = 1;
|
||||
}
|
||||
instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Handle the more complex case, that the bounds are NOT known
|
||||
at compile time. In this case we generate run_time calculation
|
||||
of the number of iterations. */
|
||||
|
||||
if (loop_info->iteration_var == 0)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT Runtime Instrumentation failed: no loop iteration variable found\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
if (GET_MODE_CLASS (GET_MODE (loop_info->iteration_var)) != MODE_INT
|
||||
|| GET_MODE_SIZE (GET_MODE (loop_info->iteration_var)) != UNITS_PER_WORD)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT Runtime Instrumentation failed: loop variable not integer\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
|
||||
/* With runtime bounds, if the compare is of the form '!=' we give up */
|
||||
if (loop_info->comparison_code == NE)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct %d: BCT Runtime Instrumentation failed: runtime bounds with != comparison\n",
|
||||
loop_num);
|
||||
return;
|
||||
}
|
||||
/* Use common loop preconditioning code instead. */
|
||||
#if 0
|
||||
else
|
||||
{
|
||||
/* We rely on the existence of run-time guard to ensure that the
|
||||
loop executes at least once. */
|
||||
rtx sequence;
|
||||
rtx iterations_num_reg;
|
||||
|
||||
unsigned HOST_WIDE_INT increment_value_abs
|
||||
= INTVAL (increment) * increment_direction;
|
||||
|
||||
/* make sure that the increment is a power of two, otherwise (an
|
||||
expensive) divide is needed. */
|
||||
if (exact_log2 (increment_value_abs) == -1)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"insert_bct: not instrumenting BCT because the increment is not power of 2\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* compute the number of iterations */
|
||||
start_sequence ();
|
||||
{
|
||||
rtx temp_reg;
|
||||
|
||||
/* Again, the number of iterations is calculated by:
|
||||
;
|
||||
; compare-val - initial-val + (increment -1) + additional-iteration
|
||||
; num_iterations = -----------------------------------------------------------------
|
||||
; increment
|
||||
*/
|
||||
/* ??? Do we have to call copy_rtx here before passing rtx to
|
||||
expand_binop? */
|
||||
if (compare_direction > 0)
|
||||
{
|
||||
/* <, <= :the loop variable is increasing */
|
||||
temp_reg = expand_binop (loop_var_mode, sub_optab,
|
||||
comparison_value, initial_value,
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
else
|
||||
{
|
||||
temp_reg = expand_binop (loop_var_mode, sub_optab,
|
||||
initial_value, comparison_value,
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
|
||||
if (increment_value_abs - 1 + add_iteration != 0)
|
||||
temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
|
||||
GEN_INT (increment_value_abs - 1
|
||||
+ add_iteration),
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
|
||||
if (increment_value_abs != 1)
|
||||
iterations_num_reg = expand_binop (loop_var_mode, asr_optab,
|
||||
temp_reg,
|
||||
GEN_INT (exact_log2 (increment_value_abs)),
|
||||
NULL_RTX, 0, OPTAB_LIB_WIDEN);
|
||||
else
|
||||
iterations_num_reg = temp_reg;
|
||||
}
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop_start);
|
||||
instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
|
||||
}
|
||||
|
||||
return;
|
||||
#endif /* Complex case */
|
||||
}
|
||||
|
||||
/* Instrument loop by inserting a bct in it as follows:
|
||||
1. A new counter register is created.
|
||||
2. In the head of the loop the new variable is initialized to the value
|
||||
passed in the loop_num_iterations parameter.
|
||||
3. At the end of the loop, comparison of the register with 0 is generated.
|
||||
The created comparison follows the pattern defined for the
|
||||
decrement_and_branch_on_count insn, so this insn will be generated.
|
||||
4. The branch on the old variable are deleted. The compare must remain
|
||||
because it might be used elsewhere. If the loop-variable or condition
|
||||
register are used elsewhere, they will be eliminated by flow. */
|
||||
|
||||
static void
|
||||
instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
|
||||
rtx loop_start, loop_end;
|
||||
rtx loop_num_iterations;
|
||||
{
|
||||
rtx counter_reg;
|
||||
rtx start_label;
|
||||
rtx sequence;
|
||||
|
||||
if (HAVE_decrement_and_branch_on_count)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
{
|
||||
fputs ("instrument_bct: Inserting BCT (", loop_dump_stream);
|
||||
if (GET_CODE (loop_num_iterations) == CONST_INT)
|
||||
fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
|
||||
INTVAL (loop_num_iterations));
|
||||
else
|
||||
fputs ("runtime", loop_dump_stream);
|
||||
fputs (" iterations)", loop_dump_stream);
|
||||
}
|
||||
|
||||
/* Discard original jump to continue loop. Original compare result
|
||||
may still be live, so it cannot be discarded explicitly. */
|
||||
delete_insn (PREV_INSN (loop_end));
|
||||
|
||||
/* Insert the label which will delimit the start of the loop. */
|
||||
start_label = gen_label_rtx ();
|
||||
emit_label_after (start_label, loop_start);
|
||||
|
||||
/* Insert initialization of the count register into the loop header. */
|
||||
start_sequence ();
|
||||
counter_reg = gen_reg_rtx (word_mode);
|
||||
emit_insn (gen_move_insn (counter_reg, loop_num_iterations));
|
||||
sequence = gen_sequence ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop_start);
|
||||
|
||||
/* Insert new comparison on the count register instead of the
|
||||
old one, generating the needed BCT pattern (that will be
|
||||
later recognized by assembly generation phase). */
|
||||
sequence = emit_jump_insn_before (
|
||||
gen_decrement_and_branch_on_count (counter_reg, start_label),
|
||||
loop_end);
|
||||
|
||||
if (GET_CODE (sequence) != JUMP_INSN)
|
||||
abort ();
|
||||
JUMP_LABEL (sequence) = start_label;
|
||||
LABEL_NUSES (start_label)++;
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_decrement_and_branch_on_count */
|
||||
|
||||
/* Scan the function and determine whether it has indirect (computed) jumps.
|
||||
|
||||
|
@ -254,3 +254,5 @@ int loop_insn_first_p PARAMS ((rtx, rtx));
|
||||
typedef rtx (*loop_insn_callback ) PARAMS ((struct loop *, rtx, int, int));
|
||||
void for_each_insn_in_loop PARAMS ((struct loop *, loop_insn_callback));
|
||||
|
||||
/* Forward declarations for non-static functions defined in doloop.c. */
|
||||
int doloop_optimize PARAMS ((const struct loop *));
|
||||
|
@ -2626,7 +2626,7 @@ split_all_insns (upd_life)
|
||||
/* Can't use `next_real_insn' because that might go across
|
||||
CODE_LABELS and short-out basic blocks. */
|
||||
next = NEXT_INSN (insn);
|
||||
if (GET_CODE (insn) != INSN)
|
||||
if (! INSN_P (insn))
|
||||
;
|
||||
|
||||
/* Don't split no-op move insns. These should silently
|
||||
|
@ -746,9 +746,9 @@ int flag_schedule_speculative_load_dangerous = 0;
|
||||
|
||||
int flag_single_precision_constant;
|
||||
|
||||
/* flag_on_branch_count_reg means try to replace add-1,compare,branch tupple
|
||||
by a cheaper branch, on a count register. */
|
||||
int flag_branch_on_count_reg;
|
||||
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
|
||||
by a cheaper branch on a count register. */
|
||||
int flag_branch_on_count_reg = 1;
|
||||
|
||||
/* -finhibit-size-directive inhibits output of .size for ELF.
|
||||
This is used only for compiling crtstuff.c,
|
||||
|
Loading…
Reference in New Issue
Block a user