ia64 specific scheduling bits

From-SVN: r38419
This commit is contained in:
Bernd Schmidt 2000-12-21 18:26:07 +00:00 committed by Bernd Schmidt
parent 5f446d2172
commit 2130b7fb30
8 changed files with 1691 additions and 259 deletions

View File

@ -1,3 +1,54 @@
2000-12-21 Bernd Schmidt <bernds@redhat.com>
* Makefile.in (out_object_file): Depend on sched-int.h.
* rtl.h (single_set_1): New macro.
(single_set_2): Renamed from single_set_1 and extra argument added.
* rtlanal.c (single_set_2): Likewise.
* config/ia64/ia64-protos.h (get_bundle_name, ia64_issue_rate,
ia64_adjust_cost, ia64_sched_init, ia64_sched_finish,
ia64_sched_reorder, ia64_sched_reorder2, ia64_variable_issue):
Declare.
* config/ia64/ia64.c: Include "sched-int.h".
(hard_regno_rename_ok): Also disallow renaming from the various
reg_save_* regs.
(ia64_safe_itanium_requires_unit0, ia64_safe_itanium_class,
ia64_safe_type, init_insn_group_barriers, group_barrier_needed_p,
safe_group_barrier_needed_p, fixup_errata): New static functions.
(rtx_needs_barrier): Handle bundle selector and cycle display
insns.
(emit_insn_group_barriers): Accept additional FILE * arg. All
callers changed. Rework to only generate stop bits between
basic blocks that haven't been inserted by scheduling.
(struct bundle, struct ia64_packet): New structures.
(NR_BUNDLES, NR_PACKETS): New macros.
(bundle, packets, type_names): New static arrays.
(ia64_final_schedule): New variable.
(ia64_single_set, insn_matches_slot, ia64_emit_insn_before,
gen_nop_type, finish_last_head, rotate_one_bundle, rotate_two_bundles,
cycle_end_fill_slots, packet_matches_p, get_split, find_best_insn,
find_best_packet, itanium_reorder, dump_current_packet, schedule_stop):
New static functions.
(ia64_issue_rate, ia64_sched_init, ia64_sched_reorder,
ia64_sched_finish, ia64_sched_reorder2, ia64_variable_issue): New
functions.
(ia64_reorg): Perform a final scheduling pass.
* config/ia64/ia64.h (CONST_COSTS): Slightly increase SYMBOL_REF costs.
(MAX_CONDITIONAL_EXECUTE, ADJUST_COST, ISSUE_RATE, MD_SCHED_INIT,
MD_SCHED_REORDER, MD_SCHED_REORDER2, MD_SCHED_FINISH,
MD_SCHED_VARIABLE_ISSUE): Define macros.
(ia64_final_schedule): Declare variable.
* config/ia64/ia64.md (attr itanium_class): Add some more classes.
(attr type): Account for them.
(itanium_requires_unit0): New attribute.
(function units): Rewrite.
(some splitters): Don't create scheduling barriers here.
(gr_spill_internal, gr_restore_internal): Don't predicate the
pseudo-op.
(nop_m, nop_i, nop_f, nop_b, nop_x, cycle_display, cycle_display_1,
bundle_selector): New patterns.
(insn_group_barrier): Now has an operand.
2000-12-21 DJ Delorie <dj@redhat.com>
* dwarf2out.c (simple_decl_align_in_bits): new

View File

@ -1499,7 +1499,8 @@ dependence.o : dependence.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) \
$(out_object_file): $(out_file) $(CONFIG_H) $(TREE_H) $(GGC_H) \
$(RTL_H) $(REGS_H) hard-reg-set.h real.h insn-config.h conditions.h \
insn-flags.h output.h $(INSN_ATTR_H) insn-codes.h system.h toplev.h function.h
insn-flags.h output.h $(INSN_ATTR_H) insn-codes.h system.h toplev.h \
function.h sched-int.h
$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(out_file) $(OUTPUT_OPTION)

View File

@ -92,6 +92,14 @@ extern enum reg_class ia64_secondary_reload_class PARAMS((enum reg_class,
rtx));
extern void ia64_reorg PARAMS((rtx));
extern void process_for_unwind_directive PARAMS ((FILE *, rtx));
extern const char *get_bundle_name PARAMS ((int));
extern int ia64_issue_rate PARAMS ((void));
extern int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
extern void ia64_sched_init PARAMS ((FILE *, int, int));
extern void ia64_sched_finish PARAMS ((FILE *, int));
extern int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
extern int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
extern int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
#endif /* RTX_CODE */
#ifdef TREE_CODE

File diff suppressed because it is too large Load Diff

View File

@ -1849,7 +1849,7 @@ do { \
case CONST: \
case SYMBOL_REF: \
case LABEL_REF: \
return COSTS_N_INSNS (2);
return COSTS_N_INSNS (3);
/* Like `CONST_COSTS' but applies to nonconstant RTL expressions. */
@ -1916,19 +1916,6 @@ do { \
#define NO_FUNCTION_CSE
/* A C statement (sans semicolon) to update the integer variable COST based on
the relationship between INSN that is dependent on DEP_INSN through the
dependence LINK. */
/* ??? Investigate. */
/* #define ADJUST_COST(INSN, LINK, DEP_INSN, COST) */
/* A C statement (sans semicolon) to update the integer scheduling
priority `INSN_PRIORITY(INSN)'. */
/* ??? Investigate. */
/* #define ADJUST_PRIORITY (INSN) */
/* Dividing the output into sections. */
@ -2816,13 +2803,43 @@ do { \
BRANCH_COST+1 is the default if the machine does not use
cc0, and 1 if it does use cc0. */
/* ??? Investigate. */
/* #define MAX_CONDITIONAL_EXECUTE */
#define MAX_CONDITIONAL_EXECUTE 12
/* Indicate how many instructions can be issued at the same time. */
/* A C statement (sans semicolon) to update the integer scheduling
priority `INSN_PRIORITY(INSN)'. */
/* ??? For now, we just schedule to fill bundles. */
/* ??? Investigate. */
/* #define ADJUST_PRIORITY (INSN) */
#define ISSUE_RATE 3
/* A C statement (sans semicolon) to update the integer variable COST
based on the relationship between INSN that is dependent on
DEP_INSN through the dependence LINK. The default is to make no
adjustment to COST. This can be used for example to specify to
the scheduler that an output- or anti-dependence does not incur
the same cost as a data-dependence. */
#define ADJUST_COST(insn,link,dep_insn,cost) \
(cost) = ia64_adjust_cost(insn, link, dep_insn, cost)
#define ISSUE_RATE ia64_issue_rate ()
#define MD_SCHED_INIT(DUMP, SCHED_VERBOSE, MAX_READY) \
ia64_sched_init (DUMP, SCHED_VERBOSE, MAX_READY)
#define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
(CIM) = ia64_sched_reorder (DUMP, SCHED_VERBOSE, READY, &N_READY, 0)
#define MD_SCHED_REORDER2(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
(CIM) = ia64_sched_reorder2 (DUMP, SCHED_VERBOSE, READY, &N_READY, 1)
#define MD_SCHED_FINISH(DUMP, SCHED_VERBOSE) \
ia64_sched_finish (DUMP, SCHED_VERBOSE)
#define MD_SCHED_VARIABLE_ISSUE(DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE) \
((CAN_ISSUE_MORE) \
= ia64_variable_issue (DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE))
extern int ia64_final_schedule;
#define IA64_UNWIND_INFO 1
#define HANDLER_SECTION fprintf (asm_out_file, "\t.personality\t__ia64_personality_v1\n\t.handlerdata\n");

View File

@ -68,6 +68,8 @@
;; 19 fetchadd_acq
;; 20 bsp_value
;; 21 flushrs
;; 22 bundle selector
;; 23 cycle display
;;
;; unspec_volatile:
;; 0 alloc
@ -99,23 +101,35 @@
;; multiple instructions, patterns which emit 0 instructions, and patterns
;; which emit instruction that can go in any slot (e.g. nop).
(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd"
(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x"
(const_string "unknown"))
(define_attr "type" "unknown,A,I,M,F,B,L,S"
(cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem") (const_string "M")
;; chk_s has an I and an M form; use type A for convenience.
(define_attr "type" "unknown,A,I,M,F,B,L,X,S"
(cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M")
(eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
(eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
(eq_attr "itanium_class" "ialu,icmp,ilog") (const_string "A")
(eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy,fcvtfx") (const_string "F")
(eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
(eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
(eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
(eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I")
(eq_attr "itanium_class" "frpr,topr,ishf,xtd,tbit") (const_string "I")
(eq_attr "itanium_class" "mmmul,mmshf,mmshfi") (const_string "I")
(eq_attr "itanium_class" "br,scall") (const_string "B")
(eq_attr "itanium_class" "mmmul,mmshf,mmshfi,nop_i") (const_string "I")
(eq_attr "itanium_class" "br,scall,nop_b") (const_string "B")
(eq_attr "itanium_class" "stop_bit") (const_string "S")
(eq_attr "itanium_class" "nop_x") (const_string "X")
(eq_attr "itanium_class" "long_i") (const_string "L")]
(const_string "unknown")))
(define_attr "itanium_requires_unit0" "no,yes"
(cond [(eq_attr "itanium_class" "syst_m0,sem,frfr,rse_m") (const_string "yes")
(eq_attr "itanium_class" "toar_m,frar_m") (const_string "yes")
(eq_attr "itanium_class" "frbr,tobr,mmmul") (const_string "yes")
(eq_attr "itanium_class" "tbit,ishf,topr,frpr") (const_string "yes")
(eq_attr "itanium_class" "toar_i,frar_i") (const_string "yes")
(eq_attr "itanium_class" "fmisc,fcmp") (const_string "yes")]
(const_string "no")))
;; Predication. True iff this instruction can be predicated.
(define_attr "predicable" "no,yes" (const_string "yes"))
@ -127,47 +141,70 @@
;; ::
;; ::::::::::::::::::::
;; Each usage of a function unit by a class of insns is specified with a
;; `define_function_unit' expression, which looks like this:
;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY
;; ISSUE-DELAY [CONFLICT-LIST])
;; We define 6 "dummy" functional units. All the real work to decide which
;; insn uses which unit is done by our MD_SCHED_REORDER hooks. We only
;; have to ensure here that there are enough copies of the dummy unit so
;; that the scheduler doesn't get confused by MD_SCHED_REORDER.
;; Other than the 6 dummies for normal insns, we also add a single dummy unit
;; for stop bits.
;; This default scheduling info seeks to pack instructions into bundles
;; efficiently to reduce code size, so we just list how many of each
;; instruction type can go in a bundle. ISSUE_RATE is set to 3.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld") 9 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac") 5 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc") 5 0)
;; ??? Add scheduler ready-list hook (MD_SCHED_REORDER) that orders
;; instructions, so that the next instruction can fill the next bundle slot.
;; This really needs to know where the stop bits are though.
;; There is only one insn `mov = ar.bsp' for frar_i:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0)
;; There is only one insn `mov = ar.unat' for frar_m:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr") 2 0)
;; ??? Use MD_SCHED_REORDER to put alloc first instead of using an unspec
;; volatile. Use ADJUST_PRIORITY to set the priority of alloc very high to
;; make it schedule first.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmmul") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi") 2 0)
;; ??? Modify the md_reorg code that emits stop bits so that instead of putting
;; them in the last possible place, we put them in places where bundles allow
;; them. This should reduce code size, but may decrease performance if we end
;; up with more stop bits than the minimum we need.
;; Now we have only one insn (flushrs) of such class. We assume that flushrs
;; is the 1st syllable of the bundle after stop bit.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem") 11 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0)
;; Now we use only one insn `mf'. Therefore latency time is set to 0.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit") 1 0)
;; Alu instructions can execute on either the integer or memory function
;; unit. We indicate this by defining an alu function unit, and then marking
;; it as busy every time we issue an integer or memory type instruction.
;; There is only one insn `mov ar.pfs =' for toar_i therefore we use
;; latency time equal to 0:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0)
;; There are only 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr") 9 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy") 7 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd") 1 0)
(define_function_unit "alu" 3 1 (eq_attr "type" "A,I,M") 1 0)
(define_function_unit "integer" 2 1 (eq_attr "type" "I") 1 0)
(define_function_unit "memory" 3 1 (eq_attr "type" "M") 1 0)
(define_function_unit "floating_point" 1 1 (eq_attr "type" "F") 1 0)
(define_function_unit "branch" 3 1 (eq_attr "type" "B") 1 0)
;; ??? This isn't quite right, because we can only fit two insns in a bundle
;; when using an L type instruction. That isn't modeled currently.
(define_function_unit "long_immediate" 1 1 (eq_attr "type" "L") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x") 0 0)
(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0)
;; ::::::::::::::::::::
;; ::
@ -1411,7 +1448,6 @@
(clobber (match_operand:DI 2 "register_operand" ""))]
"reload_completed"
[(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32)))
(unspec_volatile [(const_int 0)] 2)
(set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0))
(lshiftrt:DI (match_dup 3) (const_int 32)))]
"operands[3] = operands[2];")
@ -2408,9 +2444,6 @@
"#"
[(set_attr "itanium_class" "unknown")])
;; ??? Need to emit an instruction group barrier here because this gets split
;; after md_reorg.
(define_split
[(set (match_operand:DI 0 "register_operand" "")
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
@ -2422,9 +2455,7 @@
[(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
(match_dup 3)))
(clobber (match_dup 0))])
(unspec_volatile [(const_int 0)] 2)
(set (match_dup 0) (match_dup 5))
(unspec_volatile [(const_int 0)] 2)
(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
"")
@ -5122,7 +5153,10 @@
(match_operand:DI 2 "const_int_operand" "")] 1))
(clobber (match_operand:DI 3 "register_operand" ""))]
""
".mem.offset %2, 0\;st8.spill %0 = %1%P0"
"*
{
return \".mem.offset %2, 0\;%,st8.spill %0 = %1%P0\";
}"
[(set_attr "itanium_class" "st")])
;; Reads ar.unat
@ -5140,7 +5174,10 @@
(match_operand:DI 2 "const_int_operand" "")] 2))
(use (match_operand:DI 3 "register_operand" ""))]
""
".mem.offset %2, 0\;ld8.fill %0 = %1%P1"
"*
{
return \".mem.offset %2, 0\;%,ld8.fill %0 = %1%P1\";
}"
[(set_attr "itanium_class" "ld")])
(define_insn "fr_spill"
@ -5193,6 +5230,58 @@
"nop 0"
[(set_attr "itanium_class" "unknown")])
(define_insn "nop_m"
[(const_int 1)]
""
"nop.m 0"
[(set_attr "itanium_class" "nop_m")])
(define_insn "nop_i"
[(const_int 2)]
""
"nop.i 0"
[(set_attr "itanium_class" "nop_i")])
(define_insn "nop_f"
[(const_int 3)]
""
"nop.f 0"
[(set_attr "itanium_class" "nop_f")])
(define_insn "nop_b"
[(const_int 4)]
""
"nop.b 0"
[(set_attr "itanium_class" "nop_b")])
(define_insn "nop_x"
[(const_int 5)]
""
""
[(set_attr "itanium_class" "nop_x")])
(define_expand "cycle_display"
[(unspec [(match_operand 0 "const_int_operand" "")] 23)]
"ia64_final_schedule"
"")
(define_insn "*cycle_display_1"
[(unspec [(match_operand 0 "const_int_operand" "")] 23)]
""
"// cycle %0"
[(set_attr "itanium_class" "ignore")
(set_attr "predicable" "no")])
(define_insn "bundle_selector"
[(unspec [(match_operand 0 "const_int_operand" "")] 22)]
""
"*
{
return get_bundle_name (INTVAL (operands[0]));
}"
[(set_attr "itanium_class" "ignore")
(set_attr "predicable" "no")])
;; Pseudo instruction that prevents the scheduler from moving code above this
;; point.
(define_insn "blockage"
@ -5203,7 +5292,7 @@
(set_attr "predicable" "no")])
(define_insn "insn_group_barrier"
[(unspec_volatile [(const_int 0)] 2)]
[(unspec_volatile [(match_operand 0 "const_int_operand" "")] 2)]
""
";;"
[(set_attr "itanium_class" "stop_bit")

View File

@ -1346,6 +1346,7 @@ extern void set_unique_reg_note PARAMS ((rtx, enum reg_note, rtx));
? (GET_CODE (PATTERN (I)) == SET \
? PATTERN (I) : single_set_1 (I)) \
: NULL_RTX)
#define single_set_1(I) single_set_2 (I, PATTERN (I))
extern int rtx_unstable_p PARAMS ((rtx));
extern int rtx_varies_p PARAMS ((rtx));
@ -1365,7 +1366,7 @@ extern int no_jumps_between_p PARAMS ((rtx, rtx));
extern int modified_in_p PARAMS ((rtx, rtx));
extern int insn_dependent_p PARAMS ((rtx, rtx));
extern int reg_set_p PARAMS ((rtx, rtx));
extern rtx single_set_1 PARAMS ((rtx));
extern rtx single_set_2 PARAMS ((rtx, rtx));
extern int multiple_sets PARAMS ((rtx));
extern rtx find_last_value PARAMS ((rtx, rtx *, rtx, int));
extern int refers_to_regno_p PARAMS ((unsigned int, unsigned int,

View File

@ -860,12 +860,11 @@ insn_dependent_p_1 (x, pat, data)
will not be used, which we ignore. */
rtx
single_set_1 (insn)
rtx insn;
single_set_2 (insn, pat)
rtx insn, pat;
{
rtx set = NULL;
int set_verified = 1;
rtx pat = PATTERN (insn);
int i;
if (GET_CODE (pat) == PARALLEL)