ddg.h, [...]: New files.

* ddg.h, ddg.c, modulo-sched.c: New files.
 	* cfglayout.c (duplicate_insn_chain): Remove "static" and push
 	internals to "duplicate_insn".
 	(duplicate_insn): New function.
 	* cfglayout.h (duplicate_insn_chain, duplicate_insn): New
 	declarations.
 	* common.opt (fmodulo-sched): New flag.
 	* df.c (df_bb_regno_last_use_find, df_bb_regno_first_def_find):
 	Remove static and forward declaration.
 	(df_find_def, df_reg_used, df_bb_regno_last_def_find): New
 	functions.
 	* df.h (df_bb_regno_last_use_find, df_bb_regno_first_def_find,
 	df_bb_regno_last_def_find, df_find_def, df_reg_used): New
 	declarations.
 	* flags.h (flag_modulo_sched): New flag.
 	* opts.c (common_handle_option): Handle modulo-sched flag.
 	* params.def (max-sms-loop-number, sms-max-ii-factor,
 	sms-dfa-history, sms-loop-average-count-threshold): New
 	parameters.
	* params.h (MAX_SMS_LOOP_NUMBER, SMS_MAX_II_FACTOR,
 	SMS_DFA_HISTORY, SMS_LOOP_AVERAGE_COUNT_THRESHOLD): New
 	parameters.
 	* passes.c ("sms", "sms-vcg"): New dumps.
 	(rest_of_handle_sched): Call sms_schedule.
 	* rtl.h (sms_schedule): New declaration.
 	* timevar.def (TV_SMS): New.
 	* toplev.c (flag_modulo_sched): Initialize.
 	(f_options): Handle -fmodulo-sched option.
 	* doc/invoke.texi: Document -fmodulo-sched & -dm options.
 	* doc/passes.texi: Document new SMS pass.

Co-Authored-By: Mostafa Hagog <mustafa@il.ibm.com>

From-SVN: r82235
This commit is contained in:
Ayal Zaks 2004-05-25 12:55:06 +00:00 committed by Mostafa Hagog
parent 2f93eea861
commit e56261981b
17 changed files with 223 additions and 27 deletions

View File

@ -1,3 +1,38 @@
2004-05-25 Ayal Zaks <zaks@il.ibm.com>
Mostafa Hagog <mustafa@il.ibm.com>
* Makefile.in (modulo-sched.o, ddg.o): New.
* ddg.h, ddg.c, modulo-sched.c: New files.
* cfglayout.c (duplicate_insn_chain): Remove "static" and push
internals to "duplicate_insn".
(duplicate_insn): New function.
* cfglayout.h (duplicate_insn_chain, duplicate_insn): New
declarations.
* common.opt (fmodulo-sched): New flag.
* df.c (df_bb_regno_last_use_find, df_bb_regno_first_def_find):
Remove static and forward declaration.
(df_find_def, df_reg_used, df_bb_regno_last_def_find): New
functions.
* df.h (df_bb_regno_last_use_find, df_bb_regno_first_def_find,
df_bb_regno_last_def_find, df_find_def, df_reg_used): New
declarations.
* flags.h (flag_modulo_sched): New flag.
* opts.c (common_handle_option): Handle modulo-sched flag.
* params.def (max-sms-loop-number, sms-max-ii-factor,
sms-dfa-history, sms-loop-average-count-threshold): New
parameters.
* params.h (MAX_SMS_LOOP_NUMBER, SMS_MAX_II_FACTOR,
SMS_DFA_HISTORY, SMS_LOOP_AVERAGE_COUNT_THRESHOLD): New
parameters.
* passes.c ("sms", "sms-vcg"): New dumps.
(rest_of_handle_sched): Call sms_schedule.
* rtl.h (sms_schedule): New declaration.
* timevar.def (TV_SMS): New.
* toplev.c (flag_modulo_sched): Initialize.
(f_options): Handle -fmodulo-sched option.
* doc/invoke.texi: Document -fmodulo-sched & -dm options.
* doc/passes.texi: Document new SMS pass.
2004-05-25 Paolo Bonzini <bonzini@gnu.org> 2004-05-25 Paolo Bonzini <bonzini@gnu.org>
* Makefile.in (OBJS): Add rtlhooks.o. * Makefile.in (OBJS): Add rtlhooks.o.

View File

@ -884,14 +884,16 @@ OBJS-common = \
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \ cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \
dbxout.o debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \ dbxout.o ddg.o \
debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \ dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \ expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \ genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \ haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \
insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o \ insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o \
integrate.o intl.o jump.o langhooks.o lcm.o lists.o local-alloc.o \ integrate.o intl.o jump.o langhooks.o lcm.o lists.o local-alloc.o \
loop.o optabs.o options.o opts.o params.o postreload.o predict.o \ loop.o modulo-sched.o \
optabs.o options.o opts.o params.o postreload.o predict.o \
print-rtl.o print-tree.o value-prof.o var-tracking.o \ print-rtl.o print-tree.o value-prof.o var-tracking.o \
profile.o ra.o ra-build.o ra-colorize.o ra-debug.o ra-rewrite.o \ profile.o ra.o ra-build.o ra-colorize.o ra-debug.o ra-rewrite.o \
real.o recog.o reg-stack.o regclass.o regmove.o regrename.o \ real.o recog.o reg-stack.o regclass.o regmove.o regrename.o \
@ -1997,6 +1999,15 @@ alias.o : alias.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h \
regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) insn-config.h \ regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) insn-config.h \
$(RECOG_H) output.h $(REGS_H) hard-reg-set.h flags.h function.h \ $(RECOG_H) output.h $(REGS_H) hard-reg-set.h flags.h function.h \
$(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) except.h reload.h $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) except.h reload.h
# Prerequisites whose modification triggers a rebuild of ddg.o.
ddg.o : ddg.c ddg.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
  $(TARGET_H) toplev.h $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
  regs.h function.h flags.h insn-config.h insn-attr.h except.h \
  $(RECOG_H) sched-int.h cfglayout.h cfgloop.h $(EXPR_H)
# Prerequisites whose modification triggers a rebuild of modulo-sched.o.
# Each prerequisite is listed exactly once; the original rule repeated
# $(CONFIG_H) and cfgloop.h.
modulo-sched.o : modulo-sched.c ddg.h cfgloop.h $(CONFIG_H) \
  $(SYSTEM_H) coretypes.h $(TARGET_H) \
  toplev.h $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) regs.h function.h flags.h \
  insn-config.h insn-attr.h except.h $(RECOG_H) sched-int.h \
  cfglayout.h $(EXPR_H) params.h $(COVERAGE_H)
haifa-sched.o : haifa-sched.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ haifa-sched.o : haifa-sched.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
sched-int.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \ sched-int.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H) $(TARGET_H) $(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H) $(TARGET_H)

View File

@ -54,7 +54,6 @@ static void change_scope (rtx, tree, tree);
void verify_insn_chain (void); void verify_insn_chain (void);
static void fixup_fallthru_exit_predecessor (void); static void fixup_fallthru_exit_predecessor (void);
static rtx duplicate_insn_chain (rtx, rtx);
static tree insn_scope (rtx); static tree insn_scope (rtx);
static void update_unlikely_executed_notes (basic_block); static void update_unlikely_executed_notes (basic_block);
@ -973,7 +972,7 @@ cfg_layout_can_duplicate_bb_p (basic_block bb)
return true; return true;
} }
static rtx rtx
duplicate_insn_chain (rtx from, rtx to) duplicate_insn_chain (rtx from, rtx to)
{ {
rtx insn, last; rtx insn, last;

View File

@ -28,3 +28,4 @@ extern bool can_copy_bbs_p (basic_block *, unsigned);
extern void copy_bbs (basic_block *, unsigned, basic_block *, extern void copy_bbs (basic_block *, unsigned, basic_block *,
edge *, unsigned, edge *, struct loop *); edge *, unsigned, edge *, struct loop *);
extern bool scan_ahead_for_unlikely_executed_note (rtx); extern bool scan_ahead_for_unlikely_executed_note (rtx);
extern rtx duplicate_insn_chain (rtx, rtx);

View File

@ -466,6 +466,10 @@ fmessage-length=
Common RejectNegative Joined UInteger Common RejectNegative Joined UInteger
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping -fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping
fmodulo-sched
Common
Perform SMS based modulo scheduling before the first scheduling pass
fmove-all-movables fmove-all-movables
Common Common
Force all loop invariant computations out of loops Force all loop invariant computations out of loops

View File

@ -270,10 +270,6 @@ void df_refs_reg_replace (struct df *, bitmap, struct df_link *, rtx, rtx);
static int df_def_dominates_all_uses_p (struct df *, struct ref *def); static int df_def_dominates_all_uses_p (struct df *, struct ref *def);
static int df_def_dominates_uses_p (struct df *, struct ref *def, bitmap); static int df_def_dominates_uses_p (struct df *, struct ref *def, bitmap);
static struct ref *df_bb_regno_last_use_find (struct df *, basic_block,
unsigned int);
static struct ref *df_bb_regno_first_def_find (struct df *, basic_block,
unsigned int);
static struct ref *df_bb_insn_regno_last_use_find (struct df *, basic_block, static struct ref *df_bb_insn_regno_last_use_find (struct df *, basic_block,
rtx, unsigned int); rtx, unsigned int);
static struct ref *df_bb_insn_regno_first_def_find (struct df *, basic_block, static struct ref *df_bb_insn_regno_first_def_find (struct df *, basic_block,
@ -2688,6 +2684,34 @@ df_insn_regno_def_p (struct df *df, basic_block bb ATTRIBUTE_UNUSED,
return 0; return 0;
} }
/* Walk the definitions recorded for INSN in the dataflow object DF and
   return the first one whose register is rtx_equal_p to REG.  Return
   NULL when INSN has no such definition.  */
struct ref *
df_find_def (struct df *df, rtx insn, rtx reg)
{
  struct df_link *link = DF_INSN_DEFS (df, insn);

  while (link)
    {
      struct ref *candidate = link->ref;

      if (rtx_equal_p (DF_REF_REG (candidate), reg))
        return candidate;
      link = link->next;
    }

  return NULL;
}
/* Return 1 if one of the uses recorded for INSN in the dataflow object
   DF refers to a register that is rtx_equal_p to REG, zero otherwise.  */
int
df_reg_used (struct df *df, rtx insn, rtx reg)
{
  struct df_link *link = DF_INSN_USES (df, insn);

  while (link)
    {
      struct ref *use = link->ref;

      if (rtx_equal_p (DF_REF_REG (use), reg))
        return 1;
      link = link->next;
    }

  return 0;
}
static int static int
df_def_dominates_all_uses_p (struct df *df ATTRIBUTE_UNUSED, struct ref *def) df_def_dominates_all_uses_p (struct df *df ATTRIBUTE_UNUSED, struct ref *def)
@ -2884,7 +2908,7 @@ df_bb_regs_lives_compare (struct df *df, basic_block bb, rtx reg1, rtx reg2)
/* Return last use of REGNO within BB. */ /* Return last use of REGNO within BB. */
static struct ref * struct ref *
df_bb_regno_last_use_find (struct df *df, basic_block bb, unsigned int regno) df_bb_regno_last_use_find (struct df *df, basic_block bb, unsigned int regno)
{ {
struct df_link *link; struct df_link *link;
@ -2905,7 +2929,7 @@ df_bb_regno_last_use_find (struct df *df, basic_block bb, unsigned int regno)
/* Return first def of REGNO within BB. */ /* Return first def of REGNO within BB. */
static struct ref * struct ref *
df_bb_regno_first_def_find (struct df *df, basic_block bb, unsigned int regno) df_bb_regno_first_def_find (struct df *df, basic_block bb, unsigned int regno)
{ {
struct df_link *link; struct df_link *link;
@ -2924,6 +2948,31 @@ df_bb_regno_first_def_find (struct df *df, basic_block bb, unsigned int regno)
return 0; return 0;
} }
/* Return the last def of REGNO within BB, or NULL if REGNO has no def
   in BB.  DF is the dataflow object whose per-register def chain is
   searched.  */
struct ref *
df_bb_regno_last_def_find (struct df *df, basic_block bb, unsigned int regno)
{
  struct df_link *link;
  struct ref *last_def = NULL;

  /* This assumes that the reg-def list is ordered such that for any
     BB, the first def is found first.  However, since the BBs are not
     ordered, the first def in the chain is not necessarily the first
     def in the function.  */
  for (link = df->regs[regno].defs; link; link = link->next)
    {
      struct ref *def = link->ref;

      if (DF_REF_BB (def) == bb)
        /* Still inside the desired block: remember the latest def.
           Only defs that belong to BB are ever recorded, so a def from
           another block can never be returned.  */
        last_def = def;
      else if (last_def)
        /* We have walked past the defs of the desired block, so the
           remembered def is the last one in BB.  */
        break;
    }

  return last_def;
}
/* Return first use of REGNO inside INSN within BB. */ /* Return first use of REGNO inside INSN within BB. */
static struct ref * static struct ref *

View File

@ -291,6 +291,15 @@ extern int df_bb_regs_lives_compare (struct df *, basic_block, rtx, rtx);
extern rtx df_bb_single_def_use_insn_find (struct df *, basic_block, rtx, extern rtx df_bb_single_def_use_insn_find (struct df *, basic_block, rtx,
rtx); rtx);
extern struct ref *df_bb_regno_last_use_find (struct df *, basic_block, unsigned int);
extern struct ref *df_bb_regno_first_def_find (struct df *, basic_block, unsigned int);
extern struct ref *df_bb_regno_last_def_find (struct df *, basic_block, unsigned int);
extern struct ref *df_find_def (struct df *, rtx, rtx);
extern int df_reg_used (struct df *, rtx, rtx);
/* Functions for debugging from GDB. */ /* Functions for debugging from GDB. */

View File

@ -289,7 +289,7 @@ in the following sections.
-floop-optimize -fcrossjumping -fif-conversion -fif-conversion2 @gol -floop-optimize -fcrossjumping -fif-conversion -fif-conversion2 @gol
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol -finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol -fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
-fmove-all-movables -fnew-ra -fno-branch-count-reg @gol -fmodulo-sched -fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
-fno-default-inline -fno-defer-pop @gol -fno-default-inline -fno-defer-pop @gol
-fno-function-cse -fno-guess-branch-probability @gol -fno-function-cse -fno-guess-branch-probability @gol
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol -fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
@ -3278,7 +3278,7 @@ Annotate the assembler output with miscellaneous debugging information.
Dump after computing branch probabilities, to @file{@var{file}.12.bp}. Dump after computing branch probabilities, to @file{@var{file}.12.bp}.
@item B @item B
@opindex dB @opindex dB
Dump after block reordering, to @file{@var{file}.31.bbro}. Dump after block reordering, to @file{@var{file}.32.bbro}.
@item c @item c
@opindex dc @opindex dc
Dump after instruction combination, to the file @file{@var{file}.20.combine}. Dump after instruction combination, to the file @file{@var{file}.20.combine}.
@ -3288,15 +3288,15 @@ Dump after the first if conversion, to the file @file{@var{file}.14.ce1}.
Also dump after the second if conversion, to the file @file{@var{file}.21.ce2}. Also dump after the second if conversion, to the file @file{@var{file}.21.ce2}.
@item d @item d
@opindex dd @opindex dd
Dump after branch target load optimization, to to @file{@var{file}.32.btl}. Dump after branch target load optimization, to to @file{@var{file}.33.btl}.
Also dump after delayed branch scheduling, to @file{@var{file}.36.dbr}. Also dump after delayed branch scheduling, to @file{@var{file}.37.dbr}.
@item D @item D
@opindex dD @opindex dD
Dump all macro definitions, at the end of preprocessing, in addition to Dump all macro definitions, at the end of preprocessing, in addition to
normal output. normal output.
@item E @item E
@opindex dE @opindex dE
Dump after the third if conversion, to @file{@var{file}.30.ce3}. Dump after the third if conversion, to @file{@var{file}.31.ce3}.
@item f @item f
@opindex df @opindex df
Dump after control and data flow analysis, to @file{@var{file}.11.cfg}. Dump after control and data flow analysis, to @file{@var{file}.11.cfg}.
@ -3306,7 +3306,7 @@ Also dump after life analysis, to @file{@var{file}.19.life}.
Dump after purging @code{ADDRESSOF} codes, to @file{@var{file}.07.addressof}. Dump after purging @code{ADDRESSOF} codes, to @file{@var{file}.07.addressof}.
@item g @item g
@opindex dg @opindex dg
Dump after global register allocation, to @file{@var{file}.25.greg}. Dump after global register allocation, to @file{@var{file}.26.greg}.
@item G @item G
@opindex dG @opindex dG
Dump after GCSE, to @file{@var{file}.08.gcse}. Dump after GCSE, to @file{@var{file}.08.gcse}.
@ -3323,40 +3323,43 @@ Dump after sibling call optimizations, to @file{@var{file}.02.sibling}.
Dump after the first jump optimization, to @file{@var{file}.04.jump}. Dump after the first jump optimization, to @file{@var{file}.04.jump}.
@item k @item k
@opindex dk @opindex dk
Dump after conversion from registers to stack, to @file{@var{file}.34.stack}. Dump after conversion from registers to stack, to @file{@var{file}.35.stack}.
@item l @item l
@opindex dl @opindex dl
Dump after local register allocation, to @file{@var{file}.24.lreg}. Dump after local register allocation, to @file{@var{file}.25.lreg}.
@item L @item L
@opindex dL @opindex dL
Dump after loop optimization passes, to @file{@var{file}.09.loop} and Dump after loop optimization passes, to @file{@var{file}.09.loop} and
@file{@var{file}.16.loop2}. @file{@var{file}.16.loop2}.
@item m
@opindex dm
Dump after modulo scheduling, to @file{@var{file}.23.sms}.
@item M @item M
@opindex dM @opindex dM
Dump after performing the machine dependent reorganization pass, to Dump after performing the machine dependent reorganization pass, to
@file{@var{file}.35.mach}. @file{@var{file}.36.mach}.
@item n @item n
@opindex dn @opindex dn
Dump after register renumbering, to @file{@var{file}.29.rnreg}. Dump after register renumbering, to @file{@var{file}.30.rnreg}.
@item N @item N
@opindex dN @opindex dN
Dump after the register move pass, to @file{@var{file}.22.regmove}. Dump after the register move pass, to @file{@var{file}.22.regmove}.
@item o @item o
@opindex do @opindex do
Dump after post-reload optimizations, to @file{@var{file}.26.postreload}. Dump after post-reload optimizations, to @file{@var{file}.27.postreload}.
@item r @item r
@opindex dr @opindex dr
Dump after RTL generation, to @file{@var{file}.01.rtl}. Dump after RTL generation, to @file{@var{file}.01.rtl}.
@item R @item R
@opindex dR @opindex dR
Dump after the second scheduling pass, to @file{@var{file}.33.sched2}. Dump after the second scheduling pass, to @file{@var{file}.34.sched2}.
@item s @item s
@opindex ds @opindex ds
Dump after CSE (including the jump optimization that sometimes follows Dump after CSE (including the jump optimization that sometimes follows
CSE), to @file{@var{file}.06.cse}. CSE), to @file{@var{file}.06.cse}.
@item S @item S
@opindex dS @opindex dS
Dump after the first scheduling pass, to @file{@var{file}.23.sched}. Dump after the first scheduling pass, to @file{@var{file}.24.sched}.
@item t @item t
@opindex dt @opindex dt
Dump after the second CSE pass (including the jump optimization that Dump after the second CSE pass (including the jump optimization that
@ -3376,10 +3379,10 @@ Dump after the value profile transformations, to @file{@var{file}.13.vpt}.
Also dump after variable tracking, to @file{@var{file}.35.vartrack}. Also dump after variable tracking, to @file{@var{file}.35.vartrack}.
@item w @item w
@opindex dw @opindex dw
Dump after the second flow pass, to @file{@var{file}.27.flow2}. Dump after the second flow pass, to @file{@var{file}.28.flow2}.
@item z @item z
@opindex dz @opindex dz
Dump after the peephole pass, to @file{@var{file}.28.peephole2}. Dump after the peephole pass, to @file{@var{file}.29.peephole2}.
@item Z @item Z
@opindex dZ @opindex dZ
Dump after constructing the web, to @file{@var{file}.17.web}. Dump after constructing the web, to @file{@var{file}.17.web}.
@ -4004,6 +4007,12 @@ types. Languages like C or C++ require each non-automatic variable to
have distinct location, so using this option will result in non-conforming have distinct location, so using this option will result in non-conforming
behavior. behavior.
@item -fmodulo-sched
@opindex fmodulo-sched
Perform swing modulo scheduling immediately before the first scheduling
pass. This pass looks at innermost loops and reorders their
instructions by overlapping different iterations.
@item -fnew-ra @item -fnew-ra
@opindex fnew-ra @opindex fnew-ra
Use a graph coloring register allocator. Currently this option is meant Use a graph coloring register allocator. Currently this option is meant

View File

@ -580,6 +580,15 @@ specific ``mode'' and minimizes the number of mode changes required to
satisfy all users. What these modes are, and what they apply to are satisfy all users. What these modes are, and what they apply to are
completely target-specific. The source is located in @file{lcm.c}. completely target-specific. The source is located in @file{lcm.c}.
@cindex modulo scheduling
@cindex sms, swing, software pipelining
@item Modulo scheduling
This pass looks at innermost loops and reorders their instructions
by overlapping different iterations. Modulo scheduling is performed
immediately before instruction scheduling.
The pass is located in @file{modulo-sched.c}.
@item Instruction scheduling @item Instruction scheduling
This pass looks for instructions whose output will not be available by This pass looks for instructions whose output will not be available by

View File

@ -424,6 +424,9 @@ extern int flag_gen_aux_info;
extern int flag_shared_data; extern int flag_shared_data;
/* Controls the activation of SMS modulo scheduling. */
extern int flag_modulo_sched;
/* flag_schedule_insns means schedule insns within basic blocks (before /* flag_schedule_insns means schedule insns within basic blocks (before
local_alloc). local_alloc).
flag_schedule_insns_after_reload means schedule insns after flag_schedule_insns_after_reload means schedule insns after

View File

@ -1398,7 +1398,9 @@ common_handle_option (size_t scode, const char *arg,
case OPT_fsched_stalled_insns_dep_: case OPT_fsched_stalled_insns_dep_:
flag_sched_stalled_insns_dep = value; flag_sched_stalled_insns_dep = value;
break; break;
case OPT_fmodulo_sched:
  /* Store VALUE, as the neighbouring cases do, instead of a constant 1.
     NOTE(review): VALUE is presumably 0 for the negated form of the
     option, so that form now clears the flag -- confirm.  */
  flag_modulo_sched = value;
  break;
case OPT_fshared_data: case OPT_fshared_data:
flag_shared_data = value; flag_shared_data = value;
break; break;

View File

@ -229,6 +229,29 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
"The maximum number of unswitchings in a single loop", "The maximum number of unswitchings in a single loop",
3) 3)
/* Limits how many loops swing modulo scheduling is applied to; the help
   text describes it as mainly a debugging aid.  The final argument (-1)
   is presumably the default value -- confirm against DEFPARAM.  */
DEFPARAM(PARAM_MAX_SMS_LOOP_NUMBER,
"max-sms-loop-number",
"Maximum number of loops to perform swing modulo scheduling on \
(mainly for debugging)",
-1)
/* This parameter is used to tune SMS MAX II calculations: per the help
   text it is a factor applied to the upper bound the swing modulo
   scheduler uses when scheduling a loop.  NOTE(review): "II" presumably
   stands for initiation interval -- confirm.  */
DEFPARAM(PARAM_SMS_MAX_II_FACTOR,
"sms-max-ii-factor",
"A factor for tuning the upper bound that swing modulo scheduler uses \
for scheduling a loop",
100)
/* Number of cycles the swing modulo scheduler takes into account when
   it checks for conflicts using the DFA.  */
DEFPARAM(PARAM_SMS_DFA_HISTORY,
"sms-dfa-history",
"The number of cycles the swing modulo scheduler considers when \
checking conflicts using DFA",
0)
/* Threshold on the average loop count considered by the swing modulo
   scheduler.  NOTE(review): how the threshold is compared is decided by
   the pass itself, which is not visible here.  */
DEFPARAM(PARAM_SMS_LOOP_AVERAGE_COUNT_THRESHOLD,
"sms-loop-average-count-threshold",
"A threshold on the average loop count considered by the swing modulo \
scheduler",
0)
DEFPARAM(HOT_BB_COUNT_FRACTION, DEFPARAM(HOT_BB_COUNT_FRACTION,
"hot-bb-count-fraction", "hot-bb-count-fraction",
"Select fraction of the maximal count of repetitions of basic block in \ "Select fraction of the maximal count of repetitions of basic block in \

View File

@ -110,6 +110,14 @@ typedef enum compiler_param
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION) PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
#define MAX_UNROLLED_INSNS \ #define MAX_UNROLLED_INSNS \
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
/* Shorthands for the swing modulo scheduling (SMS) parameters: each
   macro expands to PARAM_VALUE of the correspondingly named PARAM_*
   entry.  */
#define MAX_SMS_LOOP_NUMBER \
PARAM_VALUE (PARAM_MAX_SMS_LOOP_NUMBER)
#define SMS_MAX_II_FACTOR \
PARAM_VALUE (PARAM_SMS_MAX_II_FACTOR)
#define SMS_DFA_HISTORY \
PARAM_VALUE (PARAM_SMS_DFA_HISTORY)
#define SMS_LOOP_AVERAGE_COUNT_THRESHOLD \
PARAM_VALUE (PARAM_SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
#define GLOBAL_VAR_THRESHOLD \ #define GLOBAL_VAR_THRESHOLD \
PARAM_VALUE (PARAM_GLOBAL_VAR_THRESHOLD) PARAM_VALUE (PARAM_GLOBAL_VAR_THRESHOLD)
#define MAX_ALIASED_VOPS \ #define MAX_ALIASED_VOPS \

View File

@ -154,6 +154,7 @@ enum dump_file_index
DFI_combine, DFI_combine,
DFI_ce2, DFI_ce2,
DFI_regmove, DFI_regmove,
DFI_sms,
DFI_sched, DFI_sched,
DFI_lreg, DFI_lreg,
DFI_greg, DFI_greg,
@ -178,7 +179,7 @@ enum dump_file_index
Remaining -d letters: Remaining -d letters:
" e m q " " e q "
" K O Q WXY " " K O Q WXY "
*/ */
@ -207,6 +208,7 @@ static struct dump_file_info dump_file_tbl[DFI_MAX] =
{ "combine", 'c', 1, 0, 0 }, { "combine", 'c', 1, 0, 0 },
{ "ce2", 'C', 1, 0, 0 }, { "ce2", 'C', 1, 0, 0 },
{ "regmove", 'N', 1, 0, 0 }, { "regmove", 'N', 1, 0, 0 },
{ "sms", 'm', 0, 0, 0 },
{ "sched", 'S', 1, 0, 0 }, { "sched", 'S', 1, 0, 0 },
{ "lreg", 'l', 1, 0, 0 }, { "lreg", 'l', 1, 0, 0 },
{ "greg", 'g', 1, 0, 0 }, { "greg", 'g', 1, 0, 0 },
@ -742,6 +744,29 @@ rest_of_handle_reorder_blocks (tree decl, rtx insns)
static void static void
rest_of_handle_sched (tree decl, rtx insns) rest_of_handle_sched (tree decl, rtx insns)
{ {
/* Run modulo scheduling, when enabled, before the regular scheduling
   code below.  The TV_SMS timer is pushed and popped even when the
   pass itself is skipped.  */
timevar_push (TV_SMS);
if (optimize > 0 && flag_modulo_sched)
{
/* Perform SMS modulo scheduling. */
open_dump_file (DFI_sms, decl);
/* We want to be able to create new pseudos. */
no_new_pseudos = 0;
sms_schedule (dump_file);
close_dump_file (DFI_sms, print_rtl, get_insns ());
/* Update the life information, because we add pseudos. */
max_regno = max_reg_num ();
allocate_reg_info (max_regno, FALSE, FALSE);
update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
(PROP_DEATH_NOTES
| PROP_KILL_DEAD_CODE
| PROP_SCAN_DEAD_CODE));
/* Restore the setting that was cleared before calling sms_schedule. */
no_new_pseudos = 1;
}
timevar_pop (TV_SMS);
timevar_push (TV_SCHED); timevar_push (TV_SCHED);
/* Print function header into sched dump now /* Print function header into sched dump now

View File

@ -2461,6 +2461,10 @@ extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *);
/* In ra.c. */ /* In ra.c. */
extern void reg_alloc (void); extern void reg_alloc (void);
/* In modulo-sched.c. */
/* sms_schedule takes a FILE *, so it is declared only when BUFSIZ is
   defined -- presumably as a check that <stdio.h> has already been
   included; confirm.  */
#ifdef BUFSIZ
extern void sms_schedule (FILE *);
#endif
struct rtl_hooks struct rtl_hooks
{ {

View File

@ -109,6 +109,7 @@ DEFTIMEVAR (TV_COMBINE , "combiner")
DEFTIMEVAR (TV_IFCVT , "if-conversion") DEFTIMEVAR (TV_IFCVT , "if-conversion")
DEFTIMEVAR (TV_REGMOVE , "regmove") DEFTIMEVAR (TV_REGMOVE , "regmove")
DEFTIMEVAR (TV_MODE_SWITCH , "mode switching") DEFTIMEVAR (TV_MODE_SWITCH , "mode switching")
DEFTIMEVAR (TV_SMS , "sms modulo scheduling")
DEFTIMEVAR (TV_SCHED , "scheduling") DEFTIMEVAR (TV_SCHED , "scheduling")
DEFTIMEVAR (TV_LOCAL_ALLOC , "local alloc") DEFTIMEVAR (TV_LOCAL_ALLOC , "local alloc")
DEFTIMEVAR (TV_GLOBAL_ALLOC , "global alloc") DEFTIMEVAR (TV_GLOBAL_ALLOC , "global alloc")

View File

@ -707,6 +707,9 @@ int flag_schedule_speculative_load_dangerous = 0;
int flag_sched_stalled_insns = 0; int flag_sched_stalled_insns = 0;
int flag_sched_stalled_insns_dep = 1; int flag_sched_stalled_insns_dep = 1;
/* The following flag controls the modulo scheduling activation. */
int flag_modulo_sched = 0;
int flag_single_precision_constant; int flag_single_precision_constant;
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tupple /* flag_branch_on_count_reg means try to replace add-1,compare,branch tupple
@ -1007,6 +1010,7 @@ static const lang_independent_options f_options[] =
{"sched-stalled-insns-dep", &flag_sched_stalled_insns_dep, 1 }, {"sched-stalled-insns-dep", &flag_sched_stalled_insns_dep, 1 },
{"sched2-use-superblocks", &flag_sched2_use_superblocks, 1 }, {"sched2-use-superblocks", &flag_sched2_use_superblocks, 1 },
{"sched2-use-traces", &flag_sched2_use_traces, 1 }, {"sched2-use-traces", &flag_sched2_use_traces, 1 },
{"modulo-sched", &flag_modulo_sched, 1 },
{"branch-count-reg",&flag_branch_on_count_reg, 1 }, {"branch-count-reg",&flag_branch_on_count_reg, 1 },
{"pic", &flag_pic, 1 }, {"pic", &flag_pic, 1 },
{"PIC", &flag_pic, 2 }, {"PIC", &flag_pic, 2 },