bt-load.c: New file.
2003-06-18 Stephen Clarke <stephen.clarke@superh.com> J"orn Rennecke <joern.rennecke@superh.com> * bt-load.c: New file. * Makefile.in (OBJS): Include bt-load.o (bt-load.o): Add dependencies. * flags.h (flag_branch_target_load_optimize): Declare. (flag_branch_target_load_optimize2): Likewise. * hooks.c (hook_reg_class_void_no_regs): New function. (hook_bool_bool_false): Likewise. * hooks.h (hook_reg_class_void_no_regs, hook_bool_bool_false): Declare. * rtl.h (branch_target_load_optimize): Declare. * target-def.h (TARGET_BRANCH_TARGET_REGISTER_CLASS): Define. (TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED): Likewise. (TARGET_INITIALIZER): Include these. * target.h (struct gcc_target): Add branch_target_register_class and branch_target_register_callee_saved members. * toplev.c (enum dump_file_index): Add DFI_branch_target_load (dump_file) Add "tars" entry. (flag_branch_target_load_optimize): New variable. (flag_branch_target_load_optimize2): Likewise. (lang_independent_options): Add entries for new options. (rest_of_compilation): Call branch_target_load_optimize. * doc/tm.texi (TARGET_BRANCH_TARGET_REGISTER_CLASS): Document. (TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED): Likewise. * doc/invoke.texi: Document -fbranch-target-load-optimize and -fbranch-target-load-optimize2. * rtl.h (epilogue_completed): Declare. * recog.c (epilogue_completed): New variable. * toplev.c (rest_of_compilation): Set it. * flow.c (mark_regs_live_at_end): Use it. * config/ia64/ia64.c (ia64_output_mi_thunk): Set it. * config/rs6000/rs6000.c (rs6000_output_mi_thunk): Likewise. * config/sh/sh.c (sh_output_mi_thunk): Likewise. * config/sparc/sparc.c (sparc_output_mi_thunk): Likewise. * sh.c (shmedia_space_reserved_for_target_registers): New variable. (sh_target_reg_class): New function. (sh_optimize_target_register_callee_saved): Likwise. (shmedia_target_regs_stack_space): Likewise. (shmedia_reserve_space_for_target_registers_p): Likewise. (shmedia_target_regs_stack_adjust): Likewise. 
(TARGET_BRANCH_TARGET_REGISTER_CLASS): Override. (TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED): Likewise. (calc_live_regs): If flag_branch_target_load_optimize2 and TARGET_SAVE_ALL_TARGET_REGS is enabled, and we have space reserved for target registers, make sure that we save all target registers. (sh_expand_prologue, sh_expand_epilogue): Take target register optimizations into account. Collapse stack adjustments if that is beneficial. (initial_elimination_offset): Reserve space for target registers if necessary. * sh.h (SAVE_ALL_TR_BIT, TARGET_SAVE_ALL_TARGET_REGS): Define. (OPTIMIZATION_OPTIONS): Enable flag_branch_target_load_optimize. Co-Authored-By: J"orn Rennecke <joern.rennecke@superh.com> From-SVN: r68165
This commit is contained in:
parent
acd8e2d08f
commit
fe3ad57221
|
@ -1,3 +1,58 @@
|
|||
2003-06-18 Stephen Clarke <stephen.clarke@superh.com>
|
||||
J"orn Rennecke <joern.rennecke@superh.com>
|
||||
|
||||
* bt-load.c: New file.
|
||||
* Makefile.in (OBJS): Include bt-load.o
|
||||
(bt-load.o): Add dependencies.
|
||||
* flags.h (flag_branch_target_load_optimize): Declare.
|
||||
(flag_branch_target_load_optimize2): Likewise.
|
||||
* hooks.c (hook_reg_class_void_no_regs): New function.
|
||||
(hook_bool_bool_false): Likewise.
|
||||
* hooks.h (hook_reg_class_void_no_regs, hook_bool_bool_false): Declare.
|
||||
* rtl.h (branch_target_load_optimize): Declare.
|
||||
* target-def.h (TARGET_BRANCH_TARGET_REGISTER_CLASS): Define.
|
||||
(TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED): Likewise.
|
||||
(TARGET_INITIALIZER): Include these.
|
||||
* target.h (struct gcc_target): Add branch_target_register_class
|
||||
and branch_target_register_callee_saved members.
|
||||
* toplev.c (enum dump_file_index): Add DFI_branch_target_load
|
||||
(dump_file) Add "tars" entry.
|
||||
(flag_branch_target_load_optimize): New variable.
|
||||
(flag_branch_target_load_optimize2): Likewise.
|
||||
(lang_independent_options): Add entries for new options.
|
||||
(rest_of_compilation): Call branch_target_load_optimize.
|
||||
* doc/tm.texi (TARGET_BRANCH_TARGET_REGISTER_CLASS): Document.
|
||||
(TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED): Likewise.
|
||||
* doc/invoke.texi: Document -fbranch-target-load-optimize and
|
||||
-fbranch-target-load-optimize2.
|
||||
* rtl.h (epilogue_completed): Declare.
|
||||
* recog.c (epilogue_completed): New variable.
|
||||
* toplev.c (rest_of_compilation): Set it.
|
||||
* flow.c (mark_regs_live_at_end): Use it.
|
||||
* config/ia64/ia64.c (ia64_output_mi_thunk): Set it.
|
||||
* config/rs6000/rs6000.c (rs6000_output_mi_thunk): Likewise.
|
||||
* config/sh/sh.c (sh_output_mi_thunk): Likewise.
|
||||
* config/sparc/sparc.c (sparc_output_mi_thunk): Likewise.
|
||||
|
||||
* sh.c (shmedia_space_reserved_for_target_registers): New variable.
|
||||
(sh_target_reg_class): New function.
|
||||
(sh_optimize_target_register_callee_saved): Likewise.
|
||||
(shmedia_target_regs_stack_space): Likewise.
|
||||
(shmedia_reserve_space_for_target_registers_p): Likewise.
|
||||
(shmedia_target_regs_stack_adjust): Likewise.
|
||||
(TARGET_BRANCH_TARGET_REGISTER_CLASS): Override.
|
||||
(TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED): Likewise.
|
||||
(calc_live_regs): If flag_branch_target_load_optimize2 and
|
||||
TARGET_SAVE_ALL_TARGET_REGS is enabled, and we have space reserved
|
||||
for target registers, make sure that we save all target registers.
|
||||
(sh_expand_prologue, sh_expand_epilogue): Take target register
|
||||
optimizations into account. Collapse stack adjustments if that
|
||||
is beneficial.
|
||||
(initial_elimination_offset): Reserve space for target registers
|
||||
if necessary.
|
||||
* sh.h (SAVE_ALL_TR_BIT, TARGET_SAVE_ALL_TARGET_REGS): Define.
|
||||
(OPTIMIZATION_OPTIONS): Enable flag_branch_target_load_optimize.
|
||||
|
||||
2003-06-18 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* config.gcc: Add an extra_header for ARM targets.
|
||||
|
|
|
@ -822,7 +822,7 @@ OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \
|
|||
sibcall.o simplify-rtx.o sreal.o ssa.o ssa-ccp.o ssa-dce.o stmt.o \
|
||||
stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
|
||||
tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
|
||||
alloc-pool.o et-forest.o cgraph.o cgraphunit.o cfghooks.o \
|
||||
alloc-pool.o et-forest.o cgraph.o cgraphunit.o cfghooks.o bt-load.o \
|
||||
$(GGC) $(out_object_file) $(EXTRA_OBJS) $(host_hook_obj)
|
||||
|
||||
BACKEND = main.o libbackend.a
|
||||
|
@ -1726,6 +1726,9 @@ reload1.o : reload1.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) real.
|
|||
caller-save.o : caller-save.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
flags.h $(REGS_H) hard-reg-set.h insn-config.h $(BASIC_BLOCK_H) function.h \
|
||||
$(RECOG_H) reload.h $(EXPR_H) toplev.h $(TM_P_H)
|
||||
bt-load.o : bt-load.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(BASIC_BLOCK_H) $(RTL_H) hard-reg-set.h $(REGS_H) $(OBSTACK_H) \
|
||||
$(FIBHEAP_H) output.h $(TARGET_H) $(EXPR_H) flags.h $(INSN_ATTR_H)
|
||||
reorg.o : reorg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) conditions.h \
|
||||
hard-reg-set.h $(BASIC_BLOCK_H) $(REGS_H) insn-config.h $(INSN_ATTR_H) except.h \
|
||||
$(RECOG_H) function.h flags.h output.h $(EXPR_H) toplev.h $(PARAMS_H) $(TM_P_H)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -8470,6 +8470,7 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
|
|||
rtx this, insn, funexp;
|
||||
|
||||
reload_completed = 1;
|
||||
epilogue_completed = 1;
|
||||
no_new_pseudos = 1;
|
||||
|
||||
/* Set things up as ia64_expand_prologue might. */
|
||||
|
@ -8537,6 +8538,7 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
|
|||
|
||||
/* Code generation for calls relies on splitting. */
|
||||
reload_completed = 1;
|
||||
epilogue_completed = 1;
|
||||
try_split (PATTERN (insn), insn, 0);
|
||||
|
||||
emit_barrier ();
|
||||
|
@ -8555,6 +8557,7 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
|
|||
final_end_function ();
|
||||
|
||||
reload_completed = 0;
|
||||
epilogue_completed = 0;
|
||||
no_new_pseudos = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -12237,6 +12237,7 @@ rs6000_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
|
|||
rtx this, insn, funexp;
|
||||
|
||||
reload_completed = 1;
|
||||
epilogue_completed = 1;
|
||||
no_new_pseudos = 1;
|
||||
|
||||
/* Mark the end of the (empty) prologue. */
|
||||
|
@ -12316,6 +12317,7 @@ rs6000_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
|
|||
final_end_function ();
|
||||
|
||||
reload_completed = 0;
|
||||
epilogue_completed = 0;
|
||||
no_new_pseudos = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -175,6 +175,8 @@ enum reg_class reg_class_from_letter[] =
|
|||
|
||||
int assembler_dialect;
|
||||
|
||||
static bool shmedia_space_reserved_for_target_registers;
|
||||
|
||||
static void split_branches PARAMS ((rtx));
|
||||
static int branch_dest PARAMS ((rtx));
|
||||
static void force_into PARAMS ((rtx, rtx));
|
||||
|
@ -209,6 +211,8 @@ static int sh_issue_rate PARAMS ((void));
|
|||
static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
|
||||
|
||||
static bool sh_cannot_modify_jumps_p PARAMS ((void));
|
||||
static enum reg_class sh_target_reg_class (void);
|
||||
static bool sh_optimize_target_register_callee_saved (bool);
|
||||
static bool sh_ms_bitfield_layout_p PARAMS ((tree));
|
||||
|
||||
static void sh_init_builtins PARAMS ((void));
|
||||
|
@ -226,6 +230,9 @@ static bool unspec_caller_rtx_p PARAMS ((rtx));
|
|||
static bool sh_cannot_copy_insn_p PARAMS ((rtx));
|
||||
static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
|
||||
static int sh_address_cost PARAMS ((rtx));
|
||||
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
|
||||
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
|
||||
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
|
||||
|
||||
/* Initialize the GCC target structure. */
|
||||
#undef TARGET_ATTRIBUTE_TABLE
|
||||
|
@ -266,6 +273,11 @@ static int sh_address_cost PARAMS ((rtx));
|
|||
|
||||
#undef TARGET_CANNOT_MODIFY_JUMPS_P
|
||||
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
|
||||
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
|
||||
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
|
||||
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
|
||||
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
|
||||
sh_optimize_target_register_callee_saved
|
||||
|
||||
#undef TARGET_MS_BITFIELD_LAYOUT_P
|
||||
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
|
||||
|
@ -4698,6 +4710,53 @@ push_regs (mask, interrupt_handler)
|
|||
push (PR_REG);
|
||||
}
|
||||
|
||||
/* Calculate how much extra space is needed to save all callee-saved
|
||||
target registers.
|
||||
LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
|
||||
|
||||
static int
|
||||
shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
|
||||
{
|
||||
int reg;
|
||||
int stack_space = 0;
|
||||
int interrupt_handler = sh_cfun_interrupt_handler_p ();
|
||||
|
||||
for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
|
||||
if ((! call_used_regs[reg] || interrupt_handler)
|
||||
&& ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
|
||||
/* Leave space to save this target register on the stack,
|
||||
in case target register allocation wants to use it. */
|
||||
stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
|
||||
return stack_space;
|
||||
}
|
||||
|
||||
/* Decide whether we should reserve space for callee-save target registers,
|
||||
in case target register allocation wants to use them. REGS_SAVED is
|
||||
the space, in bytes, that is already required for register saves.
|
||||
LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
|
||||
|
||||
static int
|
||||
shmedia_reserve_space_for_target_registers_p (int regs_saved,
|
||||
HARD_REG_SET *live_regs_mask)
|
||||
{
|
||||
if (optimize_size)
|
||||
return 0;
|
||||
return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
|
||||
}
|
||||
|
||||
/* Decide how much space to reserve for callee-save target registers
|
||||
in case target register allocation wants to use them.
|
||||
LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
|
||||
|
||||
static int
|
||||
shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
|
||||
{
|
||||
if (shmedia_space_reserved_for_target_registers)
|
||||
return shmedia_target_regs_stack_space (live_regs_mask);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Work out the registers which need to be saved, both as a mask and a
|
||||
count of saved words. Return the count.
|
||||
|
||||
|
@ -4801,6 +4860,19 @@ calc_live_regs (live_regs_mask)
|
|||
}
|
||||
}
|
||||
}
|
||||
/* If we have a target register optimization pass after prologue / epilogue
|
||||
threading, we need to assume all target registers will be live even if
|
||||
they aren't now. */
|
||||
if (flag_branch_target_load_optimize2
|
||||
&& TARGET_SAVE_ALL_TARGET_REGS
|
||||
&& shmedia_space_reserved_for_target_registers)
|
||||
for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
|
||||
if ((! call_used_regs[reg] || interrupt_handler)
|
||||
&& ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
|
||||
{
|
||||
SET_HARD_REG_BIT (*live_regs_mask, reg);
|
||||
count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -4950,13 +5022,37 @@ sh_expand_prologue ()
|
|||
rtx r0 = gen_rtx_REG (Pmode, R0_REG);
|
||||
int offset_in_r0 = -1;
|
||||
int sp_in_r0 = 0;
|
||||
int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
|
||||
int total_size, save_size;
|
||||
|
||||
if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
|
||||
/* D is the actual number of bytes that we need for saving registers,
|
||||
however, in initial_elimination_offset we have committed to using
|
||||
an additional TREGS_SPACE amount of bytes - in order to keep both
|
||||
addresses to arguments supplied by the caller and local variables
|
||||
valid, we must keep this gap. Place it between the incoming
|
||||
arguments and the actually saved registers in a bid to optimize
|
||||
locality of reference. */
|
||||
total_size = d + tregs_space;
|
||||
total_size += rounded_frame_size (total_size);
|
||||
save_size = total_size - rounded_frame_size (d);
|
||||
if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
|
||||
d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
|
||||
- d % (STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
- save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
|
||||
/* If adjusting the stack in a single step costs nothing extra, do so.
|
||||
I.e. either if a single addi is enough, or we need a movi anyway,
|
||||
and we don't exceed the maximum offset range (the test for the
|
||||
latter is conservative for simplicity). */
|
||||
if (TARGET_SHMEDIA
|
||||
&& (CONST_OK_FOR_I10 (-total_size)
|
||||
|| (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
|
||||
&& total_size <= 2044)))
|
||||
d_rounding = total_size - save_size;
|
||||
|
||||
offset = d + d_rounding;
|
||||
output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
|
||||
|
||||
output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
|
||||
1, frame_insn);
|
||||
|
||||
/* We loop twice: first, we save 8-byte aligned registers in the
|
||||
higher addresses, that are known to be aligned. Then, we
|
||||
|
@ -5168,16 +5264,39 @@ sh_expand_epilogue ()
|
|||
int d_rounding = 0;
|
||||
|
||||
int save_flags = target_flags;
|
||||
int frame_size;
|
||||
int frame_size, save_size;
|
||||
int fpscr_deferred = 0;
|
||||
|
||||
d = calc_live_regs (&live_regs_mask);
|
||||
|
||||
if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
|
||||
d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
|
||||
- d % (STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
save_size = d;
|
||||
frame_size = rounded_frame_size (d);
|
||||
|
||||
frame_size = rounded_frame_size (d) - d_rounding;
|
||||
if (TARGET_SH5)
|
||||
{
|
||||
int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
|
||||
int total_size;
|
||||
if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
|
||||
d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
|
||||
- d % (STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
|
||||
total_size = d + tregs_space;
|
||||
total_size += rounded_frame_size (total_size);
|
||||
save_size = total_size - frame_size;
|
||||
|
||||
/* If adjusting the stack in a single step costs nothing extra, do so.
|
||||
I.e. either if a single addi is enough, or we need a movi anyway,
|
||||
and we don't exceed the maximum offset range (the test for the
|
||||
latter is conservative for simplicity). */
|
||||
if (TARGET_SHMEDIA
|
||||
&& ! frame_pointer_needed
|
||||
&& (CONST_OK_FOR_I10 (total_size)
|
||||
|| (! CONST_OK_FOR_I10 (save_size + d_rounding)
|
||||
&& total_size <= 2044)))
|
||||
d_rounding = frame_size;
|
||||
|
||||
frame_size -= d_rounding;
|
||||
}
|
||||
|
||||
if (frame_pointer_needed)
|
||||
{
|
||||
|
@ -5356,33 +5475,33 @@ sh_expand_epilogue ()
|
|||
|
||||
if (offset != d + d_rounding)
|
||||
abort ();
|
||||
|
||||
goto finish;
|
||||
}
|
||||
else
|
||||
d = 0;
|
||||
if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
|
||||
pop (PR_REG);
|
||||
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
||||
else /* ! TARGET_SH5 */
|
||||
{
|
||||
int j = (FIRST_PSEUDO_REGISTER - 1) - i;
|
||||
save_size = 0;
|
||||
if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
|
||||
pop (PR_REG);
|
||||
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
||||
{
|
||||
int j = (FIRST_PSEUDO_REGISTER - 1) - i;
|
||||
|
||||
if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
|
||||
&& hard_regs_intersect_p (&live_regs_mask,
|
||||
&reg_class_contents[DF_REGS]))
|
||||
fpscr_deferred = 1;
|
||||
else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
|
||||
pop (j);
|
||||
if (j == FIRST_FP_REG && fpscr_deferred)
|
||||
pop (FPSCR_REG);
|
||||
|
||||
if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
|
||||
&& hard_regs_intersect_p (&live_regs_mask,
|
||||
&reg_class_contents[DF_REGS]))
|
||||
fpscr_deferred = 1;
|
||||
else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
|
||||
pop (j);
|
||||
if (j == FIRST_FP_REG && fpscr_deferred)
|
||||
pop (FPSCR_REG);
|
||||
}
|
||||
}
|
||||
finish:
|
||||
if (target_flags != save_flags && ! current_function_interrupt)
|
||||
emit_insn (gen_toggle_sz ());
|
||||
target_flags = save_flags;
|
||||
|
||||
output_stack_adjust (extra_push + current_function_pretend_args_size
|
||||
+ d + d_rounding
|
||||
+ save_size + d_rounding
|
||||
+ current_function_args_info.stack_regs * 8,
|
||||
stack_pointer_rtx, 7, emit_insn);
|
||||
|
||||
|
@ -5961,10 +6080,18 @@ initial_elimination_offset (from, to)
|
|||
int total_auto_space;
|
||||
int save_flags = target_flags;
|
||||
int copy_flags;
|
||||
|
||||
HARD_REG_SET live_regs_mask;
|
||||
|
||||
shmedia_space_reserved_for_target_registers = false;
|
||||
regs_saved = calc_live_regs (&live_regs_mask);
|
||||
regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
|
||||
|
||||
if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
|
||||
{
|
||||
shmedia_space_reserved_for_target_registers = true;
|
||||
regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
|
||||
}
|
||||
|
||||
if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
|
||||
regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
|
||||
- regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
|
@ -7659,6 +7786,19 @@ sh_cannot_modify_jumps_p ()
|
|||
return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
|
||||
}
|
||||
|
||||
static enum reg_class
|
||||
sh_target_reg_class (void)
|
||||
{
|
||||
return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
|
||||
}
|
||||
|
||||
static bool
|
||||
sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
|
||||
{
|
||||
return (shmedia_space_reserved_for_target_registers
|
||||
&& (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
|
||||
}
|
||||
|
||||
static bool
|
||||
sh_ms_bitfield_layout_p (record_type)
|
||||
tree record_type ATTRIBUTE_UNUSED;
|
||||
|
@ -8331,6 +8471,7 @@ sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
|
|||
rtx scratch0, scratch1, scratch2;
|
||||
|
||||
reload_completed = 1;
|
||||
epilogue_completed = 1;
|
||||
no_new_pseudos = 1;
|
||||
current_function_uses_only_leaf_regs = 1;
|
||||
|
||||
|
@ -8480,6 +8621,7 @@ sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
|
|||
}
|
||||
|
||||
reload_completed = 0;
|
||||
epilogue_completed = 0;
|
||||
no_new_pseudos = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -150,6 +150,7 @@ extern int target_flags;
|
|||
#define PADSTRUCT_BIT (1<<28)
|
||||
#define LITTLE_ENDIAN_BIT (1<<29)
|
||||
#define IEEE_BIT (1<<30)
|
||||
#define SAVE_ALL_TR_BIT (1<<2)
|
||||
|
||||
/* Nonzero if we should dump out instruction size info. */
|
||||
#define TARGET_DUMPISIZE (target_flags & ISIZE_BIT)
|
||||
|
@ -257,6 +258,8 @@ extern int target_flags;
|
|||
/* Nonzero if we should prefer @GOT calls when generating PIC. */
|
||||
#define TARGET_PREFERGOT (target_flags & PREFERGOT_BIT)
|
||||
|
||||
#define TARGET_SAVE_ALL_TARGET_REGS (target_flags & SAVE_ALL_TR_BIT)
|
||||
|
||||
#define SELECT_SH1 (SH1_BIT)
|
||||
#define SELECT_SH2 (SH2_BIT | SELECT_SH1)
|
||||
#define SELECT_SH2E (SH_E_BIT | SH2_BIT | SH1_BIT | FPU_SINGLE_BIT)
|
||||
|
@ -417,6 +420,12 @@ do { \
|
|||
flag_omit_frame_pointer = -1; \
|
||||
if (SIZE) \
|
||||
target_flags |= SPACE_BIT; \
|
||||
if (TARGET_SHMEDIA && LEVEL > 1) \
|
||||
{ \
|
||||
flag_branch_target_load_optimize = 1; \
|
||||
if (! (SIZE)) \
|
||||
target_flags |= SAVE_ALL_TR_BIT; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSEMBLER_DIALECT assembler_dialect
|
||||
|
|
|
@ -8589,6 +8589,7 @@ sparc_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
|
|||
rtx this, insn, funexp, delta_rtx, tmp;
|
||||
|
||||
reload_completed = 1;
|
||||
epilogue_completed = 1;
|
||||
no_new_pseudos = 1;
|
||||
current_function_uses_only_leaf_regs = 1;
|
||||
|
||||
|
@ -8641,6 +8642,7 @@ sparc_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
|
|||
final_end_function ();
|
||||
|
||||
reload_completed = 0;
|
||||
epilogue_completed = 0;
|
||||
no_new_pseudos = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -258,7 +258,8 @@ in the following sections.
|
|||
@xref{Optimize Options,,Options that Control Optimization}.
|
||||
@gccoptlist{-falign-functions=@var{n} -falign-jumps=@var{n} @gol
|
||||
-falign-labels=@var{n} -falign-loops=@var{n} @gol
|
||||
-fbranch-probabilities -fcaller-saves -fcprop-registers @gol
|
||||
-fbranch-probabilities -fbranch-target-load-optimize @gol
|
||||
-fbranch-target-load-optimize2 -fcaller-saves -fcprop-registers @gol
|
||||
-fcse-follow-jumps -fcse-skip-blocks -fdata-sections @gol
|
||||
-fdelayed-branch -fdelete-null-pointer-checks @gol
|
||||
-fexpensive-optimizations -ffast-math -ffloat-store @gol
|
||||
|
@ -4420,6 +4421,19 @@ Perform Sparse Conditional Constant Propagation in SSA form. Requires
|
|||
Perform aggressive dead-code elimination in SSA form. Requires @option{-fssa}.
|
||||
Like @option{-fssa}, this is an experimental feature.
|
||||
|
||||
@item -fbranch-target-load-optimize
|
||||
@opindex fbranch-target-load-optimize
|
||||
Perform branch target register load optimization before prologue / epilogue
|
||||
threading.
|
||||
The use of target registers can typically be exposed only during reload,
|
||||
thus hoisting loads out of loops and doing inter-block scheduling needs
|
||||
a separate optimization pass.
|
||||
|
||||
@item -fbranch-target-load-optimize2
|
||||
@opindex fbranch-target-load-optimize2
|
||||
Perform branch target register load optimization after prologue / epilogue
|
||||
threading.
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -9271,3 +9271,25 @@ cannot_modify_jumps_past_reload_p ()
|
|||
@}
|
||||
@end smallexample
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} enum reg_class TARGET_BRANCH_TARGET_REGISTER_CLASS (void)
|
||||
This target hook returns a register class for which branch target register
|
||||
optimizations should be applied. All registers in this class should be
|
||||
usable interchangeably. After reload, registers in this class will be
|
||||
re-allocated and loads will be hoisted out of loops and be subjected
|
||||
to inter-block scheduling.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} bool TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED (bool @var{after_prologue_epilogue_gen})
|
||||
Branch target register optimization will by default exclude callee-saved
|
||||
registers
|
||||
that are not already live during the current function; if this target hook
|
||||
returns true, they will be included. The target code must then make sure
|
||||
that all target registers in the class returned by
|
||||
@samp{TARGET_BRANCH_TARGET_REGISTER_CLASS} that might need saving are
|
||||
saved. @var{after_prologue_epilogue_gen} indicates if prologues and
|
||||
epilogues have already been generated. Note, even if you only return
|
||||
true when @var{after_prologue_epilogue_gen} is false, you still are likely
|
||||
to have to make special provisions in @code{INITIAL_ELIMINATION_OFFSET}
|
||||
to reserve space for caller-saved target registers.
|
||||
@end deftypefn
|
||||
|
|
10
gcc/flags.h
10
gcc/flags.h
|
@ -652,6 +652,16 @@ extern int flag_gcse_lm;
|
|||
|
||||
extern int flag_gcse_sm;
|
||||
|
||||
/* Perform branch target register optimization before prologue / epilogue
|
||||
threading. */
|
||||
|
||||
extern int flag_branch_target_load_optimize;
|
||||
|
||||
/* Perform branch target register optimization after prologue / epilogue
|
||||
threading and jump2. */
|
||||
|
||||
extern int flag_branch_target_load_optimize2;
|
||||
|
||||
|
||||
/* Nonzero means we should do dwarf2 duplicate elimination. */
|
||||
|
||||
|
|
|
@ -985,7 +985,7 @@ mark_regs_live_at_end (set)
|
|||
|
||||
/* If exiting needs the right stack value, consider the stack pointer
|
||||
live at the end of the function. */
|
||||
if ((HAVE_epilogue && reload_completed)
|
||||
if ((HAVE_epilogue && epilogue_completed)
|
||||
|| ! EXIT_IGNORE_STACK
|
||||
|| (! FRAME_POINTER_REQUIRED
|
||||
&& ! current_function_calls_alloca
|
||||
|
@ -1025,7 +1025,7 @@ mark_regs_live_at_end (set)
|
|||
if (global_regs[i] || EPILOGUE_USES (i))
|
||||
SET_REGNO_REG_SET (set, i);
|
||||
|
||||
if (HAVE_epilogue && reload_completed)
|
||||
if (HAVE_epilogue && epilogue_completed)
|
||||
{
|
||||
/* Mark all call-saved registers that we actually used. */
|
||||
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
||||
|
@ -1046,7 +1046,7 @@ mark_regs_live_at_end (set)
|
|||
}
|
||||
#endif
|
||||
#ifdef EH_RETURN_STACKADJ_RTX
|
||||
if ((! HAVE_epilogue || ! reload_completed)
|
||||
if ((! HAVE_epilogue || ! epilogue_completed)
|
||||
&& current_function_calls_eh_return)
|
||||
{
|
||||
rtx tmp = EH_RETURN_STACKADJ_RTX;
|
||||
|
@ -1055,7 +1055,7 @@ mark_regs_live_at_end (set)
|
|||
}
|
||||
#endif
|
||||
#ifdef EH_RETURN_HANDLER_RTX
|
||||
if ((! HAVE_epilogue || ! reload_completed)
|
||||
if ((! HAVE_epilogue || ! epilogue_completed)
|
||||
&& current_function_calls_eh_return)
|
||||
{
|
||||
rtx tmp = EH_RETURN_HANDLER_RTX;
|
||||
|
|
15
gcc/hooks.c
15
gcc/hooks.c
|
@ -41,6 +41,21 @@ hook_bool_void_false ()
|
|||
return false;
|
||||
}
|
||||
|
||||
/* The same, but formally returning an enum reg_class. */
|
||||
enum reg_class
|
||||
hook_reg_class_void_no_regs (void)
|
||||
{
|
||||
return NO_REGS;
|
||||
}
|
||||
|
||||
/* Generic hook that takes (bool) and returns false. */
|
||||
bool
|
||||
hook_bool_bool_false (bool a ATTRIBUTE_UNUSED)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* Generic hook that takes (tree, int) and does nothing. */
|
||||
void
|
||||
hook_void_tree_int (a, b)
|
||||
|
|
|
@ -23,6 +23,8 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|||
#define GCC_HOOKS_H
|
||||
|
||||
bool hook_bool_void_false PARAMS ((void));
|
||||
enum reg_class hook_reg_class_void_no_regs (void);
|
||||
bool hook_bool_bool_false (bool);
|
||||
bool hook_bool_tree_false PARAMS ((tree));
|
||||
bool hook_bool_tree_hwi_hwi_tree_false
|
||||
PARAMS ((tree, HOST_WIDE_INT, HOST_WIDE_INT, tree));
|
||||
|
|
|
@ -88,6 +88,9 @@ int which_alternative;
|
|||
|
||||
int reload_completed;
|
||||
|
||||
/* Nonzero after thread_prologue_and_epilogue_insns has run. */
|
||||
int epilogue_completed;
|
||||
|
||||
/* Initialize data used by the function `recog'.
|
||||
This must be called once in the compilation of a function
|
||||
before any insn recognition may be done in the function. */
|
||||
|
|
|
@ -1965,6 +1965,9 @@ extern int flow2_completed;
|
|||
|
||||
extern int reload_completed;
|
||||
|
||||
/* Nonzero after thread_prologue_and_epilogue_insns has run. */
|
||||
extern int epilogue_completed;
|
||||
|
||||
/* Set to 1 while reload_as_needed is operating.
|
||||
Required by some machines to handle any generated moves differently. */
|
||||
|
||||
|
@ -2123,6 +2126,7 @@ extern rtx libcall_other_reg PARAMS ((rtx, rtx));
|
|||
#ifdef BUFSIZ
|
||||
extern void loop_optimize PARAMS ((rtx, FILE *, int));
|
||||
#endif
|
||||
extern void branch_target_load_optimize (rtx, bool);
|
||||
extern void record_excess_regs PARAMS ((rtx, rtx, rtx *));
|
||||
|
||||
/* In function.c */
|
||||
|
|
|
@ -269,6 +269,8 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|||
|
||||
/* In hook.c. */
|
||||
#define TARGET_CANNOT_MODIFY_JUMPS_P hook_bool_void_false
|
||||
#define TARGET_BRANCH_TARGET_REGISTER_CLASS hook_reg_class_void_no_regs
|
||||
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false
|
||||
#define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false
|
||||
#define TARGET_CANNOT_COPY_INSN_P NULL
|
||||
#define TARGET_DELEGITIMIZE_ADDRESS hook_rtx_rtx_identity
|
||||
|
@ -307,6 +309,8 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|||
TARGET_EXPAND_BUILTIN, \
|
||||
TARGET_SECTION_TYPE_FLAGS, \
|
||||
TARGET_CANNOT_MODIFY_JUMPS_P, \
|
||||
TARGET_BRANCH_TARGET_REGISTER_CLASS, \
|
||||
TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \
|
||||
TARGET_CANNOT_FORCE_CONST_MEM, \
|
||||
TARGET_CANNOT_COPY_INSN_P, \
|
||||
TARGET_DELEGITIMIZE_ADDRESS, \
|
||||
|
|
|
@ -291,6 +291,9 @@ struct gcc_target
|
|||
not, at the current point in the compilation. */
|
||||
bool (* cannot_modify_jumps_p) PARAMS ((void));
|
||||
|
||||
enum reg_class (* branch_target_register_class) PARAMS ((void));
|
||||
bool (* branch_target_register_callee_saved) PARAMS ((bool));
|
||||
|
||||
/* True if the constant X cannot be placed in the constant pool. */
|
||||
bool (* cannot_force_const_mem) PARAMS ((rtx));
|
||||
|
||||
|
|
47
gcc/toplev.c
47
gcc/toplev.c
|
@ -287,6 +287,7 @@ enum dump_file_index
|
|||
DFI_rnreg,
|
||||
DFI_bbro,
|
||||
DFI_ce3,
|
||||
DFI_branch_target_load,
|
||||
DFI_sched2,
|
||||
DFI_stack,
|
||||
DFI_mach,
|
||||
|
@ -338,6 +339,7 @@ static struct dump_file_info dump_file[DFI_MAX] =
|
|||
{ "rnreg", 'n', 1, 0, 0 },
|
||||
{ "bbro", 'B', 1, 0, 0 },
|
||||
{ "ce3", 'E', 1, 0, 0 },
|
||||
{ "btl", 'd', 1, 0, 0 }, /* Yes, duplicate enable switch. */
|
||||
{ "sched2", 'R', 1, 0, 0 },
|
||||
{ "stack", 'k', 1, 0, 0 },
|
||||
{ "mach", 'M', 1, 0, 0 },
|
||||
|
@ -689,6 +691,16 @@ int flag_gcse_lm = 1;
|
|||
|
||||
int flag_gcse_sm = 1;
|
||||
|
||||
/* Perform target register optimization before prologue / epilogue
|
||||
threading. */
|
||||
|
||||
int flag_branch_target_load_optimize = 0;
|
||||
|
||||
/* Perform target register optimization after prologue / epilogue
|
||||
threading and jump2. */
|
||||
|
||||
int flag_branch_target_load_optimize2 = 0;
|
||||
|
||||
/* Nonzero means to rerun cse after loop optimization. This increases
|
||||
compilation time about 20% and picks up a few more common expressions. */
|
||||
|
||||
|
@ -1118,6 +1130,10 @@ static const lang_independent_options f_options[] =
|
|||
N_("Perform enhanced load motion during global subexpression elimination") },
|
||||
{"gcse-sm", &flag_gcse_sm, 1,
|
||||
N_("Perform store motion after global subexpression elimination") },
|
||||
{"branch-target-load-optimize", &flag_branch_target_load_optimize, 1,
|
||||
N_("Perform branch target load optimization before prologue / epilogue threading") },
|
||||
{"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1,
|
||||
N_("Perform branch target load optimization after prologue / epilogue threading") },
|
||||
{"loop-optimize", &flag_loop_optimize, 1,
|
||||
N_("Perform the loop optimizations") },
|
||||
{"crossjumping", &flag_crossjumping, 1,
|
||||
|
@ -3761,6 +3777,17 @@ rest_of_compilation (tree decl)
|
|||
#endif
|
||||
split_all_insns (0);
|
||||
|
||||
if (flag_branch_target_load_optimize)
|
||||
{
|
||||
open_dump_file (DFI_branch_target_load, decl);
|
||||
|
||||
branch_target_load_optimize (insns, false);
|
||||
|
||||
close_dump_file (DFI_branch_target_load, print_rtl_with_bb, insns);
|
||||
|
||||
ggc_collect ();
|
||||
}
|
||||
|
||||
if (optimize)
|
||||
cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
|
||||
|
@ -3769,6 +3796,7 @@ rest_of_compilation (tree decl)
|
|||
it and the rest of the code and also allows delayed branch
|
||||
scheduling to operate in the epilogue. */
|
||||
thread_prologue_and_epilogue_insns (insns);
|
||||
epilogue_completed = 1;
|
||||
|
||||
if (optimize)
|
||||
{
|
||||
|
@ -3825,6 +3853,24 @@ rest_of_compilation (tree decl)
|
|||
timevar_pop (TV_IFCVT2);
|
||||
}
|
||||
|
||||
if (flag_branch_target_load_optimize2)
|
||||
{
|
||||
/* Leave this a warning for now so that it is possible to experiment
|
||||
with running this pass twice. In 3.6, we should either make this
|
||||
an error, or use separate dump files. */
|
||||
if (flag_branch_target_load_optimize)
|
||||
warning ("branch target register load optimization is not intended "
|
||||
"to be run twice");
|
||||
|
||||
open_dump_file (DFI_branch_target_load, decl);
|
||||
|
||||
branch_target_load_optimize (insns, true);
|
||||
|
||||
close_dump_file (DFI_branch_target_load, print_rtl_with_bb, insns);
|
||||
|
||||
ggc_collect ();
|
||||
}
|
||||
|
||||
#ifdef INSN_SCHEDULING
|
||||
if (optimize > 0 && flag_schedule_insns_after_reload)
|
||||
rest_of_handle_sched2 (decl, insns);
|
||||
|
@ -3904,6 +3950,7 @@ rest_of_compilation (tree decl)
|
|||
#endif
|
||||
|
||||
reload_completed = 0;
|
||||
epilogue_completed = 0;
|
||||
flow2_completed = 0;
|
||||
no_new_pseudos = 0;
|
||||
|
||||
|
|
Loading…
Reference in New Issue