bfin.c (n_regs_to_save): New static variable.

* config/bfin/bfin.c (n_regs_to_save): New static variable.
	(push_multiple_operation, pop_multiple_operation): Set it.
	(workaround_rts_anomaly): New function.
	(workaround_speculation): New function, broken out of bfin_reorg.
	(bfin_reorg): Call the new functions.

From-SVN: r140146
This commit is contained in:
Bernd Schmidt 2008-09-09 12:09:37 +00:00 committed by Bernd Schmidt
parent 8b17cc05d3
commit 22fb24d53a
2 changed files with 146 additions and 49 deletions

View File

@ -1,3 +1,11 @@
2008-09-09 Bernd Schmidt <bernd.schmidt@analog.com>
* config/bfin/bfin.c (n_regs_to_save): New static variable.
(push_multiple_operation, pop_multiple_operation): Set it.
(workaround_rts_anomaly): New function.
(workaround_speculation): New function, broken out of bfin_reorg.
(bfin_reorg): Call the new functions.
2008-09-09 Richard Guenther <rguenther@suse.de>
PR middle-end/37354

View File

@ -3100,6 +3100,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
/* Used for communication between {push,pop}_multiple_operation (which
we use not only as a predicate) and the corresponding output functions. */
static int first_preg_to_save, first_dreg_to_save;
static int n_regs_to_save;
int
push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
@ -3168,6 +3169,7 @@ push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
lastpreg++;
}
}
n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
return 1;
}
@ -3227,6 +3229,7 @@ pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
}
first_dreg_to_save = lastdreg;
first_preg_to_save = lastpreg;
n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
return 1;
}
@ -4631,6 +4634,85 @@ reorder_var_tracking_notes (void)
}
}
/* On some silicon revisions, functions shorter than a certain number of cycles
can cause unpredictable behaviour. Work around this by adding NOPs as
needed. */
static void
workaround_rts_anomaly (void)
{
/* FIRST_INSN remembers the first real insn so NOPs can be emitted in
front of it; CYCLES is the number of cycles still unaccounted for
before the RTS (the anomaly threshold is 4).  */
rtx insn, first_insn = NULL_RTX;
int cycles = 4;
/* Only apply the workaround when the -mrets silicon-anomaly option
(or the selected CPU revision) requires it.  */
if (! ENABLE_WA_RETS)
return;
/* Scan forward from the start of the function, decrementing CYCLES by
an estimate of each insn's cost, until either the return insn is
found (pad with NOPs) or enough cycles have elapsed (no padding).  */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
rtx pat;
/* A barrier means control cannot fall through to a return here.  */
if (BARRIER_P (insn))
return;
/* Notes and labels consume no cycles.  */
if (NOTE_P (insn) || LABEL_P (insn))
continue;
/* Record the first real insn as the NOP insertion point.  */
if (first_insn == NULL_RTX)
first_insn = insn;
pat = PATTERN (insn);
/* Markers and jump tables generate no machine code; asms have an
unknown (presumably sufficient) cost, so skip them too.
NOTE(review): asms are treated as zero-cost here rather than as
satisfying the threshold -- confirm that is the intent.  */
if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
|| GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
|| GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
continue;
/* A call takes long enough that the anomaly cannot trigger.  */
if (CALL_P (insn))
return;
if (JUMP_P (insn))
{
/* Found the RTS; stop scanning and pad with the remaining
CYCLES worth of NOPs below.  */
if (recog_memoized (insn) == CODE_FOR_return_internal)
break;
/* Nothing to worry about for direct jumps. */
if (!any_condjump_p (insn))
return;
/* A conditional jump costs at least one cycle; if that already
exhausts the budget, no padding is needed.  */
if (cycles <= 1)
return;
cycles--;
}
else if (INSN_P (insn))
{
rtx pat = PATTERN (insn);
int this_cycles = 1;
if (GET_CODE (pat) == PARALLEL)
{
/* Multi-register push/pop insns take one cycle per saved
register; the predicates set n_regs_to_save as a side
effect when they match.  */
if (push_multiple_operation (pat, VOIDmode)
|| pop_multiple_operation (pat, VOIDmode))
this_cycles = n_regs_to_save;
}
else
{
/* Per-insn cycle estimates for the few multi-cycle insns;
everything else defaults to one cycle.  */
enum insn_code icode = recog_memoized (insn);
if (icode == CODE_FOR_link)
this_cycles = 4;
else if (icode == CODE_FOR_unlink)
this_cycles = 3;
else if (icode == CODE_FOR_mulsi3)
this_cycles = 5;
}
/* Budget used up before reaching a return: nothing to do.  */
if (this_cycles >= cycles)
return;
cycles -= this_cycles;
}
}
/* We reached the RTS with CYCLES still outstanding; pad the function
entry with that many NOPs to satisfy the minimum length.  */
while (cycles > 0)
{
emit_insn_before (gen_nop (), first_insn);
cycles--;
}
}
/* Return an insn type for INSN that can be used by the caller for anomaly
workarounds. This differs from plain get_attr_type in that it handles
SEQUENCEs. */
@ -4711,58 +4793,13 @@ find_load (rtx insn)
return NULL_RTX;
}
/* We use the machine specific reorg pass for emitting CSYNC instructions
after conditional branches as needed.
The Blackfin is unusual in that a code sequence like
if cc jump label
r0 = (p0)
may speculatively perform the load even if the condition isn't true. This
happens for a branch that is predicted not taken, because the pipeline
isn't flushed or stalled, so the early stages of the following instructions,
which perform the memory reference, are allowed to execute before the
jump condition is evaluated.
Therefore, we must insert additional instructions in all places where this
could lead to incorrect behavior. The manual recommends CSYNC, while
VDSP seems to use NOPs (even though its corresponding compiler option is
named CSYNC).
When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
When optimizing for size, we turn the branch into a predicted taken one.
This may be slower due to mispredicts, but saves code size. */
static void
bfin_reorg (void)
workaround_speculation (void)
{
rtx insn, next;
rtx last_condjump = NULL_RTX;
int cycles_since_jump = INT_MAX;
/* We are freeing block_for_insn in the toplev to keep compatibility
with old MDEP_REORGS that are not CFG based. Recompute it now. */
compute_bb_for_insn ();
if (bfin_flag_schedule_insns2)
{
splitting_for_sched = 1;
split_all_insns ();
splitting_for_sched = 0;
timevar_push (TV_SCHED2);
schedule_insns ();
timevar_pop (TV_SCHED2);
/* Examine the schedule and insert nops as necessary for 64-bit parallel
instructions. */
bfin_gen_bundles ();
}
df_analyze ();
/* Doloop optimization */
if (cfun->machine->has_hardware_loops)
bfin_reorg_loops (dump_file);
if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS)
return;
@ -4841,11 +4878,9 @@ bfin_reorg (void)
if (! ENABLE_WA_SPECULATIVE_SYNCS)
return;
if (! ENABLE_WA_RETS)
return;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
int cycles_since_jump;
if (JUMP_P (insn)
&& any_condjump_p (insn)
&& (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
@ -4907,6 +4942,57 @@ bfin_reorg (void)
}
}
}
}
/* We use the machine specific reorg pass for emitting CSYNC instructions
after conditional branches as needed.
The Blackfin is unusual in that a code sequence like
if cc jump label
r0 = (p0)
may speculatively perform the load even if the condition isn't true. This
happens for a branch that is predicted not taken, because the pipeline
isn't flushed or stalled, so the early stages of the following instructions,
which perform the memory reference, are allowed to execute before the
jump condition is evaluated.
Therefore, we must insert additional instructions in all places where this
could lead to incorrect behavior. The manual recommends CSYNC, while
VDSP seems to use NOPs (even though its corresponding compiler option is
named CSYNC).
When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
When optimizing for size, we turn the branch into a predicted taken one.
This may be slower due to mispredicts, but saves code size. */
static void
bfin_reorg (void)
{
/* We are freeing block_for_insn in the toplev to keep compatibility
with old MDEP_REORGS that are not CFG based. Recompute it now. */
compute_bb_for_insn ();
if (bfin_flag_schedule_insns2)
{
splitting_for_sched = 1;
split_all_insns ();
splitting_for_sched = 0;
timevar_push (TV_SCHED2);
schedule_insns ();
timevar_pop (TV_SCHED2);
/* Examine the schedule and insert nops as necessary for 64-bit parallel
instructions. */
bfin_gen_bundles ();
}
df_analyze ();
/* Doloop optimization */
if (cfun->machine->has_hardware_loops)
bfin_reorg_loops (dump_file);
workaround_speculation ();
if (bfin_flag_var_tracking)
{
@ -4915,7 +5001,10 @@ bfin_reorg (void)
reorder_var_tracking_notes ();
timevar_pop (TV_VAR_TRACKING);
}
df_finish_pass (false);
workaround_rts_anomaly ();
}
/* Handle interrupt_handler, exception_handler and nmi_handler function