bfin.c (n_regs_to_save): New static variable.
* config/bfin/bfin.c (n_regs_to_save): New static variable. (push_multiple_operation, pop_multiple_operation): Set it. (workaround_rts_anomaly): New function. (workaround_speculation): New function, broken out of bfin_reorg. (bfin_reorg): Call the new functions. From-SVN: r140146
This commit is contained in:
parent
8b17cc05d3
commit
22fb24d53a
@ -1,3 +1,11 @@
|
||||
2008-09-09 Bernd Schmidt <bernd.schmidt@analog.com>
|
||||
|
||||
* config/bfin/bfin.c (n_regs_to_save): New static variable.
|
||||
(push_multiple_operation, pop_multiple_operation): Set it.
|
||||
(workaround_rts_anomaly): New function.
|
||||
(workaround_speculation): New function, broken out of bfin_reorg.
|
||||
(bfin_reorg): Call the new functions.
|
||||
|
||||
2008-09-09 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/37354
|
||||
|
@ -3100,6 +3100,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
|
||||
/* Used for communication between {push,pop}_multiple_operation (which
|
||||
we use not only as a predicate) and the corresponding output functions. */
|
||||
static int first_preg_to_save, first_dreg_to_save;
|
||||
/* Set by {push,pop}_multiple_operation whenever they accept a pattern, as
   (8 - first_dreg_to_save) + (6 - first_preg_to_save).  Read by
   workaround_rts_anomaly as the cycle estimate for a multi-register
   push or pop.  */
static int n_regs_to_save;
|
||||
|
||||
int
|
||||
push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
|
||||
@ -3168,6 +3169,7 @@ push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
|
||||
lastpreg++;
|
||||
}
|
||||
}
|
||||
n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -3227,6 +3229,7 @@ pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
|
||||
}
|
||||
first_dreg_to_save = lastdreg;
|
||||
first_preg_to_save = lastpreg;
|
||||
n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -4631,6 +4634,85 @@ reorder_var_tracking_notes (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* On some silicon revisions, functions shorter than a certain number of cycles
|
||||
can cause unpredictable behaviour. Work around this by adding NOPs as
|
||||
needed. */
|
||||
static void
|
||||
workaround_rts_anomaly (void)
|
||||
{
|
||||
rtx insn, first_insn = NULL_RTX;
|
||||
int cycles = 4;
|
||||
|
||||
if (! ENABLE_WA_RETS)
|
||||
return;
|
||||
|
||||
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
||||
{
|
||||
rtx pat;
|
||||
|
||||
if (BARRIER_P (insn))
|
||||
return;
|
||||
|
||||
if (NOTE_P (insn) || LABEL_P (insn))
|
||||
continue;
|
||||
|
||||
if (first_insn == NULL_RTX)
|
||||
first_insn = insn;
|
||||
pat = PATTERN (insn);
|
||||
if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
|
||||
|| GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
|
||||
|| GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
|
||||
continue;
|
||||
|
||||
if (CALL_P (insn))
|
||||
return;
|
||||
|
||||
if (JUMP_P (insn))
|
||||
{
|
||||
if (recog_memoized (insn) == CODE_FOR_return_internal)
|
||||
break;
|
||||
|
||||
/* Nothing to worry about for direct jumps. */
|
||||
if (!any_condjump_p (insn))
|
||||
return;
|
||||
if (cycles <= 1)
|
||||
return;
|
||||
cycles--;
|
||||
}
|
||||
else if (INSN_P (insn))
|
||||
{
|
||||
rtx pat = PATTERN (insn);
|
||||
int this_cycles = 1;
|
||||
|
||||
if (GET_CODE (pat) == PARALLEL)
|
||||
{
|
||||
if (push_multiple_operation (pat, VOIDmode)
|
||||
|| pop_multiple_operation (pat, VOIDmode))
|
||||
this_cycles = n_regs_to_save;
|
||||
}
|
||||
else
|
||||
{
|
||||
enum insn_code icode = recog_memoized (insn);
|
||||
if (icode == CODE_FOR_link)
|
||||
this_cycles = 4;
|
||||
else if (icode == CODE_FOR_unlink)
|
||||
this_cycles = 3;
|
||||
else if (icode == CODE_FOR_mulsi3)
|
||||
this_cycles = 5;
|
||||
}
|
||||
if (this_cycles >= cycles)
|
||||
return;
|
||||
|
||||
cycles -= this_cycles;
|
||||
}
|
||||
}
|
||||
while (cycles > 0)
|
||||
{
|
||||
emit_insn_before (gen_nop (), first_insn);
|
||||
cycles--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return an insn type for INSN that can be used by the caller for anomaly
|
||||
workarounds. This differs from plain get_attr_type in that it handles
|
||||
SEQUENCEs. */
|
||||
@ -4711,58 +4793,13 @@ find_load (rtx insn)
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* We use the machine specific reorg pass for emitting CSYNC instructions
|
||||
after conditional branches as needed.
|
||||
|
||||
The Blackfin is unusual in that a code sequence like
|
||||
if cc jump label
|
||||
r0 = (p0)
|
||||
may speculatively perform the load even if the condition isn't true. This
|
||||
happens for a branch that is predicted not taken, because the pipeline
|
||||
isn't flushed or stalled, so the early stages of the following instructions,
|
||||
which perform the memory reference, are allowed to execute before the
|
||||
jump condition is evaluated.
|
||||
Therefore, we must insert additional instructions in all places where this
|
||||
could lead to incorrect behavior. The manual recommends CSYNC, while
|
||||
VDSP seems to use NOPs (even though its corresponding compiler option is
|
||||
named CSYNC).
|
||||
|
||||
When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
|
||||
When optimizing for size, we turn the branch into a predicted taken one.
|
||||
This may be slower due to mispredicts, but saves code size. */
|
||||
|
||||
static void
|
||||
bfin_reorg (void)
|
||||
workaround_speculation (void)
|
||||
{
|
||||
rtx insn, next;
|
||||
rtx last_condjump = NULL_RTX;
|
||||
int cycles_since_jump = INT_MAX;
|
||||
|
||||
/* We are freeing block_for_insn in the toplev to keep compatibility
|
||||
with old MDEP_REORGS that are not CFG based. Recompute it now. */
|
||||
compute_bb_for_insn ();
|
||||
|
||||
if (bfin_flag_schedule_insns2)
|
||||
{
|
||||
splitting_for_sched = 1;
|
||||
split_all_insns ();
|
||||
splitting_for_sched = 0;
|
||||
|
||||
timevar_push (TV_SCHED2);
|
||||
schedule_insns ();
|
||||
timevar_pop (TV_SCHED2);
|
||||
|
||||
/* Examine the schedule and insert nops as necessary for 64-bit parallel
|
||||
instructions. */
|
||||
bfin_gen_bundles ();
|
||||
}
|
||||
|
||||
df_analyze ();
|
||||
|
||||
/* Doloop optimization */
|
||||
if (cfun->machine->has_hardware_loops)
|
||||
bfin_reorg_loops (dump_file);
|
||||
|
||||
if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS)
|
||||
return;
|
||||
|
||||
@ -4841,11 +4878,9 @@ bfin_reorg (void)
|
||||
if (! ENABLE_WA_SPECULATIVE_SYNCS)
|
||||
return;
|
||||
|
||||
if (! ENABLE_WA_RETS)
|
||||
return;
|
||||
|
||||
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
||||
{
|
||||
int cycles_since_jump;
|
||||
if (JUMP_P (insn)
|
||||
&& any_condjump_p (insn)
|
||||
&& (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
|
||||
@ -4907,6 +4942,57 @@ bfin_reorg (void)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* We use the machine specific reorg pass for emitting CSYNC instructions
|
||||
after conditional branches as needed.
|
||||
|
||||
The Blackfin is unusual in that a code sequence like
|
||||
if cc jump label
|
||||
r0 = (p0)
|
||||
may speculatively perform the load even if the condition isn't true. This
|
||||
happens for a branch that is predicted not taken, because the pipeline
|
||||
isn't flushed or stalled, so the early stages of the following instructions,
|
||||
which perform the memory reference, are allowed to execute before the
|
||||
jump condition is evaluated.
|
||||
Therefore, we must insert additional instructions in all places where this
|
||||
could lead to incorrect behavior. The manual recommends CSYNC, while
|
||||
VDSP seems to use NOPs (even though its corresponding compiler option is
|
||||
named CSYNC).
|
||||
|
||||
When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
|
||||
When optimizing for size, we turn the branch into a predicted taken one.
|
||||
This may be slower due to mispredicts, but saves code size. */
|
||||
|
||||
static void
|
||||
bfin_reorg (void)
|
||||
{
|
||||
/* We are freeing block_for_insn in the toplev to keep compatibility
|
||||
with old MDEP_REORGS that are not CFG based. Recompute it now. */
|
||||
compute_bb_for_insn ();
|
||||
|
||||
if (bfin_flag_schedule_insns2)
|
||||
{
|
||||
splitting_for_sched = 1;
|
||||
split_all_insns ();
|
||||
splitting_for_sched = 0;
|
||||
|
||||
timevar_push (TV_SCHED2);
|
||||
schedule_insns ();
|
||||
timevar_pop (TV_SCHED2);
|
||||
|
||||
/* Examine the schedule and insert nops as necessary for 64-bit parallel
|
||||
instructions. */
|
||||
bfin_gen_bundles ();
|
||||
}
|
||||
|
||||
df_analyze ();
|
||||
|
||||
/* Doloop optimization */
|
||||
if (cfun->machine->has_hardware_loops)
|
||||
bfin_reorg_loops (dump_file);
|
||||
|
||||
workaround_speculation ();
|
||||
|
||||
if (bfin_flag_var_tracking)
|
||||
{
|
||||
@ -4915,7 +5001,10 @@ bfin_reorg (void)
|
||||
reorder_var_tracking_notes ();
|
||||
timevar_pop (TV_VAR_TRACKING);
|
||||
}
|
||||
|
||||
df_finish_pass (false);
|
||||
|
||||
workaround_rts_anomaly ();
|
||||
}
|
||||
|
||||
/* Handle interrupt_handler, exception_handler and nmi_handler function
|
||||
|
Loading…
Reference in New Issue
Block a user