Review note: the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy, using the hunk line counts as a guide.  Tabs
in the C hunks are written as spaces and a blank context line may originally
have been a form feed, so apply it with whitespace matching relaxed.
workaround_rts_anomaly was amended during review (guard against a null
first_insn, redundant shadowing declaration of `pat' removed, comments
added); the "+" side of the hunk headers after it was adjusted to match, and
the post-image blob id on the bfin.c index line no longer corresponds.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bccdea9ec24..86087a374cb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2008-09-09  Bernd Schmidt
+
+	* config/bfin/bfin.c (n_regs_to_save): New static variable.
+	(push_multiple_operation, pop_multiple_operation): Set it.
+	(workaround_rts_anomaly): New function.
+	(workaround_speculation): New function, broken out of bfin_reorg.
+	(bfin_reorg): Call the new functions.
+
 2008-09-09  Richard Guenther
 
 	PR middle-end/37354
diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c
index 8e0f355d176..7ff1379f2c9 100644
--- a/gcc/config/bfin/bfin.c
+++ b/gcc/config/bfin/bfin.c
@@ -3100,6 +3100,7 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
 /* Used for communication between {push,pop}_multiple_operation (which
    we use not only as a predicate) and the corresponding output functions.  */
 static int first_preg_to_save, first_dreg_to_save;
+static int n_regs_to_save;
 
 int
 push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
@@ -3168,6 +3169,7 @@ push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
           lastpreg++;
         }
     }
+  n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
   return 1;
 }
 
@@ -3227,6 +3229,7 @@ pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
     }
   first_dreg_to_save = lastdreg;
   first_preg_to_save = lastpreg;
+  n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
   return 1;
 }
 
@@ -4631,6 +4634,100 @@ reorder_var_tracking_notes (void)
     }
 }
 
+/* On some silicon revisions, functions shorter than a certain number of cycles
+   can cause unpredictable behaviour.  Work around this by adding NOPs as
+   needed.
+
+   The insn chain is walked from its start and a cycle count is charged for
+   each insn seen.  If a return_internal jump or the end of the chain is
+   reached while part of the four-cycle budget is still unspent, one NOP per
+   remaining cycle is emitted before the first insn that is neither a note
+   nor a label.  A barrier, a call, a jump that is not conditional, or an
+   exhausted budget ends the scan without emitting anything.  Nothing is
+   done unless ENABLE_WA_RETS is set.  */
+static void
+workaround_rts_anomaly (void)
+{
+  rtx insn, first_insn = NULL_RTX;
+  int cycles = 4;
+
+  if (! ENABLE_WA_RETS)
+    return;
+
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      rtx pat;
+
+      if (BARRIER_P (insn))
+        return;
+
+      if (NOTE_P (insn) || LABEL_P (insn))
+        continue;
+
+      if (first_insn == NULL_RTX)
+        first_insn = insn;
+      pat = PATTERN (insn);
+      /* These patterns are not charged any cycles.  */
+      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+          || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+          || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+        continue;
+
+      if (CALL_P (insn))
+        return;
+
+      if (JUMP_P (insn))
+        {
+          if (recog_memoized (insn) == CODE_FOR_return_internal)
+            break;
+
+          /* Nothing to worry about for direct jumps.  */
+          if (!any_condjump_p (insn))
+            return;
+          if (cycles <= 1)
+            return;
+          cycles--;
+        }
+      else if (INSN_P (insn))
+        {
+          int this_cycles = 1;
+
+          if (GET_CODE (pat) == PARALLEL)
+            {
+              /* The predicates set n_regs_to_save as a side effect.  */
+              if (push_multiple_operation (pat, VOIDmode)
+                  || pop_multiple_operation (pat, VOIDmode))
+                this_cycles = n_regs_to_save;
+            }
+          else
+            {
+              enum insn_code icode = recog_memoized (insn);
+              if (icode == CODE_FOR_link)
+                this_cycles = 4;
+              else if (icode == CODE_FOR_unlink)
+                this_cycles = 3;
+              else if (icode == CODE_FOR_mulsi3)
+                this_cycles = 5;
+            }
+          if (this_cycles >= cycles)
+            return;
+
+          cycles -= this_cycles;
+        }
+    }
+
+  /* The chain held only notes and labels, so there is no insn in front of
+     which the NOPs could be placed.  */
+  if (first_insn == NULL_RTX)
+    return;
+
+  while (cycles > 0)
+    {
+      emit_insn_before (gen_nop (), first_insn);
+      cycles--;
+    }
+}
+
 /* Return an insn type for INSN that can be used by the caller for anomaly
    workarounds.  This differs from plain get_attr_type in that it handles
    SEQUENCEs.  */
@@ -4711,58 +4808,13 @@ find_load (rtx insn)
   return NULL_RTX;
 }
 
-/* We use the machine specific reorg pass for emitting CSYNC instructions
-   after conditional branches as needed.
-
-   The Blackfin is unusual in that a code sequence like
-     if cc jump label
-     r0 = (p0)
-   may speculatively perform the load even if the condition isn't true.  This
-   happens for a branch that is predicted not taken, because the pipeline
-   isn't flushed or stalled, so the early stages of the following instructions,
-   which perform the memory reference, are allowed to execute before the
-   jump condition is evaluated.
-   Therefore, we must insert additional instructions in all places where this
-   could lead to incorrect behavior.  The manual recommends CSYNC, while
-   VDSP seems to use NOPs (even though its corresponding compiler option is
-   named CSYNC).
-
-   When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
-   When optimizing for size, we turn the branch into a predicted taken one.
-   This may be slower due to mispredicts, but saves code size.  */
-
 static void
-bfin_reorg (void)
+workaround_speculation (void)
 {
   rtx insn, next;
   rtx last_condjump = NULL_RTX;
   int cycles_since_jump = INT_MAX;
 
-  /* We are freeing block_for_insn in the toplev to keep compatibility
-     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
-  compute_bb_for_insn ();
-
-  if (bfin_flag_schedule_insns2)
-    {
-      splitting_for_sched = 1;
-      split_all_insns ();
-      splitting_for_sched = 0;
-
-      timevar_push (TV_SCHED2);
-      schedule_insns ();
-      timevar_pop (TV_SCHED2);
-
-      /* Examine the schedule and insert nops as necessary for 64-bit parallel
-         instructions.  */
-      bfin_gen_bundles ();
-    }
-
-  df_analyze ();
-
-  /* Doloop optimization */
-  if (cfun->machine->has_hardware_loops)
-    bfin_reorg_loops (dump_file);
-
   if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS)
     return;
 
@@ -4841,11 +4893,9 @@ bfin_reorg (void)
   if (! ENABLE_WA_SPECULATIVE_SYNCS)
     return;
 
-  if (! ENABLE_WA_RETS)
-    return;
-
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     {
+      int cycles_since_jump;
       if (JUMP_P (insn)
           && any_condjump_p (insn)
           && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
@@ -4907,6 +4957,57 @@ bfin_reorg (void)
             }
         }
     }
+}
+
+/* We use the machine specific reorg pass for emitting CSYNC instructions
+   after conditional branches as needed.
+
+   The Blackfin is unusual in that a code sequence like
+     if cc jump label
+     r0 = (p0)
+   may speculatively perform the load even if the condition isn't true.  This
+   happens for a branch that is predicted not taken, because the pipeline
+   isn't flushed or stalled, so the early stages of the following instructions,
+   which perform the memory reference, are allowed to execute before the
+   jump condition is evaluated.
+   Therefore, we must insert additional instructions in all places where this
+   could lead to incorrect behavior.  The manual recommends CSYNC, while
+   VDSP seems to use NOPs (even though its corresponding compiler option is
+   named CSYNC).
+
+   When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
+   When optimizing for size, we turn the branch into a predicted taken one.
+   This may be slower due to mispredicts, but saves code size.  */
+
+static void
+bfin_reorg (void)
+{
+  /* We are freeing block_for_insn in the toplev to keep compatibility
+     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
+  compute_bb_for_insn ();
+
+  if (bfin_flag_schedule_insns2)
+    {
+      splitting_for_sched = 1;
+      split_all_insns ();
+      splitting_for_sched = 0;
+
+      timevar_push (TV_SCHED2);
+      schedule_insns ();
+      timevar_pop (TV_SCHED2);
+
+      /* Examine the schedule and insert nops as necessary for 64-bit parallel
+         instructions.  */
+      bfin_gen_bundles ();
+    }
+
+  df_analyze ();
+
+  /* Doloop optimization */
+  if (cfun->machine->has_hardware_loops)
+    bfin_reorg_loops (dump_file);
+
+  workaround_speculation ();
 
   if (bfin_flag_var_tracking)
     {
@@ -4915,7 +5016,10 @@ bfin_reorg (void)
       reorder_var_tracking_notes ();
       timevar_pop (TV_VAR_TRACKING);
     }
+
   df_finish_pass (false);
+
+  workaround_rts_anomaly ();
 }
 
 /* Handle interrupt_handler, exception_handler and nmi_handler function