diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a5e907c6305..24428811ca2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2005-05-10 Richard Henderson <rth@redhat.com> + + * config/ia64/ia64.c (ia64_expand_atomic_op): New. + * config/ia64/ia64-protos.h: Declare it. + * config/ia64/sync.md (I124MODE, FETCHOP, fetchop_name): New. + (sync_add<mode>, sync_old_add<mode>): Remove. + (sync_<fetchop_name><mode>, sync_nand<mode>): New. + (sync_old_<fetchop_name><mode>, sync_old_nand<mode>): New. + (sync_new_<fetchop_name><mode>, sync_new_nand<mode>): New. + (cmpxchg_rel_<mode>): Split from cmpxchg_acq_<mode>. Zero + extend result; use release semantics. + (cmpxchg_rel_di): Rename from cmpxchg_acq_<mode>; use release. + (sync_val_compare_and_swap_<mode>): Update to match. + 2005-05-10 Richard Henderson <rth@redhat.com> * optabs.c (expand_compare_and_swap_loop): Don't clobber old value diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index afe8e597905..1eb45883860 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -51,6 +51,7 @@ extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); extern void ia64_reload_gp (void); +extern void ia64_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx); extern HOST_WIDE_INT ia64_initial_elimination_offset (int, int); extern void ia64_expand_prologue (void); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 35efbb8af59..d75a74f0f1e 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -1662,6 +1662,111 @@ ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r, if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) ia64_reload_gp (); } + +/* Expand an atomic operation. We want to perform MEM = VAL atomically. + + This differs from the generic code in that we know about the zero-extending + properties of cmpxchg, and the zero-extending requirements of ar.ccv. We + also know that ld.acq+cmpxchg.rel equals a full barrier. 
+ + The loop we want to generate looks like + + cmp_reg = mem; + label: + old_reg = cmp_reg; + new_reg = cmp_reg op val; + cmp_reg = compare-and-swap(mem, old_reg, new_reg) + if (cmp_reg != old_reg) + goto label; + + Note that we only do the plain load from memory once. Subsequent + iterations use the value loaded by the compare-and-swap pattern. */ + +void +ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx old_dst, rtx new_dst) +{ + enum machine_mode mode = GET_MODE (mem); + rtx old_reg, new_reg, cmp_reg, ar_ccv, label; + enum insn_code icode; + + /* Special case for using fetchadd. */ + if ((mode == SImode || mode == DImode) && fetchadd_operand (val, mode)) + { + if (!old_dst) + old_dst = gen_reg_rtx (mode); + + emit_insn (gen_memory_barrier ()); + + if (mode == SImode) + icode = CODE_FOR_fetchadd_acq_si; + else + icode = CODE_FOR_fetchadd_acq_di; + emit_insn (GEN_FCN (icode) (old_dst, mem, val)); + + if (new_dst) + { + new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst, + true, OPTAB_WIDEN); + if (new_reg != new_dst) + emit_move_insn (new_dst, new_reg); + } + return; + } + + /* Because of the volatile mem read, we get an ld.acq, which is the + front half of the full barrier. The end half is the cmpxchg.rel. 
*/ + gcc_assert (MEM_VOLATILE_P (mem)); + + old_reg = gen_reg_rtx (DImode); + cmp_reg = gen_reg_rtx (DImode); + label = gen_label_rtx (); + + if (mode != DImode) + { + val = simplify_gen_subreg (DImode, val, mode, 0); + emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1)); + } + else + emit_move_insn (cmp_reg, mem); + + emit_label (label); + + ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); + emit_move_insn (old_reg, cmp_reg); + emit_move_insn (ar_ccv, cmp_reg); + + if (old_dst) + emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg)); + + new_reg = cmp_reg; + if (code == NOT) + { + new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true); + code = AND; + } + new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX, + true, OPTAB_DIRECT); + + if (mode != DImode) + new_reg = gen_lowpart (mode, new_reg); + if (new_dst) + emit_move_insn (new_dst, new_reg); + + switch (mode) + { + case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; + case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break; + case SImode: icode = CODE_FOR_cmpxchg_rel_si; break; + case DImode: icode = CODE_FOR_cmpxchg_rel_di; break; + default: + gcc_unreachable (); + } + + emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg)); + + emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label); +} /* Begin the assembly file. */ diff --git a/gcc/config/ia64/sync.md b/gcc/config/ia64/sync.md index 7e9c98a997b..a5fa1ff304c 100644 --- a/gcc/config/ia64/sync.md +++ b/gcc/config/ia64/sync.md @@ -20,9 +20,13 @@ ;; Boston, MA 02111-1307, USA. 
(define_mode_macro IMODE [QI HI SI DI]) +(define_mode_macro I124MODE [QI HI SI]) (define_mode_macro I48MODE [SI DI]) (define_mode_attr modesuffix [(QI "1") (HI "2") (SI "4") (DI "8")]) +(define_code_macro FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")]) (define_insn "memory_barrier" [(set (mem:BLK (match_scratch:DI 0 "X")) @@ -31,34 +35,6 @@ "mf" [(set_attr "itanium_class" "syst_m")]) -(define_expand "sync_add<mode>" - [(match_operand:I48MODE 0 "memory_operand" "") - (match_operand:I48MODE 1 "general_operand" "")] - "" -{ - rtx tmp; - if (!fetchadd_operand (operands[1], <MODE>mode)) - FAIL; - - tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_memory_barrier ()); - emit_insn (gen_fetchadd_acq_<mode> (tmp, operands[0], operands[1])); - DONE; -}) - -(define_expand "sync_old_add<mode>" - [(match_operand:I48MODE 0 "gr_register_operand" "") - (match_operand:I48MODE 1 "memory_operand" "") - (match_operand:I48MODE 2 "general_operand" "")] - "" -{ - if (!fetchadd_operand (operands[2], <MODE>mode)) - FAIL; - emit_insn (gen_memory_barrier ()); - emit_insn (gen_fetchadd_acq_<mode> (operands[0], operands[1], operands[2])); - DONE; -}) - (define_insn "fetchadd_acq_<mode>" [(set (match_operand:I48MODE 0 "gr_register_operand" "=r") (match_operand:I48MODE 1 "not_postinc_memory_operand" "+S")) @@ -70,6 +46,70 @@ "fetchadd.acq %0 = %1, %2" [(set_attr "itanium_class" "sem")]) +(define_expand "sync_<fetchop_name><mode>" + [(set (match_operand:IMODE 0 "memory_operand" "") + (FETCHOP:IMODE (match_dup 0) + (match_operand:IMODE 1 "general_operand" "")))] + "" +{ + ia64_expand_atomic_op (<CODE>, operands[0], operands[1], NULL, NULL); + DONE; +}) + +(define_expand "sync_nand<mode>" + [(set (match_operand:IMODE 0 "memory_operand" "") + (and:IMODE (not:IMODE (match_dup 0)) + (match_operand:IMODE 1 "general_operand" "")))] + "" +{ + ia64_expand_atomic_op (NOT, operands[0], operands[1], NULL, NULL); + DONE; +}) + +(define_expand "sync_old_<fetchop_name><mode>" + [(set (match_operand:IMODE 0 
"gr_register_operand" "") + (FETCHOP:IMODE + (match_operand:IMODE 1 "memory_operand" "") + (match_operand:IMODE 2 "general_operand" "")))] + "" +{ + ia64_expand_atomic_op (<CODE>, operands[1], operands[2], operands[0], NULL); + DONE; +}) + +(define_expand "sync_old_nand<mode>" + [(set (match_operand:IMODE 0 "gr_register_operand" "") + (and:IMODE + (not:IMODE (match_operand:IMODE 1 "memory_operand" "")) + (match_operand:IMODE 2 "general_operand" "")))] + "" +{ + ia64_expand_atomic_op (NOT, operands[1], operands[2], operands[0], NULL); + DONE; +}) + +(define_expand "sync_new_<fetchop_name><mode>" + [(set (match_operand:IMODE 0 "gr_register_operand" "") + (FETCHOP:IMODE + (match_operand:IMODE 1 "memory_operand" "") + (match_operand:IMODE 2 "general_operand" "")))] + "" +{ + ia64_expand_atomic_op (<CODE>, operands[1], operands[2], NULL, operands[0]); + DONE; +}) + +(define_expand "sync_new_nand<mode>" + [(set (match_operand:IMODE 0 "gr_register_operand" "") + (and:IMODE + (not:IMODE (match_operand:IMODE 1 "memory_operand" "")) + (match_operand:IMODE 2 "general_operand" "")))] + "" +{ + ia64_expand_atomic_op (NOT, operands[1], operands[2], NULL, operands[0]); + DONE; +}) + (define_expand "sync_compare_and_swap<mode>" [(match_operand:IMODE 0 "gr_register_operand" "") (match_operand:IMODE 1 "memory_operand" "") @@ -78,23 +118,46 @@ "" { rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); + rtx dst; + convert_move (ccv, operands[2], 1); + + dst = operands[0]; + if (GET_MODE (dst) != DImode) + dst = gen_reg_rtx (DImode); + emit_insn (gen_memory_barrier ()); - emit_insn (gen_cmpxchg_acq_<mode> (operands[0], operands[1], - ccv, operands[3])); + emit_insn (gen_cmpxchg_rel_<mode> (dst, operands[1], ccv, operands[3])); + + if (dst != operands[0]) + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, dst)); DONE; }) -(define_insn "cmpxchg_acq_<mode>" - [(set (match_operand:IMODE 0 "gr_register_operand" "=r") - (match_operand:IMODE 1 "not_postinc_memory_operand" "+S")) +(define_insn "cmpxchg_rel_<mode>" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + 
(zero_extend:DI + (match_operand:I124MODE 1 "not_postinc_memory_operand" "+S"))) (set (match_dup 1) - (unspec:IMODE [(match_dup 1) - (match_operand:DI 2 "ar_ccv_reg_operand" "") - (match_operand:IMODE 3 "gr_register_operand" "r")] - UNSPEC_CMPXCHG_ACQ))] + (unspec:I124MODE + [(match_dup 1) + (match_operand:DI 2 "ar_ccv_reg_operand" "") + (match_operand:I124MODE 3 "gr_register_operand" "r")] + UNSPEC_CMPXCHG_ACQ))] "" - "cmpxchg<modesuffix>.acq %0 = %1, %3, %2" + "cmpxchg<modesuffix>.rel %0 = %1, %3, %2" [(set_attr "itanium_class" "sem")]) + +(define_insn "cmpxchg_rel_di" + [(set (match_operand:DI 0 "gr_register_operand" "=r") + (match_operand:DI 1 "not_postinc_memory_operand" "+S")) + (set (match_dup 1) + (unspec:DI [(match_dup 1) + (match_operand:DI 2 "ar_ccv_reg_operand" "") + (match_operand:DI 3 "gr_register_operand" "r")] + UNSPEC_CMPXCHG_ACQ))] + "" + "cmpxchg8.rel %0 = %1, %3, %2" [(set_attr "itanium_class" "sem")]) (define_insn "sync_lock_test_and_set<mode>"