Fix __atomic to not implement atomic loads with CAS.

gcc/
	* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
	conditional on existence of a fast atomic load.
	* optabs-query.c (can_atomic_load_p): New function.
	* optabs-query.h (can_atomic_load_p): Declare it.
	* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
	no fast atomic load is available for the particular size of access.
	(expand_atomic_compare_and_swap): Likewise.
	(expand_atomic_load): Likewise.
	(expand_atomic_store): Likewise.
	(expand_atomic_fetch_op): Likewise.
	* testsuite/lib/target-supports.exp
	(check_effective_target_sync_int_128): Remove x86 because it provides
	no fast atomic load.
	(check_effective_target_sync_int_128_runtime): Likewise.

libatomic/
	* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
	* auto-config.h.in: Regenerate.
	* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
	(atomic_compare_exchange_n): New.
	* glfree.c (EXACT, LARGER): Change condition and add comments.

From-SVN: r245098
commit 969a32ce93, parent 55e75c7c6b
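In user terms: on a target whose only wide atomic primitive is compare-and-swap (for example 16-byte objects on x86-64 via cmpxchg16b), expanding an atomic load as a CAS loop performs a store, which is wrong for volatile atomics and for read-only-mapped memory. With this change such loads are left to libatomic, and the lock-free queries report false. The following is a minimal sketch of the user-visible behaviour, not part of the patch; it assumes an x86-64 target and linking with -latomic, and the variable name and read-only-mapping scenario are illustrative only.

/* Illustration only (not from the patch): a 16-byte atomic load on x86-64.
   Before this commit, GCC could expand the load inline as a cmpxchg16b
   loop, i.e. a read-modify-write; after it, the access is delegated to
   libatomic and the lock-free queries below return false.
   Build with: gcc example.c -latomic  */
#include <stdbool.h>
#include <stdio.h>

static __int128 shared;   /* imagine this lives in a read-only mapping */

int
main (void)
{
  __int128 v = __atomic_load_n (&shared, __ATOMIC_ACQUIRE);

  bool always = __atomic_always_lock_free (sizeof (__int128), 0);
  bool now = __atomic_is_lock_free (sizeof (__int128), &shared);

  printf ("always_lock_free=%d is_lock_free=%d low64=%llu\n",
          always, now, (unsigned long long) v);
  return 0;
}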
gcc/ChangeLog
@@ -1,3 +1,21 @@
+2017-02-01  Torvald Riegel  <triegel@redhat.com>
+	    Richard Henderson  <rth@redhat.com>
+
+	* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
+	conditional on existence of a fast atomic load.
+	* optabs-query.c (can_atomic_load_p): New function.
+	* optabs-query.h (can_atomic_load_p): Declare it.
+	* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
+	no fast atomic load is available for the particular size of access.
+	(expand_atomic_compare_and_swap): Likewise.
+	(expand_atomic_load): Likewise.
+	(expand_atomic_store): Likewise.
+	(expand_atomic_fetch_op): Likewise.
+	* testsuite/lib/target-supports.exp
+	(check_effective_target_sync_int_128): Remove x86 because it provides
+	no fast atomic load.
+	(check_effective_target_sync_int_128_runtime): Likewise.
+
 2017-02-01  Richard Biener  <rguenther@suse.de>
 
 	* graphite.c: Include tree-vectorizer.h for find_loop_location.
gcc/builtins.c
@@ -6157,8 +6157,9 @@ fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
 
   /* Check if a compare_and_swap pattern exists for the mode which represents
      the required size.  The pattern is not allowed to fail, so the existence
-     of the pattern indicates support is present.  */
-  if (can_compare_and_swap_p (mode, true))
+     of the pattern indicates support is present.  Also require that an
+     atomic load exists for the required size.  */
+  if (can_compare_and_swap_p (mode, true) && can_atomic_load_p (mode))
     return boolean_true_node;
   else
     return boolean_false_node;
gcc/optabs-query.c
@@ -584,6 +584,25 @@ can_atomic_exchange_p (machine_mode mode, bool allow_libcall)
   return can_compare_and_swap_p (mode, allow_libcall);
 }
 
+/* Return true if an atomic load can be performed without falling back to
+   a compare-and-swap.  */
+
+bool
+can_atomic_load_p (machine_mode mode)
+{
+  enum insn_code icode;
+
+  /* Does the target support the load directly?  */
+  icode = direct_optab_handler (atomic_load_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+
+  /* If the size of the object is greater than word size on this target,
+     then we assume that a load will not be atomic.  Also see
+     expand_atomic_load.  */
+  return GET_MODE_PRECISION (mode) <= BITS_PER_WORD;
+}
+
 /* Determine whether "1 << x" is relatively cheap in word_mode.  */
 
 bool
gcc/optabs-query.h
@@ -176,6 +176,7 @@ int can_mult_highpart_p (machine_mode, bool);
 bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
 bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
+bool can_atomic_load_p (machine_mode);
 bool lshift_cheap_p (bool);
 
 #endif
gcc/optabs.c
@@ -6086,8 +6086,15 @@ expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
 rtx
 expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
 {
+  machine_mode mode = GET_MODE (mem);
   rtx ret;
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
   ret = maybe_emit_atomic_exchange (target, mem, val, model);
 
   /* Next try a compare-and-swap loop for the exchange.  */
@@ -6121,6 +6128,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
   rtx target_oval, target_bool = NULL_RTX;
   rtx libfunc;
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (succ_model))
+    return false;
+
   /* Load expected into a register for the compare and swap.  */
   if (MEM_P (expected))
     expected = copy_to_reg (expected);
@@ -6316,19 +6329,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
     }
 
   /* If the size of the object is greater than word size on this target,
-     then we assume that a load will not be atomic.  */
+     then we assume that a load will not be atomic.  We could try to
+     emulate a load with a compare-and-swap operation, but the store that
+     doing this could result in would be incorrect if this is a volatile
+     atomic load or targeting read-only-mapped memory.  */
   if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
-    {
-      /* Issue val = compare_and_swap (mem, 0, 0).
-	 This may cause the occasional harmless store of 0 when the value is
-	 already 0, but it seems to be OK according to the standards guys.  */
-      if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
-					  const0_rtx, false, model, model))
-	return target;
-      else
-	/* Otherwise there is no atomic load, leave the library call.  */
-	return NULL_RTX;
-    }
+    /* If there is no atomic load, leave the library call.  */
+    return NULL_RTX;
 
   /* Otherwise assume loads are atomic, and emit the proper barriers.  */
   if (!target || target == const0_rtx)
@@ -6370,7 +6377,9 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
       return const0_rtx;
     }
 
-  /* If using __sync_lock_release is a viable alternative, try it.  */
+  /* If using __sync_lock_release is a viable alternative, try it.
+     Note that this will not be set to true if we are expanding a generic
+     __atomic_store_n.  */
   if (use_release)
     {
       icode = direct_optab_handler (sync_lock_release_optab, mode);
@@ -6389,16 +6398,22 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
     }
 
   /* If the size of the object is greater than word size on this target,
-     a default store will not be atomic, Try a mem_exchange and throw away
-     the result.  If that doesn't work, don't do anything.  */
+     a default store will not be atomic.  */
   if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
     {
-      rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
-      if (!target)
-        target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
-      if (target)
-        return const0_rtx;
-      else
-        return NULL_RTX;
+      /* If loads are atomic or we are called to provide a __sync builtin,
+         we can try an atomic_exchange and throw away the result.  Otherwise,
+         don't do anything so that we do not create an inconsistency between
+         loads and stores.  */
+      if (can_atomic_load_p (mode) || is_mm_sync (model))
+        {
+          rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
+          if (!target)
+            target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem,
+                                                                val);
+          if (target)
+            return const0_rtx;
+        }
+      return NULL_RTX;
     }
 
@@ -6713,6 +6728,12 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
   rtx result;
   bool unused_result = (target == const0_rtx);
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
   result = expand_atomic_fetch_op_no_fallback (target, mem, val, code, model,
                                                after);
 
gcc/testsuite/lib/target-supports.exp
@@ -6514,9 +6514,7 @@ proc check_effective_target_section_anchors { } {
 # Return 1 if the target supports atomic operations on "int_128" values.
 
 proc check_effective_target_sync_int_128 { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-          && ![is-effective-target ia32])
-         || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
         return 1
     } else {
         return 0
@@ -6525,23 +6523,10 @@ proc check_effective_target_sync_int_128 { } {
 
 # Return 1 if the target supports atomic operations on "int_128" values
 # and can execute them.
+# This requires support for both compare-and-swap and true atomic loads.
 
 proc check_effective_target_sync_int_128_runtime { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-          && ![is-effective-target ia32]
-          && [check_cached_effective_target sync_int_128_available {
-              check_runtime_nocache sync_int_128_available {
-                  #include "cpuid.h"
-                  int main ()
-                  {
-                    unsigned int eax, ebx, ecx, edx;
-                    if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
-                      return !(ecx & bit_CMPXCHG16B);
-                    return 1;
-                  }
-              } ""
-          }])
-         || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
         return 1
     } else {
         return 0
libatomic/ChangeLog
@@ -1,3 +1,12 @@
+2017-02-01  Richard Henderson  <rth@redhat.com>
+	    Torvald Riegel  <triegel@redhat.com>
+
+	* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
+	* auto-config.h.in: Regenerate.
+	* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
+	(atomic_compare_exchange_n): New.
+	* glfree.c (EXACT, LARGER): Change condition and add comments.
+
 2017-01-30  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 
 	PR target/78945
libatomic/acinclude.m4
@@ -96,6 +96,7 @@ AC_DEFUN([LIBAT_HAVE_ATOMIC_LOADSTORE],[
 LIBAT_DEFINE_YESNO([HAVE_ATOMIC_LDST_$2], [$libat_cv_have_at_ldst_$2],
    [Have __atomic_load/store for $2 byte integers.])
 AH_BOTTOM([#define MAYBE_HAVE_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
+AH_BOTTOM([#define FAST_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
 ])
 
 dnl
libatomic/auto-config.h.in
@@ -222,6 +222,16 @@
 
 #define MAYBE_HAVE_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
 
+#define FAST_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+
+#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+
+#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+
+#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+
+#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+
 #define MAYBE_HAVE_ATOMIC_TAS_16 HAVE_ATOMIC_TAS_16
 
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_1 HAVE_ATOMIC_EXCHANGE_1
@@ -232,6 +242,8 @@
 
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_8 HAVE_ATOMIC_EXCHANGE_8
 
+#define FAST_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
+
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_16 HAVE_ATOMIC_EXCHANGE_16
 
 #define MAYBE_HAVE_ATOMIC_CAS_1 HAVE_ATOMIC_CAS_1
@@ -242,8 +254,6 @@
 
 #define MAYBE_HAVE_ATOMIC_CAS_8 HAVE_ATOMIC_CAS_8
 
-#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
-
 #define MAYBE_HAVE_ATOMIC_CAS_16 HAVE_ATOMIC_CAS_16
 
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_1 HAVE_ATOMIC_FETCH_ADD_1
@@ -254,6 +264,8 @@
 
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_8 HAVE_ATOMIC_FETCH_ADD_8
 
+#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
+
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_16 HAVE_ATOMIC_FETCH_ADD_16
 
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_1 HAVE_ATOMIC_FETCH_OP_1
@@ -264,22 +276,20 @@
 
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_8 HAVE_ATOMIC_FETCH_OP_8
 
-#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_16 HAVE_ATOMIC_FETCH_OP_16
 
 #ifndef WORDS_BIGENDIAN
 #define WORDS_BIGENDIAN 0
 #endif
 
+#define FAST_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
+
+#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
+
+#define FAST_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
+
 #define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
 
+#define FAST_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
+
 #define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
-
-#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
-
-#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
-
-#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
-
-#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
libatomic/config/x86/host-config.h
@@ -47,6 +47,9 @@ extern unsigned int libat_feat1_edx HIDDEN;
 # define MAYBE_HAVE_ATOMIC_EXCHANGE_16	IFUNC_COND_1
 # undef MAYBE_HAVE_ATOMIC_LDST_16
 # define MAYBE_HAVE_ATOMIC_LDST_16	IFUNC_COND_1
+/* Since load and store are implemented with CAS, they are not fast.  */
+# undef FAST_ATOMIC_LDST_16
+# define FAST_ATOMIC_LDST_16	0
 # if IFUNC_ALT == 1
 # undef HAVE_ATOMIC_CAS_16
 # define HAVE_ATOMIC_CAS_16 1
@@ -64,6 +67,21 @@ extern unsigned int libat_feat1_edx HIDDEN;
 # endif
 #endif
 
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+static inline bool
+atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
+                           bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
+{
+  UTYPE cmpval = *eptr;
+  UTYPE oldval = __sync_val_compare_and_swap_16 (mptr, cmpval, newval);
+  if (oldval == cmpval)
+    return true;
+  *eptr = oldval;
+  return false;
+}
+# define atomic_compare_exchange_n atomic_compare_exchange_n
+#endif /* Have CAS 16 */
+
 #endif /* HAVE_IFUNC */
 
 #include_next <host-config.h>
libatomic/glfree.c
@@ -24,26 +24,41 @@
 
 #include "libatomic_i.h"
 
+/* Accesses with a power-of-two size are not lock-free if we don't have an
+   integer type of this size or if they are not naturally aligned.  They
+   are lock-free if such a naturally aligned access is always lock-free
+   according to the compiler, which requires that both atomic loads and CAS
+   are available.
+   In all other cases, we fall through to LARGER (see below).  */
 #define EXACT(N)                                                \
   do {                                                          \
     if (!C2(HAVE_INT,N)) break;                                 \
    if ((uintptr_t)ptr & (N - 1)) break;                         \
     if (__atomic_always_lock_free(N, 0)) return true;           \
-    if (C2(MAYBE_HAVE_ATOMIC_CAS_,N)) return true;              \
+    if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;                   \
+    if (C2(FAST_ATOMIC_LDST_,N)) return true;                   \
   } while (0)
 
 
+/* We next check to see if an access of a larger size is lock-free.  We use
+   a similar check as in EXACT, except that we also check that the alignment
+   of the access is such that the data to be accessed is completely covered
+   by the larger access.  */
 #define LARGER(N)                                               \
   do {                                                          \
     uintptr_t r = (uintptr_t)ptr & (N - 1);                     \
     if (!C2(HAVE_INT,N)) break;                                 \
-    if (!C2(HAVE_ATOMIC_LDST_,N)) break;                        \
+    if (!C2(FAST_ATOMIC_LDST_,N)) break;                        \
     if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;                   \
     if (r + n <= N) return true;                                \
  } while (0)
 
 
+/* Note that this can return that a size/alignment is not lock-free even if
+   all the operations that we use to implement the respective accesses provide
+   lock-free forward progress as specified in C++14:  Users likely expect
+   "lock-free" to also mean "fast", which is why we do not return true if, for
+   example, we implement loads with this size/alignment using a CAS.  */
 bool
 libat_is_lock_free (size_t n, void *ptr)
 {
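A side note on the LARGER(N) rule above: it accepts an n-byte access only when its bytes fall entirely inside one naturally aligned N-byte unit, so a single larger atomic access covers it. Below is a minimal standalone sketch of that covering test; it is illustrative only, and covered_by_larger is a made-up name, not a libatomic function.

/* Illustration of the alignment test used by LARGER(N): an n-byte access
   at ptr is coverable by one naturally aligned N-byte atomic access iff
   it does not straddle an N-byte boundary.  */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool
covered_by_larger (const void *ptr, size_t n, size_t N)
{
  uintptr_t r = (uintptr_t) ptr & (N - 1);   /* offset within the N-byte block */
  return r + n <= N;
}

int
main (void)
{
  static char buf[16] __attribute__ ((aligned (16)));

  /* 2 bytes at offset 3 fit inside one aligned 8-byte block: prints 1.  */
  printf ("%d\n", covered_by_larger (buf + 3, 2, 8));
  /* 2 bytes at offset 7 straddle two 8-byte blocks: prints 0.  */
  printf ("%d\n", covered_by_larger (buf + 7, 2, 8));
  return 0;
}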