Fix __atomic to not implement atomic loads with CAS.

gcc/
	* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
conditional on existence of a fast atomic load.
	* optabs-query.c (can_atomic_load_p): New function.
	* optabs-query.h (can_atomic_load_p): Declare it.
	* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
	no fast atomic load is available for the particular size of access.
	(expand_atomic_compare_and_swap): Likewise.
	(expand_atomic_load): Likewise.
	(expand_atomic_store): Likewise.
	(expand_atomic_fetch_op): Likewise.
	* testsuite/lib/target-supports.exp
	(check_effective_target_sync_int_128): Remove x86 because it provides
	no fast atomic load.
	(check_effective_target_sync_int_128_runtime): Likewise.

libatomic/
	* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
	* auto-config.h.in: Regenerate.
	* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
	(atomic_compare_exchange_n): New.
	* glfree.c (EXACT, LARGER): Change condition and add comments.

From-SVN: r245098
Torvald Riegel, 2017-02-01 17:21:59 +00:00
commit 969a32ce93 (parent 55e75c7c6b)
11 changed files with 154 additions and 56 deletions
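In user-code terms, the change means that a 16-byte __atomic load is no longer expanded inline as a compare-and-swap loop; it becomes a libatomic call instead. A minimal illustration (not part of the commit; the behaviour described is for x86-64 with -mcx16):

/* Illustration only: before this fix, GCC could expand the load below as a
   cmpxchg16b loop, i.e. a disguised store, which is wrong for volatile
   atomics and for read-only-mapped memory.  After the fix it is emitted as
   a call to libatomic's __atomic_load_16, matching the other 16-byte
   atomic operations.  */
unsigned __int128
load16 (unsigned __int128 *p)
{
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
}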

gcc/ChangeLog

@ -1,3 +1,21 @@
2017-02-01 Torvald Riegel <triegel@redhat.com>
Richard Henderson <rth@redhat.com>
* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
conditional on existence of a fast atomic load.
* optabs-query.c (can_atomic_load_p): New function.
* optabs-query.h (can_atomic_load_p): Declare it.
* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
no fast atomic load is available for the particular size of access.
(expand_atomic_compare_and_swap): Likewise.
(expand_atomic_load): Likewise.
(expand_atomic_store): Likewise.
(expand_atomic_fetch_op): Likewise.
* testsuite/lib/target-supports.exp
(check_effective_target_sync_int_128): Remove x86 because it provides
no fast atomic load.
(check_effective_target_sync_int_128_runtime): Likewise.
2017-02-01 Richard Biener <rguenther@suse.de>
* graphite.c: Include tree-vectorizer.h for find_loop_location.

gcc/builtins.c

@ -6157,8 +6157,9 @@ fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
/* Check if a compare_and_swap pattern exists for the mode which represents
the required size. The pattern is not allowed to fail, so the existence
-of the pattern indicates support is present. */
-if (can_compare_and_swap_p (mode, true))
+of the pattern indicates support is present. Also require that an
+atomic load exists for the required size. */
+if (can_compare_and_swap_p (mode, true) && can_atomic_load_p (mode))
return boolean_true_node;
else
return boolean_false_node;
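The effect of the tightened fold can be seen directly from the builtin; a rough sketch (expected results on x86-64 with -mcx16, not output from an actual run):

/* Sketch only: 8-byte objects have both a native atomic load and a CAS, so
   the fold still yields true; 16-byte objects have a CAS (cmpxchg16b) but
   no fast atomic load, so the fold now yields false and the decision is
   deferred to libatomic.  */
int lock_free_8 (void)  { return __atomic_always_lock_free (8, 0);  }  /* 1 */
int lock_free_16 (void) { return __atomic_always_lock_free (16, 0); }  /* 0 after this change */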

gcc/optabs-query.c

@ -584,6 +584,25 @@ can_atomic_exchange_p (machine_mode mode, bool allow_libcall)
return can_compare_and_swap_p (mode, allow_libcall);
}
/* Return true if an atomic load can be performed without falling back to
a compare-and-swap. */
bool
can_atomic_load_p (machine_mode mode)
{
enum insn_code icode;
/* Does the target support the load directly? */
icode = direct_optab_handler (atomic_load_optab, mode);
if (icode != CODE_FOR_nothing)
return true;
/* If the size of the object is greater than word size on this target,
then we assume that a load will not be atomic. Also see
expand_atomic_load. */
return GET_MODE_PRECISION (mode) <= BITS_PER_WORD;
}
/* Determine whether "1 << x" is relatively cheap in word_mode. */
bool

gcc/optabs-query.h

@ -176,6 +176,7 @@ int can_mult_highpart_p (machine_mode, bool);
bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
bool can_compare_and_swap_p (machine_mode, bool);
bool can_atomic_exchange_p (machine_mode, bool);
bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
#endif

gcc/optabs.c

@ -6086,8 +6086,15 @@ expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
rtx
expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
{
machine_mode mode = GET_MODE (mem);
rtx ret;
/* If loads are not atomic for the required size and we are not called to
provide a __sync builtin, do not do anything so that we stay consistent
with atomic loads of the same size. */
if (!can_atomic_load_p (mode) && !is_mm_sync (model))
return NULL_RTX;
ret = maybe_emit_atomic_exchange (target, mem, val, model);
/* Next try a compare-and-swap loop for the exchange. */
@ -6121,6 +6128,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
rtx target_oval, target_bool = NULL_RTX;
rtx libfunc;
/* If loads are not atomic for the required size and we are not called to
provide a __sync builtin, do not do anything so that we stay consistent
with atomic loads of the same size. */
if (!can_atomic_load_p (mode) && !is_mm_sync (succ_model))
return false;
/* Load expected into a register for the compare and swap. */
if (MEM_P (expected))
expected = copy_to_reg (expected);
@ -6316,19 +6329,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
}
/* If the size of the object is greater than word size on this target,
-then we assume that a load will not be atomic. */
+then we assume that a load will not be atomic. We could try to
+emulate a load with a compare-and-swap operation, but the store that
+doing this could result in would be incorrect if this is a volatile
+atomic load or targeting read-only-mapped memory. */
if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
{
-/* Issue val = compare_and_swap (mem, 0, 0).
-This may cause the occasional harmless store of 0 when the value is
-already 0, but it seems to be OK according to the standards guys. */
-if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
-const0_rtx, false, model, model))
-return target;
-else
-/* Otherwise there is no atomic load, leave the library call. */
+/* If there is no atomic load, leave the library call. */
return NULL_RTX;
}
/* Otherwise assume loads are atomic, and emit the proper barriers. */
if (!target || target == const0_rtx)
@ -6370,7 +6377,9 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
return const0_rtx;
}
-/* If using __sync_lock_release is a viable alternative, try it. */
+/* If using __sync_lock_release is a viable alternative, try it.
+Note that this will not be set to true if we are expanding a generic
+__atomic_store_n. */
if (use_release)
{
icode = direct_optab_handler (sync_lock_release_optab, mode);
@ -6389,16 +6398,22 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
}
/* If the size of the object is greater than word size on this target,
-a default store will not be atomic, Try a mem_exchange and throw away
-the result. If that doesn't work, don't do anything. */
+a default store will not be atomic. */
if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
{
+/* If loads are atomic or we are called to provide a __sync builtin,
+we can try an atomic_exchange and throw away the result. Otherwise,
+don't do anything so that we do not create an inconsistency between
+loads and stores. */
+if (can_atomic_load_p (mode) || is_mm_sync (model))
+{
rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
if (!target)
-target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
+target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem,
+val);
if (target)
return const0_rtx;
-else
+}
return NULL_RTX;
}
@ -6713,6 +6728,12 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
rtx result;
bool unused_result = (target == const0_rtx);
/* If loads are not atomic for the required size and we are not called to
provide a __sync builtin, do not do anything so that we stay consistent
with atomic loads of the same size. */
if (!can_atomic_load_p (mode) && !is_mm_sync (model))
return NULL_RTX;
result = expand_atomic_fetch_op_no_fallback (target, mem, val, code, model,
after);
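The guards added to expand_atomic_exchange, expand_atomic_compare_and_swap, expand_atomic_store and expand_atomic_fetch_op all serve the same purpose: for a given access size, either every __atomic operation is expanded inline or every one of them is routed to libatomic. A hypothetical example of the mix they rule out:

/* Hypothetical illustration: if the 16-byte exchange below were inlined
   with cmpxchg16b while the 16-byte load went through libatomic (which may
   fall back to a lock), the two functions would not synchronize with each
   other.  With this change, both are expanded the same way for a given
   size.  */
unsigned __int128 shared16;

void writer (unsigned __int128 v)
{
  __atomic_exchange_n (&shared16, v, __ATOMIC_SEQ_CST);
}

unsigned __int128 reader (void)
{
  return __atomic_load_n (&shared16, __ATOMIC_SEQ_CST);
}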

gcc/testsuite/lib/target-supports.exp

@ -6514,9 +6514,7 @@ proc check_effective_target_section_anchors { } {
# Return 1 if the target supports atomic operations on "int_128" values.
proc check_effective_target_sync_int_128 { } {
-if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-&& ![is-effective-target ia32])
-|| [istarget spu-*-*] } {
+if { [istarget spu-*-*] } {
return 1
} else {
return 0
@ -6525,23 +6523,10 @@ proc check_effective_target_sync_int_128 { } {
# Return 1 if the target supports atomic operations on "int_128" values
# and can execute them.
+# This requires support for both compare-and-swap and true atomic loads.
proc check_effective_target_sync_int_128_runtime { } {
-if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-&& ![is-effective-target ia32]
-&& [check_cached_effective_target sync_int_128_available {
-check_runtime_nocache sync_int_128_available {
-#include "cpuid.h"
-int main ()
-{
-unsigned int eax, ebx, ecx, edx;
-if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
-return !(ecx & bit_CMPXCHG16B);
-return 1;
-}
-} ""
-}])
-|| [istarget spu-*-*] } {
+if { [istarget spu-*-*] } {
return 1
} else {
return 0
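As a consequence, tests gated on these effective targets are now skipped on x86-64, since a 16-byte CAS alone no longer counts as 128-bit atomic support. A hypothetical test skeleton using the directive:

/* Hypothetical testcase, not from the commit.  */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128_runtime } */

__int128 x;

int
main (void)
{
  __atomic_store_n (&x, (__int128) 1, __ATOMIC_SEQ_CST);
  return __atomic_load_n (&x, __ATOMIC_SEQ_CST) != 1;
}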

libatomic/ChangeLog

@ -1,3 +1,12 @@
2017-02-01 Richard Henderson <rth@redhat.com>
Torvald Riegel <triegel@redhat.com>
* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
* auto-config.h.in: Regenerate.
* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
(atomic_compare_exchange_n): New.
* glfree.c (EXACT, LARGER): Change condition and add comments.
2017-01-30 Szabolcs Nagy <szabolcs.nagy@arm.com>
PR target/78945

libatomic/acinclude.m4

@ -96,6 +96,7 @@ AC_DEFUN([LIBAT_HAVE_ATOMIC_LOADSTORE],[
LIBAT_DEFINE_YESNO([HAVE_ATOMIC_LDST_$2], [$libat_cv_have_at_ldst_$2],
[Have __atomic_load/store for $2 byte integers.])
AH_BOTTOM([#define MAYBE_HAVE_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
AH_BOTTOM([#define FAST_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
])
dnl

libatomic/auto-config.h.in

@ -222,6 +222,16 @@
#define MAYBE_HAVE_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
#define FAST_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
#define MAYBE_HAVE_ATOMIC_TAS_16 HAVE_ATOMIC_TAS_16
#define MAYBE_HAVE_ATOMIC_EXCHANGE_1 HAVE_ATOMIC_EXCHANGE_1
@ -232,6 +242,8 @@
#define MAYBE_HAVE_ATOMIC_EXCHANGE_8 HAVE_ATOMIC_EXCHANGE_8
#define FAST_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
#define MAYBE_HAVE_ATOMIC_EXCHANGE_16 HAVE_ATOMIC_EXCHANGE_16
#define MAYBE_HAVE_ATOMIC_CAS_1 HAVE_ATOMIC_CAS_1
@ -242,8 +254,6 @@
#define MAYBE_HAVE_ATOMIC_CAS_8 HAVE_ATOMIC_CAS_8
#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
#define MAYBE_HAVE_ATOMIC_CAS_16 HAVE_ATOMIC_CAS_16
#define MAYBE_HAVE_ATOMIC_FETCH_ADD_1 HAVE_ATOMIC_FETCH_ADD_1
@ -254,6 +264,8 @@
#define MAYBE_HAVE_ATOMIC_FETCH_ADD_8 HAVE_ATOMIC_FETCH_ADD_8
#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
#define MAYBE_HAVE_ATOMIC_FETCH_ADD_16 HAVE_ATOMIC_FETCH_ADD_16
#define MAYBE_HAVE_ATOMIC_FETCH_OP_1 HAVE_ATOMIC_FETCH_OP_1
@ -264,22 +276,20 @@
#define MAYBE_HAVE_ATOMIC_FETCH_OP_8 HAVE_ATOMIC_FETCH_OP_8
#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
#define MAYBE_HAVE_ATOMIC_FETCH_OP_16 HAVE_ATOMIC_FETCH_OP_16
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif
#define FAST_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
#define FAST_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
#define FAST_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8

libatomic/config/x86/host-config.h

@ -47,6 +47,9 @@ extern unsigned int libat_feat1_edx HIDDEN;
# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
# undef MAYBE_HAVE_ATOMIC_LDST_16
# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_1
/* Since load and store are implemented with CAS, they are not fast. */
# undef FAST_ATOMIC_LDST_16
# define FAST_ATOMIC_LDST_16 0
# if IFUNC_ALT == 1
# undef HAVE_ATOMIC_CAS_16
# define HAVE_ATOMIC_CAS_16 1
@ -64,6 +67,21 @@ extern unsigned int libat_feat1_edx HIDDEN;
# endif
#endif
#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
static inline bool
atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
{
UTYPE cmpval = *eptr;
UTYPE oldval = __sync_val_compare_and_swap_16 (mptr, cmpval, newval);
if (oldval == cmpval)
return true;
*eptr = oldval;
return false;
}
# define atomic_compare_exchange_n atomic_compare_exchange_n
#endif /* Have CAS 16 */
#endif /* HAVE_IFUNC */
#include_next <host-config.h>
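Keeping the 16-byte CAS available through the IFUNC while marking loads as not fast is consistent: read-modify-write operations can be built from compare-and-swap alone, since a failed CAS returns the value it observed. A rough sketch of that pattern (hypothetical helper, assuming -mcx16 so the 16-byte __sync CAS builtin is usable):

/* Sketch only: a 16-byte fetch-add built purely from CAS; no separate
   atomic load is required, which is why FAST_ATOMIC_LDST_16 == 0 does not
   get in the way of the RMW operations.  */
static unsigned __int128
fetch_add_16_via_cas (unsigned __int128 *mptr, unsigned __int128 addend)
{
  unsigned __int128 oldval = *mptr;   /* plain read, used only as a first guess */
  for (;;)
    {
      unsigned __int128 seen
        = __sync_val_compare_and_swap (mptr, oldval, oldval + addend);
      if (seen == oldval)
        return oldval;                /* CAS succeeded; oldval was the previous value */
      oldval = seen;                  /* CAS failed; retry with the value observed */
    }
}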

libatomic/glfree.c

@ -24,26 +24,41 @@
#include "libatomic_i.h"
/* Accesses with a power-of-two size are not lock-free if we don't have an
integer type of this size or if they are not naturally aligned. They
are lock-free if such a naturally aligned access is always lock-free
according to the compiler, which requires that both atomic loads and CAS
are available.
In all other cases, we fall through to LARGER (see below). */
#define EXACT(N) \
do { \
if (!C2(HAVE_INT,N)) break; \
if ((uintptr_t)ptr & (N - 1)) break; \
if (__atomic_always_lock_free(N, 0)) return true; \
-if (C2(MAYBE_HAVE_ATOMIC_CAS_,N)) return true; \
+if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break; \
+if (C2(FAST_ATOMIC_LDST_,N)) return true; \
} while (0)
/* We next check to see if an access of a larger size is lock-free. We use
a check similar to the one in EXACT, except that we also check that the alignment
of the access is such that the data to be accessed is completely covered
by the larger access. */
#define LARGER(N) \
do { \
uintptr_t r = (uintptr_t)ptr & (N - 1); \
if (!C2(HAVE_INT,N)) break; \
-if (!C2(HAVE_ATOMIC_LDST_,N)) break; \
+if (!C2(FAST_ATOMIC_LDST_,N)) break; \
if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break; \
if (r + n <= N) return true; \
} while (0)
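For a concrete feel of the LARGER(N) coverage test, the check below performs the same computation with explicit values (illustrative only, not part of libatomic):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* An n-byte object is covered by a naturally aligned N-byte access exactly
   when it does not straddle an N-byte boundary.  */
static bool
covered (void *ptr, size_t n, size_t N)
{
  uintptr_t r = (uintptr_t) ptr & (N - 1);   /* offset within the N-byte block */
  return r + n <= N;
}

/* covered (p, 8, 16) is true when (uintptr_t) p % 16 is 0..8, e.g. an 8-byte
   object at offset 4 of a 16-byte block; it is false at offset 12, where the
   object would straddle the 16-byte boundary.  */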
/* Note that this can return that a size/alignment is not lock-free even if
all the operations that we use to implement the respective accesses provide
lock-free forward progress as specified in C++14: Users likely expect
"lock-free" to also mean "fast", which is why we do not return true if, for
example, we implement loads with this size/alignment using a CAS. */
bool
libat_is_lock_free (size_t n, void *ptr)
{