S390: Use C11-like atomics instead of plain memory accesses in lock elision code.

This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., the adapt_count fields).  The data is smaller than a word,
but it is accessed only with atomic loads and stores.

See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
"Use C11-like atomics instead of plain memory accesses in x86 lock elision."

ChangeLog:

	* sysdeps/unix/sysv/linux/s390/elision-lock.c
	(__lll_lock_elision): Use atomics to load / store adapt_count.
	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
	(__lll_trylock_elision): Likewise.
Author: Stefan Liebler
Date:   2016-12-20 15:12:48 +01:00
Commit: c813dae5d8
Parent: 8d71242eb7
3 changed files with 34 additions and 12 deletions

--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
+	* sysdeps/unix/sysv/linux/s390/elision-lock.c
+	(__lll_lock_elision): Use atomics to load / store adapt_count.
+	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+
 2016-12-20  Florian Weimer  <fweimer@redhat.com>
 
 	Do not require memset elimination in explicit_bzero test.

--- a/sysdeps/unix/sysv/linux/s390/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c
@@ -45,11 +45,18 @@
 int
 __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 {
-  if (*adapt_count > 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.  */
+  if (atomic_load_relaxed (adapt_count) > 0)
     {
       /* Lost updates are possible, but harmless.  Due to races this might lead
          to *adapt_count becoming less than zero.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+                            atomic_load_relaxed (adapt_count) - 1);
       goto use_lock;
     }
 
@@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
               /* In a non-nested transaction there is no need to abort,
                  which is expensive.  */
               __builtin_tend ();
+              /* Don't try to use transactions for the next couple of times.
+                 See above for why relaxed MO is sufficient.  */
               if (aconf.skip_lock_busy > 0)
-                *adapt_count = aconf.skip_lock_busy;
+                atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
               goto use_lock;
             }
           else /* nesting depth is > 1 */
@@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
               /* A persistent abort (cc 1 or 3) indicates that a retry is
                  probably futile.  Use the normal locking now and for the
                  next couple of calls.
-                 Be careful to avoid writing to the lock.  */
+                 Be careful to avoid writing to the lock.  See above for why
+                 relaxed MO is sufficient.  */
               if (aconf.skip_lock_internal_abort > 0)
-                *adapt_count = aconf.skip_lock_internal_abort;
+                atomic_store_relaxed (adapt_count,
+                                      aconf.skip_lock_internal_abort);
               goto use_lock;
             }
         }
     }
 
   /* Same logic as above, but for for a number of temporary failures in a
-     row.  */
+     row.  See above for why relaxed MO is sufficient.  */
   if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
-    *adapt_count = aconf.skip_lock_out_of_tbegin_retries;
+    atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
 
 use_lock:
   return LLL_LOCK ((*futex), private);

--- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
@@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
       __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
     }
 
-  /* Only try a transaction if it's worth it.  */
-  if (*adapt_count <= 0)
+  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
+     why we need atomic accesses.  Relaxed MO is sufficient because this is
+     just a hint.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
 
@@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
           __builtin_tend ();
           /* Note: Changing the adapt_count here might abort a transaction on a
              different cpu, but that could happen anyway when the futex is
-             acquired, so there's no need to check the nesting depth here.  */
+             acquired, so there's no need to check the nesting depth here.
+             See above for why relaxed MO is sufficient.  */
           if (aconf.skip_lock_busy > 0)
-            *adapt_count = aconf.skip_lock_busy;
+            atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
         }
       else
         {
@@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count)
     {
      /* Lost updates are possible, but harmless.  Due to races this might lead
         to *adapt_count becoming less than zero.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+                            atomic_load_relaxed (adapt_count) - 1);
     }
 
   return lll_trylock (*futex);
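
As a rough standalone check of the "lost updates are possible, but harmless"
reasoning above, the following hypothetical test (plain C11, not part of this
commit; assumes <threads.h> is available) races two threads on the relaxed
load/store decrement.  Both threads may read the same value and store the same
result, so one decrement can be lost; the final count is 0 or 1, and either
outcome is acceptable because adapt_count is only a heuristic.

  #include <stdatomic.h>
  #include <stdio.h>
  #include <threads.h>

  static _Atomic short adapt_count = 2;

  static int
  worker (void *arg)
  {
    (void) arg;
    /* Same non-RMW decrement pattern as in the elision code.  */
    short c = atomic_load_explicit (&adapt_count, memory_order_relaxed);
    atomic_store_explicit (&adapt_count, c - 1, memory_order_relaxed);
    return 0;
  }

  int
  main (void)
  {
    thrd_t t1, t2;
    thrd_create (&t1, worker, NULL);
    thrd_create (&t2, worker, NULL);
    thrd_join (t1, NULL);
    thrd_join (t2, NULL);
    /* Prints 0 or 1 depending on the interleaving.  */
    printf ("adapt_count = %d\n", (int) atomic_load (&adapt_count));
    return 0;
  }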