S390: Use C11-like atomics instead of plain memory accesses in lock elision code.

This uses atomic operations to access lock elision metadata that is accessed
concurrently (ie, adapt_count fields).  The size of the data is less than a
word but accessed only with atomic loads and stores.

See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
"Use C11-like atomics instead of plain memory accesses in x86 lock elision."

ChangeLog:

	* sysdeps/unix/sysv/linux/s390/elision-lock.c
	(__lll_lock_elision): Use atomics to load / store adapt_count.
	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
	(__lll_trylock_elision): Likewise.
This commit is contained in:
Stefan Liebler 2016-12-20 15:12:48 +01:00
parent 8d71242eb7
commit c813dae5d8
3 changed files with 34 additions and 12 deletions

View File

@ -1,3 +1,10 @@
2016-12-20 Stefan Liebler <stli@linux.vnet.ibm.com>
* sysdeps/unix/sysv/linux/s390/elision-lock.c
(__lll_lock_elision): Use atomics to load / store adapt_count.
* sysdeps/unix/sysv/linux/s390/elision-trylock.c
(__lll_trylock_elision): Likewise.
2016-12-20 Florian Weimer <fweimer@redhat.com>
Do not require memset elimination in explicit_bzero test.

View File

@ -45,11 +45,18 @@
int
__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
{
if (*adapt_count > 0)
/* adapt_count can be accessed concurrently; these accesses can be both
inside of transactions (if critical sections are nested and the outer
critical section uses lock elision) and outside of transactions. Thus,
we need to use atomic accesses to avoid data races. However, the
value of adapt_count is just a hint, so relaxed MO accesses are
sufficient. */
if (atomic_load_relaxed (adapt_count) > 0)
{
/* Lost updates are possible, but harmless. Due to races this might lead
to *adapt_count becoming less than zero. */
(*adapt_count)--;
atomic_store_relaxed (adapt_count,
atomic_load_relaxed (adapt_count) - 1);
goto use_lock;
}
@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
/* In a non-nested transaction there is no need to abort,
which is expensive. */
__builtin_tend ();
/* Don't try to use transactions for the next couple of times.
See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_busy > 0)
*adapt_count = aconf.skip_lock_busy;
atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
goto use_lock;
}
else /* nesting depth is > 1 */
@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
/* A persistent abort (cc 1 or 3) indicates that a retry is
probably futile. Use the normal locking now and for the
next couple of calls.
Be careful to avoid writing to the lock. */
Be careful to avoid writing to the lock. See above for why
relaxed MO is sufficient. */
if (aconf.skip_lock_internal_abort > 0)
*adapt_count = aconf.skip_lock_internal_abort;
atomic_store_relaxed (adapt_count,
aconf.skip_lock_internal_abort);
goto use_lock;
}
}
}
/* Same logic as above, but for for a number of temporary failures in a
row. */
row. See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
*adapt_count = aconf.skip_lock_out_of_tbegin_retries;
atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
use_lock:
return LLL_LOCK ((*futex), private);

View File

@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
__builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
}
/* Only try a transaction if it's worth it. */
if (*adapt_count <= 0)
/* Only try a transaction if it's worth it. See __lll_lock_elision for
why we need atomic accesses. Relaxed MO is sufficient because this is
just a hint. */
if (atomic_load_relaxed (adapt_count) <= 0)
{
unsigned status;
@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
__builtin_tend ();
/* Note: Changing the adapt_count here might abort a transaction on a
different cpu, but that could happen anyway when the futex is
acquired, so there's no need to check the nesting depth here. */
acquired, so there's no need to check the nesting depth here.
See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_busy > 0)
*adapt_count = aconf.skip_lock_busy;
atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
}
else
{
@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count)
{
/* Lost updates are possible, but harmless. Due to races this might lead
to *adapt_count becoming less than zero. */
(*adapt_count)--;
atomic_store_relaxed (adapt_count,
atomic_load_relaxed (adapt_count) - 1);
}
return lll_trylock (*futex);