S390: Use C11-like atomics instead of plain memory accesses in lock elision code.

This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., the adapt_count fields).  The size of the data is less
than a word, but it is accessed only with atomic loads and stores.

See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
"Use C11-like atomics instead of plain memory accesses in x86 lock elision."

ChangeLog:

	* sysdeps/unix/sysv/linux/s390/elision-lock.c
	(__lll_lock_elision): Use atomics to load / store adapt_count.
	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
	(__lll_trylock_elision): Likewise.
commit c813dae5d8
parent 8d71242eb7
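glibc's internal atomic_load_relaxed and atomic_store_relaxed macros correspond
to C11's atomic_load_explicit and atomic_store_explicit with
memory_order_relaxed.  For illustration only, here is a minimal standalone C11
sketch of the access pattern the patch adopts (the type and function names
below are hypothetical, not glibc code):

#include <stdatomic.h>

/* Minimal sketch, assuming a hypothetical standalone adapt_count hint.
   glibc's atomic_load_relaxed / atomic_store_relaxed correspond to the
   C11 atomic_load_explicit / atomic_store_explicit calls used here with
   memory_order_relaxed.  */
static _Atomic short adapt_count;

int
should_try_transaction (void)
{
  /* Relaxed MO is enough: the value is only a heuristic hint.  */
  return atomic_load_explicit (&adapt_count, memory_order_relaxed) <= 0;
}

void
skip_transactions_for_a_while (short skip)
{
  /* A plain relaxed store suffices; lost updates are harmless.  */
  atomic_store_explicit (&adapt_count, skip, memory_order_relaxed);
}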
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
+	* sysdeps/unix/sysv/linux/s390/elision-lock.c
+	(__lll_lock_elision): Use atomics to load / store adapt_count.
+	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+
 2016-12-20  Florian Weimer  <fweimer@redhat.com>
 
 	Do not require memset elimination in explicit_bzero test.
--- a/sysdeps/unix/sysv/linux/s390/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c
@@ -45,11 +45,18 @@
 int
 __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 {
-  if (*adapt_count > 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.  */
+  if (atomic_load_relaxed (adapt_count) > 0)
     {
       /* Lost updates are possible, but harmless.  Due to races this might lead
          to *adapt_count becoming less than zero.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+                            atomic_load_relaxed (adapt_count) - 1);
       goto use_lock;
     }
 
@@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
               /* In a non-nested transaction there is no need to abort,
                  which is expensive.  */
               __builtin_tend ();
+              /* Don't try to use transactions for the next couple of times.
+                 See above for why relaxed MO is sufficient.  */
               if (aconf.skip_lock_busy > 0)
-                *adapt_count = aconf.skip_lock_busy;
+                atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
               goto use_lock;
             }
           else /* nesting depth is > 1 */
@@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
               /* A persistent abort (cc 1 or 3) indicates that a retry is
                  probably futile.  Use the normal locking now and for the
                  next couple of calls.
-                 Be careful to avoid writing to the lock.  */
+                 Be careful to avoid writing to the lock.  See above for why
+                 relaxed MO is sufficient.  */
               if (aconf.skip_lock_internal_abort > 0)
-                *adapt_count = aconf.skip_lock_internal_abort;
+                atomic_store_relaxed (adapt_count,
+                                      aconf.skip_lock_internal_abort);
               goto use_lock;
             }
         }
     }
 
   /* Same logic as above, but for for a number of temporary failures in a
-     row.  */
+     row.  See above for why relaxed MO is sufficient.  */
   if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
-    *adapt_count = aconf.skip_lock_out_of_tbegin_retries;
+    atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
 
 use_lock:
   return LLL_LOCK ((*futex), private);
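One detail worth noting in the hunks above: the decrement is a relaxed load
followed by a relaxed store, not a single atomic read-modify-write.  Per the
in-code comment, lost updates are harmless for a hint, so the cheaper
non-interlocked pair is sufficient.  A standalone C11 sketch of that pattern,
with a hypothetical helper name (not glibc code):

#include <stdatomic.h>

/* Hypothetical helper showing the decrement pattern from the hunks
   above: a relaxed load followed by a relaxed store, not an atomic
   read-modify-write.  Concurrent callers can lose updates, which the
   in-code comment deems harmless for a hint.  */
void
adapt_count_decay (_Atomic short *adapt_count)
{
  short v = atomic_load_explicit (adapt_count, memory_order_relaxed);
  atomic_store_explicit (adapt_count, v - 1, memory_order_relaxed);
}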
--- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
@@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
       __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
     }
 
-  /* Only try a transaction if it's worth it.  */
-  if (*adapt_count <= 0)
+  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
+     why we need atomic accesses.  Relaxed MO is sufficient because this is
+     just a hint.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
 
@@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
           __builtin_tend ();
           /* Note: Changing the adapt_count here might abort a transaction on a
              different cpu, but that could happen anyway when the futex is
-             acquired, so there's no need to check the nesting depth here.  */
+             acquired, so there's no need to check the nesting depth here.
+             See above for why relaxed MO is sufficient.  */
           if (aconf.skip_lock_busy > 0)
-            *adapt_count = aconf.skip_lock_busy;
+            atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
         }
       else
         {
@@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count)
     {
       /* Lost updates are possible, but harmless.  Due to races this might lead
          to *adapt_count becoming less than zero.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+                            atomic_load_relaxed (adapt_count) - 1);
     }
 
   return lll_trylock (*futex);
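As a usage illustration (again a hypothetical standalone sketch, not glibc
code), several threads can read and decay such a hint concurrently without
undefined behavior, because every access is atomic even though relaxed MO
enforces no ordering between them:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic short adapt_count;

static void *
worker (void *arg)
{
  (void) arg;
  for (int i = 0; i < 1000; i++)
    {
      short v = atomic_load_explicit (&adapt_count, memory_order_relaxed);
      if (v > 0)
        /* Racy decrement: an update may be lost, but there is no data
           race, because every access is atomic.  */
        atomic_store_explicit (&adapt_count, v - 1, memory_order_relaxed);
    }
  return NULL;
}

int
main (void)
{
  atomic_store_explicit (&adapt_count, 100, memory_order_relaxed);
  pthread_t t[4];
  for (int i = 0; i < 4; i++)
    pthread_create (&t[i], NULL, worker, NULL);
  for (int i = 0; i < 4; i++)
    pthread_join (t[i], NULL);
  /* The final value depends on the interleaving; with a relaxed-MO
     hint that is acceptable.  */
  printf ("final adapt_count hint: %d\n",
          (int) atomic_load_explicit (&adapt_count, memory_order_relaxed));
  return 0;
}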