S390: Optimize lock-elision by decrementing adapt_count at unlock.

This patch decrements adapt_count while unlocking the futex instead of
before acquiring it, as is already done on power.  Furthermore, a
transaction is now only started if the futex is currently free, and
this check is repeated after the transaction has started.  If the futex
is not free and the transaction nesting depth is one, we can simply end
the started transaction instead of aborting it.  The previous
implementation of this check was faulty, as it always ended the started
transaction; the outermost transaction was then aborted only
indirectly, via the fallback path.  Now the outermost transaction is
aborted directly.

This patch also adds some commentary and aligns the code in
elision-trylock.c with the code in elision-lock.c as far as possible.
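
For readers who want the control flow in one place, here is a minimal,
self-contained C sketch of the new fast path described above.  It is not
the glibc implementation: elide_lock, tbegin, tend, tabort,
tx_nesting_depth and TBEGIN_STARTED are hypothetical stand-ins for the
s390 HTM intrinsics and _HTM_* constants used by the real code.

#include <stdatomic.h>

/* Hypothetical stubs standing in for the s390 HTM intrinsics.  */
#define TBEGIN_STARTED 0
static int tbegin (void) { return TBEGIN_STARTED; }  /* start a transaction */
static void tend (void) { }                          /* commit (end) it */
static void tabort (void) { }                        /* abort it */
static int tx_nesting_depth (void) { return 1; }     /* current nesting */

/* Returns 0 if the lock was elided, -1 if the caller must fall back to
   normal locking.  */
static int
elide_lock (atomic_int *futex, atomic_short *adapt_count, int try_tbegin)
{
  /* Only start a transaction if the futex looks free and recent aborts
     have not disabled elision (adapt_count <= 0).  */
  if (atomic_load_explicit (futex, memory_order_relaxed) == 0
      && atomic_load_explicit (adapt_count, memory_order_relaxed) <= 0
      && try_tbegin > 0)
    {
      if (tbegin () == TBEGIN_STARTED)
        {
          /* Re-check inside the transaction: this puts the futex into
             the transaction's read set, so a concurrent writer aborts
             this transaction.  */
          if (atomic_load_explicit (futex, memory_order_relaxed) == 0)
            return 0;  /* Elided: return to user code in the transaction.  */
          if (tx_nesting_depth () <= 1)
            tend ();   /* Non-nested: ending is cheaper than aborting.  */
          else
            tabort (); /* Nested: abort the outermost transaction.  */
        }
    }
  return -1;
}

Ending with tend () is the cheap way out of a useless non-nested
transaction; tabort () is reserved for the nested case, where it is the
only way to take down the outer transaction directly.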

ChangeLog:

	* sysdeps/unix/sysv/linux/s390/lowlevellock.h
	(__lll_unlock_elision, lll_unlock_elision): Add adapt_count argument.
	* sysdeps/unix/sysv/linux/s390/elision-lock.c:
	(__lll_lock_elision): Decrement adapt_count while unlocking
	instead of before locking.
	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
	(__lll_trylock_elision): Likewise.
	* sysdeps/unix/sysv/linux/s390/elision-unlock.c:
	(__lll_unlock_elision): Likewise.
Author: Stefan Liebler
Date:   2016-12-20 15:12:48 +01:00
Commit: dd037fb3df (parent: 53c5c3d5ac)
5 changed files with 90 additions and 54 deletions

diff --git a/ChangeLog b/ChangeLog

@@ -1,3 +1,15 @@
+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
+	* sysdeps/unix/sysv/linux/s390/lowlevellock.h
+	(__lll_unlock_elision, lll_unlock_elision): Add adapt_count argument.
+	* sysdeps/unix/sysv/linux/s390/elision-lock.c:
+	(__lll_lock_elision): Decrement adapt_count while unlocking
+	instead of before locking.
+	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+	* sysdeps/unix/sysv/linux/s390/elision-unlock.c:
+	(__lll_unlock_elision): Likewise.
+
 2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
 	* sysdeps/unix/sysv/linux/s390/htm.h (__libc_tbegin_retry): New macro.

diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c

@@ -50,31 +50,30 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
      critical section uses lock elision) and outside of transactions.  Thus,
      we need to use atomic accesses to avoid data races.  However, the
      value of adapt_count is just a hint, so relaxed MO accesses are
-     sufficient.  */
-  if (atomic_load_relaxed (adapt_count) > 0)
-    {
-      /* Lost updates are possible, but harmless.  Due to races this might lead
-         to *adapt_count becoming less than zero.  */
-      atomic_store_relaxed (adapt_count,
-                            atomic_load_relaxed (adapt_count) - 1);
-      goto use_lock;
-    }
-
-  if (aconf.try_tbegin > 0)
+     sufficient.
+     Do not begin a transaction if another cpu has locked the
+     futex with normal locking.  If adapt_count is zero, it remains and the
+     next pthread_mutex_lock call will try to start a transaction again.  */
+  if (atomic_load_relaxed (futex) == 0
+      && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
     {
       int status = __libc_tbegin_retry ((void *) 0, aconf.try_tbegin - 1);
       if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
                             _HTM_TBEGIN_STARTED))
        {
-         if (__builtin_expect (*futex == 0, 1))
+         /* Check the futex to make sure nobody has touched it in the
+            mean time.  This forces the futex into the cache and makes
+            sure the transaction aborts if some other cpu uses the
+            lock (writes the futex).  */
+         if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
            /* Lock was free.  Return to user code in a transaction.  */
            return 0;
 
          /* Lock was busy.  Fall back to normal locking.  */
-         if (__builtin_expect (__libc_tx_nesting_depth (), 1))
+         if (__builtin_expect (__libc_tx_nesting_depth () <= 1, 1))
            {
              /* In a non-nested transaction there is no need to abort,
-                which is expensive.  */
+                which is expensive.  Simply end the started transaction.  */
              __libc_tend ();
              /* Don't try to use transactions for the next couple of times.
                 See above for why relaxed MO is sufficient.  */
@@ -92,9 +91,9 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
                 is zero.
                 The adapt_count of this inner mutex is not changed,
                 because using the default lock with the inner mutex
-                would abort the outer transaction.
-             */
+                would abort the outer transaction.  */
              __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+             __builtin_unreachable ();
            }
        }
       else if (status != _HTM_TBEGIN_TRANSIENT)
@@ -110,15 +109,15 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
        }
       else
        {
-         /* Same logic as above, but for for a number of temporary failures in
-            a row.  */
+         /* The transaction failed for some retries with
+            _HTM_TBEGIN_TRANSIENT.  Use the normal locking now and for the
+            next couple of calls.  */
          if (aconf.skip_lock_out_of_tbegin_retries > 0)
            atomic_store_relaxed (adapt_count,
                                  aconf.skip_lock_out_of_tbegin_retries);
        }
     }
 
   use_lock:
+  /* Use normal locking as fallback path if transaction does not succeed.  */
   return LLL_LOCK ((*futex), private);
 }
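
The __libc_tbegin_retry macro used in the hunk above was introduced by
the parent commit.  As a rough, hypothetical model of its behavior (the
names below are stand-ins, not the actual macro from
sysdeps/unix/sysv/linux/s390/htm.h, which also performs a processor
assist between retries):

/* Hypothetical model: retry tbegin after transient aborts, up to
   `retries` additional attempts.  */
#define TBEGIN_STARTED   0
#define TBEGIN_TRANSIENT 2  /* condition code 2: transient abort */

static int tbegin (void) { return TBEGIN_STARTED; }  /* stub */

static int
tbegin_retry (int retries)
{
  int status = tbegin ();
  while (status == TBEGIN_TRANSIENT && retries-- > 0)
    status = tbegin ();
  return status;
}

This would also explain why __lll_lock_elision passes
aconf.try_tbegin - 1: the first attempt plus the retries presumably add
up to try_tbegin attempts in total.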

diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c

@@ -43,23 +43,36 @@ __lll_trylock_elision (int *futex, short *adapt_count)
          until their try_tbegin is zero.
        */
       __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+      __builtin_unreachable ();
     }
 
-  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
-     why we need atomic accesses.  Relaxed MO is sufficient because this is
-     just a hint.  */
-  if (atomic_load_relaxed (adapt_count) <= 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.
+     Do not begin a transaction if another cpu has locked the
+     futex with normal locking.  If adapt_count is zero, it remains and the
+     next pthread_mutex_lock call will try to start a transaction again.  */
+  if (atomic_load_relaxed (futex) == 0
+      && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
     {
-      int status;
-      if (__builtin_expect
-         ((status = __libc_tbegin ((void *) 0)) == _HTM_TBEGIN_STARTED, 1))
+      int status = __libc_tbegin ((void *) 0);
+      if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
+                            _HTM_TBEGIN_STARTED))
        {
-         if (*futex == 0)
+         /* Check the futex to make sure nobody has touched it in the
+            mean time.  This forces the futex into the cache and makes
+            sure the transaction aborts if some other cpu uses the
+            lock (writes the futex).  */
+         if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
+           /* Lock was free.  Return to user code in a transaction.  */
            return 0;
 
-         /* Lock was busy.  Fall back to normal locking.  */
-         /* Since we are in a non-nested transaction there is no need to abort,
-            which is expensive.  */
+         /* Lock was busy.  Fall back to normal locking.  Since we are in
+            a non-nested transaction there is no need to abort, which is
+            expensive.  Simply end the started transaction.  */
          __libc_tend ();
          /* Note: Changing the adapt_count here might abort a transaction on a
             different cpu, but that could happen anyway when the futex is
@@ -68,27 +81,18 @@ __lll_trylock_elision (int *futex, short *adapt_count)
          if (aconf.skip_lock_busy > 0)
            atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
        }
-      else
+      else if (status != _HTM_TBEGIN_TRANSIENT)
        {
-         if (status != _HTM_TBEGIN_TRANSIENT)
-           {
-             /* A persistent abort (cc 1 or 3) indicates that a retry is
-                probably futile.  Use the normal locking now and for the
-                next couple of calls.
-                Be careful to avoid writing to the lock.  */
-             if (aconf.skip_trylock_internal_abort > 0)
-               *adapt_count = aconf.skip_trylock_internal_abort;
-           }
+         /* A persistent abort (cc 1 or 3) indicates that a retry is
+            probably futile.  Use the normal locking now and for the
+            next couple of calls.
+            Be careful to avoid writing to the lock.  */
+         if (aconf.skip_trylock_internal_abort > 0)
+           *adapt_count = aconf.skip_trylock_internal_abort;
        }
       /* Could do some retries here.  */
     }
-  else
-    {
-      /* Lost updates are possible, but harmless.  Due to races this might lead
-         to *adapt_count becoming less than zero.  */
-      atomic_store_relaxed (adapt_count,
-                            atomic_load_relaxed (adapt_count) - 1);
-    }
 
+  /* Use normal locking as fallback path if transaction does not succeed.  */
   return lll_trylock (*futex);
 }

diff --git a/sysdeps/unix/sysv/linux/s390/elision-unlock.c b/sysdeps/unix/sysv/linux/s390/elision-unlock.c

@@ -21,16 +21,37 @@
 #include <htm.h>
 
 int
-__lll_unlock_elision(int *futex, int private)
+__lll_unlock_elision(int *futex, short *adapt_count, int private)
 {
   /* If the lock is free, we elided the lock earlier.  This does not
      necessarily mean that we are in a transaction, because the user code may
-     have closed the transaction, but that is impossible to detect reliably.  */
-  if (*futex == 0)
+     have closed the transaction, but that is impossible to detect reliably.
+     Relaxed MO access to futex is sufficient, as we only need a hint whether
+     we started a transaction or acquired the futex in e.g. elision-lock.c.  */
+  if (atomic_load_relaxed (futex) == 0)
     {
       __libc_tend ();
     }
   else
-    lll_unlock ((*futex), private);
+    {
+      /* Update the adapt_count while unlocking before completing the critical
+         section.  adapt_count is accessed concurrently outside of a
+         transaction or an acquired lock, e.g. in elision-lock.c, so we need
+         to use atomic accesses.  However, the value of adapt_count is just a
+         hint, so relaxed MO accesses are sufficient.
+         If adapt_count were decremented while locking, multiple
+         CPUs trying to lock the locked mutex would decrement adapt_count to
+         zero, and another CPU would try to start a transaction, which would
+         be immediately aborted as the mutex is locked.
+         If adapt_count were decremented while unlocking after completing the
+         critical section, possible waiters would be woken up before
+         decrementing adapt_count.  Those woken-up waiters could have
+         destroyed and freed this mutex!  */
+      short adapt_count_val = atomic_load_relaxed (adapt_count);
+      if (adapt_count_val > 0)
+        atomic_store_relaxed (adapt_count, adapt_count_val - 1);
+
+      lll_unlock ((*futex), private);
+    }
   return 0;
 }
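
The ordering constraint in the new comment above can be distilled into a
short, hedged sketch (plain C11 atomics; elide_unlock and futex_wake are
hypothetical stand-ins, not the glibc futex machinery): the adapt_count
update must happen before the releasing store, because after that store
a woken waiter may destroy and free the mutex.

#include <stdatomic.h>

static void futex_wake (atomic_int *futex) { (void) futex; }  /* stub */

static void
elide_unlock (atomic_int *futex, atomic_short *adapt_count)
{
  /* Decrement the elision hint first, while the lock is still held.
     Relaxed MO suffices: adapt_count is only a hint, and lost updates
     are harmless.  */
  short c = atomic_load_explicit (adapt_count, memory_order_relaxed);
  if (c > 0)
    atomic_store_explicit (adapt_count, c - 1, memory_order_relaxed);

  /* Only now release the lock.  After this store a waiter can acquire,
     destroy, and free the mutex, so no field of the mutex (such as
     adapt_count) may be touched anymore; the wake call only uses the
     address as a key.  */
  atomic_store_explicit (futex, 0, memory_order_release);
  futex_wake (futex);
}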

diff --git a/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/sysdeps/unix/sysv/linux/s390/lowlevellock.h

@@ -33,7 +33,7 @@ extern int __lll_timedlock_elision
 extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
   attribute_hidden;
 
-extern int __lll_unlock_elision(int *futex, int private)
+extern int __lll_unlock_elision(int *futex, short *adapt_count, int private)
   attribute_hidden;
 
 extern int __lll_trylock_elision(int *futex, short *adapt_count)
@@ -42,7 +42,7 @@ extern int __lll_trylock_elision(int *futex, short *adapt_count)
 # define lll_lock_elision(futex, adapt_count, private) \
   __lll_lock_elision (&(futex), &(adapt_count), private)
 # define lll_unlock_elision(futex, adapt_count, private) \
-  __lll_unlock_elision (&(futex), private)
+  __lll_unlock_elision (&(futex), &(adapt_count), private)
 # define lll_trylock_elision(futex, adapt_count) \
   __lll_trylock_elision(&(futex), &(adapt_count))
 # endif /* ENABLE_LOCK_ELISION */