powerpc: Optimize lock elision for pthread_mutex_t

With TLE enabled, the adapt count variable update incurs
an 8% overhead before entering the critical section of an
elided mutex.

Instead, if it is done right after leaving the critical
section, this serialization can be avoided.

This alters the existing behavior of __lll_trylock_elision
as it will only decrement the adapt_count if it successfully
acquires the lock.

	* sysdeps/unix/sysv/linux/powerpc/elision-lock.c
	(__lll_lock_elision): Remove adapt_count decrement...
	* sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
	(__lll_trylock_elision): Likewise.
	* sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
	(__lll_unlock_elision): ... to here. And utilize
	new adapt_count parameter.
	* sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
	(__lll_unlock_elision): Update to include adapt_count
	parameter.
	(lll_unlock_elision): Pass pointer to adapt_count
	variable.
This commit is contained in:
Paul Murphy 2015-09-03 13:40:21 -05:00 committed by Tulio Magno Quites Machado Filho
parent e5e6bea22a
commit fadd2ad9cc
5 changed files with 27 additions and 6 deletions

View File

@ -1,3 +1,18 @@
2015-10-15 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
* sysdeps/unix/sysv/linux/powerpc/elision-lock.c
(__lll_lock_elision): Remove adapt_count decrement...
* sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
(__lll_trylock_elision): Likewise.
* sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
(__lll_unlock_elision): ... to here. And utilize
new adapt_count parameter.
* sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
(__lll_unlock_elision): Update to include adapt_count
parameter.
(lll_unlock_elision): Pass pointer to adapt_count
variable.
2015-10-15 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
* nptl/pthread_mutex_unlock.c (lll_unlock_elision):

View File

@ -47,7 +47,6 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared)
{
if (*adapt_count > 0)
{
(*adapt_count)--;
goto use_lock;
}

View File

@ -36,7 +36,6 @@ __lll_trylock_elision (int *futex, short *adapt_count)
/* Only try a transaction if it's worth it. */
if (*adapt_count > 0)
{
(*adapt_count)--;
goto use_lock;
}

View File

@ -21,12 +21,20 @@
#include "htm.h"
int
__lll_unlock_elision(int *lock, int pshared)
__lll_unlock_elision (int *lock, short *adapt_count, int pshared)
{
/* When the lock was free we're in a transaction. */
if (*lock == 0)
__builtin_tend (0);
else
lll_unlock ((*lock), pshared);
{
lll_unlock ((*lock), pshared);
/* Update the adapt count AFTER completing the critical section.
Doing this here prevents unneeded stalling when entering
a critical section. Saving about 8% runtime on P8. */
if (*adapt_count > 0)
(*adapt_count)--;
}
return 0;
}

View File

@ -32,7 +32,7 @@ extern int __lll_timedlock_elision
extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
attribute_hidden;
extern int __lll_unlock_elision(int *lock, int private)
extern int __lll_unlock_elision (int *lock, short *adapt_count, int private)
attribute_hidden;
extern int __lll_trylock_elision(int *lock, short *adapt_count)
@ -41,7 +41,7 @@ extern int __lll_trylock_elision(int *lock, short *adapt_count)
#define lll_lock_elision(futex, adapt_count, private) \
__lll_lock_elision (&(futex), &(adapt_count), private)
#define lll_unlock_elision(futex, adapt_count, private) \
__lll_unlock_elision (&(futex), private)
__lll_unlock_elision (&(futex), &(adapt_count), private)
#define lll_trylock_elision(futex, adapt_count) \
__lll_trylock_elision (&(futex), &(adapt_count))