diff --git a/ChangeLog b/ChangeLog index cc21db7a77..e51403a010 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2016-12-20 Stefan Liebler + + * sysdeps/unix/sysv/linux/s390/Makefile (elision-CFLAGS): + Add -msoft-float. + * sysdeps/unix/sysv/linux/s390/htm.h: New File. + * sysdeps/unix/sysv/linux/s390/elision-lock.c: + Use __libc_t* transaction macros instead of __builtin_t*. + * sysdeps/unix/sysv/linux/s390/elision-trylock.c: Likewise. + * sysdeps/unix/sysv/linux/s390/elision-unlock.c: Likewise. + 2016-12-20 Stefan Liebler * sysdeps/unix/sysv/linux/s390/elision-lock.c diff --git a/sysdeps/unix/sysv/linux/s390/Makefile b/sysdeps/unix/sysv/linux/s390/Makefile index f8ed013e9e..3867c33d91 100644 --- a/sysdeps/unix/sysv/linux/s390/Makefile +++ b/sysdeps/unix/sysv/linux/s390/Makefile @@ -22,7 +22,7 @@ ifeq ($(enable-lock-elision),yes) libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ elision-trylock -elision-CFLAGS = -mhtm +elision-CFLAGS = -mhtm -msoft-float CFLAGS-elision-lock.c = $(elision-CFLAGS) CFLAGS-elision-timed.c = $(elision-CFLAGS) CFLAGS-elision-trylock.c = $(elision-CFLAGS) diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c index 1876d2128d..48cc3db2aa 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-lock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -60,27 +60,23 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) goto use_lock; } - __asm__ volatile (".machinemode \"zarch_nohighgprs\"\n\t" - ".machine \"all\"" - : : : "memory"); - int try_tbegin; for (try_tbegin = aconf.try_tbegin; try_tbegin > 0; try_tbegin--) { - unsigned status; + int status; if (__builtin_expect - ((status = __builtin_tbegin((void *)0)) == _HTM_TBEGIN_STARTED, 1)) + ((status = __libc_tbegin ((void *) 0)) == _HTM_TBEGIN_STARTED, 1)) { if (*futex == 0) return 0; /* Lock was busy. 
Fall back to normal locking. */ - if (__builtin_expect (__builtin_tx_nesting_depth (), 1)) + if (__builtin_expect (__libc_tx_nesting_depth (), 1)) { /* In a non-nested transaction there is no need to abort, which is expensive. */ - __builtin_tend (); + __libc_tend (); /* Don't try to use transactions for the next couple of times. See above for why relaxed MO is sufficient. */ if (aconf.skip_lock_busy > 0) @@ -100,7 +96,7 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) because using the default lock with the inner mutex would abort the outer transaction. */ - __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1); + __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1); } } else diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c index a3252b83ce..e21fc26253 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define aconf __elision_aconf @@ -30,15 +30,11 @@ int __lll_trylock_elision (int *futex, short *adapt_count) { - __asm__ __volatile__ (".machinemode \"zarch_nohighgprs\"\n\t" - ".machine \"all\"" - : : : "memory"); - /* Implement POSIX semantics by forbiding nesting elided trylocks. Sorry. After the abort the code is re-executed non transactional and if the lock was already locked return an error. */ - if (__builtin_tx_nesting_depth () > 0) + if (__libc_tx_nesting_depth () > 0) { /* Note that this abort may terminate an outermost transaction that was created outside glibc. @@ -46,7 +42,7 @@ __lll_trylock_elision (int *futex, short *adapt_count) them to use the default lock instead of retrying transactions until their try_tbegin is zero. */ - __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1); + __libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1); } /* Only try a transaction if it's worth it. 
See __lll_lock_elision for @@ -54,17 +50,17 @@ __lll_trylock_elision (int *futex, short *adapt_count) just a hint. */ if (atomic_load_relaxed (adapt_count) <= 0) { - unsigned status; + int status; if (__builtin_expect - ((status = __builtin_tbegin ((void *)0)) == _HTM_TBEGIN_STARTED, 1)) + ((status = __libc_tbegin ((void *) 0)) == _HTM_TBEGIN_STARTED, 1)) { if (*futex == 0) return 0; /* Lock was busy. Fall back to normal locking. */ /* Since we are in a non-nested transaction there is no need to abort, which is expensive. */ - __builtin_tend (); + __libc_tend (); /* Note: Changing the adapt_count here might abort a transaction on a different cpu, but that could happen anyway when the futex is acquired, so there's no need to check the nesting depth here. diff --git a/sysdeps/unix/sysv/linux/s390/elision-unlock.c b/sysdeps/unix/sysv/linux/s390/elision-unlock.c index 483abe15ff..0b1ade9e5f 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-unlock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-unlock.c @@ -18,6 +18,7 @@ #include #include +#include int __lll_unlock_elision(int *futex, int private) @@ -27,10 +28,7 @@ __lll_unlock_elision(int *futex, int private) have closed the transaction, but that is impossible to detect reliably. */ if (*futex == 0) { - __asm__ volatile (".machinemode \"zarch_nohighgprs\"\n\t" - ".machine \"all\"" - : : : "memory"); - __builtin_tend(); + __libc_tend (); } else lll_unlock ((*futex), private); diff --git a/sysdeps/unix/sysv/linux/s390/htm.h b/sysdeps/unix/sysv/linux/s390/htm.h new file mode 100644 index 0000000000..6b4e8f4634 --- /dev/null +++ b/sysdeps/unix/sysv/linux/s390/htm.h @@ -0,0 +1,149 @@ +/* Shared HTM header. Work around false transactional execution facility + intrinsics. + + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _HTM_H
+#define _HTM_H 1
+
+#include <htmintrin.h>
+
+#ifdef __s390x__
+# define TX_FPRS_BYTES 64 /* Call-saved fprs f8 - f15, 8 bytes each.  */
+# define TX_SAVE_FPRS                                                  \
+  " std %%f8, 0(%[R_FPRS])\n\t"                                        \
+  " std %%f9, 8(%[R_FPRS])\n\t"                                        \
+  " std %%f10, 16(%[R_FPRS])\n\t"                                      \
+  " std %%f11, 24(%[R_FPRS])\n\t"                                      \
+  " std %%f12, 32(%[R_FPRS])\n\t"                                      \
+  " std %%f13, 40(%[R_FPRS])\n\t"                                      \
+  " std %%f14, 48(%[R_FPRS])\n\t"                                      \
+  " std %%f15, 56(%[R_FPRS])\n\t"
+
+# define TX_RESTORE_FPRS                                               \
+  " ld %%f8, 0(%[R_FPRS])\n\t"                                         \
+  " ld %%f9, 8(%[R_FPRS])\n\t"                                         \
+  " ld %%f10, 16(%[R_FPRS])\n\t"                                       \
+  " ld %%f11, 24(%[R_FPRS])\n\t"                                       \
+  " ld %%f12, 32(%[R_FPRS])\n\t"                                       \
+  " ld %%f13, 40(%[R_FPRS])\n\t"                                       \
+  " ld %%f14, 48(%[R_FPRS])\n\t"                                       \
+  " ld %%f15, 56(%[R_FPRS])\n\t"
+
+#else /* ! __s390x__ */
+
+# define TX_FPRS_BYTES 16 /* Call-saved fprs f4 and f6, 8 bytes each.  */
+# define TX_SAVE_FPRS                                                  \
+  " std %%f4, 0(%[R_FPRS])\n\t"                                        \
+  " std %%f6, 8(%[R_FPRS])\n\t"
+
+# define TX_RESTORE_FPRS                                               \
+  " ld %%f4, 0(%[R_FPRS])\n\t"                                         \
+  " ld %%f6, 8(%[R_FPRS])\n\t"
+
+#endif /* ! __s390x__ */
+
+/* Use own inline assembly instead of __builtin_tbegin, as tbegin
+   has to filter program interruptions which can't be done with the builtin.
+   Now the fprs have to be saved / restored here, too.
+   The fpc is also not saved / restored with the builtin.
+   The used inline assembly does not clobber the volatile fprs / vrs!
+   Clobbering the latter ones would force the compiler to save / restore
+   the call saved fprs as those overlap with the vrs, but they only need to be
+   restored if the transaction fails but not if the transaction is successfully
+   started.  Thus the user of the tbegin macros in this header file has to
+   compile the file / function with -msoft-float.  It prevents gcc from using
+   fprs / vrs.  Note: the tdb argument is currently ignored.  */
+#define __libc_tbegin(tdb)                                             \
+  ({ int __ret;                                                        \
+     int __fpc;                                                        \
+     char __fprs[TX_FPRS_BYTES];                                       \
+     __asm__ __volatile__ (".machine push\n\t"                         \
+                           ".machinemode \"zarch_nohighgprs\"\n\t"     \
+                           ".machine \"all\"\n\t"                      \
+                           /* Save state at the outermost transaction. \
+                              As extracting nesting depth is expensive \
+                              on at least zEC12, save fprs at inner    \
+                              transactions, too.                       \
+                              The fpc and fprs are saved here as they  \
+                              are not saved by tbegin.  There exist no \
+                              call-saved vrs, thus they are not saved  \
+                              here.  */                                \
+                           " efpc %[R_FPC]\n\t"                        \
+                           TX_SAVE_FPRS                                \
+                           /* Begin transaction: save all gprs, allow  \
+                              ar modification and fp operations.  Some \
+                              program-interruptions (e.g. a null       \
+                              pointer access) are filtered and the     \
+                              transaction will abort.  In this case    \
+                              the normal lock path will execute it     \
+                              again and result in a core dump which    \
+                              does not point at tbegin but at the      \
+                              really executed instruction.  */         \
+                           " tbegin 0, 0xFF0E\n\t"                     \
+                           /* Branch away in abort case (this is the   \
+                              preferred sequence.  See PoP in chapter 5 \
+                              Transactional-Execution Facility         \
+                              Operation).  */                          \
+                           " jnz 0f\n\t"                               \
+                           /* Transaction has started: result is 0.  */ \
+                           " lhi %[R_RET], 0\n\t"                      \
+                           " j 1f\n\t"                                 \
+                           /* Transaction has aborted.  Now we are at  \
+                              the outermost transaction.  Return the   \
+                              condition code; restore fpc and fprs.  */ \
+                           "0: ipm %[R_RET]\n\t"                       \
+                           " srl %[R_RET], 28\n\t"                     \
+                           " sfpc %[R_FPC]\n\t"                        \
+                           TX_RESTORE_FPRS                             \
+                           "1:\n\t"                                    \
+                           ".machine pop\n"                            \
+                           : [R_RET] "=&d" (__ret),                    \
+                             [R_FPC] "=&d" (__fpc)                     \
+                           : [R_FPRS] "a" (__fprs)                     \
+                           : "cc", "memory");                          \
+     __ret;                                                            \
+     })
+
+/* These builtins are correct.  Use them within .machine push / pop.  */
+#define __libc_tend()                                                  \
+  ({ __asm__ __volatile__ (".machine push\n\t"                         \
+                           ".machinemode \"zarch_nohighgprs\"\n\t"     \
+                           ".machine \"all\"\n\t");                    \
+     int __ret = __builtin_tend ();                                    \
+     __asm__ __volatile__ (".machine pop");                            \
+     __ret;                                                            \
+     })
+
+#define __libc_tabort(abortcode)                                       \
+  do { __asm__ __volatile__ (".machine push\n\t"                       \
+                             ".machinemode \"zarch_nohighgprs\"\n\t"   \
+                             ".machine \"all\"\n\t");                  \
+       __builtin_tabort (abortcode);                                   \
+       __asm__ __volatile__ (".machine pop"); } while (0)
+
+#define __libc_tx_nesting_depth()                                      \
+  ({ __asm__ __volatile__ (".machine push\n\t"                         \
+                           ".machinemode \"zarch_nohighgprs\"\n\t"     \
+                           ".machine \"all\"\n\t");                    \
+     int __ret = __builtin_tx_nesting_depth ();                        \
+     __asm__ __volatile__ (".machine pop");                            \
+     __ret;                                                            \
+     })
+
+#endif