Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.

The new code uses the same logic as the ARM version. In the common case it
removes one FPSR read and one FPCR read. For FE_DFL_ENV and FE_NOMASK_ENV, an
FPCR read is avoided when the FPCR does not change.
This commit is contained in:
Wilco Dijkstra 2015-08-05 14:57:37 +01:00
parent 782723d6d8
commit 3136eb7abd
2 changed files with 28 additions and 17 deletions

View File

@ -1,3 +1,8 @@
2015-08-05 Wilco Dijkstra <wdijkstr@arm.com>
* sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
Optimize to reduce FPCR/FPSR accesses.
2015-08-05 H.J. Lu <hongjiu.lu@intel.com>
* locale/loadarchive.c (_nl_archive_subfreeres): Also check

View File

@ -29,8 +29,20 @@ __fesetenv (const fenv_t *envp)
fpu_fpsr_t fpsr_new;
_FPU_GETCW (fpcr);
_FPU_GETFPSR (fpsr);
if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
{
/* The new FPCR/FPSR are valid, so don't merge the reserved flags. */
fpcr_new = envp->__fpcr;
if (fpcr != fpcr_new)
_FPU_SETCW (fpcr_new);
_FPU_SETFPSR (envp->__fpsr);
return 0;
}
_FPU_GETFPSR (fpsr);
fpcr_new = fpcr & _FPU_RESERVED;
fpsr_new = fpsr & _FPU_FPSR_RESERVED;
@ -39,31 +51,25 @@ __fesetenv (const fenv_t *envp)
fpcr_new |= _FPU_DEFAULT;
fpsr_new |= _FPU_FPSR_DEFAULT;
}
else if (envp == FE_NOMASK_ENV)
else
{
fpcr_new |= _FPU_FPCR_IEEE;
fpsr_new |= _FPU_FPSR_IEEE;
}
else
{
fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
}
if (fpsr != fpsr_new)
_FPU_SETFPSR (fpsr_new);
if (fpcr != fpcr_new)
{
_FPU_SETCW (fpcr_new);
/* Trapping exceptions are optional in AArch64 the relevant enable
bits in FPCR are RES0 hence the absence of support can be
detected by reading back the FPCR and comparing with the required
value. */
/* Trapping exceptions are optional in AArch64; the relevant enable
bits in FPCR are RES0 hence the absence of support can be detected
by reading back the FPCR and comparing with the required value. */
_FPU_GETCW (updated_fpcr);
if ((updated_fpcr & fpcr_new) != fpcr_new)
return 1;
return fpcr_new & ~updated_fpcr;
}
return 0;
}