diff --git a/ChangeLog b/ChangeLog index f42d563fb8..66ac05a36b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,52 @@ +2013-06-12 Siddhesh Poyarekar + + * include/fenv.h: Include stdbool.h. + (struct rm_ctx): New structure. + * sysdeps/generic/math_private.h (SET_RESTORE_ROUND_GENERIC): + Define macro. + (SET_RESTORE_ROUND): Define using SET_RESTORE_ROUND_GENERIC. + (SET_RESTORE_ROUNDF): Likewise. + (SET_RESTORE_ROUNDL): Likewise. + (SET_RESTORE_ROUND_NOEX): Likewise. + (SET_RESTORE_ROUND_NOEXF): Likewise. + (SET_RESTORE_ROUND_NOEXL): Likewise. + (SET_RESTORE_ROUND_53BIT): Likewise. + [HAVE_RM_CTX] (libc_feresetround_noex_ctx): Define macro. + (libc_feresetround_noexf_ctx): Likewise. + (libc_feresetround_noexl_ctx): Likewise. + (libc_feholdsetround_53bit_ctx): Likewise. + (libc_feresetround_53bit_ctx): Likewise. + * sysdeps/i386/fpu/fenv_private.h (HAVE_RM_CTX): Define macro. + (libc_feholdexcept_setround_sse_ctx): New function. + (libc_fesetenv_sse_ctx): Likewise. + (libc_feupdateenv_sse_ctx): Likewise. + (libc_feholdexcept_setround_387_prec_ctx): Likewise. + (libc_feholdexcept_setround_387_ctx): Likewise. + (libc_feholdexcept_setround_387_53bit_ctx): Likewise. + (libc_feholdsetround_387_prec_ctx): Likewise. + (libc_feholdsetround_387_ctx): Likewise. + (libc_feholdsetround_387_53bit_ctx): Likewise. + (libc_feholdsetround_sse_ctx): Likewise. + (libc_feresetround_sse_ctx): Likewise. + (libc_feresetround_387_ctx): Likewise. + (libc_feupdateenv_387_ctx): Likewise. + (libc_feholdexcept_setroundf_ctx): Define macro. + (libc_fesetenvf_ctx): Likewise. + (libc_feupdateenvf_ctx): Likewise. + (libc_feholdsetroundf_ctx): Likewise. + (libc_feresetroundf_ctx): Likewise. + (libc_feholdexcept_setround_ctx): Likewise. + (libc_fesetenv_ctx): Likewise. + (libc_feupdateenv_ctx): Likewise. + (libc_feholdsetround_ctx): Likewise. + (libc_feresetround_ctx): Likewise. + (libc_feholdexcept_setroundl_ctx): Likewise. + (libc_feupdateenvl_ctx): Likewise. 
+ (libc_feholdsetroundl_ctx): Likewise. + (libc_feresetroundl_ctx): Likewise. + [!__SSE2_MATH__] (libc_feholdsetround_53bit_ctx): Likewise. + (libc_feresetround_53bit_ctx): Likewise. + 2013-06-11 Siddhesh Poyarekar * locale/iso-639.def: Convert to UTF-8. diff --git a/include/fenv.h b/include/fenv.h index ed6d1394ba..9f90d17090 100644 --- a/include/fenv.h +++ b/include/fenv.h @@ -1,5 +1,6 @@ #ifndef _FENV_H #include +#include #ifndef _ISOMAC /* Now define the internal interfaces. */ @@ -23,4 +24,13 @@ libm_hidden_proto (fetestexcept) libm_hidden_proto (feclearexcept) #endif +/* Rounding mode context. This allows functions to set/restore rounding mode + only when the desired rounding mode is different from the current rounding + mode. */ +struct rm_ctx +{ + fenv_t env; + bool updated_status; +}; + #endif diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index e98360dd47..c0fc03d38d 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -553,35 +553,62 @@ default_libc_feupdateenv_test (fenv_t *e, int ex) # define libc_feresetround_noexl libc_fesetenvl #endif +#if HAVE_RM_CTX +/* Set/Restore Rounding Modes only when necessary. If defined, these functions + set/restore floating point state only if the state needed within the lexical + block is different from the current state. This saves a lot of time when + the floating point unit is much slower than the fixed point units. 
*/ + +# ifndef libc_feresetround_noex_ctx +# define libc_feresetround_noex_ctx libc_fesetenv_ctx +# endif +# ifndef libc_feresetround_noexf_ctx +# define libc_feresetround_noexf_ctx libc_fesetenvf_ctx +# endif +# ifndef libc_feresetround_noexl_ctx +# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx +# endif + +# ifndef libc_feholdsetround_53bit_ctx +# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx +# endif + +# ifndef libc_feresetround_53bit_ctx +# define libc_feresetround_53bit_ctx libc_feresetround_ctx +# endif + +# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \ + struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \ + ROUNDFUNC ## _ctx (&ctx, (RM)) +#else +# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \ + fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \ + ROUNDFUNC (&__libc_save_rm, (RM)) +#endif + /* Save and restore the rounding mode within a lexical block. */ #define SET_RESTORE_ROUND(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround))); \ - libc_feholdsetround (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround) #define SET_RESTORE_ROUNDF(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundf))); \ - libc_feholdsetroundf (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetroundf) #define SET_RESTORE_ROUNDL(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundl))); \ - libc_feholdsetroundl (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl) /* Save and restore the rounding mode within a lexical block, and also the set of exceptions raised within the block may be discarded. 
*/ #define SET_RESTORE_ROUND_NOEX(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noex))); \ - libc_feholdsetround (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex) #define SET_RESTORE_ROUND_NOEXF(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexf))); \ - libc_feholdsetroundf (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf) #define SET_RESTORE_ROUND_NOEXL(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexl))); \ - libc_feholdsetroundl (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl) /* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */ #define SET_RESTORE_ROUND_53BIT(RM) \ - fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_53bit))); \ - libc_feholdsetround_53bit (&__libc_save_rm, (RM)) + SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_53bit, \ + libc_feresetround_53bit) #define __nan(str) \ (__builtin_constant_p (str) && str[0] == '\0' ? NAN : __nan (str)) diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index 1f8336cf96..3998387c57 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -322,6 +322,179 @@ libc_feresetround_387 (fenv_t *e) # define libc_feholdsetround_53bit libc_feholdsetround_387_53bit #endif +/* We have support for rounding mode context. 
*/
+#define HAVE_RM_CTX 1
+
+/* SSE variant of "hold exceptions and set rounding" for a context.  The
+   MXCSR value read on entry is always stored in CTX->env; MXCSR itself is
+   reloaded only when the value derived from R differs from it, and
+   CTX->updated_status records whether that reload happened.  */
+static __always_inline void
+libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r)
+{
+  unsigned int saved_csr;
+
+  asm (STMXCSR " %0" : "=m" (*&saved_csr));
+  ctx->env.__mxcsr = saved_csr;
+
+  unsigned int wanted_csr = ((saved_csr | 0x1f80) & ~0x603f) | (r << 3);
+  ctx->updated_status = wanted_csr != saved_csr;
+  if (__glibc_unlikely (ctx->updated_status))
+    asm volatile (LDMXCSR " %0" : : "m" (*&wanted_csr));
+}
+
+/* Restore the environment saved in CTX without consulting
+   CTX->updated_status: exceptions raised inside the context are meant to
+   be overwritten.  For the same reason every fehold* helper stores into
+   CTX->env even when it ends up changing nothing.  */
+static __always_inline void
+libc_fesetenv_sse_ctx (struct rm_ctx *ctx)
+{
+  fenv_t *saved_env = &ctx->env;
+
+  libc_fesetenv_sse (saved_env);
+}
+
+/* Call libc_feupdateenv_test_sse on the saved environment, but only if
+   entering the context actually modified MXCSR.  */
+static __always_inline void
+libc_feupdateenv_sse_ctx (struct rm_ctx *ctx)
+{
+  if (!__glibc_unlikely (ctx->updated_status))
+    return;
+
+  libc_feupdateenv_test_sse (&ctx->env, 0);
+}
+
+/* 387 variant of "hold exceptions and set rounding" for a context.  R
+   carries the bits to OR into the control word after the _FPU_RC_ZERO and
+   _FPU_EXTENDED fields have been cleared.  The control word is written
+   only when the result differs from the one libc_feholdexcept_387 saved
+   in CTX->env.  */
+static __always_inline void
+libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r)
+{
+  libc_feholdexcept_387 (&ctx->env);
+
+  const fpu_control_t saved_cw = ctx->env.__control_word;
+  fpu_control_t wanted_cw = saved_cw & ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+  wanted_cw |= r | 0x3f;
+
+  ctx->updated_status = saved_cw != wanted_cw;
+  if (__glibc_unlikely (ctx->updated_status))
+    _FPU_SETCW (wanted_cw);
+}
+
+/* As libc_feholdexcept_setround_387_prec_ctx, with _FPU_EXTENDED added
+   to R.  */
+static __always_inline void
+libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r)
+{
+  const int r_extended = r | _FPU_EXTENDED;
+
+  libc_feholdexcept_setround_387_prec_ctx (ctx, r_extended);
+}
+
+/* As libc_feholdexcept_setround_387_prec_ctx, with _FPU_DOUBLE added
+   to R.  */
+static __always_inline void
+libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r)
+{
+  const int r_double = r | _FPU_DOUBLE;
+
+  libc_feholdexcept_setround_387_prec_ctx (ctx, r_double);
+}
+
+/* 387 variant of "hold and set rounding" for a context.  The current
+   control word is always saved in CTX->env; it is rewritten only when
+   replacing its _FPU_RC_ZERO and _FPU_EXTENDED fields with R yields a
+   different value.  */
+static __always_inline void
+libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t saved_cw;
+
+  _FPU_GETCW (saved_cw);
+  ctx->env.__control_word = saved_cw;
+
+  fpu_control_t wanted_cw = saved_cw & ~(_FPU_RC_ZERO | _FPU_EXTENDED);
+  wanted_cw |= r;
+
+  ctx->updated_status = wanted_cw != saved_cw;
+  if (__glibc_unlikely (ctx->updated_status))
+    _FPU_SETCW (wanted_cw);
+}
+
+/* As libc_feholdsetround_387_prec_ctx, with _FPU_EXTENDED added to R.  */
+static __always_inline void
+libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r)
+{
+  const int r_extended = r | _FPU_EXTENDED;
+
+  libc_feholdsetround_387_prec_ctx (ctx, r_extended);
+}
+
+/* As libc_feholdsetround_387_prec_ctx, with _FPU_DOUBLE added to R.  */
+static __always_inline void
+libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r)
+{
+  const int r_double = r | _FPU_DOUBLE;
+
+  libc_feholdsetround_387_prec_ctx (ctx, r_double);
+}
+
+/* SSE variant of "hold and set rounding" for a context.  MXCSR is always
+   saved in CTX->env and reloaded only when substituting R changes it.  */
+static __always_inline void
+libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r)
+{
+  unsigned int saved_csr;
+
+  asm (STMXCSR " %0" : "=m" (*&saved_csr));
+  ctx->env.__mxcsr = saved_csr;
+
+  unsigned int wanted_csr = (saved_csr & ~0x6000) | (r << 3);
+  ctx->updated_status = wanted_csr != saved_csr;
+  if (__glibc_unlikely (ctx->updated_status))
+    asm volatile (LDMXCSR " %0" : : "m" (*&wanted_csr));
+}
+
+/* Hand the saved environment to libc_feresetround_sse, but only if
+   entering the context modified MXCSR.  */
+static __always_inline void
+libc_feresetround_sse_ctx (struct rm_ctx *ctx)
+{
+  if (!__glibc_unlikely (ctx->updated_status))
+    return;
+
+  libc_feresetround_sse (&ctx->env);
+}
+
+/* Write the saved control word back, but only if entering the context
+   modified it.  */
+static __always_inline void
+libc_feresetround_387_ctx (struct rm_ctx *ctx)
+{
+  if (!__glibc_unlikely (ctx->updated_status))
+    return;
+
+  _FPU_SETCW (ctx->env.__control_word);
+}
+
+/* Call libc_feupdateenv_test_387 on the saved environment, but only if
+   entering the context modified the control word.  */
+static __always_inline void
+libc_feupdateenv_387_ctx (struct rm_ctx *ctx)
+{
+  if (!__glibc_unlikely (ctx->updated_status))
+    return;
+
+  libc_feupdateenv_test_387 (&ctx->env, 0);
+}
+
+/* Select the implementations behind the f-suffixed context helpers.  */
+#ifdef __SSE_MATH__
+# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_sse_ctx
+# define libc_fesetenvf_ctx libc_fesetenv_sse_ctx
+# define libc_feupdateenvf_ctx libc_feupdateenv_sse_ctx
+# define libc_feholdsetroundf_ctx libc_feholdsetround_sse_ctx
+# define libc_feresetroundf_ctx libc_feresetround_sse_ctx
+#else
+# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_387_ctx
+# define libc_feupdateenvf_ctx libc_feupdateenv_387_ctx
+# define libc_feholdsetroundf_ctx libc_feholdsetround_387_ctx
+# define libc_feresetroundf_ctx libc_feresetround_387_ctx
+#endif /* __SSE_MATH__ */
+
+#ifdef __SSE2_MATH__
+# define
libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx +# define libc_fesetenv_ctx libc_fesetenv_sse_ctx +# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx +# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx +# define libc_feresetround_ctx libc_feresetround_sse_ctx +#else +# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_387_ctx +# define libc_feupdateenv_ctx libc_feupdateenv_387_ctx +# define libc_feresetround_ctx libc_feresetround_387_ctx +#endif /* __SSE2_MATH__ */ + +#define libc_feholdexcept_setroundl_ctx libc_feholdexcept_setround_387_ctx +#define libc_feupdateenvl_ctx libc_feupdateenv_387_ctx +#define libc_feholdsetroundl_ctx libc_feholdsetround_387_ctx +#define libc_feresetroundl_ctx libc_feresetround_387_ctx + +#ifndef __SSE2_MATH__ +# define libc_feholdsetround_53bit_ctx libc_feholdsetround_387_53bit_ctx +# define libc_feresetround_53bit_ctx libc_feresetround_387_ctx +#endif + #undef __mxcsr #endif /* FENV_PRIVATE_H */