498 lines
12 KiB
C
498 lines
12 KiB
C
/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
|
|
|
|
#ifndef __SSE_MATH__
|
|
#include "cpuid.h"
|
|
#endif
|
|
|
|
/* Report whether the SSE control/status register (MXCSR) may be used.

   Returns nonzero when SSE is available and 0 otherwise.  When this file
   is compiled with SSE math (__SSE_MATH__ defined) the answer is fixed at
   compile time.  Otherwise CPUID leaf 1 is queried at run time and the
   SSE feature bit of EDX is returned, so the nonzero value is not
   necessarily 1.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* The compiler already emits SSE arithmetic, so SSE must exist.  */
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return edx & bit_SSE;

  /* __get_cpuid reported failure for leaf 1: assume there is no SSE.  */
  return 0;
#endif
}
|
|
|
|
/* i387 exceptions -- see linux <fpu_control.h> header file for details.

   One bit per exception.  This file uses the same constants both for the
   exception-mask bits of the control word and for the exception-flag
   bits of the status word (and, suitably shifted, of MXCSR).  */
#define _FPU_MASK_IM  (1 << 0)  /* Invalid operation.  */
#define _FPU_MASK_DM  (1 << 1)  /* Denormal operand.  */
#define _FPU_MASK_ZM  (1 << 2)  /* Division by zero.  */
#define _FPU_MASK_OM  (1 << 3)  /* Overflow.  */
#define _FPU_MASK_UM  (1 << 4)  /* Underflow.  */
#define _FPU_MASK_PM  (1 << 5)  /* Inexact result.  */

/* All six exception mask bits (0x3f).  */
#define _FPU_MASK_ALL \
  (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM \
   | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM)

/* All six exception flag bits; same bit positions as the masks (0x3f).  */
#define _FPU_EX_ALL   _FPU_MASK_ALL
|
|
|
|
/* i387 rounding modes.

   These are the unshifted values of the two-bit rounding-control field.
   Users of these constants shift them into place: by 10 bits for the x87
   control word and by 13 bits for MXCSR.  */

#define _FPU_RC_NEAREST 0
#define _FPU_RC_DOWN    1
#define _FPU_RC_UP      2
#define _FPU_RC_ZERO    3

/* Mask covering the whole (unshifted) rounding-control field.  */
#define _FPU_RC_MASK    3
|
|
|
|
/* MXCSR bit 15: when set, flush-to-zero mode is enabled.  */

#define MXCSR_FTZ 0x8000
|
|
|
|
|
|
/* In-memory image of the x87 environment, laid out like the block
   written by FSTENV, followed by one extra word (__mxcsr) in which this
   file keeps a copy of the SSE control/status register.

   Only __control_word, __status_word and __mxcsr are accessed by name in
   this file; the remaining members exist to give the block the right
   layout.  */
struct fenv
{
  unsigned short __control_word;   /* x87 control word.  */
  unsigned short __unused1;
  unsigned short __status_word;    /* x87 status word (exception flags).  */
  unsigned short __unused2;
  unsigned short __tags;
  unsigned short __unused3;
  unsigned int __eip;
  unsigned short __cs_selector;
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;
  unsigned short __data_selector;
  unsigned short __unused5;
  unsigned int __mxcsr;            /* Filled in separately with STMXCSR.  */
} __attribute__ ((gcc_struct));
|
|
|
|
/* Check we can actually store the FPU state in the allocated size. */
|
|
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
|
|
"GFC_FPE_STATE_BUFFER_SIZE is too small");
|
|
|
|
/* Evaluate X / Y at run time purely for its floating-point side effects
   (the exception flags it raises); the quotient itself is discarded.

   The first, empty asm takes X as an in/out operand, which is intended
   to stop the compiler from treating X as a known constant and folding
   the division.  The second, volatile asm consumes the quotient so the
   division is not removed as dead code.  The operands are kept in x87
   registers ("t"/"f") or in an SSE register ("x"), matching the unit the
   compiler uses for floating-point math.  X must be a modifiable
   lvalue.  */
#ifndef __SSE_MATH__
# define __math_force_eval_div(x, y) \
  do \
    { \
      __asm__ ("" : "+t" (x)); \
      __asm__ __volatile__ ("" : : "f" (x / y)); \
    } \
  while (0)
#else
# define __math_force_eval_div(x, y) \
  do \
    { \
      __asm__ ("" : "+x" (x)); \
      __asm__ __volatile__ ("" : : "x" (x / y)); \
    } \
  while (0)
#endif
|
|
|
|
/* Raise the supported floating-point exceptions from EXCEPTS. Other
|
|
bits in EXCEPTS are ignored. Code originally borrowed from
|
|
libatomic/config/x86/fenv.c. */
|
|
|
|
static void
|
|
local_feraiseexcept (int excepts)
|
|
{
|
|
struct fenv temp;
|
|
|
|
if (excepts & _FPU_MASK_IM)
|
|
{
|
|
float f = 0.0f;
|
|
__math_force_eval_div (f, f);
|
|
}
|
|
if (excepts & _FPU_MASK_DM)
|
|
{
|
|
__asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
|
|
temp.__status_word |= _FPU_MASK_DM;
|
|
__asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
|
|
__asm__ __volatile__ ("fwait");
|
|
}
|
|
if (excepts & _FPU_MASK_ZM)
|
|
{
|
|
float f = 1.0f, g = 0.0f;
|
|
__math_force_eval_div (f, g);
|
|
}
|
|
if (excepts & _FPU_MASK_OM)
|
|
{
|
|
__asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
|
|
temp.__status_word |= _FPU_MASK_OM;
|
|
__asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
|
|
__asm__ __volatile__ ("fwait");
|
|
}
|
|
if (excepts & _FPU_MASK_UM)
|
|
{
|
|
__asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
|
|
temp.__status_word |= _FPU_MASK_UM;
|
|
__asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
|
|
__asm__ __volatile__ ("fwait");
|
|
}
|
|
if (excepts & _FPU_MASK_PM)
|
|
{
|
|
float f = 1.0f, g = 3.0f;
|
|
__math_force_eval_div (f, g);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
set_fpu_trap_exceptions (int trap, int notrap)
|
|
{
|
|
int exc_set = 0, exc_clr = 0;
|
|
unsigned short cw;
|
|
|
|
if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
|
|
if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
|
|
if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
|
|
if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
|
|
if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
|
|
if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
|
|
|
|
if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
|
|
if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
|
|
if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
|
|
if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
|
|
if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
|
|
if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
|
|
|
|
__asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
|
|
|
|
cw |= exc_clr;
|
|
cw &= ~exc_set;
|
|
|
|
__asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
|
|
|
|
if (has_sse())
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
|
|
/* The SSE exception masks are shifted by 7 bits. */
|
|
cw_sse |= (exc_clr << 7);
|
|
cw_sse &= ~(exc_set << 7);
|
|
|
|
/* Clear stalled exception flags. */
|
|
cw_sse &= ~_FPU_EX_ALL;
|
|
|
|
__asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
}
|
|
}
|
|
|
|
void
|
|
set_fpu (void)
|
|
{
|
|
set_fpu_trap_exceptions (options.fpe, 0);
|
|
}
|
|
|
|
int
|
|
get_fpu_trap_exceptions (void)
|
|
{
|
|
unsigned short cw;
|
|
int mask;
|
|
int res = 0;
|
|
|
|
__asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
|
|
mask = cw;
|
|
|
|
if (has_sse())
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
|
|
/* The SSE exception masks are shifted by 7 bits. */
|
|
mask |= (cw_sse >> 7);
|
|
}
|
|
|
|
mask = ~mask & _FPU_MASK_ALL;
|
|
|
|
if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
|
|
if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
|
|
if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
|
|
if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
|
|
if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
|
|
if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
|
|
|
|
return res;
|
|
}
|
|
|
|
/* Tell whether trapping on the exception FLAG is supported.  On this
   target the answer is always yes (1); FLAG is not examined.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
|
|
|
|
int
|
|
get_fpu_except_flags (void)
|
|
{
|
|
unsigned short cw;
|
|
int excepts;
|
|
int res = 0;
|
|
|
|
__asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
|
|
excepts = cw;
|
|
|
|
if (has_sse())
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
excepts |= cw_sse;
|
|
}
|
|
|
|
excepts &= _FPU_EX_ALL;
|
|
|
|
if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
|
|
if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
|
|
if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
|
|
if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
|
|
if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
|
|
if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
|
|
|
|
return res;
|
|
}
|
|
|
|
void
|
|
set_fpu_except_flags (int set, int clear)
|
|
{
|
|
struct fenv temp;
|
|
int exc_set = 0, exc_clr = 0;
|
|
|
|
/* Translate from GFC_PE_* values to _FPU_MASK_* values. */
|
|
if (set & GFC_FPE_INVALID)
|
|
exc_set |= _FPU_MASK_IM;
|
|
if (clear & GFC_FPE_INVALID)
|
|
exc_clr |= _FPU_MASK_IM;
|
|
|
|
if (set & GFC_FPE_DENORMAL)
|
|
exc_set |= _FPU_MASK_DM;
|
|
if (clear & GFC_FPE_DENORMAL)
|
|
exc_clr |= _FPU_MASK_DM;
|
|
|
|
if (set & GFC_FPE_ZERO)
|
|
exc_set |= _FPU_MASK_ZM;
|
|
if (clear & GFC_FPE_ZERO)
|
|
exc_clr |= _FPU_MASK_ZM;
|
|
|
|
if (set & GFC_FPE_OVERFLOW)
|
|
exc_set |= _FPU_MASK_OM;
|
|
if (clear & GFC_FPE_OVERFLOW)
|
|
exc_clr |= _FPU_MASK_OM;
|
|
|
|
if (set & GFC_FPE_UNDERFLOW)
|
|
exc_set |= _FPU_MASK_UM;
|
|
if (clear & GFC_FPE_UNDERFLOW)
|
|
exc_clr |= _FPU_MASK_UM;
|
|
|
|
if (set & GFC_FPE_INEXACT)
|
|
exc_set |= _FPU_MASK_PM;
|
|
if (clear & GFC_FPE_INEXACT)
|
|
exc_clr |= _FPU_MASK_PM;
|
|
|
|
|
|
/* Change the flags. This is tricky on 387 (unlike SSE), because we have
|
|
FNSTSW but no FLDSW instruction. */
|
|
__asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
|
|
temp.__status_word &= ~exc_clr;
|
|
__asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
|
|
|
|
/* Change the flags on SSE. */
|
|
|
|
if (has_sse())
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
cw_sse &= ~exc_clr;
|
|
__asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
}
|
|
|
|
local_feraiseexcept (exc_set);
|
|
}
|
|
|
|
/* Tell whether the exception flag FLAG is supported.  On this target the
   answer is always yes (1); FLAG is not examined.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
|
|
|
|
void
|
|
set_fpu_rounding_mode (int round)
|
|
{
|
|
int round_mode;
|
|
unsigned short cw;
|
|
|
|
switch (round)
|
|
{
|
|
case GFC_FPE_TONEAREST:
|
|
round_mode = _FPU_RC_NEAREST;
|
|
break;
|
|
case GFC_FPE_UPWARD:
|
|
round_mode = _FPU_RC_UP;
|
|
break;
|
|
case GFC_FPE_DOWNWARD:
|
|
round_mode = _FPU_RC_DOWN;
|
|
break;
|
|
case GFC_FPE_TOWARDZERO:
|
|
round_mode = _FPU_RC_ZERO;
|
|
break;
|
|
default:
|
|
return; /* Should be unreachable. */
|
|
}
|
|
|
|
__asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
|
|
|
|
/* The x87 round control bits are shifted by 10 bits. */
|
|
cw &= ~(_FPU_RC_MASK << 10);
|
|
cw |= round_mode << 10;
|
|
|
|
__asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
|
|
|
|
if (has_sse())
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
|
|
/* The SSE round control bits are shifted by 13 bits. */
|
|
cw_sse &= ~(_FPU_RC_MASK << 13);
|
|
cw_sse |= round_mode << 13;
|
|
|
|
__asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
}
|
|
}
|
|
|
|
int
|
|
get_fpu_rounding_mode (void)
|
|
{
|
|
int round_mode;
|
|
|
|
#ifdef __SSE_MATH__
|
|
unsigned int cw;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
|
|
|
|
/* The SSE round control bits are shifted by 13 bits. */
|
|
round_mode = cw >> 13;
|
|
#else
|
|
unsigned short cw;
|
|
|
|
__asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
|
|
|
|
/* The x87 round control bits are shifted by 10 bits. */
|
|
round_mode = cw >> 10;
|
|
#endif
|
|
|
|
round_mode &= _FPU_RC_MASK;
|
|
|
|
switch (round_mode)
|
|
{
|
|
case _FPU_RC_NEAREST:
|
|
return GFC_FPE_TONEAREST;
|
|
case _FPU_RC_UP:
|
|
return GFC_FPE_UPWARD;
|
|
case _FPU_RC_DOWN:
|
|
return GFC_FPE_DOWNWARD;
|
|
case _FPU_RC_ZERO:
|
|
return GFC_FPE_TOWARDZERO;
|
|
default:
|
|
return 0; /* Should be unreachable. */
|
|
}
|
|
}
|
|
|
|
/* Tell whether the rounding mode MODE is supported.  On this target the
   answer is always yes (1); MODE is not examined.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
|
|
|
|
void
|
|
get_fpu_state (void *state)
|
|
{
|
|
struct fenv *envp = state;
|
|
|
|
__asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
|
|
|
|
/* fnstenv has the side effect of masking all exceptions, so we need
|
|
to restore the control word after that. */
|
|
__asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
|
|
|
|
if (has_sse())
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
|
|
}
|
|
|
|
void
|
|
set_fpu_state (void *state)
|
|
{
|
|
struct fenv *envp = state;
|
|
|
|
/* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
|
|
complex than this, but I think it suffices in our case. */
|
|
__asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
|
|
|
|
if (has_sse())
|
|
__asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
|
|
}
|
|
|
|
|
|
/* Tell whether the underflow mode can be controlled for values of the
   given KIND.  Returns 1 when SSE is available and KIND is 4 or 8
   (presumably the Fortran real kinds), and 0 in every other case.  */
int
support_fpu_underflow_control (int kind)
{
  const int kind_ok = (kind == 4 || kind == 8);

  /* Underflow control is implemented through MXCSR only.  */
  return (has_sse () && kind_ok) ? 1 : 0;
}
|
|
|
|
|
|
int
|
|
get_fpu_underflow_mode (void)
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
if (!has_sse())
|
|
return 1;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
|
|
/* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
|
|
return (cw_sse & MXCSR_FTZ) ? 0 : 1;
|
|
}
|
|
|
|
|
|
void
|
|
set_fpu_underflow_mode (int gradual)
|
|
{
|
|
unsigned int cw_sse;
|
|
|
|
if (!has_sse())
|
|
return;
|
|
|
|
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
|
|
if (gradual)
|
|
cw_sse &= ~MXCSR_FTZ;
|
|
else
|
|
cw_sse |= MXCSR_FTZ;
|
|
|
|
__asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
}
|
|
|