[Patch 14/17] [libgcc, ARM] Generalise float-to-half conversion function.
libgcc/ * config/arm/fp16.c (struct format): New. (binary32): New. (__gnu_float2h_internal): New. Body moved from __gnu_f2h_internal and generalized. (__gnu_f2h_internal): Move body to function __gnu_float2h_internal. Call it with binary32. Co-Authored-By: Matthew Wahab <matthew.wahab@arm.com> From-SVN: r242781
This commit is contained in:
parent
ba75a1747c
commit
8630cadbc5
@ -1,3 +1,13 @@
|
|||||||
|
2016-11-23 James Greenhalgh <james.greenhalgh@arm.com>
|
||||||
|
Matthew Wahab <matthew.wahab@arm.com>
|
||||||
|
|
||||||
|
* config/arm/fp16.c (struct format): New.
|
||||||
|
(binary32): New.
|
||||||
|
(__gnu_float2h_internal): New. Body moved from
|
||||||
|
__gnu_f2h_internal and generalized.
|
||||||
|
(__gnu_f2h_internal): Move body to function __gnu_float2h_internal.
|
||||||
|
Call it with binary32.
|
||||||
|
|
||||||
2016-11-23 James Greenhalgh <james.greenhalgh@arm.com>
|
2016-11-23 James Greenhalgh <james.greenhalgh@arm.com>
|
||||||
|
|
||||||
* soft-fp/extendhftf2.c: Update from glibc.
|
* soft-fp/extendhftf2.c: Update from glibc.
|
||||||
|
@ -22,40 +22,74 @@
|
|||||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
<http://www.gnu.org/licenses/>. */
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
static inline unsigned short
|
struct format
|
||||||
__gnu_f2h_internal(unsigned int a, int ieee)
|
|
||||||
{
|
{
|
||||||
unsigned short sign = (a >> 16) & 0x8000;
|
/* Number of bits. */
|
||||||
int aexp = (a >> 23) & 0xff;
|
unsigned long long size;
|
||||||
unsigned int mantissa = a & 0x007fffff;
|
/* Exponent bias. */
|
||||||
unsigned int mask;
|
unsigned long long bias;
|
||||||
unsigned int increment;
|
/* Exponent width in bits. */
|
||||||
|
unsigned long long exponent;
|
||||||
|
/* Significand precision in explicitly stored bits. */
|
||||||
|
unsigned long long significand;
|
||||||
|
};
|
||||||
|
|
||||||
if (aexp == 0xff)
|
static const struct format
|
||||||
|
binary32 =
|
||||||
|
{
|
||||||
|
32, /* size. */
|
||||||
|
127, /* bias. */
|
||||||
|
8, /* exponent. */
|
||||||
|
23 /* significand. */
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline unsigned short
|
||||||
|
__gnu_float2h_internal (const struct format* fmt,
|
||||||
|
unsigned long long a, int ieee)
|
||||||
|
{
|
||||||
|
unsigned long long point = 1ULL << fmt->significand;
|
||||||
|
unsigned short sign = (a >> (fmt->size - 16)) & 0x8000;
|
||||||
|
int aexp;
|
||||||
|
unsigned long long mantissa;
|
||||||
|
unsigned long long mask;
|
||||||
|
unsigned long long increment;
|
||||||
|
|
||||||
|
/* Get the exponent and mantissa encodings. */
|
||||||
|
mantissa = a & (point - 1);
|
||||||
|
|
||||||
|
mask = (1 << fmt->exponent) - 1;
|
||||||
|
aexp = (a >> fmt->significand) & mask;
|
||||||
|
|
||||||
|
/* Infinity, NaN and alternative format special case. */
|
||||||
|
if (((unsigned int) aexp) == mask)
|
||||||
{
|
{
|
||||||
if (!ieee)
|
if (!ieee)
|
||||||
return sign;
|
return sign;
|
||||||
if (mantissa == 0)
|
if (mantissa == 0)
|
||||||
return sign | 0x7c00; /* Infinity. */
|
return sign | 0x7c00; /* Infinity. */
|
||||||
/* Remaining cases are NaNs. Convert SNaN to QNaN. */
|
/* Remaining cases are NaNs. Convert SNaN to QNaN. */
|
||||||
return sign | 0x7e00 | (mantissa >> 13);
|
return sign | 0x7e00 | (mantissa >> (fmt->significand - 10));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Zero. */
|
||||||
if (aexp == 0 && mantissa == 0)
|
if (aexp == 0 && mantissa == 0)
|
||||||
return sign;
|
return sign;
|
||||||
|
|
||||||
aexp -= 127;
|
/* Construct the exponent and mantissa. */
|
||||||
|
aexp -= fmt->bias;
|
||||||
|
|
||||||
|
/* Decimal point is immediately after the significand. */
|
||||||
|
mantissa |= point;
|
||||||
|
|
||||||
/* Decimal point between bits 22 and 23. */
|
|
||||||
mantissa |= 0x00800000;
|
|
||||||
if (aexp < -14)
|
if (aexp < -14)
|
||||||
{
|
{
|
||||||
mask = 0x00ffffff;
|
mask = point | (point - 1);
|
||||||
|
/* Minimum exponent for half-precision is 2^-24. */
|
||||||
if (aexp >= -25)
|
if (aexp >= -25)
|
||||||
mask >>= 25 + aexp;
|
mask >>= 25 + aexp;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
mask = 0x00001fff;
|
mask = (point - 1) >> 10;
|
||||||
|
|
||||||
/* Round. */
|
/* Round. */
|
||||||
if (mantissa & mask)
|
if (mantissa & mask)
|
||||||
@ -64,8 +98,8 @@ __gnu_f2h_internal(unsigned int a, int ieee)
|
|||||||
if ((mantissa & mask) == increment)
|
if ((mantissa & mask) == increment)
|
||||||
increment = mantissa & (increment << 1);
|
increment = mantissa & (increment << 1);
|
||||||
mantissa += increment;
|
mantissa += increment;
|
||||||
if (mantissa >= 0x01000000)
|
if (mantissa >= (point << 1))
|
||||||
{
|
{
|
||||||
mantissa >>= 1;
|
mantissa >>= 1;
|
||||||
aexp++;
|
aexp++;
|
||||||
}
|
}
|
||||||
@ -91,9 +125,29 @@ __gnu_f2h_internal(unsigned int a, int ieee)
|
|||||||
aexp = -14;
|
aexp = -14;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We leave the leading 1 in the mantissa, and subtract one
|
/* Encode the final 16-bit floating-point value.
|
||||||
from the exponent bias to compensate. */
|
|
||||||
return sign | (((aexp + 14) << 10) + (mantissa >> 13));
|
This is formed of the sign bit, the bias-adjusted exponent, and the
|
||||||
|
calculated mantissa, with the following caveats:
|
||||||
|
|
||||||
|
1. The mantissa calculated after rounding could have a leading 1.
|
||||||
|
To compensate for this, subtract one from the exponent bias (15)
|
||||||
|
before adding it to the calculated exponent.
|
||||||
|
2. When we were calculating rounding, we left the mantissa with the
|
||||||
|
number of bits of the source operand; it needs to be reduced to ten
|
||||||
|
bits (+1 for the aforementioned leading 1) by shifting right by
|
||||||
|
the number of bits in the source mantissa - 10.
|
||||||
|
3. To ensure the leading 1 in the mantissa is applied to the exponent
|
||||||
|
we need to add the mantissa rather than apply a bitwise "or"
|
||||||
|
to it. */
|
||||||
|
|
||||||
|
return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned short
|
||||||
|
__gnu_f2h_internal (unsigned int a, int ieee)
|
||||||
|
{
|
||||||
|
return __gnu_float2h_internal (&binary32, (unsigned long long) a, ieee);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int
|
unsigned int
|
||||||
|
Loading…
Reference in New Issue
Block a user