softfloat: Convert floatx80 to integer to FloatParts

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2020-11-21 18:13:10 -08:00
parent 8ae5719cd4
commit a1fc527bfb
1 changed files with 42 additions and 294 deletions

View File

@ -2829,6 +2829,28 @@ static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
int scale, float_status *s)
{
FloatParts128 p;
if (!floatx80_unpack_canonical(&p, a, s)) {
parts_default_nan(&p, s);
}
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
int scale, float_status *s)
{
FloatParts128 p;
if (!floatx80_unpack_canonical(&p, a, s)) {
parts_default_nan(&p, s);
}
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int8_t float16_to_int8(float16 a, float_status *s)
{
return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
@ -2889,6 +2911,16 @@ int64_t float128_to_int64(float128 a, float_status *s)
return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}
int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@ -2944,6 +2976,16 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
{
return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
}
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
{
return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
@ -4160,127 +4202,6 @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
return a;
}
/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
| input. If `zSign' is 1, the input is negated before being converted to an
| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
| is simply rounded to an integer, with the inexact exception raised if the
| input cannot be represented exactly as an integer. However, if the fixed-
| point input is too large, the invalid exception is raised and the largest
| positive or negative integer is returned.
*----------------------------------------------------------------------------*/
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
float_status *status)
{
int8_t roundingMode;
bool roundNearestEven;
int8_t roundIncrement, roundBits;
int32_t z;
roundingMode = status->float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
switch (roundingMode) {
case float_round_nearest_even:
case float_round_ties_away:
roundIncrement = 0x40;
break;
case float_round_to_zero:
roundIncrement = 0;
break;
case float_round_up:
roundIncrement = zSign ? 0 : 0x7f;
break;
case float_round_down:
roundIncrement = zSign ? 0x7f : 0;
break;
case float_round_to_odd:
roundIncrement = absZ & 0x80 ? 0 : 0x7f;
break;
default:
abort();
}
roundBits = absZ & 0x7F;
absZ = ( absZ + roundIncrement )>>7;
if (!(roundBits ^ 0x40) && roundNearestEven) {
absZ &= ~1;
}
z = absZ;
if ( zSign ) z = - z;
if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
float_raise(float_flag_invalid, status);
return zSign ? INT32_MIN : INT32_MAX;
}
if (roundBits) {
float_raise(float_flag_inexact, status);
}
return z;
}
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer. However, if the fixed-point input is too large, the invalid
| exception is raised and the largest positive or negative integer is
| returned.
*----------------------------------------------------------------------------*/
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
float_status *status)
{
int8_t roundingMode;
bool roundNearestEven, increment;
int64_t z;
roundingMode = status->float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
switch (roundingMode) {
case float_round_nearest_even:
case float_round_ties_away:
increment = ((int64_t) absZ1 < 0);
break;
case float_round_to_zero:
increment = 0;
break;
case float_round_up:
increment = !zSign && absZ1;
break;
case float_round_down:
increment = zSign && absZ1;
break;
case float_round_to_odd:
increment = !(absZ0 & 1) && absZ1;
break;
default:
abort();
}
if ( increment ) {
++absZ0;
if ( absZ0 == 0 ) goto overflow;
if (!(absZ1 << 1) && roundNearestEven) {
absZ0 &= ~1;
}
}
z = absZ0;
if ( zSign ) z = - z;
if ( z && ( ( z < 0 ) ^ zSign ) ) {
overflow:
float_raise(float_flag_invalid, status);
return zSign ? INT64_MIN : INT64_MAX;
}
if (absZ1) {
float_raise(float_flag_inexact, status);
}
return z;
}
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
@ -5486,179 +5407,6 @@ float64 float64_log2(float64 a, float_status *status)
return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format. The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic---which means in particular that the conversion
| is rounded according to the current rounding mode. If `a' is a NaN, the
| largest positive integer is returned. Otherwise, if the conversion
| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/
int32_t floatx80_to_int32(floatx80 a, float_status *status)
{
bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig;
if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1 << 31;
}
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
shiftCount = 0x4037 - aExp;
if ( shiftCount <= 0 ) shiftCount = 1;
shift64RightJamming( aSig, shiftCount, &aSig );
return roundAndPackInt32(aSign, aSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format. The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero. If `a' is a NaN, the largest positive integer is returned.
| Otherwise, if the conversion overflows, the largest integer with the same
| sign as `a' is returned.
*----------------------------------------------------------------------------*/
int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
{
bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig, savedASig;
int32_t z;
if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1 << 31;
}
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
if ( 0x401E < aExp ) {
if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
goto invalid;
}
else if ( aExp < 0x3FFF ) {
if (aExp || aSig) {
float_raise(float_flag_inexact, status);
}
return 0;
}
shiftCount = 0x403E - aExp;
savedASig = aSig;
aSig >>= shiftCount;
z = aSig;
if ( aSign ) z = - z;
if ( ( z < 0 ) ^ aSign ) {
invalid:
float_raise(float_flag_invalid, status);
return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
}
if ( ( aSig<<shiftCount ) != savedASig ) {
float_raise(float_flag_inexact, status);
}
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 64-bit two's complement integer format. The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic---which means in particular that the conversion
| is rounded according to the current rounding mode. If `a' is a NaN,
| the largest positive integer is returned. Otherwise, if the conversion
| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/
int64_t floatx80_to_int64(floatx80 a, float_status *status)
{
bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig, aSigExtra;
if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1ULL << 63;
}
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
shiftCount = 0x403E - aExp;
if ( shiftCount <= 0 ) {
if ( shiftCount ) {
float_raise(float_flag_invalid, status);
if (!aSign || floatx80_is_any_nan(a)) {
return INT64_MAX;
}
return INT64_MIN;
}
aSigExtra = 0;
}
else {
shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
}
return roundAndPackInt64(aSign, aSig, aSigExtra, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 64-bit two's complement integer format. The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero. If `a' is a NaN, the largest positive integer is returned.
| Otherwise, if the conversion overflows, the largest integer with the same
| sign as `a' is returned.
*----------------------------------------------------------------------------*/
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig;
int64_t z;
if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1ULL << 63;
}
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
shiftCount = aExp - 0x403E;
if ( 0 <= shiftCount ) {
aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
if ( ( a.high != 0xC03E ) || aSig ) {
float_raise(float_flag_invalid, status);
if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
return INT64_MAX;
}
}
return INT64_MIN;
}
else if ( aExp < 0x3FFF ) {
if (aExp | aSig) {
float_raise(float_flag_inexact, status);
}
return 0;
}
z = aSig>>( - shiftCount );
if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
float_raise(float_flag_inexact, status);
}
if ( aSign ) z = - z;
return z;
}
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the