fpu: Add float64_to_int{32,64}_modulo
Add versions of float64_to_int* which do not saturate the result. Reviewed-by: Christoph Muellner <christoph.muellner@vrull.eu> Tested-by: Christoph Muellner <christoph.muellner@vrull.eu> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Message-Id: <20230527141910.1885950-2-richard.henderson@linaro.org>
This commit is contained in:
parent
e665cf72fe
commit
e2041f4d5d
@ -1181,6 +1181,84 @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like partsN(float_to_sint), except do not saturate the result.
|
||||
* Instead, return the rounded unbounded precision two's compliment result,
|
||||
* modulo 2**(bitsm1 + 1).
|
||||
*/
|
||||
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
|
||||
FloatRoundMode rmode,
|
||||
int bitsm1, float_status *s)
|
||||
{
|
||||
int flags = 0;
|
||||
uint64_t r;
|
||||
bool overflow = false;
|
||||
|
||||
switch (p->cls) {
|
||||
case float_class_snan:
|
||||
flags |= float_flag_invalid_snan;
|
||||
/* fall through */
|
||||
case float_class_qnan:
|
||||
flags |= float_flag_invalid;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case float_class_inf:
|
||||
overflow = true;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case float_class_zero:
|
||||
return 0;
|
||||
|
||||
case float_class_normal:
|
||||
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||
if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
|
||||
flags = float_flag_inexact;
|
||||
}
|
||||
|
||||
if (p->exp <= DECOMPOSED_BINARY_POINT) {
|
||||
/*
|
||||
* Because we rounded to integral, and exp < 64,
|
||||
* we know frac_low is zero.
|
||||
*/
|
||||
r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
|
||||
if (p->exp < bitsm1) {
|
||||
/* Result in range. */
|
||||
} else if (p->exp == bitsm1) {
|
||||
/* The only in-range value is INT_MIN. */
|
||||
overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
|
||||
} else {
|
||||
overflow = true;
|
||||
}
|
||||
} else {
|
||||
/* Overflow, but there might still be bits to return. */
|
||||
int shl = p->exp - DECOMPOSED_BINARY_POINT;
|
||||
if (shl < N) {
|
||||
frac_shl(p, shl);
|
||||
r = p->frac_hi;
|
||||
} else {
|
||||
r = 0;
|
||||
}
|
||||
overflow = true;
|
||||
}
|
||||
|
||||
if (p->sign) {
|
||||
r = -r;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
if (overflow) {
|
||||
flags = float_flag_invalid | float_flag_invalid_cvti;
|
||||
}
|
||||
float_raise(flags, s);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Integer to float conversions
|
||||
*
|
||||
|
@ -852,11 +852,24 @@ static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
|
||||
/*
 * Generic front end for float_to_uint: expands through
 * PARTS_GENERIC_64_128 and forwards (P, R, Z, M, S) unchanged.
 * NOTE(review): presumably selects the parts64_/parts128_ variant from
 * the type of P -- confirm against the PARTS_GENERIC_64_128 definition.
 */
#define parts_float_to_uint(P, R, Z, M, S) \
|
||||
PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
|
||||
|
||||
/*
 * Forward declarations of the 64-bit and 128-bit parts variants of
 * float_to_sint_modulo, the non-saturating float to signed integer
 * conversion.  Both take the decomposed value, a rounding mode, bitsm1
 * (the result width in bits minus one) and the float_status.
 */
static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
|
||||
FloatRoundMode rmode,
|
||||
int bitsm1, float_status *s);
|
||||
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
|
||||
FloatRoundMode rmode,
|
||||
int bitsm1, float_status *s);
|
||||
|
||||
/*
 * Generic front end: expands through PARTS_GENERIC_64_128 and forwards
 * (P, R, M, S) unchanged.
 * NOTE(review): presumably picks one of the two variants declared above
 * from the type of P -- confirm against PARTS_GENERIC_64_128.
 */
#define parts_float_to_sint_modulo(P, R, M, S) \
|
||||
PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
|
||||
|
||||
/*
 * Forward declarations of the 64-bit and 128-bit parts variants of
 * sint_to_float.  Each takes an int64_t input, an int scale, and writes
 * through the FloatParts pointer (the functions return void).
 */
static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
|
||||
int scale, float_status *s);
|
||||
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
|
||||
int scale, float_status *s);
|
||||
|
||||
/*
 * Generic front end for float_to_sint; forwards (P, R, Z, MN, MX, S)
 * unchanged through PARTS_GENERIC_64_128.
 * NOTE(review): MN/MX look like the saturation bounds -- confirm against
 * the float_to_sint definition, which is not in this view.
 */
#define parts_float_to_sint(P, R, Z, MN, MX, S) \
|
||||
PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
|
||||
|
||||
/*
 * Generic front end for sint_to_float; forwards (P, I, Z, S) unchanged
 * through PARTS_GENERIC_64_128.
 */
#define parts_sint_to_float(P, I, Z, S) \
|
||||
PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
|
||||
|
||||
@ -3409,6 +3422,24 @@ int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
|
||||
return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
|
||||
}
|
||||
|
||||
int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
|
||||
float_status *s)
|
||||
{
|
||||
FloatParts64 p;
|
||||
|
||||
float64_unpack_canonical(&p, a, s);
|
||||
return parts_float_to_sint_modulo(&p, rmode, 31, s);
|
||||
}
|
||||
|
||||
int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
|
||||
float_status *s)
|
||||
{
|
||||
FloatParts64 p;
|
||||
|
||||
float64_unpack_canonical(&p, a, s);
|
||||
return parts_float_to_sint_modulo(&p, rmode, 63, s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Floating-point to unsigned integer conversions
|
||||
*/
|
||||
|
@ -751,6 +751,9 @@ int16_t float64_to_int16_round_to_zero(float64, float_status *status);
|
||||
/*
 * float64 to int32_t / int64_t conversions, round_to_zero variants.
 * Prototypes only; the definitions are not part of this view.
 */
int32_t float64_to_int32_round_to_zero(float64, float_status *status);
|
||||
int64_t float64_to_int64_round_to_zero(float64, float_status *status);
|
||||
|
||||
/*
 * float64 to signed integer conversions that do not saturate the result.
 * The caller supplies the rounding mode explicitly; exception flags are
 * reported through *status.
 */
int32_t float64_to_int32_modulo(float64, FloatRoundMode, float_status *status);
|
||||
int64_t float64_to_int64_modulo(float64, FloatRoundMode, float_status *status);
|
||||
|
||||
/*
 * float64 to unsigned 16/32/64-bit integer conversions taking an explicit
 * rounding mode and an int argument.
 * NOTE(review): the int is presumably the scalbn scale (power-of-two
 * exponent adjustment) -- confirm against the definitions.
 */
uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *);
|
||||
uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *);
|
||||
uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *);
|
||||
|
Loading…
x
Reference in New Issue
Block a user