softfloat: Move mul_floats to softfloat-parts.c.inc

Rename to parts$N_mul.
Reimplement float128_mul with FloatParts128.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2020-11-11 20:44:57 -08:00
parent 3ff49e56a7
commit aca845275a
2 changed files with 128 additions and 129 deletions

View File

@ -362,3 +362,54 @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
p_nan:
return parts_pick_nan(a, b, s);
}
/*
* Returns the result of multiplying the floating-point values `a' and
* `b'. The operation is performed according to the IEC/IEEE Standard
* for Binary Floating-Point Arithmetic.
*/
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
float_status *s)
{
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
bool sign = a->sign ^ b->sign;
if (likely(ab_mask == float_cmask_normal)) {
FloatPartsW tmp;
frac_mulw(&tmp, a, b);
frac_truncjam(a, &tmp);
a->exp += b->exp + 1;
if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
frac_add(a, a, a);
a->exp -= 1;
}
a->sign = sign;
return a;
}
/* Inf * Zero == NaN */
if (unlikely(ab_mask == float_cmask_infzero)) {
float_raise(float_flag_invalid, s);
parts_default_nan(a, s);
return a;
}
if (unlikely(ab_mask & float_cmask_anynan)) {
return parts_pick_nan(a, b, s);
}
/* Multiply by 0 or Inf */
if (ab_mask & float_cmask_inf) {
a->cls = float_class_inf;
a->sign = sign;
return a;
}
g_assert(ab_mask & float_cmask_zero);
a->cls = float_class_zero;
a->sign = sign;
return a;
}

View File

@ -533,6 +533,16 @@ typedef struct {
uint64_t frac_lo;
} FloatParts128;
typedef struct {
FloatClass cls;
bool sign;
int32_t exp;
uint64_t frac_hi;
uint64_t frac_hm; /* high-middle */
uint64_t frac_lm; /* low-middle */
uint64_t frac_lo;
} FloatParts256;
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
@ -769,6 +779,14 @@ static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
#define parts_addsub(A, B, S, Z) \
PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
float_status *s);
#define parts_mul(A, B, S) \
PARTS_GENERIC_64_128(mul, A)(A, B, S)
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@ -859,6 +877,19 @@ static bool frac128_eqz(FloatParts128 *a)
#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
&r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
static void frac64_neg(FloatParts64 *a)
{
a->frac = -a->frac;
@ -955,23 +986,42 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
r->frac = a->frac_hi | (a->frac_lo != 0);
}
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
r->frac_hi = a->frac_hi;
r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}
#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
#define FloatPartsW glue(FloatParts,W)
#define N 64
#define W 128
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
#undef N
#undef W
#define N 128
#define W 256
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
#undef N
#undef W
#undef partsN
#undef FloatPartsN
#undef FloatPartsW
/*
* Pack/unpack routines with a specific FloatFmt.
@ -1250,89 +1300,42 @@ float128 float128_sub(float128 a, float128 b, float_status *status)
}
/*
* Returns the result of multiplying the floating-point values `a' and
* `b'. The operation is performed according to the IEC/IEEE Standard
* for Binary Floating-Point Arithmetic.
* Multiplication
*/
static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
{
bool sign = a.sign ^ b.sign;
if (a.cls == float_class_normal && b.cls == float_class_normal) {
uint64_t hi, lo;
int exp = a.exp + b.exp;
mul64To128(a.frac, b.frac, &hi, &lo);
if (hi & DECOMPOSED_IMPLICIT_BIT) {
exp += 1;
} else {
hi <<= 1;
}
hi |= (lo != 0);
/* Re-use a */
a.exp = exp;
a.sign = sign;
a.frac = hi;
return a;
}
/* handle all the NaN cases */
if (is_nan(a.cls) || is_nan(b.cls)) {
return *parts_pick_nan(&a, &b, s);
}
/* Inf * Zero == NaN */
if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
(a.cls == float_class_zero && b.cls == float_class_inf)) {
float_raise(float_flag_invalid, s);
parts_default_nan(&a, s);
return a;
}
/* Multiply by 0 or Inf */
if (a.cls == float_class_inf || a.cls == float_class_zero) {
a.sign = sign;
return a;
}
if (b.cls == float_class_inf || b.cls == float_class_zero) {
b.sign = sign;
return b;
}
g_assert_not_reached();
}
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
FloatParts64 pa, pb, pr;
FloatParts64 pa, pb, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
pr = parts_mul(&pa, &pb, status);
return float16_round_pack_canonical(&pr, status);
return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
FloatParts64 pa, pb, pr;
FloatParts64 pa, pb, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
pr = parts_mul(&pa, &pb, status);
return float32_round_pack_canonical(&pr, status);
return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
FloatParts64 pa, pb, pr;
FloatParts64 pa, pb, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
pr = parts_mul(&pa, &pb, status);
return float64_round_pack_canonical(&pr, status);
return float64_round_pack_canonical(pr, status);
}
static float hard_f32_mul(float a, float b)
@ -1359,20 +1362,28 @@ float64_mul(float64 a, float64 b, float_status *s)
f64_is_zon2, f64_addsubmul_post);
}
/*
* Returns the result of multiplying the bfloat16
* values `a' and `b'.
*/
bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
FloatParts64 pa, pb, pr;
FloatParts64 pa, pb, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
pr = mul_floats(pa, pb, status);
pr = parts_mul(&pa, &pb, status);
return bfloat16_round_pack_canonical(&pr, status);
return bfloat16_round_pack_canonical(pr, status);
}
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
FloatParts128 pa, pb, *pr;
float128_unpack_canonical(&pa, a, status);
float128_unpack_canonical(&pb, b, status);
pr = parts_mul(&pa, &pb, status);
return float128_round_pack_canonical(pr, status);
}
/*
@ -7068,69 +7079,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the quadruple-precision floating-point
| values `a' and `b'. The operation is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float128 float128_mul(float128 a, float128 b, float_status *status)
{
bool aSign, bSign, zSign;
int32_t aExp, bExp, zExp;
uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
aSig1 = extractFloat128Frac1( a );
aSig0 = extractFloat128Frac0( a );
aExp = extractFloat128Exp( a );
aSign = extractFloat128Sign( a );
bSig1 = extractFloat128Frac1( b );
bSig0 = extractFloat128Frac0( b );
bExp = extractFloat128Exp( b );
bSign = extractFloat128Sign( b );
zSign = aSign ^ bSign;
if ( aExp == 0x7FFF ) {
if ( ( aSig0 | aSig1 )
|| ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
return propagateFloat128NaN(a, b, status);
}
if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
return packFloat128( zSign, 0x7FFF, 0, 0 );
}
if ( bExp == 0x7FFF ) {
if (bSig0 | bSig1) {
return propagateFloat128NaN(a, b, status);
}
if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
invalid:
float_raise(float_flag_invalid, status);
return float128_default_nan(status);
}
return packFloat128( zSign, 0x7FFF, 0, 0 );
}
if ( aExp == 0 ) {
if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
}
if ( bExp == 0 ) {
if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
}
zExp = aExp + bExp - 0x4000;
aSig0 |= UINT64_C(0x0001000000000000);
shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
zSig2 |= ( zSig3 != 0 );
if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
shift128ExtraRightJamming(
zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
++zExp;
}
return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
}
/*----------------------------------------------------------------------------
| Returns the result of dividing the quadruple-precision floating-point value
| `a' by the corresponding value `b'. The operation is performed according to