Reorg FloatParts to use QEMU_GENERIC.
Begin replacing the Berkeley float128 routines with FloatParts128. - includes a new implementation of float128_muladd - includes the snan silencing that was missing from float{32,64}_to_float128 and float128_to_float{32,64}. - does not include float128_min/max* (written but not yet reviewed). -----BEGIN PGP SIGNATURE----- iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmChD54dHHJpY2hhcmQu aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV94uAgApJMxVvkRkDuyOXG2 cM0dC+GQQ0prm5id2AW2JREiET+jo2NV7uU8IQGEQq3rtG8trws45gMQFgSRYJk2 sLlAIt4QqD6qzb2H9z+JhOx1yITlsuwrvr+BAwVtK7gw6l4LxKAs35SwWpz/Z5/2 R63bLyontVzzi40Bc4pB/h3CxdOR+UjZ2a2kDIZFuI/j+9pnPoEL/Vp9XMg85ex+ g21rRwE6qv4hrGMhej5YBKQoleoieL3FQ0sXQLi5lLNYejBpU45PjdgdEwbZIBhT 4sQkzV2HRrd84OrQIJU3Jd+zHZoSq6JQUZRSGAnqC7Mvigplo24J5GRjh6T8WoaI y495Lg== =MR2G -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-fp-20210516' into staging Reorg FloatParts to use QEMU_GENERIC. Begin replacing the Berkeley float128 routines with FloatParts128. - includes a new implementation of float128_muladd - includes the snan silencing that was missing from float{32,64}_to_float128 and float128_to_float{32,64}. - does not include float128_min/max* (written but not yet reviewed). 
# gpg: Signature made Sun 16 May 2021 13:27:10 BST # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full] # Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F * remotes/rth-gitlab/tags/pull-fp-20210516: (46 commits) softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc softfloat: Move round_to_int to softfloat-parts.c.inc softfloat: Convert float-to-float conversions with float128 softfloat: Split float_to_float softfloat: Move div_floats to softfloat-parts.c.inc softfloat: Introduce sh[lr]_double primitives softfloat: Tidy mul128By64To192 softfloat: Use add192 in mul128To256 softfloat: Use mulu64 for mul64To128 softfloat: Move muladd_floats to softfloat-parts.c.inc softfloat: Move mul_floats to softfloat-parts.c.inc softfloat: Implement float128_add/sub via parts softfloat: Move addsub_floats to softfloat-parts.c.inc softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h softfloat: Move round_canonical to softfloat-parts.c.inc softfloat: Move sf_canonicalize to softfloat-parts.c.inc softfloat: Move pick_nan_muladd to softfloat-parts.c.inc softfloat: Move pick_nan to softfloat-parts.c.inc softfloat: Move return_nan to softfloat-parts.c.inc softfloat: Convert float128_default_nan to parts ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
1acbc0fdf2
|
@ -1073,9 +1073,8 @@ void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
||||||
int32_t ai = *(int32_t *)(a + i);
|
int32_t ai = *(int32_t *)(a + i);
|
||||||
int32_t bi = *(int32_t *)(b + i);
|
int32_t bi = *(int32_t *)(b + i);
|
||||||
int32_t di = ai + bi;
|
int32_t di;
|
||||||
if (((di ^ ai) &~ (ai ^ bi)) < 0) {
|
if (sadd32_overflow(ai, bi, &di)) {
|
||||||
/* Signed overflow. */
|
|
||||||
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
||||||
}
|
}
|
||||||
*(int32_t *)(d + i) = di;
|
*(int32_t *)(d + i) = di;
|
||||||
|
@ -1091,9 +1090,8 @@ void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
||||||
int64_t ai = *(int64_t *)(a + i);
|
int64_t ai = *(int64_t *)(a + i);
|
||||||
int64_t bi = *(int64_t *)(b + i);
|
int64_t bi = *(int64_t *)(b + i);
|
||||||
int64_t di = ai + bi;
|
int64_t di;
|
||||||
if (((di ^ ai) &~ (ai ^ bi)) < 0) {
|
if (sadd64_overflow(ai, bi, &di)) {
|
||||||
/* Signed overflow. */
|
|
||||||
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
||||||
}
|
}
|
||||||
*(int64_t *)(d + i) = di;
|
*(int64_t *)(d + i) = di;
|
||||||
|
@ -1143,9 +1141,8 @@ void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
||||||
int32_t ai = *(int32_t *)(a + i);
|
int32_t ai = *(int32_t *)(a + i);
|
||||||
int32_t bi = *(int32_t *)(b + i);
|
int32_t bi = *(int32_t *)(b + i);
|
||||||
int32_t di = ai - bi;
|
int32_t di;
|
||||||
if (((di ^ ai) & (ai ^ bi)) < 0) {
|
if (ssub32_overflow(ai, bi, &di)) {
|
||||||
/* Signed overflow. */
|
|
||||||
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
||||||
}
|
}
|
||||||
*(int32_t *)(d + i) = di;
|
*(int32_t *)(d + i) = di;
|
||||||
|
@ -1161,9 +1158,8 @@ void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
||||||
int64_t ai = *(int64_t *)(a + i);
|
int64_t ai = *(int64_t *)(a + i);
|
||||||
int64_t bi = *(int64_t *)(b + i);
|
int64_t bi = *(int64_t *)(b + i);
|
||||||
int64_t di = ai - bi;
|
int64_t di;
|
||||||
if (((di ^ ai) & (ai ^ bi)) < 0) {
|
if (ssub64_overflow(ai, bi, &di)) {
|
||||||
/* Signed overflow. */
|
|
||||||
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
||||||
}
|
}
|
||||||
*(int64_t *)(d + i) = di;
|
*(int64_t *)(d + i) = di;
|
||||||
|
@ -1209,8 +1205,8 @@ void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||||
uint32_t ai = *(uint32_t *)(a + i);
|
uint32_t ai = *(uint32_t *)(a + i);
|
||||||
uint32_t bi = *(uint32_t *)(b + i);
|
uint32_t bi = *(uint32_t *)(b + i);
|
||||||
uint32_t di = ai + bi;
|
uint32_t di;
|
||||||
if (di < ai) {
|
if (uadd32_overflow(ai, bi, &di)) {
|
||||||
di = UINT32_MAX;
|
di = UINT32_MAX;
|
||||||
}
|
}
|
||||||
*(uint32_t *)(d + i) = di;
|
*(uint32_t *)(d + i) = di;
|
||||||
|
@ -1226,8 +1222,8 @@ void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||||
uint64_t ai = *(uint64_t *)(a + i);
|
uint64_t ai = *(uint64_t *)(a + i);
|
||||||
uint64_t bi = *(uint64_t *)(b + i);
|
uint64_t bi = *(uint64_t *)(b + i);
|
||||||
uint64_t di = ai + bi;
|
uint64_t di;
|
||||||
if (di < ai) {
|
if (uadd64_overflow(ai, bi, &di)) {
|
||||||
di = UINT64_MAX;
|
di = UINT64_MAX;
|
||||||
}
|
}
|
||||||
*(uint64_t *)(d + i) = di;
|
*(uint64_t *)(d + i) = di;
|
||||||
|
@ -1273,8 +1269,8 @@ void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||||
uint32_t ai = *(uint32_t *)(a + i);
|
uint32_t ai = *(uint32_t *)(a + i);
|
||||||
uint32_t bi = *(uint32_t *)(b + i);
|
uint32_t bi = *(uint32_t *)(b + i);
|
||||||
uint32_t di = ai - bi;
|
uint32_t di;
|
||||||
if (ai < bi) {
|
if (usub32_overflow(ai, bi, &di)) {
|
||||||
di = 0;
|
di = 0;
|
||||||
}
|
}
|
||||||
*(uint32_t *)(d + i) = di;
|
*(uint32_t *)(d + i) = di;
|
||||||
|
@ -1290,8 +1286,8 @@ void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
|
||||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||||
uint64_t ai = *(uint64_t *)(a + i);
|
uint64_t ai = *(uint64_t *)(a + i);
|
||||||
uint64_t bi = *(uint64_t *)(b + i);
|
uint64_t bi = *(uint64_t *)(b + i);
|
||||||
uint64_t di = ai - bi;
|
uint64_t di;
|
||||||
if (ai < bi) {
|
if (usub64_overflow(ai, bi, &di)) {
|
||||||
di = 0;
|
di = 0;
|
||||||
}
|
}
|
||||||
*(uint64_t *)(d + i) = di;
|
*(uint64_t *)(d + i) = di;
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
* Floating point arithmetic implementation
|
||||||
|
*
|
||||||
|
* The code in this source file is derived from release 2a of the SoftFloat
|
||||||
|
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
|
||||||
|
* some later contributions) are provided under that license, as detailed below.
|
||||||
|
* It has subsequently been modified by contributors to the QEMU Project,
|
||||||
|
* so some portions are provided under:
|
||||||
|
* the SoftFloat-2a license
|
||||||
|
* the BSD license
|
||||||
|
* GPL-v2-or-later
|
||||||
|
*
|
||||||
|
* Any future contributions to this file after December 1st 2014 will be
|
||||||
|
* taken to be licensed under the Softfloat-2a license unless specifically
|
||||||
|
* indicated otherwise.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void partsN(add_normal)(FloatPartsN *a, FloatPartsN *b)
|
||||||
|
{
|
||||||
|
int exp_diff = a->exp - b->exp;
|
||||||
|
|
||||||
|
if (exp_diff > 0) {
|
||||||
|
frac_shrjam(b, exp_diff);
|
||||||
|
} else if (exp_diff < 0) {
|
||||||
|
frac_shrjam(a, -exp_diff);
|
||||||
|
a->exp = b->exp;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (frac_add(a, a, b)) {
|
||||||
|
frac_shrjam(a, 1);
|
||||||
|
a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||||
|
a->exp += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool partsN(sub_normal)(FloatPartsN *a, FloatPartsN *b)
|
||||||
|
{
|
||||||
|
int exp_diff = a->exp - b->exp;
|
||||||
|
int shift;
|
||||||
|
|
||||||
|
if (exp_diff > 0) {
|
||||||
|
frac_shrjam(b, exp_diff);
|
||||||
|
frac_sub(a, a, b);
|
||||||
|
} else if (exp_diff < 0) {
|
||||||
|
a->exp = b->exp;
|
||||||
|
a->sign ^= 1;
|
||||||
|
frac_shrjam(a, -exp_diff);
|
||||||
|
frac_sub(a, b, a);
|
||||||
|
} else if (frac_sub(a, a, b)) {
|
||||||
|
/* Overflow means that A was less than B. */
|
||||||
|
frac_neg(a);
|
||||||
|
a->sign ^= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
shift = frac_normalize(a);
|
||||||
|
if (likely(shift < N)) {
|
||||||
|
a->exp -= shift;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
a->cls = float_class_zero;
|
||||||
|
return false;
|
||||||
|
}
|
|
@ -0,0 +1,817 @@
|
||||||
|
/*
|
||||||
|
* QEMU float support
|
||||||
|
*
|
||||||
|
* The code in this source file is derived from release 2a of the SoftFloat
|
||||||
|
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
|
||||||
|
* some later contributions) are provided under that license, as detailed below.
|
||||||
|
* It has subsequently been modified by contributors to the QEMU Project,
|
||||||
|
* so some portions are provided under:
|
||||||
|
* the SoftFloat-2a license
|
||||||
|
* the BSD license
|
||||||
|
* GPL-v2-or-later
|
||||||
|
*
|
||||||
|
* Any future contributions to this file after December 1st 2014 will be
|
||||||
|
* taken to be licensed under the Softfloat-2a license unless specifically
|
||||||
|
* indicated otherwise.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
|
||||||
|
{
|
||||||
|
switch (a->cls) {
|
||||||
|
case float_class_snan:
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
if (s->default_nan_mode) {
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
} else {
|
||||||
|
parts_silence_nan(a, s);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case float_class_qnan:
|
||||||
|
if (s->default_nan_mode) {
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
if (is_snan(a->cls) || is_snan(b->cls)) {
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s->default_nan_mode) {
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
} else {
|
||||||
|
int cmp = frac_cmp(a, b);
|
||||||
|
if (cmp == 0) {
|
||||||
|
cmp = a->sign < b->sign;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
|
||||||
|
a = b;
|
||||||
|
}
|
||||||
|
if (is_snan(a->cls)) {
|
||||||
|
parts_silence_nan(a, s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
|
||||||
|
FloatPartsN *c, float_status *s,
|
||||||
|
int ab_mask, int abc_mask)
|
||||||
|
{
|
||||||
|
int which;
|
||||||
|
|
||||||
|
if (unlikely(abc_mask & float_cmask_snan)) {
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
which = pickNaNMulAdd(a->cls, b->cls, c->cls,
|
||||||
|
ab_mask == float_cmask_infzero, s);
|
||||||
|
|
||||||
|
if (s->default_nan_mode || which == 3) {
|
||||||
|
/*
|
||||||
|
* Note that this check is after pickNaNMulAdd so that function
|
||||||
|
* has an opportunity to set the Invalid flag for infzero.
|
||||||
|
*/
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (which) {
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
a = b;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
a = c;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
if (is_snan(a->cls)) {
|
||||||
|
parts_silence_nan(a, s);
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Canonicalize the FloatParts structure. Determine the class,
|
||||||
|
* unbias the exponent, and normalize the fraction.
|
||||||
|
*/
|
||||||
|
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
|
||||||
|
const FloatFmt *fmt)
|
||||||
|
{
|
||||||
|
if (unlikely(p->exp == 0)) {
|
||||||
|
if (likely(frac_eqz(p))) {
|
||||||
|
p->cls = float_class_zero;
|
||||||
|
} else if (status->flush_inputs_to_zero) {
|
||||||
|
float_raise(float_flag_input_denormal, status);
|
||||||
|
p->cls = float_class_zero;
|
||||||
|
frac_clear(p);
|
||||||
|
} else {
|
||||||
|
int shift = frac_normalize(p);
|
||||||
|
p->cls = float_class_normal;
|
||||||
|
p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
|
||||||
|
}
|
||||||
|
} else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
|
||||||
|
p->cls = float_class_normal;
|
||||||
|
p->exp -= fmt->exp_bias;
|
||||||
|
frac_shl(p, fmt->frac_shift);
|
||||||
|
p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||||
|
} else if (likely(frac_eqz(p))) {
|
||||||
|
p->cls = float_class_inf;
|
||||||
|
} else {
|
||||||
|
frac_shl(p, fmt->frac_shift);
|
||||||
|
p->cls = (parts_is_snan_frac(p->frac_hi, status)
|
||||||
|
? float_class_snan : float_class_qnan);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Round and uncanonicalize a floating-point number by parts. There
|
||||||
|
* are FRAC_SHIFT bits that may require rounding at the bottom of the
|
||||||
|
* fraction; these bits will be removed. The exponent will be biased
|
||||||
|
* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
|
||||||
|
*/
|
||||||
|
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
|
||||||
|
const FloatFmt *fmt)
|
||||||
|
{
|
||||||
|
const int exp_max = fmt->exp_max;
|
||||||
|
const int frac_shift = fmt->frac_shift;
|
||||||
|
const uint64_t frac_lsb = fmt->frac_lsb;
|
||||||
|
const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
|
||||||
|
const uint64_t round_mask = fmt->round_mask;
|
||||||
|
const uint64_t roundeven_mask = fmt->roundeven_mask;
|
||||||
|
uint64_t inc;
|
||||||
|
bool overflow_norm;
|
||||||
|
int exp, flags = 0;
|
||||||
|
|
||||||
|
if (unlikely(p->cls != float_class_normal)) {
|
||||||
|
switch (p->cls) {
|
||||||
|
case float_class_zero:
|
||||||
|
p->exp = 0;
|
||||||
|
frac_clear(p);
|
||||||
|
return;
|
||||||
|
case float_class_inf:
|
||||||
|
g_assert(!fmt->arm_althp);
|
||||||
|
p->exp = fmt->exp_max;
|
||||||
|
frac_clear(p);
|
||||||
|
return;
|
||||||
|
case float_class_qnan:
|
||||||
|
case float_class_snan:
|
||||||
|
g_assert(!fmt->arm_althp);
|
||||||
|
p->exp = fmt->exp_max;
|
||||||
|
frac_shr(p, fmt->frac_shift);
|
||||||
|
return;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (s->float_rounding_mode) {
|
||||||
|
case float_round_nearest_even:
|
||||||
|
overflow_norm = false;
|
||||||
|
inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
|
||||||
|
break;
|
||||||
|
case float_round_ties_away:
|
||||||
|
overflow_norm = false;
|
||||||
|
inc = frac_lsbm1;
|
||||||
|
break;
|
||||||
|
case float_round_to_zero:
|
||||||
|
overflow_norm = true;
|
||||||
|
inc = 0;
|
||||||
|
break;
|
||||||
|
case float_round_up:
|
||||||
|
inc = p->sign ? 0 : round_mask;
|
||||||
|
overflow_norm = p->sign;
|
||||||
|
break;
|
||||||
|
case float_round_down:
|
||||||
|
inc = p->sign ? round_mask : 0;
|
||||||
|
overflow_norm = !p->sign;
|
||||||
|
break;
|
||||||
|
case float_round_to_odd:
|
||||||
|
overflow_norm = true;
|
||||||
|
inc = p->frac_lo & frac_lsb ? 0 : round_mask;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
|
||||||
|
exp = p->exp + fmt->exp_bias;
|
||||||
|
if (likely(exp > 0)) {
|
||||||
|
if (p->frac_lo & round_mask) {
|
||||||
|
flags |= float_flag_inexact;
|
||||||
|
if (frac_addi(p, p, inc)) {
|
||||||
|
frac_shr(p, 1);
|
||||||
|
p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
|
||||||
|
exp++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
frac_shr(p, frac_shift);
|
||||||
|
|
||||||
|
if (fmt->arm_althp) {
|
||||||
|
/* ARM Alt HP eschews Inf and NaN for a wider exponent. */
|
||||||
|
if (unlikely(exp > exp_max)) {
|
||||||
|
/* Overflow. Return the maximum normal. */
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
exp = exp_max;
|
||||||
|
frac_allones(p);
|
||||||
|
}
|
||||||
|
} else if (unlikely(exp >= exp_max)) {
|
||||||
|
flags |= float_flag_overflow | float_flag_inexact;
|
||||||
|
if (overflow_norm) {
|
||||||
|
exp = exp_max - 1;
|
||||||
|
frac_allones(p);
|
||||||
|
} else {
|
||||||
|
p->cls = float_class_inf;
|
||||||
|
exp = exp_max;
|
||||||
|
frac_clear(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (s->flush_to_zero) {
|
||||||
|
flags |= float_flag_output_denormal;
|
||||||
|
p->cls = float_class_zero;
|
||||||
|
exp = 0;
|
||||||
|
frac_clear(p);
|
||||||
|
} else {
|
||||||
|
bool is_tiny = s->tininess_before_rounding || exp < 0;
|
||||||
|
|
||||||
|
if (!is_tiny) {
|
||||||
|
FloatPartsN discard;
|
||||||
|
is_tiny = !frac_addi(&discard, p, inc);
|
||||||
|
}
|
||||||
|
|
||||||
|
frac_shrjam(p, 1 - exp);
|
||||||
|
|
||||||
|
if (p->frac_lo & round_mask) {
|
||||||
|
/* Need to recompute round-to-even/round-to-odd. */
|
||||||
|
switch (s->float_rounding_mode) {
|
||||||
|
case float_round_nearest_even:
|
||||||
|
inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
|
||||||
|
? frac_lsbm1 : 0);
|
||||||
|
break;
|
||||||
|
case float_round_to_odd:
|
||||||
|
inc = p->frac_lo & frac_lsb ? 0 : round_mask;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
flags |= float_flag_inexact;
|
||||||
|
frac_addi(p, p, inc);
|
||||||
|
}
|
||||||
|
|
||||||
|
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
|
||||||
|
frac_shr(p, frac_shift);
|
||||||
|
|
||||||
|
if (is_tiny && (flags & float_flag_inexact)) {
|
||||||
|
flags |= float_flag_underflow;
|
||||||
|
}
|
||||||
|
if (exp == 0 && frac_eqz(p)) {
|
||||||
|
p->cls = float_class_zero;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p->exp = exp;
|
||||||
|
float_raise(flags, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of adding or subtracting the values of the
|
||||||
|
* floating-point values `a' and `b'. The operation is performed
|
||||||
|
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||||
|
* Arithmetic.
|
||||||
|
*/
|
||||||
|
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
|
||||||
|
float_status *s, bool subtract)
|
||||||
|
{
|
||||||
|
bool b_sign = b->sign ^ subtract;
|
||||||
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
|
||||||
|
if (a->sign != b_sign) {
|
||||||
|
/* Subtraction */
|
||||||
|
if (likely(ab_mask == float_cmask_normal)) {
|
||||||
|
if (parts_sub_normal(a, b)) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
/* Subtract was exact, fall through to set sign. */
|
||||||
|
ab_mask = float_cmask_zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ab_mask == float_cmask_zero) {
|
||||||
|
a->sign = s->float_rounding_mode == float_round_down;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||||
|
goto p_nan;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_inf) {
|
||||||
|
if (a->cls != float_class_inf) {
|
||||||
|
/* N - Inf */
|
||||||
|
goto return_b;
|
||||||
|
}
|
||||||
|
if (b->cls != float_class_inf) {
|
||||||
|
/* Inf - N */
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
/* Inf - Inf */
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Addition */
|
||||||
|
if (likely(ab_mask == float_cmask_normal)) {
|
||||||
|
parts_add_normal(a, b);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ab_mask == float_cmask_zero) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||||
|
goto p_nan;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_inf) {
|
||||||
|
a->cls = float_class_inf;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (b->cls == float_class_zero) {
|
||||||
|
g_assert(a->cls == float_class_normal);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert(a->cls == float_class_zero);
|
||||||
|
g_assert(b->cls == float_class_normal);
|
||||||
|
return_b:
|
||||||
|
b->sign = b_sign;
|
||||||
|
return b;
|
||||||
|
|
||||||
|
p_nan:
|
||||||
|
return parts_pick_nan(a, b, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of multiplying the floating-point values `a' and
|
||||||
|
* `b'. The operation is performed according to the IEC/IEEE Standard
|
||||||
|
* for Binary Floating-Point Arithmetic.
|
||||||
|
*/
|
||||||
|
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
bool sign = a->sign ^ b->sign;
|
||||||
|
|
||||||
|
if (likely(ab_mask == float_cmask_normal)) {
|
||||||
|
FloatPartsW tmp;
|
||||||
|
|
||||||
|
frac_mulw(&tmp, a, b);
|
||||||
|
frac_truncjam(a, &tmp);
|
||||||
|
|
||||||
|
a->exp += b->exp + 1;
|
||||||
|
if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
|
||||||
|
frac_add(a, a, a);
|
||||||
|
a->exp -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
a->sign = sign;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inf * Zero == NaN */
|
||||||
|
if (unlikely(ab_mask == float_cmask_infzero)) {
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||||
|
return parts_pick_nan(a, b, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Multiply by 0 or Inf */
|
||||||
|
if (ab_mask & float_cmask_inf) {
|
||||||
|
a->cls = float_class_inf;
|
||||||
|
a->sign = sign;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert(ab_mask & float_cmask_zero);
|
||||||
|
a->cls = float_class_zero;
|
||||||
|
a->sign = sign;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of multiplying the floating-point values `a' and
|
||||||
|
* `b' then adding 'c', with no intermediate rounding step after the
|
||||||
|
* multiplication. The operation is performed according to the
|
||||||
|
* IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
|
||||||
|
* The flags argument allows the caller to select negation of the
|
||||||
|
* addend, the intermediate product, or the final result. (The
|
||||||
|
* difference between this and having the caller do a separate
|
||||||
|
* negation is that negating externally will flip the sign bit on NaNs.)
|
||||||
|
*
|
||||||
|
* Requires A and C extracted into a double-sized structure to provide the
|
||||||
|
* extra space for the widening multiply.
|
||||||
|
*/
|
||||||
|
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
|
||||||
|
FloatPartsN *c, int flags, float_status *s)
|
||||||
|
{
|
||||||
|
int ab_mask, abc_mask;
|
||||||
|
FloatPartsW p_widen, c_widen;
|
||||||
|
|
||||||
|
ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
abc_mask = float_cmask(c->cls) | ab_mask;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It is implementation-defined whether the cases of (0,inf,qnan)
|
||||||
|
* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
|
||||||
|
* they return if they do), so we have to hand this information
|
||||||
|
* off to the target-specific pick-a-NaN routine.
|
||||||
|
*/
|
||||||
|
if (unlikely(abc_mask & float_cmask_anynan)) {
|
||||||
|
return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & float_muladd_negate_c) {
|
||||||
|
c->sign ^= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute the sign of the product into A. */
|
||||||
|
a->sign ^= b->sign;
|
||||||
|
if (flags & float_muladd_negate_product) {
|
||||||
|
a->sign ^= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(ab_mask != float_cmask_normal)) {
|
||||||
|
if (unlikely(ab_mask == float_cmask_infzero)) {
|
||||||
|
goto d_nan;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_inf) {
|
||||||
|
if (c->cls == float_class_inf && a->sign != c->sign) {
|
||||||
|
goto d_nan;
|
||||||
|
}
|
||||||
|
goto return_inf;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert(ab_mask & float_cmask_zero);
|
||||||
|
if (c->cls == float_class_normal) {
|
||||||
|
*a = *c;
|
||||||
|
goto return_normal;
|
||||||
|
}
|
||||||
|
if (c->cls == float_class_zero) {
|
||||||
|
if (a->sign != c->sign) {
|
||||||
|
goto return_sub_zero;
|
||||||
|
}
|
||||||
|
goto return_zero;
|
||||||
|
}
|
||||||
|
g_assert(c->cls == float_class_inf);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(c->cls == float_class_inf)) {
|
||||||
|
a->sign = c->sign;
|
||||||
|
goto return_inf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Perform the multiplication step. */
|
||||||
|
p_widen.sign = a->sign;
|
||||||
|
p_widen.exp = a->exp + b->exp + 1;
|
||||||
|
frac_mulw(&p_widen, a, b);
|
||||||
|
if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
|
||||||
|
frac_add(&p_widen, &p_widen, &p_widen);
|
||||||
|
p_widen.exp -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Perform the addition step. */
|
||||||
|
if (c->cls != float_class_zero) {
|
||||||
|
/* Zero-extend C to less significant bits. */
|
||||||
|
frac_widen(&c_widen, c);
|
||||||
|
c_widen.exp = c->exp;
|
||||||
|
|
||||||
|
if (a->sign == c->sign) {
|
||||||
|
parts_add_normal(&p_widen, &c_widen);
|
||||||
|
} else if (!parts_sub_normal(&p_widen, &c_widen)) {
|
||||||
|
goto return_sub_zero;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Narrow with sticky bit, for proper rounding later. */
|
||||||
|
frac_truncjam(a, &p_widen);
|
||||||
|
a->sign = p_widen.sign;
|
||||||
|
a->exp = p_widen.exp;
|
||||||
|
|
||||||
|
return_normal:
|
||||||
|
if (flags & float_muladd_halve_result) {
|
||||||
|
a->exp -= 1;
|
||||||
|
}
|
||||||
|
finish_sign:
|
||||||
|
if (flags & float_muladd_negate_result) {
|
||||||
|
a->sign ^= 1;
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
|
||||||
|
return_sub_zero:
|
||||||
|
a->sign = s->float_rounding_mode == float_round_down;
|
||||||
|
return_zero:
|
||||||
|
a->cls = float_class_zero;
|
||||||
|
goto finish_sign;
|
||||||
|
|
||||||
|
return_inf:
|
||||||
|
a->cls = float_class_inf;
|
||||||
|
goto finish_sign;
|
||||||
|
|
||||||
|
d_nan:
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of dividing the floating-point value `a' by the
|
||||||
|
* corresponding value `b'. The operation is performed according to
|
||||||
|
* the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
|
*/
|
||||||
|
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
bool sign = a->sign ^ b->sign;
|
||||||
|
|
||||||
|
if (likely(ab_mask == float_cmask_normal)) {
|
||||||
|
a->sign = sign;
|
||||||
|
a->exp -= b->exp + frac_div(a, b);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 0/0 or Inf/Inf => NaN */
|
||||||
|
if (unlikely(ab_mask == float_cmask_zero) ||
|
||||||
|
unlikely(ab_mask == float_cmask_inf)) {
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
parts_default_nan(a, s);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* All the NaN cases */
|
||||||
|
if (unlikely(ab_mask & float_cmask_anynan)) {
|
||||||
|
return parts_pick_nan(a, b, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
a->sign = sign;
|
||||||
|
|
||||||
|
/* Inf / X */
|
||||||
|
if (a->cls == float_class_inf) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 0 / X */
|
||||||
|
if (a->cls == float_class_zero) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* X / Inf */
|
||||||
|
if (b->cls == float_class_inf) {
|
||||||
|
a->cls = float_class_zero;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* X / 0 => Inf */
|
||||||
|
g_assert(b->cls == float_class_zero);
|
||||||
|
float_raise(float_flag_divbyzero, s);
|
||||||
|
a->cls = float_class_inf;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rounds the floating-point value `a' to an integer, and returns the
|
||||||
|
* result as a floating-point value. The operation is performed
|
||||||
|
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||||
|
* Arithmetic.
|
||||||
|
*
|
||||||
|
* parts_round_to_int_normal is an internal helper function for
|
||||||
|
* normal numbers only, returning true for inexact but not directly
|
||||||
|
* raising float_flag_inexact.
|
||||||
|
*/
|
||||||
|
/*
 * Round the significand of @a (a normal number) to an integer value,
 * after first scaling by 2**@scale.  @frac_size is the fraction width
 * of the destination format.  Returns true if the result is inexact;
 * the caller is responsible for raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp scale so that exponent arithmetic cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All bits are fractional: the result is zero or one. */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral -- nothing to round away, exact. */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Carry out of the significand: renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}
|
||||||
|
|
||||||
|
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
|
||||||
|
int scale, float_status *s,
|
||||||
|
const FloatFmt *fmt)
|
||||||
|
{
|
||||||
|
switch (a->cls) {
|
||||||
|
case float_class_qnan:
|
||||||
|
case float_class_snan:
|
||||||
|
parts_return_nan(a, s);
|
||||||
|
break;
|
||||||
|
case float_class_zero:
|
||||||
|
case float_class_inf:
|
||||||
|
break;
|
||||||
|
case float_class_normal:
|
||||||
|
if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
|
||||||
|
float_raise(float_flag_inexact, s);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of converting the floating-point value `a' to
|
||||||
|
* the two's complement integer format. The conversion is performed
|
||||||
|
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||||
|
* Arithmetic---which means in particular that the conversion is
|
||||||
|
* rounded according to the current rounding mode. If `a' is a NaN,
|
||||||
|
* the largest positive integer is returned. Otherwise, if the
|
||||||
|
* conversion overflows, the largest integer with the same sign as `a'
|
||||||
|
* is returned.
|
||||||
|
*/
|
||||||
|
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
|
||||||
|
int scale, int64_t min, int64_t max,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
int flags = 0;
|
||||||
|
uint64_t r;
|
||||||
|
|
||||||
|
switch (p->cls) {
|
||||||
|
case float_class_snan:
|
||||||
|
case float_class_qnan:
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = max;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case float_class_inf:
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = p->sign ? min : max;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case float_class_zero:
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case float_class_normal:
|
||||||
|
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||||
|
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
||||||
|
flags = float_flag_inexact;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p->exp <= DECOMPOSED_BINARY_POINT) {
|
||||||
|
r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
|
||||||
|
} else {
|
||||||
|
r = UINT64_MAX;
|
||||||
|
}
|
||||||
|
if (p->sign) {
|
||||||
|
if (r <= -(uint64_t)min) {
|
||||||
|
r = -r;
|
||||||
|
} else {
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = min;
|
||||||
|
}
|
||||||
|
} else if (r > max) {
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = max;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
|
||||||
|
float_raise(flags, s);
|
||||||
|
return r;
|
||||||
|
}
|
|
@ -129,7 +129,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status)
|
||||||
| The pattern for a default generated deconstructed floating-point NaN.
|
| The pattern for a default generated deconstructed floating-point NaN.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static FloatParts parts_default_nan(float_status *status)
|
static void parts64_default_nan(FloatParts64 *p, float_status *status)
|
||||||
{
|
{
|
||||||
bool sign = 0;
|
bool sign = 0;
|
||||||
uint64_t frac;
|
uint64_t frac;
|
||||||
|
@ -163,7 +163,7 @@ static FloatParts parts_default_nan(float_status *status)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return (FloatParts) {
|
*p = (FloatParts64) {
|
||||||
.cls = float_class_qnan,
|
.cls = float_class_qnan,
|
||||||
.sign = sign,
|
.sign = sign,
|
||||||
.exp = INT_MAX,
|
.exp = INT_MAX,
|
||||||
|
@ -171,26 +171,55 @@ static FloatParts parts_default_nan(float_status *status)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fill in *p with the default generated quad-precision NaN. */
static void parts128_default_nan(FloatParts128 *p, float_status *status)
{
    /*
     * Extrapolate from the choices made by parts64_default_nan to fill
     * in the quad-floating format.  If the low bit is set, assume we
     * want to set all non-snan bits.
     */
    FloatParts64 p64;
    parts64_default_nan(&p64, status);

    *p = (FloatParts128) {
        .cls = float_class_qnan,
        .sign = p64.sign,
        .exp = INT_MAX,
        .frac_hi = p64.frac,
        .frac_lo = -(p64.frac & 1)
    };
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns a quiet NaN from a signalling NaN for the deconstructed
|
| Returns a quiet NaN from a signalling NaN for the deconstructed
|
||||||
| floating-point parts.
|
| floating-point parts.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static FloatParts parts_silence_nan(FloatParts a, float_status *status)
|
static uint64_t parts_silence_nan_frac(uint64_t frac, float_status *status)
|
||||||
{
|
{
|
||||||
g_assert(!no_signaling_nans(status));
|
g_assert(!no_signaling_nans(status));
|
||||||
#if defined(TARGET_HPPA)
|
g_assert(!status->default_nan_mode);
|
||||||
a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
|
|
||||||
a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
|
/* The only snan_bit_is_one target without default_nan_mode is HPPA. */
|
||||||
#else
|
|
||||||
if (snan_bit_is_one(status)) {
|
if (snan_bit_is_one(status)) {
|
||||||
return parts_default_nan(status);
|
frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
|
||||||
|
frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
|
||||||
} else {
|
} else {
|
||||||
a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
|
frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
|
||||||
}
|
}
|
||||||
#endif
|
return frac;
|
||||||
a.cls = float_class_qnan;
|
}
|
||||||
return a;
|
|
||||||
|
static void parts64_silence_nan(FloatParts64 *p, float_status *status)
|
||||||
|
{
|
||||||
|
p->frac = parts_silence_nan_frac(p->frac, status);
|
||||||
|
p->cls = float_class_qnan;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void parts128_silence_nan(FloatParts128 *p, float_status *status)
|
||||||
|
{
|
||||||
|
p->frac_hi = parts_silence_nan_frac(p->frac_hi, status);
|
||||||
|
p->cls = float_class_qnan;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -227,18 +256,6 @@ floatx80 floatx80_default_nan(float_status *status)
|
||||||
const floatx80 floatx80_infinity
|
const floatx80 floatx80_infinity
|
||||||
= make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
|
= make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Raises the exceptions specified by `flags'. Floating-point traps can be
|
|
||||||
| defined here if desired. It is currently not possible for such a trap
|
|
||||||
| to substitute a result value. If traps are not implemented, this routine
|
|
||||||
| should be simply `float_exception_flags |= flags;'.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
void float_raise(uint8_t flags, float_status *status)
|
|
||||||
{
|
|
||||||
status->float_exception_flags |= flags;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Internal canonical NaN format.
|
| Internal canonical NaN format.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
@ -1070,25 +1087,6 @@ bool float128_is_signaling_nan(float128 a, float_status *status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns a quiet NaN from a signalling NaN for the quadruple-precision
|
|
||||||
| floating point value `a'.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
float128 float128_silence_nan(float128 a, float_status *status)
|
|
||||||
{
|
|
||||||
if (no_signaling_nans(status)) {
|
|
||||||
g_assert_not_reached();
|
|
||||||
} else {
|
|
||||||
if (snan_bit_is_one(status)) {
|
|
||||||
return float128_default_nan(status);
|
|
||||||
} else {
|
|
||||||
a.high |= UINT64_C(0x0000800000000000);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns the result of converting the quadruple-precision floating-point NaN
|
| Returns the result of converting the quadruple-precision floating-point NaN
|
||||||
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
||||||
|
|
3745
fpu/softfloat.c
3745
fpu/softfloat.c
File diff suppressed because it is too large
Load Diff
|
@ -83,6 +83,43 @@ this code that are retained.
|
||||||
#define FPU_SOFTFLOAT_MACROS_H
|
#define FPU_SOFTFLOAT_MACROS_H
|
||||||
|
|
||||||
#include "fpu/softfloat-types.h"
|
#include "fpu/softfloat-types.h"
|
||||||
|
#include "qemu/host-utils.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* shl_double: double-word merging left shift
|
||||||
|
* @l: left or most-significant word
|
||||||
|
* @r: right or least-significant word
|
||||||
|
* @c: shift count
|
||||||
|
*
|
||||||
|
* Shift @l left by @c bits, shifting in bits from @r.
|
||||||
|
*/
|
||||||
|
static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
|
||||||
|
{
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
|
||||||
|
return l;
|
||||||
|
#else
|
||||||
|
return c ? (l << c) | (r >> (64 - c)) : l;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* shr_double: double-word merging right shift
|
||||||
|
* @l: left or most-significant word
|
||||||
|
* @r: right or least-significant word
|
||||||
|
* @c: shift count
|
||||||
|
*
|
||||||
|
* Shift @r right by @c bits, shifting in bits from @l.
|
||||||
|
*/
|
||||||
|
static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
|
||||||
|
{
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
|
||||||
|
return r;
|
||||||
|
#else
|
||||||
|
return c ? (r >> c) | (l << (64 - c)) : r;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Shifts `a' right by the number of bits given in `count'. If any nonzero
|
| Shifts `a' right by the number of bits given in `count'. If any nonzero
|
||||||
|
@ -403,16 +440,12 @@ static inline void
|
||||||
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static inline void
|
static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
|
||||||
add128(
|
uint64_t *z0Ptr, uint64_t *z1Ptr)
|
||||||
uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
|
|
||||||
{
|
{
|
||||||
uint64_t z1;
|
bool c = 0;
|
||||||
|
*z1Ptr = uadd64_carry(a1, b1, &c);
|
||||||
z1 = a1 + b1;
|
*z0Ptr = uadd64_carry(a0, b0, &c);
|
||||||
*z1Ptr = z1;
|
|
||||||
*z0Ptr = a0 + b0 + ( z1 < a1 );
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -423,34 +456,14 @@ static inline void
|
||||||
| `z1Ptr', and `z2Ptr'.
|
| `z1Ptr', and `z2Ptr'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static inline void
|
static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
|
||||||
add192(
|
uint64_t b0, uint64_t b1, uint64_t b2,
|
||||||
uint64_t a0,
|
uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
|
||||||
uint64_t a1,
|
|
||||||
uint64_t a2,
|
|
||||||
uint64_t b0,
|
|
||||||
uint64_t b1,
|
|
||||||
uint64_t b2,
|
|
||||||
uint64_t *z0Ptr,
|
|
||||||
uint64_t *z1Ptr,
|
|
||||||
uint64_t *z2Ptr
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
uint64_t z0, z1, z2;
|
bool c = 0;
|
||||||
int8_t carry0, carry1;
|
*z2Ptr = uadd64_carry(a2, b2, &c);
|
||||||
|
*z1Ptr = uadd64_carry(a1, b1, &c);
|
||||||
z2 = a2 + b2;
|
*z0Ptr = uadd64_carry(a0, b0, &c);
|
||||||
carry1 = ( z2 < a2 );
|
|
||||||
z1 = a1 + b1;
|
|
||||||
carry0 = ( z1 < a1 );
|
|
||||||
z0 = a0 + b0;
|
|
||||||
z1 += carry1;
|
|
||||||
z0 += ( z1 < carry1 );
|
|
||||||
z0 += carry0;
|
|
||||||
*z2Ptr = z2;
|
|
||||||
*z1Ptr = z1;
|
|
||||||
*z0Ptr = z0;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -461,14 +474,12 @@ static inline void
|
||||||
| `z1Ptr'.
|
| `z1Ptr'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static inline void
|
static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
|
||||||
sub128(
|
uint64_t *z0Ptr, uint64_t *z1Ptr)
|
||||||
uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
|
|
||||||
{
|
{
|
||||||
|
bool c = 0;
|
||||||
*z1Ptr = a1 - b1;
|
*z1Ptr = usub64_borrow(a1, b1, &c);
|
||||||
*z0Ptr = a0 - b0 - ( a1 < b1 );
|
*z0Ptr = usub64_borrow(a0, b0, &c);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -479,34 +490,14 @@ static inline void
|
||||||
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
|
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static inline void
|
static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
|
||||||
sub192(
|
uint64_t b0, uint64_t b1, uint64_t b2,
|
||||||
uint64_t a0,
|
uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
|
||||||
uint64_t a1,
|
|
||||||
uint64_t a2,
|
|
||||||
uint64_t b0,
|
|
||||||
uint64_t b1,
|
|
||||||
uint64_t b2,
|
|
||||||
uint64_t *z0Ptr,
|
|
||||||
uint64_t *z1Ptr,
|
|
||||||
uint64_t *z2Ptr
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
uint64_t z0, z1, z2;
|
bool c = 0;
|
||||||
int8_t borrow0, borrow1;
|
*z2Ptr = usub64_borrow(a2, b2, &c);
|
||||||
|
*z1Ptr = usub64_borrow(a1, b1, &c);
|
||||||
z2 = a2 - b2;
|
*z0Ptr = usub64_borrow(a0, b0, &c);
|
||||||
borrow1 = ( a2 < b2 );
|
|
||||||
z1 = a1 - b1;
|
|
||||||
borrow0 = ( a1 < b1 );
|
|
||||||
z0 = a0 - b0;
|
|
||||||
z0 -= ( z1 < borrow1 );
|
|
||||||
z1 -= borrow1;
|
|
||||||
z0 -= borrow0;
|
|
||||||
*z2Ptr = z2;
|
|
||||||
*z1Ptr = z1;
|
|
||||||
*z0Ptr = z0;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -515,27 +506,10 @@ static inline void
|
||||||
| `z0Ptr' and `z1Ptr'.
|
| `z0Ptr' and `z1Ptr'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
/*
 * 64x64 -> 128-bit multiply: the full product of @a and @b is stored
 * with the high half in *z0Ptr and the low half in *z1Ptr.
 */
static inline void
mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /* mulu64 writes (low, high); this interface wants (high, low). */
    mulu64(z1Ptr, z0Ptr, a, b);
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -546,24 +520,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
mul128By64To192(
|
mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
|
||||||
uint64_t a0,
|
uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
|
||||||
uint64_t a1,
|
|
||||||
uint64_t b,
|
|
||||||
uint64_t *z0Ptr,
|
|
||||||
uint64_t *z1Ptr,
|
|
||||||
uint64_t *z2Ptr
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
uint64_t z0, z1, z2, more1;
|
uint64_t z0, z1, m1;
|
||||||
|
|
||||||
mul64To128( a1, b, &z1, &z2 );
|
|
||||||
mul64To128( a0, b, &z0, &more1 );
|
|
||||||
add128( z0, more1, 0, z1, &z0, &z1 );
|
|
||||||
*z2Ptr = z2;
|
|
||||||
*z1Ptr = z1;
|
|
||||||
*z0Ptr = z0;
|
|
||||||
|
|
||||||
|
mul64To128(a1, b, &m1, z2Ptr);
|
||||||
|
mul64To128(a0, b, &z0, &z1);
|
||||||
|
add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -573,34 +537,21 @@ static inline void
|
||||||
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
|
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static inline void
|
static inline void mul128To256(uint64_t a0, uint64_t a1,
|
||||||
mul128To256(
|
uint64_t b0, uint64_t b1,
|
||||||
uint64_t a0,
|
uint64_t *z0Ptr, uint64_t *z1Ptr,
|
||||||
uint64_t a1,
|
uint64_t *z2Ptr, uint64_t *z3Ptr)
|
||||||
uint64_t b0,
|
|
||||||
uint64_t b1,
|
|
||||||
uint64_t *z0Ptr,
|
|
||||||
uint64_t *z1Ptr,
|
|
||||||
uint64_t *z2Ptr,
|
|
||||||
uint64_t *z3Ptr
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
uint64_t z0, z1, z2, z3;
|
uint64_t z0, z1, z2;
|
||||||
uint64_t more1, more2;
|
uint64_t m0, m1, m2, n1, n2;
|
||||||
|
|
||||||
mul64To128( a1, b1, &z2, &z3 );
|
mul64To128(a1, b0, &m1, &m2);
|
||||||
mul64To128( a1, b0, &z1, &more2 );
|
mul64To128(a0, b1, &n1, &n2);
|
||||||
add128( z1, more2, 0, z2, &z1, &z2 );
|
mul64To128(a1, b1, &z2, z3Ptr);
|
||||||
mul64To128( a0, b0, &z0, &more1 );
|
mul64To128(a0, b0, &z0, &z1);
|
||||||
add128( z0, more1, 0, z1, &z0, &z1 );
|
|
||||||
mul64To128( a0, b1, &more1, &more2 );
|
|
||||||
add128( more1, more2, 0, z2, &more1, &z2 );
|
|
||||||
add128( z0, z1, 0, more1, &z0, &z1 );
|
|
||||||
*z3Ptr = z3;
|
|
||||||
*z2Ptr = z2;
|
|
||||||
*z1Ptr = z1;
|
|
||||||
*z0Ptr = z0;
|
|
||||||
|
|
||||||
|
add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
|
||||||
|
add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
|
|
@ -100,7 +100,10 @@ typedef enum {
|
||||||
| Routine to raise any or all of the software IEC/IEEE floating-point
|
| Routine to raise any or all of the software IEC/IEEE floating-point
|
||||||
| exception flags.
|
| exception flags.
|
||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
/*----------------------------------------------------------------------------
| Routine to raise any or all of the software IEC/IEEE floating-point
| exception flags.
*----------------------------------------------------------------------------*/
static inline void float_raise(uint8_t flags, float_status *status)
{
    status->float_exception_flags |= flags;
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| If `a' is denormal and we are in flush-to-zero mode then set the
|
| If `a' is denormal and we are in flush-to-zero mode then set the
|
||||||
|
@ -1194,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);
|
||||||
float128 float128_add(float128, float128, float_status *status);
|
float128 float128_add(float128, float128, float_status *status);
|
||||||
float128 float128_sub(float128, float128, float_status *status);
|
float128 float128_sub(float128, float128, float_status *status);
|
||||||
float128 float128_mul(float128, float128, float_status *status);
|
float128 float128_mul(float128, float128, float_status *status);
|
||||||
|
float128 float128_muladd(float128, float128, float128, int,
|
||||||
|
float_status *status);
|
||||||
float128 float128_div(float128, float128, float_status *status);
|
float128 float128_div(float128, float128, float_status *status);
|
||||||
float128 float128_rem(float128, float128, float_status *status);
|
float128 float128_rem(float128, float128, float_status *status);
|
||||||
float128 float128_sqrt(float128, float_status *status);
|
float128 float128_sqrt(float128, float_status *status);
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#ifndef HOST_UTILS_H
|
#ifndef HOST_UTILS_H
|
||||||
#define HOST_UTILS_H
|
#define HOST_UTILS_H
|
||||||
|
|
||||||
|
#include "qemu/compiler.h"
|
||||||
#include "qemu/bswap.h"
|
#include "qemu/bswap.h"
|
||||||
|
|
||||||
#ifdef CONFIG_INT128
|
#ifdef CONFIG_INT128
|
||||||
|
@ -272,6 +273,9 @@ static inline int ctpop64(uint64_t val)
|
||||||
*/
|
*/
|
||||||
static inline uint8_t revbit8(uint8_t x)
|
static inline uint8_t revbit8(uint8_t x)
|
||||||
{
|
{
|
||||||
|
#if __has_builtin(__builtin_bitreverse8)
|
||||||
|
return __builtin_bitreverse8(x);
|
||||||
|
#else
|
||||||
/* Assign the correct nibble position. */
|
/* Assign the correct nibble position. */
|
||||||
x = ((x & 0xf0) >> 4)
|
x = ((x & 0xf0) >> 4)
|
||||||
| ((x & 0x0f) << 4);
|
| ((x & 0x0f) << 4);
|
||||||
|
@ -281,6 +285,7 @@ static inline uint8_t revbit8(uint8_t x)
|
||||||
| ((x & 0x22) << 1)
|
| ((x & 0x22) << 1)
|
||||||
| ((x & 0x11) << 3);
|
| ((x & 0x11) << 3);
|
||||||
return x;
|
return x;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -289,6 +294,9 @@ static inline uint8_t revbit8(uint8_t x)
|
||||||
*/
|
*/
|
||||||
static inline uint16_t revbit16(uint16_t x)
|
static inline uint16_t revbit16(uint16_t x)
|
||||||
{
|
{
|
||||||
|
#if __has_builtin(__builtin_bitreverse16)
|
||||||
|
return __builtin_bitreverse16(x);
|
||||||
|
#else
|
||||||
/* Assign the correct byte position. */
|
/* Assign the correct byte position. */
|
||||||
x = bswap16(x);
|
x = bswap16(x);
|
||||||
/* Assign the correct nibble position. */
|
/* Assign the correct nibble position. */
|
||||||
|
@ -300,6 +308,7 @@ static inline uint16_t revbit16(uint16_t x)
|
||||||
| ((x & 0x2222) << 1)
|
| ((x & 0x2222) << 1)
|
||||||
| ((x & 0x1111) << 3);
|
| ((x & 0x1111) << 3);
|
||||||
return x;
|
return x;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -308,6 +317,9 @@ static inline uint16_t revbit16(uint16_t x)
|
||||||
*/
|
*/
|
||||||
static inline uint32_t revbit32(uint32_t x)
|
static inline uint32_t revbit32(uint32_t x)
|
||||||
{
|
{
|
||||||
|
#if __has_builtin(__builtin_bitreverse32)
|
||||||
|
return __builtin_bitreverse32(x);
|
||||||
|
#else
|
||||||
/* Assign the correct byte position. */
|
/* Assign the correct byte position. */
|
||||||
x = bswap32(x);
|
x = bswap32(x);
|
||||||
/* Assign the correct nibble position. */
|
/* Assign the correct nibble position. */
|
||||||
|
@ -319,6 +331,7 @@ static inline uint32_t revbit32(uint32_t x)
|
||||||
| ((x & 0x22222222u) << 1)
|
| ((x & 0x22222222u) << 1)
|
||||||
| ((x & 0x11111111u) << 3);
|
| ((x & 0x11111111u) << 3);
|
||||||
return x;
|
return x;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -327,6 +340,9 @@ static inline uint32_t revbit32(uint32_t x)
|
||||||
*/
|
*/
|
||||||
static inline uint64_t revbit64(uint64_t x)
|
static inline uint64_t revbit64(uint64_t x)
|
||||||
{
|
{
|
||||||
|
#if __has_builtin(__builtin_bitreverse64)
|
||||||
|
return __builtin_bitreverse64(x);
|
||||||
|
#else
|
||||||
/* Assign the correct byte position. */
|
/* Assign the correct byte position. */
|
||||||
x = bswap64(x);
|
x = bswap64(x);
|
||||||
/* Assign the correct nibble position. */
|
/* Assign the correct nibble position. */
|
||||||
|
@ -338,6 +354,281 @@ static inline uint64_t revbit64(uint64_t x)
|
||||||
| ((x & 0x2222222222222222ull) << 1)
|
| ((x & 0x2222222222222222ull) << 1)
|
||||||
| ((x & 0x1111111111111111ull) << 3);
|
| ((x & 0x1111111111111111ull) << 3);
|
||||||
return x;
|
return x;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sadd32_overflow - addition with overflow indication
|
||||||
|
* @x, @y: addends
|
||||||
|
* @ret: Output for sum
|
||||||
|
*
|
||||||
|
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||||
|
* value has been truncated.
|
||||||
|
*/
|
||||||
|
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
|
||||||
|
{
|
||||||
|
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||||
|
return __builtin_add_overflow(x, y, ret);
|
||||||
|
#else
|
||||||
|
*ret = x + y;
|
||||||
|
return ((*ret ^ x) & ~(x ^ y)) < 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sadd64_overflow - addition with overflow indication
|
||||||
|
* @x, @y: addends
|
||||||
|
* @ret: Output for sum
|
||||||
|
*
|
||||||
|
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||||
|
* value has been truncated.
|
||||||
|
*/
|
||||||
|
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
|
||||||
|
{
|
||||||
|
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||||
|
return __builtin_add_overflow(x, y, ret);
|
||||||
|
#else
|
||||||
|
*ret = x + y;
|
||||||
|
return ((*ret ^ x) & ~(x ^ y)) < 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* uadd32_overflow - addition with overflow indication
|
||||||
|
* @x, @y: addends
|
||||||
|
* @ret: Output for sum
|
||||||
|
*
|
||||||
|
* Computes *@ret = @x + @y, and returns true if and only if that
|
||||||
|
* value has been truncated.
|
||||||
|
*/
|
||||||
|
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
|
||||||
|
{
|
||||||
|
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
|
||||||
|
return __builtin_add_overflow(x, y, ret);
|
||||||
|
#else
|
||||||
|
*ret = x + y;
|
||||||
|
return *ret < x;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Stores @x + @y into *@ret; returns true if and only if the sum
 * wrapped around (i.e. was truncated to 64 bits).
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    uint64_t sum = x + y;

    *ret = sum;
    /* An unsigned addition wrapped iff the result is below an addend. */
    return sum < x;
#endif
}
|
||||||
|
|
||||||
|
/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    /*
     * Subtract in unsigned arithmetic: signed overflow is undefined
     * behavior in C (C11 6.5p5), while unsigned wraps modulo 2^32.
     */
    uint32_t ux = (uint32_t)x;
    uint32_t uy = (uint32_t)y;
    uint32_t ud = ux - uy;

    /* Implementation-defined (two's complement) narrowing conversion. */
    *ret = (int32_t)ud;
    /*
     * Overflow occurred iff the operands have differing signs and the
     * result's sign differs from the minuend's.
     */
    return ((ud ^ ux) & (ux ^ uy) & 0x80000000u) != 0;
#endif
}
|
||||||
|
|
||||||
|
/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    /*
     * Subtract in unsigned arithmetic to avoid the undefined behavior
     * of signed overflow (C11 6.5p5); unsigned wraps modulo 2^64.
     */
    uint64_t ux = (uint64_t)x;
    uint64_t uy = (uint64_t)y;
    uint64_t ud = ux - uy;

    /* Implementation-defined (two's complement) narrowing conversion. */
    *ret = (int64_t)ud;
    /* Overflow iff operand signs differ and the result disagrees with @x. */
    return ((ud ^ ux) & (ux ^ uy) & (UINT64_C(1) << 63)) != 0;
#endif
}
|
||||||
|
|
||||||
|
/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Stores @x - @y into *@ret; returns true if and only if the
 * difference wrapped around (i.e. was truncated).
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    /* Unsigned subtraction borrows exactly when the subtrahend is larger. */
    return x < y;
#endif
}
|
||||||
|
|
||||||
|
/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Stores @x - @y into *@ret; returns true if and only if the
 * difference wrapped around (i.e. was truncated).
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    /* Unsigned subtraction borrows exactly when the subtrahend is larger. */
    return x < y;
#endif
}
|
||||||
|
|
||||||
|
/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Stores @x * @y into *@ret; returns true if and only if the
 * product did not fit in 32 bits.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    int64_t full = (int64_t)x * y;   /* exact product in 64 bits */

    *ret = (int32_t)full;
    /* Truncated iff the narrowed value no longer equals the product. */
    return *ret != full;
#endif
}
|
||||||
|
|
||||||
|
/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Stores @x * @y into *@ret; returns true if and only if the
 * product did not fit in 64 bits.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t lo, hi;

    muls64(&lo, &hi, x, y);
    *ret = lo;
    /* In range iff the high half is the sign extension of the low half. */
    return hi != ((int64_t)lo >> 63);
#endif
}
|
||||||
|
|
||||||
|
/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Stores @x * @y into *@ret; returns true if and only if the
 * product did not fit in 32 bits.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t wide = (uint64_t)x * y;   /* exact product in 64 bits */

    *ret = (uint32_t)wide;
    return wide > UINT32_MAX;
#endif
}
|
||||||
|
|
||||||
|
/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Stores @x * @y into *@ret; returns true if and only if the
 * product did not fit in 64 bits.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi;

    mulu64(ret, &hi, x, y);
    /* Any bits in the high half mean the product overflowed 64 bits. */
    return hi != 0;
#endif
}
|
||||||
|
|
||||||
|
/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long carry = *pcarry;

    x = __builtin_addcll(x, y, carry, &carry);
    /* Only the low bit of the carry-out is meaningful. */
    *pcarry = carry & 1;
    return x;
#else
    bool carry = *pcarry;

    /*
     * This is clang's internal expansion of __builtin_addc:
     * fold in the carry first, then the second addend; either
     * step (but not both) may produce a carry-out.
     */
    carry = uadd64_overflow(x, carry, &x);
    carry |= uadd64_overflow(x, y, &x);
    *pcarry = carry;
    return x;
#endif
}
|
||||||
|
|
||||||
|
/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long borrow = *pborrow;

    x = __builtin_subcll(x, y, borrow, &borrow);
    /* Only the low bit of the borrow-out is meaningful. */
    *pborrow = borrow & 1;
    return x;
#else
    bool borrow = *pborrow;

    /*
     * Mirror of the carry chain in uadd64_carry: subtract the
     * borrow first, then the subtrahend; either step (but not
     * both) may produce a borrow-out.
     */
    borrow = usub64_overflow(x, borrow, &x);
    borrow |= usub64_overflow(x, y, &x);
    *pborrow = borrow;
    return x;
#endif
}
|
}
|
||||||
|
|
||||||
/* Host type specific sizes of these routines. */
|
/* Host type specific sizes of these routines. */
|
||||||
|
|
|
@ -27,8 +27,14 @@ static inline void restore_flush_mode(CPUMIPSState *env)
|
||||||
|
|
||||||
static inline void restore_snan_bit_mode(CPUMIPSState *env)
|
static inline void restore_snan_bit_mode(CPUMIPSState *env)
|
||||||
{
|
{
|
||||||
set_snan_bit_is_one((env->active_fpu.fcr31 & (1 << FCR31_NAN2008)) == 0,
|
bool nan2008 = env->active_fpu.fcr31 & (1 << FCR31_NAN2008);
|
||||||
&env->active_fpu.fp_status);
|
|
||||||
|
/*
|
||||||
|
* With nan2008, SNaNs are silenced in the usual way.
|
||||||
|
* Before that, SNaNs are not silenced; default nans are produced.
|
||||||
|
*/
|
||||||
|
set_snan_bit_is_one(!nan2008, &env->active_fpu.fp_status);
|
||||||
|
set_default_nan_mode(!nan2008, &env->active_fpu.fp_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void restore_fp_status(CPUMIPSState *env)
|
static inline void restore_fp_status(CPUMIPSState *env)
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <fenv.h>
|
#include <fenv.h>
|
||||||
#include "qemu/timer.h"
|
#include "qemu/timer.h"
|
||||||
|
#include "qemu/int128.h"
|
||||||
#include "fpu/softfloat.h"
|
#include "fpu/softfloat.h"
|
||||||
|
|
||||||
/* amortize the computation of random inputs */
|
/* amortize the computation of random inputs */
|
||||||
|
@ -50,8 +51,10 @@ static const char * const op_names[] = {
|
||||||
enum precision {
|
enum precision {
|
||||||
PREC_SINGLE,
|
PREC_SINGLE,
|
||||||
PREC_DOUBLE,
|
PREC_DOUBLE,
|
||||||
|
PREC_QUAD,
|
||||||
PREC_FLOAT32,
|
PREC_FLOAT32,
|
||||||
PREC_FLOAT64,
|
PREC_FLOAT64,
|
||||||
|
PREC_FLOAT128,
|
||||||
PREC_MAX_NR,
|
PREC_MAX_NR,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -89,6 +92,7 @@ union fp {
|
||||||
double d;
|
double d;
|
||||||
float32 f32;
|
float32 f32;
|
||||||
float64 f64;
|
float64 f64;
|
||||||
|
float128 f128;
|
||||||
uint64_t u64;
|
uint64_t u64;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -113,6 +117,10 @@ struct op_desc {
|
||||||
static uint64_t random_ops[MAX_OPERANDS] = {
|
static uint64_t random_ops[MAX_OPERANDS] = {
|
||||||
SEED_A, SEED_B, SEED_C,
|
SEED_A, SEED_B, SEED_C,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static float128 random_quad_ops[MAX_OPERANDS] = {
|
||||||
|
{SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
|
||||||
|
};
|
||||||
static float_status soft_status;
|
static float_status soft_status;
|
||||||
static enum precision precision;
|
static enum precision precision;
|
||||||
static enum op operation;
|
static enum op operation;
|
||||||
|
@ -141,25 +149,45 @@ static void update_random_ops(int n_ops, enum precision prec)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < n_ops; i++) {
|
for (i = 0; i < n_ops; i++) {
|
||||||
uint64_t r = random_ops[i];
|
|
||||||
|
|
||||||
switch (prec) {
|
switch (prec) {
|
||||||
case PREC_SINGLE:
|
case PREC_SINGLE:
|
||||||
case PREC_FLOAT32:
|
case PREC_FLOAT32:
|
||||||
|
{
|
||||||
|
uint64_t r = random_ops[i];
|
||||||
do {
|
do {
|
||||||
r = xorshift64star(r);
|
r = xorshift64star(r);
|
||||||
} while (!float32_is_normal(r));
|
} while (!float32_is_normal(r));
|
||||||
|
random_ops[i] = r;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case PREC_DOUBLE:
|
case PREC_DOUBLE:
|
||||||
case PREC_FLOAT64:
|
case PREC_FLOAT64:
|
||||||
|
{
|
||||||
|
uint64_t r = random_ops[i];
|
||||||
do {
|
do {
|
||||||
r = xorshift64star(r);
|
r = xorshift64star(r);
|
||||||
} while (!float64_is_normal(r));
|
} while (!float64_is_normal(r));
|
||||||
|
random_ops[i] = r;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
case PREC_QUAD:
|
||||||
|
case PREC_FLOAT128:
|
||||||
|
{
|
||||||
|
float128 r = random_quad_ops[i];
|
||||||
|
uint64_t hi = r.high;
|
||||||
|
uint64_t lo = r.low;
|
||||||
|
do {
|
||||||
|
hi = xorshift64star(hi);
|
||||||
|
lo = xorshift64star(lo);
|
||||||
|
r = make_float128(hi, lo);
|
||||||
|
} while (!float128_is_normal(r));
|
||||||
|
random_quad_ops[i] = r;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
random_ops[i] = r;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,6 +212,13 @@ static void fill_random(union fp *ops, int n_ops, enum precision prec,
|
||||||
ops[i].f64 = float64_chs(ops[i].f64);
|
ops[i].f64 = float64_chs(ops[i].f64);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case PREC_QUAD:
|
||||||
|
case PREC_FLOAT128:
|
||||||
|
ops[i].f128 = random_quad_ops[i];
|
||||||
|
if (no_neg && float128_is_neg(ops[i].f128)) {
|
||||||
|
ops[i].f128 = float128_chs(ops[i].f128);
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
@ -345,6 +380,41 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case PREC_FLOAT128:
|
||||||
|
fill_random(ops, n_ops, prec, no_neg);
|
||||||
|
t0 = get_clock();
|
||||||
|
for (i = 0; i < OPS_PER_ITER; i++) {
|
||||||
|
float128 a = ops[0].f128;
|
||||||
|
float128 b = ops[1].f128;
|
||||||
|
float128 c = ops[2].f128;
|
||||||
|
|
||||||
|
switch (op) {
|
||||||
|
case OP_ADD:
|
||||||
|
res.f128 = float128_add(a, b, &soft_status);
|
||||||
|
break;
|
||||||
|
case OP_SUB:
|
||||||
|
res.f128 = float128_sub(a, b, &soft_status);
|
||||||
|
break;
|
||||||
|
case OP_MUL:
|
||||||
|
res.f128 = float128_mul(a, b, &soft_status);
|
||||||
|
break;
|
||||||
|
case OP_DIV:
|
||||||
|
res.f128 = float128_div(a, b, &soft_status);
|
||||||
|
break;
|
||||||
|
case OP_FMA:
|
||||||
|
res.f128 = float128_muladd(a, b, c, 0, &soft_status);
|
||||||
|
break;
|
||||||
|
case OP_SQRT:
|
||||||
|
res.f128 = float128_sqrt(a, &soft_status);
|
||||||
|
break;
|
||||||
|
case OP_CMP:
|
||||||
|
res.u64 = float128_compare_quiet(a, b, &soft_status);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
@ -369,7 +439,8 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
|
||||||
GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
|
GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
|
||||||
GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
|
GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
|
||||||
GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
|
GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
|
||||||
GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
|
GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
|
||||||
|
GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
|
||||||
|
|
||||||
GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
|
GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
|
||||||
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
|
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
|
||||||
|
@ -383,7 +454,8 @@ GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
|
||||||
GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
|
GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
|
||||||
GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
|
GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
|
||||||
GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
|
GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
|
||||||
GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
|
GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
|
||||||
|
GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
|
||||||
|
|
||||||
GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
|
GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
|
||||||
#undef GEN_BENCH_ALL_TYPES_NO_NEG
|
#undef GEN_BENCH_ALL_TYPES_NO_NEG
|
||||||
|
@ -397,6 +469,7 @@ GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
|
||||||
[PREC_DOUBLE] = bench_ ## opname ## _double, \
|
[PREC_DOUBLE] = bench_ ## opname ## _double, \
|
||||||
[PREC_FLOAT32] = bench_ ## opname ## _float32, \
|
[PREC_FLOAT32] = bench_ ## opname ## _float32, \
|
||||||
[PREC_FLOAT64] = bench_ ## opname ## _float64, \
|
[PREC_FLOAT64] = bench_ ## opname ## _float64, \
|
||||||
|
[PREC_FLOAT128] = bench_ ## opname ## _float128, \
|
||||||
}
|
}
|
||||||
|
|
||||||
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
|
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
|
||||||
|
@ -445,7 +518,7 @@ static void usage_complete(int argc, char *argv[])
|
||||||
fprintf(stderr, " -h = show this help message.\n");
|
fprintf(stderr, " -h = show this help message.\n");
|
||||||
fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
|
fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
|
||||||
op_list, op_names[0]);
|
op_list, op_names[0]);
|
||||||
fprintf(stderr, " -p = floating point precision (single, double). "
|
fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
|
||||||
"Default: single\n");
|
"Default: single\n");
|
||||||
fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
|
fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
|
||||||
"Default: even\n");
|
"Default: even\n");
|
||||||
|
@ -565,6 +638,8 @@ static void parse_args(int argc, char *argv[])
|
||||||
precision = PREC_SINGLE;
|
precision = PREC_SINGLE;
|
||||||
} else if (!strcmp(optarg, "double")) {
|
} else if (!strcmp(optarg, "double")) {
|
||||||
precision = PREC_DOUBLE;
|
precision = PREC_DOUBLE;
|
||||||
|
} else if (!strcmp(optarg, "quad")) {
|
||||||
|
precision = PREC_QUAD;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Unsupported precision '%s'\n", optarg);
|
fprintf(stderr, "Unsupported precision '%s'\n", optarg);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
|
@ -608,6 +683,9 @@ static void parse_args(int argc, char *argv[])
|
||||||
case PREC_DOUBLE:
|
case PREC_DOUBLE:
|
||||||
precision = PREC_FLOAT64;
|
precision = PREC_FLOAT64;
|
||||||
break;
|
break;
|
||||||
|
case PREC_QUAD:
|
||||||
|
precision = PREC_FLOAT128;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
|
|
@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)
|
||||||
test_abz_f128(true_abz_f128M, subj_abz_f128M);
|
test_abz_f128(true_abz_f128M, subj_abz_f128M);
|
||||||
break;
|
break;
|
||||||
case F128_MULADD:
|
case F128_MULADD:
|
||||||
not_implemented();
|
test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);
|
||||||
break;
|
break;
|
||||||
case F128_SQRT:
|
case F128_SQRT:
|
||||||
test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
|
test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
|
||||||
|
|
|
@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
|
||||||
WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
|
WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
|
||||||
#undef WRAP_MULADD
|
#undef WRAP_MULADD
|
||||||
|
|
||||||
|
static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
|
||||||
|
const float128_t *cp, float128_t *res)
|
||||||
|
{
|
||||||
|
float128 a, b, c, ret;
|
||||||
|
|
||||||
|
a = soft_to_qemu128(*ap);
|
||||||
|
b = soft_to_qemu128(*bp);
|
||||||
|
c = soft_to_qemu128(*cp);
|
||||||
|
ret = float128_muladd(a, b, c, 0, &qsf);
|
||||||
|
*res = qemu_to_soft128(ret);
|
||||||
|
}
|
||||||
|
|
||||||
#define WRAP_CMP16(name, func, retcond) \
|
#define WRAP_CMP16(name, func, retcond) \
|
||||||
static bool name(float16_t a, float16_t b) \
|
static bool name(float16_t a, float16_t b) \
|
||||||
{ \
|
{ \
|
||||||
|
|
Loading…
Reference in New Issue