From 463e45dcb4aa1a878b5ae5c6bbaf4ae70e24bf50 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Nov 2020 19:08:30 -0800 Subject: [PATCH] softfloat: Introduce sh[lr]_double primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have x86_64 assembly for them, with a fallback. This avoids shuffling values through %cl in the x86 case. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- fpu/softfloat.c | 102 +++++++++++++++++++++++++-------- include/fpu/softfloat-macros.h | 36 ++++++++++++ 2 files changed, 115 insertions(+), 23 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 571309e74f..34689959a9 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -957,15 +957,12 @@ static int frac128_normalize(FloatParts128 *a) { if (a->frac_hi) { int shl = clz64(a->frac_hi); - if (shl) { - int shr = 64 - shl; - a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr); - a->frac_lo = (a->frac_lo << shl); - } + a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl); + a->frac_lo <<= shl; return shl; } else if (a->frac_lo) { int shl = clz64(a->frac_lo); - a->frac_hi = (a->frac_lo << shl); + a->frac_hi = a->frac_lo << shl; a->frac_lo = 0; return shl + 64; } @@ -976,7 +973,7 @@ static int frac256_normalize(FloatParts256 *a) { uint64_t a0 = a->frac_hi, a1 = a->frac_hm; uint64_t a2 = a->frac_lm, a3 = a->frac_lo; - int ret, shl, shr; + int ret, shl; if (likely(a0)) { shl = clz64(a0); @@ -1006,11 +1003,10 @@ static int frac256_normalize(FloatParts256 *a) ret += shl; } - shr = -shl & 63; - a0 = (a0 << shl) | (a1 >> shr); - a1 = (a1 << shl) | (a2 >> shr); - a2 = (a2 << shl) | (a3 >> shr); - a3 = (a3 << shl); + a0 = shl_double(a0, a1, shl); + a1 = shl_double(a1, a2, shl); + a2 = shl_double(a2, a3, shl); + a3 <<= shl; done: a->frac_hi = a0; @@ -1029,7 +1025,20 @@ static void frac64_shl(FloatParts64 *a, int c) static void frac128_shl(FloatParts128 *a, int c) { - shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + + if (c & 64) { + a0 = a1, a1 = 0; + } + + c &= 63; + if (c) { + a0 = shl_double(a0, a1, c); + a1 = a1 << c; + } + + a->frac_hi = a0; + a->frac_lo = a1; } #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C) @@ -1041,19 +1050,68 @@ static void frac64_shr(FloatParts64 *a, int c) static void frac128_shr(FloatParts128 *a, int c) { - shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + + if (c & 64) { + a1 = a0, a0 = 0; + } + + c &= 63; + if (c) { + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + } + + a->frac_hi = a0; + a->frac_lo = a1; } #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C) static void frac64_shrjam(FloatParts64 *a, int c) { - shift64RightJamming(a->frac, c, &a->frac); + uint64_t a0 = a->frac; + + if (likely(c != 0)) { + if (likely(c < 64)) { + a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0); + } else { + a0 = a0 != 0; + } + a->frac = a0; + } } static void frac128_shrjam(FloatParts128 *a, int c) { - shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + uint64_t sticky = 0; + + if (unlikely(c == 0)) { + return; + } else if (likely(c < 64)) { + /* nothing */ + } else if (likely(c < 128)) { + sticky = a1; + a1 = a0; + a0 = 0; + c &= 63; + if (c == 0) { + goto done; + } + } else { + sticky = a0 | a1; + a0 = a1 = 0; + goto done; + } + + sticky |= shr_double(a1, 0, c); + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + + done: + a->frac_lo = a1 | (sticky != 0); + a->frac_hi = a0; } static void frac256_shrjam(FloatParts256 *a, int c) @@ -1061,7 +1119,6 @@ static void frac256_shrjam(FloatParts256 *a, int c) uint64_t a0 = a->frac_hi, a1 = a->frac_hm; uint64_t a2 = a->frac_lm, a3 = a->frac_lo; uint64_t sticky = 0; - int invc; if (unlikely(c == 0)) { return; @@ -1086,12 +1143,11 @@ static void frac256_shrjam(FloatParts256 *a, int c) goto done; } - invc = -c & 63; - sticky |= a3 << invc; - a3 = (a3 >> c) | (a2 << invc); - a2 = (a2 >> c) | (a1 << invc); - a1 = (a1 >> c) | (a0 << invc); - a0 = (a0 >> c); + sticky |= shr_double(a3, 0, c); + a3 = shr_double(a2, a3, c); + a2 = shr_double(a1, a2, c); + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; done: a->frac_lo = a3 | (sticky != 0); diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index 672c1db555..ec4e27a595 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -85,6 +85,42 @@ this code that are retained. #include "fpu/softfloat-types.h" #include "qemu/host-utils.h" +/** + * shl_double: double-word merging left shift + * @l: left or most-significant word + * @r: right or least-significant word + * @c: shift count + * + * Shift @l left by @c bits, shifting in bits from @r. + */ +static inline uint64_t shl_double(uint64_t l, uint64_t r, int c) +{ +#if defined(__x86_64__) + asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c)); + return l; +#else + return c ? (l << c) | (r >> (64 - c)) : l; +#endif +} + +/** + * shr_double: double-word merging right shift + * @l: left or most-significant word + * @r: right or least-significant word + * @c: shift count + * + * Shift @r right by @c bits, shifting in bits from @l. + */ +static inline uint64_t shr_double(uint64_t l, uint64_t r, int c) +{ +#if defined(__x86_64__) + asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c)); + return r; +#else + return c ? (r >> c) | (l << (64 - c)) : r; +#endif +} + /*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero | bits are shifted off, they are ``jammed'' into the least significant bit of