Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20211028' into staging

Improvements to qemu/int128
Fixes for 128/64 division.
Cleanup tcg/optimize.c
Optimize redundant sign extensions

# gpg: Signature made Thu 28 Oct 2021 09:06:00 PM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* remotes/rth/tags/pull-tcg-20211028: (60 commits)
  softmmu: fix for "after access" watchpoints
  softmmu: remove useless condition in watchpoint check
  softmmu: fix watchpoint processing in icount mode
  tcg/optimize: Propagate sign info for shifting
  tcg/optimize: Propagate sign info for bit counting
  tcg/optimize: Propagate sign info for setcond
  tcg/optimize: Propagate sign info for logical operations
  tcg/optimize: Optimize sign extensions
  tcg/optimize: Use fold_xx_to_i for rem
  tcg/optimize: Use fold_xi_to_x for div
  tcg/optimize: Use fold_xi_to_x for mul
  tcg/optimize: Use fold_xx_to_i for orc
  tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
  tcg: Extend call args using the correct opcodes
  tcg/optimize: Sink commutative operand swapping into fold functions
  tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
  tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
  tcg/optimize: Split out fold_masks
  tcg/optimize: Split out fold_ix_to_i
  tcg/optimize: Split out fold_xi_to_x
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Commit a92cecba27 by Richard Henderson, 2021-10-29 08:39:44 -07:00
11 changed files with 2075 additions and 1212 deletions

include/fpu/softfloat-macros.h

@@ -8,7 +8,6 @@
* so some portions are provided under:
* the SoftFloat-2a license
* the BSD license
* GPL-v2-or-later
*
* Any future contributions to this file after December 1st 2014 will be
* taken to be licensed under the Softfloat-2a license unless specifically
@@ -75,10 +74,6 @@ this code that are retained.
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Portions of this work are licensed under the terms of the GNU GPL,
* version 2 or later. See the COPYING file in the top-level directory.
*/
#ifndef FPU_SOFTFLOAT_MACROS_H
#define FPU_SOFTFLOAT_MACROS_H
@@ -585,83 +580,6 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
}
/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
*
* Licensed under the GPLv2/LGPLv3
*/
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
uint64_t q;
asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
return q;
#elif defined(__s390x__) && !defined(__clang__)
/* Need to use a TImode type to get an even register pair for DLGR. */
unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
asm("dlgr %0, %1" : "+r"(n) : "r"(d));
*r = n >> 64;
return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
/* From Power ISA 2.06, programming note for divdeu. */
uint64_t q1, q2, Q, r1, r2, R;
asm("divdeu %0,%2,%4; divdu %1,%3,%4"
: "=&r"(q1), "=r"(q2)
: "r"(n1), "r"(n0), "r"(d));
r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
r2 = n0 - (q2 * d);
Q = q1 + q2;
R = r1 + r2;
if (R >= d || R < r2) { /* overflow implies R > d */
Q += 1;
R -= d;
}
*r = R;
return Q;
#else
uint64_t d0, d1, q0, q1, r1, r0, m;
d0 = (uint32_t)d;
d1 = d >> 32;
r1 = n1 % d1;
q1 = n1 / d1;
m = q1 * d0;
r1 = (r1 << 32) | (n0 >> 32);
if (r1 < m) {
q1 -= 1;
r1 += d;
if (r1 >= d) {
if (r1 < m) {
q1 -= 1;
r1 += d;
}
}
}
r1 -= m;
r0 = r1 % d1;
q0 = r1 / d1;
m = q0 * d0;
r0 = (r0 << 32) | (uint32_t)n0;
if (r0 < m) {
q0 -= 1;
r0 += d;
if (r0 >= d) {
if (r0 < m) {
q0 -= 1;
r0 += d;
}
}
}
r0 -= m;
*r = r0;
return (q1 << 32) | q0;
#endif
}
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
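For reference, udiv_qrnnd() (moved by this series from this header into include/qemu/host-utils.h) divides the 128-bit value (n1 << 64) | n0 by d, storing the remainder through *r and returning the quotient; the caller must guarantee n1 < d so the quotient fits in 64 bits. A minimal sketch of that contract, with values easy to verify by hand:

#include <assert.h>
#include "qemu/host-utils.h"   /* udiv_qrnnd() lives here after this series */

static void demo_udiv_qrnnd(void)
{
    uint64_t r;
    /* (1 << 64) / 3: since 3 * 0x5555555555555555 == 2^64 - 1,
     * the quotient is 0x5555555555555555 with remainder 1. */
    uint64_t q = udiv_qrnnd(&r, 1, 0, 3);
    assert(q == 0x5555555555555555ULL);
    assert(r == 1);
    /* Precondition: n1 < d; otherwise the quotient would overflow 64 bits. */
}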

include/hw/clock.h

@@ -323,10 +323,7 @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
if (clk->period == 0) {
return 0;
}
/*
* Ignore divu128() return value as we've caught div-by-zero and don't
* need different behaviour for overflow.
*/
divu128(&lo, &hi, clk->period);
return lo;
}

include/qemu/host-utils.h

@@ -23,6 +23,10 @@
* THE SOFTWARE.
*/
/* Portions of this work are licensed under the terms of the GNU GPL,
* version 2 or later. See the COPYING file in the top-level directory.
*/
#ifndef HOST_UTILS_H
#define HOST_UTILS_H
@@ -52,36 +56,32 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
return (__int128_t)a * b / c;
}
static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
uint64_t divisor)
{
if (divisor == 0) {
return 1;
} else {
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
__uint128_t result = dividend / divisor;
*plow = result;
*phigh = dividend % divisor;
return result > UINT64_MAX;
}
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
__uint128_t result = dividend / divisor;
*plow = result;
*phigh = result >> 64;
return dividend % divisor;
}
static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
int64_t divisor)
{
if (divisor == 0) {
return 1;
} else {
__int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
__int128_t result = dividend / divisor;
*plow = result;
*phigh = dividend % divisor;
return result != *plow;
}
__int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
__int128_t result = dividend / divisor;
*plow = result;
*phigh = result >> 64;
return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
@@ -736,4 +736,81 @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
*/
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
*
* Licensed under the GPLv2/LGPLv3
*/
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
uint64_t q;
asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
return q;
#elif defined(__s390x__) && !defined(__clang__)
/* Need to use a TImode type to get an even register pair for DLGR. */
unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
asm("dlgr %0, %1" : "+r"(n) : "r"(d));
*r = n >> 64;
return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
/* From Power ISA 2.06, programming note for divdeu. */
uint64_t q1, q2, Q, r1, r2, R;
asm("divdeu %0,%2,%4; divdu %1,%3,%4"
: "=&r"(q1), "=r"(q2)
: "r"(n1), "r"(n0), "r"(d));
r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
r2 = n0 - (q2 * d);
Q = q1 + q2;
R = r1 + r2;
if (R >= d || R < r2) { /* overflow implies R > d */
Q += 1;
R -= d;
}
*r = R;
return Q;
#else
uint64_t d0, d1, q0, q1, r1, r0, m;
d0 = (uint32_t)d;
d1 = d >> 32;
r1 = n1 % d1;
q1 = n1 / d1;
m = q1 * d0;
r1 = (r1 << 32) | (n0 >> 32);
if (r1 < m) {
q1 -= 1;
r1 += d;
if (r1 >= d) {
if (r1 < m) {
q1 -= 1;
r1 += d;
}
}
}
r1 -= m;
r0 = r1 % d1;
q0 = r1 / d1;
m = q0 * d0;
r0 = (r0 << 32) | (uint32_t)n0;
if (r0 < m) {
q0 -= 1;
r0 += d;
if (r0 >= d) {
if (r0 < m) {
q0 -= 1;
r0 += d;
}
}
}
r0 -= m;
*r = r0;
return (q1 << 32) | q0;
#endif
}
#endif
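The signature change is the crux of this hunk: divu128() and divs128() no longer return an overflow flag; they write the full 128-bit quotient through plow/phigh and return the remainder, leaving any overflow check to the caller (see the PPC divdeu/divde helpers below). A small usage sketch of the new contract, assuming nothing beyond the prototypes above:

#include <assert.h>
#include "qemu/host-utils.h"

static void demo_div128(void)
{
    /* Unsigned: 2^64 / 3 -> quotient 0x5555555555555555, remainder 1. */
    uint64_t lo = 0, hi = 1;
    uint64_t rem = divu128(&lo, &hi, 3);
    assert(hi == 0 && lo == 0x5555555555555555ULL && rem == 1);

    /* Signed: -7 / 3 truncates toward zero -> quotient -2, remainder -1. */
    uint64_t slo = (uint64_t)-7;   /* low half of the 128-bit dividend */
    int64_t shi = -1;              /* high half: sign extension of -7 */
    int64_t srem = divs128(&slo, &shi, 3);
    assert(shi == -1 && slo == (uint64_t)-2 && srem == -1);
}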

include/qemu/int128.h

@@ -58,6 +58,11 @@ static inline Int128 int128_exts64(int64_t a)
return a;
}
static inline Int128 int128_not(Int128 a)
{
return ~a;
}
static inline Int128 int128_and(Int128 a, Int128 b)
{
return a & b;
@@ -68,6 +73,11 @@ static inline Int128 int128_or(Int128 a, Int128 b)
return a | b;
}
static inline Int128 int128_xor(Int128 a, Int128 b)
{
return a ^ b;
}
static inline Int128 int128_rshift(Int128 a, int n)
{
return a >> n;
@@ -235,6 +245,11 @@ static inline Int128 int128_exts64(int64_t a)
return int128_make128(a, (a < 0) ? -1 : 0);
}
static inline Int128 int128_not(Int128 a)
{
return int128_make128(~a.lo, ~a.hi);
}
static inline Int128 int128_and(Int128 a, Int128 b)
{
return int128_make128(a.lo & b.lo, a.hi & b.hi);
@@ -245,6 +260,11 @@ static inline Int128 int128_or(Int128 a, Int128 b)
return int128_make128(a.lo | b.lo, a.hi | b.hi);
}
static inline Int128 int128_xor(Int128 a, Int128 b)
{
return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
}
static inline Int128 int128_rshift(Int128 a, int n)
{
int64_t h;
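The new int128_not() and int128_xor() are added to both representations (native __int128 and the two-word fallback), so bitwise identities hold either way. A quick hypothetical check, using only helpers already present in qemu/int128.h:

#include <assert.h>
#include "qemu/int128.h"

static void demo_int128_bitops(void)
{
    Int128 a = int128_make128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
    Int128 b = int128_make128(0xffff0000ffff0000ULL, 0x00ff00ff00ff00ffULL);

    /* De Morgan: ~(a | b) == ~a & ~b */
    assert(int128_eq(int128_not(int128_or(a, b)),
                     int128_and(int128_not(a), int128_not(b))));

    /* a ^ b == (a | b) & ~(a & b) */
    assert(int128_eq(int128_xor(a, b),
                     int128_and(int128_or(a, b),
                                int128_not(int128_and(a, b)))));
}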

softmmu/physmem.c

@@ -929,29 +929,26 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
}
wp->hitaddr = MAX(addr, wp->vaddr);
wp->hitattrs = attrs;
if (!cpu->watchpoint_hit) {
if (wp->flags & BP_CPU && cc->tcg_ops->debug_check_watchpoint &&
!cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
wp->flags &= ~BP_WATCHPOINT_HIT;
continue;
}
cpu->watchpoint_hit = wp;
mmap_lock();
tb_check_watchpoint(cpu, ra);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
mmap_unlock();
cpu_loop_exit_restore(cpu, ra);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
mmap_unlock();
if (ra) {
cpu_restore_state(cpu, ra, true);
}
cpu_loop_exit_noexc(cpu);
}
if (wp->flags & BP_CPU && cc->tcg_ops->debug_check_watchpoint &&
!cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
wp->flags &= ~BP_WATCHPOINT_HIT;
continue;
}
cpu->watchpoint_hit = wp;
mmap_lock();
/* This call also restores vCPU state */
tb_check_watchpoint(cpu, ra);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
mmap_unlock();
cpu_loop_exit(cpu);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_LAST_IO | curr_cflags(cpu);
mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
} else {
wp->flags &= ~BP_WATCHPOINT_HIT;

target/ppc/int_helper.c

@@ -104,10 +104,11 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
uint64_t rt = 0;
int overflow = 0;
overflow = divu128(&rt, &ra, rb);
if (unlikely(overflow)) {
if (unlikely(rb == 0 || ra >= rb)) {
overflow = 1;
rt = 0; /* Undefined */
} else {
divu128(&rt, &ra, rb);
}
if (oe) {
@@ -119,13 +120,16 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
int64_t rt = 0;
uint64_t rt = 0;
int64_t ra = (int64_t)rau;
int64_t rb = (int64_t)rbu;
int overflow = divs128(&rt, &ra, rb);
int overflow = 0;
if (unlikely(overflow)) {
if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
overflow = 1;
rt = 0; /* Undefined */
} else {
divs128(&rt, &ra, rb);
}
if (oe) {
@@ -2502,6 +2506,7 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
int cr;
uint64_t lo_value;
uint64_t hi_value;
uint64_t rem;
ppc_avr_t ret = { .u64 = { 0, 0 } };
if (b->VsrSD(0) < 0) {
@@ -2537,10 +2542,10 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
* In that case, we leave r unchanged.
*/
} else {
divu128(&lo_value, &hi_value, 1000000000000000ULL);
rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
for (i = 1; i < 16; hi_value /= 10, i++) {
bcd_put_digit(&ret, hi_value % 10, i);
for (i = 1; i < 16; rem /= 10, i++) {
bcd_put_digit(&ret, rem % 10, i);
}
for (; i < 32; lo_value /= 10, i++) {
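With the new return value, bcdcfsq takes the low 15 decimal digits of the magnitude from the remainder of the division by 10^15, while the quotient left in lo_value supplies the digits above them. A standalone arithmetic sketch of that split, with hypothetical values:

#include <assert.h>
#include "qemu/host-utils.h"

static void demo_bcd_split(void)
{
    /* 2000000000000003 = 2 * 10^15 + 3 */
    uint64_t lo = 2000000000000003ULL, hi = 0;
    uint64_t rem = divu128(&lo, &hi, 1000000000000000ULL);
    assert(rem == 3);            /* remainder: BCD digits 1..15 */
    assert(lo == 2 && hi == 0);  /* quotient: the digits above them */
}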

(file diff suppressed because it is too large)

tcg/tcg.c

@@ -1508,11 +1508,11 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
if (is_32bit) {
TCGv_i64 temp = tcg_temp_new_i64();
TCGv_i64 orig = temp_tcgv_i64(args[i]);
TCGv_i32 orig = temp_tcgv_i32(args[i]);
if (is_signed) {
tcg_gen_ext32s_i64(temp, orig);
tcg_gen_ext_i32_i64(temp, orig);
} else {
tcg_gen_ext32u_i64(temp, orig);
tcg_gen_extu_i32_i64(temp, orig);
}
args[i] = tcgv_i64_temp(temp);
}

tests/unit/meson.build

@@ -23,6 +23,7 @@ tests = {
# all code tested by test-x86-cpuid is inside topology.h
'test-x86-cpuid': [],
'test-cutils': [],
'test-div128': [],
'test-shift128': [],
'test-mul64': [],
# all code tested by test-int128 is inside int128.h

tests/unit/test-div128.c (new file, 197 lines)

@@ -0,0 +1,197 @@
/*
* Test 128-bit division functions
*
* Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
typedef struct {
uint64_t high;
uint64_t low;
uint64_t rhigh;
uint64_t rlow;
uint64_t divisor;
uint64_t remainder;
} test_data_unsigned;
typedef struct {
int64_t high;
uint64_t low;
int64_t rhigh;
uint64_t rlow;
int64_t divisor;
int64_t remainder;
} test_data_signed;
static const test_data_unsigned test_table_unsigned[] = {
/* Dividend fits in 64 bits */
{ 0x0000000000000000ULL, 0x0000000000000000ULL,
0x0000000000000000ULL, 0x0000000000000000ULL,
0x0000000000000001ULL, 0x0000000000000000ULL},
{ 0x0000000000000000ULL, 0x0000000000000001ULL,
0x0000000000000000ULL, 0x0000000000000001ULL,
0x0000000000000001ULL, 0x0000000000000000ULL},
{ 0x0000000000000000ULL, 0x0000000000000003ULL,
0x0000000000000000ULL, 0x0000000000000001ULL,
0x0000000000000002ULL, 0x0000000000000001ULL},
{ 0x0000000000000000ULL, 0x8000000000000000ULL,
0x0000000000000000ULL, 0x8000000000000000ULL,
0x0000000000000001ULL, 0x0000000000000000ULL},
{ 0x0000000000000000ULL, 0xa000000000000000ULL,
0x0000000000000000ULL, 0x0000000000000002ULL,
0x4000000000000000ULL, 0x2000000000000000ULL},
{ 0x0000000000000000ULL, 0x8000000000000000ULL,
0x0000000000000000ULL, 0x0000000000000001ULL,
0x8000000000000000ULL, 0x0000000000000000ULL},
/* Dividend > 64 bits, with MSB 0 */
{ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
0x123456789abcdefeULL, 0xefedcba987654321ULL,
0x0000000000000001ULL, 0x0000000000000000ULL},
{ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
0x0000000000000001ULL, 0x000000000000000dULL,
0x123456789abcdefeULL, 0x03456789abcdf03bULL},
{ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
0x0123456789abcdefULL, 0xeefedcba98765432ULL,
0x0000000000000010ULL, 0x0000000000000001ULL},
/* Dividend > 64 bits, with MSB 1 */
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0x0000000000000001ULL, 0x0000000000000000ULL},
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0x0000000000000001ULL, 0x0000000000000000ULL,
0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
0x0000000000000010ULL, 0x000000000000000fULL},
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
/**
* Divisor == 64 bits, with MSB 1
* and high 64 bits of dividend >= divisor
* (for testing normalization)
*/
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0x0000000000000001ULL, 0x0000000000000000ULL,
0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
0x0000000000000001ULL, 0xfddbb9977553310aULL,
0x8000000000000001ULL, 0x78899aabbccddf05ULL},
/* Dividend > 64 bits, divisor almost as big */
{ 0x0000000000000001ULL, 0x23456789abcdef01ULL,
0x0000000000000000ULL, 0x000000000000000fULL,
0x123456789abcdefeULL, 0x123456789abcde1fULL},
};
static const test_data_signed test_table_signed[] = {
/* Positive dividend, positive/negative divisors */
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0x0000000000000000LL, 0x0000000000bc614eULL,
0x0000000000000001LL, 0x0000000000000000LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0xffffffffffffffffLL, 0x0000000000000000LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0x0000000000000000LL, 0x00000000005e30a7ULL,
0x0000000000000002LL, 0x0000000000000000LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
0xfffffffffffffffeLL, 0x0000000000000000LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0x0000000000000000LL, 0x0000000000178c29ULL,
0x0000000000000008LL, 0x0000000000000006LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
0xfffffffffffffff8LL, 0x0000000000000006LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0x0000000000000000LL, 0x000000000000550dULL,
0x0000000000000237LL, 0x0000000000000183LL},
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
0xfffffffffffffdc9LL, 0x0000000000000183LL},
/* Negative dividend, positive/negative divisors */
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0x0000000000000001LL, 0x0000000000000000LL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0x0000000000000000LL, 0x0000000000bc614eULL,
0xffffffffffffffffLL, 0x0000000000000000LL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
0x0000000000000002LL, 0x0000000000000000LL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0x0000000000000000LL, 0x00000000005e30a7ULL,
0xfffffffffffffffeLL, 0x0000000000000000LL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
0x0000000000000008LL, 0xfffffffffffffffaLL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0x0000000000000000LL, 0x0000000000178c29ULL,
0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
0x0000000000000237LL, 0xfffffffffffffe7dLL},
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
0x0000000000000000LL, 0x000000000000550dULL,
0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
};
static void test_divu128(void)
{
int i;
uint64_t rem;
test_data_unsigned tmp;
for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
tmp = test_table_unsigned[i];
rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
g_assert_cmpuint(tmp.low, ==, tmp.rlow);
g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
g_assert_cmpuint(rem, ==, tmp.remainder);
}
}
static void test_divs128(void)
{
int i;
int64_t rem;
test_data_signed tmp;
for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
tmp = test_table_signed[i];
rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
g_assert_cmpuint(tmp.low, ==, tmp.rlow);
g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
g_assert_cmpuint(rem, ==, tmp.remainder);
}
}
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
g_test_add_func("/host-utils/test_divu128", test_divu128);
g_test_add_func("/host-utils/test_divs128", test_divs128);
return g_test_run();
}

util/host-utils.c

@@ -86,78 +86,119 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
*phigh = rh;
}
/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
/* remainder via phigh. */
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
/*
* Unsigned 128-by-64 division.
* Returns the remainder.
* Returns quotient via plow and phigh.
* Also returns the remainder via the function return value.
*/
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
uint64_t dhi = *phigh;
uint64_t dlo = *plow;
unsigned i;
uint64_t carry = 0;
uint64_t rem, dhighest;
int sh;
if (divisor == 0) {
return 1;
} else if (dhi == 0) {
if (divisor == 0 || dhi == 0) {
*plow = dlo / divisor;
*phigh = dlo % divisor;
return 0;
} else if (dhi >= divisor) {
return 1;
*phigh = 0;
return dlo % divisor;
} else {
sh = clz64(divisor);
for (i = 0; i < 64; i++) {
carry = dhi >> 63;
dhi = (dhi << 1) | (dlo >> 63);
if (carry || (dhi >= divisor)) {
dhi -= divisor;
carry = 1;
} else {
carry = 0;
if (dhi < divisor) {
if (sh != 0) {
/* normalize the divisor, shifting the dividend accordingly */
divisor <<= sh;
dhi = (dhi << sh) | (dlo >> (64 - sh));
dlo <<= sh;
}
dlo = (dlo << 1) | carry;
*phigh = 0;
*plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
} else {
if (sh != 0) {
/* normalize the divisor, shifting the dividend accordingly */
divisor <<= sh;
dhighest = dhi >> (64 - sh);
dhi = (dhi << sh) | (dlo >> (64 - sh));
dlo <<= sh;
*phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
} else {
/**
* dhi >= divisor
* Since the MSB of divisor is set (sh == 0),
* (dhi - divisor) < divisor
*
* Thus, the high part of the quotient is 1, and we can
* calculate the low part with a single call to udiv_qrnnd
* after subtracting divisor from dhi
*/
dhi -= divisor;
*phigh = 1;
}
*plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
}
*plow = dlo;
*phigh = dhi;
return 0;
/*
* since the dividend/divisor might have been normalized,
* the remainder might also have to be shifted back
*/
return rem >> sh;
}
}
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
/*
* Signed 128-by-64 division.
* Returns quotient via plow and phigh.
* Also returns the remainder via the function return value.
*/
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
{
int sgn_dvdnd = *phigh < 0;
int sgn_divsr = divisor < 0;
int overflow = 0;
bool neg_quotient = false, neg_remainder = false;
uint64_t unsig_hi = *phigh, unsig_lo = *plow;
uint64_t rem;
if (sgn_dvdnd) {
*plow = ~(*plow);
*phigh = ~(*phigh);
if (*plow == (int64_t)-1) {
*plow = 0;
(*phigh)++;
} else {
(*plow)++;
}
}
if (*phigh < 0) {
neg_quotient = !neg_quotient;
neg_remainder = !neg_remainder;
if (sgn_divsr) {
divisor = 0 - divisor;
}
overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
if (sgn_dvdnd ^ sgn_divsr) {
*plow = 0 - *plow;
}
if (!overflow) {
if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
overflow = 1;
if (unsig_lo == 0) {
unsig_hi = -unsig_hi;
} else {
unsig_hi = ~unsig_hi;
unsig_lo = -unsig_lo;
}
}
return overflow;
if (divisor < 0) {
neg_quotient = !neg_quotient;
divisor = -divisor;
}
rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
if (neg_quotient) {
if (unsig_lo == 0) {
*phigh = -unsig_hi;
*plow = 0;
} else {
*phigh = ~unsig_hi;
*plow = -unsig_lo;
}
} else {
*phigh = unsig_hi;
*plow = unsig_lo;
}
if (neg_remainder) {
return -rem;
} else {
return rem;
}
}
#endif
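When the divisor's top bit is clear, divu128() above normalizes: divisor and dividend are both shifted left by sh = clz64(divisor) so that udiv_qrnnd()'s precondition holds, and the remainder is shifted back by sh at the end; divs128() then reduces the signed case to divu128() on magnitudes and fixes up the signs afterward. A worked check of the normalization path, a sketch assuming only the code above:

#include <assert.h>
#include "qemu/host-utils.h"

static void demo_normalization(void)
{
    /* divisor 6 has clz64(6) == 61, so the sh != 0 branch is taken:
     * 2^64 / 6 = 0x2aaaaaaaaaaaaaaa remainder 4. */
    uint64_t lo = 0, hi = 1;
    uint64_t rem = divu128(&lo, &hi, 6);
    assert(lo == 0x2aaaaaaaaaaaaaaaULL && hi == 0 && rem == 4);
}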