Improvements to qemu/int128
Fixes for 128/64 division. Cleanup tcg/optimize.c Optimize redundant sign extensions -----BEGIN PGP SIGNATURE----- iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmF7cygdHHJpY2hhcmQu aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8mAggAtHuBHs018O6k9dSl 5JJReghwMvsapV5w3MTfN72UR8xTVyC0+dk+P3hv2qJMx/Oofb2Z0m9e9n/iwWxJ kktySWUuHXE/Hty4fVSEfUdx0C4FBF49I1PllzzjS8gR2gHbEoHXc2doJVCXCW0C BSKzWERZjVdHWT2GeBtSV0n4vOoiHoBaa5ZcH7VVXVOlpT2iu8Tn3RlVELA1h3pY NeDLCONWNAXHDQfM+63glLDTZ7eMZ8deOcLgJAiYDA2XVegYGeTZuqdBT3SiTno+ ts4D5aBkmy8yinCcJQktd3alsM1cwYlco0U/x8+JEvNqzWmLzsRpox7g6+rrpe+d KhZ7Ww== =UEO3 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20211028' into staging Improvements to qemu/int128 Fixes for 128/64 division. Cleanup tcg/optimize.c Optimize redundant sign extensions # gpg: Signature made Thu 28 Oct 2021 09:06:00 PM PDT # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate] * remotes/rth/tags/pull-tcg-20211028: (60 commits) softmmu: fix for "after access" watchpoints softmmu: remove useless condition in watchpoint check softmmu: fix watchpoint processing in icount mode tcg/optimize: Propagate sign info for shifting tcg/optimize: Propagate sign info for bit counting tcg/optimize: Propagate sign info for setcond tcg/optimize: Propagate sign info for logical operations tcg/optimize: Optimize sign extensions tcg/optimize: Use fold_xx_to_i for rem tcg/optimize: Use fold_xi_to_x for div tcg/optimize: Use fold_xi_to_x for mul tcg/optimize: Use fold_xx_to_i for orc tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values tcg: Extend call args using the correct opcodes tcg/optimize: Sink commutative operand swapping into fold functions tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies tcg/optimize: Split out fold_masks tcg/optimize: Split out fold_ix_to_i tcg/optimize: Split out fold_xi_to_x ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
a92cecba27
@ -8,7 +8,6 @@
|
||||
* so some portions are provided under:
|
||||
* the SoftFloat-2a license
|
||||
* the BSD license
|
||||
* GPL-v2-or-later
|
||||
*
|
||||
* Any future contributions to this file after December 1st 2014 will be
|
||||
* taken to be licensed under the Softfloat-2a license unless specifically
|
||||
@ -75,10 +74,6 @@ this code that are retained.
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Portions of this work are licensed under the terms of the GNU GPL,
|
||||
* version 2 or later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef FPU_SOFTFLOAT_MACROS_H
|
||||
#define FPU_SOFTFLOAT_MACROS_H
|
||||
|
||||
@ -585,83 +580,6 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
|
||||
|
||||
}
|
||||
|
||||
/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
|
||||
* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
|
||||
*
|
||||
* Licensed under the GPLv2/LGPLv3
|
||||
*/
|
||||
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
|
||||
uint64_t n0, uint64_t d)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
uint64_t q;
|
||||
asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
|
||||
return q;
|
||||
#elif defined(__s390x__) && !defined(__clang__)
|
||||
/* Need to use a TImode type to get an even register pair for DLGR. */
|
||||
unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
|
||||
asm("dlgr %0, %1" : "+r"(n) : "r"(d));
|
||||
*r = n >> 64;
|
||||
return n;
|
||||
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
|
||||
/* From Power ISA 2.06, programming note for divdeu. */
|
||||
uint64_t q1, q2, Q, r1, r2, R;
|
||||
asm("divdeu %0,%2,%4; divdu %1,%3,%4"
|
||||
: "=&r"(q1), "=r"(q2)
|
||||
: "r"(n1), "r"(n0), "r"(d));
|
||||
r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
|
||||
r2 = n0 - (q2 * d);
|
||||
Q = q1 + q2;
|
||||
R = r1 + r2;
|
||||
if (R >= d || R < r2) { /* overflow implies R > d */
|
||||
Q += 1;
|
||||
R -= d;
|
||||
}
|
||||
*r = R;
|
||||
return Q;
|
||||
#else
|
||||
uint64_t d0, d1, q0, q1, r1, r0, m;
|
||||
|
||||
d0 = (uint32_t)d;
|
||||
d1 = d >> 32;
|
||||
|
||||
r1 = n1 % d1;
|
||||
q1 = n1 / d1;
|
||||
m = q1 * d0;
|
||||
r1 = (r1 << 32) | (n0 >> 32);
|
||||
if (r1 < m) {
|
||||
q1 -= 1;
|
||||
r1 += d;
|
||||
if (r1 >= d) {
|
||||
if (r1 < m) {
|
||||
q1 -= 1;
|
||||
r1 += d;
|
||||
}
|
||||
}
|
||||
}
|
||||
r1 -= m;
|
||||
|
||||
r0 = r1 % d1;
|
||||
q0 = r1 / d1;
|
||||
m = q0 * d0;
|
||||
r0 = (r0 << 32) | (uint32_t)n0;
|
||||
if (r0 < m) {
|
||||
q0 -= 1;
|
||||
r0 += d;
|
||||
if (r0 >= d) {
|
||||
if (r0 < m) {
|
||||
q0 -= 1;
|
||||
r0 += d;
|
||||
}
|
||||
}
|
||||
}
|
||||
r0 -= m;
|
||||
|
||||
*r = r0;
|
||||
return (q1 << 32) | q0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns an approximation to the square root of the 32-bit significand given
|
||||
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
|
||||
|
@ -323,10 +323,7 @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
|
||||
if (clk->period == 0) {
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Ignore divu128() return value as we've caught div-by-zero and don't
|
||||
* need different behaviour for overflow.
|
||||
*/
|
||||
|
||||
divu128(&lo, &hi, clk->period);
|
||||
return lo;
|
||||
}
|
||||
|
@ -23,6 +23,10 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Portions of this work are licensed under the terms of the GNU GPL,
|
||||
* version 2 or later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef HOST_UTILS_H
|
||||
#define HOST_UTILS_H
|
||||
|
||||
@ -52,36 +56,32 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
|
||||
return (__int128_t)a * b / c;
|
||||
}
|
||||
|
||||
static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
|
||||
static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
|
||||
uint64_t divisor)
|
||||
{
|
||||
if (divisor == 0) {
|
||||
return 1;
|
||||
} else {
|
||||
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
|
||||
__uint128_t result = dividend / divisor;
|
||||
*plow = result;
|
||||
*phigh = dividend % divisor;
|
||||
return result > UINT64_MAX;
|
||||
}
|
||||
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
|
||||
__uint128_t result = dividend / divisor;
|
||||
|
||||
*plow = result;
|
||||
*phigh = result >> 64;
|
||||
return dividend % divisor;
|
||||
}
|
||||
|
||||
static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
|
||||
static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
|
||||
int64_t divisor)
|
||||
{
|
||||
if (divisor == 0) {
|
||||
return 1;
|
||||
} else {
|
||||
__int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
|
||||
__int128_t result = dividend / divisor;
|
||||
*plow = result;
|
||||
*phigh = dividend % divisor;
|
||||
return result != *plow;
|
||||
}
|
||||
__int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
|
||||
__int128_t result = dividend / divisor;
|
||||
|
||||
*plow = result;
|
||||
*phigh = result >> 64;
|
||||
return dividend % divisor;
|
||||
}
|
||||
#else
|
||||
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
|
||||
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
|
||||
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
|
||||
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
|
||||
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
|
||||
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
|
||||
|
||||
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
|
||||
{
|
||||
@ -736,4 +736,81 @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
|
||||
*/
|
||||
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
|
||||
|
||||
/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
|
||||
* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
|
||||
*
|
||||
* Licensed under the GPLv2/LGPLv3
|
||||
*/
|
||||
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
|
||||
uint64_t n0, uint64_t d)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
uint64_t q;
|
||||
asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
|
||||
return q;
|
||||
#elif defined(__s390x__) && !defined(__clang__)
|
||||
/* Need to use a TImode type to get an even register pair for DLGR. */
|
||||
unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
|
||||
asm("dlgr %0, %1" : "+r"(n) : "r"(d));
|
||||
*r = n >> 64;
|
||||
return n;
|
||||
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
|
||||
/* From Power ISA 2.06, programming note for divdeu. */
|
||||
uint64_t q1, q2, Q, r1, r2, R;
|
||||
asm("divdeu %0,%2,%4; divdu %1,%3,%4"
|
||||
: "=&r"(q1), "=r"(q2)
|
||||
: "r"(n1), "r"(n0), "r"(d));
|
||||
r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
|
||||
r2 = n0 - (q2 * d);
|
||||
Q = q1 + q2;
|
||||
R = r1 + r2;
|
||||
if (R >= d || R < r2) { /* overflow implies R > d */
|
||||
Q += 1;
|
||||
R -= d;
|
||||
}
|
||||
*r = R;
|
||||
return Q;
|
||||
#else
|
||||
uint64_t d0, d1, q0, q1, r1, r0, m;
|
||||
|
||||
d0 = (uint32_t)d;
|
||||
d1 = d >> 32;
|
||||
|
||||
r1 = n1 % d1;
|
||||
q1 = n1 / d1;
|
||||
m = q1 * d0;
|
||||
r1 = (r1 << 32) | (n0 >> 32);
|
||||
if (r1 < m) {
|
||||
q1 -= 1;
|
||||
r1 += d;
|
||||
if (r1 >= d) {
|
||||
if (r1 < m) {
|
||||
q1 -= 1;
|
||||
r1 += d;
|
||||
}
|
||||
}
|
||||
}
|
||||
r1 -= m;
|
||||
|
||||
r0 = r1 % d1;
|
||||
q0 = r1 / d1;
|
||||
m = q0 * d0;
|
||||
r0 = (r0 << 32) | (uint32_t)n0;
|
||||
if (r0 < m) {
|
||||
q0 -= 1;
|
||||
r0 += d;
|
||||
if (r0 >= d) {
|
||||
if (r0 < m) {
|
||||
q0 -= 1;
|
||||
r0 += d;
|
||||
}
|
||||
}
|
||||
}
|
||||
r0 -= m;
|
||||
|
||||
*r = r0;
|
||||
return (q1 << 32) | q0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -58,6 +58,11 @@ static inline Int128 int128_exts64(int64_t a)
|
||||
return a;
|
||||
}
|
||||
|
||||
static inline Int128 int128_not(Int128 a)
|
||||
{
|
||||
return ~a;
|
||||
}
|
||||
|
||||
static inline Int128 int128_and(Int128 a, Int128 b)
|
||||
{
|
||||
return a & b;
|
||||
@ -68,6 +73,11 @@ static inline Int128 int128_or(Int128 a, Int128 b)
|
||||
return a | b;
|
||||
}
|
||||
|
||||
static inline Int128 int128_xor(Int128 a, Int128 b)
|
||||
{
|
||||
return a ^ b;
|
||||
}
|
||||
|
||||
static inline Int128 int128_rshift(Int128 a, int n)
|
||||
{
|
||||
return a >> n;
|
||||
@ -235,6 +245,11 @@ static inline Int128 int128_exts64(int64_t a)
|
||||
return int128_make128(a, (a < 0) ? -1 : 0);
|
||||
}
|
||||
|
||||
static inline Int128 int128_not(Int128 a)
|
||||
{
|
||||
return int128_make128(~a.lo, ~a.hi);
|
||||
}
|
||||
|
||||
static inline Int128 int128_and(Int128 a, Int128 b)
|
||||
{
|
||||
return int128_make128(a.lo & b.lo, a.hi & b.hi);
|
||||
@ -245,6 +260,11 @@ static inline Int128 int128_or(Int128 a, Int128 b)
|
||||
return int128_make128(a.lo | b.lo, a.hi | b.hi);
|
||||
}
|
||||
|
||||
static inline Int128 int128_xor(Int128 a, Int128 b)
|
||||
{
|
||||
return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
|
||||
}
|
||||
|
||||
static inline Int128 int128_rshift(Int128 a, int n)
|
||||
{
|
||||
int64_t h;
|
||||
|
@ -929,29 +929,26 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
|
||||
}
|
||||
wp->hitaddr = MAX(addr, wp->vaddr);
|
||||
wp->hitattrs = attrs;
|
||||
if (!cpu->watchpoint_hit) {
|
||||
if (wp->flags & BP_CPU && cc->tcg_ops->debug_check_watchpoint &&
|
||||
!cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
|
||||
wp->flags &= ~BP_WATCHPOINT_HIT;
|
||||
continue;
|
||||
}
|
||||
cpu->watchpoint_hit = wp;
|
||||
|
||||
mmap_lock();
|
||||
tb_check_watchpoint(cpu, ra);
|
||||
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
|
||||
cpu->exception_index = EXCP_DEBUG;
|
||||
mmap_unlock();
|
||||
cpu_loop_exit_restore(cpu, ra);
|
||||
} else {
|
||||
/* Force execution of one insn next time. */
|
||||
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
|
||||
mmap_unlock();
|
||||
if (ra) {
|
||||
cpu_restore_state(cpu, ra, true);
|
||||
}
|
||||
cpu_loop_exit_noexc(cpu);
|
||||
}
|
||||
if (wp->flags & BP_CPU && cc->tcg_ops->debug_check_watchpoint &&
|
||||
!cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
|
||||
wp->flags &= ~BP_WATCHPOINT_HIT;
|
||||
continue;
|
||||
}
|
||||
cpu->watchpoint_hit = wp;
|
||||
|
||||
mmap_lock();
|
||||
/* This call also restores vCPU state */
|
||||
tb_check_watchpoint(cpu, ra);
|
||||
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
|
||||
cpu->exception_index = EXCP_DEBUG;
|
||||
mmap_unlock();
|
||||
cpu_loop_exit(cpu);
|
||||
} else {
|
||||
/* Force execution of one insn next time. */
|
||||
cpu->cflags_next_tb = 1 | CF_LAST_IO | curr_cflags(cpu);
|
||||
mmap_unlock();
|
||||
cpu_loop_exit_noexc(cpu);
|
||||
}
|
||||
} else {
|
||||
wp->flags &= ~BP_WATCHPOINT_HIT;
|
||||
|
@ -104,10 +104,11 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
|
||||
uint64_t rt = 0;
|
||||
int overflow = 0;
|
||||
|
||||
overflow = divu128(&rt, &ra, rb);
|
||||
|
||||
if (unlikely(overflow)) {
|
||||
if (unlikely(rb == 0 || ra >= rb)) {
|
||||
overflow = 1;
|
||||
rt = 0; /* Undefined */
|
||||
} else {
|
||||
divu128(&rt, &ra, rb);
|
||||
}
|
||||
|
||||
if (oe) {
|
||||
@ -119,13 +120,16 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
|
||||
|
||||
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
|
||||
{
|
||||
int64_t rt = 0;
|
||||
uint64_t rt = 0;
|
||||
int64_t ra = (int64_t)rau;
|
||||
int64_t rb = (int64_t)rbu;
|
||||
int overflow = divs128(&rt, &ra, rb);
|
||||
int overflow = 0;
|
||||
|
||||
if (unlikely(overflow)) {
|
||||
if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
|
||||
overflow = 1;
|
||||
rt = 0; /* Undefined */
|
||||
} else {
|
||||
divs128(&rt, &ra, rb);
|
||||
}
|
||||
|
||||
if (oe) {
|
||||
@ -2502,6 +2506,7 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
|
||||
int cr;
|
||||
uint64_t lo_value;
|
||||
uint64_t hi_value;
|
||||
uint64_t rem;
|
||||
ppc_avr_t ret = { .u64 = { 0, 0 } };
|
||||
|
||||
if (b->VsrSD(0) < 0) {
|
||||
@ -2537,10 +2542,10 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
|
||||
* In that case, we leave r unchanged.
|
||||
*/
|
||||
} else {
|
||||
divu128(&lo_value, &hi_value, 1000000000000000ULL);
|
||||
rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
|
||||
|
||||
for (i = 1; i < 16; hi_value /= 10, i++) {
|
||||
bcd_put_digit(&ret, hi_value % 10, i);
|
||||
for (i = 1; i < 16; rem /= 10, i++) {
|
||||
bcd_put_digit(&ret, rem % 10, i);
|
||||
}
|
||||
|
||||
for (; i < 32; lo_value /= 10, i++) {
|
||||
|
2644
tcg/optimize.c
2644
tcg/optimize.c
File diff suppressed because it is too large
Load Diff
@ -1508,11 +1508,11 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
|
||||
|
||||
if (is_32bit) {
|
||||
TCGv_i64 temp = tcg_temp_new_i64();
|
||||
TCGv_i64 orig = temp_tcgv_i64(args[i]);
|
||||
TCGv_i32 orig = temp_tcgv_i32(args[i]);
|
||||
if (is_signed) {
|
||||
tcg_gen_ext32s_i64(temp, orig);
|
||||
tcg_gen_ext_i32_i64(temp, orig);
|
||||
} else {
|
||||
tcg_gen_ext32u_i64(temp, orig);
|
||||
tcg_gen_extu_i32_i64(temp, orig);
|
||||
}
|
||||
args[i] = tcgv_i64_temp(temp);
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ tests = {
|
||||
# all code tested by test-x86-cpuid is inside topology.h
|
||||
'test-x86-cpuid': [],
|
||||
'test-cutils': [],
|
||||
'test-div128': [],
|
||||
'test-shift128': [],
|
||||
'test-mul64': [],
|
||||
# all code tested by test-int128 is inside int128.h
|
||||
|
197
tests/unit/test-div128.c
Normal file
197
tests/unit/test-div128.c
Normal file
@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Test 128-bit division functions
|
||||
*
|
||||
* Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/host-utils.h"
|
||||
|
||||
typedef struct {
|
||||
uint64_t high;
|
||||
uint64_t low;
|
||||
uint64_t rhigh;
|
||||
uint64_t rlow;
|
||||
uint64_t divisor;
|
||||
uint64_t remainder;
|
||||
} test_data_unsigned;
|
||||
|
||||
typedef struct {
|
||||
int64_t high;
|
||||
uint64_t low;
|
||||
int64_t rhigh;
|
||||
uint64_t rlow;
|
||||
int64_t divisor;
|
||||
int64_t remainder;
|
||||
} test_data_signed;
|
||||
|
||||
static const test_data_unsigned test_table_unsigned[] = {
|
||||
/* Dividend fits in 64 bits */
|
||||
{ 0x0000000000000000ULL, 0x0000000000000000ULL,
|
||||
0x0000000000000000ULL, 0x0000000000000000ULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL},
|
||||
{ 0x0000000000000000ULL, 0x0000000000000001ULL,
|
||||
0x0000000000000000ULL, 0x0000000000000001ULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL},
|
||||
{ 0x0000000000000000ULL, 0x0000000000000003ULL,
|
||||
0x0000000000000000ULL, 0x0000000000000001ULL,
|
||||
0x0000000000000002ULL, 0x0000000000000001ULL},
|
||||
{ 0x0000000000000000ULL, 0x8000000000000000ULL,
|
||||
0x0000000000000000ULL, 0x8000000000000000ULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL},
|
||||
{ 0x0000000000000000ULL, 0xa000000000000000ULL,
|
||||
0x0000000000000000ULL, 0x0000000000000002ULL,
|
||||
0x4000000000000000ULL, 0x2000000000000000ULL},
|
||||
{ 0x0000000000000000ULL, 0x8000000000000000ULL,
|
||||
0x0000000000000000ULL, 0x0000000000000001ULL,
|
||||
0x8000000000000000ULL, 0x0000000000000000ULL},
|
||||
|
||||
/* Dividend > 64 bits, with MSB 0 */
|
||||
{ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
|
||||
0x123456789abcdefeULL, 0xefedcba987654321ULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL},
|
||||
{ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
|
||||
0x0000000000000001ULL, 0x000000000000000dULL,
|
||||
0x123456789abcdefeULL, 0x03456789abcdf03bULL},
|
||||
{ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
|
||||
0x0123456789abcdefULL, 0xeefedcba98765432ULL,
|
||||
0x0000000000000010ULL, 0x0000000000000001ULL},
|
||||
|
||||
/* Dividend > 64 bits, with MSB 1 */
|
||||
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL},
|
||||
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL,
|
||||
0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
|
||||
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
|
||||
0x0000000000000010ULL, 0x000000000000000fULL},
|
||||
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
|
||||
0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
|
||||
|
||||
/**
|
||||
* Divisor == 64 bits, with MSB 1
|
||||
* and high 64 bits of dividend >= divisor
|
||||
* (for testing normalization)
|
||||
*/
|
||||
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0x0000000000000001ULL, 0x0000000000000000ULL,
|
||||
0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
|
||||
{ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
|
||||
0x0000000000000001ULL, 0xfddbb9977553310aULL,
|
||||
0x8000000000000001ULL, 0x78899aabbccddf05ULL},
|
||||
|
||||
/* Dividend > 64 bits, divisor almost as big */
|
||||
{ 0x0000000000000001ULL, 0x23456789abcdef01ULL,
|
||||
0x0000000000000000ULL, 0x000000000000000fULL,
|
||||
0x123456789abcdefeULL, 0x123456789abcde1fULL},
|
||||
};
|
||||
|
||||
static const test_data_signed test_table_signed[] = {
|
||||
/* Positive dividend, positive/negative divisors */
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0x0000000000000001LL, 0x0000000000000000LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0xffffffffffffffffLL, 0x0000000000000000LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0x0000000000000000LL, 0x00000000005e30a7ULL,
|
||||
0x0000000000000002LL, 0x0000000000000000LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
|
||||
0xfffffffffffffffeLL, 0x0000000000000000LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0x0000000000000000LL, 0x0000000000178c29ULL,
|
||||
0x0000000000000008LL, 0x0000000000000006LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
|
||||
0xfffffffffffffff8LL, 0x0000000000000006LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0x0000000000000000LL, 0x000000000000550dULL,
|
||||
0x0000000000000237LL, 0x0000000000000183LL},
|
||||
{ 0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
|
||||
0xfffffffffffffdc9LL, 0x0000000000000183LL},
|
||||
|
||||
/* Negative dividend, positive/negative divisors */
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0x0000000000000001LL, 0x0000000000000000LL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0x0000000000000000LL, 0x0000000000bc614eULL,
|
||||
0xffffffffffffffffLL, 0x0000000000000000LL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
|
||||
0x0000000000000002LL, 0x0000000000000000LL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0x0000000000000000LL, 0x00000000005e30a7ULL,
|
||||
0xfffffffffffffffeLL, 0x0000000000000000LL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
|
||||
0x0000000000000008LL, 0xfffffffffffffffaLL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0x0000000000000000LL, 0x0000000000178c29ULL,
|
||||
0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
|
||||
0x0000000000000237LL, 0xfffffffffffffe7dLL},
|
||||
{ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
|
||||
0x0000000000000000LL, 0x000000000000550dULL,
|
||||
0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
|
||||
};
|
||||
|
||||
static void test_divu128(void)
|
||||
{
|
||||
int i;
|
||||
uint64_t rem;
|
||||
test_data_unsigned tmp;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
|
||||
tmp = test_table_unsigned[i];
|
||||
|
||||
rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
|
||||
g_assert_cmpuint(tmp.low, ==, tmp.rlow);
|
||||
g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
|
||||
g_assert_cmpuint(rem, ==, tmp.remainder);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_divs128(void)
|
||||
{
|
||||
int i;
|
||||
int64_t rem;
|
||||
test_data_signed tmp;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
|
||||
tmp = test_table_signed[i];
|
||||
|
||||
rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
|
||||
g_assert_cmpuint(tmp.low, ==, tmp.rlow);
|
||||
g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
|
||||
g_assert_cmpuint(rem, ==, tmp.remainder);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
g_test_init(&argc, &argv, NULL);
|
||||
g_test_add_func("/host-utils/test_divu128", test_divu128);
|
||||
g_test_add_func("/host-utils/test_divs128", test_divs128);
|
||||
return g_test_run();
|
||||
}
|
@ -86,78 +86,119 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
|
||||
*phigh = rh;
|
||||
}
|
||||
|
||||
/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
|
||||
/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
|
||||
/* remainder via phigh. */
|
||||
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
|
||||
/*
|
||||
* Unsigned 128-by-64 division.
|
||||
* Returns the remainder.
|
||||
* Returns quotient via plow and phigh.
|
||||
* Also returns the remainder via the function return value.
|
||||
*/
|
||||
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
|
||||
{
|
||||
uint64_t dhi = *phigh;
|
||||
uint64_t dlo = *plow;
|
||||
unsigned i;
|
||||
uint64_t carry = 0;
|
||||
uint64_t rem, dhighest;
|
||||
int sh;
|
||||
|
||||
if (divisor == 0) {
|
||||
return 1;
|
||||
} else if (dhi == 0) {
|
||||
if (divisor == 0 || dhi == 0) {
|
||||
*plow = dlo / divisor;
|
||||
*phigh = dlo % divisor;
|
||||
return 0;
|
||||
} else if (dhi >= divisor) {
|
||||
return 1;
|
||||
*phigh = 0;
|
||||
return dlo % divisor;
|
||||
} else {
|
||||
sh = clz64(divisor);
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
carry = dhi >> 63;
|
||||
dhi = (dhi << 1) | (dlo >> 63);
|
||||
if (carry || (dhi >= divisor)) {
|
||||
dhi -= divisor;
|
||||
carry = 1;
|
||||
} else {
|
||||
carry = 0;
|
||||
if (dhi < divisor) {
|
||||
if (sh != 0) {
|
||||
/* normalize the divisor, shifting the dividend accordingly */
|
||||
divisor <<= sh;
|
||||
dhi = (dhi << sh) | (dlo >> (64 - sh));
|
||||
dlo <<= sh;
|
||||
}
|
||||
dlo = (dlo << 1) | carry;
|
||||
|
||||
*phigh = 0;
|
||||
*plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
|
||||
} else {
|
||||
if (sh != 0) {
|
||||
/* normalize the divisor, shifting the dividend accordingly */
|
||||
divisor <<= sh;
|
||||
dhighest = dhi >> (64 - sh);
|
||||
dhi = (dhi << sh) | (dlo >> (64 - sh));
|
||||
dlo <<= sh;
|
||||
|
||||
*phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
|
||||
} else {
|
||||
/**
|
||||
* dhi >= divisor
|
||||
* Since the MSB of divisor is set (sh == 0),
|
||||
* (dhi - divisor) < divisor
|
||||
*
|
||||
* Thus, the high part of the quotient is 1, and we can
|
||||
* calculate the low part with a single call to udiv_qrnnd
|
||||
* after subtracting divisor from dhi
|
||||
*/
|
||||
dhi -= divisor;
|
||||
*phigh = 1;
|
||||
}
|
||||
|
||||
*plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
|
||||
}
|
||||
|
||||
*plow = dlo;
|
||||
*phigh = dhi;
|
||||
return 0;
|
||||
/*
|
||||
* since the dividend/divisor might have been normalized,
|
||||
* the remainder might also have to be shifted back
|
||||
*/
|
||||
return rem >> sh;
|
||||
}
|
||||
}
|
||||
|
||||
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
|
||||
/*
|
||||
* Signed 128-by-64 division.
|
||||
* Returns quotient via plow and phigh.
|
||||
* Also returns the remainder via the function return value.
|
||||
*/
|
||||
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
|
||||
{
|
||||
int sgn_dvdnd = *phigh < 0;
|
||||
int sgn_divsr = divisor < 0;
|
||||
int overflow = 0;
|
||||
bool neg_quotient = false, neg_remainder = false;
|
||||
uint64_t unsig_hi = *phigh, unsig_lo = *plow;
|
||||
uint64_t rem;
|
||||
|
||||
if (sgn_dvdnd) {
|
||||
*plow = ~(*plow);
|
||||
*phigh = ~(*phigh);
|
||||
if (*plow == (int64_t)-1) {
|
||||
*plow = 0;
|
||||
(*phigh)++;
|
||||
} else {
|
||||
(*plow)++;
|
||||
}
|
||||
}
|
||||
if (*phigh < 0) {
|
||||
neg_quotient = !neg_quotient;
|
||||
neg_remainder = !neg_remainder;
|
||||
|
||||
if (sgn_divsr) {
|
||||
divisor = 0 - divisor;
|
||||
}
|
||||
|
||||
overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
|
||||
|
||||
if (sgn_dvdnd ^ sgn_divsr) {
|
||||
*plow = 0 - *plow;
|
||||
}
|
||||
|
||||
if (!overflow) {
|
||||
if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
|
||||
overflow = 1;
|
||||
if (unsig_lo == 0) {
|
||||
unsig_hi = -unsig_hi;
|
||||
} else {
|
||||
unsig_hi = ~unsig_hi;
|
||||
unsig_lo = -unsig_lo;
|
||||
}
|
||||
}
|
||||
|
||||
return overflow;
|
||||
if (divisor < 0) {
|
||||
neg_quotient = !neg_quotient;
|
||||
|
||||
divisor = -divisor;
|
||||
}
|
||||
|
||||
rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
|
||||
|
||||
if (neg_quotient) {
|
||||
if (unsig_lo == 0) {
|
||||
*phigh = -unsig_hi;
|
||||
*plow = 0;
|
||||
} else {
|
||||
*phigh = ~unsig_hi;
|
||||
*plow = -unsig_lo;
|
||||
}
|
||||
} else {
|
||||
*phigh = unsig_hi;
|
||||
*plow = unsig_lo;
|
||||
}
|
||||
|
||||
if (neg_remainder) {
|
||||
return -rem;
|
||||
} else {
|
||||
return rem;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user