host-utils: Implemented unsigned 256-by-128 division
Based on the already existing QEMU implementation, created an unsigned 256-bit by 128-bit division, needed to implement the vector divide extended unsigned instruction from Power ISA 3.1. Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20220525134954.85056-5-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
This commit is contained in:
parent
9a1f0866a3
commit
4724bbd284
|
@ -32,6 +32,7 @@
|
||||||
|
|
||||||
#include "qemu/compiler.h"
|
#include "qemu/compiler.h"
|
||||||
#include "qemu/bswap.h"
|
#include "qemu/bswap.h"
|
||||||
|
#include "qemu/int128.h"
|
||||||
|
|
||||||
#ifdef CONFIG_INT128
|
#ifdef CONFIG_INT128
|
||||||
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
|
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
|
||||||
|
@ -849,4 +850,5 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
|
||||||
return a >= b;
|
return a >= b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "greater than or equal": both operands are converted to
 * __uint128_t before being compared.
 */
static inline bool int128_uge(Int128 a, Int128 b)
{
    const __uint128_t lhs = (__uint128_t)a;
    const __uint128_t rhs = (__uint128_t)b;

    return lhs >= rhs;
}
|
||||||
|
|
||||||
static inline bool int128_lt(Int128 a, Int128 b)
|
static inline bool int128_lt(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return a < b;
|
return a < b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "less than": both operands are converted to __uint128_t
 * before being compared.
 */
static inline bool int128_ult(Int128 a, Int128 b)
{
    const __uint128_t lhs = (__uint128_t)a;
    const __uint128_t rhs = (__uint128_t)b;

    return lhs < rhs;
}
|
||||||
|
|
||||||
static inline bool int128_le(Int128 a, Int128 b)
|
static inline bool int128_le(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return a <= b;
|
return a <= b;
|
||||||
|
@ -177,6 +187,15 @@ static inline Int128 bswap128(Int128 a)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Count the leading zero bits of a 128-bit value.
 * Returns 128 when the value is zero.
 */
static inline int clz128(Int128 a)
{
    const uint64_t upper = (uint64_t)(a >> 64);
    const uint64_t lower = (uint64_t)a;

    if (upper != 0) {
        return __builtin_clzll(upper);
    }
    if (lower != 0) {
        /* the whole upper half is zero, so it contributes 64 zeros */
        return __builtin_clzll(lower) + 64;
    }
    return 128;
}
|
||||||
|
|
||||||
static inline Int128 int128_divu(Int128 a, Int128 b)
|
static inline Int128 int128_divu(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return (__uint128_t)a / (__uint128_t)b;
|
return (__uint128_t)a / (__uint128_t)b;
|
||||||
|
@ -373,11 +392,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
|
||||||
return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
|
return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "greater than or equal" for the two-word Int128 layout.
 * The high words decide the result unless they are equal, in which case
 * the low words are compared.
 */
static inline bool int128_uge(Int128 a, Int128 b)
{
    if (a.hi != b.hi) {
        /* compare the high words as unsigned quantities */
        return (uint64_t)a.hi > (uint64_t)b.hi;
    }
    return a.lo >= b.lo;
}
|
||||||
|
|
||||||
static inline bool int128_lt(Int128 a, Int128 b)
|
static inline bool int128_lt(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return !int128_ge(a, b);
|
return !int128_ge(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unsigned "less than", defined as the negation of int128_uge().
 */
static inline bool int128_ult(Int128 a, Int128 b)
{
    const bool not_below = int128_uge(a, b);

    return !not_below;
}
|
||||||
|
|
||||||
static inline bool int128_le(Int128 a, Int128 b)
|
static inline bool int128_le(Int128 a, Int128 b)
|
||||||
{
|
{
|
||||||
return int128_ge(b, a);
|
return int128_ge(b, a);
|
||||||
|
@ -418,6 +447,15 @@ static inline Int128 bswap128(Int128 a)
|
||||||
return int128_make128(bswap64(a.hi), bswap64(a.lo));
|
return int128_make128(bswap64(a.hi), bswap64(a.lo));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Count the leading zero bits of a 128-bit value held as hi/lo words.
 * Returns 128 when both words are zero.
 */
static inline int clz128(Int128 a)
{
    if (a.hi) {
        return __builtin_clzll(a.hi);
    }
    if (a.lo) {
        /* the whole high word is zero, so it contributes 64 zeros */
        return __builtin_clzll(a.lo) + 64;
    }
    return 128;
}
|
||||||
|
|
||||||
Int128 int128_divu(Int128, Int128);
|
Int128 int128_divu(Int128, Int128);
|
||||||
Int128 int128_remu(Int128, Int128);
|
Int128 int128_remu(Int128, Int128);
|
||||||
Int128 int128_divs(Int128, Int128);
|
Int128 int128_divs(Int128, Int128);
|
||||||
|
|
|
@ -266,3 +266,132 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow)
|
||||||
*plow = *plow << shift;
|
*plow = *plow << shift;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unsigned 256-by-128 division.
|
||||||
|
* Returns the remainder via r.
|
||||||
|
* Returns lower 128 bit of quotient.
|
||||||
|
* Needs a normalized divisor (most significant bit set to 1).
|
||||||
|
*
|
||||||
|
* Adapted from include/qemu/host-utils.h udiv_qrnnd,
|
||||||
|
* from the GNU Multi Precision Library - longlong.h __udiv_qrnnd
|
||||||
|
* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
|
||||||
|
*
|
||||||
|
* Licensed under the GPLv2/LGPLv3
|
||||||
|
*/
|
||||||
|
static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d)
|
||||||
|
{
|
||||||
|
Int128 d0, d1, q0, q1, r1, r0, m;
|
||||||
|
uint64_t mp0, mp1;
|
||||||
|
|
||||||
|
d0 = int128_make64(int128_getlo(d));
|
||||||
|
d1 = int128_make64(int128_gethi(d));
|
||||||
|
|
||||||
|
r1 = int128_remu(n1, d1);
|
||||||
|
q1 = int128_divu(n1, d1);
|
||||||
|
mp0 = int128_getlo(q1);
|
||||||
|
mp1 = int128_gethi(q1);
|
||||||
|
mulu128(&mp0, &mp1, int128_getlo(d0));
|
||||||
|
m = int128_make128(mp0, mp1);
|
||||||
|
r1 = int128_make128(int128_gethi(n0), int128_getlo(r1));
|
||||||
|
if (int128_ult(r1, m)) {
|
||||||
|
q1 = int128_sub(q1, int128_one());
|
||||||
|
r1 = int128_add(r1, d);
|
||||||
|
if (int128_uge(r1, d)) {
|
||||||
|
if (int128_ult(r1, m)) {
|
||||||
|
q1 = int128_sub(q1, int128_one());
|
||||||
|
r1 = int128_add(r1, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r1 = int128_sub(r1, m);
|
||||||
|
|
||||||
|
r0 = int128_remu(r1, d1);
|
||||||
|
q0 = int128_divu(r1, d1);
|
||||||
|
mp0 = int128_getlo(q0);
|
||||||
|
mp1 = int128_gethi(q0);
|
||||||
|
mulu128(&mp0, &mp1, int128_getlo(d0));
|
||||||
|
m = int128_make128(mp0, mp1);
|
||||||
|
r0 = int128_make128(int128_getlo(n0), int128_getlo(r0));
|
||||||
|
if (int128_ult(r0, m)) {
|
||||||
|
q0 = int128_sub(q0, int128_one());
|
||||||
|
r0 = int128_add(r0, d);
|
||||||
|
if (int128_uge(r0, d)) {
|
||||||
|
if (int128_ult(r0, m)) {
|
||||||
|
q0 = int128_sub(q0, int128_one());
|
||||||
|
r0 = int128_add(r0, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r0 = int128_sub(r0, m);
|
||||||
|
|
||||||
|
*r = r0;
|
||||||
|
return int128_or(int128_lshift(q1, 64), q0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unsigned 256-by-128 division.
|
||||||
|
* Returns the remainder.
|
||||||
|
* Returns quotient via plow and phigh.
|
||||||
|
* Also returns the remainder via the function return value.
|
||||||
|
*/
|
||||||
|
Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
|
||||||
|
{
|
||||||
|
Int128 dhi = *phigh;
|
||||||
|
Int128 dlo = *plow;
|
||||||
|
Int128 rem, dhighest;
|
||||||
|
int sh;
|
||||||
|
|
||||||
|
if (!int128_nz(divisor) || !int128_nz(dhi)) {
|
||||||
|
*plow = int128_divu(dlo, divisor);
|
||||||
|
*phigh = int128_zero();
|
||||||
|
return int128_remu(dlo, divisor);
|
||||||
|
} else {
|
||||||
|
sh = clz128(divisor);
|
||||||
|
|
||||||
|
if (int128_ult(dhi, divisor)) {
|
||||||
|
if (sh != 0) {
|
||||||
|
/* normalize the divisor, shifting the dividend accordingly */
|
||||||
|
divisor = int128_lshift(divisor, sh);
|
||||||
|
dhi = int128_or(int128_lshift(dhi, sh),
|
||||||
|
int128_urshift(dlo, (128 - sh)));
|
||||||
|
dlo = int128_lshift(dlo, sh);
|
||||||
|
}
|
||||||
|
|
||||||
|
*phigh = int128_zero();
|
||||||
|
*plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
|
||||||
|
} else {
|
||||||
|
if (sh != 0) {
|
||||||
|
/* normalize the divisor, shifting the dividend accordingly */
|
||||||
|
divisor = int128_lshift(divisor, sh);
|
||||||
|
dhighest = int128_rshift(dhi, (128 - sh));
|
||||||
|
dhi = int128_or(int128_lshift(dhi, sh),
|
||||||
|
int128_urshift(dlo, (128 - sh)));
|
||||||
|
dlo = int128_lshift(dlo, sh);
|
||||||
|
|
||||||
|
*phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* dhi >= divisor
|
||||||
|
* Since the MSB of divisor is set (sh == 0),
|
||||||
|
* (dhi - divisor) < divisor
|
||||||
|
*
|
||||||
|
* Thus, the high part of the quotient is 1, and we can
|
||||||
|
* calculate the low part with a single call to udiv_qrnnd
|
||||||
|
* after subtracting divisor from dhi
|
||||||
|
*/
|
||||||
|
dhi = int128_sub(dhi, divisor);
|
||||||
|
*phigh = int128_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
*plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* since the dividend/divisor might have been normalized,
|
||||||
|
* the remainder might also have to be shifted back
|
||||||
|
*/
|
||||||
|
rem = int128_urshift(rem, sh);
|
||||||
|
return rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue