Iain Buclaw e613d99266 libphobos: Apply core.internal.hash updates from druntime
Backported from upstream druntime 2.084

Reviewed-on: https://github.com/dlang/druntime/pull/2469

libphobos/ChangeLog:

	* libdruntime/Makefile.am (DRUNTIME_DSOURCES): Remove rt/util/hash.d
	* libdruntime/Makefile.in: Rebuild.
	* testsuite/libphobos.aa/aa.exp: New file.
	* testsuite/libphobos.aa/test_aa.d: New test.
	* testsuite/libphobos.hash/hash.exp: New file.
	* testsuite/libphobos.hash/test_hash.d: New test.

From-SVN: r268754
2019-02-10 21:01:24 +00:00

810 lines
21 KiB
D

/**
* Implementation of associative arrays.
*
* Copyright: Copyright Digital Mars 2000 - 2015.
* License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
* Authors: Martin Nowak
*/
module rt.aaA;
/// AA version for debuggers, bump whenever changing the layout
extern (C) immutable int _aaVersion = 1;
import core.memory : GC;
// grow threshold: an insertion that pushes used / dim above
// GROW_NUM / GROW_DEN triggers Impl.grow
private enum GROW_NUM = 4;
private enum GROW_DEN = 5;
// shrink threshold: a removal that drops length / dim below
// SHRINK_NUM / SHRINK_DEN triggers Impl.shrink
private enum SHRINK_NUM = 1;
private enum SHRINK_DEN = 8;
// grow factor: the bucket count is multiplied (grow) or divided (shrink) by this
private enum GROW_FAC = 4;
// growing the AA multiplies its size by GROW_FAC, so the shrink threshold
// must be smaller than the grow threshold divided by GROW_FAC to have a
// hysteresis (otherwise a freshly grown table could shrink again at once)
static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
// initial load factor (for literals and rehash), mean of both thresholds;
// note that INIT_NUM is computed with integer division
private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
private enum INIT_DEN = SHRINK_DEN * GROW_DEN;
// default bucket count of a new Impl; Impl.shrink never goes to or below it
private enum INIT_NUM_BUCKETS = 8;
// magic hash constants to distinguish empty, deleted, and filled buckets
private enum HASH_EMPTY = 0;
private enum HASH_DELETED = 0x1;
// most significant bit of size_t (the shift amount is 8 * size_t.sizeof - 1);
// calcHash sets it on every stored hash, Bucket.filled tests it
private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
/// Opaque AA wrapper: a single pointer to the lazily allocated implementation
struct AA
{
    Impl* impl;
    alias impl this;

    /// True when no implementation is allocated or it holds no live entries.
    private @property bool empty() const pure nothrow @nogc
    {
        if (impl is null)
            return true;
        return impl.length == 0;
    }
}
// Hash table behind an AA: an open-addressing bucket array plus the
// bookkeeping needed to grow, shrink and iterate it.
// The field layout is what _aaVersion refers to; do not reorder fields
// without bumping it.
private struct Impl
{
private:
// Set up an empty table with `sz` buckets for the key/value types described
// by `ti`. `sz` is used as a mask base (see `mask`), so callers pass a power
// of two.
this(in TypeInfo_AssociativeArray ti, size_t sz = INIT_NUM_BUCKETS)
{
keysz = cast(uint) ti.key.tsize;
valsz = cast(uint) ti.value.tsize;
buckets = allocBuckets(sz);
// no bucket is filled yet, so the search cache points past the end
firstUsed = cast(uint) buckets.length;
// null unless key or value has a destructor (see fakeEntryTI)
entryTI = fakeEntryTI(ti.key, ti.value);
// the value is stored after the key, rounded up to the value's alignment
valoff = cast(uint) talign(keysz, ti.value.talign);
import rt.lifetime : hasPostblit, unqualify;
if (hasPostblit(unqualify(ti.key)))
flags |= Flags.keyHasPostblit;
// bit 0 of TypeInfo.flags is treated as the "has pointers" flag here
if ((ti.key.flags | ti.value.flags) & 1)
flags |= Flags.hasPointers;
}
// bucket array; its length is `dim`
Bucket[] buckets;
// number of buckets that are filled or marked deleted
uint used;
// number of buckets marked deleted
uint deleted;
// TypeInfo used to allocate entries with destructors, or null
TypeInfo_Struct entryTI;
// lower bound for the index of the first filled bucket; iteration starts here
uint firstUsed;
// size of the key type in bytes
immutable uint keysz;
// size of the value type in bytes
immutable uint valsz;
// offset of the value within an entry
immutable uint valoff;
Flags flags;
enum Flags : ubyte
{
none = 0x0,
keyHasPostblit = 0x1,
hasPointers = 0x2,
}
// number of live entries (used buckets that are not tombstones)
@property size_t length() const pure nothrow @nogc
{
assert(used >= deleted);
return used - deleted;
}
// number of buckets
@property size_t dim() const pure nothrow @nogc @safe
{
return buckets.length;
}
// bit mask that reduces a hash to a bucket index; only meaningful while
// dim is a power of two
@property size_t mask() const pure nothrow @nogc
{
return dim - 1;
}
// find the first slot to insert a value with hash: the first bucket on the
// probe sequence that is not filled (empty or deleted). The step grows by
// one on each probe. The loop has no other exit, so it relies on the table
// always containing at least one non-filled bucket.
inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc
{
for (size_t i = hash & mask, j = 1;; ++j)
{
if (!buckets[i].filled)
return &buckets[i];
i = (i + j) & mask;
}
}
// lookup a key: walks the same probe sequence as findSlotInsert and returns
// the bucket whose stored hash and key both match, or null once an empty
// bucket is reached. Deleted buckets neither match nor stop the search.
inout(Bucket)* findSlotLookup(size_t hash, in void* pkey, in TypeInfo keyti) inout
{
for (size_t i = hash & mask, j = 1;; ++j)
{
if (buckets[i].hash == hash && keyti.equals(pkey, buckets[i].entry))
return &buckets[i];
else if (buckets[i].empty)
return null;
i = (i + j) & mask;
}
}
// Make room for more entries. `keyti` is not used by the body.
void grow(in TypeInfo keyti)
{
// If there are so many deleted entries, that growing would push us
// below the shrink threshold, we just purge deleted entries instead.
if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
resize(dim);
else
resize(GROW_FAC * dim);
}
// Reduce the bucket count by GROW_FAC unless the table is already at (or
// below) the initial size. `keyti` is not used by the body.
void shrink(in TypeInfo keyti)
{
if (dim > INIT_NUM_BUCKETS)
resize(dim / GROW_FAC);
}
// Rebuild the table with `ndim` buckets: every filled bucket is re-inserted
// by its stored hash, deleted markers are dropped, and the old bucket array
// is freed.
void resize(size_t ndim) pure nothrow
{
auto obuckets = buckets;
buckets = allocBuckets(ndim);
foreach (ref b; obuckets[firstUsed .. $])
if (b.filled)
*findSlotInsert(b.hash) = b;
// positions changed, so the search cache is reset to the start
firstUsed = 0;
used -= deleted;
deleted = 0;
GC.free(obuckets.ptr); // safe to free b/c impossible to reference
}
// Drop all entries but keep the bucket array at its current length.
// NOTE(review): &buckets[firstUsed] is bounds-checked; the only caller in
// this file (_aaClear) calls this for non-empty AAs, where firstUsed < dim.
void clear() pure nothrow
{
import core.stdc.string : memset;
// clear all data, but don't change bucket array length
memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
deleted = used = 0;
firstUsed = cast(uint) dim;
}
}
//==============================================================================
// Bucket
//------------------------------------------------------------------------------
// One slot of the table: the stored hash doubles as the slot state.
private struct Bucket
{
private pure nothrow @nogc:
    // HASH_EMPTY, HASH_DELETED, or a key hash with HASH_FILLED_MARK set
    size_t hash;
    // entry memory (key followed by value), null for deleted buckets
    void* entry;

    // slot has never been used since the bucket array was (re)built
    @property bool empty() const
    {
        return HASH_EMPTY == hash;
    }

    // slot held an entry that was removed (tombstone)
    @property bool deleted() const
    {
        return HASH_DELETED == hash;
    }

    // slot holds a live entry: the top bit of the hash is set
    @property bool filled() const @safe
    {
        return (hash & HASH_FILLED_MARK) != 0;
    }
}
// Allocate a zero-initialized array of `dim` buckets from the GC.
// Zeroed memory means every bucket starts out with hash == HASH_EMPTY.
Bucket[] allocBuckets(size_t dim) @trusted pure nothrow
{
    auto raw = GC.calloc(dim * Bucket.sizeof, GC.BlkAttr.NO_INTERIOR);
    return (cast(Bucket*) raw)[0 .. dim];
}
//==============================================================================
// Entry
//------------------------------------------------------------------------------
// Allocate the memory for one entry of `aa`, copy the key at `pkey` into it
// (bitwise, no postblit) and zero the value part.
// Returns: pointer to the entry; the value lives at offset aa.valoff.
private void* allocEntry(in Impl* aa, in void* pkey)
{
    import core.stdc.string : memcpy, memset;
    import rt.lifetime : _d_newitemU;

    immutable valueOffset = aa.valoff;
    void* entry;

    if (aa.entryTI is null)
    {
        // raw block; only scanned when key or value may contain pointers
        immutable attr = (aa.flags & Impl.Flags.hasPointers) ? 0 : GC.BlkAttr.NO_SCAN;
        entry = GC.malloc(valueOffset + aa.valsz, attr);
    }
    else
    {
        // typed allocation, driven by the TypeInfo built in fakeEntryTI
        entry = _d_newitemU(aa.entryTI);
    }

    memcpy(entry, pkey, aa.keysz); // copy key
    memset(entry + valueOffset, 0, aa.valsz); // zero value
    return entry;
}
// Destructor installed by fakeEntryTI(): destroys the key, then the value,
// of the entry at `p`.
package void entryDtor(void* p, const TypeInfo_Struct sti)
{
    // fakeEntryTI() stores the key and value type info directly behind the
    // TypeInfo_Struct instance
    enum tiSize = __traits(classInstanceSize, TypeInfo_Struct);
    auto trailer = cast(const(TypeInfo)*)(cast(void*) sti + tiSize);
    const keyTI = trailer[0];
    const valTI = trailer[1];

    keyTI.destroy(p);
    // the value sits behind the key, rounded up to the value's alignment
    valTI.destroy(p + talign(keyTI.tsize, valTI.talign));
}
// Whether `ti` describes a struct with a destructor, or a static array
// (of static arrays ...) of such structs.
private bool hasDtor(const TypeInfo ti)
{
    import rt.lifetime : unqualify;

    auto dynType = typeid(ti);
    if (dynType is typeid(TypeInfo_Struct))
        return (cast(TypeInfo_Struct) cast(void*) ti).xdtor !is null;
    if (dynType is typeid(TypeInfo_StaticArray))
        return hasDtor(unqualify(ti.next)); // look at the element type
    return false;
}
// Build a TypeInfo_Struct for an entry made of a key followed by a value.
// The key and value TypeInfo are stored right behind the TypeInfo_Struct
// instance so that entryDtor can find them.
// Returns: null when neither key nor value has a destructor.
TypeInfo_Struct fakeEntryTI(const TypeInfo keyti, const TypeInfo valti)
{
    import core.stdc.string : memcpy;
    import rt.lifetime : unqualify;

    auto kti = unqualify(keyti);
    auto vti = unqualify(valti);

    // nothing to destroy => callers allocate entries as raw GC blocks
    if (!(hasDtor(kti) || hasDtor(vti)))
        return null;

    // one block: TypeInfo_Struct instance + two trailing TypeInfo references
    enum sizeti = __traits(classInstanceSize, TypeInfo_Struct);
    void* block = GC.malloc(sizeti + 2 * (void*).sizeof);
    memcpy(block, typeid(TypeInfo_Struct).initializer().ptr, sizeti);

    auto trailer = cast(TypeInfo*)(block + sizeti);
    trailer[0] = cast() kti;
    trailer[1] = cast() vti;

    auto result = cast(TypeInfo_Struct) block;

    static immutable tiName = __MODULE__ ~ ".Entry!(...)";
    result.name = tiName;

    // Entry objects are not expected to be used outside of this module, so
    // the remaining callbacks (xtoHash, xopEquals, xopCmp, xtoString and
    // xpostblit) stay null
    result.m_RTInfo = null;

    immutable entrySize = talign(kti.tsize, vti.talign) + vti.tsize;
    result.m_init = (cast(ubyte*) null)[0 .. entrySize]; // init length, but not ptr

    // xdtor needs to be built from the dtors of key and value for the GC
    result.xdtorti = &entryDtor;

    result.m_flags = TypeInfo_Struct.StructFlags.isDynamicType;
    result.m_flags |= (keyti.flags | valti.flags) & TypeInfo_Struct.StructFlags.hasPointers;
    result.m_align = cast(uint) max(kti.talign, vti.talign);
    return result;
}
//==============================================================================
// Helper functions
//------------------------------------------------------------------------------
// Round `tsize` up to the next multiple of `algn`.
// `algn` must be a power of two (checked by the assert).
private size_t talign(size_t tsize, size_t algn) @safe pure nothrow @nogc
{
    immutable lowBits = algn - 1;
    assert((lowBits & algn) == 0);
    immutable bumped = tsize + lowBits;
    return bumped & ~lowBits;
}
// Scramble `h` to compensate for weak hash functions.
// Uses the final mixing step of MurmurHash2.
private size_t mix(size_t h) @safe pure nothrow @nogc
{
    enum multiplier = 0x5bd1e995;

    h = h ^ (h >> 13);
    h = h * multiplier;
    h = h ^ (h >> 15);
    return h;
}
// Hash the key at `pkey` with `keyti`, mix it, and tag it as "filled".
private size_t calcHash(in void* pkey, in TypeInfo keyti)
{
    immutable mixed = mix(keyti.getHash(pkey));
    // the highest bit distinguishes filled buckets from empty/deleted ones
    return mixed | HASH_FILLED_MARK;
}
// Smallest power of two that is >= n (1 for n == 0).
//
// The shift is done on a size_t. With a plain `1` the expression would be
// evaluated as a 32-bit int, so any result >= 2^31 would overflow (and be
// sign-extended on 64-bit targets), yielding a bucket count that is not a
// power of two.
//
// The result is not representable when n exceeds the largest power of two
// that fits in size_t; callers are expected to stay below that.
private size_t nextpow2(in size_t n) pure nothrow @nogc
{
    import core.bitop : bsr;

    if (!n)
        return 1;

    // bsr(n) is the index of the highest set bit; round up unless n is
    // already an exact power of two
    const isPowerOf2 = !((n - 1) & n);
    return size_t(1) << (bsr(n) + !isPowerOf2);
}
pure nothrow @nogc unittest
{
    // expected nextpow2(n) for n = 0 .. 9, indexed by n
    static immutable size_t[10] expected = [1, 1, 2, 4, 4, 8, 8, 8, 8, 16];
    foreach (n, pow2; expected)
        assert(nextpow2(n) == pow2);
}
// Smaller of two values; returns `b` when they compare equal.
private T min(T)(T a, T b) pure nothrow @nogc
{
    if (a < b)
        return a;
    return b;
}
// Larger of two values; returns `b` when they compare equal.
private T max(T)(T a, T b) pure nothrow @nogc
{
    if (b < a)
        return a;
    return b;
}
//==============================================================================
// API Implementation
//------------------------------------------------------------------------------
/// Number of entries in the associative array; 0 for an unallocated AA.
extern (C) size_t _aaLen(in AA aa) pure nothrow @nogc
{
    if (aa.impl is null)
        return 0;
    return aa.length;
}
/******************************
* Lookup *pkey in aa, inserting it if it is missing.
* Called only from implementation of (aa[key]) expressions when value is mutable.
* Forwards to _aaGetX and discards the `found` result.
* Params:
* aa = associative array opaque pointer; its implementation is allocated
* on first use
* ti = TypeInfo for the associative array
* valsz = ignored
* pkey = pointer to the key value
* Returns:
* if key was in the aa, a mutable pointer to the existing value.
* If key was not in the aa, a mutable pointer to newly inserted value which
* is set to all zeros
*/
extern (C) void* _aaGetY(AA* aa, const TypeInfo_AssociativeArray ti,
in size_t valsz, in void* pkey)
{
// required by _aaGetX's signature, not reported to the caller
bool found;
return _aaGetX(aa, ti, valsz, pkey, found);
}
/******************************
* Lookup *pkey in aa, inserting it if it is missing.
* Called only from implementation of require
* Params:
* aa = associative array opaque pointer; its implementation is allocated
* on first use
* ti = TypeInfo for the associative array
* valsz = ignored
* pkey = pointer to the key value
* found = set to true if the key was already present, false (the `out`
* default) if a new entry was inserted
* Returns:
* if key was in the aa, a mutable pointer to the existing value.
* If key was not in the aa, a mutable pointer to newly inserted value which
* is set to all zeros
*/
extern (C) void* _aaGetX(AA* aa, const TypeInfo_AssociativeArray ti,
in size_t valsz, in void* pkey, out bool found)
{
// lazily alloc implementation
if (aa.impl is null)
aa.impl = new Impl(ti);
// get hash and bucket for key
immutable hash = calcHash(pkey, ti.key);
// found a value => return it
if (auto p = aa.findSlotLookup(hash, pkey, ti.key))
{
found = true;
return p.entry + aa.valoff;
}
// not present: pick the first non-filled slot on the probe sequence
auto p = aa.findSlotInsert(hash);
// reusing a tombstone does not change `used`, only the tombstone count
if (p.deleted)
--aa.deleted;
// check load factor and possibly grow; `used` is incremented first so the
// check already accounts for the slot about to be filled
else if (++aa.used * GROW_DEN > aa.dim * GROW_NUM)
{
aa.grow(ti.key);
// the bucket array was replaced, so the slot has to be located again
p = aa.findSlotInsert(hash);
assert(p.empty);
}
// update search cache and allocate entry
aa.firstUsed = min(aa.firstUsed, cast(uint)(p - aa.buckets.ptr));
p.hash = hash;
p.entry = allocEntry(aa.impl, pkey);
// postblit for key (allocEntry only copied it bitwise)
if (aa.flags & Impl.Flags.keyHasPostblit)
{
import rt.lifetime : __doPostblit, unqualify;
__doPostblit(p.entry, aa.keysz, unqualify(ti.key));
}
// return pointer to value
return p.entry + aa.valoff;
}
/******************************
* Lookup *pkey in aa without inserting.
* Called only from implementation of (aa[key]) expressions when value is not mutable.
* Forwards to _aaInX.
* Params:
* aa = associative array opaque pointer
* keyti = TypeInfo for the key
* valsz = ignored
* pkey = pointer to the key value
* Returns:
* pointer to value if present, null otherwise
*/
extern (C) inout(void)* _aaGetRvalueX(inout AA aa, in TypeInfo keyti, in size_t valsz,
in void* pkey)
{
return _aaInX(aa, keyti, pkey);
}
/******************************
* Find the value stored for *pkey in aa.
* Called only from implementation of (key in aa) expressions.
* Params:
* aa = associative array opaque pointer
* keyti = TypeInfo for the key
* pkey = pointer to the key value
* Returns:
* pointer to value if present, null otherwise (including for an empty or
* unallocated aa)
*/
extern (C) inout(void)* _aaInX(inout AA aa, in TypeInfo keyti, in void* pkey)
{
    if (!aa.empty)
    {
        immutable keyHash = calcHash(pkey, keyti);
        auto slot = aa.findSlotLookup(keyHash, pkey, keyti);
        if (slot !is null)
            return slot.entry + aa.valoff;
    }
    return null;
}
/// Remove the entry for *pkey from the AA; returns true if it was present
extern (C) bool _aaDelX(AA aa, in TypeInfo keyti, in void* pkey)
{
    if (aa.empty)
        return false;

    immutable keyHash = calcHash(pkey, keyti);
    auto slot = aa.findSlotLookup(keyHash, pkey, keyti);
    if (slot is null)
        return false;

    // turn the bucket into a tombstone so probe sequences stay intact
    slot.hash = HASH_DELETED;
    slot.entry = null;
    ++aa.deleted;

    // too sparse now => move to a smaller table
    if (aa.length * SHRINK_DEN < aa.dim * SHRINK_NUM)
        aa.shrink(keyti);
    return true;
}
/// Remove all elements from AA; a no-op for an empty or unallocated AA.
extern (C) void _aaClear(AA aa) pure nothrow
{
    if (aa.empty)
        return;
    aa.impl.clear();
}
/// Rehash AA: rebuild the table sized for the initial load factor.
/// `keyti` is not used. Returns the (unchanged) implementation pointer.
extern (C) void* _aaRehash(AA* paa, in TypeInfo keyti) pure nothrow
{
    if (paa.empty)
        return *paa;

    immutable wanted = nextpow2(INIT_DEN * paa.length / INIT_NUM);
    paa.resize(wanted);
    return *paa;
}
/// Return a GC allocated array holding a bitwise copy of every value.
/// `keysz` is not used; `valsz` is the size of one value in bytes.
extern (C) inout(void[]) _aaValues(inout AA aa, in size_t keysz, in size_t valsz,
    const TypeInfo tiValueArray) pure nothrow
{
    import rt.lifetime : _d_newarrayU;

    if (aa.empty)
        return null;

    immutable count = aa.length;
    immutable valueOffset = aa.valoff;
    auto result = _d_newarrayU(tiValueArray, count).ptr;
    auto cursor = result;

    foreach (bucket; aa.buckets[aa.firstUsed .. $])
    {
        if (bucket.filled)
        {
            cursor[0 .. valsz] = bucket.entry[valueOffset .. valsz + valueOffset];
            cursor += valsz;
        }
    }
    // postblit is done in object.values
    return (cast(inout(void)*) result)[0 .. count]; // fake length, return number of elements
}
/// Return a GC allocated array holding a bitwise copy of every key.
/// `keysz` is the size of one key in bytes.
extern (C) inout(void[]) _aaKeys(inout AA aa, in size_t keysz, const TypeInfo tiKeyArray) pure nothrow
{
    import rt.lifetime : _d_newarrayU;

    if (aa.empty)
        return null;

    immutable count = aa.length;
    auto result = _d_newarrayU(tiKeyArray, count).ptr;
    auto cursor = result;

    foreach (bucket; aa.buckets[aa.firstUsed .. $])
    {
        if (bucket.filled)
        {
            // the key sits at the start of the entry
            cursor[0 .. keysz] = bucket.entry[0 .. keysz];
            cursor += keysz;
        }
    }
    // postblit is done in object.keys
    return (cast(inout(void)*) result)[0 .. count]; // fake length, return number of elements
}
// opApply callbacks are extern(D)
// dg_t receives a pointer to the value (see _aaApply)
extern (D) alias dg_t = int delegate(void*);
// dg2_t receives a pointer to the key and a pointer to the value (see _aaApply2)
extern (D) alias dg2_t = int delegate(void*, void*);
/// foreach opApply over all values.
/// Calls `dg` with a pointer to each value; stops at and returns the first
/// non-zero result, otherwise returns 0. `keysz` is not used.
extern (C) int _aaApply(AA aa, in size_t keysz, dg_t dg)
{
    if (aa.empty)
        return 0;

    immutable valueOffset = aa.valoff;
    foreach (bucket; aa.buckets)
    {
        if (bucket.filled)
        {
            immutable rc = dg(bucket.entry + valueOffset);
            if (rc != 0)
                return rc;
        }
    }
    return 0;
}
/// foreach opApply over all key/value pairs.
/// Calls `dg` with pointers to each key and its value; stops at and returns
/// the first non-zero result, otherwise returns 0. `keysz` is not used.
extern (C) int _aaApply2(AA aa, in size_t keysz, dg2_t dg)
{
    if (aa.empty)
        return 0;

    immutable valueOffset = aa.valoff;
    foreach (bucket; aa.buckets)
    {
        if (bucket.filled)
        {
            immutable rc = dg(bucket.entry, bucket.entry + valueOffset);
            if (rc != 0)
                return rc;
        }
    }
    return 0;
}
/// Construct an associative array of type ti from keys and value.
/// `keys` and `vals` are parallel arrays of equal length; keys and values
/// are moved into the AA bitwise (no postblit). When a key occurs more than
/// once, the last value wins. Returns null for an empty literal.
extern (C) Impl* _d_assocarrayliteralTX(const TypeInfo_AssociativeArray ti, void[] keys,
void[] vals)
{
assert(keys.length == vals.length);
immutable keysz = ti.key.tsize;
immutable valsz = ti.value.tsize;
immutable length = keys.length;
if (!length)
return null;
// size the table up front for the initial load factor, so no grow is
// needed while filling it
auto aa = new Impl(ti, nextpow2(INIT_DEN * length / INIT_NUM));
void* pkey = keys.ptr;
void* pval = vals.ptr;
immutable off = aa.valoff;
// number of distinct keys inserted
uint actualLength = 0;
foreach (_; 0 .. length)
{
immutable hash = calcHash(pkey, ti.key);
auto p = aa.findSlotLookup(hash, pkey, ti.key);
if (p is null)
{
// first occurrence of this key: claim a slot and allocate its entry
p = aa.findSlotInsert(hash);
p.hash = hash;
p.entry = allocEntry(aa, pkey); // move key, no postblit
aa.firstUsed = min(aa.firstUsed, cast(uint)(p - aa.buckets.ptr));
actualLength++;
}
else if (aa.entryTI && hasDtor(ti.value))
{
// destroy existing value before overwriting it
ti.value.destroy(p.entry + off);
// NOTE(review): the duplicate key at pkey is neither stored nor
// destroyed here - confirm that is intended for keys with destructors
}
// blit value (the hash was already stored when the slot was claimed)
auto pdst = p.entry + off;
pdst[0 .. valsz] = pval[0 .. valsz]; // move value, no postblit
// step to the next key/value pair
pkey += keysz;
pval += valsz;
}
aa.used = actualLength;
return aa;
}
/// Compares 2 AAs for equality: same number of entries, and every key of
/// aa1 is present in aa2 with an equal value. `tiRaw` is the (possibly
/// qualified) TypeInfo of the AA type. Returns non-zero when equal.
extern (C) int _aaEqual(in TypeInfo tiRaw, in AA aa1, in AA aa2)
{
    import rt.lifetime : unqualify;

    // same implementation (or both unallocated)
    if (aa1.impl is aa2.impl)
        return true;

    immutable len = _aaLen(aa1);
    if (len != _aaLen(aa2))
        return false;
    if (len == 0) // both empty
        return true;

    auto uti = unqualify(tiRaw);
    auto ti = *cast(TypeInfo_AssociativeArray*)&uti;

    // compare the entries
    immutable off = aa1.valoff;
    foreach (b1; aa1.buckets)
    {
        if (b1.filled)
        {
            auto pb2 = aa2.findSlotLookup(b1.hash, b1.entry, ti.key);
            if (pb2 is null)
                return false;
            if (!ti.value.equals(b1.entry + off, pb2.entry + off))
                return false;
        }
    }
    return true;
}
/// Compute a hash over all key/value pairs of the AA; 0 for an empty AA.
/// `tiRaw` is the (possibly qualified) TypeInfo of the AA type.
extern (C) hash_t _aaGetHash(in AA* aa, in TypeInfo tiRaw) nothrow
{
    import rt.lifetime : unqualify;

    if (aa.empty)
        return 0;

    auto uti = unqualify(tiRaw);
    auto ti = *cast(TypeInfo_AssociativeArray*)&uti;
    immutable off = aa.valoff;
    auto keyHash = &ti.key.getHash;
    auto valHash = &ti.value.getHash;

    size_t total;
    foreach (b; aa.buckets)
    {
        if (b.filled)
        {
            size_t[2] pair = [keyHash(b.entry), valHash(b.entry + off)];
            // use addition here, so that hash is independent of element order
            total += hashOf(pair);
        }
    }
    return total;
}
/**
* Iteration state used by the _aaRange* functions, which together implement
* a ForwardRange over an AA: the table being iterated plus the index of the
* current bucket.
*/
struct Range
{
// table being iterated; null for a range over an unallocated AA
Impl* impl;
// index of the current bucket; the range is empty once idx >= impl.dim
size_t idx;
alias impl this;
}
// Range primitives over an AA.
//
// A default Range (null impl) is what _aaRange returns for an unallocated
// AA. All primitives below treat it as empty instead of dereferencing the
// null pointer, which matters in builds where asserts are compiled out.
extern (C) pure nothrow @nogc @safe
{
    /// Create a range positioned at the first filled bucket of `aa`
    /// (or an empty range if there is none).
    Range _aaRange(AA aa)
    {
        if (!aa)
            return Range();

        foreach (i; aa.firstUsed .. aa.dim)
        {
            if (aa.buckets[i].filled)
                return Range(aa.impl, i);
        }
        return Range(aa, aa.dim);
    }

    /// True when the range has no implementation or has run past the last bucket.
    bool _aaRangeEmpty(Range r)
    {
        return r.impl is null || r.idx >= r.dim;
    }

    /// Pointer to the key of the current entry; null for an empty range.
    void* _aaRangeFrontKey(Range r)
    {
        assert(!_aaRangeEmpty(r));
        // also covers a null impl, which `r.idx >= r.dim` would dereference
        if (_aaRangeEmpty(r))
            return null;
        return r.buckets[r.idx].entry;
    }

    /// Pointer to the value of the current entry; null for an empty range
    /// or when the current bucket has no entry.
    void* _aaRangeFrontValue(Range r)
    {
        assert(!_aaRangeEmpty(r));
        // also covers a null impl, which `r.idx >= r.dim` would dereference
        if (_aaRangeEmpty(r))
            return null;

        auto entry = r.buckets[r.idx].entry;
        return entry is null ?
            null :
            (() @trusted { return entry + r.valoff; } ());
    }

    /// Advance to the next filled bucket; does nothing on an empty range.
    void _aaRangePopFront(ref Range r)
    {
        // also covers a null impl, which `r.idx >= r.dim` would dereference
        if (_aaRangeEmpty(r))
            return;

        for (++r.idx; r.idx < r.dim; ++r.idx)
        {
            if (r.buckets[r.idx].filled)
                break;
        }
    }
}
// Most tests are now in test_aa.d
// test postblit for AA literals
unittest
{
// struct that counts how often it is copied and destroyed
static struct T
{
ubyte field;
static size_t postblit, dtor;
this(this)
{
++postblit;
}
~this()
{
++dtor;
}
}
T t;
// T as value type: building the literal copies each value once
auto aa1 = [0 : t, 1 : t];
assert(T.dtor == 0 && T.postblit == 2);
// overwriting an existing value destroys the old one and copies the new one
aa1[0] = t;
assert(T.dtor == 1 && T.postblit == 3);
T.dtor = 0;
T.postblit = 0;
auto aa2 = [0 : t, 1 : t, 0 : t]; // literal with duplicate key => value overwritten
assert(T.dtor == 1 && T.postblit == 3);
T.dtor = 0;
T.postblit = 0;
// T as key type
auto aa3 = [t : 0];
assert(T.dtor == 0 && T.postblit == 1);
// assigning to an existing key does not copy the key again
aa3[t] = 1;
assert(T.dtor == 0 && T.postblit == 1);
// removing does not run the key's destructor right away
aa3.remove(t);
assert(T.dtor == 0 && T.postblit == 1);
// re-inserting after removal copies the key once more
aa3[t] = 2;
assert(T.dtor == 0 && T.postblit == 2);
// dtor will be called by GC finalizers
aa1 = null;
aa2 = null;
aa3 = null;
// NOTE(review): presumably this runs the finalizers of all objects whose
// finalizer address lies in the given range, i.e. all AA entries using
// entryDtor - confirm against GC.runFinalizers
GC.runFinalizers((cast(char*)(&entryDtor))[0 .. 1]);
assert(T.dtor == 6 && T.postblit == 2);
}