vk: add unordered_roadmap simple hash map with tests

Adds unordered_roadmap simple hash map:
- open addressing with linear probing
- size is fixed at init/compile time
- operates on a pre-allocated array of items with hashmap headers

Also adds basic tests for it.
And properly enables tests for ref_vk (i.e. alolcator)
This commit is contained in:
Ivan Avdeev 2023-10-23 13:13:16 -04:00
parent ab6f18fc32
commit b016de0c83
4 changed files with 394 additions and 7 deletions

View File

@ -0,0 +1,196 @@
#include "../unordered_roadmap.h"
#define URMOM_TEST
#include "../unordered_roadmap.c"
// Prints a printf-style message to stderr, prefixed with the current file and line
// and terminated with a newline. `msg` must be a string literal: it is concatenated
// with the prefix at compile time.
#define LOG(msg, ...) \
fprintf(stderr, "%s:%d: " msg "\n", __FILE__, __LINE__, ##__VA_ARGS__)
// Test assertion: evaluates `a` and `b` once each as int and requires them to be equal.
// On mismatch it logs both expressions with their values and makes the enclosing
// test function return 0 (failure).
#define CHECK_EQUAL_I(a, b) \
	do { \
		const int lhs_value = (a); \
		const int rhs_value = (b); \
		if (lhs_value == rhs_value) \
			break; \
		LOG("CHECK_EQUAL_I("#a", "#b") failed: %d != %d", lhs_value, rhs_value); \
		return 0; \
	} while (0)
// Test assertion: evaluates `a` and `b` once each as C strings and requires them to
// compare equal with strcmp(). On mismatch it logs both expressions with their
// contents and makes the enclosing test function return 0 (failure).
#define CHECK_EQUAL_S(a, b) \
	do { \
		const char *lhs_str = (a); \
		const char *rhs_str = (b); \
		if (strcmp(lhs_str, rhs_str) == 0) \
			break; \
		LOG("CHECK_EQUAL_S("#a", "#b") failed: \"%s\" != \"%s\"", lhs_str, rhs_str); \
		return 0; \
	} while (0)
// Test assertion: evaluates `a` and `b` once each as int and requires them to differ.
// If they are equal it logs both expressions with their values and makes the
// enclosing test function return 0 (failure).
#define CHECK_NOT_EQUAL_I(a, b) \
	do { \
		const int lhs_value = (a); \
		const int rhs_value = (b); \
		if (lhs_value != rhs_value) \
			break; \
		LOG("CHECK_NOT_EQUAL_I("#a", "#b") failed: %d == %d", lhs_value, rhs_value); \
		return 0; \
	} while (0)
// Item type stored in the test hash maps: the urmom header followed by a payload.
typedef struct {
// Hash map header. It is the first field because the map reaches each item's
// header by casting the address of the item itself.
urmom_header_t hdr_;
// Payload fields; the tests in this file never read or write them.
int i;
float f;
} item_t;
// Common test setup: declares a local array `items` of N item_t slots and a
// descriptor `desc` pointing at it, then marks every slot empty via urmomInit().
// N must be a power of two, since urmomInit() asserts on that.
// The expansion introduces declarations, so use it once per scope and follow it with `;`.
#define PREAMBLE(N) \
item_t items[N]; \
const urmom_desc_t desc = { \
.array = items, \
.count = COUNTOF(items), \
.item_size = sizeof(item_t), \
}; \
urmomInit(&desc)
// Round trip for a single key: insert it, find it at the same index, remove it,
// then verify that the slot's key was cleared and the key can no longer be found.
// Returns 1 on success, 0 as soon as any CHECK_* fails.
static int test_insert_find_remove( void ) {
PREAMBLE(4);
const int i = urmomInsert(&desc, "bidonchik");
CHECK_NOT_EQUAL_I(i, -1);
// Insert must have copied the key into the slot header
CHECK_EQUAL_S(items[i].hdr_.key, "bidonchik");
const int found = urmomFind(&desc, "bidonchik");
CHECK_EQUAL_I(found, i);
// Remove reports the index of the slot it freed
const int removed = urmomRemove(&desc, "bidonchik");
CHECK_EQUAL_I(removed, i);
CHECK_EQUAL_I(items[i].hdr_.key[0], '\0');
const int not_found = urmomFind(&desc, "bidonchik");
CHECK_EQUAL_I(not_found, -1);
return 1;
}
// Looking up a key in a freshly initialized (all-empty) map must report -1.
// Returns 1 on success, 0 if the check fails.
static int test_find_nonexistent( void ) {
PREAMBLE(4);
const int found = urmomFind(&desc, "kishochki");
CHECK_EQUAL_I(found, -1);
return 1;
}
// Two different keys must land in two different slots, and each must afterwards
// be found at the index its insertion returned.
// Returns 1 on success, 0 as soon as any CHECK_* fails.
static int test_insert_find_many( void ) {
PREAMBLE(4);
const int a = urmomInsert(&desc, "smetanka");
CHECK_NOT_EQUAL_I(a, -1);
CHECK_EQUAL_S(items[a].hdr_.key, "smetanka");
const int b = urmomInsert(&desc, "tworog");
CHECK_NOT_EQUAL_I(b, -1);
CHECK_NOT_EQUAL_I(a, b);
CHECK_EQUAL_S(items[b].hdr_.key, "tworog");
// Look both keys up only after both insertions, so the second insert is shown
// not to have disturbed the first
const int a_found = urmomFind(&desc, "smetanka");
const int b_found = urmomFind(&desc, "tworog");
CHECK_EQUAL_I(a_found, a);
CHECK_EQUAL_I(b_found, b);
return 1;
}
// Fills all 4 slots with distinct keys, verifies that a fifth insertion is rejected
// with -1, then removes one key and verifies that the freed (deleted) slot can be
// reused by a new key.
// Returns 1 on success, 0 as soon as any CHECK_* fails.
static int test_overflow( void ) {
PREAMBLE(4);
const int a = urmomInsert(&desc, "smetanka");
CHECK_NOT_EQUAL_I(a, -1);
CHECK_EQUAL_S(items[a].hdr_.key, "smetanka");
const int b = urmomInsert(&desc, "tworog");
CHECK_NOT_EQUAL_I(b, -1);
CHECK_NOT_EQUAL_I(a, b);
CHECK_EQUAL_S(items[b].hdr_.key, "tworog");
const int c = urmomInsert(&desc, "kefirushka");
CHECK_NOT_EQUAL_I(c, -1);
CHECK_NOT_EQUAL_I(a, c);
CHECK_NOT_EQUAL_I(b, c);
CHECK_EQUAL_S(items[c].hdr_.key, "kefirushka");
const int d = urmomInsert(&desc, "ryazhenka");
CHECK_NOT_EQUAL_I(d, -1);
CHECK_NOT_EQUAL_I(a, d);
CHECK_NOT_EQUAL_I(b, d);
CHECK_NOT_EQUAL_I(c, d);
CHECK_EQUAL_S(items[d].hdr_.key, "ryazhenka");
// The table is full now: one more distinct key must be rejected.
// Scoped so that `e` can be declared again below.
{
const int e = urmomInsert(&desc, "riajenka");
CHECK_EQUAL_I(e, -1);
}
const int d_remove = urmomRemove(&desc, "ryazhenka");
CHECK_EQUAL_I(d_remove, d);
// A removed slot is marked deleted rather than empty: state == 0 with a non-zero hash
CHECK_EQUAL_I(items[d_remove].hdr_.state, 0);
CHECK_NOT_EQUAL_I(items[d_remove].hdr_.hash, 0);
CHECK_EQUAL_I(items[d_remove].hdr_.key[0], '\0');
// With one slot freed the previously rejected key must fit
const int e = urmomInsert(&desc, "riajenka");
CHECK_NOT_EQUAL_I(e, -1);
CHECK_NOT_EQUAL_I(a, e);
CHECK_NOT_EQUAL_I(b, e);
CHECK_NOT_EQUAL_I(c, e);
CHECK_EQUAL_S(items[e].hdr_.key, "riajenka");
return 1;
}
// Two keys with the same hash must still get separate slots and be found independently.
// Assumes the map hashes with FNV-1a: the keys "costarring" and "liquid" are chosen to
// collide under it, and the test itself checks that the stored hashes are equal.
// Returns 1 on success, 0 as soon as any CHECK_* fails.
static int test_hash_collision( void ) {
PREAMBLE(4);
const int a = urmomInsert(&desc, "costarring");
CHECK_NOT_EQUAL_I(a, -1);
const int b = urmomInsert(&desc, "liquid");
CHECK_NOT_EQUAL_I(b, -1);
CHECK_NOT_EQUAL_I(b, a);
// Guards the premise of the test: if the hashes differ, this is not a collision test
CHECK_EQUAL_I(items[a].hdr_.hash, items[b].hdr_.hash);
const int a_found = urmomFind(&desc, "costarring");
CHECK_EQUAL_I(a_found, a);
const int b_found = urmomFind(&desc, "liquid");
CHECK_EQUAL_I(b_found, b);
return 1;
}
// Placeholder test that always passes.
// NOTE(review): the commented-out check below would always fail if enabled; presumably
// it is kept for manually checking how a failing test is reported.
static int test_fail( void ) {
//CHECK_EQUAL_S("sapogi", "tapki");
return 1;
}
// X-macro list of all test cases: LIST_TESTS(X) expands to X(name) for each test function.
// Every test defined in this file has to be listed here, otherwise it is never run.
// The last entry deliberately has no trailing backslash, so the macro definition cannot
// absorb whatever line follows it.
#define LIST_TESTS(X) \
	X(test_insert_find_remove) \
	X(test_find_nonexistent) \
	X(test_insert_find_many) \
	X(test_overflow) \
	X(test_hash_collision) \
	X(test_fail)
// Test runner: executes every test named in LIST_TESTS, reporting progress on stderr.
// The process exit code is the number of failed tests, so 0 means all tests passed.
int main( void ) {
	int failed = 0;

	// Each test function returns non-zero on success and 0 on failure
#define X(test_fn) \
	do { \
		fprintf(stderr, "Running " #test_fn "...\n"); \
		const int passed = test_fn(); \
		fprintf(stderr, #test_fn " => %s\n", passed ? "OK" : "FAIL" ); \
		if (passed == 0) \
			failed += 1; \
	} while (0);
	LIST_TESTS(X)
#undef X

	return failed;
}

127
ref/vk/unordered_roadmap.c Normal file
View File

@ -0,0 +1,127 @@
#include "unordered_roadmap.h"
#ifndef URMOM_TEST
#include "vk_common.h"
#include "vk_logs.h"
#else
#include <string.h>
#include <stdio.h>
#include <assert.h>
// Stand-ins for the macros this file needs when built with URMOM_TEST defined, i.e.
// without vk_common.h / vk_logs.h (which presumably provide them in the regular build).
// ERR: printf-style error message to stderr; no newline is appended.
#define ERR(msg, ...) fprintf(stderr, msg, ##__VA_ARGS__)
// ASSERT: forwards to the standard assert()
#define ASSERT(...) assert(__VA_ARGS__)
// COUNTOF: number of elements of a true array (not valid for pointers)
#define COUNTOF(a) (sizeof(a)/sizeof(a[0]))
#endif
// Computes the 32-bit FNV-1a hash of a NUL-terminated string.
// The terminator itself is not hashed; an empty string yields the FNV offset basis.
static uint32_t hash32FNV1aStr(const char *str) {
	static const uint32_t fnv_offset_basis = 0x811c9dc5u;
	static const uint32_t fnv_prime = 0x01000193u;
	uint32_t hash = fnv_offset_basis;
	for (; *str; ++str) {
		// FNV works on octets. Going through unsigned char keeps bytes >= 0x80 from being
		// sign-extended on platforms where plain char is signed, which would flip the
		// upper 24 bits of the hash and make the result platform-dependent.
		hash ^= (unsigned char)*str;
		hash *= fnv_prime;
	}
	return hash;
}
// Sets all items to empty.
// desc->array must point to desc->count items of desc->item_size bytes each, with a
// urmom_header_t at the start of every item.
void urmomInit(const urmom_desc_t* desc) {
	char *ptr = desc->array;
	// Lookups wrap the probe index with (count - 1) as a bit mask, which only works when
	// count is a power of two. Zero must be rejected explicitly: it passes the bit trick
	// but would turn the mask into 0xffffffff.
	ASSERT(desc->count != 0 && (desc->count & (desc->count - 1)) == 0);
	for (uint32_t i = 0; i < desc->count; ++i) {
		urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * i);
		// state == 0 && hash == 0 is the "empty" encoding
		hdr->state = 0;
		hdr->hash = 0;
	}
}
// Returns index of the element with the key if found, -1 otherwise
int urmomFind(const urmom_desc_t* desc, const char* key) {
const char *ptr = desc->array;
const uint32_t hash = hash32FNV1aStr(key) & 0x7fffffffu;
const uint32_t mask = (desc->count - 1);
const int start_index = hash & mask;
for (int index = start_index;;) {
const urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * index);
if (URMOM_IS_OCCUPIED(*hdr)) {
if (hdr->hash == hash && strcmp(key, hdr->key) == 0)
return index;
} else if (URMOM_IS_EMPTY(*hdr))
// Reached the end of non-empty chain, not found
break;
// No match ;_;, check the next one
index = (index + 1) & mask;
// Searched through the entire thing
if (index == start_index)
break;
}
return -1;
}
// Returns index of the element either found or empty slot where this could be inserted. If full, -1.
int urmomInsert(const urmom_desc_t* desc, const char *key) {
char *ptr = desc->array;
const uint32_t hash = hash32FNV1aStr(key) & 0x7fffffffu;
const uint32_t mask = (desc->count - 1);
const int start_index = hash & mask;
int index = start_index;
for (;;) {
const urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * index);
if (URMOM_IS_OCCUPIED(*hdr)) {
if (hdr->hash == hash && strcmp(key, hdr->key) == 0)
return index;
} else
// Reached the end of occupied chain, return the available slot
break;
index = (index + 1) & mask;
// Searched through the entire thing
if (index == start_index)
return -1;
}
urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * index);
hdr->hash = hash;
hdr->state = 1;
// TODO check for key length
strncpy(hdr->key, key, sizeof(hdr->key));
return index;
}
// Removes the item stored under `key`, if there is one.
// Returns the index of the slot that was freed, or -1 if the key was not found.
int urmomRemove(const urmom_desc_t* desc, const char *key) {
	const int slot = urmomFind(desc, key);

	// Nothing to delete: pass the lookup result (-1) straight through
	if (slot < 0)
		return slot;

	urmomDeleteByIndex(desc, slot);
	return slot;
}
// Marks the occupied slot at `index` as deleted and clears its key.
// Logs an error and leaves the table untouched if `index` is outside the table or the
// slot is not occupied.
void urmomDeleteByIndex(const urmom_desc_t* desc, int index) {
	char *ptr = desc->array;
	urmom_header_t *hdr;

	// Reject indices outside the array before touching any memory
	if (index < 0 || (uint32_t)index >= desc->count) {
		ERR("Hashmap=%p(is=%u, n=%u): slot %d is out of bounds", desc->array, (unsigned)desc->item_size, (unsigned)desc->count, index);
		return;
	}

	hdr = (urmom_header_t*)(ptr + desc->item_size * index);
	if (!URMOM_IS_OCCUPIED(*hdr)) {
		ERR("Hashmap=%p(is=%u, n=%u): slot %d is not occupied", desc->array, (unsigned)desc->item_size, (unsigned)desc->count, index);
		return;
	}

	// Mark it as deleted rather than empty, so that probe chains running through this
	// slot are not cut short.
	// TODO when can we mark it as empty? For linear search we can do so if the next one is empty
	hdr->state = 0; // not occupied
	hdr->hash = 1; // any non-zero value: deleted, not empty
	hdr->key[0] = '\0';
}

View File

@ -0,0 +1,48 @@
#pragma once
#include <stdint.h>
#define MAX_KEY_STRING_LENGTH 256
// URMOM = Unordered RoadMap Open addressiMg
// Open-addressed hash table item header.
// Every item stored in the table starts with this header; the table functions reach
// it by casting the address of the item.
typedef struct urmom_header_s {
// Slot status is encoded by the (state, hash) pair:
// state == 1, any hash  -- occupied (including an item whose hash is 0)
// state == 0, hash != 0 -- deleted
// state == 0, hash == 0 -- empty
uint32_t state:1;
// Only 31 bits are stored, so key hashes are masked with 0x7fffffff before they
// are stored or compared
uint32_t hash:31;
// NUL-terminated key string of the item
char key[MAX_KEY_STRING_LENGTH];
} urmom_header_t;
// Slot status predicates. Each takes a urmom_header_t value (not a pointer), e.g.
// URMOM_IS_OCCUPIED(*hdr). Exactly one of the three holds for any header.
// Slot holds a live item
#define URMOM_IS_OCCUPIED(hdr) ((hdr).state != 0)
// Slot has never been used since init
#define URMOM_IS_EMPTY(hdr) ((hdr).state == 0 && (hdr).hash == 0)
// Slot held an item that has been removed
#define URMOM_IS_DELETED(hdr) ((hdr).state == 0 && (hdr).hash != 0)
// Describes a caller-owned array that is used as hash table storage.
// The table functions take this by const pointer and access the items through `array`.
typedef struct urmom_desc_s {
// Pointer to the beginning of the array of items.
// Each item is a struct that has urmom_header_t as its first field
void *array;
// Array item size in bytes, including the urmom_header_t
uint32_t item_size;
// Maximum number of items in the array.
// Must be a power of two: urmomInit() asserts on it, and lookups use (count - 1) as an index mask
uint32_t count;
} urmom_desc_t;
// Sets all items to empty. Must be called before any other function is used on the table.
void urmomInit(const urmom_desc_t* desc);
// Returns index of the element with the key if found, -1 otherwise
int urmomFind(const urmom_desc_t* desc, const char* key);
// Returns index of the element with the key: either the one already stored, or a free
// slot that has just been claimed for this key (header filled in, rest of the item untouched).
// Returns -1 if the key cannot be stored, e.g. because the table is full.
int urmomInsert(const urmom_desc_t* desc, const char *key);
// Marks the occupied item at `index` as deleted and clears its key.
// Logs an error and does nothing if that item is not occupied.
void urmomDeleteByIndex(const urmom_desc_t* desc, int index);
// Return the index of item deleted (if found), -1 otherwise
int urmomRemove(const urmom_desc_t* desc, const char *key);

View File

@ -153,12 +153,28 @@ def build(bld):
#bld.install_files(bld.env.LIBDIR + '/valve', things)
bld.install_files(bld.env.LIBDIR,
bld.path.ant_glob('data/**'),
cwd=bld.path.find_dir('data/'),
relative_trick=True)
bld.path.ant_glob('data/**'),
cwd=bld.path.find_dir('data/'),
relative_trick=True)
#bld.program(features='test', defines=['ALOLCATOR_TEST'],source='alolcator.c', target='alolcator')
bld.add_post_fun(printTestSummary)
if bld.env.TESTS:
bld.program(
features='test',
defines=['ALOLCATOR_TEST'],
source='alolcator.c',
target='test_alolcator',
subsystem = bld.env.CONSOLE_SUBSYSTEM,
install_path = None)
#from waflib.Tools import waf_unit_test
#bld.add_post_fun(waf_unit_test.summary)
tests = {
'unordered_roadmap': 'tests/unordered_roadmap.c',
}
for i in tests:
bld.program(features = 'test',
source = tests[i],
target = 'test_%s' % i,
subsystem = bld.env.CONSOLE_SUBSYSTEM,
install_path = None)
#bld.add_post_fun(printTestSummary)