2004-05-13 02:41:07 -04:00

263 lines
6.6 KiB
C

/*
* Copyright (c) 2000-2001
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <string.h>
#include "termhash.h"
#include "hash.h"
#include "termhash.h"
#include "util.h"
#define UB(n) ((1<<n)-1)
#define CAP(n) (1<<n)
#define INITIAL_TABLE_SIZE 8 /* the initial table size is 2^8 */
/* An individual entry in the table consists of an array of stamps */
/* (same arity as the expr's constructor) in addition to the expr */
/* itself. */
typedef struct hash_entry *hash_entry;
/* A term_bucket is a list of entries (an list of exprs that have */
/* collided after hashing) */
typedef struct term_bucket *term_bucket;
struct hash_entry
{
int length;
stamp *stamps;
gen_e e;
};
struct term_bucket
{
hash_entry entry;
struct term_bucket *next;
};
#define scan_term_bucket(b,var) for(var = b; var; var = var->next)
/* size: initial_table_size + number of rehashes */
/* capacity: 2^size (for array size) */
/* ub: 2^size-1 (for array indexing) */
/* inserts: num of elements inserted into the array */
struct term_hash
{
term_bucket * term_buckets;
region rgn;
int ub;
int size;
int capacity;
int inserts;
};
static int hash(int ub, stamp stamps[], int len);
static void post_insert(term_hash tab) deletes;
static void rehash(term_hash tab) deletes;
static void reinsert(term_hash tab, term_bucket b);
static void insert(term_hash tab, gen_e e, stamp * stamps, int len);
static void insert_entry(term_hash tab, struct hash_entry *entry);
static gen_e walk(term_bucket b, stamp * stamps, int len);
static const int primes[] =
{ 83, 1789, 5189, 5449, 5659, 6703, 7517, 7699, 8287, 8807, 9067, 9587,
10627, 10939, 11239};
/*
static const int prime_1 = 83;
static const int prime_2 = 1789;
*/
static const int initial_table_size = INITIAL_TABLE_SIZE;
term_hash make_term_hash(region rgn)
{
int ub, n;
int i;
region r;
term_hash tab = ralloc(rgn, struct term_hash);
r = newregion();
ub = UB(initial_table_size);
n = CAP(initial_table_size);
tab->term_buckets = rarrayalloc(r, n, term_bucket);
for (i = 0; i < n; i++)
{
tab->term_buckets[i] = NULL;
}
tab->rgn = r;
tab->ub = ub;
tab->size = initial_table_size;
tab->capacity = n;
tab->inserts = 0;
return tab;
}
void term_hash_delete(term_hash tab) deletes
{
deleteregion(tab->rgn);
}
gen_e term_hash_find(term_hash tab, stamp stamps[], int len)
{
int hash_val;
term_bucket b;
hash_val = hash(tab->ub, stamps, len);
b = tab->term_buckets[hash_val];
return walk(b, stamps, len);
}
static gen_e walk(term_bucket b, stamp stamps[], int len)
{
term_bucket cur;
scan_term_bucket(b,cur)
{
if (len == cur->entry->length
&& (memcmp(stamps, cur->entry->stamps, sizeof(int)*len) == 0) )
return cur->entry->e;
}
return NULL;
}
/* Should call t_hash_find to see if a gen_e with the given stamp */
/* signature is already in the table. If so, insert should return */
/* true and do nothing. */
bool term_hash_insert(term_hash tab, gen_e e, stamp * stamps, int len) deletes
{
if (term_hash_find(tab, stamps, len) != NULL)
{
return TRUE;
}
insert(tab, e, stamps, len);
post_insert(tab);
return FALSE;
}
/* Insert an expression e represented by the given stamp array into */
/* the hash table. */
static void insert(term_hash tab, gen_e e, stamp stamps[], int len)
{
hash_entry entry;
stamp * stamp_cpy;
int i;
entry = ralloc(tab->rgn, struct hash_entry);
stamp_cpy = rarrayalloc(tab->rgn, len, stamp);
for (i = 0; i < len; i++)
{
stamp_cpy[i] = stamps[i];
}
entry->length = len;
entry->stamps = stamp_cpy;
entry->e = e;
insert_entry(tab, entry);
}
static void insert_entry(term_hash tab, hash_entry entry)
{
int hash_val;
term_bucket b, new_term_bucket;
hash_val = hash(tab->ub, entry->stamps, entry->length);
b = tab->term_buckets[hash_val];
new_term_bucket = ralloc(tab->rgn, struct term_bucket);
new_term_bucket->entry = entry;
new_term_bucket->next = b;
tab->term_buckets[hash_val] = new_term_bucket;
}
static void post_insert(term_hash tab) deletes
{
if (tab->capacity == ++tab->inserts)
{
rehash(tab);
}
}
/* Double the size of the hash table and reinsert all of the elements. */
static void rehash(term_hash tab) deletes
{
region old_rgn;
term_bucket * old_term_buckets;
int i;
int old_table_size = tab->capacity;
old_term_buckets = tab->term_buckets;
tab->capacity *= 2;
tab->ub = UB(++tab->size);
old_rgn = tab->rgn;
tab->rgn = newregion();
tab->term_buckets = rarrayalloc(tab->rgn, tab->capacity, term_bucket);
for (i = 0; i < old_table_size; i++)
{
if (old_term_buckets[i] != NULL && old_term_buckets[i]->entry != NULL)
reinsert(tab, old_term_buckets[i]);
}
deleteregion(old_rgn);
}
static void reinsert(term_hash tab, term_bucket b)
{
term_bucket cur;
scan_term_bucket(b,cur)
insert(tab, cur->entry->e, cur->entry->stamps, cur->entry->length);
}
static int hash(int ub, stamp stamps[], int len)
{
int i, n;
n = 0;
for (i = 0; i < len; i++)
{
n = (n + (primes[i % 15] * abs(stamps[i]))) & ub;
}
return n;
}