Clean up hash code, parameterize some actions, tweak some parameters. Hash

table entries, table allocation and control structure are larger now, but
collisions are reduced and string compares even further reduced.

Dump lots more statistics, especially hash code data, for --statistics.  Dump
statistics even in error cases.

Details in ChangeLog.
This commit is contained in:
Ken Raeburn 1995-11-28 19:21:09 +00:00
parent da954cd7b6
commit 6594d6b9c2
3 changed files with 221 additions and 173 deletions

View File

@ -15,7 +15,7 @@
You should have received a copy of the GNU General Public License
along with GAS; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/*
* BUGS, GRIPES, APOLOGIA etc.
@ -136,26 +136,39 @@
#define error as_fatal
#define DELETED ((PTR)1) /* guarenteed invalid address */
#define START_POWER (11) /* power of two: size of new hash table */
static char _deleted_[1];
#define DELETED ((PTR)_deleted_) /* guarenteed unique address */
#define START_POWER (10) /* power of two: size of new hash table */
/* TRUE if a symbol is in entry @ ptr. */
#define islive(ptr) (ptr->hash_string && ptr->hash_string!=DELETED)
/* Number of slots in hash table. The wall does not count here.
We expect this is always a power of 2. */
#define STAT_SIZE (0)
#define STAT_ACCESS (1) /* number of hash_ask()s */
enum stat_enum {
/* Number of slots in hash table. The wall does not count here.
We expect this is always a power of 2. */
STAT_SIZE = 0,
/* Number of hash_ask calls. */
STAT_ACCESS,
STAT_ACCESS_w,
/* Number of collisions (total). This may exceed STAT_ACCESS if we
have lots of collisions/access. */
STAT_COLLIDE,
STAT_COLLIDE_w,
/* Slots used right now. */
STAT_USED,
/* How many string compares? */
STAT_STRCMP,
STAT_STRCMP_w,
/* Size of statistics block... this must be last. */
STATLENGTH
};
#define STAT__READ (0) /* reading */
#define STAT__WRITE (1) /* writing */
/* Number of collisions (total). This may exceed STAT_ACCESS if we
have lots of collisions/access. */
#define STAT_COLLIDE (3)
#define STAT_USED (5) /* slots used right now */
#define STATLENGTH (6) /* size of statistics block */
#if STATLENGTH != HASH_STATLENGTH
Panic! Please make #include "stat.h" agree with previous definitions!
#endif
/* When we grow a hash table, by what power of two do we increase it? */
#define GROW_FACTOR 1
/* When should we grow it? */
#define FULL_VALUE(N) ((N) / 4)
/* #define SUSPECT to do runtime checks */
/* #define TEST to be a test jig for hash...() */
@ -170,6 +183,17 @@ Panic! Please make #include "stat.h" agree with previous definitions!
#define START_FULL (4)
#endif
struct hash_control {
struct hash_entry *hash_where;/* address of hash table */
int hash_sizelog; /* Log of ( hash_mask + 1 ) */
int hash_mask; /* masks a hash into index into table */
int hash_full; /* when hash_stat[STAT_USED] exceeds this, */
/* grow table */
struct hash_entry *hash_wall; /* point just after last (usable) entry */
/* here we have some statistics */
int hash_stat[STATLENGTH]; /* lies & statistics */
};
/*------------------ plan ---------------------------------- i = internal
struct hash_control * c;
@ -267,7 +291,7 @@ hash_new ()
retval->hash_where = room;
retval->hash_wall =
wall = room + (1 << START_POWER);
retval->hash_full = (1 << START_POWER) / 2;
retval->hash_full = FULL_VALUE (1 << START_POWER);
for (entry = room; entry <= wall; entry++)
entry->hash_string = NULL;
return retval;
@ -291,6 +315,7 @@ hash_die (handle)
free ((char *) handle);
}
#ifdef TEST
/*
* h a s h _ s a y ( )
*
@ -304,7 +329,7 @@ hash_die (handle)
* Then contents of hash_stat[] are read out (in ascending order)
* until your buffer or hash_stat[] is exausted.
*/
void
static void
hash_say (handle, buffer, bufsiz)
struct hash_control *handle;
int buffer[ /*bufsiz*/ ];
@ -324,6 +349,7 @@ hash_say (handle, buffer, bufsiz)
}
}
}
#endif
/*
* h a s h _ d e l e t e ( )
@ -438,7 +464,7 @@ hash_insert (handle, string, value)
*
* Regardless of what was in the symbol table before, after hash_jam()
* the named symbol has the given value. The symbol is either inserted or
* (its value is) relpaced.
* (its value is) replaced.
* An error message string is returned, 0 means OK.
*
* WARNING: this may decide to grow the hashed symbol table.
@ -515,69 +541,48 @@ hash_grow (handle) /* make a hash table grow */
* attempt to get enough room for a hash table twice as big
*/
temp = handle->hash_stat[STAT_SIZE];
if ((newwhere = ((struct hash_entry *)
xmalloc ((unsigned long) ((temp + temp + 1)
* sizeof (struct hash_entry)))))
!= NULL)
/* +1 for wall slot */
{
retval = 0; /* assume success until proven otherwise */
/*
* have enough room: now we do all the work.
* double the size of everything in handle,
* note: hash_mask frob works for 1's & for 2's complement machines
*/
handle->hash_mask = handle->hash_mask + handle->hash_mask + 1;
handle->hash_stat[STAT_SIZE] <<= 1;
newsize = handle->hash_stat[STAT_SIZE];
handle->hash_where = newwhere;
handle->hash_full <<= 1;
handle->hash_sizelog += 1;
handle->hash_stat[STAT_USED] = 0;
handle->hash_wall =
newwall = newwhere + newsize;
/*
* set all those pesky new slots to vacant.
*/
for (newtrack = newwhere; newtrack <= newwall; newtrack++)
{
newtrack->hash_string = NULL;
}
/*
* we will do a scan of the old table, the hard way, using the
* new control block to re-insert the data into new hash table.
*/
handle->hash_stat[STAT_USED] = 0; /* inserts will bump it up to correct */
for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++)
if (((string = oldtrack->hash_string) != NULL) && string != DELETED)
if ((retval = hash_jam (handle, string, oldtrack->hash_value)))
break;
newwhere = ((struct hash_entry *)
xmalloc ((unsigned long) ((temp << GROW_FACTOR + 1)
/* +1 for wall slot */
* sizeof (struct hash_entry))));
if (newwhere == NULL)
return "no_room";
/*
* have enough room: now we do all the work.
* double the size of everything in handle.
*/
handle->hash_mask = ((handle->hash_mask + 1) << GROW_FACTOR) - 1;
handle->hash_stat[STAT_SIZE] <<= GROW_FACTOR;
newsize = handle->hash_stat[STAT_SIZE];
handle->hash_where = newwhere;
handle->hash_full <<= GROW_FACTOR;
handle->hash_sizelog += GROW_FACTOR;
handle->hash_wall = newwall = newwhere + newsize;
/* Set all those pesky new slots to vacant. */
for (newtrack = newwhere; newtrack <= newwall; newtrack++)
newtrack->hash_string = NULL;
/* We will do a scan of the old table, the hard way, using the
* new control block to re-insert the data into new hash table. */
handle->hash_stat[STAT_USED] = 0;
for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++)
if (((string = oldtrack->hash_string) != NULL) && string != DELETED)
if ((retval = hash_jam (handle, string, oldtrack->hash_value)))
return retval;
#ifdef SUSPECT
if (!retval && handle->hash_stat[STAT_USED] != oldused)
{
retval = "hash_used";
}
if (handle->hash_stat[STAT_USED] != oldused)
return "hash_used";
#endif
if (!retval)
{
/*
* we have a completely faked up control block.
* return the old hash table.
*/
free ((char *) oldwhere);
/*
* Here with success. retval is already 0.
*/
}
}
else
{
retval = "no room";
}
return retval;
/* We have a completely faked up control block.
Return the old hash table. */
free ((char *) oldwhere);
return 0;
}
#ifdef TEST
/*
* h a s h _ a p p l y ( )
*
@ -606,28 +611,20 @@ hash_grow (handle) /* make a hash table grow */
* In future we may use the value returned by your nominated function.
* One idea is to abort the scan if, after applying the function to a
* certain node, the function returns a certain code.
* To be safe, please make your functions of type char *. If you always
* return NULL, then the scan will complete, visiting every symbol in
* the table exactly once. ALL OTHER RETURNED VALUES have no meaning yet!
* Caveat Actor!
*
* The function you supply should be of the form:
* char * myfunct(string,value)
* void myfunct(string,value)
* char * string; |* the symbol's name *|
* char * value; |* the symbol's value *|
* {
* |* ... *|
* return(NULL);
* }
*
* The returned value of hash_apply() is (char*)NULL. In future it may return
* other values. NULL means "completed scan OK". Other values have no meaning
* yet. (The function has no graceful failures.)
*/
char *
void
hash_apply (handle, function)
struct hash_control *handle;
char *(*function) ();
void (*function) ();
{
struct hash_entry *entry;
struct hash_entry *wall;
@ -640,8 +637,8 @@ hash_apply (handle, function)
(*function) (entry->hash_string, entry->hash_value);
}
}
return (NULL);
}
#endif
/*
* h a s h _ f i n d ( )
@ -673,27 +670,40 @@ hash_find (handle, string)
* Internal.
*/
static struct hash_entry * /* string slot, may be empty or deleted */
hash_ask (handle, string, access)
hash_ask (handle, string, access_type)
struct hash_control *handle;
const char *string;
int access; /* access type */
int access_type;
{
const char *s;
struct hash_entry *slot;
int collision; /* count collisions */
int strcmps;
int hcode;
/* start looking here */
slot = handle->hash_where + hash_code (handle, string);
hcode = hash_code (handle, string);
slot = handle->hash_where + (hcode & handle->hash_mask);
handle->hash_stat[STAT_ACCESS + access] += 1;
collision = 0;
handle->hash_stat[STAT_ACCESS + access_type] += 1;
collision = strcmps = 0;
hash_found = FALSE;
while (((s = slot->hash_string) != NULL) && s != DELETED)
{
if (string == s || !strcmp (string, s))
hash_found = TRUE;
if (hash_found)
break;
if (string == s)
{
hash_found = TRUE;
break;
}
if (slot->h == hcode)
{
if (!strcmp (string, s))
{
hash_found = TRUE;
break;
}
strcmps++;
}
collision++;
slot++;
}
@ -710,8 +720,20 @@ hash_ask (handle, string, access)
slot = handle->hash_where;/* now look again */
while (((s = slot->hash_string) != NULL) && s != DELETED)
{
if (string == s || !strcmp (string, s))
hash_found = TRUE;
if (string == s)
{
hash_found = TRUE;
break;
}
if (slot->h == hcode)
{
if (!strcmp (string, s))
{
hash_found = TRUE;
break;
}
strcmps++;
}
collision++;
slot++;
}
@ -723,8 +745,11 @@ hash_ask (handle, string, access)
* DELETED:dig here slot
*/
}
handle->hash_stat[STAT_COLLIDE + access] += collision;
return (slot); /* also return hash_found */
handle->hash_stat[STAT_COLLIDE + access_type] += collision;
handle->hash_stat[STAT_STRCMP + access_type] += strcmps;
if (!hash_found)
slot->h = hcode;
return slot; /* also return hash_found */
}
/*
@ -752,7 +777,7 @@ hash_code (handle, string)
h += c;
h = (h << 3) + (h >> n) + c;
}
return (h & handle->hash_mask);
return h;
#else
/* from bfd */
unsigned long h = 0;
@ -767,10 +792,41 @@ hash_code (handle, string)
}
h += len + (len << 17);
h ^= h >> 2;
return h & handle->hash_mask;
return h;
#endif
}
void
hash_print_statistics (file, name, h)
FILE *file;
const char *name;
struct hash_control *h;
{
unsigned long sz, used, pct;
if (h == 0)
return;
sz = h->hash_stat[STAT_SIZE];
used = h->hash_stat[STAT_USED];
pct = (used * 100 + sz / 2) / sz;
fprintf (file, "%s hash statistics:\n\t%d/%d slots used (%d%%)\n",
name, used, sz, pct);
#define P(name, off) \
fprintf (file, "\t%-16s %6dr + %6dw = %7d\n", name, \
h->hash_stat[off+STAT__READ], \
h->hash_stat[off+STAT__WRITE], \
h->hash_stat[off+STAT__READ] + h->hash_stat[off+STAT__WRITE])
P ("accesses:", STAT_ACCESS);
P ("collisions:", STAT_COLLIDE);
P ("string compares:", STAT_STRCMP);
#undef P
}
/*
* Here is a test program to exercise above.
*/
@ -796,8 +852,8 @@ int number; /* number 0:TABLES-1 of current hashed */
main ()
{
char (*applicatee ());
char *destroy ();
void applicatee ();
void destroy ();
char *what ();
int *ip;
@ -912,24 +968,22 @@ what (description)
return (retval);
}
char *
void
destroy (string, value)
char *string;
char *value;
{
free (string);
free (value);
return (NULL);
}
char *
void
applicatee (string, value)
char *string;
char *value;
{
printf ("%.20s-%.20s\n", string, value);
return (NULL);
}
whattable () /* determine number: what hash table to use */

View File

@ -1,59 +1,55 @@
/* hash.h - for hash.c
Copyright (C) 1987 Free Software Foundation, Inc.
Copyright (C) 1987, 1992 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
GAS is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GAS; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#ifndef hashH
#define hashH
struct hash_entry
{
char * hash_string; /* points to where the symbol string is */
/* NULL means slot is not used */
/* DELETED means slot was deleted */
char * hash_value; /* user's datum, associated with symbol */
const char *hash_string; /* points to where the symbol string is */
/* NULL means slot is not used */
/* DELETED means slot was deleted */
PTR hash_value; /* user's datum, associated with symbol */
unsigned long h;
};
#define HASH_STATLENGTH (6)
struct hash_control
{
struct hash_entry * hash_where; /* address of hash table */
int hash_sizelog; /* Log of ( hash_mask + 1 ) */
int hash_mask; /* masks a hash into index into table */
int hash_full; /* when hash_stat[STAT_USED] exceeds this, */
/* grow table */
struct hash_entry * hash_wall; /* point just after last (usable) entry */
/* here we have some statistics */
int hash_stat[HASH_STATLENGTH]; /* lies & statistics */
/* we need STAT_USED & STAT_SIZE */
};
struct hash_control;
/* returns control block */
struct hash_control *hash_new PARAMS ((void));
void hash_die PARAMS ((struct hash_control *));
/* returns previous value */
PTR hash_delete PARAMS ((struct hash_control *, const char *str));
/* returns previous value */
PTR hash_replace PARAMS ((struct hash_control *, const char *str, PTR val));
/* returns error string or null */
const char *hash_insert PARAMS ((struct hash_control *, const char *str,
PTR val));
/* returns value */
PTR hash_find PARAMS ((struct hash_control *, const char *str));
/* returns error text or null (internal) */
const char *hash_jam PARAMS ((struct hash_control *, const char *str,
PTR val));
/* returns */
struct hash_control * hash_new(); /* [control block] */
void hash_die();
void hash_say();
char * hash_delete(); /* previous value */
char * hash_relpace(); /* previous value */
char * hash_insert(); /* error string */
char * hash_apply(); /* 0 means OK */
char * hash_find(); /* value */
char * hash_jam(); /* error text (internal) */
#endif /* #ifdef hashH */
void hash_print_statistics PARAMS ((FILE *, const char *,
struct hash_control *));
#endif /* #ifdef hashH */
/* end of hash.c */

View File

@ -90,12 +90,17 @@ die horribly;
#define LEX_QM 0
#endif
#ifndef LEX_DOLLAR
/* The a29k assembler does not permits labels to start with $. */
#define LEX_DOLLAR 3
#endif
/* used by is_... macros. our ctype[] */
char lex_type[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @ABCDEFGHIJKLMNO */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* PQRSTUVWXYZ[\]^_ */
0, 0, 0, 0, 3, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */
0, 0, 0, 0, LEX_DOLLAR, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, LEX_QM, /* 0123456789:;<=>? */
LEX_AT, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* @ABCDEFGHIJKLMNO */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, LEX_BR, 0, LEX_BR, 0, 3, /* PQRSTUVWXYZ[\]^_ */
@ -218,8 +223,8 @@ read_begin ()
/* Something close -- but not too close -- to a multiple of 1024.
The debugging malloc I'm using has 24 bytes of overhead. */
obstack_begin (&notes, 5090);
obstack_begin (&cond_obstack, 990);
obstack_begin (&notes, chunksize);
obstack_begin (&cond_obstack, chunksize);
/* Use machine dependent syntax */
for (p = line_separator_chars; *p; p++)
@ -396,7 +401,8 @@ pop_insert (table)
{
errtxt = hash_insert (po_hash, pop->poc_name, (char *) pop);
if (errtxt && (!pop_override_ok || strcmp (errtxt, "exists")))
as_fatal ("error constructing %s pseudo-op table", pop_table_name);
as_fatal ("error constructing %s pseudo-op table: %s", pop_table_name,
errtxt);
}
}
@ -2716,32 +2722,19 @@ cons_worker (nbytes, rva)
c = 0;
do
{
if (rva)
{
/* If this is an .rva pseudoop then stick
an extra reloc in for this word. */
int reloc;
char *p = frag_more (0);
exp.X_op = O_absent;
#ifdef BFD_ASSEMBLER
reloc = BFD_RELOC_RVA;
#else
#ifdef TC_RVA_RELOC
reloc = TC_RVA_RELOC;
#else
abort();
#endif
#endif
fix_new_exp (frag_now, p - frag_now->fr_literal,
nbytes, &exp, 0, reloc);
}
if (flag_mri)
parse_mri_cons (&exp, (unsigned int) nbytes);
else
TC_PARSE_CONS_EXPRESSION (&exp, (unsigned int) nbytes);
emit_expr (&exp, (unsigned int) nbytes);
if (rva)
{
if (exp.X_op == O_symbol)
exp.X_op = O_symbol_rva;
else
as_fatal ("rva without symbol");
}
emit_expr (&exp, (unsigned int) nbytes);
++c;
}
while (*input_line_pointer++ == ',');
@ -3733,9 +3726,7 @@ demand_copy_C_string (len_pointer)
{
register int len;
for (len = *len_pointer;
len > 0;
len--)
for (len = *len_pointer; len > 0; len--)
{
if (*s == 0)
{
@ -3746,7 +3737,7 @@ demand_copy_C_string (len_pointer)
}
}
}
return (s);
return s;
}
/*
@ -3934,4 +3925,11 @@ s_ignore (arg)
}
void
read_print_statistics (file)
FILE *file;
{
hash_print_statistics (file, "pseudo-op table", po_hash);
}
/* end of read.c */