Update.

2004-08-26 Ulrich Drepper <drepper@redhat.com> * nscd/cache.c: Major rewrite. The data is now optionally kept in a mmapped memory region which is automatically mirrored on disk. This implements persistent data storage. The memory handling needed to be completely revamped; it now uses a garbage collection mechanism instead of malloc. * nscd/connections.c: Likewise. * nscd/nscd.c: Likewise. * nscd/nscd.h: Likewise. * nscd/nscd_conf.c: Likewise. * nscd/nscd_stat.c: Likewise. * nscd/grpcache.c: Likewise. * nscd/hstcache.c: Likewise. * nscd/pwdcache.c: Likewise. * nscd/Makefile: Add rules to build mem.c. * nscd/mem.c: New file. * nscd/nscd.conf: Describe new configuration options.
2004-08-26 18:35:05 +00:00 · 2004-08-26 18:35:05 +00:00 · a95a08b4af
parent 1114ffff54
commit a95a08b4af
14 changed files with 2409 additions and 837 deletions
--- a/19
+++ b/19
@ -1,3 +1,22 @@
+2004-08-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/cache.c: Major rewrite.  The data is now optionally kept in
+	a mmapped memory region which is automatically mirrored on disk.
+	This implements persistent data storage.  The memory handling
+	needed to be completely revamped; it now uses a garbage collection
+	mechanism instead of malloc.
+	* nscd/connections.c: Likewise.
+	* nscd/nscd.c: Likewise.
+	* nscd/nscd.h: Likewise.
+	* nscd/nscd_conf.c: Likewise.
+	* nscd/nscd_stat.c: Likewise.
+	* nscd/grpcache.c: Likewise.
+	* nscd/hstcache.c: Likewise.
+	* nscd/pwdcache.c: Likewise.
+	* nscd/Makefile: Add rules to build mem.c.
+	* nscd/mem.c: New file.
+	* nscd/nscd.conf: Describe new configuration options.
+
 2004-08-26  Kaz Kojima  <kkojima@rr.iij4u.or.jp>

 	* sysdeps/unix/sysv/linux/mips/pread.c: Include sgidefs.h only if
--- a/nscd/Makefile
+++ b/nscd/Makefile
@ -1,4 +1,4 @@
-# Copyright (C) 1998, 2000, 2002, 2003 Free Software Foundation, Inc.
+# Copyright (C) 1998, 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.

 # The GNU C Library is free software; you can redistribute it and/or
@ -30,7 +30,7 @@ vpath %.c ../locale/programs

 nscd-modules := nscd connections pwdcache getpwnam_r getpwuid_r grpcache \
 		getgrnam_r getgrgid_r hstcache gethstbyad_r gethstbynm2_r \
-		dbg_log nscd_conf nscd_stat cache xmalloc xstrdup
+		dbg_log nscd_conf nscd_stat cache mem xmalloc xstrdup

 ifeq ($(have-thread-library),yes)

@ -78,6 +78,7 @@ CFLAGS-nscd_stat.c = -fpie
 CFLAGS-cache.c = -fpie
 CFLAGS-xmalloc.c = -fpie
 CFLAGS-xstrdup.c = -fpie
+CFLAGS-mem.c = -fpie

 $(objpfx)nscd: $(addprefix $(objpfx),$(nscd-modules:=.o))
 	$(LINK.o) -pie -Wl,-O1 \
--- a/nscd/cache.c
+++ b/nscd/cache.c
@ -17,6 +17,7 @@
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

+#include <assert.h>
 #include <atomic.h>
 #include <errno.h>
 #include <error.h>
@ -26,6 +27,7 @@
 #include <libintl.h>
 #include <arpa/inet.h>
 #include <rpcsvc/nis.h>
+#include <sys/mman.h>
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@ -33,44 +35,69 @@
 #include "nscd.h"
 #include "dbg_log.h"

+
+/* Number of times a value is reloaded without being used.  UINT_MAX
+   means unlimited.  */
+unsigned int reload_count = DEFAULT_RELOAD_LIMIT;
+
+
 /* Search the cache for a matching entry and return it when found.  If
   this fails search the negative cache and return (void *) -1 if this
   search was successful.  Otherwise return NULL.

   This function must be called with the read-lock held.  */
-struct hashentry *
-cache_search (request_type type, void *key, size_t len, struct database *table,
-	      uid_t owner)
+struct datahead *
+cache_search (request_type type, void *key, size_t len,
+	      struct database_dyn *table, uid_t owner)
 {
-  unsigned long int hash = __nis_hash (key, len) % table->module;
-  struct hashentry *work;
+  unsigned long int hash = __nis_hash (key, len) % table->head->module;
+
  unsigned long int nsearched = 0;
+  struct datahead *result = NULL;

-  work = table->array[hash];
-
-  while (work != NULL)
+  ref_t work = table->head->array[hash];
+  while (work != ENDREF)
    {
      ++nsearched;

-      if (type == work->type && len == work->len
-	  && memcmp (key, work->key, len) == 0 && work->owner == owner)
+      struct hashentry *here = (struct hashentry *) (table->data + work);
+
+      if (type == here->type && len == here->len
+	  && memcmp (key, table->data + here->key, len) == 0
+	  && here->owner == owner)
 	{
 	  /* We found the entry.  Increment the appropriate counter.  */
-	  if (work->data == (void *) -1)
-	    ++table->neghit;
-	  else
-	    ++table->poshit;
+	  struct datahead *dh
+	    = (struct datahead *) (table->data + here->packet);

-	  break;
+	  /* See whether we must ignore the entry.  */
+	  if (dh->usable)
+	    {
+	      /* We do not synchronize the memory here.  The statistics
+		 data is not crucial, we synchronize only once in a while
+		 in the cleanup threads.  */
+	      if (dh->notfound)
+		++table->head->neghit;
+	      else
+		{
+		  ++table->head->poshit;
+
+		  if (dh->nreloads != 0)
+		    dh->nreloads = 0;
+		}
+
+	      result = dh;
+	      break;
+	    }
 	}

-      work = work->next;
+      work = here->next;
    }

-  if (nsearched > table->maxnsearched)
-    table->maxnsearched = nsearched;
+  if (nsearched > table->head->maxnsearched)
+    table->head->maxnsearched = nsearched;

-  return work;
+  return result;
 }

 /* Add a new entry to the cache.  The return value is zero if the function
@ -82,45 +109,57 @@ cache_search (request_type type, void *key, size_t len, struct database *table,
   This is ok since we use operations which would be safe even without
   locking, given that the `prune_cache' function never runs.  Using
   the readlock reduces the chance of conflicts.  */
-void
-cache_add (int type, void *key, size_t len, const void *packet, size_t total,
-	   void *data, int last, time_t t, struct database *table, uid_t owner)
+int
+cache_add (int type, const void *key, size_t len, struct datahead *packet,
+	   bool first, struct database_dyn *table,
+	   uid_t owner)
 {
-  unsigned long int hash = __nis_hash (key, len) % table->module;
+  if (__builtin_expect (debug_level >= 2, 0))
+    dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
+	     (const char *) key, serv2str[type], dbnames[table - dbs],
+	     first ? " (first)" : "");
+
+  unsigned long int hash = __nis_hash (key, len) % table->head->module;
  struct hashentry *newp;

-  newp = malloc (sizeof (struct hashentry));
+  newp = mempool_alloc (table, sizeof (struct hashentry));
+  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
-    error (EXIT_FAILURE, errno, _("while allocating hash table entry"));
+    return -1;

  newp->type = type;
+  newp->first = first;
  newp->len = len;
-  newp->key = key;
+  newp->key = (char *) key - table->data;
+  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
-  newp->data = data;
-  newp->timeout = t;
-  newp->packet = packet;
-  newp->total = total;
-
-  newp->last = last;
+  newp->packet = (char *) packet - table->data;

  /* Put the new entry in the first position.  */
  do
-    newp->next = table->array[hash];
-  while (atomic_compare_and_exchange_bool_acq (&table->array[hash], newp,
-					       newp->next));
+    newp->next = table->head->array[hash];
+  while (atomic_compare_and_exchange_bool_acq (&table->head->array[hash],
+					       (ref_t) ((char *) newp
+							- table->data),
+					       (ref_t) newp->next));

  /* Update the statistics.  */
-  if (data == (void *) -1)
-    ++table->negmiss;
-  else if (last)
-    ++table->posmiss;
+  if (packet->notfound)
+    ++table->head->negmiss;
+  else if (first)
+    ++table->head->posmiss;

-  /* Instead of slowing down the normal process for statistics
-     collection we accept living with some incorrect data.  */
-  unsigned long int nentries = ++table->nentries;
-  if (nentries > table->maxnentries)
-    table->maxnentries = nentries;
+  /* We depend on this value being correct and at least as high as the
+     real number of entries.  */
+  atomic_increment (&table->head->nentries);
+
+  /* It does not matter that we are not loading the just-incremented
+     value; this is just for statistics.  */
+  unsigned long int nentries = table->head->nentries;
+  if (nentries > table->head->maxnentries)
+    table->head->maxnentries = nentries;
+
+  return 0;
 }

 /* Walk through the table and remove all entries which lifetime ended.
@ -136,13 +175,9 @@ cache_add (int type, void *key, size_t len, const void *packet, size_t total,
   free the data structures since some hash table entries share the same
   data.  */
 void
-prune_cache (struct database *table, time_t now)
+prune_cache (struct database_dyn *table, time_t now)
 {
-  size_t cnt = table->module;
-  int mark[cnt];
-  int anything = 0;
-  size_t first = cnt + 1;
-  size_t last = 0;
+  size_t cnt = table->head->module;

  /* If this table is not actually used don't do anything.  */
  if (cnt == 0)
@ -181,27 +216,112 @@ prune_cache (struct database *table, time_t now)
     we don't need to get any lock.  It is at all timed assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
+  bool mark[cnt];
+  size_t first = cnt + 1;
+  size_t last = 0;
+  char *const data = table->data;
+  bool any = false;
+
  do
    {
-      struct hashentry *runp = table->array[--cnt];
+      ref_t run = table->head->array[--cnt];

-      mark[cnt] = 0;
-
-      while (runp != NULL)
+      while (run != ENDREF)
 	{
-	  if (runp->timeout < now)
+	  struct hashentry *runp = (struct hashentry *) (data + run);
+	  struct datahead *dh = (struct datahead *) (data + runp->packet);
+
+	  /* Check whether the entry timed out.  */
+	  if (dh->timeout < now)
 	    {
-	      ++mark[cnt];
-	      anything = 1;
+	      /* This hash bucket could contain entries which need to
+		 be looked at.  */
+	      mark[cnt] = true;
+
 	      first = MIN (first, cnt);
 	      last = MAX (last, cnt);
+
+	      /* We only have to look at the data of the first entries
+		 since the count information is kept in the data part
+		 which is shared.  */
+	      if (runp->first)
+		{
+
+		  /* At this point there are two choices: we reload the
+		     value or we discard it.  NRELOADS is not checked if
+		     reloading is unlimited (RELOAD_COUNT is UINT_MAX).  */
+		  if ((reload_count != UINT_MAX
+		       && __builtin_expect (dh->nreloads >= reload_count, 0))
+		      /* We always remove negative entries.  */
+		      || dh->notfound
+		      /* Discard everything if the user explicitly
+			 requests it.  */
+		      || now == LONG_MAX)
+		    {
+		      /* Remove the value.  */
+		      dh->usable = false;
+
+		      /* We definitely have some garbage entries now.  */
+		      any = true;
+		    }
+		  else
+		    {
+		      /* Reload the value.  We do this only for the
+			 initially used key, not the additionally
+			 added derived value.  */
+		      switch (runp->type)
+			{
+			case GETPWBYNAME:
+			  readdpwbyname (table, runp, dh);
+			  break;
+
+			case GETPWBYUID:
+			  readdpwbyuid (table, runp, dh);
+			  break;
+
+			case GETGRBYNAME:
+			  readdgrbyname (table, runp, dh);
+			  break;
+
+			case GETGRBYGID:
+			  readdgrbygid (table, runp, dh);
+			  break;
+
+			case GETHOSTBYNAME:
+			  readdhstbyname (table, runp, dh);
+			  break;
+
+			case GETHOSTBYNAMEv6:
+			  readdhstbynamev6 (table, runp, dh);
+			  break;
+
+			case GETHOSTBYADDR:
+			  readdhstbyaddr (table, runp, dh);
+			  break;
+
+			case GETHOSTBYADDRv6:
+			  readdhstbyaddrv6 (table, runp, dh);
+			  break;
+
+			default:
+			  assert (! "should never happen");
+			}
+
+		      /* If the entry has been replaced, we might need
+			 cleanup.  */
+		      any |= !dh->usable;
+		    }
+		}
 	    }
-	  runp = runp->next;
+	  else
+	    assert (dh->usable);
+
+	  run = runp->next;
 	}
    }
  while (cnt > 0);

-  if (anything)
+  if (first <= last)
    {
      struct hashentry *head = NULL;

@ -209,47 +329,57 @@ prune_cache (struct database *table, time_t now)
 	 the table.  */
      if (__builtin_expect (pthread_rwlock_trywrlock (&table->lock) != 0, 0))
 	{
-	  ++table->wrlockdelayed;
+	  ++table->head->wrlockdelayed;
 	  pthread_rwlock_wrlock (&table->lock);
 	}

      while (first <= last)
 	{
-	  if (mark[first] > 0)
+	  if (mark[first])
 	    {
-	      struct hashentry *runp;
+	      ref_t *old = &table->head->array[first];
+	      ref_t run = table->head->array[first];

-	      while (table->array[first]->timeout < now)
+	      while (run != ENDREF)
 		{
-		  table->array[first]->dellist = head;
-		  head = table->array[first];
-		  table->array[first] = head->next;
-		  --table->nentries;
-		  if (--mark[first] == 0)
-		    break;
-		}
+		  struct hashentry *runp = (struct hashentry *) (data + run);
+		  struct datahead *dh
+		    = (struct datahead *) (data + runp->packet);

-	      runp = table->array[first];
-	      while (mark[first] > 0)
-		{
-		  if (runp->next->timeout < now)
+		  if (! dh->usable)
 		    {
-		      runp->next->dellist = head;
-		      head = runp->next;
-		      runp->next = head->next;
-		      --mark[first];
-		      --table->nentries;
+		      /* We need the list only for debugging but it is
+			 more costly to avoid creating the list than
+			 doing it.  */
+		      runp->dellist = head;
+		      head = runp;
+
+		      /* No need for an atomic operation, we have the
+			 write lock.  */
+		      --table->head->nentries;
+
+		      run = *old = runp->next;
 		    }
 		  else
-		    runp = runp->next;
+		    {
+		      old = &runp->next;
+		      run = runp->next;
+		    }
 		}
 	    }
+
 	  ++first;
 	}

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);

+      /* Make sure the data is saved to disk.  */
+      if (table->persistent)
+	msync (table->head,
+	       table->data + table->head->first_free - (char *) table->head,
+	       MS_ASYNC);
+
      /* One extra pass if we do debugging.  */
      if (__builtin_expect (debug_level > 0, 0))
 	{
@ -263,33 +393,20 @@ prune_cache (struct database *table, time_t now)
 	      if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
 		{
 		  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
-			     runp->key, buf, sizeof (buf));
+			     table->data + runp->key, buf, sizeof (buf));
 		  str = buf;
 		}
 	      else
-		str = runp->key;
+		str = table->data + runp->key;

 	      dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

 	      runp = runp->dellist;
 	    }
 	}
-
-      /* And another run to free the data.  */
-      do
-	{
-	  struct hashentry *old = head;
-
-	  /* Free the data structures.  */
-	  if (old->data == (void *) -1)
-	    free (old->key);
-	  else if (old->last)
-	    free (old->data);
-
-	  head = head->dellist;
-
-	  free (old);
-	}
-      while (head != NULL);
    }
+
+  /* Run garbage collection if any entry has been removed or replaced.  */
+  if (any)
+    gc (table);
 }
--- a/nscd/connections.c
+++ b/nscd/connections.c
@ -24,14 +24,15 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <grp.h>
+#include <libintl.h>
 #include <pthread.h>
 #include <pwd.h>
 #include <resolv.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <libintl.h>
 #include <arpa/inet.h>
+#include <sys/mman.h>
 #include <sys/param.h>
 #include <sys/poll.h>
 #include <sys/socket.h>
@ -41,6 +42,11 @@
 #include "nscd.h"
 #include "dbg_log.h"

+
+/* Number of bytes of data we initially reserve for each hash table bucket.  */
+#define DEFAULT_DATASIZE_PER_BUCKET 1024
+
+
 /* Wrapper functions with error checking for standard functions.  */
 extern void *xmalloc (size_t n);
 extern void *xcalloc (size_t n, size_t s);
@ -56,25 +62,11 @@ static gid_t *server_groups;
 #ifndef NGROUPS
 # define NGROUPS 32
 #endif
-static int server_ngroups = NGROUPS;
+static int server_ngroups;

 static void begin_drop_privileges (void);
 static void finish_drop_privileges (void);

-
-/* Mapping of request type to database.  */
-static const dbtype serv2db[LASTDBREQ + 1] =
-{
-  [GETPWBYNAME] = pwddb,
-  [GETPWBYUID] = pwddb,
-  [GETGRBYNAME] = grpdb,
-  [GETGRBYGID] = grpdb,
-  [GETHOSTBYNAME] = hstdb,
-  [GETHOSTBYNAMEv6] = hstdb,
-  [GETHOSTBYADDR] = hstdb,
-  [GETHOSTBYADDRv6] = hstdb,
-};
-
 /* Map request type to a string.  */
 const char *serv2str[LASTREQ] =
 {
@ -92,43 +84,71 @@ const char *serv2str[LASTREQ] =
 };

 /* The control data structures for the services.  */
-struct database dbs[lastdb] =
+struct database_dyn dbs[lastdb] =
 {
  [pwddb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .enabled = 0,
    .check_file = 1,
+    .persistent = 0,
    .filename = "/etc/passwd",
-    .module = 211,
+    .db_filename = _PATH_NSCD_PASSWD_DB,
    .disabled_iov = &pwd_iov_disabled,
    .postimeout = 3600,
-    .negtimeout = 20
+    .negtimeout = 20,
+    .wr_fd = -1,
+    .ro_fd = -1,
+    .mmap_used = false
  },
  [grpdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .enabled = 0,
    .check_file = 1,
+    .persistent = 0,
    .filename = "/etc/group",
-    .module = 211,
+    .db_filename = _PATH_NSCD_GROUP_DB,
    .disabled_iov = &grp_iov_disabled,
    .postimeout = 3600,
-    .negtimeout = 60
+    .negtimeout = 60,
+    .wr_fd = -1,
+    .ro_fd = -1,
+    .mmap_used = false
  },
  [hstdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .enabled = 0,
    .check_file = 1,
+    .persistent = 0,
    .filename = "/etc/hosts",
-    .module = 211,
+    .db_filename = _PATH_NSCD_HOSTS_DB,
    .disabled_iov = &hst_iov_disabled,
    .postimeout = 3600,
-    .negtimeout = 20
+    .negtimeout = 20,
+    .wr_fd = -1,
+    .ro_fd = -1,
+    .mmap_used = false
  }
 };

+
+/* Mapping of request type to database.  */
+static struct database_dyn *const serv2db[LASTDBREQ + 1] =
+{
+  [GETPWBYNAME] = &dbs[pwddb],
+  [GETPWBYUID] = &dbs[pwddb],
+  [GETGRBYNAME] = &dbs[grpdb],
+  [GETGRBYGID] = &dbs[grpdb],
+  [GETHOSTBYNAME] = &dbs[hstdb],
+  [GETHOSTBYNAMEv6] = &dbs[hstdb],
+  [GETHOSTBYADDR] = &dbs[hstdb],
+  [GETHOSTBYADDRv6] = &dbs[hstdb]
+};
+
+
 /* Number of seconds between two cache pruning runs.  */
 #define CACHE_PRUNE_INTERVAL	15

+
 /* Number of threads to use.  */
 int nthreads = -1;

@ -138,6 +158,9 @@ static int sock;
 /* Number of times clients had to wait.  */
 unsigned long int client_queued;

+/* Alignment requirement of the beginning of the data region.  */
+#define ALIGN 16
+

 /* Initialize database information structures.  */
 void
@ -166,13 +189,256 @@ nscd_init (void)
    if (dbs[cnt].enabled)
      {
 	pthread_rwlock_init (&dbs[cnt].lock, NULL);
+	pthread_mutex_init (&dbs[cnt].memlock, NULL);

-	dbs[cnt].array = (struct hashentry **)
-	  calloc (dbs[cnt].module, sizeof (struct hashentry *));
-	if (dbs[cnt].array == NULL)
+	if (dbs[cnt].persistent)
 	  {
-	    dbg_log (_("while allocating cache: %s"), strerror (errno));
-	    exit (1);
+	    /* Try to open the appropriate file on disk.  */
+	    int fd = open (dbs[cnt].db_filename, O_RDWR);
+	    if (fd != -1)
+	      {
+		struct stat64 st;
+		void *mem;
+		size_t total;
+		struct database_pers_head head;
+		ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
+						      sizeof (head)));
+		if (n != sizeof (head) || fstat64 (fd, &st) != 0)
+		  {
+		  fail_db:
+		    dbg_log (_("invalid persistent database file \"%s\": %s"),
+			     dbs[cnt].db_filename, strerror (errno));
+		    dbs[cnt].persistent = 0;
+		  }
+		else if (head.module == 0 && head.data_size == 0)
+		  {
+		    /* The file has been created, but the head has not been
+		       initialized yet.  Remove the old file.  */
+		    unlink (dbs[cnt].db_filename);
+		  }
+		else if (head.header_size != (int) sizeof (head))
+		  {
+		    dbg_log (_("invalid persistent database file \"%s\": %s"),
+			     dbs[cnt].db_filename,
+			     _("header size does not match"));
+		    dbs[cnt].persistent = 0;
+		  }
+		else if ((total = (sizeof (head)
+				   + roundup (head.module
+					      * sizeof (struct hashentry),
+					      ALIGN)
+				   + head.data_size))
+			 < st.st_size)
+		  {
+		    dbg_log (_("invalid persistent database file \"%s\": %s"),
+			     dbs[cnt].db_filename,
+			     _("file size does not match"));
+		    dbs[cnt].persistent = 0;
+		  }
+		else if ((mem = mmap (NULL, total, PROT_READ | PROT_WRITE,
+				      MAP_SHARED, fd, 0)) == MAP_FAILED)
+		  goto fail_db;
+		else
+		  {
+		    /* Success.  We have the database.  */
+		    dbs[cnt].head = mem;
+		    dbs[cnt].memsize = total;
+		    dbs[cnt].data = (char *)
+		      &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
+						     ALIGN / sizeof (ref_t))];
+		    dbs[cnt].mmap_used = true;
+
+		    if (dbs[cnt].suggested_module > head.module)
+		      dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
+			       dbnames[cnt]);
+
+		    dbs[cnt].wr_fd = fd;
+		    fd = -1;
+		    /* We also need a read-only descriptor.  */
+		    dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
+		    if (dbs[cnt].ro_fd == -1)
+		      dbg_log (_("\
+cannot create read-only descriptor for \"%s\"; no mmap"),
+			       dbs[cnt].db_filename);
+
+		    // XXX Shall we test whether the descriptors actually
+		    // XXX point to the same file?
+		  }
+
+		/* Close the file descriptor in case something went
+		   wrong, in which case the variable has not been
+		   assigned -1.  */
+		if (fd != -1)
+		  close (fd);
+	      }
+	  }
+
+	if (dbs[cnt].head == NULL)
+	  {
+	    /* No database loaded.  Allocate the data structure,
+	       possibly on disk.  */
+	    struct database_pers_head head;
+	    size_t total = (sizeof (head)
+			    + roundup (dbs[cnt].suggested_module
+				       * sizeof (ref_t), ALIGN)
+			    + (dbs[cnt].suggested_module
+			       * DEFAULT_DATASIZE_PER_BUCKET));
+
+	    /* Try to create the database.  If we do not need a
+	       persistent database create a temporary file.  */
+	    int fd;
+	    int ro_fd = -1;
+	    if (dbs[cnt].persistent)
+	      {
+		fd = open (dbs[cnt].db_filename,
+			   O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
+			   S_IRUSR | S_IWUSR);
+		if (fd != -1)
+		  ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
+	      }
+	    else
+	      {
+		size_t slen = strlen (dbs[cnt].db_filename);
+		char fname[slen + 8];
+		strcpy (mempcpy (fname, dbs[cnt].db_filename, slen),
+			".XXXXXX");
+		fd = mkstemp (fname);
+
+		/* We do not need the file name anymore after we
+		   opened another file descriptor in read-only mode.  */
+		if (fd != -1)
+		  {
+		    ro_fd = open (fname, O_RDONLY);
+
+		    unlink (fname);
+		  }
+	      }
+
+	    if (fd == -1)
+	      {
+		if (errno == EEXIST)
+		  {
+		    dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
+			     dbnames[cnt], dbs[cnt].db_filename);
+		    // XXX Correct way to terminate?
+		    exit (1);
+		  }
+
+		if  (dbs[cnt].persistent)
+		  dbg_log (_("cannot create %s; no persistent database used"),
+			   dbs[cnt].db_filename);
+		else
+		  dbg_log (_("cannot create %s; no sharing possible"),
+			   dbs[cnt].db_filename);
+
+		dbs[cnt].persistent = 0;
+		// XXX remember: no mmap
+	      }
+	    else
+	      {
+		/* Tell the user if we could not create the read-only
+		   descriptor.  */
+		if (ro_fd == -1)
+		  dbg_log (_("\
+cannot create read-only descriptor for \"%s\"; no mmap"),
+			   dbs[cnt].db_filename);
+
+		/* Before we create the header, initialize the hash
+		   table, so that if we get interrupted while writing
+		   the header we can recognize a partially initialized
+		   database.  */
+		size_t ps = sysconf (_SC_PAGESIZE);
+		char tmpbuf[ps];
+		assert (~ENDREF == 0);
+		memset (tmpbuf, '\xff', ps);
+
+		size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
+		off_t offset = sizeof (head);
+
+		size_t towrite;
+		if (offset % ps != 0)
+		  {
+		    towrite = MIN (remaining, ps - (offset % ps));
+		    pwrite (fd, tmpbuf, towrite, offset);
+		    offset += towrite;
+		    remaining -= towrite;
+		  }
+
+		while (remaining > ps)
+		  {
+		    pwrite (fd, tmpbuf, ps, offset);
+		    offset += ps;
+		    remaining -= ps;
+		  }
+
+		if (remaining > 0)
+		  pwrite (fd, tmpbuf, remaining, offset);
+
+		/* Create the header of the file.  */
+		struct database_pers_head head =
+		  {
+		    .version = DB_VERSION,
+		    .header_size = sizeof (head),
+		    .module = dbs[cnt].suggested_module,
+		    .data_size = (dbs[cnt].suggested_module
+				  * DEFAULT_DATASIZE_PER_BUCKET),
+		    .first_free = 0
+		  };
+		void *mem;
+
+		if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
+		     != sizeof (head))
+		    || ftruncate (fd, total) != 0
+		    || (mem = mmap (NULL, total, PROT_READ | PROT_WRITE,
+				    MAP_SHARED, fd, 0)) == MAP_FAILED)
+		  {
+		    unlink (dbs[cnt].db_filename);
+		    dbg_log (_("cannot write to database file %s: %s"),
+			     dbs[cnt].db_filename, strerror (errno));
+		    dbs[cnt].persistent = 0;
+		  }
+		else
+		  {
+		    /* Success.  */
+		    dbs[cnt].head = mem;
+		    dbs[cnt].data = (char *)
+		      &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
+						     ALIGN / sizeof (ref_t))];
+		    dbs[cnt].memsize = total;
+		    dbs[cnt].mmap_used = true;
+
+		    /* Remember the descriptors.  */
+		    dbs[cnt].wr_fd = fd;
+		    dbs[cnt].ro_fd = ro_fd;
+		    fd = -1;
+		    ro_fd = -1;
+		  }
+
+		if (fd != -1)
+		  close (fd);
+		if (ro_fd != -1)
+		  close (ro_fd);
+	      }
+	  }
+
+	if (dbs[cnt].head == NULL)
+	  {
+	    /* We do not use the persistent database.  Just
+	       create an in-memory data structure.  */
+	    assert (! dbs[cnt].persistent);
+
+	    dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
+				     + (dbs[cnt].suggested_module
+					* sizeof (ref_t)));
+	    memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head));
+	    assert (~ENDREF == 0);
+	    memset (dbs[cnt].head->array, '\xff',
+		    dbs[cnt].suggested_module * sizeof (ref_t));
+	    dbs[cnt].head->module = dbs[cnt].suggested_module;
+	    dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
+					* dbs[cnt].head->module);
+	    dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
+	    dbs[cnt].head->first_free = 0;
 	  }

 	if (dbs[cnt].check_file)
@ -215,7 +481,7 @@ nscd_init (void)
    fcntl (sock, F_SETFL, fl | O_NONBLOCK);

  /* Set permissions for the socket.  */
-  chmod (_PATH_NSCDSOCKET, 0666);
+  chmod (_PATH_NSCDSOCKET, DEFFILEMODE);

  /* Set the socket up to accept connections.  */
  if (listen (sock, SOMAXCONN) < 0)
@ -276,12 +542,11 @@ cannot handle old request version %d; current version is %d"),
      return;
    }

+  struct database_dyn *db = serv2db[req->type];
+
  if (__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
      && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
    {
-      struct hashentry *cached;
-      struct database *db = &dbs[serv2db[req->type]];
-
      if (__builtin_expect (debug_level, 0) > 0)
 	{
 	  if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
@ -294,7 +559,7 @@ cannot handle old request version %d; current version is %d"),
 				  key, buf, sizeof (buf)));
 	    }
 	  else
-	    dbg_log ("\t%s (%s)", serv2str[req->type], (char *)key);
+	    dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
 	}

      /* Is this service enabled?  */
@ -318,18 +583,19 @@ cannot handle old request version %d; current version is %d"),
      /* Be sure we can read the data.  */
      if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
 	{
-	  ++db->rdlockdelayed;
+	  ++db->head->rdlockdelayed;
 	  pthread_rwlock_rdlock (&db->lock);
 	}

      /* See whether we can handle it from the cache.  */
-      cached = (struct hashentry *) cache_search (req->type, key, req->key_len,
-						  db, uid);
+      struct datahead *cached;
+      cached = (struct datahead *) cache_search (req->type, key, req->key_len,
+						 db, uid);
      if (cached != NULL)
 	{
 	  /* Hurray it's in the cache.  */
-	  if (TEMP_FAILURE_RETRY (write (fd, cached->packet, cached->total))
-	      != cached->total
+	  if (TEMP_FAILURE_RETRY (write (fd, cached->data, cached->recsize))
+	      != cached->recsize
 	      && __builtin_expect (debug_level, 0) > 0)
 	    {
 	      /* We have problems sending the result.  */
@ -349,45 +615,43 @@ cannot handle old request version %d; current version is %d"),
    {
      if (req->type == INVALIDATE)
 	dbg_log ("\t%s (%s)", serv2str[req->type], (char *)key);
-      else if (req->type > LASTDBREQ && req->type < LASTREQ)
-	dbg_log ("\t%s", serv2str[req->type]);
      else
-	dbg_log (_("\tinvalid request type %d"), req->type);
+	dbg_log ("\t%s", serv2str[req->type]);
    }

  /* Handle the request.  */
  switch (req->type)
    {
    case GETPWBYNAME:
-      addpwbyname (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addpwbyname (db, fd, req, key, uid);
      break;

    case GETPWBYUID:
-      addpwbyuid (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addpwbyuid (db, fd, req, key, uid);
      break;

    case GETGRBYNAME:
-      addgrbyname (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addgrbyname (db, fd, req, key, uid);
      break;

    case GETGRBYGID:
-      addgrbygid (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addgrbygid (db, fd, req, key, uid);
      break;

    case GETHOSTBYNAME:
-      addhstbyname (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addhstbyname (db, fd, req, key, uid);
      break;

    case GETHOSTBYNAMEv6:
-      addhstbynamev6 (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addhstbynamev6 (db, fd, req, key, uid);
      break;

    case GETHOSTBYADDR:
-      addhstbyaddr (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addhstbyaddr (db, fd, req, key, uid);
      break;

    case GETHOSTBYADDRv6:
-      addhstbyaddrv6 (&dbs[serv2db[req->type]], fd, req, key, uid);
+      addhstbyaddrv6 (db, fd, req, key, uid);
      break;

    case GETSTAT:
@ -484,6 +748,7 @@ nscd_run (void *p)
 	      prune_cache (&dbs[my_number], time(NULL));
 	      now = time (NULL);
 	      next_prune = now + CACHE_PRUNE_INTERVAL;
+
 	      goto try_get;
 	    }
 	}
@ -538,7 +803,7 @@ nscd_run (void *p)
 	    }

 	  if (req.type < GETPWBYNAME || req.type > LASTDBREQ
-	      || secure[serv2db[req.type]])
+	      || serv2db[req.type]->secure)
 	    uid = caller.uid;

 	  pid = caller.pid;
@ -646,9 +911,7 @@ start_threads (void)
 static void
 begin_drop_privileges (void)
 {
-  struct passwd *pwd;
-
-  pwd = getpwnam (server_user);
+  struct passwd *pwd = getpwnam (server_user);

  if (pwd == NULL)
    {
@ -660,15 +923,15 @@ begin_drop_privileges (void)
  server_uid = pwd->pw_uid;
  server_gid = pwd->pw_gid;

+  if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
+    {
+      /* This really must never happen.  */
+      dbg_log (_("Failed to run nscd as user '%s'"), server_user);
+      error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
+    }
+
  server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));

-  if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
-      == 0)
-    return;
-
-  server_groups = (gid_t *) xrealloc (server_groups,
-				      server_ngroups * sizeof (gid_t));
-
  if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
      == -1)
    {
--- a/nscd/grpcache.c
+++ b/nscd/grpcache.c
@ -19,9 +19,11 @@
   02111-1307 USA.  */

 #include <alloca.h>
+#include <assert.h>
 #include <errno.h>
 #include <error.h>
 #include <grp.h>
+#include <libintl.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
@ -29,7 +31,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <libintl.h>
+#include <sys/mman.h>
 #include <stackinfo.h>

 #include "nscd.h"
@ -66,51 +68,87 @@ static const gr_response_header notfound =
 };


-struct groupdata
-{
-  gr_response_header resp;
-  char strdata[0];
-};
-
-
 static void
-cache_addgr (struct database *db, int fd, request_header *req, void *key,
-	     struct group *grp, uid_t owner, int type)
+cache_addgr (struct database_dyn *db, int fd, request_header *req,
+	     const void *key, struct group *grp, uid_t owner,
+	     struct hashentry *he, struct datahead *dh, int errval)
 {
  ssize_t total;
  ssize_t written;
  time_t t = time (NULL);

+  /* We allocate all data in one memory block: the iov vector,
+     the response header and the dataset itself.  */
+  struct dataset
+  {
+    struct datahead head;
+    gr_response_header resp;
+    char strdata[0];
+  } *dataset;
+
+  assert (offsetof (struct dataset, resp) == offsetof (struct datahead, data));
+
  if (grp == NULL)
    {
-      /* We have no data.  This means we send the standard reply for this
-	 case.  */
-      total = sizeof (notfound);
-
-      written = TEMP_FAILURE_RETRY (write (fd, &notfound, total));
-
-      void *copy = malloc (req->key_len);
-      /* If we cannot allocate memory simply do not cache the information.  */
-      if (copy != NULL)
+      if (he != NULL && errval == EAGAIN)
 	{
-	  memcpy (copy, key, req->key_len);
+	  /* If we have an old record available but cannot find one
+	     now because the service is not available we keep the old
+	     record and make sure it does not get removed.  */
+	  if (reload_count != UINT_MAX)
+	    /* Do not reset the value if we always reload the record.  */
+	    dh->nreloads = reload_count - 1;

-	  /* Compute the timeout time.  */
-	  t += db->negtimeout;
+	  written = total = 0;
+	}
+      else
+	{
+	  /* We have no data.  This means we send the standard reply for this
+	     case.  */
+	  total = sizeof (notfound);

-	  /* Now get the lock to safely insert the records.  */
-	  pthread_rwlock_rdlock (&db->lock);
+	  written = TEMP_FAILURE_RETRY (write (fd, &notfound, total));

-	  cache_add (req->type, copy, req->key_len, &notfound,
-		     sizeof (notfound), (void *) -1, 0, t, db, owner);
+	  dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len);
+	  /* If we cannot permanently store the result, so be it.  */
+	  if (dataset != NULL)
+	    {
+	      dataset->head.allocsize = sizeof (struct dataset) + req->key_len;
+	      dataset->head.recsize = total;
+	      dataset->head.notfound = true;
+	      dataset->head.nreloads = 0;
+	      dataset->head.usable = true;

-	  pthread_rwlock_unlock (&db->lock);
+	      /* Compute the timeout time.  */
+	      dataset->head.timeout = t + db->negtimeout;
+
+	      /* This is the reply.  */
+	      memcpy (&dataset->resp, &notfound, total);
+
+	      /* Copy the key data.  */
+	      memcpy (dataset->strdata, key, req->key_len);
+
+	      /* Now get the lock to safely insert the records.  */
+	      pthread_rwlock_rdlock (&db->lock);
+
+	      if (cache_add (req->type, &dataset->strdata, req->key_len,
+			     &dataset->head, true, db, owner) < 0)
+		/* Ensure the data can be recovered.  */
+		dataset->head.usable = false;
+
+	      pthread_rwlock_unlock (&db->lock);
+
+	      /* Mark the old entry as obsolete.  */
+	      if (dh != NULL)
+		dh->usable = false;
+	    }
+	  else
+	    ++db->head->addfailed;
 	}
    }
  else
    {
      /* Determine the I/O structure.  */
-      struct groupdata *data;
      size_t gr_name_len = strlen (grp->gr_name) + 1;
      size_t gr_passwd_len = strlen (grp->gr_passwd) + 1;
      size_t gr_mem_cnt = 0;
@ -118,12 +156,16 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key,
      size_t gr_mem_len_total = 0;
      char *gr_name;
      char *cp;
-      char buf[12];
+      const size_t key_len = strlen (key);
+      const size_t buf_len = 3 + sizeof (grp->gr_gid) + key_len + 1;
+      char *buf = alloca (buf_len);
      ssize_t n;
      size_t cnt;

      /* We need this to insert the `bygid' entry.  */
-      n = snprintf (buf, sizeof (buf), "%d", grp->gr_gid) + 1;
+      int key_offset;
+      n = snprintf (buf, buf_len, "%d%c%n%s", grp->gr_gid, '\0',
+		    &key_offset, (char *) key) + 1;

      /* Determine the length of all members.  */
      while (grp->gr_mem[gr_mem_cnt])
@ -135,24 +177,52 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key,
 	  gr_mem_len_total += gr_mem_len[gr_mem_cnt];
 	}

-      /* We allocate all data in one memory block: the iov vector,
-	 the response header and the dataset itself.  */
-      total = (sizeof (struct groupdata)
-	       + gr_mem_cnt * sizeof (uint32_t)
-	       + gr_name_len + gr_passwd_len + gr_mem_len_total);
-      data = (struct groupdata *) malloc (total + n + req->key_len);
-      if (data == NULL)
-	/* There is no reason to go on.  */
-	error (EXIT_FAILURE, errno, _("while allocating cache entry"));
+      written = total = (sizeof (struct dataset)
+			 + gr_mem_cnt * sizeof (uint32_t)
+			 + gr_name_len + gr_passwd_len + gr_mem_len_total);

-      data->resp.version = NSCD_VERSION;
-      data->resp.found = 1;
-      data->resp.gr_name_len = gr_name_len;
-      data->resp.gr_passwd_len = gr_passwd_len;
-      data->resp.gr_gid = grp->gr_gid;
-      data->resp.gr_mem_cnt = gr_mem_cnt;
+      /* If we refill the cache, first assume the record did not
+	 change.  Allocate memory on the stack since it is likely
+	 discarded anyway.  If it turns out to be necessary to have a
+	 new record we can still allocate real memory.  */
+      bool alloca_used = false;
+      dataset = NULL;

-      cp = data->strdata;
+      if (he == NULL)
+	{
+	  dataset = (struct dataset *) mempool_alloc (db, total + n);
+	  if (dataset == NULL)
+	    ++db->head->addfailed;
+	}
+
+      if (dataset == NULL)
+	{
+	  /* We cannot permanently add the result at the moment.  But
+	     we can provide the result as is.  Store the data in some
+	     temporary memory.  */
+	  dataset = (struct dataset *) alloca (total + n);
+
+	  /* We cannot add this record to the permanent database.  */
+	  alloca_used = true;
+	}
+
+      dataset->head.allocsize = total + n;
+      dataset->head.recsize = total - offsetof (struct dataset, resp);
+      dataset->head.notfound = false;
+      dataset->head.nreloads = he == NULL ? 0 : (dh->nreloads + 1);
+      dataset->head.usable = true;
+
+      /* Compute the timeout time.  */
+      dataset->head.timeout = t + db->postimeout;
+
+      dataset->resp.version = NSCD_VERSION;
+      dataset->resp.found = 1;
+      dataset->resp.gr_name_len = gr_name_len;
+      dataset->resp.gr_passwd_len = gr_passwd_len;
+      dataset->resp.gr_gid = grp->gr_gid;
+      dataset->resp.gr_mem_cnt = gr_mem_cnt;
+
+      cp = dataset->strdata;

      /* This is the member string length array.  */
      cp = mempcpy (cp, gr_mem_len, gr_mem_cnt * sizeof (uint32_t));
@ -163,33 +233,120 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key,
      for (cnt = 0; cnt < gr_mem_cnt; ++cnt)
 	cp = mempcpy (cp, grp->gr_mem[cnt], gr_mem_len[cnt]);

-      /* Next the stringified GID value.  */
+      /* Finally the stringified GID value.  */
      memcpy (cp, buf, n);
+      char *key_copy = cp + key_offset;
+      assert (key_copy == (char *) rawmemchr (cp, '\0') + 1);

-      /* Copy of the key in case it differs.  */
-      char *key_copy = memcpy (cp + n, key, req->key_len);
+      /* Now we can determine whether on refill we have to create a new
+	 record or not.  */
+      if (he != NULL)
+	{
+	  assert (fd == -1);

-      /* Write the result.  */
-      written = TEMP_FAILURE_RETRY (write (fd, &data->resp, total));
+	  if (total + n == dh->allocsize
+	      && total - offsetof (struct dataset, resp) == dh->recsize
+	      && memcmp (&dataset->resp, dh->data,
+			 dh->allocsize - offsetof (struct dataset, resp)) == 0)
+	    {
+	      /* The data has not changed.  We will just bump the
+		 timeout value.  Note that the new record has been
+		 allocated on the stack and need not be freed.  */
+	      dh->timeout = dataset->head.timeout;
+	      ++dh->nreloads;
+	    }
+	  else
+	    {
+	      /* We have to create a new record.  Just allocate
+		 appropriate memory and copy it.  */
+	      struct dataset *newp
+		= (struct dataset *) mempool_alloc (db, total + n);
+	      if (newp != NULL)
+		{
+		  /* Adjust pointers into the memory block.  */
+		  gr_name = (char *) newp + (gr_name - (char *) dataset);
+		  cp = (char *) newp + (cp - (char *) dataset);

-      /* Compute the timeout time.  */
-      t += db->postimeout;
+		  dataset = memcpy (newp, dataset, total + n);
+		  alloca_used = false;
+		}

-      /* Now get the lock to safely insert the records.  */
-      pthread_rwlock_rdlock (&db->lock);
+	      /* Mark the old record as obsolete.  */
+	      dh->usable = false;
+	    }
+	}
+      else
+	{
+	  /* We write the dataset before inserting it to the database
+	     since while inserting this thread might block and so would
+	     unnecessarily let the receiver wait.  */
+	  assert (fd != -1);

-      /* We have to add the value for both, byname and byuid.  */
-      cache_add (GETGRBYNAME, gr_name, gr_name_len, data,
-		 total, data, 0, t, db, owner);
+	  written = TEMP_FAILURE_RETRY (write (fd, &dataset->resp, total));
+	}

-      /* If the key is different from the name add a separate entry.  */
-      if (type == GETGRBYNAME && strcmp (key_copy, gr_name) != 0)
-	cache_add (GETGRBYNAME, key_copy, req->key_len, data,
-		   total, data, 0, t, db, owner);
+      /* Add the record to the database.  But only if it has not been
+	 stored on the stack.  */
+      if (! alloca_used)
+	{
+	  /* If necessary, we also propagate the data to disk.  */
+	  if (db->persistent)
+	    // XXX async OK?
+	    msync (dataset, total + n, MS_ASYNC);

-      cache_add (GETGRBYGID, cp, n, data, total, data, 1, t, db, owner);
+	  /* Now get the lock to safely insert the records.  */
+	  pthread_rwlock_rdlock (&db->lock);

-      pthread_rwlock_unlock (&db->lock);
+	  /* NB: in the following code we always must add the entry
+	     marked with FIRST first.  Otherwise we end up with
+	     dangling "pointers" in case a later hash entry cannot be
+	     added.  */
+	  bool first = req->type == GETGRBYNAME;
+
+	  /* If the request was by GID, add that entry first.  */
+	  if (req->type != GETGRBYNAME)
+	    {
+	      if (cache_add (GETGRBYGID, cp, n, &dataset->head, true, db,
+			     owner) < 0)
+		{
+		  /* Could not allocate memory.  Make sure the data gets
+		     discarded.  */
+		  dataset->head.usable = false;
+		  goto out;
+		}
+	    }
+	  /* If the key is different from the name add a separate entry.  */
+	  else if (strcmp (key_copy, gr_name) != 0)
+	    {
+	      if (cache_add (GETGRBYNAME, key_copy, key_len + 1,
+			     &dataset->head, first, db, owner) < 0)
+		{
+		  /* Could not allocate memory.  Make sure the data gets
+		     discarded.  */
+		  dataset->head.usable = false;
+		  goto out;
+		}
+
+	      first = false;
+	    }
+
+	  /* We have to add the value for both, byname and bygid.  */
+	  if (__builtin_expect (cache_add (GETGRBYNAME, gr_name, gr_name_len,
+					   &dataset->head, first, db, owner)
+				== 0, 1))
+	    {
+	      if (req->type == GETGRBYNAME)
+		(void) cache_add (GETGRBYGID, cp, n, &dataset->head,
+				  req->type != GETGRBYNAME, db, owner);
+	    }
+	  else if (first)
+	    /* Could not allocate memory.  Make sure the data gets
+	       discarded.  */
+	    dataset->head.usable = false;
+
+	out:
+	  pthread_rwlock_unlock (&db->lock);
+	}
    }

  if (__builtin_expect (written != total, 0) && debug_level > 0)
@ -201,32 +358,57 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key,
 }


-void
-addgrbyname (struct database *db, int fd, request_header *req,
-	     void *key, uid_t uid)
+union keytype
+{
+  void *v;
+  gid_t g;
+};
+
+
+static int
+lookup (int type, union keytype key, struct group *resultbufp, char *buffer,
+	size_t buflen, struct group **grp)
+{
+  if (type == GETGRBYNAME)
+    return __getgrnam_r (key.v, resultbufp, buffer, buflen, grp);
+  else
+    return __getgrgid_r (key.g, resultbufp, buffer, buflen, grp);
+}
+
+
+static void
+addgrbyX (struct database_dyn *db, int fd, request_header *req,
+	  union keytype key, const char *keystr, uid_t uid,
+	  struct hashentry *he, struct datahead *dh)
 {
  /* Search for the entry matching the key.  Please note that we don't
     look again in the table whether the dataset is now available.  We
     simply insert it.  It does not matter if it is in there twice.  The
     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
+  size_t buflen = 1024;
  char *buffer = (char *) alloca (buflen);
  struct group resultbuf;
  struct group *grp;
  uid_t oldeuid = 0;
  bool use_malloc = false;
+  int errval = 0;

  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%s\" in group cache!"), (char *) key);
+    {
+      if (he == NULL)
+	dbg_log (_("Haven't found \"%s\" in group cache!"), keystr);
+      else
+	dbg_log (_("Reloading \"%s\" in group cache!"), keystr);
+    }

-  if (secure[grpdb])
+  if (db->secure)
    {
      oldeuid = geteuid ();
      seteuid (uid);
    }

-  while (__getgrnam_r (key, &resultbuf, buffer, buflen, &grp) != 0
-	 && errno == ERANGE)
+  while (lookup (req->type, key, &resultbuf, buffer, buflen, &grp) != 0
+	 && (errval = errno) == ERANGE)
    {
      char *old_buffer = buffer;
      errno = 0;
@ -243,6 +425,11 @@ addgrbyname (struct database *db, int fd, request_header *req,
 		 never happen.  */
 	      grp = NULL;
 	      buffer = old_buffer;
+
+	      /* We set the error to indicate this is (possibly) a
+		 temporary error and that it does not mean the entry
+		 is not available at all.  */
+	      errval = EAGAIN;
 	      break;
 	    }
 	  use_malloc = true;
@ -253,10 +440,10 @@ addgrbyname (struct database *db, int fd, request_header *req,
 	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
    }

-  if (secure[grpdb])
+  if (db->secure)
    seteuid (oldeuid);

-  cache_addgr (db, fd, req, key, grp, uid, GETGRBYNAME);
+  cache_addgr (db, fd, req, keystr, grp, uid, he, dh, errval);

  if (use_malloc)
    free (buffer);
@ -264,23 +451,38 @@ addgrbyname (struct database *db, int fd, request_header *req,


 void
-addgrbygid (struct database *db, int fd, request_header *req,
+addgrbyname (struct database_dyn *db, int fd, request_header *req,
+	     void *key, uid_t uid)
+{
+  union keytype u = { .v = key };
+
+  addgrbyX (db, fd, req, u, key, uid, NULL, NULL);
+}
+
+
+void
+readdgrbyname (struct database_dyn *db, struct hashentry *he,
+	       struct datahead *dh)
+{
+  request_header req =
+    {
+      .type = GETGRBYNAME,
+      .key_len = he->len
+    };
+  union keytype u = { .v = db->data + he->key };
+
+  addgrbyX (db, -1, &req, u, db->data + he->key, he->owner, he, dh);
+}
+
+
+void
+addgrbygid (struct database_dyn *db, int fd, request_header *req,
 	    void *key, uid_t uid)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct group resultbuf;
-  struct group *grp;
-  uid_t oldeuid = 0;
  char *ep;
-  gid_t gid = strtoul ((char *)key, &ep, 10);
-  bool use_malloc = false;
+  gid_t gid = strtoul ((char *) key, &ep, 10);

-  if (*(char *) key == '\0' || *ep != '\0')  /* invalid numeric gid */
+  if (*(char *) key == '\0' || *ep != '\0')  /* invalid numeric gid */
    {
      if (debug_level > 0)
        dbg_log (_("Invalid numeric gid \"%s\"!"), (char *) key);
@ -289,47 +491,28 @@ addgrbygid (struct database *db, int fd, request_header *req,
      return;
    }

-  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%d\" in group cache!"), gid);
+  union keytype u = { .g = gid };

-  if (secure[grpdb])
-    {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
-
-  while (__getgrgid_r (gid, &resultbuf, buffer, buflen, &grp) != 0
-	 && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-	{
-	  buflen += INCR;
-	  buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-	  if (buffer == NULL)
-	    {
-	      /* We ran out of memory.  We cannot do anything but
-		 sending a negative response.  In reality this should
-		 never happen.  */
-	      grp = NULL;
-	      buffer = old_buffer;
-	      break;
-	    }
-	  use_malloc = true;
-	}
-      else
-	/* Allocate a new buffer on the stack.  If possible combine it
-	   with the previously allocated buffer.  */
-	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[grpdb])
-    seteuid (oldeuid);
-
-  cache_addgr (db, fd, req, key, grp, uid, GETGRBYGID);
-
-  if (use_malloc)
-    free (buffer);
+  addgrbyX (db, fd, req, u, key, uid, NULL, NULL);
+}
+
+
+void
+readdgrbygid (struct database_dyn *db, struct hashentry *he,
+	      struct datahead *dh)
+{
+  char *ep;
+  gid_t gid = strtoul (db->data + he->key, &ep, 10);
+
+  /* Since the key has been added before it must be OK.  */
+  assert (*(db->data + he->key) != '\0' && *ep == '\0');
+
+  request_header req =
+    {
+      .type = GETGRBYGID,
+      .key_len = he->len
+    };
+  union keytype u = { .g = gid };
+
+  addgrbyX (db, -1, &req, u, db->data + he->key, he->owner, he, dh);
 }
--- a/nscd/hstcache.c
+++ b/nscd/hstcache.c
@ -22,6 +22,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <error.h>
+#include <libintl.h>
 #include <netdb.h>
 #include <stdbool.h>
 #include <stddef.h>
@ -30,9 +31,9 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
-#include <libintl.h>
 #include <arpa/inet.h>
 #include <arpa/nameser.h>
+#include <sys/mman.h>
 #include <stackinfo.h>

 #include "nscd.h"
@ -74,51 +75,88 @@ static const hst_response_header notfound =
 };


-struct hostdata
-{
-  hst_response_header resp;
-  char strdata[0];
-};
-
-
 static void
-cache_addhst (struct database *db, int fd, request_header *req, void *key,
-	      struct hostent *hst, uid_t owner, int add_addr)
+cache_addhst (struct database_dyn *db, int fd, request_header *req,
+	      const void *key, struct hostent *hst, uid_t owner, int add_addr,
+	      struct hashentry *he, struct datahead *dh, int errval)
 {
  ssize_t total;
  ssize_t written;
  time_t t = time (NULL);

+  /* We allocate all data in one memory block: the iov vector,
+     the response header and the dataset itself.  */
+  struct dataset
+  {
+    struct datahead head;
+    hst_response_header resp;
+    char strdata[0];
+  } *dataset;
+
+  assert (offsetof (struct dataset, resp) == offsetof (struct datahead, data));
+
  if (hst == NULL)
    {
-      /* We have no data.  This means we send the standard reply for this
-	 case.  */
-      total = sizeof (notfound);
-
-      written = TEMP_FAILURE_RETRY (write (fd, &notfound, total));
-
-      void *copy = malloc (req->key_len);
-      /* If we cannot allocate memory simply do not cache the information.  */
-      if (copy != NULL)
+      if (he != NULL && errval == EAGAIN)
 	{
-	  memcpy (copy, key, req->key_len);
+	  /* If we have an old record available but cannot find one
+	     now because the service is not available we keep the old
+	     record and make sure it does not get removed.  */
+	  if (reload_count != UINT_MAX)
+	    /* Do not reset the value if we always reload the record.  */
+	    dh->nreloads = reload_count - 1;

-	  /* Compute the timeout time.  */
-	  t += db->negtimeout;
+	  written = total = 0;
+	}
+      else
+	{
+	  /* We have no data.  This means we send the standard reply for this
+	     case.  */
+	  written = total = sizeof (notfound);

-	  /* Now get the lock to safely insert the records.  */
-	  pthread_rwlock_rdlock (&db->lock);
+	  if (fd != -1)
+	    written = TEMP_FAILURE_RETRY (write (fd, &notfound, total));

-	  cache_add (req->type, copy, req->key_len, &notfound,
-		     sizeof (notfound), (void *) -1, 0, t, db, owner);
+	  dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len);
+	  /* If we cannot permanently store the result, so be it.  */
+	  if (dataset != NULL)
+	    {
+	      dataset->head.allocsize = sizeof (struct dataset) + req->key_len;
+	      dataset->head.recsize = total;
+	      dataset->head.notfound = true;
+	      dataset->head.nreloads = 0;
+	      dataset->head.usable = true;

-	  pthread_rwlock_unlock (&db->lock);
+	      /* Compute the timeout time.  */
+	      dataset->head.timeout = t + db->negtimeout;
+
+	      /* This is the reply.  */
+	      memcpy (&dataset->resp, &notfound, total);
+
+	      /* Copy the key data.  */
+	      memcpy (dataset->strdata, key, req->key_len);
+
+	      /* Now get the lock to safely insert the records.  */
+	      pthread_rwlock_rdlock (&db->lock);
+
+	      if (cache_add (req->type, &dataset->strdata, req->key_len,
+			     &dataset->head, true, db, owner) < 0)
+		/* Ensure the data can be recovered.  */
+		dataset->head.usable = false;
+
+	      pthread_rwlock_unlock (&db->lock);
+
+	      /* Mark the old entry as obsolete.  */
+	      if (dh != NULL)
+		dh->usable = false;
+	    }
+	  else
+	    ++db->head->addfailed;
 	}
    }
  else
    {
      /* Determine the I/O structure.  */
-      struct hostdata *data;
      size_t h_name_len = strlen (hst->h_name) + 1;
      size_t h_aliases_cnt;
      uint32_t *h_aliases_len;
@ -148,28 +186,66 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key,
      for (cnt = 0; hst->h_addr_list[cnt]; ++cnt)
 	++h_addr_list_cnt;

-      /* We allocate all data in one memory block: the iov vector,
-	 the response header and the dataset itself.  */
-      total += (sizeof (struct hostdata)
+      if (h_addr_list_cnt == 0)
+	/* Invalid entry.  */
+	return;
+
+      total += (sizeof (struct dataset)
 		+ h_name_len
 		+ h_aliases_cnt * sizeof (uint32_t)
 		+ h_addr_list_cnt * hst->h_length);
+      written = total;

-      data = (struct hostdata *) malloc (total + req->key_len);
-      if (data == NULL)
-	/* There is no reason to go on.  */
-	error (EXIT_FAILURE, errno, _("while allocating cache entry"));
+      /* If we refill the cache, first assume the record did not
+	 change.  Allocate memory on the stack since it is likely
+	 discarded anyway.  If it turns out to be necessary to have a
+	 new record we can still allocate real memory.  */
+      bool alloca_used = false;
+      dataset = NULL;

-      data->resp.version = NSCD_VERSION;
-      data->resp.found = 1;
-      data->resp.h_name_len = h_name_len;
-      data->resp.h_aliases_cnt = h_aliases_cnt;
-      data->resp.h_addrtype = hst->h_addrtype;
-      data->resp.h_length = hst->h_length;
-      data->resp.h_addr_list_cnt = h_addr_list_cnt;
-      data->resp.error = NETDB_SUCCESS;
+      /* If the record contains more than one IP address (used for
+	 load balancing etc) don't cache the entry.  This is something
+	 the current cache handling cannot handle and it is more than
+	 questionable whether it is worthwhile complicating the cache
+	 handling just for handling such a special case. */
+      if (he == NULL && (add_addr || hst->h_addr_list[1] == NULL))
+	{
+	  dataset = (struct dataset *) mempool_alloc (db,
+						      total + req->key_len);
+	  if (dataset == NULL)
+	    ++db->head->addfailed;
+	}

-      cp = data->strdata;
+      if (dataset == NULL)
+	{
+	  /* We cannot permanently add the result at the moment.  But
+	     we can provide the result as is.  Store the data in some
+	     temporary memory.  */
+	  dataset = (struct dataset *) alloca (total + req->key_len);
+
+	  /* We cannot add this record to the permanent database.  */
+	  alloca_used = true;
+	}
+
+      dataset->head.allocsize = total + req->key_len;
+      dataset->head.recsize = total - offsetof (struct dataset, resp);
+      dataset->head.notfound = false;
+      dataset->head.nreloads = he == NULL ? 0 : (dh->nreloads + 1);
+      dataset->head.usable = true;
+
+      /* Compute the timeout time.  */
+      dataset->head.timeout = t + db->postimeout;
+
+      dataset->resp.version = NSCD_VERSION;
+      dataset->resp.found = 1;
+      dataset->resp.h_name_len = h_name_len;
+      dataset->resp.h_aliases_cnt = h_aliases_cnt;
+      dataset->resp.h_addrtype = hst->h_addrtype;
+      dataset->resp.h_length = hst->h_length;
+      dataset->resp.h_addr_list_cnt = h_addr_list_cnt;
+      dataset->resp.error = NETDB_SUCCESS;
+
+      cp = dataset->strdata;

      cp = mempcpy (cp, hst->h_name, h_name_len);
      cp = mempcpy (cp, h_aliases_len, h_aliases_cnt * sizeof (uint32_t));
@ -184,7 +260,9 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key,
      for (cnt = 0; cnt < h_aliases_cnt; ++cnt)
 	cp = mempcpy (cp, hst->h_aliases[cnt], h_aliases_len[cnt]);

-      assert (cp == data->strdata + total - sizeof (hst_response_header));
+      assert (cp
+	      == dataset->strdata + total - offsetof (struct dataset,
+						      strdata));

      /* If we are adding a GETHOSTBYNAME{,v6} entry we must be prepared
 	 that the answer we get from the NSS does not contain the key
@ -193,90 +271,221 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key,
 	 we explicitly add the name here.  */
      if (req->type == GETHOSTBYNAME || req->type == GETHOSTBYNAMEv6)
 	key_copy = memcpy (cp, key, req->key_len);
+      else
+	memset (cp, '\0', req->key_len);

-      /* We write the dataset before inserting it to the database
-	 since while inserting this thread might block and so would
-	 unnecessarily let the receiver wait.  */
-      written = TEMP_FAILURE_RETRY (write (fd, data, total));
-
-      /* If the record contains more than one IP address (used for
-         load balancing etc) don't cache the entry.  This is something
-         the current cache handling cannot handle and it is more than
-         questionable whether it is worthwhile complicating the cache
-         handling just for handling such a special case.  */
-      if (!add_addr && hst->h_addr_list[1] != NULL)
+      /* Now we can determine whether on refill we have to create a new
+	 record or not.  */
+      if (he != NULL)
 	{
-	  free (data);
-	  return;
+	  assert (fd == -1);
+
+	  if (total + req->key_len == dh->allocsize
+	      && total - offsetof (struct dataset, resp) == dh->recsize
+	      && memcmp (&dataset->resp, dh->data,
+			 dh->allocsize - offsetof (struct dataset, resp)) == 0)
+	    {
+	      /* The data has not changed.  We will just bump the
+		 timeout value.  Note that the new record has been
+		 allocated on the stack and need not be freed.  */
+	      dh->timeout = dataset->head.timeout;
+	      ++dh->nreloads;
+	    }
+	  else
+	    {
+	      /* We have to create a new record.  Just allocate
+		 appropriate memory and copy it.  */
+	      struct dataset *newp
+		= (struct dataset *) mempool_alloc (db, total + req->key_len);
+	      if (newp != NULL)
+		{
+		  /* Adjust pointers into the memory block.  */
+		  addresses = (char *) newp + (addresses - (char *) dataset);
+		  aliases = (char *) newp + (aliases - (char *) dataset);
+		  if (key_copy != NULL)
+		    key_copy = (char *) newp + (key_copy - (char *) dataset);
+
+		  dataset = memcpy (newp, dataset, total + req->key_len);
+		  alloca_used = false;
+		}
+
+	      /* Mark the old record as obsolete.  */
+	      dh->usable = false;
+	    }
+	}
+      else
+	{
+	  /* We write the dataset before inserting it to the database
+	     since while inserting this thread might block and so would
+	     unnecessarily keep the receiver waiting.  */
+	  assert (fd != -1);
+
+	  written = TEMP_FAILURE_RETRY (write (fd, &dataset->resp, total));
 	}

-      addr_list_type = (hst->h_length == NS_INADDRSZ
-			? GETHOSTBYADDR : GETHOSTBYADDRv6);
+      /* Add the record to the database.  But only if it has not been
+	 stored on the stack.

-      /* Compute the timeout time.  */
-      t += db->postimeout;
-
-      /* Now get the lock to safely insert the records.  */
-      pthread_rwlock_rdlock (&db->lock);
-
-      /* First add all the aliases.  */
-      assert (add_addr || hst->h_addr_list[1] == NULL);
-      if (!add_addr)
-	for (cnt = 0; cnt < h_aliases_cnt; ++cnt)
-	  {
-	    if (addr_list_type == GETHOSTBYADDR)
-	      cache_add (GETHOSTBYNAME, aliases, h_aliases_len[cnt], data,
-			 total, data, 0, t, db, owner);
-
-	    cache_add (GETHOSTBYNAMEv6, aliases, h_aliases_len[cnt], data,
-		       total, data, 0, t, db, owner);
-
-	    aliases += h_aliases_len[cnt];
-	  }
-
-      /* Next the normal addresses.  */
-      if (add_addr)
-	for (cnt = 0; cnt < h_addr_list_cnt; ++cnt)
-	  {
-	    cache_add (addr_list_type, addresses, hst->h_length, data, total,
-		       data, cnt + 1 == h_addr_list_cnt, t, db, owner);
-	    addresses += hst->h_length;
-	  }
-
-      /* If necessary the IPv6 addresses.  */
-      if (add_addr && addr_list_type == GETHOSTBYADDR)
-	for (cnt = 0; cnt < h_addr_list_cnt; ++cnt)
-	  {
-	    cache_add (GETHOSTBYADDRv6, addresses, IN6ADDRSZ, data, total,
-		       data, 0, t, db, owner);
-	    addresses += IN6ADDRSZ;
-	  }
-
-      /* Avoid adding names if more than one address is available.  See
-	 above for more info.  */
-      if (!add_addr)
+	 If the record contains more than one IP address (used for
+	 load balancing etc) don't cache the entry.  This is something
+	 the current cache handling cannot handle and it is more than
+	 questionable whether it is worthwhile complicating the cache
+	 handling just for handling such a special case. */
+      if (! alloca_used)
 	{
-	  /* If necessary add the key for this request.
+	  /* If necessary, we also propagate the data to disk.  */
+	  if (db->persistent)
+	    // XXX async OK?
+	    msync (dataset, total + req->key_len, MS_ASYNC);

-	     Note: hst->h_addr_list[1] == NULL.  */
-	  if (req->type == GETHOSTBYNAME || req->type == GETHOSTBYNAMEv6)
+	  addr_list_type = (hst->h_length == NS_INADDRSZ
+			    ? GETHOSTBYADDR : GETHOSTBYADDRv6);
+
+	  /* Now get the lock to safely insert the records.  */
+	  pthread_rwlock_rdlock (&db->lock);
+
+	  /* NB: the following code is really complicated.  It has
+	     seemingly duplicated code paths which do the same.  The
+	     problem is that we always must add the hash table entry
+	     with the FIRST flag set first.  Otherwise we get dangling
+	     pointers in case memory allocation fails.  */
+	  assert (add_addr || hst->h_addr_list[1] == NULL);
+
+	  /* Add the normal addresses.  */
+	  if (add_addr)
 	    {
+	      for (cnt = 0; cnt < h_addr_list_cnt; ++cnt)
+		{
+		  if (cache_add (addr_list_type, addresses, hst->h_length,
+				 &dataset->head, cnt == 0, db, owner) < 0)
+		    {
+		      /* Ensure the data can be recovered.  */
+		      if (cnt == 0)
+			dataset->head.usable = false;
+		      goto out;
+		    }
+		  addresses += hst->h_length;
+		}
+
+	      /* If necessary the IPv6 addresses.  */
 	      if (addr_list_type == GETHOSTBYADDR)
-		cache_add (GETHOSTBYNAME, key_copy, req->key_len, data, total,
-			   data, 0, t, db, owner);
-	      cache_add (GETHOSTBYNAMEv6, key_copy, req->key_len, data,
-			 total, data, 0, t, db, owner);
+		for (cnt = 0; cnt < h_addr_list_cnt; ++cnt)
+		  {
+		    if (cache_add (GETHOSTBYADDRv6, addresses, IN6ADDRSZ,
+				   &dataset->head, false, db, owner) < 0)
+		      goto out;
+
+		    addresses += IN6ADDRSZ;
+		  }
+	    }
+	  /* Avoid adding names if more than one address is available.  See
+	     above for more info.  */
+	  else
+	    {
+	      assert (req->type == GETHOSTBYNAME
+		      || req->type == GETHOSTBYNAMEv6
+		      || req->type == GETHOSTBYADDR
+		      || req->type == GETHOSTBYADDRv6);
+
+	      /* If necessary add the key for this request.  */
+	      if (req->type == GETHOSTBYNAME)
+		{
+		  bool first = true;
+		  if (addr_list_type == GETHOSTBYADDR)
+		    {
+		      if (cache_add (GETHOSTBYNAME, key_copy, req->key_len,
+				     &dataset->head, true, db, owner) < 0)
+			{
+			  /* Could not allocate memory.  Make sure the
+			     data gets discarded.  */
+			  dataset->head.usable = false;
+			  goto out;
+			}
+
+		      first = false;
+		    }
+		  if (cache_add (GETHOSTBYNAMEv6, key_copy, req->key_len,
+				 &dataset->head, first, db, owner) < 0)
+		    {
+		      /* Could not allocate memory.  Make sure the
+			 data gets discarded.  */
+		      if (first)
+			dataset->head.usable = false;
+		      goto out;
+		    }
+		}
+	      else if (req->type == GETHOSTBYNAMEv6)
+		{
+		  if (cache_add (GETHOSTBYNAMEv6, key_copy, req->key_len,
+				 &dataset->head, true, db, owner) < 0)
+		    {
+		      /* Could not allocate memory.  Make sure the
+			 data gets discarded.  */
+		      dataset->head.usable = false;
+		      goto out;
+		    }
+
+		  if (addr_list_type == GETHOSTBYADDR
+		      && cache_add (GETHOSTBYNAME, key_copy, req->key_len,
+				    &dataset->head, false, db, owner) < 0)
+		    goto out;
+		}
+
+	      /* Next the name from the host entry.  */
+	      if (addr_list_type == GETHOSTBYADDR
+		  && req->type == GETHOSTBYADDR
+		  && cache_add (GETHOSTBYNAME, dataset->strdata, h_name_len,
+				&dataset->head, true, db, owner) < 0)
+		{
+		  /* Could not allocate memory.  Make sure the
+		     data gets discarded.  */
+		  dataset->head.usable = false;
+		  goto out;
+		}
+
+	      if (cache_add (GETHOSTBYNAMEv6, dataset->strdata,
+			     h_name_len, &dataset->head,
+			     ((req->type == GETHOSTBYADDR
+			       && addr_list_type != GETHOSTBYADDR)
+			      || req->type == GETHOSTBYADDRv6), db,
+			     owner) < 0)
+		{
+		  /* Could not allocate memory.  Make sure the
+		     data gets discarded.  */
+		  if ((req->type == GETHOSTBYADDR
+		       && addr_list_type != GETHOSTBYADDR)
+		      || req->type == GETHOSTBYADDRv6)
+		    dataset->head.usable = false;
+		  goto out;
+		}
+
+	      if (addr_list_type == GETHOSTBYADDR
+		  && req->type != GETHOSTBYADDR
+		  && cache_add (GETHOSTBYNAME, dataset->strdata, h_name_len,
+				&dataset->head, false, db, owner) < 0)
+		goto out;
+
+	      /* Finally add all the aliases.  */
+	      for (cnt = 0; cnt < h_aliases_cnt; ++cnt)
+		{
+		  if (addr_list_type == GETHOSTBYADDR)
+		    if (cache_add (GETHOSTBYNAME, aliases,
+				   h_aliases_len[cnt], &dataset->head,
+				   false, db, owner) < 0)
+		      break;
+
+		  if (cache_add (GETHOSTBYNAMEv6, aliases,
+				 h_aliases_len[cnt], &dataset->head,
+				 false, db, owner) < 0)
+		    break;
+
+		  aliases += h_aliases_len[cnt];
+		}
 	    }

-	  /* And finally the name.  We mark this as the last entry.  */
-	  if (addr_list_type == GETHOSTBYADDR)
-	    cache_add (GETHOSTBYNAME, data->strdata, h_name_len, data, total,
-		       data, 0, t, db, owner);
-	  cache_add (GETHOSTBYNAMEv6, data->strdata, h_name_len, data,
-		     total, data, 1, t, db, owner);
+	out:
+	  pthread_rwlock_unlock (&db->lock);
 	}
-
-      pthread_rwlock_unlock (&db->lock);
    }

  if (__builtin_expect (written != total, 0) && debug_level > 0)
@ -288,9 +497,28 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key,
 }


-void
-addhstbyname (struct database *db, int fd, request_header *req,
-	      void *key, uid_t uid)
+/* Run the resolver query selected by the request TYPE for KEY.
+   GETHOSTBYNAME and GETHOSTBYNAMEv6 do a name lookup with address
+   family AF_INET and AF_INET6 respectively; GETHOSTBYADDR does an
+   address lookup for an NS_INADDRSZ byte AF_INET address; every
+   other TYPE does an address lookup for an NS_IN6ADDRSZ byte AF_INET6
+   address.  RESULTBUFP, BUFFER, BUFLEN and HST are handed through to
+   the reentrant resolver function, whose return value is returned
+   unchanged.  The resolver error is stored in h_errno.  */
+static int
+lookup (int type, void *key, struct hostent *resultbufp, char *buffer,
+	size_t buflen, struct hostent **hst)
+{
+  switch (type)
+    {
+    case GETHOSTBYNAME:
+      return __gethostbyname2_r (key, AF_INET, resultbufp, buffer,
+				 buflen, hst, &h_errno);
+
+    case GETHOSTBYNAMEv6:
+      return __gethostbyname2_r (key, AF_INET6, resultbufp, buffer,
+				 buflen, hst, &h_errno);
+
+    case GETHOSTBYADDR:
+      return __gethostbyaddr_r (key, NS_INADDRSZ, AF_INET, resultbufp,
+				buffer, buflen, hst, &h_errno);
+
+    default:
+      /* Anything else is handled as an IPv6 address lookup.  */
+      return __gethostbyaddr_r (key, NS_IN6ADDRSZ, AF_INET6, resultbufp,
+				buffer, buflen, hst, &h_errno);
+    }
+}
+
+
+static void
+addhstbyX (struct database_dyn *db, int fd, request_header *req,
+	   void *key, uid_t uid, struct hashentry *he, struct datahead *dh)
 {
  /* Search for the entry matching the key.  Please note that we don't
     look again in the table whether the dataset is now available.  We
@ -302,20 +530,25 @@ addhstbyname (struct database *db, int fd, request_header *req,
  struct hostent *hst;
  uid_t oldeuid = 0;
  bool use_malloc = false;
+  int errval = 0;

  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%s\" in hosts cache!"), (char *) key);
+    {
+      if (he == NULL)
+	dbg_log (_("Haven't found \"%s\" in hosts cache!"), (char *) key);
+      else
+	dbg_log (_("Reloading \"%s\" in hosts cache!"), (char *) key);
+    }

-  if (secure[hstdb])
+  if (db->secure)
    {
      oldeuid = geteuid ();
      seteuid (uid);
    }

-  while (__gethostbyname2_r (key, AF_INET, &resultbuf, buffer, buflen,
-  			     &hst, &h_errno) != 0
+  while (lookup (req->type, key, &resultbuf, buffer, buflen, &hst) != 0
 	 && h_errno == NETDB_INTERNAL
-	 && errno == ERANGE)
+	 && (errval = errno) == ERANGE)
    {
      char *old_buffer = buffer;
      errno = 0;
@ -332,6 +565,11 @@ addhstbyname (struct database *db, int fd, request_header *req,
 		 never happen.  */
 	      hst = NULL;
 	      buffer = old_buffer;
+
+	      /* We set the error to indicate this is (possibly) a
+		 temporary error and that it does not mean the entry
+		 is not available at all.  */
+	      errval = EAGAIN;
 	      break;
 	    }
 	  use_malloc = true;
@ -342,10 +580,11 @@ addhstbyname (struct database *db, int fd, request_header *req,
 	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
    }

-  if (secure[hstdb])
+  if (db->secure)
    seteuid (oldeuid);

-  cache_addhst (db, fd, req, key, hst, uid, 0);
+  cache_addhst (db, fd, req, key, hst, uid, 0, he, dh,
+		h_errno == TRY_AGAIN ? errval : 0);

  if (use_malloc)
    free (buffer);
@ -353,197 +592,88 @@ addhstbyname (struct database *db, int fd, request_header *req,


 void
-addhstbyaddr (struct database *db, int fd, request_header *req,
+addhstbyname (struct database_dyn *db, int fd, request_header *req,
 	      void *key, uid_t uid)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct hostent resultbuf;
-  struct hostent *hst;
-  uid_t oldeuid = 0;
-  bool use_malloc = false;
-
-  if (__builtin_expect (debug_level > 0, 0))
-    {
-      char buf[INET_ADDRSTRLEN];
-      dbg_log (_("Haven't found \"%s\" in hosts cache!"),
-	       inet_ntop (AF_INET, key, buf, sizeof (buf)));
-    }
-
-  if (secure[hstdb])
-    {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
-
-  while (__gethostbyaddr_r (key, NS_INADDRSZ, AF_INET, &resultbuf, buffer,
-  			    buflen, &hst, &h_errno) != 0
-	 && h_errno == NETDB_INTERNAL
-	 && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-	{
-	  buflen += INCR;
-	  buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-	  if (buffer == NULL)
-	    {
-	      /* We ran out of memory.  We cannot do anything but
-		 sending a negative response.  In reality this should
-		 never happen.  */
-	      hst = NULL;
-	      buffer = old_buffer;
-	      break;
-	    }
-	  use_malloc = true;
-	}
-      else
-	/* Allocate a new buffer on the stack.  If possible combine it
-	   with the previously allocated buffer.  */
-	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[hstdb])
-    seteuid (oldeuid);
-
-  cache_addhst (db, fd, req, key, hst, uid, 1);
-
-  if (use_malloc)
-    free (buffer);
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
 }


 void
-addhstbynamev6 (struct database *db, int fd, request_header *req,
-		void *key, uid_t uid)
+readdhstbyname (struct database_dyn *db, struct hashentry *he,
+		struct datahead *dh)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct hostent resultbuf;
-  struct hostent *hst;
-  uid_t oldeuid = 0;
-  bool use_malloc = false;
-
-  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%s\" in hosts cache!"), (char *) key);
-
-  if (secure[hstdb])
+  request_header req =
    {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
+      .type = GETHOSTBYNAME,
+      .key_len = he->len
+    };

-  while (__gethostbyname2_r (key, AF_INET6, &resultbuf, buffer, buflen,
-  			     &hst, &h_errno) != 0
-	 && h_errno == NETDB_INTERNAL
-	 && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-	{
-	  buflen += INCR;
-	  buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-	  if (buffer == NULL)
-	    {
-	      /* We ran out of memory.  We cannot do anything but
-		 sending a negative response.  In reality this should
-		 never happen.  */
-	      hst = NULL;
-	      buffer = old_buffer;
-	      break;
-	    }
-	  use_malloc = true;
-	}
-      else
-	/* Allocate a new buffer on the stack.  If possible combine it
-	   with the previously allocated buffer.  */
-	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[hstdb])
-    seteuid (oldeuid);
-
-  cache_addhst (db, fd, req, key, hst, uid, 0);
-
-  if (use_malloc)
-    free (buffer);
+  addhstbyX (db, -1, &req, db->data + he->key, he->owner, he, dh);
 }


 void
-addhstbyaddrv6 (struct database *db, int fd, request_header *req,
+addhstbyaddr (struct database_dyn *db, int fd, request_header *req,
+	      void *key, uid_t uid)
+{
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
+}
+
+
+/* Redo the GETHOSTBYADDR lookup for the existing hash entry HE with
+   data head DH in DB.  The key is taken from the data area of DB at
+   offset HE->key and the lookup runs with HE->owner as the uid.  */
+void
+readdhstbyaddr (struct database_dyn *db, struct hashentry *he,
+		struct datahead *dh)
+{
+  /* Location of the stored key inside the data area.  */
+  void *key = db->data + he->key;
+
+  /* Build the request header describing the stored key; all fields
+     not named here are zero.  */
+  request_header req = { .key_len = he->len, .type = GETHOSTBYADDR };
+
+  /* -1 is passed as the descriptor (presumably because there is no
+     client connection to answer for a reload; confirm in
+     cache_addhst).  */
+  addhstbyX (db, -1, &req, key, he->owner, he, dh);
+}
+
+
+void
+addhstbynamev6 (struct database_dyn *db, int fd, request_header *req,
 		void *key, uid_t uid)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct hostent resultbuf;
-  struct hostent *hst;
-  uid_t oldeuid = 0;
-  bool use_malloc = false;
-
-  if (__builtin_expect (debug_level > 0, 0))
-    {
-      char buf[INET6_ADDRSTRLEN];
-      dbg_log (_("Haven't found \"%s\" in hosts cache!"),
-	       inet_ntop (AF_INET6, key, buf, sizeof (buf)));
-    }
-
-  if (secure[hstdb])
-    {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
-
-  while (__gethostbyaddr_r (key, NS_IN6ADDRSZ, AF_INET6, &resultbuf,
-  			    buffer, buflen, &hst, &h_errno) != 0
-	 && h_errno == NETDB_INTERNAL
-	 && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-	{
-	  buflen += INCR;
-	  buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-	  if (buffer == NULL)
-	    {
-	      /* We ran out of memory.  We cannot do anything but
-		 sending a negative response.  In reality this should
-		 never happen.  */
-	      hst = NULL;
-	      buffer = old_buffer;
-	      break;
-	    }
-	  use_malloc = true;
-	}
-      else
-	/* Allocate a new buffer on the stack.  If possible combine it
-	   with the previously allocated buffer.  */
-	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[hstdb])
-    seteuid (oldeuid);
-
-  cache_addhst (db, fd, req, key, hst, uid, 1);
-
-  if (use_malloc)
-    free (buffer);
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
+}
+
+
+/* Redo the GETHOSTBYNAMEv6 lookup for the existing hash entry HE
+   with data head DH in DB.  The key is taken from the data area of
+   DB at offset HE->key and the lookup runs with HE->owner as the
+   uid.  */
+void
+readdhstbynamev6 (struct database_dyn *db, struct hashentry *he,
+		  struct datahead *dh)
+{
+  /* Location of the stored key inside the data area.  */
+  void *key = db->data + he->key;
+
+  /* Build the request header describing the stored key; all fields
+     not named here are zero.  */
+  request_header req = { .key_len = he->len, .type = GETHOSTBYNAMEv6 };
+
+  /* -1 is passed as the descriptor (presumably because there is no
+     client connection to answer for a reload; confirm in
+     cache_addhst).  */
+  addhstbyX (db, -1, &req, key, he->owner, he, dh);
+}
+
+
+/* Handle a lookup request REQ for KEY which was not found in the
+   cache.  Forwards to addhstbyX with no existing hash entry and no
+   existing data head (both NULL); the lookup performed is selected
+   by REQ->type.  */
+void
+addhstbyaddrv6 (struct database_dyn *db, int fd, request_header *req,
+		void *key, uid_t uid)
+{
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
+}
+
+
+/* Redo the GETHOSTBYADDRv6 lookup for the existing hash entry HE
+   with data head DH in DB.  The key is taken from the data area of
+   DB at offset HE->key and the lookup runs with HE->owner as the
+   uid.  */
+void
+readdhstbyaddrv6 (struct database_dyn *db, struct hashentry *he,
+		  struct datahead *dh)
+{
+  /* Location of the stored key inside the data area.  */
+  void *key = db->data + he->key;
+
+  /* Build the request header describing the stored key; all fields
+     not named here are zero.  */
+  request_header req = { .key_len = he->len, .type = GETHOSTBYADDRv6 };
+
+  /* -1 is passed as the descriptor (presumably because there is no
+     client connection to answer for a reload; confirm in
+     cache_addhst).  */
+  addhstbyX (db, -1, &req, key, he->owner, he, dh);
 }
--- a/nscd/mem.c
+++ b/nscd/mem.c
@ -0,0 +1,515 @@
+/* Cache memory handling.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <inttypes.h>
+#include <libintl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+
+#include "dbg_log.h"
+#include "nscd.h"
+
+
+/* Maximum alignment requirement we will encounter.  */
+#define BLOCK_ALIGN_LOG 3
+#define BLOCK_ALIGN (1 << BLOCK_ALIGN_LOG)
+#define BLOCK_ALIGN_M1 (BLOCK_ALIGN - 1)
+
+
+/* qsort comparison function for arrays of 'struct hashentry *':
+   orders the elements by the address of the hash entry itself.
+   Returns -1, 0, or 1.  */
+static int
+sort_he (const void *p1, const void *p2)
+{
+  const struct hashentry *left = *(struct hashentry *const *) p1;
+  const struct hashentry *right = *(struct hashentry *const *) p2;
+
+  /* Each comparison yields 0 or 1, so the difference is the sign.  */
+  return (left > right) - (left < right);
+}
+
+
+/* qsort comparison function for arrays of 'struct hashentry *':
+   orders the elements by the PACKET offset of the data they
+   reference.  Returns -1, 0, or 1.  */
+static int
+sort_he_data (const void *p1, const void *p2)
+{
+  const struct hashentry *left = *(struct hashentry *const *) p1;
+  const struct hashentry *right = *(struct hashentry *const *) p2;
+
+  /* Each comparison yields 0 or 1, so the difference is the sign.  */
+  return (left->packet > right->packet) - (left->packet < right->packet);
+}
+
+
+/* Basic definitions for the bitmap implementation.  Only BITMAP_T
+   needs to be changed to choose a different word size.  BITMAP_T
+   has to be an unsigned integer type.  */
+#define BITMAP_T uint8_t
+/* Number of bits in one BITMAP_T element.  */
+#define BITS (CHAR_BIT * sizeof (BITMAP_T))
+/* A BITMAP_T with every bit set.  This is formed by complementing
+   zero instead of shifting 1 left by BITS: that shift is undefined
+   as soon as BITMAP_T is as wide as int.  */
+#define ALLBITS ((BITMAP_T) ~(BITMAP_T) 0)
+/* A BITMAP_T with only the most significant bit set.  */
+#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
+
+
+/* Set the bits in the bitmap MARK which stand for the LEN bytes at
+   offset START of the data area.  One bit represents one block of
+   BLOCK_ALIGN bytes: START is divided by BLOCK_ALIGN (rounding down)
+   and LEN is rounded up to a whole number of blocks.  Bit N of the
+   bitmap is bit N % BITS of element N / BITS.  */
+static void
+markrange (BITMAP_T *mark, ref_t start, size_t len)
+{
+  /* Adjust parameters for block alignment.  */
+  start /= BLOCK_ALIGN;
+  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
+
+  size_t elem = start / BITS;
+
+  if (start % BITS != 0)
+    {
+      if (start % BITS + len <= BITS)
+	{
+	  /* All fits in the partial byte.  */
+	  mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
+	  return;
+	}
+
+      /* Fill the first element from the start bit upwards.  ALLBITS
+	 is used instead of a literal 0xff so that this keeps working
+	 when BITMAP_T is not an 8-bit type.  */
+      mark[elem++] |= ALLBITS << (start % BITS);
+      len -= BITS - (start % BITS);
+    }
+
+  /* Elements which are covered completely.  */
+  while (len >= BITS)
+    {
+      mark[elem++] = ALLBITS;
+      len -= BITS;
+    }
+
+  /* The remaining low bits of the last element.  */
+  if (len > 0)
+    mark[elem] |= ALLBITS >> (BITS - len);
+}
+
+
+/* Compact the data area of DB.  All memory referenced from the hash
+   table (the hash entries and, through the entries flagged FIRST,
+   the data records) is marked in a bitmap with one bit per
+   BLOCK_ALIGN bytes.  The used regions following the first gap are
+   then moved down over the unmarked blocks, the references to them
+   are adjusted, and DB->head->first_free is lowered accordingly.
+   DB->lock is held for writing and DB->memlock is held for the whole
+   operation.  The two early exits (nothing in use, no gap found)
+   jump straight to the unlock code and therefore do not call msync.  */
+void
+gc (struct database_dyn *db)
+{
+  /* We need write access.  */
+  pthread_rwlock_wrlock (&db->lock);
+
+  /* And the memory handling lock.  */
+  pthread_mutex_lock (&db->memlock);
+
+  /* We need an array representing the data area.  All memory
+     allocation is BLOCK_ALIGN aligned so this is the level at which
+     we have to look at the memory.  We use a mark and sweep algorithm
+     where the marks are placed in this array.  */
+  assert (db->head->first_free % BLOCK_ALIGN == 0);
+  BITMAP_T mark[(db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS];
+  memset (mark, '\0', sizeof (mark));
+
+  /* Create an array which can hold pointer to all the entries in hash
+     entries.  */
+  struct hashentry *he[db->head->nentries];
+  struct hashentry *he_data[db->head->nentries];
+
+  /* Walk all hash bucket chains.  For every entry remember which
+     field holds the reference to it (PREVP) and mark the memory used
+     by the entry and, for FIRST entries, by the data record.  */
+  size_t cnt = 0;
+  for (size_t idx = 0; idx < db->head->module; ++idx)
+    {
+      ref_t *prevp = &db->head->array[idx];
+      ref_t run = *prevp;
+
+      while (run != ENDREF)
+	{
+	  assert (cnt < db->head->nentries);
+	  he[cnt] = (struct hashentry *) (db->data + run);
+
+	  he[cnt]->prevp = prevp;
+	  prevp = &he[cnt]->next;
+
+	  /* This is the hash entry itself.  */
+	  markrange (mark, run, sizeof (struct hashentry));
+
+	  /* Add the information for the data itself.  We do this
+	     only for the one special entry marked with FIRST.  */
+	  if (he[cnt]->first)
+	    {
+	      struct datahead *dh
+		= (struct datahead *) (db->data + he[cnt]->packet);
+	      markrange (mark, he[cnt]->packet, dh->allocsize);
+	    }
+
+	  run = he[cnt]->next;
+
+	  ++cnt;
+	}
+    }
+  assert (cnt == db->head->nentries);
+
+  /* Sort the entries by the addresses of the referenced data.  All
+     the entries pointing to the same DATAHEAD object will have the
+     same key.  Stability of the sorting is unimportant.  */
+  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
+  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
+
+  /* Sort the entries by their address.  */
+  qsort (he, cnt, sizeof (struct hashentry *), sort_he);
+
+  /* Determine the highest used address.  */
+  size_t high = sizeof (mark);
+  while (high > 0 && mark[high - 1] == 0)
+    --high;
+
+  /* No memory used.  */
+  if (high == 0)
+    {
+      db->head->first_free = 0;
+      goto out;
+    }
+
+  /* Determine the highest offset.  After the loop MASK is the
+     highest bit set in the last used element of MARK.
+     NOTE(review): HIGHREF is computed here but is not read anywhere
+     later in this function.  */
+  BITMAP_T mask = HIGHBIT;
+  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
+  while ((mark[high - 1] & mask) == 0)
+    {
+      mask >>= 1;
+      highref -= BLOCK_ALIGN;
+    }
+
+  /* Now we can iterate over the MARK array and find bits which are
+     not set.  These represent memory which can be recovered.  */
+  size_t byte = 0;
+  /* Find the first gap.  */
+  while (byte < high && mark[byte] == ALLBITS)
+    ++byte;
+
+  if (byte == high
+      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
+    /* No gap.  */
+    goto out;
+
+  /* Locate the first unset bit; OFF_FREE is the offset of the first
+     free block, i.e., the destination of the first move.  */
+  mask = 1;
+  cnt = 0;
+  while ((mark[byte] & mask) != 0)
+    {
+      ++cnt;
+      mask <<= 1;
+    }
+  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
+  assert (off_free <= db->head->first_free);
+
+  struct hashentry **next_hash = he;
+  struct hashentry **next_data = he_data;
+
+  /* Skip over the hash entries in the first block which does not get
+     moved.  */
+  while (next_hash < &he[db->head->nentries]
+	 && *next_hash < (struct hashentry *) (db->data + off_free))
+    ++next_hash;
+
+  while (next_data < &he_data[db->head->nentries]
+	 && (*next_data)->packet < off_free)
+    ++next_data;
+
+
+  /* We do not perform the move operations right away since the
+     he_data array is not sorted by the address of the data.  */
+  struct moveinfo
+  {
+    void *from;
+    void *to;
+    size_t size;
+    struct moveinfo *next;
+  } *moves = NULL;
+
+  while (byte < high)
+    {
+      /* Search for the next filled block.  BYTE is the index of the
+	 entry in MARK, MASK is the bit, and CNT is the bit number.
+	 OFF_FILLED is the corresponding offset.  */
+      if ((mark[byte] & ~(mask - 1)) == 0)
+	{
+	  /* No other bit set in the same element of MARK.  Search in the
+	     following memory.  */
+	  do
+	    ++byte;
+	  while (byte < high && mark[byte] == 0);
+
+	  if (byte == high)
+	    /* That was it.  */
+	    break;
+
+	  mask = 1;
+	  cnt = 0;
+	}
+      /* Find the exact bit.  */
+      while ((mark[byte] & mask) == 0)
+	{
+	  ++cnt;
+	  mask <<= 1;
+	}
+
+      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
+      assert (off_alloc <= db->head->first_free);
+
+      /* Find the end of the used area.  */
+      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
+	{
+	  /* All other bits set.  Search the next bytes in MARK.  */
+	  do
+	    ++byte;
+	  while (byte < high && mark[byte] == ALLBITS);
+
+	  mask = 1;
+	  cnt = 0;
+	}
+      if (byte < high)
+	{
+	  /* Find the exact bit.  */
+	  while ((mark[byte] & mask) != 0)
+	    {
+	      ++cnt;
+	      mask <<= 1;
+	    }
+	}
+
+      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
+      assert (off_allocend <= db->head->first_free);
+      /* Now we know that we can copy the area from OFF_ALLOC to
+	 OFF_ALLOCEND (not included) to the memory starting at
+	 OFF_FREE.  First fix up all the entries for the
+	 displacement.  */
+      ref_t disp = off_alloc - off_free;
+
+      struct moveinfo *new_move
+	= (struct moveinfo *) alloca (sizeof (*new_move));
+      new_move->from = db->data + off_alloc;
+      new_move->to = db->data + off_free;
+      new_move->size = off_allocend - off_alloc;
+      /* Create a circular list to be always able to append at the end.
+	 MOVES points to the element added last, MOVES->next to the
+	 one added first.  */
+      if (moves == NULL)
+	moves = new_move->next = new_move;
+      else
+	{
+	  new_move->next = moves->next;
+	  moves = moves->next = new_move;
+	}
+
+      /* The following loop will prepare to move this much data.  */
+      off_free += off_allocend - off_alloc;
+
+      while (off_alloc < off_allocend)
+	{
+	  /* Determine whether the next entry is for a hash entry or
+	     the data.  */
+	  if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
+	    {
+	      /* Just correct the forward reference.  */
+	      *(*next_hash++)->prevp -= disp;
+
+	      off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
+			    & ~BLOCK_ALIGN_M1);
+	    }
+	  else
+	    {
+	      assert (next_data < &he_data[db->head->nentries]);
+	      assert ((*next_data)->packet == off_alloc);
+
+	      /* Adjust all hash entries which refer to this data
+		 record; they are adjacent in HE_DATA since that array
+		 is sorted by PACKET.  */
+	      struct datahead *dh = (struct datahead *) (db->data + off_alloc);
+	      do
+		{
+		  assert ((*next_data)->key >= (*next_data)->packet);
+		  assert ((*next_data)->key + (*next_data)->len
+			  <= (*next_data)->packet + dh->allocsize);
+
+		  (*next_data)->packet -= disp;
+		  (*next_data)->key -= disp;
+		  ++next_data;
+		}
+	      while (next_data < &he_data[db->head->nentries]
+		     && (*next_data)->packet == off_alloc);
+
+	      off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
+	    }
+	}
+      assert (off_alloc == off_allocend);
+
+      assert (off_alloc <= db->head->first_free);
+      if (off_alloc == db->head->first_free)
+	/* We are done, that was the last block.  */
+	break;
+    }
+  assert (next_hash == &he[db->head->nentries]);
+  assert (next_data == &he_data[db->head->nentries]);
+
+  /* Now perform the actual moves.  */
+  if (moves != NULL)
+    {
+      struct moveinfo *runp = moves->next;
+      do
+	{
+	  assert ((char *) runp->to >= db->data);
+	  assert ((char *) runp->to + runp->size
+		  <= db->data  + db->head->first_free);
+	  assert ((char *) runp->from >= db->data);
+	  assert ((char *) runp->from + runp->size
+		  <= db->data  + db->head->first_free);
+
+	  /* The regions may overlap.  */
+	  memmove (runp->to, runp->from, runp->size);
+	  runp = runp->next;
+	}
+      while (runp != moves->next);
+
+      if (__builtin_expect (debug_level >= 3, 0))
+	dbg_log (_("freed %zu bytes in %s cache"),
+		 db->head->first_free
+		 - ((char *) moves->to + moves->size - db->data),
+		 dbnames[db - dbs]);
+
+      /* The byte past the end of the last copied block is the next
+	 available byte.  */
+      db->head->first_free = (char *) moves->to + moves->size - db->data;
+
+      /* Consistency check.  */
+      if (__builtin_expect (debug_level >= 3, 0))
+	{
+	  for (size_t idx = 0; idx < db->head->module; ++idx)
+	    {
+	      ref_t run = db->head->array[idx];
+	      size_t cnt = 0;
+
+	      while (run != ENDREF)
+		{
+		  if (run + sizeof (struct hashentry) > db->head->first_free)
+		    {
+		      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
+			       "%" PRIu32 "+%zu > %zu\n",
+			       cnt, idx, run, sizeof (struct hashentry),
+			       db->head->first_free);
+		      break;
+		    }
+
+		  struct hashentry *he = (struct hashentry *) (db->data + run);
+
+		  if (he->key + he->len > db->head->first_free)
+		    dbg_log ("key of entry %zu in hash bucket %zu out of "
+			     "bounds: %" PRIu32 "+%zu > %zu\n",
+			     cnt, idx, he->key, he->len, db->head->first_free);
+
+		  if (he->packet + sizeof (struct datahead)
+		      > db->head->first_free)
+		    dbg_log ("packet of entry %zu in hash bucket %zu out of "
+			     "bounds: %" PRIu32 "+%zu > %zu\n",
+			     cnt, idx, he->packet, sizeof (struct datahead),
+			     db->head->first_free);
+		  else
+		    {
+		      struct datahead *dh = (struct datahead *) (db->data
+								 + he->packet);
+		      if (he->packet + dh->allocsize
+			  > db->head->first_free)
+			dbg_log ("full key of entry %zu in hash bucket %zu "
+				 "out of bounds: %" PRIu32 "+%zu > %zu",
+				 cnt, idx, he->packet, dh->allocsize,
+				 db->head->first_free);
+		    }
+
+		  run = he->next;
+		  ++cnt;
+		}
+	    }
+	}
+    }
+
+  /* Make sure the data on disk is updated.  */
+  if (db->persistent)
+    msync (db->head, db->data + db->head->first_free - (char *) db->head,
+	   MS_ASYNC);
+
+  /* We are done.  */
+ out:
+  pthread_mutex_unlock (&db->memlock);
+  pthread_rwlock_unlock (&db->lock);
+}
+
+
+/* Allocate LEN bytes from the data area of DB.  LEN is rounded up to
+   a multiple of BLOCK_ALIGN.  Returns a pointer to the memory, or
+   NULL if the data area is too small and could not be enlarged.  A
+   failure is logged only if the previous allocation did not fail as
+   well.  DB->memlock is taken for the duration of the call.  */
+void *
+mempool_alloc (struct database_dyn *db, size_t len)
+{
+  /* Make sure LEN is a multiple of our maximum alignment so we can
+     keep track of used memory in multiples of this alignment value.  */
+  if ((len & BLOCK_ALIGN_M1) != 0)
+    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
+
+  pthread_mutex_lock (&db->memlock);
+
+  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
+
+  bool tried_resize = false;
+  void *res;
+ retry:
+  res = db->data + db->head->first_free;
+
+  if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
+    {
+      if (! tried_resize)
+	{
+	  /* Try to resize the database.  Grow size of 1/8th.  */
+	  size_t new_data_size = db->head->data_size + db->head->data_size / 8;
+	  size_t oldtotal = (sizeof (struct database_pers_head)
+			     + db->head->module * sizeof (ref_t)
+			     + db->head->data_size);
+	  size_t newtotal = (sizeof (struct database_pers_head)
+			     + db->head->module * sizeof (ref_t)
+			     + new_data_size);
+
+	  /* Only a file-backed mapping can be grown in place: first
+	     enlarge the file, then the mapping.  ftruncate returns 0
+	     on success; mremap returns the address of the mapping on
+	     success and MAP_FAILED on error, never 0.
+	     NOTE(review): DB->memsize is not adjusted here; confirm
+	     whether its users need the new total size.  */
+	  if (db->mmap_used
+	      && ftruncate (db->wr_fd, newtotal) == 0
+	      /* Try to resize the mapping.  Note: no MREMAP_MAYMOVE.  */
+	      && mremap (db->head, oldtotal, newtotal, 0) != MAP_FAILED)
+	    {
+	      db->head->data_size = new_data_size;
+	      tried_resize = true;
+	      goto retry;
+	    }
+	}
+
+      if (! db->last_alloc_failed)
+	{
+	  dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
+
+	  db->last_alloc_failed = true;
+	}
+
+      /* No luck.  */
+      res = NULL;
+    }
+  else
+    {
+      db->head->first_free += len;
+
+      db->last_alloc_failed = false;
+    }
+
+  pthread_mutex_unlock (&db->memlock);
+
+  return res;
+}
--- a/nscd/nscd.c
+++ b/nscd/nscd.c
@ -36,6 +36,7 @@
 #include <string.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/un.h>
@ -69,7 +70,6 @@ int disabled_passwd;
 int disabled_group;
 int go_background = 1;

-int secure[lastdb];
 int secure_in_use;
 static const char *conffile = _PATH_NSCDCONF;

@ -342,11 +342,11 @@ parse_opt (int key, char *arg, struct argp_state *state)

    case 'S':
      if (strcmp (arg, "passwd,yes") == 0)
-	secure_in_use = secure[pwddb] = 1;
+	secure_in_use = dbs[pwddb].secure = 1;
      else if (strcmp (arg, "group,yes") == 0)
-	secure_in_use = secure[grpdb] = 1;
+	secure_in_use = dbs[grpdb].secure = 1;
      else if (strcmp (arg, "hosts,yes") == 0)
-	secure_in_use = secure[hstdb] = 1;
+	secure_in_use = dbs[hstdb].secure = 1;
      break;

    default:
@ -406,6 +406,14 @@ termination_handler (int signum)
  /* Clean up pid file.  */
  unlink (_PATH_NSCDPID);

+  // XXX Terminate threads.
+
+  /* Synchronize memory.  */
+  for (int cnt = 0; cnt < lastdb; ++cnt)
+    if (dbs[cnt].persistent)
+      // XXX async OK?
+      msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
+
  _exit (EXIT_SUCCESS);
 }

--- a/nscd/nscd.conf
+++ b/nscd/nscd.conf
@ -11,12 +11,14 @@
 #	server-user             <user to run server as instead of root>
 #		server-user is ignored if nscd is started with -S parameters
 #       stat-user               <user who is allowed to request statistics>
+#	reload-count		unlimited|<number>
 #
 #       enable-cache		<service> <yes|no>
 #	positive-time-to-live	<service> <time in seconds>
 #	negative-time-to-live   <service> <time in seconds>
 #       suggested-size		<service> <prime number>
 #	check-files		<service> <yes|no>
+#	persistent		<service> <yes|no>
 #
 # Currently supported cache names (services): passwd, group, hosts
 #
@ -27,21 +29,25 @@
 #	server-user		nobody
 #	stat-user		somebody
 	debug-level		0
+#	reload-count		5

 	enable-cache		passwd		yes
 	positive-time-to-live	passwd		600
 	negative-time-to-live	passwd		20
 	suggested-size		passwd		211
 	check-files		passwd		yes
+	persistent		passwd		yes

 	enable-cache		group		yes
 	positive-time-to-live	group		3600
 	negative-time-to-live	group		60
 	suggested-size		group		211
 	check-files		group		yes
+	persistent		group		yes

 	enable-cache		hosts		yes
 	positive-time-to-live	hosts		3600
 	negative-time-to-live	hosts		20
 	suggested-size		hosts		211
 	check-files		hosts		yes
+	persistent		hosts		yes
--- a/nscd/nscd.h
+++ b/nscd/nscd.h
@ -1,4 +1,5 @@
-/* Copyright (c) 1998, 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
+/* Copyright (c) 1998, 1999, 2000, 2001, 2003, 2004
+   Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Thorsten Kukuk <kukuk@suse.de>, 1998.

@ -21,6 +22,7 @@
 #define _NSCD_H	1

 #include <pthread.h>
+#include <stdbool.h>
 #include <time.h>
 #include <sys/uio.h>

@ -40,56 +42,134 @@ typedef enum
 } dbtype;


+/* Head of record in data part of database.  The record payload
+   follows this header directly, starting at DATA.  */
+struct datahead
+{
+  size_t allocsize;	/* Allocated Bytes.  */
+  size_t recsize;	/* Size of the record.  */
+  time_t timeout;	/* Time when this entry becomes invalid.  */
+  bool notfound;	/* Nonzero if data for key has not been found.  */
+  uint8_t nreloads;	/* Reloads without use.  */
+  bool usable;		/* False if the entry must be ignored.  */
+
+  /* We need to have the following element aligned for the response
+     header data types and their use in the 'struct dataset' types
+     defined in the XXXcache.c files.  The array has zero elements:
+     it only fixes the position and alignment of the payload and
+     adds no storage of its own.  */
+  union
+  {
+    pw_response_header pwdata;
+    gr_response_header grdata;
+    hst_response_header hstdata;
+    ssize_t align1;
+    time_t align2;
+  } data[0];
+};
+
+
+/* Default limit on the number of times a value gets reloaded without
+   being used in the meantime.  NSCD does not throw a value out as
+   soon as it times out.  It tries to reload the value from the
+   server.  Only if the value has not been used for so many rounds it
+   is removed.  */
+#define DEFAULT_RELOAD_LIMIT 5
+
+
+/* Type for offsets in data part of database.  */
+typedef uint32_t ref_t;
+/* Value for invalid/no reference.  */
+#define ENDREF	UINT32_MAX
+
+
 /* Structure for one hash table entry.  */
 struct hashentry
 {
-  request_type type;		/* Which type of dataset.  */
+  request_type type:8;		/* Which type of dataset.  */
+  bool first;			/* True if this was the original key.  */
  size_t len;			/* Length of key.  */
-  void *key;			/* Pointer to key.  */
-  uid_t owner;                  /* If secure table, this is the owner.  */
-  struct hashentry *next;	/* Next entry in this hash bucket list.  */
-  time_t timeout;		/* Time when this entry becomes invalid.  */
-  ssize_t total;		/* Number of bytes in PACKET.  */
-  const void *packet;		/* Records for the result.  */
-  void *data;			/* The malloc()ed respond record.  */
-  int last;			/* Nonzero if DATA should be free()d.  */
-  struct hashentry *dellist;	/* Next record to be deleted.  */
+  ref_t key;			/* Pointer to key.  */
+  uid_t owner;			/* If secure table, this is the owner.  */
+  ref_t next;			/* Next entry in this hash bucket list.  */
+  ref_t packet;			/* Records for the result.  */
+  union
+  {
+    struct hashentry *dellist;	/* Next record to be deleted.  This can be a
+				   pointer since only nscd uses this field.  */
+    ref_t *prevp;		/* Pointer to field containing forward
+				   reference.  */
+  };
 };

-/* Structure describing one database.  */
-struct database
+
+/* Current persistent database version.  */
+#define DB_VERSION	1
+
+/* Header of persistent database file.  The header is followed by
+   the hash table ARRAY and then by the data area.  */
+struct database_pers_head
+{
+  int version;			/* Presumably DB_VERSION; confirm at the
+				   place where the header is created.  */
+  int header_size;
+
+  size_t module;		/* Number of elements in ARRAY, i.e., the
+				   number of hash buckets.  */
+  size_t data_size;		/* Size of the data area in bytes.  */
+
+  size_t first_free;		/* Offset of first free byte in data area.  */
+
+  size_t nentries;		/* Number of hash entries reachable from
+				   ARRAY.  */
+  size_t maxnentries;
+  size_t maxnsearched;
+
+  /* Statistics counters.  */
+  uintmax_t poshit;
+  uintmax_t neghit;
+  uintmax_t posmiss;
+  uintmax_t negmiss;
+
+  uintmax_t rdlockdelayed;
+  uintmax_t wrlockdelayed;
+
+  uintmax_t addfailed;
+
+  /* Heads of the hash bucket chains.  Each element is the offset of
+     the first hash entry of the bucket in the data area, or ENDREF
+     if the bucket is empty.  */
+  ref_t array[0];
+};
+
+/* Structure describing dynamic part of one database.  */
+struct database_dyn
 {
  pthread_rwlock_t lock;

  int enabled;
  int check_file;
+  int persistent;
  const char *filename;
+  const char *db_filename;
  time_t file_mtime;
-  size_t module;
+  size_t suggested_module;
+  int secure;
+
+  unsigned long int postimeout;	/* In seconds.  */
+  unsigned long int negtimeout;	/* In seconds.  */
+
+  int wr_fd;			/* Writable file descriptor.  */
+  int ro_fd;			/* Unwritable file descriptor.  */

  const struct iovec *disabled_iov;

-  unsigned long int postimeout;
-  unsigned long int negtimeout;
-
-  unsigned long int poshit;
-  unsigned long int neghit;
-  unsigned long int posmiss;
-  unsigned long int negmiss;
-
-  unsigned long int nentries;
-  unsigned long int maxnentries;
-  unsigned long int maxnsearched;
-
-  unsigned long int rdlockdelayed;
-  unsigned long int wrlockdelayed;
-
-  struct hashentry **array;
+  struct database_pers_head *head;
+  char *data;
+  size_t memsize;
+  pthread_mutex_t memlock;
+  bool mmap_used;
+  bool last_alloc_failed;
 };


+/* Paths of the file for the persistent storage.  */
+#define _PATH_NSCD_PASSWD_DB	"/var/run/nscd/passwd"
+#define _PATH_NSCD_GROUP_DB	"/var/run/nscd/group"
+#define _PATH_NSCD_HOSTS_DB	"/var/run/nscd/hosts"
+
+
 /* Global variables.  */
-extern struct database dbs[lastdb];
+extern struct database_dyn dbs[lastdb];
 extern const char *dbnames[lastdb];
 extern const char *serv2str[LASTREQ];

@ -97,11 +177,11 @@ extern const struct iovec pwd_iov_disabled;
 extern const struct iovec grp_iov_disabled;
 extern const struct iovec hst_iov_disabled;

+
 /* Number of threads to run.  */
 extern int nthreads;

 /* Tables for which we cache data with uid.  */
-extern int secure[lastdb];
 extern int secure_in_use; /* Is one of the above 1?  */

 /* User name to run server processes as.  */
@ -117,6 +197,13 @@ extern time_t start_time;
 /* Number of times clients had to wait.  */
 extern unsigned long int client_queued;

+/* Maximum needed alignment.  */
+extern const size_t block_align;
+
+/* Number of times a value is reloaded without being used.  UINT_MAX
+   means unlimited.  */
+extern unsigned int reload_count;
+
 /* Prototypes for global functions.  */

 /* nscd.c */
@ -129,42 +216,63 @@ extern void close_sockets (void);
 extern void start_threads (void) __attribute__ ((__noreturn__));

 /* nscd_conf.c */
-extern int nscd_parse_file (const char *fname, struct database dbs[lastdb]);
+extern int nscd_parse_file (const char *fname,
+			    struct database_dyn dbs[lastdb]);

 /* nscd_stat.c */
-extern void send_stats (int fd, struct database dbs[lastdb]);
+extern void send_stats (int fd, struct database_dyn dbs[lastdb]);
 extern int receive_print_stats (void) __attribute__ ((__noreturn__));

 /* cache.c */
-extern struct hashentry *cache_search (request_type, void *key, size_t len,
-				       struct database *table, uid_t owner);
-extern void cache_add (int type, void *key, size_t len,
-		       const void *packet, size_t iovtotal, void *data,
-		       int last, time_t t, struct database *table,
-		       uid_t owner);
-extern void prune_cache (struct database *table, time_t now);
+extern struct datahead *cache_search (request_type, void *key, size_t len,
+				      struct database_dyn *table,
+				      uid_t owner);
+extern int cache_add (int type, const void *key, size_t len,
+		      struct datahead *packet, bool first,
+		      struct database_dyn *table, uid_t owner);
+extern void prune_cache (struct database_dyn *table, time_t now);

 /* pwdcache.c */
-extern void addpwbyname (struct database *db, int fd, request_header *req,
+extern void addpwbyname (struct database_dyn *db, int fd, request_header *req,
 			 void *key, uid_t uid);
-extern void addpwbyuid (struct database *db, int fd, request_header *req,
+extern void addpwbyuid (struct database_dyn *db, int fd, request_header *req,
 			void *key, uid_t uid);
+extern void readdpwbyname (struct database_dyn *db, struct hashentry *he,
+			   struct datahead *dh);
+extern void readdpwbyuid (struct database_dyn *db, struct hashentry *he,
+			  struct datahead *dh);

 /* grpcache.c */
-extern void addgrbyname (struct database *db, int fd, request_header *req,
+extern void addgrbyname (struct database_dyn *db, int fd, request_header *req,
 			 void *key, uid_t uid);
-extern void addgrbygid (struct database *db, int fd, request_header *req,
+extern void addgrbygid (struct database_dyn *db, int fd, request_header *req,
 			void *key, uid_t uid);
+extern void readdgrbyname (struct database_dyn *db, struct hashentry *he,
+			   struct datahead *dh);
+extern void readdgrbygid (struct database_dyn *db, struct hashentry *he,
+			  struct datahead *dh);

 /* hstcache.c */
-extern void addhstbyname (struct database *db, int fd, request_header *req,
+extern void addhstbyname (struct database_dyn *db, int fd, request_header *req,
 			  void *key, uid_t uid);
-extern void addhstbyaddr (struct database *db, int fd, request_header *req,
+extern void addhstbyaddr (struct database_dyn *db, int fd, request_header *req,
 			  void *key, uid_t uid);
-extern void addhstbynamev6 (struct database *db, int fd, request_header *req,
-			    void *key, uid_t uid);
-extern void addhstbyaddrv6 (struct database *db, int fd, request_header *req,
-			    void *key, uid_t uid);
+extern void addhstbynamev6 (struct database_dyn *db, int fd,
+			    request_header *req, void *key, uid_t uid);
+extern void addhstbyaddrv6 (struct database_dyn *db, int fd,
+			    request_header *req, void *key, uid_t uid);
+extern void readdhstbyname (struct database_dyn *db, struct hashentry *he,
+			    struct datahead *dh);
+extern void readdhstbyaddr (struct database_dyn *db, struct hashentry *he,
+			    struct datahead *dh);
+extern void readdhstbynamev6 (struct database_dyn *db, struct hashentry *he,
+			      struct datahead *dh);
+extern void readdhstbyaddrv6 (struct database_dyn *db, struct hashentry *he,
+			      struct datahead *dh);


+/* mem.c */
+extern void *mempool_alloc (struct database_dyn *db, size_t len);
+extern void gc (struct database_dyn *db);
+
 #endif /* nscd.h */
--- a/nscd/nscd_conf.c
+++ b/nscd/nscd_conf.c
@ -44,7 +44,7 @@ const char *dbnames[lastdb] =
 };

 int
-nscd_parse_file (const char *fname, struct database dbs[lastdb])
+nscd_parse_file (const char *fname, struct database_dyn dbs[lastdb])
 {
  FILE *fp;
  char *line, *cp, *entry, *arg1, *arg2;
@ -117,7 +117,7 @@ nscd_parse_file (const char *fname, struct database dbs[lastdb])
 		break;
 	      }
 	  if (cnt == lastdb)
-	    dbg_log ("server %s is not supported\n", arg1);
+	    dbg_log ("database %s is not supported\n", arg1);
 	}
      else if (strcmp (entry, "negative-time-to-live") == 0)
 	{
@ -128,18 +128,18 @@ nscd_parse_file (const char *fname, struct database dbs[lastdb])
 		break;
 	      }
 	  if (cnt == lastdb)
-	    dbg_log ("server %s is not supported\n", arg1);
+	    dbg_log ("database %s is not supported\n", arg1);
 	}
      else if (strcmp (entry, "suggested-size") == 0)
 	{
 	  for (cnt = 0; cnt < lastdb; ++cnt)
 	    if (strcmp (arg1, dbnames[cnt]) == 0)
 	      {
-		dbs[cnt].module = atol (arg2);
+		dbs[cnt].suggested_module = atol (arg2);
 		break;
 	      }
 	  if (cnt == lastdb)
-	    dbg_log ("server %s is not supported\n", arg1);
+	    dbg_log ("database %s is not supported\n", arg1);
 	}
      else if (strcmp (entry, "enable-cache") == 0)
 	{
@ -153,7 +153,7 @@ nscd_parse_file (const char *fname, struct database dbs[lastdb])
 		break;
 	      }
 	  if (cnt == lastdb)
-	    dbg_log ("server %s is not supported\n", arg1);
+	    dbg_log ("database %s is not supported\n", arg1);
 	}
      else if (strcmp (entry, "check-files") == 0)
 	{
@ -167,7 +167,7 @@ nscd_parse_file (const char *fname, struct database dbs[lastdb])
 		break;
 	      }
 	  if (cnt == lastdb)
-	    dbg_log ("server %s is not supported\n", arg1);
+	    dbg_log ("database %s is not supported\n", arg1);
 	}
      else if (strcmp (entry, "logfile") == 0)
 	set_logfile (arg1);
@ -202,6 +202,35 @@ nscd_parse_file (const char *fname, struct database dbs[lastdb])
 		stat_uid = pw->pw_uid;
 	    }
        }
+      else if (strcmp (entry, "persistent") == 0)
+	{
+	  for (cnt = 0; cnt < lastdb; ++cnt)
+	    if (strcmp (arg1, dbnames[cnt]) == 0)
+	      {
+		if (strcmp (arg2, "no") == 0)
+		  dbs[cnt].persistent = 0;
+		else if (strcmp (arg2, "yes") == 0)
+		  dbs[cnt].persistent = 1;
+		break;
+	      }
+	  if (cnt == lastdb)
+	    dbg_log ("database %s is not supported\n", arg1);
+	}
+      else if (strcmp (entry, "reload-count") == 0)
+	{
+	  if (strcasecmp (arg1, "unlimited") == 0)
+	    reload_count = UINT_MAX;
+	  else
+	    {
+	      unsigned int count = strtoul (arg1, NULL, 0);
+	      if (count > UINT8_MAX - 1)
+		reload_count = UINT_MAX;
+	      else if (count >= 0)
+	    reload_count = count;
+	      else
+		dbg_log (_("invalid value for 'reload-count': %u"), count);
+	    }
+	}
      else
 	dbg_log (_("Unknown option: %s %s %s"), entry, arg1, arg2);
    }
--- a/nscd/nscd_getgr_r.c
+++ b/nscd/nscd_getgr_r.c
@ -67,7 +67,7 @@ __nscd_getgrgid_r (gid_t gid, struct group *resultbuf, char *buffer,
 static int
 internal_function
 nscd_getgr_r (const char *key, size_t keylen, request_type type,
-	      struct group *resbuf, char *buffer, size_t buflen,
+	      struct group *resultbuf, char *buffer, size_t buflen,
 	      struct group **result)
 {
  gr_response_header gr_resp;
@ -115,17 +115,17 @@ nscd_getgr_r (const char *key, size_t keylen, request_type type,
      buflen -= total_len;

      p += align;
-      resbuf->gr_mem = (char **) p;
+      resultbuf->gr_mem = (char **) p;
      p += (1 + gr_resp.gr_mem_cnt) * sizeof (char *);

      /* Set pointers for strings.  */
-      resbuf->gr_name = p;
+      resultbuf->gr_name = p;
      p += gr_resp.gr_name_len;
-      resbuf->gr_passwd = p;
+      resultbuf->gr_passwd = p;
      p += gr_resp.gr_passwd_len;

      /* Fill in what we know now.  */
-      resbuf->gr_gid = gr_resp.gr_gid;
+      resultbuf->gr_gid = gr_resp.gr_gid;

      /* Allocate array to store lengths.  */
      len = (uint32_t *) alloca (gr_resp.gr_mem_cnt * sizeof (uint32_t));
@ -133,7 +133,7 @@ nscd_getgr_r (const char *key, size_t keylen, request_type type,
      total_len = gr_resp.gr_mem_cnt * sizeof (uint32_t);
      vec[0].iov_base = len;
      vec[0].iov_len = total_len;
-      vec[1].iov_base = resbuf->gr_name;
+      vec[1].iov_base = resultbuf->gr_name;
      vec[1].iov_len = gr_resp.gr_name_len + gr_resp.gr_passwd_len;
      total_len += gr_resp.gr_name_len + gr_resp.gr_passwd_len;

@ -143,13 +143,13 @@ nscd_getgr_r (const char *key, size_t keylen, request_type type,
 	goto out;

      /* Clear the terminating entry.  */
-      resbuf->gr_mem[gr_resp.gr_mem_cnt] = NULL;
+      resultbuf->gr_mem[gr_resp.gr_mem_cnt] = NULL;

      /* Prepare reading the group members.  */
      total_len = 0;
      for (cnt = 0; cnt < gr_resp.gr_mem_cnt; ++cnt)
 	{
-	  resbuf->gr_mem[cnt] = p;
+	  resultbuf->gr_mem[cnt] = p;
 	  total_len += len[cnt];
 	  p += len[cnt];
 	}
@ -157,21 +157,17 @@ nscd_getgr_r (const char *key, size_t keylen, request_type type,
      if (__builtin_expect (total_len > buflen, 0))
 	goto no_room;

-      if (gr_resp.gr_mem_cnt > 0
-	  && __builtin_expect (TEMP_FAILURE_RETRY (__read (sock,
-							   resbuf->gr_mem[0],
-							   total_len))
-			       != total_len, 0))
+      retval = 0;
+      n = TEMP_FAILURE_RETRY (__read (sock, resultbuf->gr_mem[0],
+					     total_len));
+      if (__builtin_expect (n != total_len, 0))
 	{
 	  /* The `errno' to some value != ERANGE.  */
 	  __set_errno (ENOENT);
 	  retval = ENOENT;
 	}
      else
-	{
-	  retval = 0;
-	  *result = resbuf;
-	}
+	*result = resultbuf;
    }
  else
    {
--- a/nscd/nscd_stat.c
+++ b/nscd/nscd_stat.c
@ -1,4 +1,4 @@
-/* Copyright (c) 1998, 2003 Free Software Foundation, Inc.
+/* Copyright (c) 1998, 2003, 2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Thorsten Kukuk <kukuk@vt.uni-paderborn.de>, 1998.

@ -19,6 +19,7 @@

 #include <errno.h>
 #include <error.h>
+#include <inttypes.h>
 #include <langinfo.h>
 #include <stdio.h>
 #include <stdlib.h>
@ -42,17 +43,21 @@ struct dbstat
  unsigned long int postimeout;
  unsigned long int negtimeout;

-  unsigned long int poshit;
-  unsigned long int neghit;
-  unsigned long int posmiss;
-  unsigned long int negmiss;
+  size_t nentries;
+  size_t maxnentries;
+  size_t maxnsearched;
+  size_t datasize;
+  size_t dataused;

-  unsigned long int nentries;
-  unsigned long int maxnentries;
-  unsigned long int maxnsearched;
+  uintmax_t poshit;
+  uintmax_t neghit;
+  uintmax_t posmiss;
+  uintmax_t negmiss;

-  unsigned long int rdlockdelayed;
-  unsigned long int wrlockdelayed;
+  uintmax_t rdlockdelayed;
+  uintmax_t wrlockdelayed;
+
+  uintmax_t addfailed;
 };

 /* Record for transmitting statistics.  */
@ -68,7 +73,7 @@ struct statdata


 void
-send_stats (int fd, struct database dbs[lastdb])
+send_stats (int fd, struct database_dyn dbs[lastdb])
 {
  struct statdata data;
  int cnt;
@ -83,18 +88,21 @@ send_stats (int fd, struct database dbs[lastdb])
    {
      data.dbs[cnt].enabled = dbs[cnt].enabled;
      data.dbs[cnt].check_file = dbs[cnt].check_file;
-      data.dbs[cnt].module = dbs[cnt].module;
+      data.dbs[cnt].module = dbs[cnt].head->module;
      data.dbs[cnt].postimeout = dbs[cnt].postimeout;
      data.dbs[cnt].negtimeout = dbs[cnt].negtimeout;
-      data.dbs[cnt].poshit = dbs[cnt].poshit;
-      data.dbs[cnt].neghit = dbs[cnt].neghit;
-      data.dbs[cnt].posmiss = dbs[cnt].posmiss;
-      data.dbs[cnt].negmiss = dbs[cnt].negmiss;
-      data.dbs[cnt].nentries = dbs[cnt].nentries;
-      data.dbs[cnt].maxnentries = dbs[cnt].maxnentries;
-      data.dbs[cnt].maxnsearched = dbs[cnt].maxnsearched;
-      data.dbs[cnt].rdlockdelayed = dbs[cnt].rdlockdelayed;
-      data.dbs[cnt].wrlockdelayed = dbs[cnt].wrlockdelayed;
+      data.dbs[cnt].poshit = dbs[cnt].head->poshit;
+      data.dbs[cnt].neghit = dbs[cnt].head->neghit;
+      data.dbs[cnt].posmiss = dbs[cnt].head->posmiss;
+      data.dbs[cnt].negmiss = dbs[cnt].head->negmiss;
+      data.dbs[cnt].nentries = dbs[cnt].head->nentries;
+      data.dbs[cnt].maxnentries = dbs[cnt].head->maxnentries;
+      data.dbs[cnt].datasize = dbs[cnt].head->data_size;
+      data.dbs[cnt].dataused = dbs[cnt].head->first_free;
+      data.dbs[cnt].maxnsearched = dbs[cnt].head->maxnsearched;
+      data.dbs[cnt].rdlockdelayed = dbs[cnt].head->rdlockdelayed;
+      data.dbs[cnt].wrlockdelayed = dbs[cnt].head->wrlockdelayed;
+      data.dbs[cnt].addfailed = dbs[cnt].head->addfailed;
    }

  if (TEMP_FAILURE_RETRY (write (fd, &data, sizeof (data))) != sizeof (data))
@ -220,22 +228,26 @@ receive_print_stats (void)

      printf (_("\n%s cache:\n\n"
 		"%15s  cache is enabled\n"
-		"%15Zu  suggested size\n"
+		"%15zu  suggested size\n"
+		"%15zu  total data pool size\n"
+		"%15zu  used data pool size\n"
 		"%15lu  seconds time to live for positive entries\n"
 		"%15lu  seconds time to live for negative entries\n"
-		"%15lu  cache hits on positive entries\n"
-		"%15lu  cache hits on negative entries\n"
-		"%15lu  cache misses on positive entries\n"
-		"%15lu  cache misses on negative entries\n"
+		"%15" PRIuMAX "  cache hits on positive entries\n"
+		"%15" PRIuMAX "  cache hits on negative entries\n"
+		"%15" PRIuMAX "  cache misses on positive entries\n"
+		"%15" PRIuMAX "  cache misses on negative entries\n"
 		"%15lu%% cache hit rate\n"
-		"%15lu  current number of cached values\n"
-		"%15lu  maximum number of cached values\n"
-		"%15lu  maximum chain length searched\n"
-		"%15lu  number of delays on rdlock\n"
-		"%15lu  number of delays on wrlock\n"
+		"%15zu  current number of cached values\n"
+		"%15zu  maximum number of cached values\n"
+		"%15zu  maximum chain length searched\n"
+		"%15" PRIuMAX "  number of delays on rdlock\n"
+		"%15" PRIuMAX "  number of delays on wrlock\n"
+		"%15" PRIuMAX "  memory allocations failed\n"
 		"%15s  check /etc/%s for changes\n"),
 	      dbnames[i], enabled,
 	      data.dbs[i].module,
+	      data.dbs[i].datasize, data.dbs[i].dataused,
 	      data.dbs[i].postimeout, data.dbs[i].negtimeout,
 	      data.dbs[i].poshit, data.dbs[i].neghit,
 	      data.dbs[i].posmiss, data.dbs[i].negmiss,
@ -243,7 +255,8 @@ receive_print_stats (void)
 	      data.dbs[i].nentries, data.dbs[i].maxnentries,
 	      data.dbs[i].maxnsearched,
 	      data.dbs[i].rdlockdelayed,
-	      data.dbs[i].wrlockdelayed, check_file, dbnames[i]);
+	      data.dbs[i].wrlockdelayed,
+	      data.dbs[i].addfailed, check_file, dbnames[i]);
    }

  close (fd);
--- a/nscd/pwdcache.c
+++ b/nscd/pwdcache.c
@ -19,8 +19,10 @@
   02111-1307 USA.  */

 #include <alloca.h>
+#include <assert.h>
 #include <errno.h>
 #include <error.h>
+#include <libintl.h>
 #include <pwd.h>
 #include <stdbool.h>
 #include <stddef.h>
@ -29,7 +31,7 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
-#include <libintl.h>
+#include <sys/mman.h>
 #include <stackinfo.h>

 #include "nscd.h"
@ -72,83 +74,153 @@ static const pw_response_header notfound =
 };


-struct passwddata
-{
-  pw_response_header resp;
-  char strdata[0];
-};
-
-
 static void
-cache_addpw (struct database *db, int fd, request_header *req, void *key,
-	     struct passwd *pwd, uid_t owner, int type)
+cache_addpw (struct database_dyn *db, int fd, request_header *req,
+	     const void *key, struct passwd *pwd, uid_t owner,
+	     struct hashentry *he, struct datahead *dh, int errval)
 {
  ssize_t total;
  ssize_t written;
  time_t t = time (NULL);

+  /* We allocate all data in one memory block: the data head,
+     the response header and the string data itself.  */
+  struct dataset
+  {
+    struct datahead head;
+    pw_response_header resp;
+    char strdata[0];
+  } *dataset;
+
+  assert (offsetof (struct dataset, resp) == offsetof (struct datahead, data));
+
  if (pwd == NULL)
    {
-      /* We have no data.  This means we send the standard reply for this
-	 case.  */
-      total = sizeof (notfound);
-
-      written = TEMP_FAILURE_RETRY (write (fd, &notfound, total));
-
-      void *copy = malloc (req->key_len);
-      /* If we cannot allocate memory simply do not cache the information.  */
-      if (copy != NULL)
+      if (he != NULL && errval == EAGAIN)
 	{
-	  memcpy (copy, key, req->key_len);
+	  /* If we have an old record available but cannot find one
+	     now because the service is not available we keep the old
+	     record and make sure it does not get removed.  */
+	  if (reload_count != UINT_MAX && dh->nreloads == reload_count)
+	    /* Step back below the limit unless reloads are unlimited.  */
+	    dh->nreloads = reload_count - 1;

-	  /* Compute the timeout time.  */
-	  t += db->negtimeout;
+	  written = total = 0;
+	}
+      else
+	{
+	  /* We have no data.  This means we send the standard reply for this
+	     case.  */
+	  written = total = sizeof (notfound);

-	  /* Now get the lock to safely insert the records.  */
-	  pthread_rwlock_rdlock (&db->lock);
+	  if (fd != -1)
+	    written = TEMP_FAILURE_RETRY (write (fd, &notfound, total));

-	  cache_add (req->type, copy, req->key_len, &notfound,
-		     sizeof (notfound), (void *) -1, 0, t, db, owner);
+	  dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len);
+	  /* If we cannot permanently store the result, so be it.  */
+	  if (dataset != NULL)
+	    {
+	      dataset->head.allocsize = sizeof (struct dataset) + req->key_len;
+	      dataset->head.recsize = total;
+	      dataset->head.notfound = true;
+	      dataset->head.nreloads = 0;
+	      dataset->head.usable = true;

-	  pthread_rwlock_unlock (&db->lock);
+	      /* Compute the timeout time.  */
+	      dataset->head.timeout = t + db->negtimeout;
+
+	      /* This is the reply.  */
+	      memcpy (&dataset->resp, &notfound, total);
+
+	      /* Copy the key data.  */
+	      char *key_copy = memcpy (dataset->strdata, key, req->key_len);
+
+	      /* Now get the lock to safely insert the records.  */
+	      pthread_rwlock_rdlock (&db->lock);
+
+	      if (cache_add (req->type, key_copy, req->key_len,
+			     &dataset->head, true, db, owner) < 0)
+		/* Ensure the data can be recovered.  */
+		dataset->head.usable = false;
+
+
+	      pthread_rwlock_unlock (&db->lock);
+
+	      /* Mark the old entry as obsolete.  */
+	      if (dh != NULL)
+		dh->usable = false;
+	    }
+	  else
+	    ++db->head->addfailed;
 	}
    }
  else
    {
      /* Determine the I/O structure.  */
-      struct passwddata *data;
      size_t pw_name_len = strlen (pwd->pw_name) + 1;
      size_t pw_passwd_len = strlen (pwd->pw_passwd) + 1;
      size_t pw_gecos_len = strlen (pwd->pw_gecos) + 1;
      size_t pw_dir_len = strlen (pwd->pw_dir) + 1;
      size_t pw_shell_len = strlen (pwd->pw_shell) + 1;
      char *cp;
-      char buf[12];
+      const size_t key_len = strlen (key);
+      const size_t buf_len = 3 * sizeof (pwd->pw_uid) + key_len + 1;
+      char *buf = alloca (buf_len);
      ssize_t n;

      /* We need this to insert the `byuid' entry.  */
-      n = snprintf (buf, sizeof (buf), "%d", pwd->pw_uid) + 1;
+      int key_offset;
+      n = snprintf (buf, buf_len, "%d%c%n%s", pwd->pw_uid, '\0',
+		    &key_offset, (char *) key) + 1;

-      /* We allocate all data in one memory block: the iov vector,
-	 the response header and the dataset itself.  */
-      total = (sizeof (struct passwddata) + pw_name_len + pw_passwd_len
-	       + pw_gecos_len + pw_dir_len + pw_shell_len);
-      data = (struct passwddata *) malloc (total + n + req->key_len);
-      if (data == NULL)
-	/* There is no reason to go on.  */
-	error (EXIT_FAILURE, errno, _("while allocating cache entry"));
+      written = total = (sizeof (struct dataset) + pw_name_len + pw_passwd_len
+			 + pw_gecos_len + pw_dir_len + pw_shell_len);

-      data->resp.version = NSCD_VERSION;
-      data->resp.found = 1;
-      data->resp.pw_name_len = pw_name_len;
-      data->resp.pw_passwd_len = pw_passwd_len;
-      data->resp.pw_uid = pwd->pw_uid;
-      data->resp.pw_gid = pwd->pw_gid;
-      data->resp.pw_gecos_len = pw_gecos_len;
-      data->resp.pw_dir_len = pw_dir_len;
-      data->resp.pw_shell_len = pw_shell_len;
+      /* If we refill the cache, first assume the record did not
+	 change.  Allocate memory on the stack since it is likely
+	 discarded anyway.  If it turns out to be necessary to have a
+	 new record we can still allocate real memory.  */
+      bool alloca_used = false;
+      dataset = NULL;

-      cp = data->strdata;
+      if (he == NULL)
+	{
+	  dataset = (struct dataset *) mempool_alloc (db, total + n);
+	  if (dataset == NULL)
+	    ++db->head->addfailed;
+	}
+
+      if (dataset == NULL)
+	{
+	  /* We cannot permanently add the result at the moment.  But
+	     we can provide the result as is.  Store the data in some
+	     temporary memory.  */
+	  dataset = (struct dataset *) alloca (total + n);
+
+	  /* We cannot add this record to the permanent database.  */
+	  alloca_used = true;
+	}
+
+      dataset->head.allocsize = total + n;
+      dataset->head.recsize = total - offsetof (struct dataset, resp);
+      dataset->head.notfound = false;
+      dataset->head.nreloads = he == NULL ? 0 : (dh->nreloads + 1);
+      dataset->head.usable = true;
+
+      /* Compute the timeout time.  */
+      dataset->head.timeout = t + db->postimeout;
+
+      dataset->resp.version = NSCD_VERSION;
+      dataset->resp.found = 1;
+      dataset->resp.pw_name_len = pw_name_len;
+      dataset->resp.pw_passwd_len = pw_passwd_len;
+      dataset->resp.pw_uid = pwd->pw_uid;
+      dataset->resp.pw_gid = pwd->pw_gid;
+      dataset->resp.pw_gecos_len = pw_gecos_len;
+      dataset->resp.pw_dir_len = pw_dir_len;
+      dataset->resp.pw_shell_len = pw_shell_len;
+
+      cp = dataset->strdata;

      /* Copy the strings over into the buffer.  */
      cp = mempcpy (cp, pwd->pw_name, pw_name_len);
@ -157,35 +229,120 @@ cache_addpw (struct database *db, int fd, request_header *req, void *key,
      cp = mempcpy (cp, pwd->pw_dir, pw_dir_len);
      cp = mempcpy (cp, pwd->pw_shell, pw_shell_len);

-      /* Next the stringified UID value.  */
+      /* Finally the stringified UID value.  */
      memcpy (cp, buf, n);
+      char *key_copy = cp + key_offset;
+      assert (key_copy == (char *) rawmemchr (cp, '\0') + 1);

-      /* Copy of the key in case it differs.  */
-      char *key_copy = memcpy (cp + n, key, req->key_len);
+      /* Now we can determine whether on refill we have to create a new
+	 record or not.  */
+      if (he != NULL)
+	{
+	  assert (fd == -1);

-      /* We write the dataset before inserting it to the database
-	 since while inserting this thread might block and so would
-	 unnecessarily let the receiver wait.  */
-      written = TEMP_FAILURE_RETRY (write (fd, &data->resp, total));
+	  if (total + n == dh->allocsize
+	      && total - offsetof (struct dataset, resp) == dh->recsize
+	      && memcmp (&dataset->resp, dh->data,
+			 dh->allocsize - offsetof (struct dataset, resp)) == 0)
+	    {
+	      /* The data has not changed.  We will just bump the
+		 timeout value.  Note that the new record has been
+		 allocated on the stack and need not be freed.  */
+	      dh->timeout = dataset->head.timeout;
+	      ++dh->nreloads;
+	    }
+	  else
+	    {
+	      /* We have to create a new record.  Just allocate
+		 appropriate memory and copy it.  */
+	      struct dataset *newp
+		= (struct dataset *) mempool_alloc (db, total + n);
+	      if (newp != NULL)
+		{
+		  /* Adjust pointer into the memory block.  */
+		  cp = (char *) newp + (cp - (char *) dataset);

-      /* Compute the timeout time.  */
-      t += db->postimeout;
+		  dataset = memcpy (newp, dataset, total + n);
+		  alloca_used = false;
+		}

-      /* Now get the lock to safely insert the records.  */
-      pthread_rwlock_rdlock (&db->lock);
+	      /* Mark the old record as obsolete.  */
+	      dh->usable = false;
+	    }
+	}
+      else
+	{
+	  /* We write the dataset before inserting it to the database
+	     since while inserting this thread might block and so would
+	     unnecessarily let the receiver wait.  */
+	  assert (fd != -1);

-      /* We have to add the value for both, byname and byuid.  */
-      cache_add (GETPWBYNAME, data->strdata, pw_name_len, data,
-		 total, data, 0, t, db, owner);
+	  written = TEMP_FAILURE_RETRY (write (fd, &dataset->resp, total));
+	}

-      /* If the key is different from the name add a separate entry.  */
-      if (type == GETPWBYNAME && strcmp (key_copy, data->strdata) != 0)
-	cache_add (GETPWBYNAME, key_copy, req->key_len, data,
-		   total, data, 0, t, db, owner);

-      cache_add (GETPWBYUID, cp, n, data, total, data, 1, t, db, owner);
+      /* Add the record to the database.  But only if it has not been
+	 stored on the stack.  */
+      if (! alloca_used)
+	{
+	  /* If necessary, we also propagate the data to disk.  */
+	  if (db->persistent)
+	    // XXX async OK?
+	    msync (dataset, total + n, MS_ASYNC);

-      pthread_rwlock_unlock (&db->lock);
+	  /* Now get the lock to safely insert the records.  */
+	  pthread_rwlock_rdlock (&db->lock);
+
+	  /* NB: in the following code we always must add the entry
+	     marked with FIRST first.  Otherwise we end up with
+	     dangling "pointers" in case a later hash entry cannot be
+	     added.  */
+	  bool first = req->type == GETPWBYNAME;
+
+	  /* If the request was by UID, add that entry first.  */
+	  if (req->type != GETPWBYNAME)
+	    {
+	      if (cache_add (GETPWBYUID, cp, n, &dataset->head, true, db,
+			     owner) < 0)
+		{
+		  /* Could not allocate memory.  Make sure the data gets
+		     discarded.  */
+		  dataset->head.usable = false;
+		  goto out;
+		}
+	    }
+	  /* If the key is different from the name add a separate entry.  */
+	  else if (strcmp (key_copy, dataset->strdata) != 0)
+	    {
+	      if (cache_add (GETPWBYNAME, key_copy, key_len + 1,
+			     &dataset->head, first, db, owner) < 0)
+		{
+		  /* Could not allocate memory.  Make sure the data gets
+		     discarded.  */
+		  dataset->head.usable = false;
+		  goto out;
+		}
+
+	      first = false;
+	    }
+
+	  /* We have to add the value for both, byname and byuid.  */
+	  if (__builtin_expect (cache_add (GETPWBYNAME, dataset->strdata,
+					   pw_name_len, &dataset->head, first,
+					   db, owner) == 0, 1))
+	    {
+	      if (req->type == GETPWBYNAME)
+		(void) cache_add (GETPWBYUID, cp, n, &dataset->head,
+				  req->type != GETPWBYNAME, db, owner);
+	    }
+	  else if (first)
+	    /* Could not allocate memory.  Make sure the data gets
+	       discarded.  */
+	    dataset->head.usable = false;
+
+	out:
+	  pthread_rwlock_unlock (&db->lock);
+	}
    }

  if (__builtin_expect (written != total, 0) && debug_level > 0)
@ -197,32 +354,57 @@ cache_addpw (struct database *db, int fd, request_header *req, void *key,
 }


-void
-addpwbyname (struct database *db, int fd, request_header *req,
-	     void *key, uid_t c_uid)
+union keytype
+{
+  void *v;	/* Key as a string, passed to __getpwnam_r.  */
+  uid_t u;	/* Key as a numeric UID, passed to __getpwuid_r.  */
+};
+
+
+static int
+lookup (int type, union keytype key, struct passwd *resultbufp, char *buffer,
+	size_t buflen, struct passwd **pwd)
+{
+  if (type == GETPWBYNAME)	/* KEY.v is the user name.  */
+    return __getpwnam_r (key.v, resultbufp, buffer, buflen, pwd);
+  else				/* Any other TYPE: look up by UID in KEY.u.  */
+    return __getpwuid_r (key.u, resultbufp, buffer, buflen, pwd);
+}
+
+
+static void
+addpwbyX (struct database_dyn *db, int fd, request_header *req,
+	  union keytype key, const char *keystr, uid_t c_uid,
+	  struct hashentry *he, struct datahead *dh)
 {
  /* Search for the entry matching the key.  Please note that we don't
     look again in the table whether the dataset is now available.  We
     simply insert it.  It does not matter if it is in there twice.  The
     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
+  size_t buflen = 1024;
  char *buffer = (char *) alloca (buflen);
  struct passwd resultbuf;
  struct passwd *pwd;
  uid_t oldeuid = 0;
  bool use_malloc = false;
+  int errval = 0;

  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%s\" in password cache!"), (char *) key);
+    {
+      if (he == NULL)
+	dbg_log (_("Haven't found \"%s\" in password cache!"), keystr);
+      else
+	dbg_log (_("Reloading \"%s\" in password cache!"), keystr);
+    }

-  if (secure[pwddb])
+  if (db->secure)
    {
      oldeuid = geteuid ();
      seteuid (c_uid);
    }

-  while (__getpwnam_r (key, &resultbuf, buffer, buflen, &pwd) != 0
-	 && errno == ERANGE)
+  while (lookup (req->type, key, &resultbuf, buffer, buflen, &pwd) != 0
+	 && (errval = errno) == ERANGE)
    {
      char *old_buffer = buffer;
      errno = 0;
@ -239,6 +421,11 @@ addpwbyname (struct database *db, int fd, request_header *req,
 		 never happen.  */
 	      pwd = NULL;
 	      buffer = old_buffer;
+
+	      /* We set the error to indicate this is (possibly) a
+		 temporary error and that it does not mean the entry
+		 is not available at all.  */
+	      errval = EAGAIN;
 	      break;
 	    }
 	  use_malloc = true;
@ -249,10 +436,11 @@ addpwbyname (struct database *db, int fd, request_header *req,
 	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
    }

-  if (secure[pwddb])
+  if (db->secure)
    seteuid (oldeuid);

-  cache_addpw (db, fd, req, key, pwd, c_uid, GETPWBYNAME);
+  /* Add the entry to the cache.  */
+  cache_addpw (db, fd, req, keystr, pwd, c_uid, he, dh, errval);

  if (use_malloc)
    free (buffer);
@ -260,21 +448,36 @@ addpwbyname (struct database *db, int fd, request_header *req,


 void
-addpwbyuid (struct database *db, int fd, request_header *req,
+addpwbyname (struct database_dyn *db, int fd, request_header *req,
+	     void *key, uid_t c_uid)
+{
+  union keytype u = { .v = key };
+  /* NULL for HE and DH: this is a new request, not a reload.  */
+  addpwbyX (db, fd, req, u, key, c_uid, NULL, NULL);
+}
+
+
+void
+readdpwbyname (struct database_dyn *db, struct hashentry *he,
+	       struct datahead *dh)
+{
+  request_header req =
+    {
+      .type = GETPWBYNAME,
+      .key_len = he->len
+    };
+  union keytype u = { .v = db->data + he->key };
+  /* The key is at offset HE->key in DB->data; -1: no reply is written.  */
+  addpwbyX (db, -1, &req, u, db->data + he->key, he->owner, he, dh);
+}
+
+
+void
+addpwbyuid (struct database_dyn *db, int fd, request_header *req,
 	    void *key, uid_t c_uid)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 256;
-  char *buffer = (char *) alloca (buflen);
-  struct passwd resultbuf;
-  struct passwd *pwd;
-  uid_t oldeuid = 0;
  char *ep;
  uid_t uid = strtoul ((char *) key, &ep, 10);
-  bool use_malloc = false;

  if (*(char *) key == '\0' || *ep != '\0')  /* invalid numeric uid */
    {
@ -285,47 +488,28 @@ addpwbyuid (struct database *db, int fd, request_header *req,
      return;
    }

-  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%d\" in password cache!"), uid);
+  union keytype u = { .u = uid };

-  if (secure[pwddb])
-    {
-      oldeuid = geteuid ();
-      seteuid (c_uid);
-    }
-
-  while (__getpwuid_r (uid, &resultbuf, buffer, buflen, &pwd) != 0
-	 && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-	{
-	  buflen += 1024;
-	  buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-	  if (buffer == NULL)
-	    {
-	      /* We ran out of memory.  We cannot do anything but
-		 sending a negative response.  In reality this should
-		 never happen.  */
-	      pwd = NULL;
-	      buffer = old_buffer;
-	      break;
-	    }
-	  use_malloc = true;
-	}
-      else
-	/* Allocate a new buffer on the stack.  If possible combine it
-	   with the previously allocated buffer.  */
-	buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[pwddb])
-    seteuid (oldeuid);
-
-  cache_addpw (db, fd, req, key, pwd, c_uid, GETPWBYUID);
-
-  if (use_malloc)
-    free (buffer);
+  addpwbyX (db, fd, req, u, key, c_uid, NULL, NULL);
+}
+
+
+void
+readdpwbyuid (struct database_dyn *db, struct hashentry *he,
+	      struct datahead *dh)
+{
+  char *ep;
+  uid_t uid = strtoul (db->data + he->key, &ep, 10);
+
+  /* Since the key has been added before it must be OK.  */
+  assert (*(db->data + he->key) != '\0' && *ep == '\0');
+
+  request_header req =
+    {
+      .type = GETPWBYUID,
+      .key_len = he->len
+    };
+  union keytype u = { .u = uid };
+  /* Pass -1 as descriptor: a reload does not write a reply.  */
+  addpwbyX (db, -1, &req, u, db->data + he->key, he->owner, he, dh);
 }