* nscd/aicache.c (addhstaiX): Check herrno after IPv4 lookup only

when the lookup call failed. * nscd/nscd.h (struct database_dyn): Rename prunelock to prune_lock. Add prune_cond and wakeup_time. (CACHE_PRUNE_INTERNAL): Define. Update declarations of prune_cache and setup_thread. * nscd/connections.c (dbs): Update initializers. (CACHE_PRUNE_INTERNAL): Moved to nscd.h. (nscd_init): Default number of threads is now 4. (invalidate_cache): Take lock before calling prune_cache. (handle_request): If SELinux forbids the request, say so. (readylist_cond): Use static initializer. (nscd_run_prune): New function. Used only by pruning threads. (nscd_run_worder): Renamed from nscd_run. Remove support for pruning here. (fd_ready): Update nscd_run reference. (start_threads): No need to initialize readylist_cond. Start pruning threads separately. * nscd/nscd_setup_thread.c: Change return value type to int and always return 0. * sysdeps/unix/sysv/linux/nscd_setup_thread.c: Change return value type to int and return nonzero value if we can use the TID address hack. * nscd/cache.c (cache_add): If next wakeup time of cleanup thread for the database is later than the new entry's timeout, update the wakeup time and wake the cleanup thread. (prune_cache): Return seconds the next entry in the database is still valid. Remove locking for pruning here. * nscd/nscd.conf: Document default number of threads.
2007-11-06 00:45:40 +00:00 · 2007-11-06 00:45:40 +00:00 · ffb1b88282
commit ffb1b88282
parent 41ff2a4999
8 changed files with 183 additions and 109 deletions
--- a/33
+++ b/33
@ -1,8 +1,41 @@
+2007-11-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/aicache.c (addhstaiX): Check herrno after IPv4 lookup only
+	when the lookup call failed.
+
+	* nscd/nscd.h (struct database_dyn): Rename prunelock to prune_lock.
+	Add prune_cond and wakeup_time.
+	(CACHE_PRUNE_INTERNAL): Define.
+	Update declarations of prune_cache and setup_thread.
+	* nscd/connections.c (dbs): Update initializers.
+	(CACHE_PRUNE_INTERNAL): Moved to nscd.h.
+	(nscd_init): Default number of threads is now 4.
+	(invalidate_cache): Take lock before calling prune_cache.
+	(handle_request): If SELinux forbids the request, say so.
+	(readylist_cond): Use static initializer.
+	(nscd_run_prune): New function.  Used only by pruning threads.
+	(nscd_run_worder): Renamed from nscd_run.  Remove support for pruning
+	here.
+	(fd_ready): Update nscd_run reference.
+	(start_threads): No need to initialize readylist_cond.
+	Start pruning threads separately.
+	* nscd/nscd_setup_thread.c: Change return value type to int and always
+	return 0.
+	* sysdeps/unix/sysv/linux/nscd_setup_thread.c: Change return value type
+	to int and return nonzero value if we can use the TID address hack.
+	* nscd/cache.c (cache_add): If next wakeup time of cleanup thread for
+	the database is later than the new entry's timeout, update the
+	wakeup time and wake the cleanup thread.
+	(prune_cache): Return seconds the next entry in the database is still
+	valid.  Remove locking for pruning here.
+	* nscd/nscd.conf: Document default number of threads.
+
 2007-10-31  Ulrich Drepper  <drepper@redhat.com>

 	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Make sure
 	stack is properly aligned for the target function.
 	Correct unwind info.
+
 	* elf/rtld.c (dl_main): Initialize stack and pointer guard early
 	when using auditing libraries.

--- a/nscd/aicache.c
+++ b/nscd/aicache.c
@ -157,7 +157,7 @@ addhstaiX (struct database_dyn *db, int fd, request_header *req,
 	      tmpbuf4 = tmpbuf6;
 	    }

-	  /* Next collect IPv4 information first.  */
+	  /* Next collect IPv4 information.  */
 	  while (1)
 	    {
 	      rc4 = 0;
@ -170,7 +170,7 @@ addhstaiX (struct database_dyn *db, int fd, request_header *req,
 	      tmpbuf4 = extend_alloca (tmpbuf4, tmpbuf4len, 2 * tmpbuf4len);
 	    }

-	  if (rc4 != 0 || herrno == NETDB_INTERNAL)
+	  if (rc4 != 0 && herrno == NETDB_INTERNAL)
 	    goto out;

 	  if (status[0] == NSS_STATUS_SUCCESS
--- a/nscd/cache.c
+++ b/nscd/cache.c
@ -197,6 +197,20 @@ cache_add (int type, const void *key, size_t len, struct datahead *packet,
 	   (char *) &table->head->array[hash] - (char *) table->head
 	   + sizeof (ref_t), MS_ASYNC);

+  /* Perhaps the prune thread for the data is not running in a long
+     time.  Wake it if necessary.  */
+  time_t next_wakeup = table->wakeup_time;
+  while (next_wakeup + CACHE_PRUNE_INTERVAL > packet->timeout)
+    if (atomic_compare_and_exchange_bool_acq (&table->wakeup_time,
+					      packet->timeout,
+					      next_wakeup) == 0)
+      {
+	pthread_cond_signal (&table->prune_cond);
+	break;
+      }
+    else
+      next_wakeup = table->wakeup_time;
+
  return 0;
 }

@ -212,7 +226,7 @@ cache_add (int type, const void *key, size_t len, struct datahead *packet,
   actually remove them.  This is complicated by the way we have to
   free the data structures since some hash table entries share the same
   data.  */
-void
+time_t
 prune_cache (struct database_dyn *table, time_t now, int fd)
 {
  size_t cnt = table->head->module;
@ -226,7 +240,9 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
 	  int32_t resp = 0;
 	  writeall (fd, &resp, sizeof (resp));
 	}
-      return;
+
+      /* No need to do this again anytime soon.  */
+      return 24 * 60 * 60;
    }

  /* If we check for the modification of the underlying file we invalidate
@ -256,21 +272,6 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
 	}
    }

-  /* This function can be called from the cleanup thread but also in
-     response to an invalidate command.  Make sure only one thread is
-     running.  When not serving INVALIDATE request, no need for the
-     second thread to wait around.  */
-  if (__builtin_expect (pthread_mutex_trylock (&table->prunelock) != 0, 0))
-    {
-      /* The work is already being done.  */
-      if (fd == -1)
-	return;
-
-      /* We have to wait until the thread is done and then run again
-	 so that the large NOW value invalidates all entries.  */
-      pthread_mutex_lock (&table->prunelock);
-    }
-
  /* We run through the table and find values which are not valid anymore.

     Note that for the initial step, finding the entries to be removed,
@ -473,5 +474,5 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
  if (any)
    gc (table);

-  pthread_mutex_unlock (&table->prunelock);
+  return next_timeout - now;
 }
--- a/nscd/connections.c
+++ b/nscd/connections.c
@ -104,7 +104,7 @@ struct database_dyn dbs[lastdb] =
 {
  [pwddb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
-    .prunelock = PTHREAD_MUTEX_INITIALIZER,
+    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
@ -123,7 +123,7 @@ struct database_dyn dbs[lastdb] =
  },
  [grpdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
-    .prunelock = PTHREAD_MUTEX_INITIALIZER,
+    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
@ -142,7 +142,7 @@ struct database_dyn dbs[lastdb] =
  },
  [hstdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
-    .prunelock = PTHREAD_MUTEX_INITIALIZER,
+    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
@ -161,7 +161,7 @@ struct database_dyn dbs[lastdb] =
  },
  [servdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
-    .prunelock = PTHREAD_MUTEX_INITIALIZER,
+    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
@ -210,10 +210,6 @@ static struct
 };


-/* Number of seconds between two cache pruning runs.  */
-#define CACHE_PRUNE_INTERVAL	15
-
-
 /* Initial number of threads to use.  */
 int nthreads = -1;
 /* Maximum number of threads to use.  */
@ -495,7 +491,7 @@ nscd_init (void)

  if (nthreads == -1)
    /* No configuration for this value, assume a default.  */
-    nthreads = 2 * lastdb;
+    nthreads = 4;

  for (size_t cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
@ -898,7 +894,11 @@ invalidate_cache (char *key, int fd)
    }

  if (dbs[number].enabled)
-    prune_cache (&dbs[number], LONG_MAX, fd);
+    {
+      pthread_mutex_lock (&dbs[number].prune_lock);
+      prune_cache (&dbs[number], LONG_MAX, fd);
+      pthread_mutex_unlock (&dbs[number].prune_lock);
+    }
  else
    {
      resp = 0;
@ -970,9 +970,13 @@ cannot handle old request version %d; current version is %d"),
      return;
    }

-  /* Make the SELinux check before we go on to the standard checks.  */
+  /* Perform the SELinux check before we go on to the standard checks.  */
  if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
-    return;
+    {
+      if (debug_level > 0)
+	dbg_log (_("request not handled due to missing permission"));
+      return;
+    }

  struct database_dyn *db = reqinfo[req->type].db;

@ -1336,7 +1340,7 @@ static struct fdlist *readylist;
 /* Conditional variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.  */
-static pthread_cond_t readylist_cond;
+static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

 /* The clock to use with the condvar.  */
@ -1346,32 +1350,76 @@ static clockid_t timeout_clock = CLOCK_REALTIME;
 static unsigned long int nready;


-/* This is the main loop.  It is replicated in different threads but the
-   `poll' call makes sure only one thread handles an incoming connection.  */
+/* Function for the clean-up threads.  */
 static void *
 __attribute__ ((__noreturn__))
-nscd_run (void *p)
+nscd_run_prune (void *p)
 {
  const long int my_number = (long int) p;
-  const int run_prune = my_number < lastdb && dbs[my_number].enabled;
+  assert (dbs[my_number].enabled);
+
+  int dont_need_update = setup_thread (&dbs[my_number]);
+
+  /* We are running.  */
+  dbs[my_number].head->timestamp = time (NULL);
+
  struct timespec prune_ts;
-  int to = 0;
-  char buf[256];
+  if (clock_gettime (timeout_clock, &prune_ts) == -1)
+    /* Should never happen.  */
+    abort ();

-  if (run_prune)
+  /* Compute the initial timeout time.  Prevent all the timers to go
+     off at the same time by adding a db-based value.  */
+  prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
+
+  pthread_mutex_lock (&dbs[my_number].prune_lock);
+  while (1)
    {
-      setup_thread (&dbs[my_number]);
+      /* Wait, but not forever.  */
+      int e = pthread_cond_timedwait (&dbs[my_number].prune_cond,
+				      &dbs[my_number].prune_lock,
+				      &prune_ts);
+      assert (e == 0 || e == ETIMEDOUT);

-      /* We are running.  */
-      dbs[my_number].head->timestamp = time (NULL);
+      time_t next_wait;
+      time_t now = time (NULL);
+      if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
+	{
+	  next_wait = prune_cache (&dbs[my_number], now, -1);
+	  next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
+	  /* If clients cannot determine for sure whether nscd is running
+	     we need to wake up occasionally to update the timestamp.
+	     Wait 90% of the update period.  */
+#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
+	  if (__builtin_expect (! dont_need_update, 0))
+	    next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
+
+	  /* Make it known when we will wake up again.  */
+	  dbs[my_number].wakeup_time = now + next_wait;
+	}
+      else
+	/* The cache was just pruned.  Do not do it again now.  Just
+	   use the new timeout value.  */
+	next_wait = dbs[my_number].wakeup_time - now;

      if (clock_gettime (timeout_clock, &prune_ts) == -1)
 	/* Should never happen.  */
 	abort ();

-      /* Compute timeout time.  */
-      prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
+      /* Compute next timeout time.  */
+      prune_ts.tv_sec += next_wait;
    }
+}
+
+
+/* This is the main loop.  It is replicated in different threads but
+   the the use of the ready list makes sure only one thread handles an
+   incoming connection.  */
+static void *
+__attribute__ ((__noreturn__))
+nscd_run_worker (void *p)
+{
+  char buf[256];

  /* Initial locking.  */
  pthread_mutex_lock (&readylist_lock);
@ -1382,26 +1430,7 @@ nscd_run (void *p)
  while (1)
    {
      while (readylist == NULL)
-	{
-	  if (run_prune)
-	    {
-	      /* Wait, but not forever.  */
-	      to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
-					   &prune_ts);
-
-	      /* If we were woken and there is no work to be done,
-		 just start pruning.  */
-	      if (readylist == NULL && to == ETIMEDOUT)
-		{
-		  --nready;
-		  pthread_mutex_unlock (&readylist_lock);
-		  goto only_prune;
-		}
-	    }
-	  else
-	    /* No need to timeout.  */
-	    pthread_cond_wait (&readylist_cond, &readylist_lock);
-	}
+	pthread_cond_wait (&readylist_cond, &readylist_lock);

      struct fdlist *it = readylist->next;
      if (readylist->next == readylist)
@ -1504,28 +1533,6 @@ handle_request: request received (Version = %d)"), req.version);
      /* We are done.  */
      close (fd);

-      /* Check whether we should be pruning the cache. */
-      assert (run_prune || to == 0);
-      if (to == ETIMEDOUT)
-	{
-	only_prune:
-	  /* The pthread_cond_timedwait() call timed out.  It is time
-		 to clean up the cache.  */
-	  assert (my_number < lastdb);
-	  prune_cache (&dbs[my_number], time (NULL), -1);
-
-	  if (clock_gettime (timeout_clock, &prune_ts) == -1)
-	    /* Should never happen.  */
-	    abort ();
-
-	  /* Compute next timeout time.  */
-	  prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
-
-	  /* In case the list is emtpy we do not want to run the prune
-	     code right away again.  */
-	  to = 0;
-	}
-
      /* Re-locking.  */
      pthread_mutex_lock (&readylist_lock);

@ -1568,7 +1575,7 @@ fd_ready (int fd)
      /* Try to start another thread to help out.  */
      pthread_t th;
      if (nthreads < max_nthreads
-	  && pthread_create (&th, &attr, nscd_run,
+	  && pthread_create (&th, &attr, nscd_run_worker,
 			     (void *) (long int) nthreads) == 0)
 	{
 	  /* We got another thread.  */
@ -1828,10 +1835,6 @@ start_threads (void)
 	timeout_clock = CLOCK_MONOTONIC;
 #endif

-  pthread_cond_init (&readylist_cond, &condattr);
-  pthread_condattr_destroy (&condattr);
-
-
  /* Create the attribute for the threads.  They are all created
     detached.  */
  pthread_attr_init (&attr);
@ -1843,19 +1846,46 @@ start_threads (void)
  if (debug_level == 0)
    nthreads = MAX (nthreads, lastdb);

-  int nfailed = 0;
+  /* Create the threads which prune the databases.  */
+  // XXX Ideally this work would be done by some of the worker threads.
+  // XXX But this is problematic since we would need to be able to wake
+  // XXX them up explicitly as well as part of the group handling the
+  // XXX ready-list.  This requires an operation where we can wait on
+  // XXX two conditional variables at the same time.  This operation
+  // XXX does not exist (yet).
+  for (long int i = 0; i < lastdb; ++i)
+    {
+      /* Initialize the conditional variable.  */
+      if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
+	{
+	  dbg_log (_("could not initialize conditional variable"));
+	  exit (1);
+	}
+
+      pthread_t th;
+      if (dbs[i].enabled
+	  && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
+	{
+	  dbg_log (_("could not start clean-up thread; terminating"));
+	  exit (1);
+	}
+    }
+
+  pthread_condattr_destroy (&condattr);
+
  for (long int i = 0; i < nthreads; ++i)
    {
      pthread_t th;
-      if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
-	++nfailed;
-    }
-  if (nthreads - nfailed < lastdb)
-    {
-      /* We could not start enough threads.  */
-      dbg_log (_("could only start %d threads; terminating"),
-	       nthreads - nfailed);
-      exit (1);
+      if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
+	{
+	  if (i == 0)
+	    {
+	      dbg_log (_("could not start any worker thread; terminating"));
+	      exit (1);
+	    }
+
+	  break;
+	}
    }

  /* Determine how much room for descriptors we should initially
--- a/nscd/nscd.conf
+++ b/nscd/nscd.conf
@ -31,7 +31,7 @@


 #	logfile			/var/log/nscd.log
-#	threads			6
+#	threads			4
 #	max-threads		128
 #	server-user		nobody
 #	stat-user		somebody
--- a/nscd/nscd.h
+++ b/nscd/nscd.h
@ -59,7 +59,9 @@ typedef enum
 struct database_dyn
 {
  pthread_rwlock_t lock;
-  pthread_mutex_t prunelock;
+  pthread_cond_t prune_cond;
+  pthread_mutex_t prune_lock;
+  time_t wakeup_time;

  int enabled;
  int check_file;
@ -111,6 +113,11 @@ struct database_dyn
 #define DEFAULT_DATASIZE_PER_BUCKET 1024


+/* Number of seconds between two cache pruning runs if we do not have
+   better information when it is really needed.  */
+#define CACHE_PRUNE_INTERVAL	15
+
+
 /* Global variables.  */
 extern struct database_dyn dbs[lastdb];
 extern const char *const dbnames[lastdb];
@ -189,7 +196,7 @@ extern struct datahead *cache_search (request_type, void *key, size_t len,
 extern int cache_add (int type, const void *key, size_t len,
 		      struct datahead *packet, bool first,
 		      struct database_dyn *table, uid_t owner);
-extern void prune_cache (struct database_dyn *table, time_t now, int fd);
+extern time_t prune_cache (struct database_dyn *table, time_t now, int fd);

 /* pwdcache.c */
 extern void addpwbyname (struct database_dyn *db, int fd, request_header *req,
@ -258,7 +265,7 @@ extern void gc (struct database_dyn *db);


 /* nscd_setup_thread.c */
-extern void setup_thread (struct database_dyn *db);
+extern int setup_thread (struct database_dyn *db);


 /* Special version of TEMP_FAILURE_RETRY for functions returning error
--- a/nscd/nscd_setup_thread.c
+++ b/nscd/nscd_setup_thread.c
@ -20,8 +20,9 @@
 #include <nscd.h>


-void
+int
 setup_thread (struct database_dyn *db)
 {
  /* Nothing.  */
+  return 0;
 }
--- a/sysdeps/unix/sysv/linux/nscd_setup_thread.c
+++ b/sysdeps/unix/sysv/linux/nscd_setup_thread.c
@ -23,7 +23,7 @@
 #include <sysdep.h>


-void
+int
 setup_thread (struct database_dyn *db)
 {
 #ifdef __NR_set_tid_address
@ -43,6 +43,8 @@ setup_thread (struct database_dyn *db)
    /* We know the kernel can reset this field when nscd terminates.
       So, set the field to a nonzero value which indicates that nscd
       is certainly running and clients can skip the test.  */
-    db->head->nscd_certainly_running = 1;
+    return db->head->nscd_certainly_running = 1;
 #endif
+
+  return 0;
 }