Update.

1999-12-25 Ulrich Drepper <drepper@cygnus.com> * locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the indirect table. * locale/langinfo.h: Likewise. * locale/categories.def: Likewise. Remove reference to postload functions. * locale/lc-collate.c (_nl_postload_collate): Removed. Also remove __collate_tablemb, __collate_weightmb, and __collate_extramb. * locale/localeinfo.h: Remove declaration for removed variables above. Remove prototype for _nl_get_era_entry. * locale/weight.h: Complete rewrite for new collate implementation. * locale/programs/ld-collate.c: Many changes to make output file usable in strxfrm/strcoll. * string/strxfrm.c: Complete rewrite for new collate implementation. * wcsmbs/wcsxfrm.c: Don't use strxfrm.c, implement dummy implementation locally. 1999-12-25 Shinya Hanataka <hanataka@abyss.rim.or.jp> * locale/programs/ld-ctype.c (allocate_arrays): Correctly assign transformation values for chars >255. * wctype/wctrans.c: Return pointer unmodified.
1999-12-25 23:41:39 +00:00 · 1999-12-25 23:41:39 +00:00 · 450bf66ef2
commit 450bf66ef2
parent ce40141c6b
12 changed files with 537 additions and 471 deletions
--- a/24
+++ b/24
@ -1,3 +1,27 @@
 1999-12-25  Ulrich Drepper  <drepper@cygnus.com>
 	* locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the
 	indirect table.
 	* locale/langinfo.h: Likewise.
 	* locale/categories.def: Likewise.  Remove reference to postload
 	functions.
 	* locale/lc-collate.c (_nl_postload_collate): Removed.  Also remove
 	__collate_tablemb, __collate_weightmb, and __collate_extramb.
 	* locale/localeinfo.h: Remove declaration for removed variables above.
 	Remove prototype for _nl_get_era_entry.
 	* locale/weight.h: Complete rewrite for new collate implementation.
 	* locale/programs/ld-collate.c: Many changes to make output file
 	usable in strxfrm/strcoll.
 	* string/strxfrm.c: Complete rewrite for new collate implementation.
 	* wcsmbs/wcsxfrm.c: Don't use strxfrm.c, implement dummy implementation
 	locally.
 1999-12-25  Shinya Hanataka  <hanataka@abyss.rim.or.jp>
 	* locale/programs/ld-ctype.c (allocate_arrays): Correctly assign
 	transformation values for chars >255.
 	* wctype/wctrans.c: Return pointer unmodified.
 1999-12-24  Ulrich Drepper  <drepper@cygnus.com>
 	* sysdeps/posix/system.c (__libc_system): Check whether command
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@ -150,12 +150,13 @@ const struct locale_data _nl_C_LC_COLLATE =
  _nl_C_name,
  NULL, 0, 0, /* no file mapped */
  UNDELETABLE,
-  5,
+  6,
  {
    { word: 0 },
    { string: NULL },
    { string: NULL },
    { string: NULL },
    { string: NULL },
    { string: NULL }
  }
 };
--- a/locale/categories.def
+++ b/locale/categories.def
@ -47,7 +47,8 @@ DEFINE_CATEGORY
  DEFINE_ELEMENT (_NL_COLLATE_TABLEMB,        "collate-tablemb",          std, string)
  DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB,       "collate-weightmb",         std, string)
  DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB,        "collate-extramb",          std, string)
-  ), _nl_postload_collate)
+  DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB,     "collate-indirectmb",       std, string)
  ), NO_POSTLOAD)
 /* The actual definition of ctype is meaningless here.  It is hard coded in
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@ -235,6 +235,7 @@ enum
  _NL_COLLATE_TABLEMB,
  _NL_COLLATE_WEIGHTMB,
  _NL_COLLATE_EXTRAMB,
  _NL_COLLATE_INDIRECTMB,
  _NL_NUM_LC_COLLATE,
  /* LC_CTYPE category: character classification.
--- a/locale/lc-collate.c
+++ b/locale/lc-collate.c
@ -22,21 +22,3 @@
 _NL_CURRENT_DEFINE (LC_COLLATE);
 const int32_t *__collate_tablemb;
 const unsigned char *__collate_weightmb;
 const unsigned char *__collate_extramb;
 /* Post-load hook for the collation category: cache the TABLEMB,
    WEIGHTMB and EXTRAMB items of LC_COLLATE, as returned by _NL_CURRENT,
    in the file-scope pointers __collate_tablemb, __collate_weightmb and
    __collate_extramb.  An earlier version of this comment named LC_CTYPE,
    but every lookup below goes through LC_COLLATE.  Per that earlier
    comment the cached pointers are meant for the collation functions
    and regex.  Takes no arguments and returns nothing; its only effect
    is the three assignments.  */
 void
 _nl_postload_collate (void)
 {
 /* `paste' forwards to `paste1' so that its arguments are macro-expanded
    before `##' glues them together.  `current (X)' then expands to
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_X).  None of these helper macros
    is #undef'd inside this function.  */
 #define paste(a,b) paste1(a,b)
 #define paste1(a,b) a##b
 #define current(x) _NL_CURRENT (LC_COLLATE, paste(_NL_COLLATE_,x))
  /* The casts give each item the element type of the pointer that
     receives it.  */
  __collate_tablemb = (const int32_t *) current (TABLEMB);
  __collate_weightmb = (const unsigned char *) current (WEIGHTMB);
  __collate_extramb = (const unsigned char *) current (EXTRAMB);
 }
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@ -165,9 +165,6 @@ extern void _nl_unload_locale (struct locale_data *locale);
 extern void _nl_remove_locale (int locale, struct locale_data *data);
 /* initialize `era' entries */
 extern void _nl_init_era_entries (void);
 /* Return `era' entry which corresponds to TP.  Used in strftime.  */
 extern struct era_entry *_nl_get_era_entry (const struct tm *tp);
@ -180,10 +177,4 @@ extern const char *_nl_get_alt_digit (unsigned int number);
 /* Similar, but now for wide characters.  */
 extern const wchar_t *_nl_get_walt_digit (unsigned int number);
 /* Global variables for LC_COLLATE category data.  The names must match
    the definitions in lc-collate.c: __collate_tablemb, __collate_weightmb
    and __collate_extramb.  */
 extern const int32_t *__collate_tablemb;
 extern const unsigned char *__collate_weightmb;
 extern const unsigned char *__collate_extramb;
 #endif	/* localeinfo.h */
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@ -137,9 +137,6 @@ struct locale_collate_t
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Number of sorting rules given in order_start line.  */
  uint32_t nrules;
  /* Start of the order list.  */
  struct element_t *start;
@ -176,7 +173,7 @@ struct locale_collate_t
 /* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */
-static int nrules;
+static uint32_t nrules;
 /* These are definitions used by some of the functions for handling
@ -426,7 +423,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
 	      if (! warned)
 		{
 		  lr_error (ldfile, _("\
-%s: `%s' mentioned twice in definition of weight %d in category `%s'"),
+%s: `%s' mentioned twice in definition of weight %d"),
 			    "LC_COLLATE", "position", cnt + 1);
 		}
 	    }
@ -450,7 +447,13 @@ read_directions (struct linereader *ldfile, struct token *arg,
 	  /* See whether we have to increment the counter.  */
 	  if (arg->tok != tok_comma && rules[cnt] != 0)
 	    {
 	      /* Add the default `forward' if we have seen only `position'.  */
 	      if (rules[cnt] == sort_position)
 		rules[cnt] = sort_position | sort_forward;
 	      ++cnt;
 	    }
 	  if (arg->tok == tok_eof || arg->tok == tok_eol)
 	    /* End of line or file, so we exit the loop.  */
@ -876,7 +879,7 @@ insert_value (struct linereader *ldfile, struct token *arg,
 	      elem->nmbs = seq->nbytes;
 	    }
-	  if (elem->wcs == NULL && seq != ILLEGAL_CHAR_VALUE)
+	  if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
 	    {
 	      uint32_t wcs[2] = { wc, 0 };
@ -1552,7 +1555,7 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 }
-static inline int32_t
+static int32_t
 output_weight (struct obstack *pool, struct locale_collate_t *collate,
 	       struct element_t *elem)
 {
@ -1575,25 +1578,18 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
      int len = 0;
      int i;
      /* Add the direction.  */
      obstack_1grow (pool, elem->section->rules[cnt]);
      for (i = 0; i < elem->weights[cnt].cnt; ++i)
-	/* Encode the weight value.  */
+	/* Encode the weight value.  We do nothing for IGNORE entries.  */
-	if (elem->weights[cnt].w[i] == NULL)
+	if (elem->weights[cnt].w[i] != NULL)
 	  {
 	    /* This entry was IGNORE.  */
 	    buf[len++] = IGNORE_CHAR;
 	  }
 	else
 	  len += utf8_encode (&buf[len],
 			      elem->weights[cnt].w[i]->mborder[cnt]);
      /* And add the buffer content.  */
      obstack_1grow (pool, len);
      obstack_grow (pool, buf, len);
    }
-  return retval;
+  return retval | ((elem->section->ruleidx & 0x7f) << 24);
 }
@ -1611,11 +1607,13 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
  int32_t tablemb[256];
  struct obstack weightpool;
  struct obstack extrapool;
  struct obstack indirectpool;
  struct section_list *sect;
  int i;
  obstack_init (&weightpool);
  obstack_init (&extrapool);
  obstack_init (&indirectpool);
  data.magic = LIMAGIC (LC_COLLATE);
  data.n = nelems;
@ -1629,7 +1627,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
  cnt = 0;
  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
-  iov[2 + cnt].iov_base = &collate->nrules;
+  iov[2 + cnt].iov_base = &nrules;
  iov[2 + cnt].iov_len = sizeof (uint32_t);
  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
  ++cnt;
@ -1638,7 +1636,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
  for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
    if (sect->ruleidx == i)
      {
-	obstack_grow (&weightpool, sect->rules, nrules);
+	int j;
 	obstack_make_room (&weightpool, nrules);
 	for (j = 0; j < nrules; ++j)
 	  obstack_1grow_fast (&weightpool, sect->rules[j]);
 	++i;
      }
  /* And align the output.  */
@ -1719,38 +1722,60 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 	      {
 		int i;
 		/* More than one consecutive entry.  We mark this by having
 		   a negative index into the weight table.  */
 		weightidx = -weightidx;
 		/* Now add first the initial byte sequence.  */
 		added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
 			  + __alignof__ (int32_t) - 1)
 			 & ~(__alignof__ (int32_t) - 1));
 		obstack_make_room (&extrapool, added);
 		/* More than one consecutive entry.  We mark this by having
 		   a negative index into the indirect table.  */
 		if (sizeof (int32_t) == sizeof (int))
-		  obstack_int_grow_fast (&extrapool, weightidx);
+		  obstack_int_grow_fast (&extrapool,
 					 obstack_object_size (&indirectpool)
 					 / sizeof (int32_t));
 		else
-		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
+		  {
-		obstack_1grow_fast (&extrapool, runp->section->ruleidx);
+		    int32_t i = (obstack_object_size (&indirectpool)
 				 / sizeof (int32_t));
 		    obstack_grow (&extrapool, &i, sizeof (int32_t));
 		  }
 		obstack_1grow_fast (&extrapool, runp->nmbs - 1);
 		for (i = 1; i < runp->nmbs; ++i)
 		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
-		/* Now find the end of the consecutive sequence.  */
+		/* Now find the end of the consecutive sequence and
-		do
+                   add all the indeces in the indirect pool.  */
-		  runp = runp->next;
+		while (1)
-		while (runp->mbnext != NULL
+		  {
-		       && runp->nmbs == runp->mbnext->nmbs
+		    if (sizeof (int32_t) == sizeof (int))
-		       && memcmp (runp->mbs, runp->mbnext->mbs,
+		      obstack_int_grow_fast (&extrapool, weightidx);
-				  runp->nmbs - 1) == 0
+		    else
-		       && (runp->mbs[runp->nmbs - 1] + 1
+		      obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
 			   == runp->mbnext->mbs[runp->nmbs - 1]));
-		/* And add the end by sequence.  Without length this time.  */
+		    runp = runp->next;
 		    if (runp->mbnext == NULL
 			|| runp->nmbs != runp->mbnext->nmbs
 			|| memcmp (runp->mbs, runp->mbnext->mbs,
 				   runp->nmbs - 1) != 0
 			|| (runp->mbs[runp->nmbs - 1] + 1
 			    != runp->mbnext->mbs[runp->nmbs - 1]))
 		      break;
 		    /* Insert the weight.  */
 		    weightidx = output_weight (&weightpool, collate, runp);
 		  }
 		/* And add the end byte sequence.  Without length this
                   time.  */
 		for (i = 1; i < runp->nmbs; ++i)
 		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
 		weightidx = output_weight (&weightpool, collate, runp);
 		if (sizeof (int32_t) == sizeof (int))
 		  obstack_int_grow_fast (&extrapool, weightidx);
 		else
 		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
 	      }
 	    else
 	      {
@ -1768,7 +1793,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 		  obstack_int_grow_fast (&extrapool, weightidx);
 		else
 		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
 		obstack_1grow_fast (&extrapool, runp->section->ruleidx);
 		obstack_1grow_fast (&extrapool, runp->nmbs - 1);
 		for (i = 1; i < runp->nmbs; ++i)
 		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
@ -1835,6 +1859,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
  ++cnt;
  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
  iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
  iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
  ++cnt;
  assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
@ -1842,6 +1872,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
  obstack_free (&weightpool, NULL);
  obstack_free (&extrapool, NULL);
  obstack_free (&indirectpool, NULL);
 }
@ -2291,16 +2322,16 @@ error while adding equivalent collating symbol"));
 		      uint32_t cnt;
 		      /* This means we have exactly one rule: `forward'.  */
-		      if (collate->nrules > 1)
+		      if (nrules > 1)
 			lr_error (ldfile, _("\
 %s: invalid number of sorting rules"),
 				  "LC_COLLATE");
 		      else
-			collate->nrules = 1;
+			nrules = 1;
 		      sp->rules = obstack_alloc (&collate->mempool,
 						 (sizeof (enum coll_sort_rule)
-						  * collate->nrules));
+						  * nrules));
-		      for (cnt = 0; cnt < collate->nrules; ++cnt)
+		      for (cnt = 0; cnt < nrules; ++cnt)
 			sp->rules[cnt] = sort_forward;
 		      /* Next line.  */
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@ -3073,10 +3073,8 @@ Computing table size for character classes might take a while..."),
      while (idx2 < ctype->map_collection_act[idx])
 	{
 	  if (ctype->map_collection[idx][idx2] != 0)
-	    *find_idx (ctype, &ctype->map32[idx],
+	    ctype->map32[idx][ctype->charnames[idx2]] =
-		       &ctype->map_collection_max[idx],
+	      ctype->map_collection[idx][idx2];
 		       &ctype->map_collection_act[idx],
 		       ctype->names[idx2]) = ctype->map_collection[idx][idx2];
 	  ++idx2;
 	}
    }
--- a/locale/weight.h
+++ b/locale/weight.h
@ -17,191 +17,106 @@
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
-#include <alloca.h>
+/* Find index of weight.  */
-#include <errno.h>
+static inline int32_t
-#include <langinfo.h>
+findidx (const unsigned char **cpp)
 #include "localeinfo.h"
 #ifndef STRING_TYPE
 # error STRING_TYPE not defined
 #endif
 #ifndef USTRING_TYPE
 # error USTRING_TYPE not defined
 #endif
 typedef struct weight_t
 {
-  struct weight_t *prev;
+  int_fast32_t i = table[*(*cpp)++];
-  struct weight_t *next;
+  const unsigned char *cp;
  struct data_pair
    {
      int number;
      const uint32_t *value;
    } data[0];
 } weight_t;
  if (i >= 0)
    /* This is an index into the weight table.  Cool.  */
    return i;
-/* The following five macros grant access to the values in the
+  /* Oh well, more than one sequence starting with this byte.
-   collate locale file that do not depend on byte order.  */
+     Search for the correct one.  */
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
+  cp = &extra[-i];
 # define collate_nrules \
  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES))
 # define collate_hash_size \
  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE))
 # define collate_hash_layers \
  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS))
 # define collate_undefined \
  (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_UNDEFINED_WC))
 # define collate_rules \
  ((uint32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULES))
 static __inline void get_weight (const STRING_TYPE **str, weight_t *result);
 static __inline void
 get_weight (const STRING_TYPE **str, weight_t *result)
 #else
 # define collate_nrules \
  current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word
 # define collate_hash_size \
  current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word
 # define collate_hash_layers \
  current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word
 # define collate_undefined \
  current->values[_NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED_WC)].word
 # define collate_rules \
  ((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULES)].string)
 static __inline void get_weight (const STRING_TYPE **str, weight_t *result,
 				 struct locale_data *current,
 				 const uint32_t *__collate_tablewc,
 				 const uint32_t *__collate_extrawc);
 static __inline void
 get_weight (const STRING_TYPE **str, weight_t *result,
 	    struct locale_data *current, const uint32_t *__collate_tablewc,
 	    const uint32_t *__collate_extrawc)
 #endif
 {
  unsigned int ch = *((USTRING_TYPE *) (*str))++;
  size_t slot;
  if (sizeof (STRING_TYPE) == 1)
    slot = ch * (collate_nrules + 1);
  else
    {
      const size_t level_size = collate_hash_size * (collate_nrules + 1);
      size_t level;
      slot = (ch % collate_hash_size) * (collate_nrules + 1);
      level = 0;
      while (__collate_tablewc[slot] != (uint32_t) ch)
 	{
 	  if (__collate_tablewc[slot + 1] == 0
 	      || ++level >= collate_hash_layers)
 	    {
 	      size_t idx = collate_undefined;
 	      size_t cnt;
 	      for (cnt = 0; cnt < collate_nrules; ++cnt)
 		{
 		  result->data[cnt].number = __collate_extrawc[idx++];
 		  result->data[cnt].value = &__collate_extrawc[idx];
 		  idx += result->data[cnt].number;
 		}
 	      /* The Unix standard requires that a character outside
 		 the domain is signalled by setting `errno'.  */
 	      __set_errno (EINVAL);
 	      return;
 	    }
 	  slot += level_size;
 	}
    }
  if (__collate_tablewc[slot + 1] != (uint32_t) FORWARD_CHAR)
    {
      /* We have a simple form.  One value for each weight.  */
      size_t cnt;
      for (cnt = 0; cnt < collate_nrules; ++cnt)
 	{
 	  result->data[cnt].number = 1;
 	  result->data[cnt].value = &__collate_tablewc[slot + 1 + cnt];
 	}
      return;
    }
  /* We now look for any collation element which starts with CH.
     There might none, but the last list member is a catch-all case
     because it is simple the character CH.  The value of this entry
     might be the same as UNDEFINED.  */
  slot = __collate_tablewc[slot + 2];
  while (1)
    {
-      size_t idx;
+      size_t nhere;
      const unsigned char *usrc = *cpp;
-      /* This is a comparison between a uint32_t array (aka wchar_t) and
+      /* The first thing is the index.  */
-	 an 8-bit string.  */
+      i = *((int32_t *) cp);
-      for (idx = 0; __collate_extrawc[slot + 2 + idx] != 0; ++idx)
+      cp += sizeof (int32_t);
 	if (__collate_extrawc[slot + 2 + idx] != (uint32_t) (*str)[idx])
 	  break;
-      /* When the loop finished with all character of the collation
+      /* Next is the length of the byte sequence.  These are always
-	 element used, we found the longest prefix.  */
+	 short byte sequences so there is no reason to call any
-      if (__collate_extrawc[slot + 2 + idx] == 0)
+	 function (even if they are inlined).  */
      nhere = *cp++;
      if (i >= 0)
 	{
 	  /* It is a single character.  If it matches we found our
 	     index.  Note that at the end of each list there is an
 	     entry of length zero which represents the single byte
 	     sequence.  The first (and here only) byte was tested
 	     already.  */
 	  size_t cnt;
-	  *str += idx;
+	  for (cnt = 0; cnt < nhere; ++cnt)
-	  idx += slot + 3;
+	    if (cp[cnt] != usrc[cnt])
-	  for (cnt = 0; cnt < collate_nrules; ++cnt)
+	      break;
 	  if (cnt == nhere)
 	    {
-	      result->data[cnt].number = __collate_extrawc[idx++];
+	      /* Found it.  */
-	      result->data[cnt].value = &__collate_extrawc[idx];
+	      *cpp += nhere;
-	      idx += result->data[cnt].number;
+	      return i;
 	    }
 	  return;
 	    }
-      /* To next entry in list.  */
+	  /* Up to the next entry.  */
-      slot += __collate_extrawc[slot];
+	  cp += nhere;
 	}
      else
 	{
 	  /* This is a range of characters.  First decide whether the
 	     current byte sequence lies in the range.  */
 	  size_t cnt;
 	  size_t offset = 0;
 	  for (cnt = 0; cnt < nhere; ++cnt)
 	    if (cp[cnt] != usrc[cnt])
 	      break;
 	  if (cnt != nhere)
 	    {
 	      if (cp[cnt] > usrc[cnt])
 		{
 		  /* Cannot be in this range.  */
 		  cp += 2 * nhere;
 		  continue;
 		}
 	      /* Test against the end of the range.  */
 	      for (cnt = 0; cnt < nhere; ++cnt)
 		if (cp[nhere + cnt] != usrc[cnt])
 		  break;
 	      if (cnt != nhere && cp[nhere + cnt] < usrc[cnt])
 		{
 		  /* Cannot be in this range.  */
 		  cp += 2 * nhere;
 		  continue;
 		}
 	      /* This range matches the next characters.  Now find
 		 the offset in the indirect table.  */
 	      for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt);
 	      do
 		{
 		  offset <<= 8;
 		  offset += usrc[cnt] - cp[cnt];
 		}
 	      while (++cnt < nhere);
 	    }
 	  *cpp += nhere;
 	  return offset;
 	}
    }
  /* NOTREACHED */
  return 0x43219876;
 }
 /* To process a string efficiently we retrieve all information about
   the string at once.  The following macro constructs a double linked
   list of this information.  It is a macro because we use `alloca'
   and we use a double linked list because of the backward collation
   order.
   We have this strange extra macro since the functions which use the
   given locale (not the global one) cannot use the global tables.  */
 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
 # define call_get_weight(strp, newp) get_weight ((strp), (newp))
 #else
 # define call_get_weight(strp, newp) \
  get_weight ((strp), (newp), current, collate_table, collate_extra)
 #endif
 #define get_string(str, forw, backw) \
  do									      \
    {									      \
      weight_t *newp;							      \
      while (*str != '\0')						      \
 	{								      \
 	  newp = (weight_t *) alloca (sizeof (weight_t)			      \
 				      + (collate_nrules			      \
 					 * sizeof (struct data_pair)));	      \
 									      \
 	  newp->prev = backw;						      \
 	  if (backw == NULL)						      \
 	    forw = newp;						      \
 	  else								      \
 	    backw->next = newp;						      \
 	  newp->next = NULL;						      \
 	  backw = newp;							      \
 	  call_get_weight (&str, newp);					      \
 	}								      \
    }									      \
  while (0)
--- a/string/strxfrm.c
+++ b/string/strxfrm.c
@ -17,282 +17,397 @@
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
 #include <langinfo.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#ifndef WIDE_VERSION
+#include "../locale/localeinfo.h"
-# define STRING_TYPE char
+
-# define USTRING_TYPE unsigned char
+#ifdef USE_IN_EXTENDED_LOCALE_MODEL
 # define L_(Ch) Ch
 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
 # define STRXFRM __strxfrm_l
-# else
+#else
 # define STRXFRM strxfrm
 # endif
 # define STRLEN strlen
 # define STPNCPY __stpncpy
 #endif
 /* These are definitions used by some of the functions for handling
    UTF-8 encoding below.  encoding_mask[K] has a bit set for every bit
    position that does not fit into a sequence of K + 2 bytes, and
    encoding_byte[K] is the lead-byte marker of a K + 2 byte sequence.  */
 static const uint32_t encoding_mask[] =
 {
   ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
 };
 static const unsigned char encoding_byte[] =
 {
   0xc0, 0xe0, 0xf0, 0xf8, 0xfc
 };
 /* We need UTF-8 encoding of numbers.
    Store VAL at BUF and return the number of bytes written: 1 for
    values below 0x80, otherwise 2 to 6.  No terminating NUL byte is
    added, and BUF must have room for up to six bytes.  Negative values
    also satisfy the `val < 0x80' test and are stored as one truncated
    byte.  */
 static inline int
 utf8_encode (char *buf, int val)
 {
   int retval;
   if (val < 0x80)
     {
       *buf = (char) val;
       retval = 1;
     }
   else
     {
       int step;
       /* Find the shortest sequence length whose payload can hold VAL.
          If none of the masks fits, STEP ends up as 6.  */
       for (step = 2; step < 6; ++step)
         if ((val & encoding_mask[step - 2]) == 0)
           break;
       retval = step;
       *buf = encoding_byte[step - 2];
       /* Fill the continuation bytes from the last one backwards, six
          payload bits each.  */
       --step;
       do
         {
           buf[step] = 0x80 | (val & 0x3f);
           val >>= 6;
         }
       while (--step > 0);
       /* Whatever is left of VAL goes into the lead byte.  */
       *buf |= val;
     }
   /* BUF is never advanced on the multi-byte path, so a pointer
      difference would report 0 there; return the recorded length.  */
   return retval;
 }
 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
 size_t
-STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n)
+STRXFRM (char *dest, const char *src, size_t n)
 #else
 size_t
-STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
 #endif
 {
      if (n != 0)
 	STPNCPY (dest, src, n);
      return STRLEN (src);
 }
 #if 0
 /* Include the shared helper functions.  `strxfrm'/`wcsxfrm' also use
   these functions.  */
 #include "../locale/weight.h"
 #ifndef WIDE_VERSION
 /* Append VALUE to DEST in a UTF-8 style variable-length encoding.
    Output starts at index ACT; a byte is stored only while its index
    is below MAX, but the index is advanced for every byte of the
    encoding either way.  Returns the index after the last byte, so a
    result larger than MAX means the output did not fit.  */
 static __inline size_t
 print_val (u_int32_t value, char *dest, size_t max, size_t act)
 {
   /* For sequences of 2, 3, 4 and 5 bytes: the lead-byte marker and the
      exclusive upper bound of the payload the lead byte can carry.  */
   static const unsigned char lead_mark[4] = { 0xc0, 0xe0, 0xf0, 0xf8 };
   static const u_int32_t lead_limit[4] = { 0x20, 0x10, 0x08, 0x04 };
   char rev[6];		/* Encoding, least significant byte first.  */
   int used = 0;
   int k;
   if (value < 0x80)
     rev[used++] = (char) value;
   else
     {
       int have_lead = 0;
       /* The lowest six bits always form a continuation byte.  */
       rev[used++] = (char) (0x80 + (value & 0x3f));
       value >>= 6;
       for (k = 0; k < 4; ++k)
         {
           if (value < lead_limit[k])
             {
               rev[used++] = (char) (lead_mark[k] + value);
               have_lead = 1;
               break;
             }
           rev[used++] = (char) (0x80 + (value & 0x3f));
           value >>= 6;
         }
       /* Nothing shorter fits: finish with the six byte lead marker.  */
       if (!have_lead)
         rev[used++] = (char) (0xfc + value);
     }
   /* Emit most significant byte first, counting bytes that do not fit.  */
   for (k = used - 1; k >= 0; --k)
     {
       if (act < max)
         dest[act] = rev[k];
       ++act;
     }
   return act;
 }
 #else
 /* Wide character variant: append VALUE to DEST starting at index ACT.
    An element is stored only while its index is below MAX, but the
    index is advanced for every element of the encoding either way.
    Returns the index after the last element.  */
 static __inline size_t
 print_val (u_int32_t value, wchar_t *dest, size_t max, size_t act)
 {
   /* We cannot really assume wchar_t is 32 bits wide.  But it is for
      GCC and so we don't do much optimization for the other case.  */
   if (sizeof (wchar_t) == 4)
     {
       if (act < max)
         dest[act] = (wchar_t) value;
       ++act;
     }
   else
     {
       /* Split VALUE into up to three units of 14 payload bits each,
          collected least significant unit first.  */
       wchar_t tmp[3];
       size_t idx = 0;
       if (value < 0x8000)
         /* Fits into one unit.  Store the value itself, not the
            output position ACT.  */
         tmp[idx++] = (wchar_t) value;
       else
         {
           tmp[idx++] = (wchar_t) (0x8000 + (value & 0x3fff));
           value >>= 14;
           if (value < 0x2000)
             tmp[idx++] = (wchar_t) (0xc000 + value);
           else
             {
               tmp[idx++] = (wchar_t) (0x8000 + (value & 0x3fff));
               value >>= 14;
               tmp[idx++] = (wchar_t) (0xe000 + value);
             }
         }
       /* Emit the most significant unit first.  */
       while (idx-- > 0)
         {
           if (act < max)
             dest[act] = tmp[idx];
           ++act;
         }
     }
   return act;
 }
 #endif
 /* Transform SRC into a form such that the result of strcmp
   on two strings that have been transformed by strxfrm is
   the same as the result of strcoll on the two strings before
   their transformation.  The transformed string is put in at
   most N characters of DEST and its length is returned.  */
 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
 size_t
 STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n)
 #else
 size_t
 STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
 #endif
 {
 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
  struct locale_data *current = l->__locales[LC_COLLATE];
-# if BYTE_ORDER == BIG_ENDIAN
+  uint_fast32_t nrules = *((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].string);
-  const u_int32_t *collate_table = (const u_int32_t *)
+#else
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].string;
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  const u_int32_t *collate_extra = (const u_int32_t *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].string;
 # elif BYTE_ORDER == LITTLE_ENDIAN
  const u_int32_t *collate_table = (const u_int32_t *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].string;
  const u_int32_t *collate_extra = (const u_int32_t *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].string;
 # else
 #  error bizarre byte order
 # endif
 #endif
-  weight_t *forw = NULL;
+  /* We don't assign the following values right away since it might be
-  weight_t *backw = NULL;
+     unnecessary in case there are no rules.  */
-  size_t pass;
+  const unsigned char *rulesets;
-  size_t written;
+  const int32_t *table;
  const unsigned char *weights;
  const unsigned char *extra;
  const int32_t *indirect;
  uint_fast32_t pass;
  size_t needed;
  const unsigned char *usrc;
  size_t srclen = strlen (src);
  int32_t *idxarr;
  unsigned char *rulearr;
  size_t idxmax;
  size_t idxcnt;
  int use_malloc = 0;
-  /* If the current locale does not specify locale data we use normal
+#include "../locale/weight.h"
-     8-bit string comparison.  */
+
-  if (collate_nrules == 0)
+  if (nrules == 0)
    {
      if (n != 0)
-	STPNCPY (dest, src, n);
+	__stpncpy (dest, src, n);
-      return STRLEN (src);
+      return srclen;
    }
 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
  rulesets = (const unsigned char *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
  table = (const int32_t *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLEMB)].string;
  weights = (const unsigned char *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB)].string;
  extra = (const unsigned char *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB)].string;
  indirect = (const int32_t *)
    current->values[_NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB)].string;
 #else
  rulesets = (const unsigned char *)
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULESETS);
  table = (const int32_t *)
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
  weights = (const unsigned char *)
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
  extra = (const unsigned char *)
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
  indirect = (const int32_t *)
    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 #endif
  /* Handle an empty string as a special case.  */
-  if (*src == '\0')
+  if (srclen == 0)
    {
      if (n != 0)
        *dest = '\0';
      return 1;
    }
-  /* Get full information about the string.  This means we get
+  /* We need the elements of the string as unsigned values since they
-     information for all passes in a special data structure.  */
+     are used as indeces.  */
-  get_string (src, forw, backw);
+  usrc = (const unsigned char *) src;
-  /* Now we have all the information.  In at most the given number of
+  /* Perform the first pass over the string and while doing this find
-     passes we can finally decide about the order.  */
+     and store the weights for each character.  Since we want this to
-  written = 0;
+     be as fast as possible we are using `alloca' to store the temporary
-  for (pass = 0; pass < collate_nrules; ++pass)
+     values.  But since there is no limit on the length of the string
     we have to use `malloc' if the string is too long.  We should be
     very conservative here.  */
  if (srclen >= 16384)
    {
-      int forward = (collate_rules[pass] & sort_forward) != 0;
+      idxarr = (int32_t *) malloc (srclen * (sizeof (int32_t) + 1));
-      const weight_t *run = forward ? forw : backw;
+      rulearr = (unsigned char *) &idxarr[srclen];
      int idx = forward ? 0 : run->data[pass].number - 1;
-      while (1)
+      if (idxarr == NULL)
-	{
+	/* No memory.  Well, go with the stack then.
 	  int ignore = 0;
 	  u_int32_t w = 0;
-	  /* Here we have to check for IGNORE entries.  If these are
+	   XXX Once this implementation is stable we will handle this
-	     found we count them and go on with he next value.  */
+	   differently.  Instead of precomputing the indeces we will
-	  while (run != NULL
+	   do this in time.  This means, though, that this happens for
-		 && ((w = run->data[pass].value[idx])
+	   every pass again.  */
-		     == (u_int32_t) IGNORE_CHAR))
+	goto try_stack;
-	    {
+      use_malloc = 1;
 	      ++ignore;
 	      if (forward
 		  ? ++idx >= run->data[pass].number
 		  : --idx < 0)
 		{
 		  weight_t *nextp = forward ? run->next : run->prev;
 		  if (nextp == NULL)
 		    {
 		      w = 0;
 		      /* No more non-INGOREd elements means lowest
 			 possible value.  */
 		      ignore = -1;
    }
  else
-		    idx = forward ? 0 : nextp->data[pass].number - 1;
+    {
-		  run = nextp;
+    try_stack:
      idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
      rulearr = (unsigned char *) alloca (srclen);
    }
  idxmax = 0;
  do
    {
      int32_t tmp = findidx (&usrc);
      rulearr[idxmax] = tmp >> 24;
      idxarr[idxmax] = tmp & 0x80ffffff;
      ++idxmax;
    }
  while (*usrc != '\0');
  /* Now the passes over the weights.  We now use the indeces we found
     before.  */
  needed = 0;
  for (pass = 0; pass < nrules; ++pass)
    {
      size_t backw_stop = ~0ul;
      int rule = rulesets[rulearr[0] * nrules + pass];
      /* We assume that if a rule has defined `position' in one section
 	 this is true for all of them.  */
      int position = rule & sort_position;
      if (position == 0)
 	{
 	  for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
 	    {
 	      if ((rule & sort_forward) != 0)
 		{
 		  size_t len;
 		  if (backw_stop != ~0ul)
 		    {
 		      /* Handle the pushed elements now.  */
 		      size_t backw;
 		      for (backw = idxcnt - 1; backw >= backw_stop; --backw)
 			{
 			  len = weights[idxarr[backw]++];
 			  if (needed + len < n)
 			    while (len-- > 0)
 			      dest[needed++] = weights[idxarr[backw]++];
 			  else
 			    {
 				/* No more characters fit into the buffer.  */
 			      needed += len;
 			      idxarr[backw] += len;
 			    }
 			}
-	  /* Stop if all characters are processed.  */
+		      backw_stop = ~0ul;
-	  if (run == NULL)
+		    }
 	    break;
-	  /* Now we have information of the number of ignored weights
+		  /* Now handle the forward element.  */
-	     and the value of the next weight.  We have to add 2
+		  len = weights[idxarr[idxcnt]++];
-	     because 0 means EOS and 1 is the intermediate string end.  */
+		  if (needed + len < n)
-	  if ((collate_rules[pass] & sort_position) != 0)
+		    while (len-- > 0)
-	    written = print_val (ignore + 2, dest, n, written);
+		      dest[needed++] = weights[idxarr[idxcnt]++];
-
+		  else
 	  if (w != 0)
 	    written = print_val (w, dest, n, written);
 	  /* We have to increment the index counters.  */
 	  if (forward)
 		    {
-	      if (++idx >= run->data[pass].number)
+		      /* No more characters fit into the buffer.  */
-		{
+		      needed += len;
-		  run = run->next;
+		      idxarr[idxcnt] += len;
 		  idx = 0;
 		    }
 		}
 	      else
 		{
-	      if (--idx < 0)
+		  /* Remember where the backwards series started.  */
 		  if (backw_stop == ~0ul)
 		    backw_stop = idxcnt;
 		}
 	      rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
 	    }
 	  if (backw_stop != ~0ul)
 	    {
-		  run = run->prev;
+	      /* Handle the pushed elements now.  */
-		  if (run != NULL)
+	      size_t backw;
-		    idx = run->data[pass].number - 1;
+
 	      for (backw = idxcnt - 1; backw >= backw_stop; --backw)
 		{
 		  size_t len = weights[idxarr[backw]++];
 		  if (needed + len < n)
 		    while (len-- > 0)
 		      dest[needed++] = weights[idxarr[backw]++];
 		  else
 		    {
 		      /* No more characters fit into the buffer.  */
 		      needed += len;
 		      idxarr[backw] += len;
 		    }
 		}
 	    }
 	}
      else
 	{
 	  int val = 1;
 	  char buf[7];
 	  size_t buflen;
 	  size_t i;
 	  for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
 	    {
 	      if ((rule & sort_forward) != 0)
 		{
 		  size_t len;
 		  if (backw_stop != ~0ul)
 		    {
 		     /* Handle the pushed elements now.  */
 		      size_t backw;
 		      for (backw = idxcnt - 1; backw >= backw_stop; --backw)
 			{
 			  len = weights[idxarr[backw]++];
 			  if (len != 0)
 			    {
 			      buflen = utf8_encode (buf, val);
 			      if (needed + buflen + len < n)
 				{
 				  for (i = 0; i < buflen; ++i)
 				    dest[needed + i] = buf[i];
 				  for (i = 0; i < len; ++i)
 				    dest[needed + buflen + i] =
 				      weights[idxarr[backw] + i];
 				}
 			      idxarr[backw] += len;
 			      needed += buflen + len;
 			      val = 1;
 			    }
 			  else
 			    ++val;
 			}
 		      backw_stop = ~0ul;
 		    }
 		  /* Now handle the forward element.  */
 		  len = weights[idxarr[idxcnt]++];
 		  if (len != 0)
 		    {
 		      buflen = utf8_encode (buf, val);
 		      if (needed + buflen + len < n)
 			{
 			  for (i = 0; i < buflen; ++i)
 			    dest[needed + i] = buf[i];
 			  for (i = 0; i < len; ++i)
 			    dest[needed + buflen + i] =
 			      weights[idxarr[idxcnt] + i];
 			}
 		      idxarr[idxcnt] += len;
 		      needed += buflen + len;
 		      val = 1;
 		    }
 		  else
 		    /* Note that we don't have to increment `idxarr[idxcnt]'
 		       since the length is zero.  */
 		    ++val;
 		}
 	      else
 		{
 		  /* Remember where the backwards series started.  */
 		  if (backw_stop == ~0ul)
 		    backw_stop = idxcnt;
 		}
 	      rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
 	    }
 	  if (backw_stop != ~0)
 	    {
 	      /* Handle the pushed elements now.  */
 	      size_t backw;
 	      for (backw = idxmax - 1; backw >= backw_stop; --backw)
 		{
 		  size_t len = weights[idxarr[backw]++];
 		  if (len != 0)
 		    {
 		      buflen = utf8_encode (buf, val);
 		      if (needed + buflen + len < n)
 			{
 			  for (i = 0; i < buflen; ++i)
 			    dest[needed + i] = buf[i];
 			  for (i = 0; i < len; ++i)
 			    dest[needed + buflen + i] =
 			      weights[idxarr[backw] + i];
 			}
 		      idxarr[backw] += len;
 		      needed += buflen + len;
 		      val = 1;
 		    }
 		  else
 		    ++val;
 		}
 	    }
 	}
-      /* Write marker for end of word.  */
+      /* Finally store the byte to separate the passes or terminate
-      if (pass + 1 < collate_nrules)
+	 the string.  */
-	written = print_val (1, dest, n, written);
+      if (needed < n)
 	dest[needed] = pass + 1 < nrules ? '\1' : '\0';
      ++needed;
    }
-  /* Terminate string.  */
+  /* This is a little optimization: many collation specifications have
-  if (written < n)
+     a `position' rule at the end and if no non-ignored character
-    dest[written] = L_('\0');
+     is found the last \1 byte is immediately followed by a \0 byte
     signalling this.  We can avoid the \1 byte(s).  */
  if (needed > 2 && dest[needed - 2] == '\1')
    {
      /* Remove the \1 byte.  */
      --needed;
      dest[needed - 1] = '\0';
    }
-  /* Return length without counting the terminating '\0'.  */
+  /* Free the memory if needed.  */
-  return written;
+  if (use_malloc)
    free (idxarr);
  return needed;
 }
 #endif
--- a/wcsmbs/wcsxfrm.c
+++ b/wcsmbs/wcsxfrm.c
@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@ -19,16 +19,23 @@
 #include <wchar.h>
 /* Parameters describing the wide-character flavour of the transformation
    routine: a flag marking the wide build and the element types used.  */
 #define WIDE_VERSION 1
 #define STRING_TYPE wchar_t
 #define USTRING_TYPE wint_t
 /* Paste the `L' prefix onto a character literal to make it wide.  */
 #define L_(Ch) L##Ch
 /* Name under which the function in this file is defined.  The extended
    locale model build uses the internal `_l' name.  */
 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
 # define STRXFRM __wcsxfrm_l
 #else
 # define STRXFRM wcsxfrm
 #endif
 /* Aliases for the wide-character length and bounded-copy routines.  */
 #define STRLEN __wcslen
 #define STPNCPY __wcpncpy
-#include <string/strxfrm.c>
+
 /* Placeholder wide-character transformation: no collation weights are
    applied.  SRC is copied into DEST through __wcpncpy with a limit of N
    wide characters (nothing is written when N is zero), and the length
    of SRC as reported by __wcslen is returned.  The variant built for the
    extended locale model additionally takes a locale argument L, which
    is not consulted here.  */
 size_t
 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
 STRXFRM (wchar_t *dest, const wchar_t *src, size_t n, __locale_t l)
 #else
 STRXFRM (wchar_t *dest, const wchar_t *src, size_t n)
 #endif
 {
   if (n == 0)
     /* No room in DEST at all: only report the length.  */
     return __wcslen (src);

   /* Copy first, then measure, keeping the original order of the calls.  */
   __wcpncpy (dest, src, n);
   return __wcslen (src);
 }
--- a/wctype/wctrans.c
+++ b/wctype/wctrans.c
@ -52,5 +52,5 @@ wctrans (const char *property)
  /* We have to search the table.  */
  result = (int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
-  return (wctrans_t) (result + 128);
+  return (wctrans_t) result;
 }