glibc/stdlib/grouping.c

/* Internal function for proving correct grouping in strings of numbers.
   Copyright (C) 1995,1996,1997,1998,2000,2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <limits.h>
#include <stddef.h>
#include <string.h>

#ifndef MAX
/* Yield the larger of A and B.  Each argument is evaluated exactly once
   because it is first copied into a local of its own type, so arguments
   with side effects are safe.  This relies on the GNU C statement
   expression extension.  `__typeof__' is spelled with underscores so
   that the macro also compiles in strict ISO modes (e.g. -std=c11),
   where the plain `typeof' keyword is not recognized.  */
#define MAX(a,b)	({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); \
			   _a > _b ? _a : _b; })
#endif

#ifdef USE_WIDE_CHAR
# include <wctype.h>
# define L_(Ch) L##Ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
#else
# define L_(Ch) Ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
#endif

#include "grouping.h"

/* Find the maximum prefix of the string between BEGIN and END which
   satisfies the grouping rules.  It is assumed that at least one digit
   follows BEGIN directly.

   BEGIN points to the first character to examine and END one past the
   last one.  THOUSANDS is the thousands separator: a single wide
   character in the USE_WIDE_CHAR build, a NUL-terminated byte string
   otherwise.  GROUPING lists the group sizes starting with the
   rightmost group.  A zero element means that the previous size is
   used for all remaining groups.  An element equal to CHAR_MAX (or
   negative, where `char' is signed) means that no further separator
   may appear to the left.

   The return value points one past the end of the accepted prefix and
   is never smaller than BEGIN.  END is returned unchanged when GROUPING
   is NULL or when the string contains no separator at all.

   NOTE(review): the byte-string matcher compares THOUSANDS[cnt] with
   CP[thousands_len - 1 - cnt], i.e. against the bytes at CP in reverse
   order, and the length arithmetic treats the separator as a single
   character.  This looks correct only for one-byte separators; confirm
   before relying on multi-byte separators.  An empty THOUSANDS would
   make `thousands_len - 1' wrap around; presumably callers never pass
   one.  */

const STRING_TYPE *
#ifdef USE_WIDE_CHAR
__correctly_grouped_prefixwc (const STRING_TYPE *begin, const STRING_TYPE *end,
			      wchar_t thousands,
#else
__correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
			      const char *thousands,
#endif
			      const char *grouping)
{
#ifndef USE_WIDE_CHAR
  size_t thousands_len;
  int cnt;
#endif

  /* Without grouping rules every string is acceptable.  */
  if (grouping == NULL)
    return end;

#ifndef USE_WIDE_CHAR
  thousands_len = strlen (thousands);
#endif

  /* Each iteration examines the candidate [BEGIN, END).  Either the
     candidate is accepted and returned, or END is moved to the left and
     the shorter candidate is examined.  */
  while (end > begin)
    {
      const STRING_TYPE *cp = end - 1;
      const char *gp = grouping;

      /* Check first group: scan backwards for the rightmost separator.  */
      while (cp >= begin)
        {
#ifdef USE_WIDE_CHAR
          if (*cp == thousands)
            break;
#else
          if (cp[thousands_len - 1] == *thousands)
            {
              for (cnt = 1; thousands[cnt] != '\0'; ++cnt)
                if (thousands[cnt] != cp[thousands_len - 1 - cnt])
                  break;
              if (thousands[cnt] == '\0')
                break;
            }
#endif
          --cp;
        }

      /* We allow the representation to contain no grouping at all even if
         the locale specifies we can have grouping.  */
      if (cp < begin)
        return end;

      /* From here on CP >= BEGIN and CP points to a thousands separator
         character.  */
      if (end - cp == (int) *gp + 1)
        {
          /* This group matches the specification.  */

          /* The preceding remainder of the string from BEGIN to NEW_END
             is the part we will consider if there is a grouping error
             in this trailing portion from CP to END.
             NOTE(review): END is exclusive everywhere else, so CP - 1
             also drops the digit directly in front of the separator;
             confirm that this is intended.  */
          const STRING_TYPE *new_end = cp - 1;

          /* Loop while the grouping is correct.  */
          while (1)
            {
              /* Get the next grouping rule.  */
              ++gp;
              if (*gp == 0)
                /* If end is reached use last rule.  */
                --gp;

              /* Skip the thousands separator.  */
              --cp;

              if (*gp == CHAR_MAX
#if CHAR_MIN < 0
                  || *gp < 0
#endif
                  )
                {
                  /* No more thousands separators are allowed to follow.  */
                  while (cp >= begin)
                    {
#ifdef USE_WIDE_CHAR
                      if (*cp == thousands)
                        break;
#else
                      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
                        if (thousands[cnt] != cp[thousands_len - cnt - 1])
                          break;
                      if (thousands[cnt] == '\0')
                        break;
#endif
                      --cp;
                    }

                  if (cp < begin)
                    /* OK, only digits followed.  */
                    return end;

                  /* A separator showed up where the rule forbids one.
                     This is a grouping error, so punt instead of
                     continuing with whatever follows the CHAR_MAX
                     element.  */
                  break;
                }
              else
                {
                  /* Check the next group.  */
                  const STRING_TYPE *group_end = cp;

                  while (cp >= begin)
                    {
#ifdef USE_WIDE_CHAR
                      if (*cp == thousands)
                        break;
#else
                      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
                        if (thousands[cnt] != cp[thousands_len - cnt - 1])
                          break;
                      if (thousands[cnt] == '\0')
                        break;
#endif
                      --cp;
                    }

                  /* The leftmost group may be shorter than the rule.  */
                  if (cp < begin && group_end - cp <= (int) *gp)
                    /* Final group is correct.  */
                    return end;

                  if (cp < begin || group_end - cp != (int) *gp)
                    /* Incorrect group.  Punt.  */
                    break;
                }
            }

          /* The trailing portion of the string starting at NEW_END
             contains a grouping error.  So we will look for a correctly
             grouped number in the preceding portion instead.  */
          end = new_end;
        }
      else
        {
          /* Even the first group was wrong; determine maximum shift.  */
          if (end - cp > (int) *gp + 1)
            /* Too many digits after the separator: keep only as many
               as the first rule allows.  */
            end = cp + (int) *gp + 1;
          else
            /* Too few digits: cut the string at the separator CP
               points to.  */
            end = cp;
        }
    }

  /* END may have been moved in front of BEGIN; never return less than
     BEGIN.  */
  return MAX (begin, end);
}
Update. 2003-06-11 Ulrich Drepper <drepper@redhat.com> * time/tzfile.c: Add a couple of __builtin_expect. Remove warnings gcc 3.3 shows. * argp/argp-help.c (hol_entry_short_iterate): Don't inline. * elf/dl-load.c (fillin_rpath): Likewise. (add_path): Likewise. * elf/dl-version.c (find_needed): Always inline. * elf/do-lookup.c (FCT): Don't inline. * iconv/Makefile: Extend vpath to intl subdir. (iconvconfig-modules): Add hash-string. * iconv/gconv_charset.h (strip): Don't inline. (upstr): Always inline. Move __gconv_compare_alias prototype to... * iconv/gconv_int.h: ...here. * iconv/gconv_db.c: Don't include gconv_charset.h. * iconv/gconv_conf.c (add_alias): Don't inline. (insert_module): Likewise. * iconv/gconv_simple.c (internal_ucs4_loop): Always inline. (internal_ucs4_loop_unaligned): Likewise. (internal_ucs4_loop_single): Likewise. (ucs4_internal_loop): Likewise. (ucs4_internal_loop_unaligned): Likewise. (ucs4_internal_loop_single): Likewise. (internal_ucs4le_loop): Always inline. (internal_ucs4le_loop_unaligned): Likewise. (internal_ucs4le_loop_single): Likewise. (ucs4le_internal_loop): Likewise. (ucs4le_internal_loop_unaligned): Likewise. (ucs4le_internal_loop_single): Likewise. * iconv/loop.c: Always inline the defined functions. * iconvdata/cns11642.h: Likewise. * iconvdata/cns11642l1.h: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/gb2312.h: Likewise. * iconvdata/jis0201.h: Likewise. * iconvdata/jis0208.h: Likewise. * iconvdata/jis0212.h: Likewise. * iconvdata/jisx0213.h: Likewise. * iconvdata/ksc5601.h: Likewise. * iconvdata/utf-7.c (base64): Don't inline. * include/libc-symbols.h (symbol_set_first_element): Add cast (symbol_set_end_p): Likewise. * include/set-hooks (RUN_HOOK): Likewise. * inet/Makefile (aux): Add ifreq. * intl/Makefile (aux): Add some entries from routines. Add hash-string. * intl/hash-string.c: New file. * intl/hash-string.h: Remove hash_string definition. Declare __hash_string. 
* iconv/gconv_cache.c (find_module_idx): Adjust hash_string caller. * iconv/iconvconfig.c (new_name): Likewise. * intl/dcigettext.c (_nl_find_msg): Likewise. * intl/loadmsgcat.c (_nl_load_domain): Likewise. * io/ftw.c (open_dir_stream): Always inline. (process_entry): Don't inline. * locale/findlocale.c: Include gconv_int.h. * locale/setlocale.c (new_composite_name): Don't inline. * locale/weight.h (findidx): Always inline. * locale/weightwc.h (findidx): Likewise. * locale/programs/linereader.c (lr_ignore_rest): Define here. * locale/programs/linereader.h (lr_ignore_rest): Don't define here, just declare it. (lr_getc): Always inline. (lr_ungetc): Likewise. * nss/nss_files/files-parse.c (parse_list): Likewise. * stdio-common/Makefile (aux): Add printf-parsemb and printf-parsewc. * stdio-common/_itoa.h (_itoa_word): Always inline. (_fitoa_word, _fitoa): Don't define here, only declare. * stdio-common/_itoa.c (_iftoa_word): Add here. (_fitoa): Likewise. * stdio-common/_itowa.h (_itowa_word): Always inline. * stdio-common/printf-parse.h (read_int): Don't inline. (find_spec): Don't define. Declare __find_specmb and __find_specwc. (parse_one_spec): Don't define. Declare __parse_one_specmb and __parse_one_specwc. * stdio-common/printf-parsemb.c: New file. * stdio-common/printf-parsewc.c: New file. * stdio-common/vfprintf.c: Update calls to find_spec and parse_one_spec for new names. * stdio-common/printf-prs.c: Likewise. Define DONT_NEED_READ_INT. * stdlib/Makefile (aux): Add grouping and groupingwc. * stdlib/grouping.c: New file. * stdlib/groupingwc.c: New file. * stdlib/grouping.h (correctly_grouped_prefix): Don't define here. Just prototype. * stdlib/rpmatch.c (try): Don't inline. * stdlib/strtod.c (round_and_return): Don't line. (str_to_mpn): Likewise. (__mpn_lshift_1): Always inline. Optimize only for constant count. Adjust for name change of correctly_grouped_prefix. * sysdeps/generic/strtol.c: Adjust for name change of correctly_grouped_prefix. 
* string/strxfrm.c (utf8_encode): Don't inline. * sysdeps/generic/dl-cache.c: Define _dl_cache_libcmp. * sysdeps/generic/dl-cache.h: Just declare _dl_cache_libcmp. * sysdeps/generic/ifreq.c: New file. * sysdeps/unix/sysv/linux/ifreq.c: New file. * sysdeps/generic/ifreq.h (__ifreq): Only declare here. * sysdeps/unix/sysv/linux/ifreq.h: Likewise. * sysdeps/generic/ldsodefs.h (_dl_name_match_p): Always inline. * sysdeps/generic/unwind-dw2-fde.c (start_fde_sort): Don't inline. (fde_split): Likewise. (fde_merge): Likewise. (end_fde_sort): Likewise. (init_object): Likewise. (binary_search_unencoded_fdes): Likewise. (binary_search_single_encoding_fdes): Likewise. (binary_search_mixed_encoding_fdes): Likewise. * sysdeps/generic/wordexp.c (w_addchar): Don't inline. * sysdeps/i386/dl-machine.c (elf_machine_runtime_setup): Always inline. * sysdeps/posix/sprofil.c (profil_count): Don't inline. * sysdeps/unix/sysv/linux/Makefile [subdir=io] (sysdep_routines): Add xstatconv. * sysdeps/unix/sysv/linux/xstatconv.h: New file. * sysdeps/unix/sysv/linux/xstatconv.c: Don't inline the function. Export them. Prepend __ to name. * sysdeps/unix/sysv/linux/Dist: Add xstatconv.h. * sysdeps/unix/sysv/linux/fxstat.c: Adjust for name change of conversion functions. * sysdeps/unix/sysv/linux/fxstat64.c: Likewise. * sysdeps/unix/sysv/linux/lxstat.c: Likewise. * sysdeps/unix/sysv/linux/lxstat64.c: Likewise. * sysdeps/unix/sysv/linux/xstat.c: Likewise. * sysdeps/unix/sysv/linux/xstat64.c: Likewise. * sysdeps/unix/sysv/linux/i386/fxstat.c: Likewise. * sysdeps/unix/sysv/linux/i386/lxstat.c: Likewise. * sysdeps/unix/sysv/linux/i386/xstat.c: Likewise. * sysdeps/unix/sysv/linux/pathconf.c (__statfs_link_max, __statfs_filesize_max, __statfs_symlinks): Define here. __ prepended to name. Change callers. * sysdeps/unix/sysv/linux/pathconf.h (__statfs_link_max, __statfs_filesize_max, __statfs_symlinks): Don't define here, just declare. * sysdeps/unix/sysv/linux/fpathconf.c: Change all callers. 
* time/tzfile.c (decode): Always inline. * wcsmbs/wcsnrtombs.c: Change type of inbuf to unsigned char. Remove cast in tomb function call. wcsmbs/wcsrtombs.c Likewise. * wcsmbs/wcstob.c: Introduce new temp variable to take pointer in tomb function call. 2003-06-12 01:22:36 +02:00			`/* Internal header for proving correct grouping in strings of numbers.`
			`Copyright (C) 1995,1996,1997,1998,2000,2003 Free Software Foundation, Inc.`
			`This file is part of the GNU C Library.`
			`Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.`

			`The GNU C Library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`

			`The GNU C Library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`

			`You should have received a copy of the GNU Lesser General Public`
			`License along with the GNU C Library; if not, write to the Free`
			`Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA`
			`02111-1307 USA. */`

			`#include <limits.h>`
			`#include <stddef.h>`
			`#include <string.h>`

			`#ifndef MAX`
			`#define MAX(a,b) ({ typeof(a) _a = (a); typeof(b) _b = (b); \`
			`_a > _b ? _a : _b; })`
			`#endif`

			`#ifdef USE_WIDE_CHAR`
			`# include <wctype.h>`
			`# define L_(Ch) L##Ch`
			`# define UCHAR_TYPE wint_t`
			`# define STRING_TYPE wchar_t`
			`#else`
			`# define L_(Ch) Ch`
			`# define UCHAR_TYPE unsigned char`
			`# define STRING_TYPE char`
			`#endif`

			`#include "grouping.h"`

			`/* Find the maximum prefix of the string between BEGIN and END which`
			`satisfies the grouping rules. It is assumed that at least one digit`
			`follows BEGIN directly. */`

			`const STRING_TYPE *`
			`#ifdef USE_WIDE_CHAR`
			`__correctly_grouped_prefixwc (const STRING_TYPE *begin, const STRING_TYPE *end,`
			`wchar_t thousands,`
			`#else`
			`__correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,`
			`const char *thousands,`
			`#endif`
			`const char *grouping)`
			`{`
			`#ifndef USE_WIDE_CHAR`
			`size_t thousands_len;`
			`int cnt;`
			`#endif`

			`if (grouping == NULL)`
			`return end;`

			`#ifndef USE_WIDE_CHAR`
			`thousands_len = strlen (thousands);`
			`#endif`

			`while (end > begin)`
			`{`
			`const STRING_TYPE *cp = end - 1;`
			`const char *gp = grouping;`

			`/* Check first group. */`
			`while (cp >= begin)`
			`{`
			`#ifdef USE_WIDE_CHAR`
			`if (*cp == thousands)`
			`break;`
			`#else`
			`if (cp[thousands_len - 1] == *thousands)`
			`{`
			`for (cnt = 1; thousands[cnt] != '\0'; ++cnt)`
			`if (thousands[cnt] != cp[thousands_len - 1 - cnt])`
			`break;`
			`if (thousands[cnt] == '\0')`
			`break;`
			`}`
			`#endif`
			`--cp;`
			`}`

			`/* We allow the representation to contain no grouping at all even if`
			`the locale specifies we can have grouping. */`
			`if (cp < begin)`
			`return end;`

			`if (end - cp == (int) *gp + 1)`
			`{`
			`/* This group matches the specification. */`

			`const STRING_TYPE *new_end;`

			`if (cp < begin)`
			`/* There is just one complete group. We are done. */`
			`return end;`

			`/* CP points to a thousands separator character. The preceding`
			`remainder of the string from BEGIN to NEW_END is the part we`
			`will consider if there is a grouping error in this trailing`
			`portion from CP to END. */`
			`new_end = cp - 1;`

			`/* Loop while the grouping is correct. */`
			`while (1)`
			`{`
			`/* Get the next grouping rule. */`
			`++gp;`
			`if (*gp == 0)`
			`/* If end is reached use last rule. */`
			`--gp;`

			`/* Skip the thousands separator. */`
			`--cp;`

			`if (*gp == CHAR_MAX`
			`#if CHAR_MIN < 0`
			`|| *gp < 0`
			`#endif`
			`)`
			`{`
			`/* No more thousands separators are allowed to follow. */`
			`while (cp >= begin)`
			`{`
			`#ifdef USE_WIDE_CHAR`
			`if (*cp == thousands)`
			`break;`
			`#else`
			`for (cnt = 0; thousands[cnt] != '\0'; ++cnt)`
			`if (thousands[cnt] != cp[thousands_len - cnt - 1])`
			`break;`
			`if (thousands[cnt] == '\0')`
			`break;`
			`#endif`
			`--cp;`
			`}`

			`if (cp < begin)`
			`/* OK, only digits followed. */`
			`return end;`
			`}`
			`else`
			`{`
			`/* Check the next group. */`
			`const STRING_TYPE *group_end = cp;`

			`while (cp >= begin)`
			`{`
			`#ifdef USE_WIDE_CHAR`
			`if (*cp == thousands)`
			`break;`
			`#else`
			`for (cnt = 0; thousands[cnt] != '\0'; ++cnt)`
			`if (thousands[cnt] != cp[thousands_len - cnt - 1])`
			`break;`
			`if (thousands[cnt] == '\0')`
			`break;`
			`#endif`
			`--cp;`
			`}`

			`if (cp < begin && group_end - cp <= (int) *gp)`
			`/* Final group is correct. */`
			`return end;`

			`if (cp < begin || group_end - cp != (int) *gp)`
			`/* Incorrect group. Punt. */`
			`break;`
			`}`
			`}`

			`/* The trailing portion of the string starting at NEW_END`
			`contains a grouping error. So we will look for a correctly`
			`grouped number in the preceding portion instead. */`
			`end = new_end;`
			`}`
			`else`
			`{`
			`/* Even the first group was wrong; determine maximum shift. */`
			`if (end - cp > (int) *gp + 1)`
			`end = cp + (int) *gp + 1;`
			`else if (cp < begin)`
			`/* This number does not fill the first group, but is correct. */`
			`return end;`
			`else`
			`/* CP points to a thousands separator character. */`
			`end = cp;`
			`}`
			`}`

			`return MAX (begin, end);`
			`}`