2000-05-24  Ulrich Drepper  <drepper@redhat.com>

	* locale/programs/ld-collate.c (struct element_t): Add mbseqorder
	and wcseqorder members.
	(struct locale_collate_t): Likewise.
	(collate_finish): Assign collation sequence value to each character.
	Create tables for output.
	(collate_output): Write out tables with collation sequence information.
	* locale/C-collate.c: Provide C locale data for collation sequence
	table.
	* locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
	_NL_COLLATE_COLLSEQWC.
	* locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
	_NL_COLLATE_COLLSEQWC.
	* posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
	including fnmatch_loop.c.
	* posix/fnmatch_loop.c: Don't use strcoll while determining whether
	a character is matched by a range expression.  Use the collation
	sequence table.  Outside glibc, fall back on a simple character
	value comparison.
This commit is contained in:
Ulrich Drepper 2000-05-24 20:22:51 +00:00
parent b7cbee1cb0
commit acb5ee2e56
8 changed files with 1644 additions and 22 deletions

View File

@ -1,3 +1,23 @@
2000-05-24 Ulrich Drepper <drepper@redhat.com>
* locale/programs/ld-collate.c (struct element_t): Add mbseqorder
and wcseqorder members.
(struct locale_collate_t): Likewise.
(collate_finish): Assign collation sequence value to each character.
Create tables for output.
(collate_output): Write out tables with collation sequence information.
* locale/C-collate.c: Provide C locale data for collation sequence
table.
* locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
_NL_COLLATE_COLLSEQWC.
* locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
_NL_COLLATE_COLLSEQWC.
* posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
including fnmatch_loop.c.
* posix/fnmatch_loop.c: Don't use strcoll while determining whether
a character is matched by a range expression. Use the collation
sequence table. Outside glibc, fall back on a simple character
value comparison.
2000-05-24 Andreas Jaeger <aj@suse.de>
* sysdeps/mips/elf/start.S (ENTRY_POINT): Align stack for double

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
/* Copyright (C) 1995, 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
@ -20,12 +20,84 @@
#include <endian.h>
#include "localeinfo.h"
/* Collation sequence table of the C locale for single-byte characters.
   It has one entry for each of the 256 possible byte values and entry
   N holds the value N, i.e., every byte collates at the position given
   by its own character value.  */
static const char collseqmb[] =
{
  '\x00', '\x01', '\x02', '\x03',
  '\x04', '\x05', '\x06', '\x07',
  '\x08', '\x09', '\x0a', '\x0b',
  '\x0c', '\x0d', '\x0e', '\x0f',
  '\x10', '\x11', '\x12', '\x13',
  '\x14', '\x15', '\x16', '\x17',
  '\x18', '\x19', '\x1a', '\x1b',
  '\x1c', '\x1d', '\x1e', '\x1f',
  '\x20', '\x21', '\x22', '\x23',
  '\x24', '\x25', '\x26', '\x27',
  '\x28', '\x29', '\x2a', '\x2b',
  '\x2c', '\x2d', '\x2e', '\x2f',
  '\x30', '\x31', '\x32', '\x33',
  '\x34', '\x35', '\x36', '\x37',
  '\x38', '\x39', '\x3a', '\x3b',
  '\x3c', '\x3d', '\x3e', '\x3f',
  '\x40', '\x41', '\x42', '\x43',
  '\x44', '\x45', '\x46', '\x47',
  '\x48', '\x49', '\x4a', '\x4b',
  '\x4c', '\x4d', '\x4e', '\x4f',
  '\x50', '\x51', '\x52', '\x53',
  '\x54', '\x55', '\x56', '\x57',
  '\x58', '\x59', '\x5a', '\x5b',
  '\x5c', '\x5d', '\x5e', '\x5f',
  '\x60', '\x61', '\x62', '\x63',
  '\x64', '\x65', '\x66', '\x67',
  '\x68', '\x69', '\x6a', '\x6b',
  '\x6c', '\x6d', '\x6e', '\x6f',
  '\x70', '\x71', '\x72', '\x73',
  '\x74', '\x75', '\x76', '\x77',
  '\x78', '\x79', '\x7a', '\x7b',
  '\x7c', '\x7d', '\x7e', '\x7f',
  '\x80', '\x81', '\x82', '\x83',
  '\x84', '\x85', '\x86', '\x87',
  '\x88', '\x89', '\x8a', '\x8b',
  '\x8c', '\x8d', '\x8e', '\x8f',
  '\x90', '\x91', '\x92', '\x93',
  '\x94', '\x95', '\x96', '\x97',
  '\x98', '\x99', '\x9a', '\x9b',
  '\x9c', '\x9d', '\x9e', '\x9f',
  '\xa0', '\xa1', '\xa2', '\xa3',
  '\xa4', '\xa5', '\xa6', '\xa7',
  '\xa8', '\xa9', '\xaa', '\xab',
  '\xac', '\xad', '\xae', '\xaf',
  '\xb0', '\xb1', '\xb2', '\xb3',
  '\xb4', '\xb5', '\xb6', '\xb7',
  '\xb8', '\xb9', '\xba', '\xbb',
  '\xbc', '\xbd', '\xbe', '\xbf',
  '\xc0', '\xc1', '\xc2', '\xc3',
  '\xc4', '\xc5', '\xc6', '\xc7',
  '\xc8', '\xc9', '\xca', '\xcb',
  '\xcc', '\xcd', '\xce', '\xcf',
  '\xd0', '\xd1', '\xd2', '\xd3',
  '\xd4', '\xd5', '\xd6', '\xd7',
  '\xd8', '\xd9', '\xda', '\xdb',
  '\xdc', '\xdd', '\xde', '\xdf',
  '\xe0', '\xe1', '\xe2', '\xe3',
  '\xe4', '\xe5', '\xe6', '\xe7',
  '\xe8', '\xe9', '\xea', '\xeb',
  '\xec', '\xed', '\xee', '\xef',
  '\xf0', '\xf1', '\xf2', '\xf3',
  '\xf4', '\xf5', '\xf6', '\xf7',
  '\xf8', '\xf9', '\xfa', '\xfb',
  '\xfc', '\xfd', '\xfe', '\xff'
};
/* Wide character counterpart of the C locale collation sequence
   table.  It has 256 entries and entry N holds the value N, so each of
   the covered wide characters collates at the position given by its
   own character value.  */
static const uint32_t collseqwc[] =
{
  L'\x00', L'\x01', L'\x02', L'\x03',
  L'\x04', L'\x05', L'\x06', L'\x07',
  L'\x08', L'\x09', L'\x0a', L'\x0b',
  L'\x0c', L'\x0d', L'\x0e', L'\x0f',
  L'\x10', L'\x11', L'\x12', L'\x13',
  L'\x14', L'\x15', L'\x16', L'\x17',
  L'\x18', L'\x19', L'\x1a', L'\x1b',
  L'\x1c', L'\x1d', L'\x1e', L'\x1f',
  L'\x20', L'\x21', L'\x22', L'\x23',
  L'\x24', L'\x25', L'\x26', L'\x27',
  L'\x28', L'\x29', L'\x2a', L'\x2b',
  L'\x2c', L'\x2d', L'\x2e', L'\x2f',
  L'\x30', L'\x31', L'\x32', L'\x33',
  L'\x34', L'\x35', L'\x36', L'\x37',
  L'\x38', L'\x39', L'\x3a', L'\x3b',
  L'\x3c', L'\x3d', L'\x3e', L'\x3f',
  L'\x40', L'\x41', L'\x42', L'\x43',
  L'\x44', L'\x45', L'\x46', L'\x47',
  L'\x48', L'\x49', L'\x4a', L'\x4b',
  L'\x4c', L'\x4d', L'\x4e', L'\x4f',
  L'\x50', L'\x51', L'\x52', L'\x53',
  L'\x54', L'\x55', L'\x56', L'\x57',
  L'\x58', L'\x59', L'\x5a', L'\x5b',
  L'\x5c', L'\x5d', L'\x5e', L'\x5f',
  L'\x60', L'\x61', L'\x62', L'\x63',
  L'\x64', L'\x65', L'\x66', L'\x67',
  L'\x68', L'\x69', L'\x6a', L'\x6b',
  L'\x6c', L'\x6d', L'\x6e', L'\x6f',
  L'\x70', L'\x71', L'\x72', L'\x73',
  L'\x74', L'\x75', L'\x76', L'\x77',
  L'\x78', L'\x79', L'\x7a', L'\x7b',
  L'\x7c', L'\x7d', L'\x7e', L'\x7f',
  L'\x80', L'\x81', L'\x82', L'\x83',
  L'\x84', L'\x85', L'\x86', L'\x87',
  L'\x88', L'\x89', L'\x8a', L'\x8b',
  L'\x8c', L'\x8d', L'\x8e', L'\x8f',
  L'\x90', L'\x91', L'\x92', L'\x93',
  L'\x94', L'\x95', L'\x96', L'\x97',
  L'\x98', L'\x99', L'\x9a', L'\x9b',
  L'\x9c', L'\x9d', L'\x9e', L'\x9f',
  L'\xa0', L'\xa1', L'\xa2', L'\xa3',
  L'\xa4', L'\xa5', L'\xa6', L'\xa7',
  L'\xa8', L'\xa9', L'\xaa', L'\xab',
  L'\xac', L'\xad', L'\xae', L'\xaf',
  L'\xb0', L'\xb1', L'\xb2', L'\xb3',
  L'\xb4', L'\xb5', L'\xb6', L'\xb7',
  L'\xb8', L'\xb9', L'\xba', L'\xbb',
  L'\xbc', L'\xbd', L'\xbe', L'\xbf',
  L'\xc0', L'\xc1', L'\xc2', L'\xc3',
  L'\xc4', L'\xc5', L'\xc6', L'\xc7',
  L'\xc8', L'\xc9', L'\xca', L'\xcb',
  L'\xcc', L'\xcd', L'\xce', L'\xcf',
  L'\xd0', L'\xd1', L'\xd2', L'\xd3',
  L'\xd4', L'\xd5', L'\xd6', L'\xd7',
  L'\xd8', L'\xd9', L'\xda', L'\xdb',
  L'\xdc', L'\xdd', L'\xde', L'\xdf',
  L'\xe0', L'\xe1', L'\xe2', L'\xe3',
  L'\xe4', L'\xe5', L'\xe6', L'\xe7',
  L'\xe8', L'\xe9', L'\xea', L'\xeb',
  L'\xec', L'\xed', L'\xee', L'\xef',
  L'\xf0', L'\xf1', L'\xf2', L'\xf3',
  L'\xf4', L'\xf5', L'\xf6', L'\xf7',
  L'\xf8', L'\xf9', L'\xfa', L'\xfb',
  L'\xfc', L'\xfd', L'\xfe', L'\xff'
};
const struct locale_data _nl_C_LC_COLLATE =
{
_nl_C_name,
NULL, 0, 0, /* no file mapped */
UNDELETABLE,
16,
18,
{
{ word: 0 },
{ string: NULL },
@ -40,8 +112,10 @@ const struct locale_data _nl_C_LC_COLLATE =
{ string: NULL },
{ string: NULL },
{ string: NULL },
{ word: 0 },
{ string: NULL },
{ string: NULL }
{ string: NULL },
{ string: NULL },
{ string: collseqmb },
{ wstr: collseqwc }
}
};

View File

@ -58,6 +58,8 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word)
DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string)
DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string)
DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, string)
DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, string)
), NO_POSTLOAD)

View File

@ -248,6 +248,8 @@ enum
_NL_COLLATE_SYMB_HASH_SIZEMB,
_NL_COLLATE_SYMB_TABLEMB,
_NL_COLLATE_SYMB_EXTRAMB,
_NL_COLLATE_COLLSEQMB,
_NL_COLLATE_COLLSEQWC,
_NL_NUM_LC_COLLATE,
/* LC_CTYPE category: character classification.

View File

@ -1,3 +1,7 @@
2000-05-24 Ulrich Drepper <drepper@redhat.com>
* locales/iso14651_t1: New file.
2000-05-15 Andreas Jaeger <aj@suse.de>
* tst-fmon.data: Change testcase following fixes for

File diff suppressed because it is too large Load Diff

View File

@ -48,6 +48,15 @@
# include <wctype.h>
#endif
/* We need some of the locale data (the collation sequence information)
but there is no interface to get this information in general. Therefore
we support a correct implementation only in glibc. */
#ifdef _LIBC
# include "../locale/localeinfo.h"
# define CONCAT(a,b) __CONCAT(a,b)
#endif
/* Comment out all this code if we are using the GNU C Library, and are not
actually compiling the library itself. This code is part of the GNU C
Library, but also included in many other GNU distributions. Compiling
@ -192,6 +201,7 @@ __wcschrnul (s, c)
# define STRCHR(S, C) strchr (S, C)
# define STRCHRNUL(S, C) __strchrnul (S, C)
# define STRCOLL(S1, S2) strcoll (S1, S2)
# define SUFFIX MB
# include "fnmatch_loop.c"
@ -209,7 +219,10 @@ __wcschrnul (s, c)
# define BTOWC(C) (C)
# define STRCHR(S, C) wcschr (S, C)
# define STRCHRNUL(S, C) __wcschrnul (S, C)
# define STRCOLL(S1, S2) wcscoll (S1, S2)
# define STRCOLL(S1, S2) wcscoll (S1, S2)
# define SUFFIX WC
# define WIDE_CHAR_VERSION 1
# undef IS_CHAR_CLASS
# ifdef _LIBC

View File

@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags)
{
register const CHAR *p = pattern, *n = string;
register UCHAR c;
#ifdef _LIBC
const UCHAR *collseq = (const UCHAR *)
_NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
# ifdef WIDE_CHAR_VERSION
const wint_t *names = (const wint_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
# endif
#endif
while ((c = *p++) != L('\0'))
{
@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags)
/* Leave room for the null. */
CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
size_t c1 = 0;
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
wctype_t wt;
# endif
#endif
const CHAR *startp = p;
for (;;)
@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags)
}
str[c1] = L('\0');
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
wt = IS_CHAR_CLASS (str);
if (wt == 0)
/* Invalid character class name. */
@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags)
if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
goto matched;
# else
#else
if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags)
|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
goto matched;
# endif
#endif
}
else if (c == L('\0'))
/* [ (unterminated) loses. */
@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags)
if (c == L('-') && *p != L(']'))
{
/* It is a range. */
CHAR lo[2];
CHAR fc[2];
#if _LIBC
/* We have to find the collation sequence
value for C. Collation sequence is nothing
we can regularly access. The sequence
value is defined by the order in which the
definitions of the collation values for the
various characters appear in the source
file. A strange concept, nowhere
documented. */
int32_t fseqidx;
int32_t lseqidx;
UCHAR cend = *p++;
# ifdef WIDE_CHAR_VERSION
size_t cnt;
# endif
if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
cend = *p++;
if (cend == L('\0'))
return FNM_NOMATCH;
lo[0] = cold;
lo[1] = L('\0');
fc[0] = fn;
fc[1] = L('\0');
if (STRCOLL (lo, fc) <= 0)
# ifdef WIDE_CHAR_VERSION
/* Search in the `names' array for the characters. */
fseqidx = fn % size;
cnt = 0;
while (names[fseqidx] != fn)
{
CHAR hi[2];
hi[0] = FOLD (cend);
hi[1] = L('\0');
if (STRCOLL (fc, hi) <= 0)
if (++cnt == layers)
/* XXX We don't know anything about
the character we are supposed to
match. This means we are failing. */
goto range_not_matched;
fseqidx += size;
}
lseqidx = cold % size;
cnt = 0;
while (names[lseqidx] != cold)
{
if (++cnt == layers)
{
lseqidx = -1;
break;
}
lseqidx += size;
}
# else
fseqidx = fn;
lseqidx = cold;
# endif
/* XXX It is not entirely clear to me how to handle
characters which are not mentioned in the
collation specification. */
if (
# ifdef WIDE_CHAR_VERSION
lseqidx == -1 ||
# endif
collseq[lseqidx] <= collseq[fseqidx])
{
/* We have to look at the upper bound. */
int32_t hseqidx;
cend = FOLD (cend);
# ifdef WIDE_CHAR_VERSION
hseqidx = cend % size;
cnt = 0;
while (names[hseqidx] != cend)
{
if (++cnt == layers)
{
/* Hum, no information about the upper
bound. The matching succeeds if the
lower bound is matched exactly. */
if (lseqidx == -1 || cold != fn)
goto range_not_matched;
goto matched;
}
}
# else
hseqidx = cend;
# endif
if (
# ifdef WIDE_CHAR_VERSION
(lseqidx == -1
&& collseq[fseqidx] == collseq[hseqidx]) ||
# endif
collseq[fseqidx] <= collseq[hseqidx])
goto matched;
}
# ifdef WIDE_CHAR_VERSION
range_not_matched:
# endif
#else
/* We use a boring value comparison of the character
values. This is better than comparing using
`strcoll' since the latter would have surprising
and sometimes fatal consequences. */
UCHAR cend = *p++;
if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
cend = *p++;
if (cend == L('\0'))
return FNM_NOMATCH;
/* It is a range. */
if (cold <= fc && fc <= c)
goto matched;
#endif
c = *p++;
}
@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags)
#undef STRCOLL
#undef L
#undef BTOWC
#undef SUFFIX