From 5e4633932782f08412e8cee75236f4f458591a3d Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 28 Jul 2000 19:52:40 +0000 Subject: [PATCH] Update. 2000-07-27 Bruno Haible * locale/C-ctype.c (_nl_C_LC_CTYPE): Swap the two names in _NL_CTYPE_MAP_NAMES. * locale/localeinfo.h (__TOW_toupper, __TOW_tolower): New enum values. * wctype/wcfuncs.c (towlower, towupper): Use them. * wctype/wcfuncs_l.c (__towlower_l, __towupper_l): Likewise. * wctype/wctrans.c (wctrans): Likewise. * wctype/wctrans_l.c (__wctrans_l): Likewise. 2000-07-27 Bruno Haible * posix/fnmatch.c (is_char_class): Fix logic. Avoid buffer overrun. Use the optimized version of this function outside glibc as well. --- ChangeLog | 15 +++++ locale/C-ctype.c | 2 +- locale/localeinfo.h | 9 +++ localedata/ChangeLog | 6 ++ localedata/tests-mbwc/dat_towctrans.c | 14 ++++ localedata/tests-mbwc/tgn_locdef.h | 3 + posix/fnmatch.c | 93 ++++++++++++++++----------- wctype/wcfuncs.c | 4 +- wctype/wcfuncs_l.c | 4 +- wctype/wctrans.c | 4 +- wctype/wctrans_l.c | 4 +- 11 files changed, 110 insertions(+), 48 deletions(-) diff --git a/ChangeLog b/ChangeLog index 52beff8836..269b5b8329 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2000-07-27 Bruno Haible + + * locale/C-ctype.c (_nl_C_LC_CTYPE): Swap the two names in + _NL_CTYPE_MAP_NAMES. + * locale/localeinfo.h (__TOW_toupper, __TOW_tolower): New enum values. + * wctype/wcfuncs.c (towlower, towupper): Use them. + * wctype/wcfuncs_l.c (__towlower_l, __towupper_l): Likewise. + * wctype/wctrans.c (wctrans): Likewise. + * wctype/wctrans_l.c (__wctrans_l): Likewise. + +2000-07-27 Bruno Haible + + * posix/fnmatch.c (is_char_class): Fix logic. Avoid buffer overrun. + Use the optimized version of this function outside glibc as well. + 2000-07-27 Jakub Jelinek * locale/indigits.h (indigit_value): Correct. diff --git a/locale/C-ctype.c b/locale/C-ctype.c index 0b960d9b59..23420c1bc5 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -370,7 +370,7 @@ const struct locale_data _nl_C_LC_CTYPE = "vowel_connect\0" #endif }, - { string: "tolower\0" "toupper\0" + { string: "toupper\0" "tolower\0" #ifdef PREDEFINED_CLASSES "tosymmetric\0" #endif diff --git a/locale/localeinfo.h b/locale/localeinfo.h index b5dfcd92d8..412ac27a06 100644 --- a/locale/localeinfo.h +++ b/locale/localeinfo.h @@ -121,6 +121,15 @@ struct era_entry }; +/* LC_CTYPE specific: + Hardwired indices for standard wide character translation mappings. */ +enum +{ + __TOW_toupper = 0, + __TOW_tolower = 1 +}; + + /* For each category declare the variable for the current locale data. */ #define DEFINE_CATEGORY(category, category_name, items, a) \ extern struct locale_data *_nl_current_##category; diff --git a/localedata/ChangeLog b/localedata/ChangeLog index 1d3c967774..3125e95607 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,9 @@ +2000-07-27 Bruno Haible + + * tests-mbwc/tgn_locdef.h (TST_LOC_C): New macro. + * tests-mbwc/dat_towctrans.c: Apply the en_US test also to the C + locale. + 2000-07-28 Ulrich Drepper * locales/zh_TW: New file. diff --git a/localedata/tests-mbwc/dat_towctrans.c b/localedata/tests-mbwc/dat_towctrans.c index 53083e3bac..1ed53d08cc 100644 --- a/localedata/tests-mbwc/dat_towctrans.c +++ b/localedata/tests-mbwc/dat_towctrans.c @@ -35,6 +35,20 @@ */ TST_TOWCTRANS tst_towctrans_loc [] = { + { + { Ttowctrans, TST_LOC_C }, + { +#ifdef SHOJI_IS_RIGHT + { { 0x0010, "xxxxxxx" }, { 1,EINVAL,1,0x0010 } }, +#else + { { 0x0010, "xxxxxxx" }, { 1,0, 1,0x0010 } }, +#endif + { { 0x007F, "tolower" }, { 1,0, 1,0x007F } }, + { { 0x0061, "toupper" }, { 1,0, 1,0x0041 } }, + { { 0x0041, "tolower" }, { 1,0, 1,0x0061 } }, + { is_last: 1 } + } + }, { { Ttowctrans, TST_LOC_de }, { diff --git a/localedata/tests-mbwc/tgn_locdef.h b/localedata/tests-mbwc/tgn_locdef.h index 96b8a16884..ace63e2c58 100644 --- a/localedata/tests-mbwc/tgn_locdef.h +++ b/localedata/tests-mbwc/tgn_locdef.h @@ -3,6 +3,9 @@ /* Defines for all locales used in the suite. */ +/* POSIX C locale. */ +#define TST_LOC_C "C" + /* German locale with ISO-8859-1. */ #define TST_LOC_de "de_DE.ISO-8859-1" diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 62cfa5fee5..b6f67ae0b3 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -224,13 +224,13 @@ __wcschrnul (s, c) # define SUFFIX WC # define WIDE_CHAR_VERSION 1 - # undef IS_CHAR_CLASS -# ifdef _LIBC /* We have to convert the wide character string in a multibyte string. But - we know that the character class names are ASCII strings and since the - internal wide character encoding is UCS4 we can use a simplified method - to convert the string to a multibyte character string. */ + we know that the character class names consist of alphanumeric characters + from the portable character set, and since the wide character encoding + for a member of the portable character set is the same code point as + its single-byte encoding, we can use a simplified method to convert the + string to a multibyte character string. */ static wctype_t is_char_class (const wchar_t *wcs) { @@ -239,47 +239,62 @@ is_char_class (const wchar_t *wcs) do { - if (*wcs < 0x20 || *wcs >= 0x7f) - return 0; - - *cp++ = (char) *wcs; - } - while (*wcs++ != L'\0'); - - return __wctype (s); -} + /* Test for a printable character from the portable character set. */ +# ifdef _LIBC + if (*wcs < 0x20 || *wcs > 0x7e + || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) + return (wctype_t) 0; # else -/* Since we cannot assume anything about the internal encoding we have to - convert the string back to multibyte representation the hard way. */ -static wctype_t -is_char_class (const wchar_t *wcs) -{ - mbstate_t ps; - const wchar_t *pwc; - char *s; - size_t n; - - memset (&ps, '\0', sizeof (ps)); - - pwc = wcs; - n = wcsrtombs (NULL, &pwc, 0, &ps); - if (n == (size_t) -1) - /* Something went wrong. */ - return 0; - - s = alloca (n + 1); - assert (mbsinit (&ps)); - pwc = wcs; - (void) wcsrtombs (s, &pwc, n + 1, &ps); - - return wctype (s); -} + switch (*wcs) + { + case L' ': case L'!': case L'"': case L'#': case L'%': + case L'&': case L'\'': case L'(': case L')': case L'*': + case L'+': case L',': case L'-': case L'.': case L'/': + case L'0': case L'1': case L'2': case L'3': case L'4': + case L'5': case L'6': case L'7': case L'8': case L'9': + case L':': case L';': case L'<': case L'=': case L'>': + case L'?': + case L'A': case L'B': case L'C': case L'D': case L'E': + case L'F': case L'G': case L'H': case L'I': case L'J': + case L'K': case L'L': case L'M': case L'N': case L'O': + case L'P': case L'Q': case L'R': case L'S': case L'T': + case L'U': case L'V': case L'W': case L'X': case L'Y': + case L'Z': + case L'[': case L'\\': case L']': case L'^': case L'_': + case L'a': case L'b': case L'c': case L'd': case L'e': + case L'f': case L'g': case L'h': case L'i': case L'j': + case L'k': case L'l': case L'm': case L'n': case L'o': + case L'p': case L'q': case L'r': case L's': case L't': + case L'u': case L'v': case L'w': case L'x': case L'y': + case L'z': case L'{': case L'|': case L'}': case L'~': + break; + default: + return (wctype_t) 0; + } # endif + + /* Avoid overrunning the buffer. */ + if (cp == s + CHAR_CLASS_MAX_LENGTH) + return (wctype_t) 0; + + *cp++ = (char) *wcs++; + } + while (*wcs != L'\0'); + + *cp = '\0'; + +# ifdef _LIBC + return __wctype (s); +# else + return wctype (s); +# endif +} # define IS_CHAR_CLASS(string) is_char_class (string) # include "fnmatch_loop.c" # endif + int fnmatch (pattern, string, flags) const char *pattern; diff --git a/wctype/wcfuncs.c b/wctype/wcfuncs.c index f241ed5587..ea697c2729 100644 --- a/wctype/wcfuncs.c +++ b/wctype/wcfuncs.c @@ -100,7 +100,7 @@ wint_t else { /* New locale format. */ - return wctrans_table_lookup (__ctype32_wctrans[1], wc); + return wctrans_table_lookup (__ctype32_wctrans[__TOW_tolower], wc); } } @@ -123,6 +123,6 @@ wint_t else { /* New locale format. */ - return wctrans_table_lookup (__ctype32_wctrans[0], wc); + return wctrans_table_lookup (__ctype32_wctrans[__TOW_toupper], wc); } } diff --git a/wctype/wcfuncs_l.c b/wctype/wcfuncs_l.c index 7b8eb96ce6..5ef543cc47 100644 --- a/wctype/wcfuncs_l.c +++ b/wctype/wcfuncs_l.c @@ -86,7 +86,7 @@ wint_t else { /* New locale format. */ - size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + 1; + size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + __TOW_tolower; const char *desc = locale->__locales[LC_CTYPE]->values[i].string; return wctrans_table_lookup (desc, wc); } @@ -113,7 +113,7 @@ wint_t else { /* New locale format. */ - size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + 0; + size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + __TOW_toupper; const char *desc = locale->__locales[LC_CTYPE]->values[i].string; return wctrans_table_lookup (desc, wc); } diff --git a/wctype/wctrans.c b/wctype/wctrans.c index 3dd72231cd..6c61c0b630 100644 --- a/wctype/wctrans.c +++ b/wctype/wctrans.c @@ -50,9 +50,9 @@ wctrans (const char *property) if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0) { /* Old locale format. */ - if (cnt == 0) + if (cnt == __TOW_toupper) return (wctrans_t) __ctype32_toupper; - else if (cnt == 1) + else if (cnt == __TOW_tolower) return (wctrans_t) __ctype32_tolower; /* We have to search the table. */ diff --git a/wctype/wctrans_l.c b/wctype/wctrans_l.c index acd22ea556..3b8b6050b5 100644 --- a/wctype/wctrans_l.c +++ b/wctype/wctrans_l.c @@ -43,9 +43,9 @@ __wctrans_l (const char *property, __locale_t locale) if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word == 0) { /* Old locale format. */ - if (cnt == 0) + if (cnt == __TOW_toupper) return (wctrans_t) locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOUPPER32)].string; - else if (cnt == 1) + else if (cnt == __TOW_tolower) return (wctrans_t) locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOLOWER32)].string; /* We have to search the table. */