(collate_output): Update.

* locale/programs/ld-collate.c (collate_output): Emit correct
	information for collation elements.
	Don't write over end of array idx.
	* posix/regex.c: Handle also collation elements at end of range.

	* posix/PTESTS: Fix a few typos.
This commit is contained in:
Ulrich Drepper 1999-12-31 22:21:25 +00:00
parent 1c5d461740
commit ac8295d23b
5 changed files with 86 additions and 81 deletions

View File

@ -1,5 +1,12 @@
1999-12-31 Ulrich Drepper <drepper@cygnus.com>
* locale/programs/ld-collate.c (collate_output): Emit correct
information for collation elements.
Don't write over end of array idx.
* posix/regex.c: Handle also collation elements at end of range.
* posix/PTESTS: Fix a few typos.
* posix/bits/posix2_lim.h: Remove _POSIX2_EQUIV_CLASS_MAX. I have
no idea where this came from.
* sysdeps/posix/sysconf.c: Remove _POSIX2_EQUIV_CLASS_MAX

View File

@ -91,8 +91,6 @@ struct element_t
unsigned int used_in_level;
struct element_list_t *weights;
/* Index in the `weight' table in the output file for the character. */
int32_t weights_idx;
/* Nonzero if this is a real character definition. */
int is_character;
@ -301,7 +299,6 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
/* Will be allocated later. */
newp->weights = NULL;
newp->weights_idx = 0;
newp->file = NULL;
newp->line = 0;
@ -1809,9 +1806,6 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
obstack_grow (pool, buf, len);
}
/* Remember the index. */
elem->weights_idx = retval;
return retval | ((elem->section->ruleidx & 0x7f) << 24);
}
@ -1899,11 +1893,26 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
/* If we have no LC_COLLATE data emit only the number of rules as zero. */
if (collate == NULL)
{
int32_t dummy = 0;
while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
{
iov[2 + cnt].iov_base = (char *) "";
iov[2 + cnt].iov_len = 0;
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
/* The words have to be handled specially. */
if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)
|| cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)
|| cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
{
iov[2 + cnt].iov_base = &dummy;
iov[2 + cnt].iov_len = sizeof (int32_t);
}
else
{
iov[2 + cnt].iov_base = (char *) "";
iov[2 + cnt].iov_len = 0;
}
if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
++cnt;
}
@ -2453,23 +2462,20 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
elem_table[idx * 2] = hash;
elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
/* Now add the index into the weights table. We know the
address is always 32bit aligned. */
if (sizeof (int) == sizeof (int32_t))
obstack_int_grow (&extrapool, runp->weights_idx);
else
obstack_grow (&extrapool, &runp->weights_idx,
sizeof (int32_t));
/* Then the string itself including length. */
obstack_1grow (&extrapool, namelen);
obstack_grow (&extrapool, runp->name, namelen);
/* And the multibyte representation. */
obstack_1grow (&extrapool, runp->nmbs);
obstack_grow (&extrapool, runp->mbs, runp->nmbs);
/* And align again to 32 bits. */
if ((1 + namelen) % sizeof (int32_t) != 0)
if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
obstack_grow (&extrapool, "\0\0",
(sizeof (int32_t)
- (1 + namelen) % sizeof (int32_t)));
- ((1 + namelen + 1 + runp->nmbs)
% sizeof (int32_t))));
}
}
@ -2492,7 +2498,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
iov[2 + cnt].iov_base = obstack_finish (&extrapool);
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
++cnt;

View File

@ -115,7 +115,7 @@
3¦3¦[][.-.]-0]¦ab0-]¦
3¦3¦[A-[.].]c]¦ab]!¦
# GA122
-2¦-2¦[[.ch]]¦abc¦
-2¦-2¦[[.ch.]]¦abc¦
-2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦
# GA125
2¦2¦[[=a=]b]¦Abc¦
@ -163,12 +163,12 @@
2¦6¦bc[d-w]xy¦abchxyz¦
# GA129
1¦1¦[a-cd-f]¦dbccde¦
-1¦-1¦[a-ce-f¦dBCCdE¦
-1¦-1¦[a-ce-f]¦dBCCdE¦
2¦4¦b[n-zA-M]Y¦absY9Z¦
2¦4¦b[n-zA-M]Y¦abGY9Z¦
# GA130
3¦3¦[-xy]¦ac-¦
2¦4¦[c[-xy]D¦ac-D+¦
2¦4¦c[-xy]D¦ac-D+¦
2¦2¦[--/]¦a.b¦
2¦4¦c[--/]D¦ac.D+b¦
2¦2¦[^-ac]¦abcde-¦
@ -189,7 +189,7 @@
3¦4¦[a-c][e-f]¦acbedf¦
4¦8¦abc*XYZ¦890abXYZ#*¦
4¦9¦abc*XYZ¦890abcXYZ#*¦
4¦15¦abc*XYZ¦890abccccccccXYZ#*¦
4¦15¦abc*XYZ¦890abcccccccXYZ#*¦
-1¦-1¦abc*XYZ¦890abc*XYZ#*¦
# GA132
2¦4¦\(*bc\)¦a*bc¦
@ -267,7 +267,7 @@
1¦1¦^a¦abc¦
-1¦-1¦^b¦abc¦
-1¦-1¦^[a-zA-Z]¦99Nine¦
1¦4¦^[a-zA-Z]¦Nine99¦
1¦4¦^[a-zA-Z]*¦Nine99¦
# GA145(1)
1¦2¦\(^a\)\1¦aabc¦
-1¦-1¦\(^a\)\1¦^a^abc¦
@ -284,7 +284,7 @@
3¦3¦a$¦cba¦
-1¦-1¦a$¦abc¦
5¦7¦[a-z]*$¦99ZZxyz¦
-1¦-1¦[a-z]*$¦99ZZxyz99¦
9¦9¦[a-z]*$¦99ZZxyz99¦
3¦3¦$$¦ab$¦
-1¦-1¦$$¦$ab¦
3¦3¦\$$¦ab$¦

View File

@ -110,7 +110,7 @@
{ 3, 3, "[][.-.]-0]", "ab0-]", },
{ 3, 3, "[A-[.].]c]", "ab]!", },
{ 0, 0, "GA122", NULL, },
{ -2, -2, "[[.ch]]", "abc", },
{ -2, -2, "[[.ch.]]", "abc", },
{ -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", },
{ 0, 0, "GA125", NULL, },
{ 2, 2, "[[=a=]b]", "Abc", },
@ -158,12 +158,12 @@
{ 2, 6, "bc[d-w]xy", "abchxyz", },
{ 0, 0, "GA129", NULL, },
{ 1, 1, "[a-cd-f]", "dbccde", },
{ -1, -1, "[a-ce-f", "dBCCdE", },
{ -1, -1, "[a-ce-f]", "dBCCdE", },
{ 2, 4, "b[n-zA-M]Y", "absY9Z", },
{ 2, 4, "b[n-zA-M]Y", "abGY9Z", },
{ 0, 0, "GA130", NULL, },
{ 3, 3, "[-xy]", "ac-", },
{ 2, 4, "[c[-xy]D", "ac-D+", },
{ 2, 4, "c[-xy]D", "ac-D+", },
{ 2, 2, "[--/]", "a.b", },
{ 2, 4, "c[--/]D", "ac.D+b", },
{ 2, 2, "[^-ac]", "abcde-", },
@ -184,7 +184,7 @@
{ 3, 4, "[a-c][e-f]", "acbedf", },
{ 4, 8, "abc*XYZ", "890abXYZ#*", },
{ 4, 9, "abc*XYZ", "890abcXYZ#*", },
{ 4, 15, "abc*XYZ", "890abccccccccXYZ#*", },
{ 4, 15, "abc*XYZ", "890abcccccccXYZ#*", },
{ -1, -1, "abc*XYZ", "890abc*XYZ#*", },
{ 0, 0, "GA132", NULL, },
{ 2, 4, "\\(*bc\\)", "a*bc", },
@ -262,7 +262,7 @@
{ 1, 1, "^a", "abc", },
{ -1, -1, "^b", "abc", },
{ -1, -1, "^[a-zA-Z]", "99Nine", },
{ 1, 4, "^[a-zA-Z]", "Nine99", },
{ 1, 4, "^[a-zA-Z]*", "Nine99", },
{ 0, 0, "GA145(1)", NULL, },
{ 1, 2, "\\(^a\\)\\1", "aabc", },
{ -1, -1, "\\(^a\\)\\1", "^a^abc", },
@ -274,7 +274,7 @@
{ 3, 3, "a$", "cba", },
{ -1, -1, "a$", "abc", },
{ 5, 7, "[a-z]*$", "99ZZxyz", },
{ -1, -1, "[a-z]*$", "99ZZxyz99", },
{ 9, 9, "[a-z]*$", "99ZZxyz99", },
{ 3, 3, "$$", "ab$", },
{ -1, -1, "$$", "$ab", },
{ 3, 3, "\\$$", "ab$", },

View File

@ -1570,7 +1570,8 @@ static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
reg_syntax_t syntax));
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
const char **p_ptr,
const char *pend,
char *translate,
reg_syntax_t syntax,
@ -2174,6 +2175,7 @@ regex_compile (pattern, size, syntax, bufp)
case '[':
{
boolean had_char_class = false;
unsigned int range_start = 0xffffffff;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@ -2217,6 +2219,7 @@ regex_compile (pattern, size, syntax, bufp)
PATFETCH (c1);
SET_LIST_BIT (c1);
range_start = c1;
continue;
}
@ -2241,8 +2244,10 @@ regex_compile (pattern, size, syntax, bufp)
&& *p != ']')
{
reg_errcode_t ret
= compile_range (&p, pend, translate, syntax, b);
= compile_range (range_start, &p, pend, translate,
syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
range_start = 0xffffffff;
}
else if (p[0] == '-' && p[1] != ']')
@ -2252,8 +2257,9 @@ regex_compile (pattern, size, syntax, bufp)
/* Move past the `-'. */
PATFETCH (c1);
ret = compile_range (&p, pend, translate, syntax, b);
ret = compile_range (c, &p, pend, translate, syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
range_start = 0xffffffff;
}
/* See if we're at the beginning of a possible character
@ -2376,6 +2382,7 @@ regex_compile (pattern, size, syntax, bufp)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
range_start = ':';
had_char_class = false;
}
}
@ -2503,6 +2510,16 @@ regex_compile (pattern, size, syntax, bufp)
#endif
had_char_class = true;
}
else
{
c1++;
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT ('=');
range_start = '=';
had_char_class = false;
}
}
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
{
@ -2553,6 +2570,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Set the bit for the character. */
SET_LIST_BIT (str[0]);
range_start = ((const unsigned char *) str)[0];
}
#ifdef _LIBC
else
@ -2561,9 +2579,7 @@ regex_compile (pattern, size, syntax, bufp)
those known to the collate implementation.
First find out whether the bytes in `str' are
actually from exactly one character. */
const unsigned char *weights;
int32_t table_size;
const int32_t *table;
const int32_t *symb_table;
const unsigned char *extra;
int32_t idx;
@ -2574,10 +2590,6 @@ regex_compile (pattern, size, syntax, bufp)
int32_t hash;
int ch;
table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
weights = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
table_size =
_NL_CURRENT_WORD (LC_COLLATE,
_NL_COLLATE_SYMB_HASH_SIZEMB);
@ -2598,17 +2610,15 @@ regex_compile (pattern, size, syntax, bufp)
{
/* First compare the hashing value. */
if (symb_table[2 * elem] == hash
&& (c1 == extra[symb_table[2 * elem + 1]
+ sizeof (int32_t)])
&& c1 == extra[symb_table[2 * elem + 1]]
&& memcmp (str,
&extra[symb_table[2 * elem + 1]
+ sizeof (int32_t) + 1],
+ 1],
c1) == 0)
{
/* Yep, this is the entry. */
idx = *((int32_t *)
(extra
+ symb_table[2 * elem + 1]));
idx = symb_table[2 * elem + 1];
idx += 1 + extra[idx];
break;
}
@ -2624,40 +2634,21 @@ regex_compile (pattern, size, syntax, bufp)
class. */
PATFETCH (c);
/* Now we have to go through the whole table
and find all characters which have the same
first level weight.
/* Now add the multibyte character(s) we found
to the accepted list.
XXX Note that this is not entirely correct.
we would have to match multibyte sequences
but this is not possible with the current
implementation. */
for (ch = 1; ch < 256; ++ch)
/* XXX This test would have to be changed if we
would allow matching multibyte sequences. */
if (table[ch] > 0)
{
int32_t idx2 = table[ch];
size_t len = weights[idx2];
/* Test whether the lengths match. */
if (weights[idx] == len)
{
/* They do. Now compare the bytes of
the weight. */
size_t cnt = 0;
while (cnt < len
&& (weights[idx + 1 + cnt]
== weights[idx2 + 1 + cnt]))
++len;
if (cnt == len)
/* They match. Mark the character as
acceptable. */
SET_LIST_BIT (ch);
}
}
implementation. Also, we have to match
collating symbols, which expand to more than
one byte, as a whole and not allow the
individual bytes. */
c1 = extra[idx++];
if (c1 == 1)
range_start = extra[idx];
while (c1-- > 0)
SET_LIST_BIT (extra[idx++]);
}
#endif
had_char_class = false;
@ -2668,7 +2659,8 @@ regex_compile (pattern, size, syntax, bufp)
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT ('=');
SET_LIST_BIT ('.');
range_start = '.';
had_char_class = false;
}
}
@ -2676,6 +2668,7 @@ regex_compile (pattern, size, syntax, bufp)
{
had_char_class = false;
SET_LIST_BIT (c);
range_start = c;
}
}
@ -3425,7 +3418,8 @@ group_in_compile_stack (compile_stack, regnum)
`regex_compile' itself. */
static reg_errcode_t
compile_range (p_ptr, pend, translate, syntax, b)
compile_range (range_start, p_ptr, pend, translate, syntax, b)
unsigned int range_start;
const char **p_ptr, *pend;
RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax;
@ -3434,7 +3428,7 @@ compile_range (p_ptr, pend, translate, syntax, b)
unsigned this_char;
const char *p = *p_ptr;
unsigned int range_start, range_end;
unsigned int range_end;
if (p == pend)
return REG_ERANGE;
@ -3447,7 +3441,6 @@ compile_range (p_ptr, pend, translate, syntax, b)
We also want to fetch the endpoints without translating them; the
appropriate translation is done in the bit-setting loop below. */
/* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
range_start = ((const unsigned char *) p)[-2];
range_end = ((const unsigned char *) p)[0];
/* Have to increment the pointer into the pattern string, so the