Update.
1999-12-25 Ulrich Drepper <drepper@cygnus.com> * locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the indirect table. * locale/langinfo.h: Likewise. * locale/categories.def: Likewise. Remove reference to postload functions. * locale/lc-collate.c (_nl_postload_collate): Removed. Also remove __collate_tablemb, __collate_weightmb, and __collate_extramb. * locale/localeinfo.h: Remove declaration for removed variables above. Remove prototype for _nl_get_era_entry. * locale/weight.h: Complete rewrite for new collate implementation. * locale/programs/ld-collate.c: Many changes to make output file usable in strxfrm/strcoll. * string/strxfrm.c: Complete rewrite for new collate implementation. * wcsmbs/wcsxfrm.c: Don't use strxfrm.c, implement dummy implementation locally. 1999-12-25 Shinya Hanataka <hanataka@abyss.rim.or.jp> * locale/programs/ld-ctype.c (allocate_arrays): Correctly assign transformation values for chars >255. * wctype/wctrans.c: Return pointer unmodified.
This commit is contained in:
parent
ce40141c6b
commit
450bf66ef2
24
ChangeLog
24
ChangeLog
@ -1,3 +1,27 @@
|
|||||||
|
1999-12-25 Ulrich Drepper <drepper@cygnus.com>
|
||||||
|
|
||||||
|
* locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the
|
||||||
|
indirect table.
|
||||||
|
* locale/langinfo.h: Likewise.
|
||||||
|
* locale/categories.def: Likewise. Remove reference to postload
|
||||||
|
functions.
|
||||||
|
* locale/lc-collate.c (_nl_postload_collate): Removed. Also remove
|
||||||
|
__collate_tablemb, __collate_weightmb, and __collate_extramb.
|
||||||
|
* locale/localeinfo.h: Remove declaration for removed variables above.
|
||||||
|
Remove prototype for _nl_get_era_entry.
|
||||||
|
* locale/weight.h: Complete rewrite for new collate implementation.
|
||||||
|
* locale/programs/ld-collate.c: Many changes to make output file
|
||||||
|
usable in strxfrm/strcoll.
|
||||||
|
* string/strxfrm.c: Complete rewrite for new collate implementation.
|
||||||
|
* wcsmbs/wcsxfrm.c: Don't use strxfrm.c, implement dummy implementation
|
||||||
|
locally.
|
||||||
|
|
||||||
|
1999-12-25 Shinya Hanataka <hanataka@abyss.rim.or.jp>
|
||||||
|
|
||||||
|
* locale/programs/ld-ctype.c (allocate_arrays): Correctly assign
|
||||||
|
transformation values for chars >255.
|
||||||
|
* wctype/wctrans.c: Return pointer unmodified.
|
||||||
|
|
||||||
1999-12-24 Ulrich Drepper <drepper@cygnus.com>
|
1999-12-24 Ulrich Drepper <drepper@cygnus.com>
|
||||||
|
|
||||||
* sysdeps/posix/system.c (__libc_system): Check whether command
|
* sysdeps/posix/system.c (__libc_system): Check whether command
|
||||||
|
@ -150,12 +150,13 @@ const struct locale_data _nl_C_LC_COLLATE =
|
|||||||
_nl_C_name,
|
_nl_C_name,
|
||||||
NULL, 0, 0, /* no file mapped */
|
NULL, 0, 0, /* no file mapped */
|
||||||
UNDELETABLE,
|
UNDELETABLE,
|
||||||
5,
|
6,
|
||||||
{
|
{
|
||||||
{ word: 0 },
|
{ word: 0 },
|
||||||
{ string: NULL },
|
{ string: NULL },
|
||||||
{ string: NULL },
|
{ string: NULL },
|
||||||
{ string: NULL },
|
{ string: NULL },
|
||||||
|
{ string: NULL },
|
||||||
{ string: NULL }
|
{ string: NULL }
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -47,7 +47,8 @@ DEFINE_CATEGORY
|
|||||||
DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string)
|
DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string)
|
||||||
DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string)
|
DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string)
|
||||||
DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string)
|
DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string)
|
||||||
), _nl_postload_collate)
|
DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, string)
|
||||||
|
), NO_POSTLOAD)
|
||||||
|
|
||||||
|
|
||||||
/* The actual definition of ctype is meaningless here. It is hard coded in
|
/* The actual definition of ctype is meaningless here. It is hard coded in
|
||||||
|
@ -235,6 +235,7 @@ enum
|
|||||||
_NL_COLLATE_TABLEMB,
|
_NL_COLLATE_TABLEMB,
|
||||||
_NL_COLLATE_WEIGHTMB,
|
_NL_COLLATE_WEIGHTMB,
|
||||||
_NL_COLLATE_EXTRAMB,
|
_NL_COLLATE_EXTRAMB,
|
||||||
|
_NL_COLLATE_INDIRECTMB,
|
||||||
_NL_NUM_LC_COLLATE,
|
_NL_NUM_LC_COLLATE,
|
||||||
|
|
||||||
/* LC_CTYPE category: character classification.
|
/* LC_CTYPE category: character classification.
|
||||||
|
@ -22,21 +22,3 @@
|
|||||||
|
|
||||||
|
|
||||||
_NL_CURRENT_DEFINE (LC_COLLATE);
|
_NL_CURRENT_DEFINE (LC_COLLATE);
|
||||||
|
|
||||||
const int32_t *__collate_tablemb;
|
|
||||||
const unsigned char *__collate_weightmb;
|
|
||||||
const unsigned char *__collate_extramb;
|
|
||||||
|
|
||||||
/* We are called after loading LC_CTYPE data to load it into
|
|
||||||
the variables used by the collation functions and regex. */
|
|
||||||
void
|
|
||||||
_nl_postload_collate (void)
|
|
||||||
{
|
|
||||||
#define paste(a,b) paste1(a,b)
|
|
||||||
#define paste1(a,b) a##b
|
|
||||||
#define current(x) _NL_CURRENT (LC_COLLATE, paste(_NL_COLLATE_,x))
|
|
||||||
|
|
||||||
__collate_tablemb = (const int32_t *) current (TABLEMB);
|
|
||||||
__collate_weightmb = (const unsigned char *) current (WEIGHTMB);
|
|
||||||
__collate_extramb = (const unsigned char *) current (EXTRAMB);
|
|
||||||
}
|
|
||||||
|
@ -165,9 +165,6 @@ extern void _nl_unload_locale (struct locale_data *locale);
|
|||||||
extern void _nl_remove_locale (int locale, struct locale_data *data);
|
extern void _nl_remove_locale (int locale, struct locale_data *data);
|
||||||
|
|
||||||
|
|
||||||
/* initialize `era' entries */
|
|
||||||
extern void _nl_init_era_entries (void);
|
|
||||||
|
|
||||||
/* Return `era' entry which corresponds to TP. Used in strftime. */
|
/* Return `era' entry which corresponds to TP. Used in strftime. */
|
||||||
extern struct era_entry *_nl_get_era_entry (const struct tm *tp);
|
extern struct era_entry *_nl_get_era_entry (const struct tm *tp);
|
||||||
|
|
||||||
@ -180,10 +177,4 @@ extern const char *_nl_get_alt_digit (unsigned int number);
|
|||||||
/* Similar, but now for wide characters. */
|
/* Similar, but now for wide characters. */
|
||||||
extern const wchar_t *_nl_get_walt_digit (unsigned int number);
|
extern const wchar_t *_nl_get_walt_digit (unsigned int number);
|
||||||
|
|
||||||
|
|
||||||
/* Global variables for LC_COLLATE category data. */
|
|
||||||
extern const int32_t *__collate_tablemb;
|
|
||||||
extern const unsigned char *__collate_extrweightmb;
|
|
||||||
extern const unsigned char *__collate_extramb;
|
|
||||||
|
|
||||||
#endif /* localeinfo.h */
|
#endif /* localeinfo.h */
|
||||||
|
@ -137,9 +137,6 @@ struct locale_collate_t
|
|||||||
/* To make handling of errors easier we have another section. */
|
/* To make handling of errors easier we have another section. */
|
||||||
struct section_list error_section;
|
struct section_list error_section;
|
||||||
|
|
||||||
/* Number of sorting rules given in order_start line. */
|
|
||||||
uint32_t nrules;
|
|
||||||
|
|
||||||
/* Start of the order list. */
|
/* Start of the order list. */
|
||||||
struct element_t *start;
|
struct element_t *start;
|
||||||
|
|
||||||
@ -176,7 +173,7 @@ struct locale_collate_t
|
|||||||
|
|
||||||
/* We have a few global variables which are used for reading all
|
/* We have a few global variables which are used for reading all
|
||||||
LC_COLLATE category descriptions in all files. */
|
LC_COLLATE category descriptions in all files. */
|
||||||
static int nrules;
|
static uint32_t nrules;
|
||||||
|
|
||||||
|
|
||||||
/* These are definitions used by some of the functions for handling
|
/* These are definitions used by some of the functions for handling
|
||||||
@ -426,7 +423,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
|
|||||||
if (! warned)
|
if (! warned)
|
||||||
{
|
{
|
||||||
lr_error (ldfile, _("\
|
lr_error (ldfile, _("\
|
||||||
%s: `%s' mentioned twice in definition of weight %d in category `%s'"),
|
%s: `%s' mentioned twice in definition of weight %d"),
|
||||||
"LC_COLLATE", "position", cnt + 1);
|
"LC_COLLATE", "position", cnt + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -450,7 +447,13 @@ read_directions (struct linereader *ldfile, struct token *arg,
|
|||||||
|
|
||||||
/* See whether we have to increment the counter. */
|
/* See whether we have to increment the counter. */
|
||||||
if (arg->tok != tok_comma && rules[cnt] != 0)
|
if (arg->tok != tok_comma && rules[cnt] != 0)
|
||||||
|
{
|
||||||
|
/* Add the default `forward' if we have seen only `position'. */
|
||||||
|
if (rules[cnt] == sort_position)
|
||||||
|
rules[cnt] = sort_position | sort_forward;
|
||||||
|
|
||||||
++cnt;
|
++cnt;
|
||||||
|
}
|
||||||
|
|
||||||
if (arg->tok == tok_eof || arg->tok == tok_eol)
|
if (arg->tok == tok_eof || arg->tok == tok_eol)
|
||||||
/* End of line or file, so we exit the loop. */
|
/* End of line or file, so we exit the loop. */
|
||||||
@ -876,7 +879,7 @@ insert_value (struct linereader *ldfile, struct token *arg,
|
|||||||
elem->nmbs = seq->nbytes;
|
elem->nmbs = seq->nbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (elem->wcs == NULL && seq != ILLEGAL_CHAR_VALUE)
|
if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
|
||||||
{
|
{
|
||||||
uint32_t wcs[2] = { wc, 0 };
|
uint32_t wcs[2] = { wc, 0 };
|
||||||
|
|
||||||
@ -1552,7 +1555,7 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline int32_t
|
static int32_t
|
||||||
output_weight (struct obstack *pool, struct locale_collate_t *collate,
|
output_weight (struct obstack *pool, struct locale_collate_t *collate,
|
||||||
struct element_t *elem)
|
struct element_t *elem)
|
||||||
{
|
{
|
||||||
@ -1575,25 +1578,18 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
|
|||||||
int len = 0;
|
int len = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* Add the direction. */
|
|
||||||
obstack_1grow (pool, elem->section->rules[cnt]);
|
|
||||||
|
|
||||||
for (i = 0; i < elem->weights[cnt].cnt; ++i)
|
for (i = 0; i < elem->weights[cnt].cnt; ++i)
|
||||||
/* Encode the weight value. */
|
/* Encode the weight value. We do nothing for IGNORE entries. */
|
||||||
if (elem->weights[cnt].w[i] == NULL)
|
if (elem->weights[cnt].w[i] != NULL)
|
||||||
{
|
|
||||||
/* This entry was IGNORE. */
|
|
||||||
buf[len++] = IGNORE_CHAR;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
len += utf8_encode (&buf[len],
|
len += utf8_encode (&buf[len],
|
||||||
elem->weights[cnt].w[i]->mborder[cnt]);
|
elem->weights[cnt].w[i]->mborder[cnt]);
|
||||||
|
|
||||||
/* And add the buffer content. */
|
/* And add the buffer content. */
|
||||||
|
obstack_1grow (pool, len);
|
||||||
obstack_grow (pool, buf, len);
|
obstack_grow (pool, buf, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return retval;
|
return retval | ((elem->section->ruleidx & 0x7f) << 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1611,11 +1607,13 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
int32_t tablemb[256];
|
int32_t tablemb[256];
|
||||||
struct obstack weightpool;
|
struct obstack weightpool;
|
||||||
struct obstack extrapool;
|
struct obstack extrapool;
|
||||||
|
struct obstack indirectpool;
|
||||||
struct section_list *sect;
|
struct section_list *sect;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
obstack_init (&weightpool);
|
obstack_init (&weightpool);
|
||||||
obstack_init (&extrapool);
|
obstack_init (&extrapool);
|
||||||
|
obstack_init (&indirectpool);
|
||||||
|
|
||||||
data.magic = LIMAGIC (LC_COLLATE);
|
data.magic = LIMAGIC (LC_COLLATE);
|
||||||
data.n = nelems;
|
data.n = nelems;
|
||||||
@ -1629,7 +1627,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
cnt = 0;
|
cnt = 0;
|
||||||
|
|
||||||
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
|
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
|
||||||
iov[2 + cnt].iov_base = &collate->nrules;
|
iov[2 + cnt].iov_base = &nrules;
|
||||||
iov[2 + cnt].iov_len = sizeof (uint32_t);
|
iov[2 + cnt].iov_len = sizeof (uint32_t);
|
||||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||||
++cnt;
|
++cnt;
|
||||||
@ -1638,7 +1636,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
|
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
|
||||||
if (sect->ruleidx == i)
|
if (sect->ruleidx == i)
|
||||||
{
|
{
|
||||||
obstack_grow (&weightpool, sect->rules, nrules);
|
int j;
|
||||||
|
|
||||||
|
obstack_make_room (&weightpool, nrules);
|
||||||
|
|
||||||
|
for (j = 0; j < nrules; ++j)
|
||||||
|
obstack_1grow_fast (&weightpool, sect->rules[j]);
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
/* And align the output. */
|
/* And align the output. */
|
||||||
@ -1719,38 +1722,60 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* More than one consecutive entry. We mark this by having
|
|
||||||
a negative index into the weight table. */
|
|
||||||
weightidx = -weightidx;
|
|
||||||
|
|
||||||
/* Now add first the initial byte sequence. */
|
/* Now add first the initial byte sequence. */
|
||||||
added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
|
added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
|
||||||
+ __alignof__ (int32_t) - 1)
|
+ __alignof__ (int32_t) - 1)
|
||||||
& ~(__alignof__ (int32_t) - 1));
|
& ~(__alignof__ (int32_t) - 1));
|
||||||
obstack_make_room (&extrapool, added);
|
obstack_make_room (&extrapool, added);
|
||||||
|
|
||||||
|
/* More than one consecutive entry. We mark this by having
|
||||||
|
a negative index into the indirect table. */
|
||||||
if (sizeof (int32_t) == sizeof (int))
|
if (sizeof (int32_t) == sizeof (int))
|
||||||
obstack_int_grow_fast (&extrapool, weightidx);
|
obstack_int_grow_fast (&extrapool,
|
||||||
|
obstack_object_size (&indirectpool)
|
||||||
|
/ sizeof (int32_t));
|
||||||
else
|
else
|
||||||
obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
|
{
|
||||||
obstack_1grow_fast (&extrapool, runp->section->ruleidx);
|
int32_t i = (obstack_object_size (&indirectpool)
|
||||||
|
/ sizeof (int32_t));
|
||||||
|
obstack_grow (&extrapool, &i, sizeof (int32_t));
|
||||||
|
}
|
||||||
obstack_1grow_fast (&extrapool, runp->nmbs - 1);
|
obstack_1grow_fast (&extrapool, runp->nmbs - 1);
|
||||||
for (i = 1; i < runp->nmbs; ++i)
|
for (i = 1; i < runp->nmbs; ++i)
|
||||||
obstack_1grow_fast (&extrapool, runp->mbs[i]);
|
obstack_1grow_fast (&extrapool, runp->mbs[i]);
|
||||||
|
|
||||||
/* Now find the end of the consecutive sequence. */
|
/* Now find the end of the consecutive sequence and
|
||||||
do
|
add all the indices in the indirect pool. */
|
||||||
runp = runp->next;
|
while (1)
|
||||||
while (runp->mbnext != NULL
|
{
|
||||||
&& runp->nmbs == runp->mbnext->nmbs
|
if (sizeof (int32_t) == sizeof (int))
|
||||||
&& memcmp (runp->mbs, runp->mbnext->mbs,
|
obstack_int_grow_fast (&extrapool, weightidx);
|
||||||
runp->nmbs - 1) == 0
|
else
|
||||||
&& (runp->mbs[runp->nmbs - 1] + 1
|
obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
|
||||||
== runp->mbnext->mbs[runp->nmbs - 1]));
|
|
||||||
|
|
||||||
/* And add the end by sequence. Without length this time. */
|
runp = runp->next;
|
||||||
|
if (runp->mbnext == NULL
|
||||||
|
|| runp->nmbs != runp->mbnext->nmbs
|
||||||
|
|| memcmp (runp->mbs, runp->mbnext->mbs,
|
||||||
|
runp->nmbs - 1) != 0
|
||||||
|
|| (runp->mbs[runp->nmbs - 1] + 1
|
||||||
|
!= runp->mbnext->mbs[runp->nmbs - 1]))
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Insert the weight. */
|
||||||
|
weightidx = output_weight (&weightpool, collate, runp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* And add the end byte sequence. Without length this
|
||||||
|
time. */
|
||||||
for (i = 1; i < runp->nmbs; ++i)
|
for (i = 1; i < runp->nmbs; ++i)
|
||||||
obstack_1grow_fast (&extrapool, runp->mbs[i]);
|
obstack_1grow_fast (&extrapool, runp->mbs[i]);
|
||||||
|
|
||||||
|
weightidx = output_weight (&weightpool, collate, runp);
|
||||||
|
if (sizeof (int32_t) == sizeof (int))
|
||||||
|
obstack_int_grow_fast (&extrapool, weightidx);
|
||||||
|
else
|
||||||
|
obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1768,7 +1793,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
obstack_int_grow_fast (&extrapool, weightidx);
|
obstack_int_grow_fast (&extrapool, weightidx);
|
||||||
else
|
else
|
||||||
obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
|
obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
|
||||||
obstack_1grow_fast (&extrapool, runp->section->ruleidx);
|
|
||||||
obstack_1grow_fast (&extrapool, runp->nmbs - 1);
|
obstack_1grow_fast (&extrapool, runp->nmbs - 1);
|
||||||
for (i = 1; i < runp->nmbs; ++i)
|
for (i = 1; i < runp->nmbs; ++i)
|
||||||
obstack_1grow_fast (&extrapool, runp->mbs[i]);
|
obstack_1grow_fast (&extrapool, runp->mbs[i]);
|
||||||
@ -1835,6 +1859,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||||
++cnt;
|
++cnt;
|
||||||
|
|
||||||
|
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
|
||||||
|
iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
|
||||||
|
iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
|
||||||
|
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||||
|
++cnt;
|
||||||
|
|
||||||
|
|
||||||
assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
|
assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
|
||||||
|
|
||||||
@ -1842,6 +1872,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
|
|
||||||
obstack_free (&weightpool, NULL);
|
obstack_free (&weightpool, NULL);
|
||||||
obstack_free (&extrapool, NULL);
|
obstack_free (&extrapool, NULL);
|
||||||
|
obstack_free (&indirectpool, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2291,16 +2322,16 @@ error while adding equivalent collating symbol"));
|
|||||||
uint32_t cnt;
|
uint32_t cnt;
|
||||||
|
|
||||||
/* This means we have exactly one rule: `forward'. */
|
/* This means we have exactly one rule: `forward'. */
|
||||||
if (collate->nrules > 1)
|
if (nrules > 1)
|
||||||
lr_error (ldfile, _("\
|
lr_error (ldfile, _("\
|
||||||
%s: invalid number of sorting rules"),
|
%s: invalid number of sorting rules"),
|
||||||
"LC_COLLATE");
|
"LC_COLLATE");
|
||||||
else
|
else
|
||||||
collate->nrules = 1;
|
nrules = 1;
|
||||||
sp->rules = obstack_alloc (&collate->mempool,
|
sp->rules = obstack_alloc (&collate->mempool,
|
||||||
(sizeof (enum coll_sort_rule)
|
(sizeof (enum coll_sort_rule)
|
||||||
* collate->nrules));
|
* nrules));
|
||||||
for (cnt = 0; cnt < collate->nrules; ++cnt)
|
for (cnt = 0; cnt < nrules; ++cnt)
|
||||||
sp->rules[cnt] = sort_forward;
|
sp->rules[cnt] = sort_forward;
|
||||||
|
|
||||||
/* Next line. */
|
/* Next line. */
|
||||||
|
@ -3073,10 +3073,8 @@ Computing table size for character classes might take a while..."),
|
|||||||
while (idx2 < ctype->map_collection_act[idx])
|
while (idx2 < ctype->map_collection_act[idx])
|
||||||
{
|
{
|
||||||
if (ctype->map_collection[idx][idx2] != 0)
|
if (ctype->map_collection[idx][idx2] != 0)
|
||||||
*find_idx (ctype, &ctype->map32[idx],
|
ctype->map32[idx][ctype->charnames[idx2]] =
|
||||||
&ctype->map_collection_max[idx],
|
ctype->map_collection[idx][idx2];
|
||||||
&ctype->map_collection_act[idx],
|
|
||||||
ctype->names[idx2]) = ctype->map_collection[idx][idx2];
|
|
||||||
++idx2;
|
++idx2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
261
locale/weight.h
261
locale/weight.h
@ -17,191 +17,106 @@
|
|||||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
Boston, MA 02111-1307, USA. */
|
Boston, MA 02111-1307, USA. */
|
||||||
|
|
||||||
#include <alloca.h>
|
/* Find index of weight. */
|
||||||
#include <errno.h>
|
static inline int32_t
|
||||||
#include <langinfo.h>
|
findidx (const unsigned char **cpp)
|
||||||
#include "localeinfo.h"
|
|
||||||
|
|
||||||
#ifndef STRING_TYPE
|
|
||||||
# error STRING_TYPE not defined
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef USTRING_TYPE
|
|
||||||
# error USTRING_TYPE not defined
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct weight_t
|
|
||||||
{
|
{
|
||||||
struct weight_t *prev;
|
int_fast32_t i = table[*(*cpp)++];
|
||||||
struct weight_t *next;
|
const unsigned char *cp;
|
||||||
struct data_pair
|
|
||||||
{
|
|
||||||
int number;
|
|
||||||
const uint32_t *value;
|
|
||||||
} data[0];
|
|
||||||
} weight_t;
|
|
||||||
|
|
||||||
|
if (i >= 0)
|
||||||
|
/* This is an index into the weight table. Cool. */
|
||||||
|
return i;
|
||||||
|
|
||||||
/* The following five macros grant access to the values in the
|
/* Oh well, more than one sequence starting with this byte.
|
||||||
collate locale file that do not depend on byte order. */
|
Search for the correct one. */
|
||||||
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
|
cp = &extra[-i];
|
||||||
# define collate_nrules \
|
|
||||||
(_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES))
|
|
||||||
# define collate_hash_size \
|
|
||||||
(_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE))
|
|
||||||
# define collate_hash_layers \
|
|
||||||
(_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS))
|
|
||||||
# define collate_undefined \
|
|
||||||
(_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_UNDEFINED_WC))
|
|
||||||
# define collate_rules \
|
|
||||||
((uint32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULES))
|
|
||||||
|
|
||||||
static __inline void get_weight (const STRING_TYPE **str, weight_t *result);
|
|
||||||
static __inline void
|
|
||||||
get_weight (const STRING_TYPE **str, weight_t *result)
|
|
||||||
#else
|
|
||||||
# define collate_nrules \
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word
|
|
||||||
# define collate_hash_size \
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word
|
|
||||||
# define collate_hash_layers \
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word
|
|
||||||
# define collate_undefined \
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED_WC)].word
|
|
||||||
# define collate_rules \
|
|
||||||
((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULES)].string)
|
|
||||||
|
|
||||||
static __inline void get_weight (const STRING_TYPE **str, weight_t *result,
|
|
||||||
struct locale_data *current,
|
|
||||||
const uint32_t *__collate_tablewc,
|
|
||||||
const uint32_t *__collate_extrawc);
|
|
||||||
static __inline void
|
|
||||||
get_weight (const STRING_TYPE **str, weight_t *result,
|
|
||||||
struct locale_data *current, const uint32_t *__collate_tablewc,
|
|
||||||
const uint32_t *__collate_extrawc)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
unsigned int ch = *((USTRING_TYPE *) (*str))++;
|
|
||||||
size_t slot;
|
|
||||||
|
|
||||||
if (sizeof (STRING_TYPE) == 1)
|
|
||||||
slot = ch * (collate_nrules + 1);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const size_t level_size = collate_hash_size * (collate_nrules + 1);
|
|
||||||
size_t level;
|
|
||||||
|
|
||||||
slot = (ch % collate_hash_size) * (collate_nrules + 1);
|
|
||||||
|
|
||||||
level = 0;
|
|
||||||
while (__collate_tablewc[slot] != (uint32_t) ch)
|
|
||||||
{
|
|
||||||
if (__collate_tablewc[slot + 1] == 0
|
|
||||||
|| ++level >= collate_hash_layers)
|
|
||||||
{
|
|
||||||
size_t idx = collate_undefined;
|
|
||||||
size_t cnt;
|
|
||||||
|
|
||||||
for (cnt = 0; cnt < collate_nrules; ++cnt)
|
|
||||||
{
|
|
||||||
result->data[cnt].number = __collate_extrawc[idx++];
|
|
||||||
result->data[cnt].value = &__collate_extrawc[idx];
|
|
||||||
idx += result->data[cnt].number;
|
|
||||||
}
|
|
||||||
/* The Unix standard requires that a character outside
|
|
||||||
the domain is signalled by setting `errno'. */
|
|
||||||
__set_errno (EINVAL);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
slot += level_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (__collate_tablewc[slot + 1] != (uint32_t) FORWARD_CHAR)
|
|
||||||
{
|
|
||||||
/* We have a simple form. One value for each weight. */
|
|
||||||
size_t cnt;
|
|
||||||
|
|
||||||
for (cnt = 0; cnt < collate_nrules; ++cnt)
|
|
||||||
{
|
|
||||||
result->data[cnt].number = 1;
|
|
||||||
result->data[cnt].value = &__collate_tablewc[slot + 1 + cnt];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We now look for any collation element which starts with CH.
|
|
||||||
There might be none, but the last list member is a catch-all case
|
|
||||||
because it is simply the character CH. The value of this entry
|
|
||||||
might be the same as UNDEFINED. */
|
|
||||||
slot = __collate_tablewc[slot + 2];
|
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
size_t idx;
|
size_t nhere;
|
||||||
|
const unsigned char *usrc = *cpp;
|
||||||
|
|
||||||
/* This is a comparison between a uint32_t array (aka wchar_t) and
|
/* The first thing is the index. */
|
||||||
an 8-bit string. */
|
i = *((int32_t *) cp);
|
||||||
for (idx = 0; __collate_extrawc[slot + 2 + idx] != 0; ++idx)
|
cp += sizeof (int32_t);
|
||||||
if (__collate_extrawc[slot + 2 + idx] != (uint32_t) (*str)[idx])
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* When the loop finished with all character of the collation
|
/* Next is the length of the byte sequence. These are always
|
||||||
element used, we found the longest prefix. */
|
short byte sequences so there is no reason to call any
|
||||||
if (__collate_extrawc[slot + 2 + idx] == 0)
|
function (even if they are inlined). */
|
||||||
|
nhere = *cp++;
|
||||||
|
|
||||||
|
if (i >= 0)
|
||||||
{
|
{
|
||||||
|
/* It is a single character. If it matches we found our
|
||||||
|
index. Note that at the end of each list there is an
|
||||||
|
entry of length zero which represents the single byte
|
||||||
|
sequence. The first (and here only) byte was tested
|
||||||
|
already. */
|
||||||
size_t cnt;
|
size_t cnt;
|
||||||
|
|
||||||
*str += idx;
|
for (cnt = 0; cnt < nhere; ++cnt)
|
||||||
idx += slot + 3;
|
if (cp[cnt] != usrc[cnt])
|
||||||
for (cnt = 0; cnt < collate_nrules; ++cnt)
|
break;
|
||||||
|
|
||||||
|
if (cnt == nhere)
|
||||||
{
|
{
|
||||||
result->data[cnt].number = __collate_extrawc[idx++];
|
/* Found it. */
|
||||||
result->data[cnt].value = &__collate_extrawc[idx];
|
*cpp += nhere;
|
||||||
idx += result->data[cnt].number;
|
return i;
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* To next entry in list. */
|
/* Up to the next entry. */
|
||||||
slot += __collate_extrawc[slot];
|
cp += nhere;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* This is a range of characters. First decide whether the
|
||||||
|
current byte sequence lies in the range. */
|
||||||
|
size_t cnt;
|
||||||
|
size_t offset = 0;
|
||||||
|
|
||||||
|
for (cnt = 0; cnt < nhere; ++cnt)
|
||||||
|
if (cp[cnt] != usrc[cnt])
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (cnt != nhere)
|
||||||
|
{
|
||||||
|
if (cp[cnt] > usrc[cnt])
|
||||||
|
{
|
||||||
|
/* Cannot be in this range. */
|
||||||
|
cp += 2 * nhere;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test against the end of the range. */
|
||||||
|
for (cnt = 0; cnt < nhere; ++cnt)
|
||||||
|
if (cp[nhere + cnt] != usrc[cnt])
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (cnt != nhere && cp[nhere + cnt] < usrc[cnt])
|
||||||
|
{
|
||||||
|
/* Cannot be in this range. */
|
||||||
|
cp += 2 * nhere;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This range matches the next characters. Now find
|
||||||
|
the offset in the indirect table. */
|
||||||
|
for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
offset <<= 8;
|
||||||
|
offset += usrc[cnt] - cp[cnt];
|
||||||
|
}
|
||||||
|
while (++cnt < nhere);
|
||||||
|
}
|
||||||
|
|
||||||
|
*cpp += nhere;
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* NOTREACHED */
|
||||||
|
return 0x43219876;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* To process a string efficiently we retrieve all information about
|
|
||||||
the string at once. The following macro constructs a double linked
|
|
||||||
list of this information. It is a macro because we use `alloca'
|
|
||||||
and we use a double linked list because of the backward collation
|
|
||||||
order.
|
|
||||||
|
|
||||||
We have this strange extra macro since the functions which use the
|
|
||||||
given locale (not the global one) cannot use the global tables. */
|
|
||||||
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
|
|
||||||
# define call_get_weight(strp, newp) get_weight ((strp), (newp))
|
|
||||||
#else
|
|
||||||
# define call_get_weight(strp, newp) \
|
|
||||||
get_weight ((strp), (newp), current, collate_table, collate_extra)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define get_string(str, forw, backw) \
|
|
||||||
do \
|
|
||||||
{ \
|
|
||||||
weight_t *newp; \
|
|
||||||
while (*str != '\0') \
|
|
||||||
{ \
|
|
||||||
newp = (weight_t *) alloca (sizeof (weight_t) \
|
|
||||||
+ (collate_nrules \
|
|
||||||
* sizeof (struct data_pair))); \
|
|
||||||
\
|
|
||||||
newp->prev = backw; \
|
|
||||||
if (backw == NULL) \
|
|
||||||
forw = newp; \
|
|
||||||
else \
|
|
||||||
backw->next = newp; \
|
|
||||||
newp->next = NULL; \
|
|
||||||
backw = newp; \
|
|
||||||
call_get_weight (&str, newp); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
while (0)
|
|
||||||
|
565
string/strxfrm.c
565
string/strxfrm.c
@ -17,282 +17,397 @@
|
|||||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
Boston, MA 02111-1307, USA. */
|
Boston, MA 02111-1307, USA. */
|
||||||
|
|
||||||
|
#include <langinfo.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifndef WIDE_VERSION
|
#include "../locale/localeinfo.h"
|
||||||
# define STRING_TYPE char
|
|
||||||
# define USTRING_TYPE unsigned char
|
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
||||||
# define L_(Ch) Ch
|
|
||||||
# ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
|
||||||
# define STRXFRM __strxfrm_l
|
# define STRXFRM __strxfrm_l
|
||||||
# else
|
#else
|
||||||
# define STRXFRM strxfrm
|
# define STRXFRM strxfrm
|
||||||
# endif
|
|
||||||
# define STRLEN strlen
|
|
||||||
# define STPNCPY __stpncpy
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* These are definitions used by some of the functions for handling
   UTF-8 encoding below.  */

/* encoding_mask[K] has every bit set which a value must leave clear in
   order to fit into a sequence of K + 2 bytes.  */
static const uint32_t encoding_mask[] =
{
  ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
};

/* encoding_byte[K] is the marker stored in the first byte of a
   sequence of K + 2 bytes.  */
static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};


/* We need UTF-8 encoding of numbers.

   Store the encoding of VAL starting at BUF and return the number of
   bytes written.  Values below 0x80 occupy one byte; everything else
   takes between two and six bytes, so BUF must provide room for six
   bytes.  */
static inline int
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      /* Single byte, stored as is.  */
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* Find the shortest sequence which can hold VAL.  If none of the
	 masks matches, STEP ends up as 6, the longest form.  */
      for (step = 2; step < 6; ++step)
	if ((val & encoding_mask[step - 2]) == 0)
	  break;
      retval = step;

      /* Write the marker of the first byte, then fill in the
	 continuation bytes from the last one towards the front, six
	 bits at a time.  */
      *buf = encoding_byte[step - 2];
      --step;
      do
	{
	  buf[step] = 0x80 | (val & 0x3f);
	  val >>= 6;
	}
      while (--step > 0);

      /* The remaining high bits go into the first byte.  */
      *buf |= val;
    }

  /* BUF is never advanced on the multi-byte path, so the length has to
     come from RETVAL; a pointer difference would always yield zero
     there.  */
  return retval;
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
|
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
|
||||||
size_t
|
size_t
|
||||||
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n)
|
STRXFRM (char *dest, const char *src, size_t n)
|
||||||
#else
|
#else
|
||||||
size_t
|
size_t
|
||||||
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
|
STRXFRM (char *dest, const char *src, size_t n, __locale_t l)
|
||||||
#endif
|
|
||||||
{
|
|
||||||
if (n != 0)
|
|
||||||
STPNCPY (dest, src, n);
|
|
||||||
|
|
||||||
return STRLEN (src);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* Include the shared helper functions. `strxfrm'/`wcsxfrm' also use
|
|
||||||
these functions. */
|
|
||||||
#include "../locale/weight.h"
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef WIDE_VERSION
|
|
||||||
/* Write 32 bit value UTF-8 encoded but only if enough space is left. */
|
|
||||||
static __inline size_t
|
|
||||||
print_val (u_int32_t value, char *dest, size_t max, size_t act)
|
|
||||||
{
|
|
||||||
char tmp[6];
|
|
||||||
int idx = 0;
|
|
||||||
|
|
||||||
if (value < 0x80)
|
|
||||||
tmp[idx++] = (char) value;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = '\x80' + (char) (value & 0x3f);
|
|
||||||
value >>= 6;
|
|
||||||
|
|
||||||
if (value < 0x20)
|
|
||||||
tmp[idx++] = '\xc0' + (char) value;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = '\x80' + (char) (value & 0x3f);
|
|
||||||
value >>= 6;
|
|
||||||
|
|
||||||
if (value < 0x10)
|
|
||||||
tmp[idx++] = '\xe0' + (char) value;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = '\x80' + (char) (value & 0x3f);
|
|
||||||
value >>= 6;
|
|
||||||
|
|
||||||
if (value < 0x08)
|
|
||||||
tmp[idx++] = '\xf0' + (char) value;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = '\x80' + (char) (value & 0x3f);
|
|
||||||
value >>= 6;
|
|
||||||
|
|
||||||
if (value < 0x04)
|
|
||||||
tmp[idx++] = '\xf8' + (char) value;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = '\x80' + (char) (value & 0x3f);
|
|
||||||
tmp[idx++] = '\xfc' + (char) (value >> 6);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (idx-- > 0)
|
|
||||||
{
|
|
||||||
if (act < max)
|
|
||||||
dest[act] = tmp[idx];
|
|
||||||
++act;
|
|
||||||
}
|
|
||||||
|
|
||||||
return act;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static __inline size_t
|
|
||||||
print_val (u_int32_t value, wchar_t *dest, size_t max, size_t act)
|
|
||||||
{
|
|
||||||
/* We cannot really assume wchar_t is 32 bits wide. But it is for
|
|
||||||
GCC and so we don't do much optimization for the other case. */
|
|
||||||
if (sizeof (wchar_t) == 4)
|
|
||||||
{
|
|
||||||
if (act < max)
|
|
||||||
dest[act] = (wchar_t) value;
|
|
||||||
++act;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
wchar_t tmp[3];
|
|
||||||
size_t idx = 0;
|
|
||||||
|
|
||||||
if (value < 0x8000)
|
|
||||||
tmp[idx++] = (wchar_t) act;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = (wchar_t) (0x8000 + (value & 0x3fff));
|
|
||||||
value >>= 14;
|
|
||||||
if (value < 0x2000)
|
|
||||||
tmp[idx++] = (wchar_t) (0xc000 + value);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp[idx++] = (wchar_t) (0x8000 + (value & 0x3fff));
|
|
||||||
value >>= 14;
|
|
||||||
tmp[idx++] = (wchar_t) (0xe000 + value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (idx-- > 0)
|
|
||||||
{
|
|
||||||
if (act < max)
|
|
||||||
dest[act] = tmp[idx];
|
|
||||||
++act;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return act;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Transform SRC into a form such that the result of strcmp
|
|
||||||
on two strings that have been transformed by strxfrm is
|
|
||||||
the same as the result of strcoll on the two strings before
|
|
||||||
their transformation. The transformed string is put in at
|
|
||||||
most N characters of DEST and its length is returned. */
|
|
||||||
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
|
|
||||||
size_t
|
|
||||||
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n)
|
|
||||||
#else
|
|
||||||
size_t
|
|
||||||
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
||||||
struct locale_data *current = l->__locales[LC_COLLATE];
|
struct locale_data *current = l->__locales[LC_COLLATE];
|
||||||
# if BYTE_ORDER == BIG_ENDIAN
|
uint_fast32_t nrules = *((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].string);
|
||||||
const u_int32_t *collate_table = (const u_int32_t *)
|
#else
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].string;
|
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
|
||||||
const u_int32_t *collate_extra = (const u_int32_t *)
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].string;
|
|
||||||
# elif BYTE_ORDER == LITTLE_ENDIAN
|
|
||||||
const u_int32_t *collate_table = (const u_int32_t *)
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].string;
|
|
||||||
const u_int32_t *collate_extra = (const u_int32_t *)
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].string;
|
|
||||||
# else
|
|
||||||
# error bizarre byte order
|
|
||||||
# endif
|
|
||||||
#endif
|
#endif
|
||||||
weight_t *forw = NULL;
|
/* We don't assign the following values right away since it might be
|
||||||
weight_t *backw = NULL;
|
unnecessary in case there are no rules. */
|
||||||
size_t pass;
|
const unsigned char *rulesets;
|
||||||
size_t written;
|
const int32_t *table;
|
||||||
|
const unsigned char *weights;
|
||||||
|
const unsigned char *extra;
|
||||||
|
const int32_t *indirect;
|
||||||
|
uint_fast32_t pass;
|
||||||
|
size_t needed;
|
||||||
|
const unsigned char *usrc;
|
||||||
|
size_t srclen = strlen (src);
|
||||||
|
int32_t *idxarr;
|
||||||
|
unsigned char *rulearr;
|
||||||
|
size_t idxmax;
|
||||||
|
size_t idxcnt;
|
||||||
|
int use_malloc = 0;
|
||||||
|
|
||||||
/* If the current locale does not specify locale data we use normal
|
#include "../locale/weight.h"
|
||||||
8-bit string comparison. */
|
|
||||||
if (collate_nrules == 0)
|
if (nrules == 0)
|
||||||
{
|
{
|
||||||
if (n != 0)
|
if (n != 0)
|
||||||
STPNCPY (dest, src, n);
|
__stpncpy (dest, src, n);
|
||||||
|
|
||||||
return STRLEN (src);
|
return srclen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
||||||
|
rulesets = (const unsigned char *)
|
||||||
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
|
||||||
|
table = (const int32_t *)
|
||||||
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLEMB)].string;
|
||||||
|
weights = (const unsigned char *)
|
||||||
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB)].string;
|
||||||
|
extra = (const unsigned char *)
|
||||||
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB)].string;
|
||||||
|
indirect = (const int32_t *)
|
||||||
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB)].string;
|
||||||
|
#else
|
||||||
|
rulesets = (const unsigned char *)
|
||||||
|
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULESETS);
|
||||||
|
table = (const int32_t *)
|
||||||
|
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
|
||||||
|
weights = (const unsigned char *)
|
||||||
|
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
|
||||||
|
extra = (const unsigned char *)
|
||||||
|
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
|
||||||
|
indirect = (const int32_t *)
|
||||||
|
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Handle an empty string as a special case. */
|
/* Handle an empty string as a special case. */
|
||||||
if (*src == '\0')
|
if (srclen == 0)
|
||||||
{
|
{
|
||||||
if (n != 0)
|
if (n != 0)
|
||||||
*dest = '\0';
|
*dest = '\0';
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get full information about the string. This means we get
|
/* We need the elements of the string as unsigned values since they
|
||||||
information for all passes in a special data structure. */
|
are used as indices. */
|
||||||
get_string (src, forw, backw);
|
usrc = (const unsigned char *) src;
|
||||||
|
|
||||||
/* Now we have all the information. In at most the given number of
|
/* Perform the first pass over the string and while doing this find
|
||||||
passes we can finally decide about the order. */
|
and store the weights for each character. Since we want this to
|
||||||
written = 0;
|
be as fast as possible we are using `alloca' to store the temporary
|
||||||
for (pass = 0; pass < collate_nrules; ++pass)
|
values. But since there is no limit on the length of the string
|
||||||
|
we have to use `malloc' if the string is too long. We should be
|
||||||
|
very conservative here. */
|
||||||
|
if (srclen >= 16384)
|
||||||
{
|
{
|
||||||
int forward = (collate_rules[pass] & sort_forward) != 0;
|
idxarr = (int32_t *) malloc (srclen * (sizeof (int32_t) + 1));
|
||||||
const weight_t *run = forward ? forw : backw;
|
rulearr = (unsigned char *) &idxarr[srclen];
|
||||||
int idx = forward ? 0 : run->data[pass].number - 1;
|
|
||||||
|
|
||||||
while (1)
|
if (idxarr == NULL)
|
||||||
{
|
/* No memory. Well, go with the stack then.
|
||||||
int ignore = 0;
|
|
||||||
u_int32_t w = 0;
|
|
||||||
|
|
||||||
/* Here we have to check for IGNORE entries. If these are
|
XXX Once this implementation is stable we will handle this
|
||||||
found we count them and go on with the next value. */
|
differently. Instead of precomputing the indices we will
|
||||||
while (run != NULL
|
do this in time. This means, though, that this happens for
|
||||||
&& ((w = run->data[pass].value[idx])
|
every pass again. */
|
||||||
== (u_int32_t) IGNORE_CHAR))
|
goto try_stack;
|
||||||
{
|
use_malloc = 1;
|
||||||
++ignore;
|
|
||||||
if (forward
|
|
||||||
? ++idx >= run->data[pass].number
|
|
||||||
: --idx < 0)
|
|
||||||
{
|
|
||||||
weight_t *nextp = forward ? run->next : run->prev;
|
|
||||||
if (nextp == NULL)
|
|
||||||
{
|
|
||||||
w = 0;
|
|
||||||
/* No more non-IGNOREd elements means lowest
|
|
||||||
possible value. */
|
|
||||||
ignore = -1;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
idx = forward ? 0 : nextp->data[pass].number - 1;
|
{
|
||||||
run = nextp;
|
try_stack:
|
||||||
|
idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
|
||||||
|
rulearr = (unsigned char *) alloca (srclen);
|
||||||
|
}
|
||||||
|
|
||||||
|
idxmax = 0;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
int32_t tmp = findidx (&usrc);
|
||||||
|
rulearr[idxmax] = tmp >> 24;
|
||||||
|
idxarr[idxmax] = tmp & 0x80ffffff;
|
||||||
|
|
||||||
|
++idxmax;
|
||||||
|
}
|
||||||
|
while (*usrc != '\0');
|
||||||
|
|
||||||
|
/* Now the passes over the weights. We now use the indices we found
|
||||||
|
before. */
|
||||||
|
needed = 0;
|
||||||
|
for (pass = 0; pass < nrules; ++pass)
|
||||||
|
{
|
||||||
|
size_t backw_stop = ~0ul;
|
||||||
|
int rule = rulesets[rulearr[0] * nrules + pass];
|
||||||
|
/* We assume that if a rule has defined `position' in one section
|
||||||
|
this is true for all of them. */
|
||||||
|
int position = rule & sort_position;
|
||||||
|
|
||||||
|
if (position == 0)
|
||||||
|
{
|
||||||
|
for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
|
||||||
|
{
|
||||||
|
if ((rule & sort_forward) != 0)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
if (backw_stop != ~0ul)
|
||||||
|
{
|
||||||
|
/* Handle the pushed elements now. */
|
||||||
|
size_t backw;
|
||||||
|
|
||||||
|
for (backw = idxcnt - 1; backw >= backw_stop; --backw)
|
||||||
|
{
|
||||||
|
len = weights[idxarr[backw]++];
|
||||||
|
|
||||||
|
if (needed + len < n)
|
||||||
|
while (len-- > 0)
|
||||||
|
dest[needed++] = weights[idxarr[backw]++];
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* No more characters fit into the buffer. */
|
||||||
|
needed += len;
|
||||||
|
idxarr[backw] += len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stop if all characters are processed. */
|
backw_stop = ~0ul;
|
||||||
if (run == NULL)
|
}
|
||||||
break;
|
|
||||||
|
|
||||||
/* Now we have information of the number of ignored weights
|
/* Now handle the forward element. */
|
||||||
and the value of the next weight. We have to add 2
|
len = weights[idxarr[idxcnt]++];
|
||||||
because 0 means EOS and 1 is the intermediate string end. */
|
if (needed + len < n)
|
||||||
if ((collate_rules[pass] & sort_position) != 0)
|
while (len-- > 0)
|
||||||
written = print_val (ignore + 2, dest, n, written);
|
dest[needed++] = weights[idxarr[idxcnt]++];
|
||||||
|
else
|
||||||
if (w != 0)
|
|
||||||
written = print_val (w, dest, n, written);
|
|
||||||
|
|
||||||
/* We have to increment the index counters. */
|
|
||||||
if (forward)
|
|
||||||
{
|
{
|
||||||
if (++idx >= run->data[pass].number)
|
/* No more characters fit into the buffer. */
|
||||||
{
|
needed += len;
|
||||||
run = run->next;
|
idxarr[idxcnt] += len;
|
||||||
idx = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (--idx < 0)
|
/* Remember where the backwards series started. */
|
||||||
|
if (backw_stop == ~0ul)
|
||||||
|
backw_stop = idxcnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (backw_stop != ~0ul)
|
||||||
{
|
{
|
||||||
run = run->prev;
|
/* Handle the pushed elements now. */
|
||||||
if (run != NULL)
|
size_t backw;
|
||||||
idx = run->data[pass].number - 1;
|
|
||||||
|
for (backw = idxcnt - 1; backw >= backw_stop; --backw)
|
||||||
|
{
|
||||||
|
size_t len = weights[idxarr[backw]++];
|
||||||
|
|
||||||
|
if (needed + len < n)
|
||||||
|
while (len-- > 0)
|
||||||
|
dest[needed++] = weights[idxarr[backw]++];
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* No more characters fit into the buffer. */
|
||||||
|
needed += len;
|
||||||
|
idxarr[backw] += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int val = 1;
|
||||||
|
char buf[7];
|
||||||
|
size_t buflen;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
|
||||||
|
{
|
||||||
|
if ((rule & sort_forward) != 0)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
if (backw_stop != ~0ul)
|
||||||
|
{
|
||||||
|
/* Handle the pushed elements now. */
|
||||||
|
size_t backw;
|
||||||
|
|
||||||
|
for (backw = idxcnt - 1; backw >= backw_stop; --backw)
|
||||||
|
{
|
||||||
|
len = weights[idxarr[backw]++];
|
||||||
|
if (len != 0)
|
||||||
|
{
|
||||||
|
buflen = utf8_encode (buf, val);
|
||||||
|
if (needed + buflen + len < n)
|
||||||
|
{
|
||||||
|
for (i = 0; i < buflen; ++i)
|
||||||
|
dest[needed + i] = buf[i];
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + buflen + i] =
|
||||||
|
weights[idxarr[backw] + i];
|
||||||
|
}
|
||||||
|
idxarr[backw] += len;
|
||||||
|
needed += buflen + len;
|
||||||
|
val = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++val;
|
||||||
|
}
|
||||||
|
|
||||||
|
backw_stop = ~0ul;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now handle the forward element. */
|
||||||
|
len = weights[idxarr[idxcnt]++];
|
||||||
|
if (len != 0)
|
||||||
|
{
|
||||||
|
buflen = utf8_encode (buf, val);
|
||||||
|
if (needed + buflen + len < n)
|
||||||
|
{
|
||||||
|
for (i = 0; i < buflen; ++i)
|
||||||
|
dest[needed + i] = buf[i];
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + buflen + i] =
|
||||||
|
weights[idxarr[idxcnt] + i];
|
||||||
|
}
|
||||||
|
idxarr[idxcnt] += len;
|
||||||
|
needed += buflen + len;
|
||||||
|
val = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* Note that we don't have to increment `idxarr[idxcnt]'
|
||||||
|
since the length is zero. */
|
||||||
|
++val;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Remember where the backwards series started. */
|
||||||
|
if (backw_stop == ~0ul)
|
||||||
|
backw_stop = idxcnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (backw_stop != ~0)
|
||||||
|
{
|
||||||
|
/* Handle the pushed elements now. */
|
||||||
|
size_t backw;
|
||||||
|
|
||||||
|
for (backw = idxmax - 1; backw >= backw_stop; --backw)
|
||||||
|
{
|
||||||
|
size_t len = weights[idxarr[backw]++];
|
||||||
|
if (len != 0)
|
||||||
|
{
|
||||||
|
buflen = utf8_encode (buf, val);
|
||||||
|
if (needed + buflen + len < n)
|
||||||
|
{
|
||||||
|
for (i = 0; i < buflen; ++i)
|
||||||
|
dest[needed + i] = buf[i];
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + buflen + i] =
|
||||||
|
weights[idxarr[backw] + i];
|
||||||
|
}
|
||||||
|
idxarr[backw] += len;
|
||||||
|
needed += buflen + len;
|
||||||
|
val = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write marker for end of word. */
|
/* Finally store the byte to separate the passes or terminate
|
||||||
if (pass + 1 < collate_nrules)
|
the string. */
|
||||||
written = print_val (1, dest, n, written);
|
if (needed < n)
|
||||||
|
dest[needed] = pass + 1 < nrules ? '\1' : '\0';
|
||||||
|
++needed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Terminate string. */
|
/* This is a little optimization: many collation specifications have
|
||||||
if (written < n)
|
a `position' rule at the end and if no non-ignored character
|
||||||
dest[written] = L_('\0');
|
is found the last \1 byte is immediately followed by a \0 byte
|
||||||
|
signalling this. We can avoid the \1 byte(s). */
|
||||||
|
if (needed > 2 && dest[needed - 2] == '\1')
|
||||||
|
{
|
||||||
|
/* Remove the \1 byte. */
|
||||||
|
--needed;
|
||||||
|
dest[needed - 1] = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
/* Return length without counting the terminating '\0'. */
|
/* Free the memory if needed. */
|
||||||
return written;
|
if (use_malloc)
|
||||||
|
free (idxarr);
|
||||||
|
|
||||||
|
return needed;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
|
/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
|
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
|
||||||
|
|
||||||
@ -19,16 +19,23 @@
|
|||||||
|
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
|
|
||||||
#define WIDE_VERSION 1
|
|
||||||
#define STRING_TYPE wchar_t
|
|
||||||
#define USTRING_TYPE wint_t
|
|
||||||
#define L_(Ch) L##Ch
|
|
||||||
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
|
||||||
# define STRXFRM __wcsxfrm_l
|
# define STRXFRM __wcsxfrm_l
|
||||||
#else
|
#else
|
||||||
# define STRXFRM wcsxfrm
|
# define STRXFRM wcsxfrm
|
||||||
#endif
|
#endif
|
||||||
#define STRLEN __wcslen
|
|
||||||
#define STPNCPY __wcpncpy
|
|
||||||
|
|
||||||
#include <string/strxfrm.c>
|
|
||||||
|
/* Dummy implementation which applies no collation weights: at most N
   wide characters of SRC are copied to DEST (nothing is written when N
   is zero) and the length of SRC is returned.  */
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
size_t
STRXFRM (wchar_t *dest, const wchar_t *src, size_t n, __locale_t l)
#else
size_t
STRXFRM (wchar_t *dest, const wchar_t *src, size_t n)
#endif
{
  size_t srclen;

  /* Only touch DEST when the caller provided room for it.  */
  if (n > 0)
    __wcpncpy (dest, src, n);

  srclen = __wcslen (src);
  return srclen;
}
|
||||||
|
@ -52,5 +52,5 @@ wctrans (const char *property)
|
|||||||
/* We have to search the table. */
|
/* We have to search the table. */
|
||||||
result = (int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
|
result = (int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
|
||||||
|
|
||||||
return (wctrans_t) (result + 128);
|
return (wctrans_t) result;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user