Optimize regex a bit

This commit is contained in:
Ulrich Drepper 2012-01-03 07:54:15 -05:00
parent a316c1f682
commit 9f1151705e
2 changed files with 36 additions and 4 deletions

View File

@ -1,3 +1,7 @@
2012-01-03 Ulrich Drepper <drepper@gmail.com>
* posix/regcomp.c (init_word_char): Optimize a bit for sane encodings.
2012-01-01 Ulrich Drepper <drepper@gmail.com>
* posix/getconf.c: Update copyright year.

View File

@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002-2007,2009,2010,2011 Free Software Foundation, Inc.
Copyright (C) 2002-2007,2009,2010,2011,2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@ -926,10 +926,38 @@ static void
/* Set up DFA->word_char, a bitset in which bit J of word I stands for
   character code I * BITSET_WORD_BITS + J; a bit is set when
   isalnum (ch) || ch == '_' holds for that code.  Also sets
   DFA->word_ops_used to 1.

   NOTE(review): this listing looks like a rendered diff whose +/- markers
   and indentation were dropped, so removed and added lines appear side by
   side.  Read the notes below before treating it as compilable C.  */
internal_function
init_word_char (re_dfa_t *dfa)
{
/* NOTE(review): presumably the pre-change declaration (removed side of the
   diff); it conflicts with the separate `int i' / `int ch' declarations
   further down.  */
int i, j, ch;
dfa->word_ops_used = 1;
/* NOTE(review): these two loop headers are presumably pre-change lines as
   well; equivalent headers reappear near the end of the function, where
   they drive the generic per-character loop.  */
for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
/* I is the next bitset word to fill, CH the character code of its bit 0.
   Both stay 0 unless the fast path below skips ahead.  */
int i = 0;
int ch = 0;
/* Fast path, taken when DFA->map_notascii is 0: store precomputed masks for
   character codes 0..127 instead of calling isalnum on each of them.
   BE is presumably a branch-prediction hint (expected value 1) -- confirm
   against its definition.  */
if (BE (dfa->map_notascii == 0, 1))
{
if (sizeof (dfa->word_char[0]) == 8)
{
/* 8-byte words: word 0 has bits 48..57 set ('0'..'9'); word 1 covers codes
   64..127 with 'A'..'Z', '_' and 'a'..'z' set.  */
dfa->word_char[0] = UINT64_C (0x03ff000000000000);
dfa->word_char[1] = UINT64_C (0x07fffffe87fffffe);
i = 2;
}
else if (sizeof (dfa->word_char[0]) == 4)
{
/* 4-byte words: the same 128 bits split over four words -- codes 0..31
   (none set), 32..63 (digits), 64..95 (upper case and '_'), 96..127
   (lower case).  */
dfa->word_char[0] = UINT32_C (0x00000000);
dfa->word_char[1] = UINT32_C (0x03ff0000);
dfa->word_char[2] = UINT32_C (0x87fffffe);
dfa->word_char[3] = UINT32_C (0x07fffffe);
i = 4;
}
else
/* Any other word size terminates the process; there is no fallback to the
   generic loop for that case.  */
abort ();
/* Codes 0..127 are done; continue with code 128 at word I.  */
ch = 128;
if (BE (dfa->is_utf8, 1))
{
/* When DFA->is_utf8 is set, the words from index I on are cleared and the
   function returns without consulting isalnum.  The byte count
   (SBC_MAX - ch) / 8 presumably reflects one bit per remaining character
   code with 8-bit bytes -- confirm against the definition of SBC_MAX.  */
memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
return;
}
}
/* Generic path: for every remaining word, test each character code with
   isalnum (or '_') and OR the matching bit into the word.  Bits are only
   ever set here, never cleared.  */
for (; i < BITSET_WORDS; ++i)
for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= (bitset_word_t) 1 << j;
}