Update.
2003-11-20 Ulrich Drepper <drepper@redhat.com> * posix/PTESTS: Fix first test in GA143. 2003-11-20 Jakub Jelinek <jakub@redhat.com> * posix/regex_internal.h (re_dfastate_t): Remove trtable_search. Add word_trtable. * posix/regex_internal.c (create_newstate_common, free_state): Don't free trtable_search. * posix/regexec.c (check_matching): Remove fl_search argument. (transit_state_sb): Likewise. #ifdef out as unused. (build_trtable): Remove fl_search argument. Set state->word_trtable and state->trtable. Build separate word and non-word tables if multi-byte and they differ for some character. (transit_state): Remove fl_search argument. Don't update state->trtable here. Handle state->word_trtable. #ifdef out unused call to transit_state_sb. (re_search_internal): Update check_matching caller. (group_nodes_into_DFAstates): Don't clear non-ascii chars in accepts bitmask for multi-byte locales. * posix/bug-regex19.c (tests): Enable some commented out tests, add 2 new tests. * posix/tst-rxspencer.c (mb_tests): Don't test [[=b=]] for now as multi-byte. Don't run identical multi-byte tests multiple times unnecessarily. (main): Check setlocale return value. * posix/Makefile (tst-rxspencer-ARGS): Add --utf8 argument. (tst-rxspencer-ENV): Remove MALLOC_TRACE, add LOCPATH. ($(objpfx)tst-rxspencer-mem): Run another tst-rxspencer test here, without --utf8 argument but with MALLOC_TRACE.
This commit is contained in:
parent
beac34a2ad
commit
c13c99fa92
32
ChangeLog
32
ChangeLog
@ -1,3 +1,35 @@
|
||||
2003-11-20 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* posix/PTESTS: Fix first test in GA143.
|
||||
|
||||
2003-11-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* posix/regex_internal.h (re_dfastate_t): Remove trtable_search.
|
||||
Add word_trtable.
|
||||
* posix/regex_internal.c (create_newstate_common, free_state):
|
||||
Don't free trtable_search.
|
||||
* posix/regexec.c (check_matching): Remove fl_search argument.
|
||||
(transit_state_sb): Likewise. #ifdef out as unused.
|
||||
(build_trtable): Remove fl_search argument. Set state->word_trtable
|
||||
and state->trtable. Build separate word and non-word tables if
|
||||
multi-byte and they differ for some character.
|
||||
(transit_state): Remove fl_search argument. Don't update
|
||||
state->trtable here. Handle state->word_trtable.
|
||||
#ifdef out unused call to transit_state_sb.
|
||||
(re_search_internal): Update check_matching caller.
|
||||
(group_nodes_into_DFAstates): Don't clear non-ascii chars in accepts
|
||||
bitmask for multi-byte locales.
|
||||
* posix/bug-regex19.c (tests): Enable some commented out tests, add
|
||||
2 new tests.
|
||||
* posix/tst-rxspencer.c (mb_tests): Don't test [[=b=]] for now as
|
||||
multi-byte. Don't run identical multi-byte tests multiple times
|
||||
unnecessarily.
|
||||
(main): Check setlocale return value.
|
||||
* posix/Makefile (tst-rxspencer-ARGS): Add --utf8 argument.
|
||||
(tst-rxspencer-ENV): Remove MALLOC_TRACE, add LOCPATH.
|
||||
($(objpfx)tst-rxspencer-mem): Run another tst-rxspencer test
|
||||
here, without --utf8 argument but with MALLOC_TRACE.
|
||||
|
||||
2003-11-19 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* posix/regexec.c (extend_buffers): Don't allocate
|
||||
|
@ -1,3 +1,7 @@
|
||||
2003-11-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* Makefile (LOCALES): Add cs_CZ.UTF-8.
|
||||
|
||||
2003-11-15 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* Makefile (tst-leaks-ENV): Add LOCPATH.
|
||||
|
@ -132,7 +132,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
|
||||
en_US.ISO-8859-1 ja_JP.EUC-JP da_DK.ISO-8859-1 \
|
||||
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
|
||||
vi_VN.TCVN5712-1 nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 \
|
||||
tr_TR.UTF-8
|
||||
tr_TR.UTF-8 cs_CZ.UTF-8
|
||||
LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
|
||||
CHARMAPS := $(shell echo "$(LOCALES)" | \
|
||||
sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
|
||||
|
@ -148,7 +148,6 @@ tst-exec-ARGS = -- $(built-program-cmd)
|
||||
tst-spawn-ARGS = -- $(built-program-cmd)
|
||||
tst-dir-ARGS = `pwd` `cd $(common-objdir)/$(subdir); pwd` `cd $(common-objdir); pwd` $(objpfx)tst-dir
|
||||
tst-chmod-ARGS = `pwd`
|
||||
tst-rxspencer-ARGS = rxspencer/tests
|
||||
|
||||
tst-fnmatch-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
tst-regexloc-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
@ -160,6 +159,8 @@ bug-regex17-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
bug-regex18-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
bug-regex19-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
tst-rxspencer-ARGS = --utf8 rxspencer/tests
|
||||
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
|
||||
testcases.h: TESTS TESTS2C.sed
|
||||
sed -f TESTS2C.sed < $< > $@T
|
||||
@ -207,9 +208,13 @@ bug-regex21-ENV = MALLOC_TRACE=$(objpfx)bug-regex21.mtrace
|
||||
$(objpfx)bug-regex21-mem: $(objpfx)bug-regex21.out
|
||||
$(common-objpfx)malloc/mtrace $(objpfx)bug-regex21.mtrace > $@
|
||||
|
||||
tst-rxspencer-ENV = MALLOC_TRACE=$(objpfx)tst-rxspencer.mtrace
|
||||
|
||||
# tst-rxspencer.mtrace is generated only when run without --utf8
|
||||
# option, since otherwise the file is almost 100M and takes a very long
|
||||
# time to process.
|
||||
$(objpfx)tst-rxspencer-mem: $(objpfx)tst-rxspencer.out
|
||||
MALLOC_TRACE=$(objpfx)tst-rxspencer.mtrace $(tst-rxspencer-ENV) \
|
||||
$(run-program-prefix) $(objpfx)tst-rxspencer rxspencer/tests \
|
||||
> /dev/null
|
||||
$(common-objpfx)malloc/mtrace $(objpfx)tst-rxspencer.mtrace > $@
|
||||
|
||||
$(objpfx)tst-getconf.out: tst-getconf.sh $(objpfx)getconf
|
||||
|
@ -270,7 +270,7 @@
|
||||
1¦63¦a\{1,63\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦
|
||||
# 2.8.3.4 BRE Precedence
|
||||
# GA143
|
||||
2¦20¦\^\[[[.].]]\\(\\1\\)\\*\\{1,2\\}\$¦a^[]\(1\)\*\{1,2\}$b¦
|
||||
2¦20¦\^\[[[.].]]\\(\\1\\)\*\\{1,2\\}\$¦a^[]\(1\)*\{1,2\}$b¦
|
||||
1¦6¦[[=*=]][[=\=]][[=]=]][[===]][[...]][[:punct:]]¦*\]=.;¦
|
||||
1¦6¦[$\(*\)^]*¦$\()*^¦
|
||||
1¦1¦[\1]¦1¦
|
||||
|
@ -37,17 +37,21 @@ static struct
|
||||
\xc3\x96 LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
\xe2\x80\x94 EM DASH */
|
||||
/* Should not match. */
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<A", "aOAA", 0, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<A", "aOAA", 2, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "A\\>", "aAAO", 1, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\bA", "aOAA", 0, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\bA", "aOAA", 2, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "A\\b", "aAAO", 1, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 0, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 3, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\>", "a\xc3\x84\xc3\x84\xc3\x96", 1, -1},
|
||||
#if 0
|
||||
/* XXX Not used since they fail so far. */
|
||||
/* XXX these 2 tests still fail. */
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\b\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 0, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\b\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 3, -1},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84\xc3\x96", 1, -1},
|
||||
#endif
|
||||
{RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84\xc3\x96", 1, -1},
|
||||
/* Should match. */
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<A", "AA", 0, 0},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<A", "a-AA", 2, 2},
|
||||
@ -57,8 +61,6 @@ static struct
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\bA", "a-AA", 2, 2},
|
||||
{RE_SYNTAX_POSIX_BASIC, "A\\b", "aAA-", 1, 2},
|
||||
{RE_SYNTAX_POSIX_BASIC, "A\\b", "aAA", 1, 2},
|
||||
#if 0
|
||||
/* XXX Not used since they fail so far. */
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "\xc3\x84\xc3\x84", 0, 0},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "a\xe2\x80\x94\xc3\x84\xc3\x84", 4, 4},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\>", "a\xc3\x84\xc3\x84\xe2\x80\x94", 1, 3},
|
||||
@ -67,7 +69,6 @@ static struct
|
||||
{RE_SYNTAX_POSIX_BASIC, "\\b\xc3\x84", "a\xe2\x80\x94\xc3\x84\xc3\x84", 4, 4},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84\xe2\x80\x94", 1, 3},
|
||||
{RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84", 1, 3}
|
||||
#endif
|
||||
};
|
||||
|
||||
int
|
||||
|
@ -1207,7 +1207,6 @@ create_newstate_common (dfa, nodes, hash)
|
||||
return NULL;
|
||||
}
|
||||
newstate->trtable = NULL;
|
||||
newstate->trtable_search = NULL;
|
||||
newstate->hash = hash;
|
||||
return newstate;
|
||||
}
|
||||
@ -1369,6 +1368,5 @@ free_state (state)
|
||||
}
|
||||
re_node_set_free (&state->nodes);
|
||||
re_free (state->trtable);
|
||||
re_free (state->trtable_search);
|
||||
re_free (state);
|
||||
}
|
||||
|
@ -456,7 +456,6 @@ struct re_dfastate_t
|
||||
re_node_set nodes;
|
||||
re_node_set *entrance_nodes;
|
||||
struct re_dfastate_t **trtable;
|
||||
struct re_dfastate_t **trtable_search;
|
||||
/* If this state is a special state.
|
||||
A state is a special state if the state is the halt state, or
|
||||
an anchor. */
|
||||
@ -469,6 +468,7 @@ struct re_dfastate_t
|
||||
/* If this state has backreference node(s). */
|
||||
unsigned int has_backref : 1;
|
||||
unsigned int has_constraint : 1;
|
||||
unsigned int word_trtable : 1;
|
||||
};
|
||||
typedef struct re_dfastate_t re_dfastate_t;
|
||||
|
||||
|
205
posix/regexec.c
205
posix/regexec.c
@ -57,7 +57,7 @@ static re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
|
||||
static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
|
||||
re_match_context_t *mctx);
|
||||
static int check_matching (const regex_t *preg, re_match_context_t *mctx,
|
||||
int fl_search, int fl_longest_match);
|
||||
int fl_longest_match);
|
||||
static int check_halt_node_context (const re_dfa_t *dfa, int node,
|
||||
unsigned int context);
|
||||
static int check_halt_state_context (const regex_t *preg,
|
||||
@ -123,15 +123,16 @@ static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
|
||||
re_dfastate_t **src, int num);
|
||||
static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
|
||||
re_match_context_t *mctx,
|
||||
re_dfastate_t *state, int fl_search);
|
||||
re_dfastate_t *state);
|
||||
static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
|
||||
re_match_context_t *mctx,
|
||||
re_node_set *cur_nodes,
|
||||
int str_idx);
|
||||
#if 0
|
||||
static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
|
||||
re_dfastate_t *pstate,
|
||||
int fl_search,
|
||||
re_match_context_t *mctx);
|
||||
#endif
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static reg_errcode_t transit_state_mb (const regex_t *preg,
|
||||
re_dfastate_t *pstate,
|
||||
@ -173,8 +174,7 @@ static reg_errcode_t expand_bkref_cache (const regex_t *preg,
|
||||
int last_str, int subexp_num,
|
||||
int fl_open);
|
||||
static re_dfastate_t **build_trtable (const regex_t *dfa,
|
||||
const re_dfastate_t *state,
|
||||
int fl_search);
|
||||
re_dfastate_t *state);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static int check_node_accept_bytes (const regex_t *preg, int node_idx,
|
||||
const re_string_t *input, int idx);
|
||||
@ -741,7 +741,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|
||||
/* It seems to be appropriate one, then use the matcher. */
|
||||
/* We assume that the matching starts from 0. */
|
||||
mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
|
||||
match_last = check_matching (preg, &mctx, 0, fl_longest_match);
|
||||
match_last = check_matching (preg, &mctx, fl_longest_match);
|
||||
if (match_last != -1)
|
||||
{
|
||||
if (BE (match_last == -2, 0))
|
||||
@ -919,8 +919,8 @@ acquire_init_state_context (err, preg, mctx, idx)
|
||||
if (dfa->init_state->has_constraint)
|
||||
{
|
||||
unsigned int context;
|
||||
context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
|
||||
preg->newline_anchor);
|
||||
context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
|
||||
preg->newline_anchor);
|
||||
if (IS_WORD_CONTEXT (context))
|
||||
return dfa->init_state_word;
|
||||
else if (IS_ORDINARY_CONTEXT (context))
|
||||
@ -947,16 +947,15 @@ acquire_init_state_context (err, preg, mctx, idx)
|
||||
/* Check whether the regular expression matches the input string INPUT or not,
|
||||
and return the index where the match ends, return -1 if there is no match,
|
||||
or return -2 in case of an error.
|
||||
FL_SEARCH means we must search where the matching starts,
|
||||
FL_LONGEST_MATCH means we want the POSIX longest matching.
|
||||
Note that the matcher assumes that the matching starts from the current
|
||||
index of the buffer. */
|
||||
|
||||
static int
|
||||
check_matching (preg, mctx, fl_search, fl_longest_match)
|
||||
check_matching (preg, mctx, fl_longest_match)
|
||||
const regex_t *preg;
|
||||
re_match_context_t *mctx;
|
||||
int fl_search, fl_longest_match;
|
||||
int fl_longest_match;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
reg_errcode_t err;
|
||||
@ -1006,31 +1005,15 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
|
||||
|
||||
while (!re_string_eoi (mctx->input))
|
||||
{
|
||||
cur_state = transit_state (&err, preg, mctx, cur_state,
|
||||
fl_search && !match);
|
||||
cur_state = transit_state (&err, preg, mctx, cur_state);
|
||||
if (cur_state == NULL) /* Reached at the invalid state or an error. */
|
||||
{
|
||||
cur_str_idx = re_string_cur_idx (mctx->input);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return -2;
|
||||
if (fl_search && !match)
|
||||
{
|
||||
/* Restart from initial state, since we are searching
|
||||
the point from where matching start. */
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (dfa->mb_cur_max == 1
|
||||
|| re_string_first_byte (mctx->input, cur_str_idx))
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
cur_state = acquire_init_state_context (&err, preg, mctx,
|
||||
cur_str_idx);
|
||||
if (BE (cur_state == NULL && err != REG_NOERROR, 0))
|
||||
return -2;
|
||||
if (mctx->state_log != NULL)
|
||||
mctx->state_log[cur_str_idx] = cur_state;
|
||||
}
|
||||
else if (!fl_longest_match && match)
|
||||
if (!fl_longest_match && match)
|
||||
break;
|
||||
else /* (fl_longest_match && match) || (!fl_search && !match) */
|
||||
else
|
||||
{
|
||||
if (mctx->state_log == NULL)
|
||||
break;
|
||||
@ -2069,12 +2052,11 @@ sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
|
||||
update the destination of STATE_LOG. */
|
||||
|
||||
static re_dfastate_t *
|
||||
transit_state (err, preg, mctx, state, fl_search)
|
||||
transit_state (err, preg, mctx, state)
|
||||
reg_errcode_t *err;
|
||||
const regex_t *preg;
|
||||
re_match_context_t *mctx;
|
||||
re_dfastate_t *state;
|
||||
int fl_search;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
re_dfastate_t **trtable, *next_state;
|
||||
@ -2113,24 +2095,40 @@ transit_state (err, preg, mctx, state, fl_search)
|
||||
{
|
||||
/* Use transition table */
|
||||
ch = re_string_fetch_byte (mctx->input);
|
||||
trtable = fl_search ? state->trtable_search : state->trtable;
|
||||
trtable = state->trtable;
|
||||
if (trtable == NULL)
|
||||
{
|
||||
trtable = build_trtable (preg, state, fl_search);
|
||||
if (fl_search)
|
||||
state->trtable_search = trtable;
|
||||
else
|
||||
state->trtable = trtable;
|
||||
trtable = build_trtable (preg, state);
|
||||
if (trtable == NULL)
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
next_state = trtable[ch];
|
||||
if (BE (state->word_trtable, 0))
|
||||
{
|
||||
unsigned int context;
|
||||
context
|
||||
= re_string_context_at (mctx->input,
|
||||
re_string_cur_idx (mctx->input) - 1,
|
||||
mctx->eflags, preg->newline_anchor);
|
||||
if (IS_WORD_CONTEXT (context))
|
||||
next_state = trtable[ch + SBC_MAX];
|
||||
else
|
||||
next_state = trtable[ch];
|
||||
}
|
||||
else
|
||||
next_state = trtable[ch];
|
||||
}
|
||||
#if 0
|
||||
else
|
||||
{
|
||||
/* don't use transition table */
|
||||
next_state = transit_state_sb (err, preg, state, fl_search, mctx);
|
||||
next_state = transit_state_sb (err, preg, state, mctx);
|
||||
if (BE (next_state == NULL && err != REG_NOERROR, 0))
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
cur_idx = re_string_cur_idx (mctx->input);
|
||||
@ -2242,15 +2240,15 @@ check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Return the next state to which the current state STATE will transit by
|
||||
accepting the current input byte. */
|
||||
|
||||
static re_dfastate_t *
|
||||
transit_state_sb (err, preg, state, fl_search, mctx)
|
||||
transit_state_sb (err, preg, state, mctx)
|
||||
reg_errcode_t *err;
|
||||
const regex_t *preg;
|
||||
re_dfastate_t *state;
|
||||
int fl_search;
|
||||
re_match_context_t *mctx;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
@ -2276,29 +2274,6 @@ transit_state_sb (err, preg, state, fl_search, mctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fl_search)
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
int not_initial = 0;
|
||||
if (dfa->mb_cur_max > 1)
|
||||
for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
|
||||
if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
|
||||
{
|
||||
not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
|
||||
break;
|
||||
}
|
||||
if (!not_initial)
|
||||
#endif
|
||||
{
|
||||
*err = re_node_set_merge (&next_nodes,
|
||||
dfa->init_state->entrance_nodes);
|
||||
if (BE (*err != REG_NOERROR, 0))
|
||||
{
|
||||
re_node_set_free (&next_nodes);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
|
||||
preg->newline_anchor);
|
||||
next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
|
||||
@ -2309,6 +2284,7 @@ transit_state_sb (err, preg, state, fl_search, mctx)
|
||||
re_string_skip_bytes (mctx->input, 1);
|
||||
return next_state;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static reg_errcode_t
|
||||
@ -3117,10 +3093,9 @@ expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
|
||||
Return the new table if succeeded, otherwise return NULL. */
|
||||
|
||||
static re_dfastate_t **
|
||||
build_trtable (preg, state, fl_search)
|
||||
build_trtable (preg, state)
|
||||
const regex_t *preg;
|
||||
const re_dfastate_t *state;
|
||||
int fl_search;
|
||||
re_dfastate_t *state;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
@ -3154,6 +3129,7 @@ build_trtable (preg, state, fl_search)
|
||||
|
||||
/* Initialize transition table. */
|
||||
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
|
||||
state->word_trtable = 0;
|
||||
if (BE (trtable == NULL, 0))
|
||||
{
|
||||
if (dests_node_malloced)
|
||||
@ -3170,7 +3146,10 @@ build_trtable (preg, state, fl_search)
|
||||
free (dests_node);
|
||||
/* Return NULL in case of an error, trtable otherwise. */
|
||||
if (ndests == 0)
|
||||
return trtable;
|
||||
{
|
||||
state->trtable = trtable;
|
||||
return trtable;
|
||||
}
|
||||
free (trtable);
|
||||
return NULL;
|
||||
}
|
||||
@ -3224,26 +3203,6 @@ out_free:
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
/* If search flag is set, merge the initial state. */
|
||||
if (fl_search)
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
int not_initial = 0;
|
||||
for (j = 0; j < follows.nelem; ++j)
|
||||
if (dfa->nodes[follows.elems[j]].type == CHARACTER)
|
||||
{
|
||||
not_initial = dfa->nodes[follows.elems[j]].mb_partial;
|
||||
break;
|
||||
}
|
||||
if (!not_initial)
|
||||
#endif
|
||||
{
|
||||
err = re_node_set_merge (&follows,
|
||||
dfa->init_state->entrance_nodes);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
|
||||
if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
|
||||
goto out_free;
|
||||
@ -3274,31 +3233,41 @@ out_free:
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if ((acceptable[i] >> j) & 1)
|
||||
{
|
||||
/* The current state accepts the character ch. */
|
||||
if (IS_WORD_CHAR (ch))
|
||||
{
|
||||
for (k = 0; k < ndests; ++k)
|
||||
if ((dests_ch[k][i] >> j) & 1)
|
||||
for (k = 0; k < ndests; ++k)
|
||||
if ((dests_ch[k][i] >> j) & 1)
|
||||
{
|
||||
/* k-th destination accepts the word character ch. */
|
||||
if (state->word_trtable)
|
||||
{
|
||||
/* k-th destination accepts the word character ch. */
|
||||
trtable[ch] = dest_states_word[k];
|
||||
/* There must be only one destination which accepts
|
||||
character ch. See group_nodes_into_DFAstates. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
else /* not WORD_CHAR */
|
||||
{
|
||||
for (k = 0; k < ndests; ++k)
|
||||
if ((dests_ch[k][i] >> j) & 1)
|
||||
{
|
||||
/* k-th destination accepts the non-word character ch. */
|
||||
trtable[ch] = dest_states[k];
|
||||
/* There must be only one destination which accepts
|
||||
character ch. See group_nodes_into_DFAstates. */
|
||||
break;
|
||||
trtable[ch + SBC_MAX] = dest_states_word[k];
|
||||
}
|
||||
}
|
||||
else if (dfa->mb_cur_max > 1
|
||||
&& dest_states[k] != dest_states_word[k])
|
||||
{
|
||||
re_dfastate_t **new_trtable;
|
||||
|
||||
new_trtable = (re_dfastate_t **)
|
||||
realloc (trtable,
|
||||
sizeof (re_dfastate_t *)
|
||||
* 2 * SBC_MAX);
|
||||
if (BE (new_trtable == NULL, 0))
|
||||
goto out_free;
|
||||
memcpy (new_trtable + SBC_MAX, new_trtable,
|
||||
sizeof (re_dfastate_t *) * SBC_MAX);
|
||||
trtable = new_trtable;
|
||||
state->word_trtable = 1;
|
||||
trtable[ch] = dest_states[k];
|
||||
trtable[ch + SBC_MAX] = dest_states_word[k];
|
||||
}
|
||||
else if (IS_WORD_CHAR (ch))
|
||||
trtable[ch] = dest_states_word[k];
|
||||
else
|
||||
trtable[ch] = dest_states[k];
|
||||
/* There must be only one destination which accepts
|
||||
character ch. See group_nodes_into_DFAstates. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* new line */
|
||||
if (bitset_contain (acceptable, NEWLINE_CHAR))
|
||||
@ -3309,6 +3278,8 @@ out_free:
|
||||
{
|
||||
/* k-th destination accepts newline character. */
|
||||
trtable[NEWLINE_CHAR] = dest_states_nl[k];
|
||||
if (state->word_trtable)
|
||||
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[k];
|
||||
/* There must be only one destination which accepts
|
||||
newline. See group_nodes_into_DFAstates. */
|
||||
break;
|
||||
@ -3325,6 +3296,7 @@ out_free:
|
||||
if (dests_node_malloced)
|
||||
free (dests_node);
|
||||
|
||||
state->trtable = trtable;
|
||||
return trtable;
|
||||
}
|
||||
|
||||
@ -3386,6 +3358,8 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
match it the context. */
|
||||
if (constraint)
|
||||
{
|
||||
int word_char_max;
|
||||
|
||||
if (constraint & NEXT_NEWLINE_CONSTRAINT)
|
||||
{
|
||||
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
|
||||
@ -3400,11 +3374,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
||||
bitset_empty (accepts);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* This assumes ASCII compatible locale. We cannot say
|
||||
anything about the non-ascii chars. */
|
||||
word_char_max
|
||||
= dfa->mb_cur_max > 1 ? BITSET_UINTS / 2 : BITSET_UINTS;
|
||||
if (constraint & NEXT_WORD_CONSTRAINT)
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
for (j = 0; j < word_char_max; ++j)
|
||||
accepts[j] &= dfa->word_char[j];
|
||||
if (constraint & NEXT_NOTWORD_CONSTRAINT)
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
for (j = 0; j < word_char_max; ++j)
|
||||
accepts[j] &= ~dfa->word_char[j];
|
||||
}
|
||||
|
||||
|
@ -350,16 +350,28 @@ mb_tests (const char *pattern, int cflags, const char *string, int eflags,
|
||||
if (strstr (pattern, "[:xdigit:]"))
|
||||
return 0;
|
||||
|
||||
/* XXX: regex ATM handles only single byte equivalence classes. */
|
||||
if (strstr (pattern, "[[=b=]]"))
|
||||
return 0;
|
||||
|
||||
for (i = 1; i < 16; ++i)
|
||||
{
|
||||
char *p = letters;
|
||||
if (i & 1)
|
||||
if ((i & 1)
|
||||
&& (strchr (pattern, 'a') || strchr (string, 'a')
|
||||
|| strchr (pattern, 'A') || strchr (string, 'A')))
|
||||
*p++ = 'a', *p++ = 'A';
|
||||
if (i & 2)
|
||||
if ((i & 2)
|
||||
&& (strchr (pattern, 'b') || strchr (string, 'b')
|
||||
|| strchr (pattern, 'B') || strchr (string, 'B')))
|
||||
*p++ = 'b', *p++ = 'B';
|
||||
if (i & 4)
|
||||
if ((i & 4)
|
||||
&& (strchr (pattern, 'c') || strchr (string, 'c')
|
||||
|| strchr (pattern, 'C') || strchr (string, 'C')))
|
||||
*p++ = 'c', *p++ = 'C';
|
||||
if (i & 8)
|
||||
if ((i & 8)
|
||||
&& (strchr (pattern, 'd') || strchr (string, 'd')
|
||||
|| strchr (pattern, 'D') || strchr (string, 'D')))
|
||||
*p++ = 'd', *p++ = 'D';
|
||||
*p++ = '\0';
|
||||
sprintf (fail, "UTF-8 %s FAIL", letters);
|
||||
@ -489,7 +501,11 @@ main (int argc, char **argv)
|
||||
replace_special_chars (matches);
|
||||
}
|
||||
|
||||
setlocale (LC_ALL, "C");
|
||||
if (setlocale (LC_ALL, "C") == NULL)
|
||||
{
|
||||
puts ("setlocale C failed");
|
||||
ret = 1;
|
||||
}
|
||||
if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
|
||||
|| (try_bre_ere
|
||||
&& test (pattern, cflags & ~REG_EXTENDED, string, eflags,
|
||||
@ -497,12 +513,16 @@ main (int argc, char **argv)
|
||||
ret = 1;
|
||||
else if (test_utf8)
|
||||
{
|
||||
setlocale (LC_ALL, "cs_CZ.UTF-8");
|
||||
if (test (pattern, cflags, string, eflags, expect, matches,
|
||||
"UTF-8 FAIL")
|
||||
|| (try_bre_ere
|
||||
&& test (pattern, cflags & ~REG_EXTENDED, string, eflags,
|
||||
expect, matches, "UTF-8 FAIL")))
|
||||
if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
|
||||
{
|
||||
puts ("setlocale cs_CZ.UTF-8 failed");
|
||||
ret = 1;
|
||||
}
|
||||
else if (test (pattern, cflags, string, eflags, expect, matches,
|
||||
"UTF-8 FAIL")
|
||||
|| (try_bre_ere
|
||||
&& test (pattern, cflags & ~REG_EXTENDED, string,
|
||||
eflags, expect, matches, "UTF-8 FAIL")))
|
||||
ret = 1;
|
||||
else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
|
||||
|| (try_bre_ere
|
||||
|
Loading…
Reference in New Issue
Block a user