(build_trtable): Don't allocate the trtable until state->word_trtable is known. Don't hardcode UINT_BITS iterations on each bitset item.

(match_ctx_init, match_ctx_clean,
match_ctx_free, match_ctx_free_subtops,
match_ctx_add_entry, search_cur_bkref_entry,
match_ctx_clear_flag, match_ctx_add_subtop,
match_ctx_add_sublast, sift_ctx_init,
re_search_internal, re_search_2_stub, re_search_stub,
re_copy_regs, acquire_init_state_context,
prune_impossible_nodes, check_matching,
check_halt_node_context, check_halt_state_context,
update_regs, proceed_next_node, push_fail_stack,
pop_fail_stack, set_regs, free_fail_stack_return,
sift_states_iter_mb, sift_states_backward,
update_cur_sifted_state, add_epsilon_src_nodes,
sub_epsilon_src_nodes, check_dst_limits,
check_dst_limits_calc_pos, check_subexp_limits,
sift_states_bkref, clean_state_log_if_need,
merge_state_array, transit_state,
check_subexp_matching_top, transit_state_sb,
transit_state_mb, transit_state_bkref,
get_subexp, get_subexp_sub, find_subexp_node,
check_arrival, check_arrival_add_next_nodes,
find_collation_sequence_value, check_arrival_expand_ecl,
check_arrival_expand_ecl_sub, expand_bkref_cache,
build_trtable, check_node_accept_bytes, extend_buffers,
group_nodes_into_DFAstates, check_node_accept): Add internal_function to the prototypes.
This commit is contained in:
Ulrich Drepper 2003-12-23 01:43:19 +00:00
parent d3e4ed994c
commit 3ce12656a8
1 changed file with 128 additions and 113 deletions

View File

@ -19,176 +19,176 @@
02111-1307 USA. */
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
re_string_t *input, int n);
static void match_ctx_clean (re_match_context_t *mctx);
static void match_ctx_free (re_match_context_t *cache);
static void match_ctx_free_subtops (re_match_context_t *mctx);
re_string_t *input, int n) internal_function;
static void match_ctx_clean (re_match_context_t *mctx) internal_function;
static void match_ctx_free (re_match_context_t *cache) internal_function;
static void match_ctx_free_subtops (re_match_context_t *mctx) internal_function;
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int str_idx, int from, int to);
static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
static void match_ctx_clear_flag (re_match_context_t *mctx);
int str_idx, int from, int to) internal_function;
static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx) internal_function;
static void match_ctx_clear_flag (re_match_context_t *mctx) internal_function;
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
int str_idx);
int str_idx) internal_function;
static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
int node, int str_idx);
int node, int str_idx) internal_function;
static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
re_dfastate_t **limited_sts, int last_node,
int last_str_idx, int check_subexp);
int last_str_idx, int check_subexp) internal_function;
static reg_errcode_t re_search_internal (const regex_t *preg,
const char *string, int length,
int start, int range, int stop,
size_t nmatch, regmatch_t pmatch[],
int eflags);
int eflags) internal_function;
static int re_search_2_stub (struct re_pattern_buffer *bufp,
const char *string1, int length1,
const char *string2, int length2,
int start, int range, struct re_registers *regs,
int stop, int ret_len);
int stop, int ret_len) internal_function;
static int re_search_stub (struct re_pattern_buffer *bufp,
const char *string, int length, int start,
int range, int stop, struct re_registers *regs,
int ret_len);
int ret_len) internal_function;
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
int nregs, int regs_allocated);
int nregs, int regs_allocated) internal_function;
static inline re_dfastate_t *acquire_init_state_context
(reg_errcode_t *err, const regex_t *preg, const re_match_context_t *mctx,
int idx) __attribute ((always_inline));
int idx) __attribute ((always_inline)) internal_function;
static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
re_match_context_t *mctx);
re_match_context_t *mctx) internal_function;
static int check_matching (const regex_t *preg, re_match_context_t *mctx,
int fl_longest_match);
int fl_longest_match) internal_function;
static int check_halt_node_context (const re_dfa_t *dfa, int node,
unsigned int context);
unsigned int context) internal_function;
static int check_halt_state_context (const regex_t *preg,
const re_dfastate_t *state,
const re_match_context_t *mctx, int idx);
const re_match_context_t *mctx, int idx) internal_function;
static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
int cur_idx, int nmatch);
int cur_idx, int nmatch) internal_function;
static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
const re_match_context_t *mctx,
int *pidx, int node, re_node_set *eps_via_nodes,
struct re_fail_stack_t *fs);
struct re_fail_stack_t *fs) internal_function;
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
int str_idx, int *dests, int nregs,
regmatch_t *regs,
re_node_set *eps_via_nodes);
re_node_set *eps_via_nodes) internal_function;
static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
regmatch_t *regs, re_node_set *eps_via_nodes);
regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch,
int fl_backtrack);
static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
int fl_backtrack) internal_function;
static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const regex_t *preg,
const re_match_context_t *mctx,
re_sift_context_t *sctx,
int node_idx, int str_idx, int max_str_idx);
int node_idx, int str_idx, int max_str_idx) internal_function;
#endif /* RE_ENABLE_I18N */
static reg_errcode_t sift_states_backward (const regex_t *preg,
re_match_context_t *mctx,
re_sift_context_t *sctx);
re_sift_context_t *sctx) internal_function;
static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
re_match_context_t *mctx,
re_sift_context_t *sctx,
int str_idx,
re_node_set *dest_nodes);
re_node_set *dest_nodes) internal_function;
static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
re_node_set *dest_nodes,
const re_node_set *candidates);
const re_node_set *candidates) internal_function;
static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
re_node_set *dest_nodes,
const re_node_set *and_nodes);
const re_node_set *and_nodes) internal_function;
static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
re_match_context_t *mctx, int dst_node,
int dst_idx, int src_node, int src_idx);
int dst_idx, int src_node, int src_idx) internal_function;
static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
int limit, re_node_set *eclosures,
int subexp_idx, int node, int str_idx);
int subexp_idx, int node, int str_idx) internal_function;
static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
re_node_set *dest_nodes,
const re_node_set *candidates,
re_node_set *limits,
struct re_backref_cache_entry *bkref_ents,
int str_idx);
int str_idx) internal_function;
static reg_errcode_t sift_states_bkref (const regex_t *preg,
re_match_context_t *mctx,
re_sift_context_t *sctx,
int str_idx, re_node_set *dest_nodes);
int str_idx, re_node_set *dest_nodes) internal_function;
static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
int next_state_log_idx);
int next_state_log_idx) internal_function;
static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
re_dfastate_t **src, int num);
re_dfastate_t **src, int num) internal_function;
static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
re_match_context_t *mctx,
re_dfastate_t *state);
re_dfastate_t *state) internal_function;
static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
re_match_context_t *mctx,
re_node_set *cur_nodes,
int str_idx);
int str_idx) internal_function;
#if 0
static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *pstate,
re_match_context_t *mctx);
re_match_context_t *mctx) internal_function;
#endif
#ifdef RE_ENABLE_I18N
static reg_errcode_t transit_state_mb (const regex_t *preg,
re_dfastate_t *pstate,
re_match_context_t *mctx);
re_match_context_t *mctx) internal_function;
#endif /* RE_ENABLE_I18N */
static reg_errcode_t transit_state_bkref (const regex_t *preg,
const re_node_set *nodes,
re_match_context_t *mctx);
re_match_context_t *mctx) internal_function;
static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
int bkref_node, int bkref_str_idx);
int bkref_node, int bkref_str_idx) internal_function;
static reg_errcode_t get_subexp_sub (const regex_t *preg,
re_match_context_t *mctx,
const re_sub_match_top_t *sub_top,
re_sub_match_last_t *sub_last,
int bkref_node, int bkref_str);
int bkref_node, int bkref_str) internal_function;
static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
int subexp_idx, int type);
int subexp_idx, int type) internal_function;
static reg_errcode_t check_arrival (const regex_t *preg,
re_match_context_t *mctx,
state_array_t *path, int top_node,
int top_str, int last_node, int last_str,
int type);
int type) internal_function;
static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
re_dfa_t *dfa,
re_match_context_t *mctx,
int str_idx,
re_node_set *cur_nodes,
re_node_set *next_nodes);
re_node_set *next_nodes) internal_function;
static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
re_node_set *cur_nodes,
int ex_subexp, int type);
int ex_subexp, int type) internal_function;
static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
re_node_set *dst_nodes,
int target, int ex_subexp,
int type);
int type) internal_function;
static reg_errcode_t expand_bkref_cache (const regex_t *preg,
re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
int last_str, int subexp_num,
int type);
int type) internal_function;
static re_dfastate_t **build_trtable (const regex_t *dfa,
re_dfastate_t *state);
re_dfastate_t *state) internal_function;
#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (const regex_t *preg, int node_idx,
const re_string_t *input, int idx);
const re_string_t *input, int idx) internal_function;
# ifdef _LIBC
static unsigned int find_collation_sequence_value (const unsigned char *mbs,
size_t name_len);
size_t name_len) internal_function;
# endif /* _LIBC */
#endif /* RE_ENABLE_I18N */
static int group_nodes_into_DFAstates (const regex_t *dfa,
const re_dfastate_t *state,
re_node_set *states_node,
bitset *states_ch);
bitset *states_ch) internal_function;
static int check_node_accept (const regex_t *preg, const re_token_t *node,
const re_match_context_t *mctx, int idx);
static reg_errcode_t extend_buffers (re_match_context_t *mctx);
const re_match_context_t *mctx, int idx) internal_function;
static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
/* Entry point for POSIX code. */
@ -3132,7 +3132,8 @@ build_trtable (preg, state)
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k, ch;
int i, j, ch;
unsigned int elem, mask;
int dests_node_malloced = 0, dest_states_malloced = 0;
int ndests; /* Number of the destination states from `state'. */
re_dfastate_t **trtable;
@ -3161,14 +3162,7 @@ build_trtable (preg, state)
dests_ch = (bitset *) (dests_node + SBC_MAX);
/* Initialize transition table. */
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
state->word_trtable = 0;
if (BE (trtable == NULL, 0))
{
if (dests_node_malloced)
free (dests_node);
return NULL;
}
/* At first, group all nodes belonging to `state' into several
destinations. */
@ -3180,10 +3174,10 @@ build_trtable (preg, state)
/* Return NULL in case of an error, trtable otherwise. */
if (ndests == 0)
{
state->trtable = trtable;
return trtable;
state->trtable = (re_dfastate_t **)
calloc (sizeof (re_dfastate_t *), SBC_MAX);;
return state->trtable;
}
free (trtable);
return NULL;
}
@ -3209,7 +3203,6 @@ out_free:
re_node_set_free (&follows);
for (i = 0; i < ndests; ++i)
re_node_set_free (dests_node + i);
free (trtable);
if (dests_node_malloced)
free (dests_node);
return NULL;
@ -3247,11 +3240,16 @@ out_free:
CONTEXT_WORD);
if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
goto out_free;
if (dest_states[i] != dest_states_word[i]
&& dfa->mb_cur_max > 1)
state->word_trtable = 1;
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE);
if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
goto out_free;
}
}
else
{
dest_states_word[i] = dest_states[i];
@ -3260,59 +3258,76 @@ out_free:
bitset_merge (acceptable, dests_ch[i]);
}
/* Update the transition table. */
/* For all characters ch...: */
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if ((acceptable[i] >> j) & 1)
{
for (k = 0; k < ndests; ++k)
if ((dests_ch[k][i] >> j) & 1)
{
/* k-th destination accepts the word character ch. */
if (state->word_trtable)
{
trtable[ch] = dest_states[k];
trtable[ch + SBC_MAX] = dest_states_word[k];
}
else if (dfa->mb_cur_max > 1
&& dest_states[k] != dest_states_word[k])
{
re_dfastate_t **new_trtable;
if (!BE (state->word_trtable, 0))
{
/* We don't care about whether the following character is a word
character, or we are in a single-byte character set so we can
discern by looking at the character code: allocate a
256-entry transition table. */
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
if (BE (trtable == NULL, 0))
goto out_free;
new_trtable = (re_dfastate_t **)
realloc (trtable,
sizeof (re_dfastate_t *)
* 2 * SBC_MAX);
if (BE (new_trtable == NULL, 0))
goto out_free;
memcpy (new_trtable + SBC_MAX, new_trtable,
sizeof (re_dfastate_t *) * SBC_MAX);
trtable = new_trtable;
state->word_trtable = 1;
trtable[ch] = dest_states[k];
trtable[ch + SBC_MAX] = dest_states_word[k];
}
else if (IS_WORD_CHAR (ch))
trtable[ch] = dest_states_word[k];
else
trtable[ch] = dest_states[k];
/* There must be only one destination which accepts
character ch. See group_nodes_into_DFAstates. */
break;
}
}
/* For all characters ch...: */
for (i = 0; i < BITSET_UINTS; ++i)
for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
{
/* There must be exactly one destination which accepts
character ch. See group_nodes_into_DFAstates. */
for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
;
/* j-th destination accepts the word character ch. */
if (IS_WORD_CHAR (ch))
trtable[ch] = dest_states_word[j];
else
trtable[ch] = dest_states[j];
}
}
else
{
/* We care about whether the following character is a word
character, and we are in a multi-byte character set: discern
by looking at the character code: build two 256-entry
transition tables, one starting at trtable[0] and one
starting at trtable[SBC_MAX]. */
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
2 * SBC_MAX);
if (BE (trtable == NULL, 0))
goto out_free;
/* For all characters ch...: */
for (i = 0; i < BITSET_UINTS; ++i)
for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
{
/* There must be exactly one destination which accepts
character ch. See group_nodes_into_DFAstates. */
for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
;
/* j-th destination accepts the word character ch. */
trtable[ch] = dest_states[j];
trtable[ch + SBC_MAX] = dest_states_word[j];
}
}
/* new line */
if (bitset_contain (acceptable, NEWLINE_CHAR))
{
/* The current state accepts newline character. */
for (k = 0; k < ndests; ++k)
if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
for (j = 0; j < ndests; ++j)
if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
{
/* j-th destination accepts newline character. */
trtable[NEWLINE_CHAR] = dest_states_nl[k];
trtable[NEWLINE_CHAR] = dest_states_nl[j];
if (state->word_trtable)
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[k];
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
/* There must be only one destination which accepts
newline. See group_nodes_into_DFAstates. */
break;