Thompson matcher refactored.
2013-07-31 Tim Shen <timshen91@gmail.com> Thompson matcher refactored. Fix grouping problem. * include/bits/regex.h: Use a dispatcher _M_get_matcher(). * include/bits/regex_compiler.h: Tweak for auto switching. * include/bits/regex_grep_matcher.h: Class structure. * include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson matcher) refactoring. * include/bits/regex_nfa.h: Change _Results's interfaces. * include/std/regex: Includes <map> and <queue>. * testsuite/28_regex/algorithms/regex_match/extended/53622.cc: For both matchers. * testsuite/28_regex/algorithms/regex_match/extended/57173.cc: For both matchers. * testsuite/28_regex/algorithms/regex_match/extended/ string_dispatch_01.cc: New. From-SVN: r201358
This commit is contained in:
parent
daaf92f822
commit
abf1719efe
@ -1,3 +1,20 @@
|
|||||||
|
2013-07-31 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
|
Thompson matcher refactored. Fix grouping problem.
|
||||||
|
* include/bits/regex.h: Use a dispatcher _M_get_matcher().
|
||||||
|
* include/bits/regex_compiler.h: Tweak for auto switching.
|
||||||
|
* include/bits/regex_grep_matcher.h: Class structure.
|
||||||
|
* include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson
|
||||||
|
matcher) refactoring.
|
||||||
|
* include/bits/regex_nfa.h: Change _Results's interfaces.
|
||||||
|
* include/std/regex: Includes <map> and <queue>.
|
||||||
|
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
|
||||||
|
For both matchers.
|
||||||
|
* testsuite/28_regex/algorithms/regex_match/extended/57173.cc:
|
||||||
|
For both matchers.
|
||||||
|
* testsuite/28_regex/algorithms/regex_match/extended/
|
||||||
|
string_dispatch_01.cc: New.
|
||||||
|
|
||||||
2013-07-30 Paolo Carlini <paolo.carlini@oracle.com>
|
2013-07-30 Paolo Carlini <paolo.carlini@oracle.com>
|
||||||
|
|
||||||
Revert last commit.
|
Revert last commit.
|
||||||
|
@ -2184,8 +2184,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
|
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
|
||||||
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
|
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
|
||||||
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
|
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
|
||||||
__detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
|
return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match();
|
||||||
return __matcher._M_dfs_match();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2336,8 +2335,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
|
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
|
||||||
{
|
{
|
||||||
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
|
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
|
||||||
__detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
|
auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags);
|
||||||
if (__matcher._M_dfs_search_from_first())
|
if (__matcher->_M_search_from_first())
|
||||||
{
|
{
|
||||||
__r._M_set_range(__m.size(),
|
__r._M_set_range(__m.size(),
|
||||||
__detail::_SpecializedCursor<_Bi_iter>
|
__detail::_SpecializedCursor<_Bi_iter>
|
||||||
|
@ -936,7 +936,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
if (_M_match_token(_ScannerT::_S_token_backref))
|
if (_M_match_token(_ScannerT::_S_token_backref))
|
||||||
{
|
{
|
||||||
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
||||||
return true;
|
_M_state_store._M_set_back_ref(true);
|
||||||
|
//return true;
|
||||||
}
|
}
|
||||||
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
|
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
|
||||||
{
|
{
|
||||||
|
@ -60,6 +60,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
const _SpecializedCursor<_FwdIterT>& __cursor,
|
const _SpecializedCursor<_FwdIterT>& __cursor,
|
||||||
match_results<_FwdIterT, _Alloc>& __m);
|
match_results<_FwdIterT, _Alloc>& __m);
|
||||||
|
|
||||||
|
~_SpecializedResults()
|
||||||
|
{
|
||||||
|
if (_M_managed)
|
||||||
|
delete &_M_results;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
_SpecializedResults(const _SpecializedResults& __rhs)
|
||||||
|
: _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
|
||||||
|
_M_managed(true)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
public:
|
||||||
void
|
void
|
||||||
_M_set_pos(int __i, int __j, const _PatternCursor& __pc);
|
_M_set_pos(int __i, int __j, const _PatternCursor& __pc);
|
||||||
|
|
||||||
@ -76,8 +89,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
_M_set_matched(int __i, bool __is_matched)
|
_M_set_matched(int __i, bool __is_matched)
|
||||||
{ _M_results.at(__i).matched = __is_matched; }
|
{ _M_results.at(__i).matched = __is_matched; }
|
||||||
|
|
||||||
|
std::unique_ptr<_Results>
|
||||||
|
_M_clone() const
|
||||||
|
{ return unique_ptr<_Results>(new _SpecializedResults(*this)); }
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_assign(const _Results& __rhs)
|
||||||
|
{
|
||||||
|
auto __r = static_cast<const _SpecializedResults*>(&__rhs);
|
||||||
|
_M_results = __r->_M_results;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
match_results<_FwdIterT, _Alloc>& _M_results;
|
match_results<_FwdIterT, _Alloc>& _M_results;
|
||||||
|
bool _M_managed;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename _FwdIterT, typename _Alloc>
|
template<typename _FwdIterT, typename _Alloc>
|
||||||
@ -85,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
_SpecializedResults(const _Automaton::_SizeT __size,
|
_SpecializedResults(const _Automaton::_SizeT __size,
|
||||||
const _SpecializedCursor<_FwdIterT>& __cursor,
|
const _SpecializedCursor<_FwdIterT>& __cursor,
|
||||||
match_results<_FwdIterT, _Alloc>& __m)
|
match_results<_FwdIterT, _Alloc>& __m)
|
||||||
: _M_results(__m)
|
: _M_results(__m), _M_managed(false)
|
||||||
{
|
{
|
||||||
_M_results.clear();
|
_M_results.clear();
|
||||||
_M_results.reserve(__size + 2);
|
_M_results.reserve(__size + 2);
|
||||||
@ -110,9 +135,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
_M_results.at(__i).second = __c._M_pos();
|
_M_results.at(__i).second = __c._M_pos();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A stack of states used in evaluating the NFA.
|
|
||||||
typedef std::stack<_StateIdT, std::vector<_StateIdT> > _StateStack;
|
|
||||||
|
|
||||||
/// Executes a regular expression NFA/DFA over a range using a
|
/// Executes a regular expression NFA/DFA over a range using a
|
||||||
/// variant of the parallel execution algorithm featured in the grep
|
/// variant of the parallel execution algorithm featured in the grep
|
||||||
/// utility, modified to use Laurikari tags.
|
/// utility, modified to use Laurikari tags.
|
||||||
@ -124,45 +146,110 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
const _AutomatonPtr& __automaton,
|
const _AutomatonPtr& __automaton,
|
||||||
regex_constants::match_flag_type __flags)
|
regex_constants::match_flag_type __flags)
|
||||||
: _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
|
: _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
|
||||||
_M_pattern(__p), _M_results(__r)
|
_M_str_cur(__p), _M_results(__r)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
virtual
|
||||||
|
~_Grep_matcher()
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
// Set matched when string exactly match the pattern.
|
// Set matched when string exactly match the pattern.
|
||||||
void
|
virtual bool
|
||||||
_M_match();
|
_M_match() = 0;
|
||||||
|
|
||||||
// Set matched when some prefix of the string matches the pattern.
|
// Set matched when some prefix of the string matches the pattern.
|
||||||
void
|
virtual bool
|
||||||
_M_search_from_first();
|
_M_search_from_first() = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const std::shared_ptr<_Nfa> _M_nfa;
|
||||||
|
_PatternCursor& _M_str_cur;
|
||||||
|
_Results& _M_results;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Time complexity: exponential
|
||||||
|
// Space complexity: O(_M_str_cur.size())
|
||||||
|
// _M_dfs() takes a state, along with current string cursor(_M_str_cur),
|
||||||
|
// trying to match current state with current character.
|
||||||
|
// Only _S_opcode_match will consume a character.
|
||||||
|
class _DFSMatcher
|
||||||
|
: public _Grep_matcher
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
_DFSMatcher(_PatternCursor& __p,
|
||||||
|
_Results& __r,
|
||||||
|
const _AutomatonPtr& __automaton,
|
||||||
|
regex_constants::match_flag_type __flags)
|
||||||
|
: _Grep_matcher(__p, __r, __automaton, __flags)
|
||||||
|
{ }
|
||||||
|
|
||||||
// TODO: in the future this function will be _M_match, in another class.
|
|
||||||
bool
|
bool
|
||||||
_M_dfs_match()
|
_M_match()
|
||||||
{ return _M_dfs<true>(_M_nfa->_M_start()); }
|
{ return _M_dfs<true>(_M_nfa->_M_start()); }
|
||||||
|
|
||||||
// TODO: in the future this function will be _M_search_from_first,
|
|
||||||
// in another class.
|
|
||||||
bool
|
bool
|
||||||
_M_dfs_search_from_first()
|
_M_search_from_first()
|
||||||
{ return _M_dfs<false>(_M_nfa->_M_start()); }
|
{ return _M_dfs<false>(_M_nfa->_M_start()); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
_StateSet
|
|
||||||
_M_e_closure(_StateIdT __i);
|
|
||||||
|
|
||||||
_StateSet
|
|
||||||
_M_e_closure(const _StateSet& __s);
|
|
||||||
|
|
||||||
_StateSet
|
|
||||||
_M_e_closure(_StateStack& __stack, const _StateSet& __s);
|
|
||||||
|
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
bool
|
bool
|
||||||
_M_dfs(_StateIdT __i);
|
_M_dfs(_StateIdT __i);
|
||||||
|
};
|
||||||
|
|
||||||
const std::shared_ptr<_Nfa> _M_nfa;
|
// It's essentially a variant of Single-Source-Shortest-Path problem, where,
|
||||||
_PatternCursor& _M_pattern;
|
// the matching results is the final distance and should be minimized.
|
||||||
_Results& _M_results;
|
// Instead of using Dijkstra Algorithm, I pick up the queue-optimized
|
||||||
|
// (BFS-like) Bellman-Ford algorithm,
|
||||||
|
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
|
||||||
|
//
|
||||||
|
// Every entry of _M_current saves the solution(grouping status) for every
|
||||||
|
// matching head. When states transfer, solutions will be compared and
|
||||||
|
// deduplicated(based on which greedy mode we have).
|
||||||
|
//
|
||||||
|
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
|
||||||
|
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
|
||||||
|
class _BFSMatcher
|
||||||
|
: public _Grep_matcher
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
_BFSMatcher(_PatternCursor& __p,
|
||||||
|
_Results& __r,
|
||||||
|
const _AutomatonPtr& __automaton,
|
||||||
|
regex_constants::match_flag_type __flags)
|
||||||
|
: _Grep_matcher(__p, __r, __automaton, __flags)
|
||||||
|
{
|
||||||
|
if (_M_nfa->_M_start() != _S_invalid_state_id)
|
||||||
|
_M_current[_M_nfa->_M_start()] = _M_results._M_clone();
|
||||||
|
_M_e_closure();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
_M_match()
|
||||||
|
{ return _M_main_loop<true>(); }
|
||||||
|
|
||||||
|
bool
|
||||||
|
_M_search_from_first()
|
||||||
|
{ return _M_main_loop<false>(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<bool __match_mode>
|
||||||
|
bool
|
||||||
|
_M_main_loop();
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_e_closure();
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_move();
|
||||||
|
|
||||||
|
bool
|
||||||
|
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
|
||||||
|
|
||||||
|
bool
|
||||||
|
_M_includes_some() const;
|
||||||
|
|
||||||
|
std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
|
||||||
};
|
};
|
||||||
|
|
||||||
//@} regex-detail
|
//@} regex-detail
|
||||||
|
@ -32,83 +32,13 @@
|
|||||||
|
|
||||||
namespace std _GLIBCXX_VISIBILITY(default)
|
namespace std _GLIBCXX_VISIBILITY(default)
|
||||||
{
|
{
|
||||||
namespace
|
|
||||||
{
|
|
||||||
// A stack of states used in evaluating the NFA.
|
|
||||||
typedef std::stack<std::__detail::_StateIdT,
|
|
||||||
std::vector<std::__detail::_StateIdT>
|
|
||||||
> _StateStack;
|
|
||||||
|
|
||||||
// Obtains the next state set given the current state set __s and the current
|
|
||||||
// input character.
|
|
||||||
inline std::__detail::_StateSet
|
|
||||||
__move(const std::__detail::_PatternCursor& __p,
|
|
||||||
const std::__detail::_Nfa& __nfa,
|
|
||||||
const std::__detail::_StateSet& __s)
|
|
||||||
{
|
|
||||||
std::__detail::_StateSet __m;
|
|
||||||
for (std::__detail::_StateSet::const_iterator __i = __s.begin();
|
|
||||||
__i != __s.end(); ++__i)
|
|
||||||
{
|
|
||||||
if (*__i == std::__detail::_S_invalid_state_id)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const std::__detail::_State& __state = __nfa[*__i];
|
|
||||||
if (__state._M_opcode == std::__detail::_S_opcode_match
|
|
||||||
&& __state._M_matches(__p))
|
|
||||||
__m.insert(__state._M_next);
|
|
||||||
}
|
|
||||||
return __m;
|
|
||||||
}
|
|
||||||
|
|
||||||
// returns true if (__s intersect __t) is not empty
|
|
||||||
inline bool
|
|
||||||
__includes_some(const std::__detail::_StateSet& __s,
|
|
||||||
const std::__detail::_StateSet& __t)
|
|
||||||
{
|
|
||||||
if (__s.size() > 0 && __t.size() > 0)
|
|
||||||
{
|
|
||||||
std::__detail::_StateSet::const_iterator __first = __s.begin();
|
|
||||||
std::__detail::_StateSet::const_iterator __second = __t.begin();
|
|
||||||
while (__first != __s.end() && __second != __t.end())
|
|
||||||
{
|
|
||||||
if (*__first < *__second)
|
|
||||||
++__first;
|
|
||||||
else if (*__second < *__first)
|
|
||||||
++__second;
|
|
||||||
else
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If an identified state __u is not already in the current state set __e,
|
|
||||||
// insert it and push it on the current state stack __s.
|
|
||||||
inline void
|
|
||||||
__add_visited_state(const std::__detail::_StateIdT __u,
|
|
||||||
_StateStack& __s,
|
|
||||||
std::__detail::_StateSet& __e)
|
|
||||||
{
|
|
||||||
if (__e.count(__u) == 0)
|
|
||||||
{
|
|
||||||
__e.insert(__u);
|
|
||||||
__s.push(__u);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // anonymous namespace
|
|
||||||
|
|
||||||
namespace __detail
|
namespace __detail
|
||||||
{
|
{
|
||||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
|
|
||||||
// _M_dfs() take a state, along with current string cursor(_M_pattern),
|
|
||||||
// trying to match current state with current character.
|
|
||||||
// Only _S_opcode_match will consume a character.
|
|
||||||
// TODO: This is too slow. Try to compile the NFA to a DFA.
|
// TODO: This is too slow. Try to compile the NFA to a DFA.
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
bool _Grep_matcher::
|
bool _DFSMatcher::
|
||||||
_M_dfs(_StateIdT __i)
|
_M_dfs(_StateIdT __i)
|
||||||
{
|
{
|
||||||
if (__i == _S_invalid_state_id)
|
if (__i == _S_invalid_state_id)
|
||||||
@ -126,25 +56,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
|| _M_dfs<__match_mode>(__state._M_next);
|
|| _M_dfs<__match_mode>(__state._M_next);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_begin:
|
case _S_opcode_subexpr_begin:
|
||||||
__state._M_tagger(_M_pattern, _M_results);
|
__state._M_tagger(_M_str_cur, _M_results);
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
__state._M_tagger(_M_pattern, _M_results);
|
__state._M_tagger(_M_str_cur, _M_results);
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
_M_results._M_set_matched(__state._M_subexpr, __ret);
|
_M_results._M_set_matched(__state._M_subexpr, __ret);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_match:
|
case _S_opcode_match:
|
||||||
if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
|
if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur))
|
||||||
{
|
{
|
||||||
_M_pattern._M_next();
|
_M_str_cur._M_next();
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
_M_pattern._M_prev();
|
_M_str_cur._M_prev();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_accept:
|
case _S_opcode_accept:
|
||||||
if (__match_mode)
|
if (__match_mode)
|
||||||
__ret = _M_pattern._M_at_end();
|
__ret = _M_str_cur._M_at_end();
|
||||||
else
|
else
|
||||||
__ret = true;
|
__ret = true;
|
||||||
break;
|
break;
|
||||||
@ -154,88 +84,158 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
return __ret;
|
return __ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void _Grep_matcher::
|
template<bool __match_mode>
|
||||||
_M_match()
|
bool _BFSMatcher::
|
||||||
|
_M_main_loop()
|
||||||
{
|
{
|
||||||
__detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
|
while (!_M_str_cur._M_at_end())
|
||||||
for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
|
{
|
||||||
__t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
|
if (!__match_mode)
|
||||||
|
if (_M_includes_some())
|
||||||
_M_results._M_set_matched(0,
|
return true;
|
||||||
__includes_some(_M_nfa->_M_final_states(), __t));
|
_M_move();
|
||||||
|
_M_str_cur._M_next();
|
||||||
|
_M_e_closure();
|
||||||
|
}
|
||||||
|
return _M_includes_some();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void _Grep_matcher::
|
// The SPFA approach.
|
||||||
_M_search_from_first()
|
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||||
|
inline
|
||||||
|
void _BFSMatcher::
|
||||||
|
_M_e_closure()
|
||||||
{
|
{
|
||||||
__detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
|
std::queue<_StateIdT> __q;
|
||||||
for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
|
std::vector<bool> __in_q(_M_nfa->size(), false);
|
||||||
|
for (auto& __it : _M_current)
|
||||||
{
|
{
|
||||||
if (__includes_some(_M_nfa->_M_final_states(), __t)) // KISS
|
__in_q[__it.first] = true;
|
||||||
|
__q.push(__it.first);
|
||||||
|
}
|
||||||
|
while (!__q.empty())
|
||||||
{
|
{
|
||||||
_M_results._M_set_matched(0, true);
|
auto __u = __q.front();
|
||||||
|
__q.pop();
|
||||||
|
__in_q[__u] = false;
|
||||||
|
const auto& __state = (*_M_nfa)[__u];
|
||||||
|
|
||||||
|
// Can be implemented using a method, but there are too many arguments.
|
||||||
|
auto __add_visited_state = [&](_StateIdT __v)
|
||||||
|
{
|
||||||
|
if (__v == _S_invalid_state_id)
|
||||||
return;
|
return;
|
||||||
|
if (_M_match_less_than(__u, __v))
|
||||||
|
{
|
||||||
|
_M_current[__v] = _M_current[__u]->_M_clone();
|
||||||
|
// if a state is updated, its outgoing neighbors should be
|
||||||
|
// reconsidered too. Push them to the queue.
|
||||||
|
if (!__in_q[__v])
|
||||||
|
{
|
||||||
|
__in_q[__v] = true;
|
||||||
|
__q.push(__v);
|
||||||
}
|
}
|
||||||
__t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
|
|
||||||
}
|
|
||||||
_M_results._M_set_matched(0, false);
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Creates the e-closure set for the initial state __i.
|
|
||||||
inline _StateSet _Grep_matcher::
|
|
||||||
_M_e_closure(_StateIdT __i)
|
|
||||||
{
|
|
||||||
_StateSet __s;
|
|
||||||
__s.insert(__i);
|
|
||||||
_StateStack __stack;
|
|
||||||
__stack.push(__i);
|
|
||||||
return this->_M_e_closure(__stack, __s);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creates the e-closure set for an arbitrary state set __s.
|
|
||||||
inline _StateSet _Grep_matcher::
|
|
||||||
_M_e_closure(const _StateSet& __s)
|
|
||||||
{
|
|
||||||
_StateStack __stack;
|
|
||||||
for (_StateSet::const_iterator __i = __s.begin(); __i != __s.end(); ++__i)
|
|
||||||
__stack.push(*__i);
|
|
||||||
return this->_M_e_closure(__stack, __s);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline _StateSet _Grep_matcher::
|
|
||||||
_M_e_closure(_StateStack& __stack, const _StateSet& __s)
|
|
||||||
{
|
|
||||||
_StateSet __e = __s;
|
|
||||||
while (!__stack.empty())
|
|
||||||
{
|
|
||||||
_StateIdT __t = __stack.top(); __stack.pop();
|
|
||||||
if (__t == _S_invalid_state_id)
|
|
||||||
continue;
|
|
||||||
// for each __u with edge from __t to __u labeled e do ...
|
|
||||||
const _State& __state = _M_nfa->operator[](__t);
|
|
||||||
switch (__state._M_opcode)
|
switch (__state._M_opcode)
|
||||||
{
|
{
|
||||||
case _S_opcode_alternative:
|
case _S_opcode_alternative:
|
||||||
__add_visited_state(__state._M_next, __stack, __e);
|
__add_visited_state(__state._M_next);
|
||||||
__add_visited_state(__state._M_alt, __stack, __e);
|
__add_visited_state(__state._M_alt);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_begin:
|
case _S_opcode_subexpr_begin:
|
||||||
__add_visited_state(__state._M_next, __stack, __e);
|
__state._M_tagger(_M_str_cur, *_M_current[__u]);
|
||||||
__state._M_tagger(_M_pattern, _M_results);
|
__add_visited_state(__state._M_next);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
__add_visited_state(__state._M_next, __stack, __e);
|
__state._M_tagger(_M_str_cur, *_M_current[__u]);
|
||||||
__state._M_tagger(_M_pattern, _M_results);
|
_M_current[__u]->_M_set_matched(__state._M_subexpr, true);
|
||||||
_M_results._M_set_matched(__state._M_subexpr, true);
|
__add_visited_state(__state._M_next);
|
||||||
|
break;
|
||||||
|
case _S_opcode_match:
|
||||||
break;
|
break;
|
||||||
case _S_opcode_accept:
|
case _S_opcode_accept:
|
||||||
__add_visited_state(__state._M_next, __stack, __e);
|
__add_visited_state(__state._M_next);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
_GLIBCXX_DEBUG_ASSERT(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return __e;
|
}
|
||||||
|
|
||||||
|
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||||
|
inline
|
||||||
|
void _BFSMatcher::
|
||||||
|
_M_move()
|
||||||
|
{
|
||||||
|
decltype(_M_current) __next;
|
||||||
|
for (auto& __it : _M_current)
|
||||||
|
{
|
||||||
|
const auto& __state = (*_M_nfa)[__it.first];
|
||||||
|
if (__state._M_opcode == _S_opcode_match
|
||||||
|
&& __state._M_matches(_M_str_cur))
|
||||||
|
if (_M_match_less_than(__it.first, __state._M_next)
|
||||||
|
&& __state._M_next != _S_invalid_state_id)
|
||||||
|
__next[__state._M_next] = __it.second->_M_clone();
|
||||||
|
}
|
||||||
|
_M_current = move(__next);
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||||
|
inline
|
||||||
|
bool _BFSMatcher::
|
||||||
|
_M_match_less_than(_StateIdT __u, _StateIdT __v) const
|
||||||
|
{
|
||||||
|
if (_M_current.count(__u) == 0)
|
||||||
|
return false;
|
||||||
|
if (_M_current.count(__v) > 0)
|
||||||
|
return true;
|
||||||
|
// TODO: Greedy and Non-greedy support
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||||
|
inline
|
||||||
|
bool _BFSMatcher::
|
||||||
|
_M_includes_some() const
|
||||||
|
{
|
||||||
|
auto& __s = _M_nfa->_M_final_states();
|
||||||
|
auto& __t = _M_current;
|
||||||
|
if (__s.size() > 0 && __t.size() > 0)
|
||||||
|
{
|
||||||
|
auto __first = __s.begin();
|
||||||
|
auto __second = __t.begin();
|
||||||
|
while (__first != __s.end() && __second != __t.end())
|
||||||
|
{
|
||||||
|
if (*__first < __second->first)
|
||||||
|
++__first;
|
||||||
|
else if (__second->first < *__first)
|
||||||
|
++__second;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_M_results._M_assign(*__second->second);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||||
|
inline
|
||||||
|
std::unique_ptr<_Grep_matcher> _Nfa::
|
||||||
|
_M_get_matcher(_PatternCursor& __p,
|
||||||
|
_Results& __r,
|
||||||
|
const _AutomatonPtr& __a,
|
||||||
|
regex_constants::match_flag_type __flags)
|
||||||
|
{
|
||||||
|
if (_M_has_back_ref)
|
||||||
|
return unique_ptr<_Grep_matcher>(
|
||||||
|
new _DFSMatcher(__p, __r, __a, __flags));
|
||||||
|
else
|
||||||
|
return unique_ptr<_Grep_matcher>(
|
||||||
|
new _BFSMatcher(__p, __r, __a, __flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
_GLIBCXX_END_NAMESPACE_VERSION
|
_GLIBCXX_END_NAMESPACE_VERSION
|
||||||
|
@ -39,6 +39,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
* @{
|
* @{
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/// Provides a generic facade for a templated match_results.
|
||||||
|
struct _Results
|
||||||
|
{
|
||||||
|
virtual
|
||||||
|
~_Results()
|
||||||
|
{ }
|
||||||
|
virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
|
||||||
|
virtual void _M_set_matched(int __i, bool __is_matched) = 0;
|
||||||
|
virtual std::unique_ptr<_Results> _M_clone() const = 0;
|
||||||
|
virtual void _M_assign(const _Results& __rhs) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class _Grep_matcher;
|
||||||
|
class _Automaton;
|
||||||
|
|
||||||
|
/// Generic shared pointer to an automaton.
|
||||||
|
typedef std::shared_ptr<_Automaton> _AutomatonPtr;
|
||||||
|
|
||||||
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
|
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
|
||||||
class _Automaton
|
class _Automaton
|
||||||
{
|
{
|
||||||
@ -52,15 +70,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
virtual _SizeT
|
virtual _SizeT
|
||||||
_M_sub_count() const = 0;
|
_M_sub_count() const = 0;
|
||||||
|
|
||||||
|
virtual std::unique_ptr<_Grep_matcher>
|
||||||
|
_M_get_matcher(_PatternCursor& __p,
|
||||||
|
_Results& __r,
|
||||||
|
const _AutomatonPtr& __automaton,
|
||||||
|
regex_constants::match_flag_type __flags) = 0;
|
||||||
|
|
||||||
#ifdef _GLIBCXX_DEBUG
|
#ifdef _GLIBCXX_DEBUG
|
||||||
virtual std::ostream&
|
virtual std::ostream&
|
||||||
_M_dot(std::ostream& __ostr) const = 0;
|
_M_dot(std::ostream& __ostr) const = 0;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Generic shared pointer to an automaton.
|
|
||||||
typedef std::shared_ptr<_Automaton> _AutomatonPtr;
|
|
||||||
|
|
||||||
/// Operation codes that define the type of transitions within the base NFA
|
/// Operation codes that define the type of transitions within the base NFA
|
||||||
/// that represents the regular expression.
|
/// that represents the regular expression.
|
||||||
enum _Opcode
|
enum _Opcode
|
||||||
@ -73,13 +94,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
_S_opcode_accept = 255
|
_S_opcode_accept = 255
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Provides a generic facade for a templated match_results.
|
|
||||||
struct _Results
|
|
||||||
{
|
|
||||||
virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
|
|
||||||
virtual void _M_set_matched(int __i, bool __is_matched) = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Tags current state (for subexpr begin/end).
|
/// Tags current state (for subexpr begin/end).
|
||||||
typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
|
typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
|
||||||
|
|
||||||
@ -113,7 +127,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
{ __r._M_set_pos(_M_index, 1, __pc); }
|
{ __r._M_set_pos(_M_index, 1, __pc); }
|
||||||
|
|
||||||
int _M_index;
|
int _M_index;
|
||||||
_FwdIterT _M_pos;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Indicates if current state matches cursor current.
|
/// Indicates if current state matches cursor current.
|
||||||
@ -275,7 +288,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
typedef regex_constants::syntax_option_type _FlagT;
|
typedef regex_constants::syntax_option_type _FlagT;
|
||||||
|
|
||||||
_Nfa(_FlagT __f)
|
_Nfa(_FlagT __f)
|
||||||
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
|
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
|
||||||
|
// TODO: BFS by default. Your choice. Need to be set by the compiler.
|
||||||
|
_M_has_back_ref(false)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
~_Nfa()
|
~_Nfa()
|
||||||
@ -334,6 +349,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
return this->size()-1;
|
return this->size()-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_set_back_ref(bool __b)
|
||||||
|
{ _M_has_back_ref = __b; }
|
||||||
|
|
||||||
|
std::unique_ptr<_Grep_matcher>
|
||||||
|
_M_get_matcher(_PatternCursor& __p,
|
||||||
|
_Results& __r,
|
||||||
|
const _AutomatonPtr& __automaton,
|
||||||
|
regex_constants::match_flag_type __flags);
|
||||||
|
|
||||||
#ifdef _GLIBCXX_DEBUG
|
#ifdef _GLIBCXX_DEBUG
|
||||||
std::ostream&
|
std::ostream&
|
||||||
_M_dot(std::ostream& __ostr) const;
|
_M_dot(std::ostream& __ostr) const;
|
||||||
@ -344,6 +369,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||||||
_StateIdT _M_start_state;
|
_StateIdT _M_start_state;
|
||||||
_StateSet _M_accepting_states;
|
_StateSet _M_accepting_states;
|
||||||
_SizeT _M_subexpr_count;
|
_SizeT _M_subexpr_count;
|
||||||
|
bool _M_has_back_ref;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Describes a sequence of one or more %_State, its current start
|
/// Describes a sequence of one or more %_State, its current start
|
||||||
|
@ -44,6 +44,8 @@
|
|||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <map>
|
||||||
|
#include <queue>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
|
@ -32,6 +32,7 @@ test01()
|
|||||||
{
|
{
|
||||||
bool test __attribute__((unused)) = true;
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
{
|
||||||
std::regex re("zxcv/(one.*)abc", std::regex::extended);
|
std::regex re("zxcv/(one.*)abc", std::regex::extended);
|
||||||
std::string target("zxcv/onetwoabc");
|
std::string target("zxcv/onetwoabc");
|
||||||
std::smatch m;
|
std::smatch m;
|
||||||
@ -44,6 +45,20 @@ test01()
|
|||||||
VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
|
VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::regex re("zxcv/(one.*)abc()\\2", std::regex::extended);
|
||||||
|
std::string target("zxcv/onetwoabc");
|
||||||
|
std::smatch m;
|
||||||
|
|
||||||
|
VERIFY( std::regex_search(target, m, re) );
|
||||||
|
VERIFY( m.size() == 3 );
|
||||||
|
VERIFY( m[0].matched == true );
|
||||||
|
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
|
||||||
|
VERIFY( m[1].matched == true );
|
||||||
|
VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main()
|
main()
|
||||||
{
|
{
|
||||||
|
@ -33,6 +33,7 @@ test01()
|
|||||||
{
|
{
|
||||||
bool test __attribute__((unused)) = true;
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
{
|
||||||
std::regex re("/asdf(/.*)", std::regex::extended);
|
std::regex re("/asdf(/.*)", std::regex::extended);
|
||||||
std::string target("/asdf/qwerty");
|
std::string target("/asdf/qwerty");
|
||||||
std::smatch m;
|
std::smatch m;
|
||||||
@ -41,6 +42,16 @@ test01()
|
|||||||
VERIFY( m.size() == 2 );
|
VERIFY( m.size() == 2 );
|
||||||
VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
|
VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
std::regex re("/asdf(/.*)()\\2", std::regex::extended);
|
||||||
|
std::string target("/asdf/qwerty");
|
||||||
|
std::smatch m;
|
||||||
|
|
||||||
|
VERIFY( std::regex_match(target, m, re) );
|
||||||
|
VERIFY( m.size() == 3 );
|
||||||
|
VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main()
|
main()
|
||||||
|
@ -0,0 +1,71 @@
|
|||||||
|
// { dg-options "-std=gnu++11" }
|
||||||
|
|
||||||
|
//
|
||||||
|
// 2013-07-29 Tim Shen <timshen91@gmail.com>
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
//
|
||||||
|
// This file is part of the GNU ISO C++ Library. This library is free
|
||||||
|
// software; you can redistribute it and/or modify it under the
|
||||||
|
// terms of the GNU General Public License as published by the
|
||||||
|
// Free Software Foundation; either version 3, or (at your option)
|
||||||
|
// any later version.
|
||||||
|
//
|
||||||
|
// This library is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License along
|
||||||
|
// with this library; see the file COPYING3. If not see
|
||||||
|
// <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// 28.11.2 regex_match
|
||||||
|
// Tests Extended automatic matcher dispatching against a std::string target.
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <testsuite_hooks.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
template<typename _Bi_iter, typename _Alloc,
|
||||||
|
typename _Ch_type, typename _Rx_traits>
|
||||||
|
void
|
||||||
|
fake_match(_Bi_iter __s,
|
||||||
|
_Bi_iter __e,
|
||||||
|
match_results<_Bi_iter, _Alloc>& __m,
|
||||||
|
const basic_regex<_Ch_type, _Rx_traits>& __re,
|
||||||
|
regex_constants::match_flag_type __flags
|
||||||
|
= regex_constants::match_default)
|
||||||
|
{
|
||||||
|
__detail::_AutomatonPtr __a = __re._M_get_automaton();
|
||||||
|
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
|
||||||
|
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
|
||||||
|
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
|
||||||
|
VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
|
||||||
|
&*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test01()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
regex re("()(one(.*))abc\\1"); // backref cause DFS
|
||||||
|
const string target("onetwoabc");
|
||||||
|
smatch m;
|
||||||
|
fake_match(target.begin(), target.end(), m, re);
|
||||||
|
|
||||||
|
regex_match(target, m, re);
|
||||||
|
VERIFY( m[2].matched );
|
||||||
|
VERIFY( m[3].matched );
|
||||||
|
VERIFY( std::string(m[2].first, m[2].second) == "onetwo" );
|
||||||
|
VERIFY( std::string(m[3].first, m[3].second) == "two" );
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main()
|
||||||
|
{
|
||||||
|
test01();
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user