regex.h: Executor caller.
2013-08-22 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: Executor caller. * include/bits/regex_executor.h: Fix empty grouping problem. * include/bits/regex_executor.tcc: Same. * testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc: New. From-SVN: r201914
This commit is contained in:
parent
9ad30113d6
commit
1b488e33b6
|
@ -1,3 +1,11 @@
|
||||||
|
2013-08-22 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
|
* include/bits/regex.h: Executor caller.
|
||||||
|
* include/bits/regex_executor.h: Fix empty grouping problem.
|
||||||
|
* include/bits/regex_executor.tcc: Same.
|
||||||
|
* testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc:
|
||||||
|
New.
|
||||||
|
|
||||||
2013-08-20 Phil Muldoon <pmuldoon@redhat.com>
|
2013-08-20 Phil Muldoon <pmuldoon@redhat.com>
|
||||||
|
|
||||||
PR libstdc++/53477
|
PR libstdc++/53477
|
||||||
|
|
|
@ -2211,7 +2211,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
if (__re._M_automaton == nullptr)
|
if (__re._M_automaton == nullptr)
|
||||||
return false;
|
return false;
|
||||||
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
|
__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match();
|
||||||
|
if (__m.size() > 0 && __m[0].matched)
|
||||||
{
|
{
|
||||||
for (auto __it : __m)
|
for (auto __it : __m)
|
||||||
if (!__it.matched)
|
if (!__it.matched)
|
||||||
|
@ -2371,22 +2372,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
if (__re._M_automaton == nullptr)
|
if (__re._M_automaton == nullptr)
|
||||||
return false;
|
return false;
|
||||||
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
|
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
|
||||||
if (__detail::__get_executor(__cur, __last, __m, __re, __flags)
|
{
|
||||||
->_M_search_from_first())
|
__detail::__get_executor(__cur, __last, __m, __re, __flags)
|
||||||
{
|
->_M_search_from_first();
|
||||||
for (auto __it : __m)
|
if (__m.size() > 0 && __m[0].matched)
|
||||||
if (!__it.matched)
|
{
|
||||||
__it.first = __it.second = __last;
|
for (auto __it : __m)
|
||||||
__m.at(__m.size()).first = __first;
|
if (!__it.matched)
|
||||||
__m.at(__m.size()).second = __m[0].first;
|
__it.first = __it.second = __last;
|
||||||
__m.at(__m.size()+1).first = __m[0].second;
|
__m.at(__m.size()).first = __first;
|
||||||
__m.at(__m.size()+1).second = __last;
|
__m.at(__m.size()).second = __m[0].first;
|
||||||
__m.at(__m.size()).matched =
|
__m.at(__m.size()+1).first = __m[0].second;
|
||||||
(__m.prefix().first != __m.prefix().second);
|
__m.at(__m.size()+1).second = __last;
|
||||||
__m.at(__m.size()+1).matched =
|
__m.at(__m.size()).matched =
|
||||||
(__m.suffix().first != __m.suffix().second);
|
(__m.prefix().first != __m.prefix().second);
|
||||||
return true;
|
__m.at(__m.size()+1).matched =
|
||||||
}
|
(__m.suffix().first != __m.suffix().second);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,12 +28,17 @@
|
||||||
* Do not attempt to use it directly. @headername{regex}
|
* Do not attempt to use it directly. @headername{regex}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// TODO: convert comments to doxygen format.
|
||||||
|
|
||||||
namespace std _GLIBCXX_VISIBILITY(default)
|
namespace std _GLIBCXX_VISIBILITY(default)
|
||||||
{
|
{
|
||||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
template<typename, typename>
|
template<typename, typename>
|
||||||
class basic_regex;
|
class basic_regex;
|
||||||
|
|
||||||
|
template<typename>
|
||||||
|
class sub_match;
|
||||||
|
|
||||||
template<typename, typename>
|
template<typename, typename>
|
||||||
class match_results;
|
class match_results;
|
||||||
_GLIBCXX_END_NAMESPACE_VERSION
|
_GLIBCXX_END_NAMESPACE_VERSION
|
||||||
|
@ -52,19 +57,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
class _Executor
|
class _Executor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef match_results<_BiIter, _Alloc> _ResultsT;
|
typedef match_results<_BiIter, _Alloc> _ResultsT;
|
||||||
typedef regex_constants::match_flag_type _FlagT;
|
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
|
||||||
|
typedef regex_constants::match_flag_type _FlagT;
|
||||||
|
|
||||||
virtual
|
virtual
|
||||||
~_Executor()
|
~_Executor()
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
// Set matched when the string exactly matches the pattern.
|
// Set matched when the string exactly matches the pattern.
|
||||||
virtual bool
|
virtual void
|
||||||
_M_match() = 0;
|
_M_match() = 0;
|
||||||
|
|
||||||
// Set matched when some prefix of the string matches the pattern.
|
// Set matched when some prefix of the string matches the pattern.
|
||||||
virtual bool
|
virtual void
|
||||||
_M_search_from_first() = 0;
|
_M_search_from_first() = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -74,20 +80,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_ResultsT& __results,
|
_ResultsT& __results,
|
||||||
_FlagT __flags,
|
_FlagT __flags,
|
||||||
_SizeT __size)
|
_SizeT __size)
|
||||||
: _M_current(__begin), _M_end(__end),
|
: _M_current(__begin), _M_end(__end), _M_results(__results),
|
||||||
_M_results(__results), _M_flags(__flags)
|
_M_flags(__flags)
|
||||||
{
|
{
|
||||||
__results.resize(__size + 2);
|
__size += 2;
|
||||||
for (auto __it : __results)
|
_M_results.resize(__size);
|
||||||
__it.matched = false;
|
for (auto __i = 0; __i < __size; __i++)
|
||||||
|
_M_results[__i].matched = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
_BiIter _M_current;
|
_BiIter _M_current;
|
||||||
_BiIter _M_end;
|
_BiIter _M_end;
|
||||||
_ResultsT& _M_results;
|
_ResultsVec& _M_results;
|
||||||
_FlagT _M_flags;
|
_FlagT _M_flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A _DFSExecutor performs a DFS on the given NFA and input string. At the very
|
||||||
|
// beginning the executor stands in the start state, then it tries every
|
||||||
|
// possible state transition in the current state recursively. Some state
|
||||||
|
// transitions consume input string, say, a single-char-matcher or a
|
||||||
|
// back-reference matcher; some not, like assertion or other anchor nodes.
|
||||||
|
// When the input is exhausted and the current state is an accepting state,
|
||||||
|
// the whole executor returns true.
|
||||||
|
//
|
||||||
|
// TODO: This approach is exponentially slow for certain input.
|
||||||
|
// Try to compile the NFA to a DFA.
|
||||||
|
//
|
||||||
|
// Time complexity: exponential
|
||||||
|
// Space complexity: O(__end - __begin)
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
typename _CharT, typename _TraitsT>
|
typename _CharT, typename _TraitsT>
|
||||||
class _DFSExecutor
|
class _DFSExecutor
|
||||||
|
@ -97,6 +117,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||||
|
typedef typename _BaseT::_ResultsVec _ResultsVec;
|
||||||
typedef regex_constants::match_flag_type _FlagT;
|
typedef regex_constants::match_flag_type _FlagT;
|
||||||
|
|
||||||
_DFSExecutor(_BiIter __begin,
|
_DFSExecutor(_BiIter __begin,
|
||||||
|
@ -105,37 +126,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
const _RegexT& __nfa,
|
const _RegexT& __nfa,
|
||||||
_FlagT __flags)
|
_FlagT __flags)
|
||||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
||||||
_M_traits(_TraitsT()), _M_nfa(__nfa)
|
_M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
bool
|
void
|
||||||
_M_match()
|
_M_match()
|
||||||
{ return _M_dfs<true>(_M_nfa._M_start()); }
|
{ _M_dfs<true>(_M_nfa._M_start()); }
|
||||||
|
|
||||||
bool
|
void
|
||||||
_M_search_from_first()
|
_M_search_from_first()
|
||||||
{ return _M_dfs<false>(_M_nfa._M_start()); }
|
{ _M_dfs<false>(_M_nfa._M_start()); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
bool
|
bool
|
||||||
_M_dfs(_StateIdT __i);
|
_M_dfs(_StateIdT __i);
|
||||||
|
|
||||||
|
_ResultsVec _M_results_ret;
|
||||||
_TraitsT _M_traits;
|
_TraitsT _M_traits;
|
||||||
const _RegexT& _M_nfa;
|
const _RegexT& _M_nfa;
|
||||||
};
|
};
|
||||||
|
|
||||||
// It's essentially a variant of Single-Source-Shortest-Path problem, where,
|
// Like the DFS approach, it tries every possible state transition; unlike DFS,
|
||||||
// the matching results is the final distance and should be minimized.
|
// it uses a queue instead of a stack to store matching states. It's a BFS
|
||||||
// Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed
|
// approach.
|
||||||
// (BFS-like) Bellman-Ford algorithm,
|
//
|
||||||
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
|
// Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html) explains
|
||||||
|
// this algorithm clearly.
|
||||||
//
|
//
|
||||||
// Every entry of _M_covered saves the solution(grouping status) for every
|
// Every entry of _M_covered saves the solution(grouping status) for every
|
||||||
// matching head. When states transfer, solutions will be compared and
|
// matching head. When states transit, solutions will be compared and
|
||||||
// deduplicated(based on which greedy mode we have).
|
// deduplicated(based on which greedy mode we have).
|
||||||
//
|
//
|
||||||
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
|
// Time complexity: O((__end - __begin) * _M_nfa.size())
|
||||||
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
|
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
typename _CharT, typename _TraitsT>
|
typename _CharT, typename _TraitsT>
|
||||||
|
@ -146,12 +169,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||||
typedef std::unique_ptr<_ResultsT> _ResultsPtr;
|
typedef typename _BaseT::_ResultsVec _ResultsVec;
|
||||||
|
typedef std::unique_ptr<_ResultsVec> _ResultsPtr;
|
||||||
typedef regex_constants::match_flag_type _FlagT;
|
typedef regex_constants::match_flag_type _FlagT;
|
||||||
|
|
||||||
_BFSExecutor(_BiIter __begin,
|
_BFSExecutor(_BiIter __begin,
|
||||||
_BiIter __end,
|
_BiIter __end,
|
||||||
_ResultsT& __results,
|
_ResultsT& __results,
|
||||||
const _RegexT& __nfa,
|
const _RegexT& __nfa,
|
||||||
_FlagT __flags)
|
_FlagT __flags)
|
||||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
||||||
|
@ -159,21 +183,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
if (_M_nfa._M_start() != _S_invalid_state_id)
|
if (_M_nfa._M_start() != _S_invalid_state_id)
|
||||||
_M_covered[_M_nfa._M_start()] =
|
_M_covered[_M_nfa._M_start()] =
|
||||||
_ResultsPtr(new _ResultsT(this->_M_results));
|
_ResultsPtr(new _ResultsVec(this->_M_results));
|
||||||
_M_e_closure();
|
_M_e_closure();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
void
|
||||||
_M_match()
|
_M_match()
|
||||||
{ return _M_main_loop<true>(); }
|
{ _M_main_loop<true>(); }
|
||||||
|
|
||||||
bool
|
void
|
||||||
_M_search_from_first()
|
_M_search_from_first()
|
||||||
{ return _M_main_loop<false>(); }
|
{ _M_main_loop<false>(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
bool
|
void
|
||||||
_M_main_loop();
|
_M_main_loop();
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -183,13 +207,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_M_move();
|
_M_move();
|
||||||
|
|
||||||
bool
|
bool
|
||||||
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
|
_M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
_M_includes_some() const;
|
_M_includes_some() const;
|
||||||
|
|
||||||
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
||||||
const _RegexT& _M_nfa;
|
const _RegexT& _M_nfa;
|
||||||
};
|
};
|
||||||
|
|
||||||
//@} regex-detail
|
//@} regex-detail
|
||||||
|
|
|
@ -34,19 +34,18 @@ namespace __detail
|
||||||
{
|
{
|
||||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
|
|
||||||
// TODO: This is too slow. Try to compile the NFA to a DFA.
|
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
typename _CharT, typename _TraitsT>
|
typename _CharT, typename _TraitsT>
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||||
_M_dfs(_StateIdT __i)
|
_M_dfs(_StateIdT __i)
|
||||||
{
|
{
|
||||||
auto& __current = this->_M_current;
|
|
||||||
auto& __end = this->_M_end;
|
|
||||||
auto& __results = this->_M_results;
|
|
||||||
if (__i == _S_invalid_state_id)
|
if (__i == _S_invalid_state_id)
|
||||||
// This is not that certain. Needs deeper investigation.
|
// This is not that certain. Needs deeper investigation.
|
||||||
return false;
|
return false;
|
||||||
|
auto& __current = this->_M_current;
|
||||||
|
auto& __end = this->_M_end;
|
||||||
|
auto& __results = _M_results_ret;
|
||||||
const auto& __state = _M_nfa[__i];
|
const auto& __state = _M_nfa[__i];
|
||||||
bool __ret = false;
|
bool __ret = false;
|
||||||
switch (__state._M_opcode)
|
switch (__state._M_opcode)
|
||||||
|
@ -59,14 +58,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
|| _M_dfs<__match_mode>(__state._M_next);
|
|| _M_dfs<__match_mode>(__state._M_next);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_begin:
|
case _S_opcode_subexpr_begin:
|
||||||
__results.at(__state._M_subexpr).first = __current;
|
// Here's the critical part: if there's nothing changed since last
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
// visit, do NOT continue. This prevents the executor from getting into
|
||||||
|
// an infinite loop when using "()*" to match "".
|
||||||
|
//
|
||||||
|
// Every change on __results will be rolled back after the recursion
|
||||||
|
// step finishes.
|
||||||
|
if (!__results[__state._M_subexpr].matched
|
||||||
|
|| __results[__state._M_subexpr].first != __current)
|
||||||
|
{
|
||||||
|
auto __back = __current;
|
||||||
|
__results[__state._M_subexpr].first = __current;
|
||||||
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
|
__results[__state._M_subexpr].first = __back;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
__results.at(__state._M_subexpr).second = __current;
|
if (__results[__state._M_subexpr].second != __current
|
||||||
__results.at(__state._M_subexpr).matched = true;
|
|| __results[__state._M_subexpr].matched != true)
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
{
|
||||||
__results.at(__state._M_subexpr).matched = __ret;
|
auto __back = __results[__state._M_subexpr];
|
||||||
|
__results[__state._M_subexpr].second = __current;
|
||||||
|
__results[__state._M_subexpr].matched = true;
|
||||||
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
|
__results[__state._M_subexpr] = __back;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_match:
|
case _S_opcode_match:
|
||||||
if (__current != __end && __state._M_matches(*__current))
|
if (__current != __end && __state._M_matches(*__current))
|
||||||
|
@ -82,7 +100,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
// If matched, keep going; else just return to try another state.
|
// If matched, keep going; else just return to try another state.
|
||||||
case _S_opcode_backref:
|
case _S_opcode_backref:
|
||||||
{
|
{
|
||||||
auto& __submatch = __results.at(__state._M_backref_index);
|
auto& __submatch = __results[__state._M_backref_index];
|
||||||
if (!__submatch.matched)
|
if (!__submatch.matched)
|
||||||
break;
|
break;
|
||||||
auto __last = __current;
|
auto __last = __current;
|
||||||
|
@ -92,12 +110,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
++__last;
|
++__last;
|
||||||
if (_M_traits.transform(__submatch.first, __submatch.second)
|
if (_M_traits.transform(__submatch.first, __submatch.second)
|
||||||
== _M_traits.transform(__current, __last))
|
== _M_traits.transform(__current, __last))
|
||||||
{
|
if (__last != __current)
|
||||||
auto __backup = __current;
|
{
|
||||||
__current = __last;
|
auto __backup = __current;
|
||||||
|
__current = __last;
|
||||||
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
|
__current = __backup;
|
||||||
|
}
|
||||||
|
else
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
__current = __backup;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_accept:
|
case _S_opcode_accept:
|
||||||
|
@ -105,6 +126,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__ret = __current == __end;
|
__ret = __current == __end;
|
||||||
else
|
else
|
||||||
__ret = true;
|
__ret = true;
|
||||||
|
if (__ret)
|
||||||
|
this->_M_results = __results;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
_GLIBCXX_DEBUG_ASSERT(false);
|
_GLIBCXX_DEBUG_ASSERT(false);
|
||||||
|
@ -115,22 +138,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
typename _CharT, typename _TraitsT>
|
typename _CharT, typename _TraitsT>
|
||||||
template<bool __match_mode>
|
template<bool __match_mode>
|
||||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||||
_M_main_loop()
|
_M_main_loop()
|
||||||
{
|
{
|
||||||
while (this->_M_current != this->_M_end)
|
while (this->_M_current != this->_M_end)
|
||||||
{
|
{
|
||||||
if (!__match_mode)
|
if (!__match_mode)
|
||||||
if (_M_includes_some())
|
if (_M_includes_some())
|
||||||
return true;
|
return;
|
||||||
_M_move();
|
_M_move();
|
||||||
++this->_M_current;
|
++this->_M_current;
|
||||||
_M_e_closure();
|
_M_e_closure();
|
||||||
}
|
}
|
||||||
return _M_includes_some();
|
_M_includes_some();
|
||||||
}
|
}
|
||||||
|
|
||||||
// The SPFA approach.
|
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
typename _CharT, typename _TraitsT>
|
typename _CharT, typename _TraitsT>
|
||||||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||||
|
@ -152,13 +174,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
const auto& __state = _M_nfa[__u];
|
const auto& __state = _M_nfa[__u];
|
||||||
|
|
||||||
// Can be implemented as a method, but there are too many arguments.
|
// Can be implemented as a method, but there are too many arguments.
|
||||||
|
// I would have used a function-like macro before C++11, but a lambda is a better
|
||||||
|
// choice, since hopefully the compiler can inline it.
|
||||||
auto __add_visited_state = [&](_StateIdT __v)
|
auto __add_visited_state = [&](_StateIdT __v)
|
||||||
{
|
{
|
||||||
if (__v == _S_invalid_state_id)
|
if (__v == _S_invalid_state_id)
|
||||||
return;
|
return;
|
||||||
if (_M_match_less_than(__u, __v))
|
if (_M_covered.count(__u) != 0
|
||||||
|
&& (_M_covered.count(__v) == 0
|
||||||
|
|| _M_match_less_than(*_M_covered[__u], *_M_covered[__v])))
|
||||||
{
|
{
|
||||||
_M_covered[__v] = _ResultsPtr(new _ResultsT(*_M_covered[__u]));
|
_M_covered[__v] = _ResultsPtr(new _ResultsVec(*_M_covered[__u]));
|
||||||
// if a state is updated, its outgoing neighbors should be
|
// if a state is updated, its outgoing neighbors should be
|
||||||
// reconsidered too. Push them to the queue.
|
// reconsidered too. Push them to the queue.
|
||||||
if (!__in_q[__v])
|
if (!__in_q[__v])
|
||||||
|
@ -176,13 +202,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__add_visited_state(__state._M_alt);
|
__add_visited_state(__state._M_alt);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_begin:
|
case _S_opcode_subexpr_begin:
|
||||||
_M_covered[__u]->at(__state._M_subexpr).first = __current;
|
{
|
||||||
__add_visited_state(__state._M_next);
|
auto& __cu = *_M_covered[__u];
|
||||||
|
auto __back = __cu[__state._M_subexpr].first;
|
||||||
|
__cu[__state._M_subexpr].first = __current;
|
||||||
|
__add_visited_state(__state._M_next);
|
||||||
|
__cu[__state._M_subexpr].first = __back;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
_M_covered[__u]->at(__state._M_subexpr).second = __current;
|
{
|
||||||
_M_covered[__u]->at(__state._M_subexpr).matched = true;
|
auto& __cu = *_M_covered[__u];
|
||||||
__add_visited_state(__state._M_next);
|
auto __back = __cu[__state._M_subexpr];
|
||||||
|
__cu[__state._M_subexpr].second = __current;
|
||||||
|
__cu[__state._M_subexpr].matched = true;
|
||||||
|
__add_visited_state(__state._M_next);
|
||||||
|
__cu[__state._M_subexpr] = __back;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case _S_opcode_match:
|
case _S_opcode_match:
|
||||||
break;
|
break;
|
||||||
|
@ -206,9 +242,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
const auto& __state = _M_nfa[__it.first];
|
const auto& __state = _M_nfa[__it.first];
|
||||||
if (__state._M_opcode == _S_opcode_match
|
if (__state._M_opcode == _S_opcode_match
|
||||||
&& __state._M_matches(*this->_M_current))
|
&& __state._M_matches(*this->_M_current))
|
||||||
if (_M_match_less_than(__it.first, __state._M_next)
|
if (__state._M_next != _S_invalid_state_id)
|
||||||
&& __state._M_next != _S_invalid_state_id)
|
if (__next.count(__state._M_next) == 0
|
||||||
__next[__state._M_next] = move(__it.second);
|
|| _M_match_less_than(*__it.second, *__next[__state._M_next]))
|
||||||
|
__next[__state._M_next] = move(__it.second);
|
||||||
}
|
}
|
||||||
_M_covered = move(__next);
|
_M_covered = move(__next);
|
||||||
}
|
}
|
||||||
|
@ -216,14 +253,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
typename _CharT, typename _TraitsT>
|
typename _CharT, typename _TraitsT>
|
||||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||||
_M_match_less_than(_StateIdT __u, _StateIdT __v) const
|
_M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const
|
||||||
{
|
{
|
||||||
if (_M_covered.count(__u) == 0)
|
|
||||||
return false;
|
|
||||||
if (_M_covered.count(__v) > 0)
|
|
||||||
return true;
|
|
||||||
// TODO: Greedy and Non-greedy support
|
// TODO: Greedy and Non-greedy support
|
||||||
return true;
|
_GLIBCXX_DEBUG_ASSERT(__u.size() == __v.size());
|
||||||
|
auto __size = __u.size();
|
||||||
|
for (auto __i = 0; __i < __size; __i++)
|
||||||
|
{
|
||||||
|
auto& __uit = __u[__i], __vit = __v[__i];
|
||||||
|
if (__uit.matched && !__vit.matched)
|
||||||
|
return true;
|
||||||
|
if (!__uit.matched && __vit.matched)
|
||||||
|
return false;
|
||||||
|
if (__uit.matched && __vit.matched)
|
||||||
|
{
|
||||||
|
// GREEDY
|
||||||
|
if (__uit.first != __vit.first)
|
||||||
|
return __uit.first < __vit.first;
|
||||||
|
if (__uit.second != __vit.second)
|
||||||
|
return __uit.second > __vit.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
|
@ -265,11 +316,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||||
_ExecutorPtr;
|
_ExecutorPtr;
|
||||||
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
|
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
|
||||||
|
typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
|
||||||
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||||
(__re._M_automaton);
|
(__re._M_automaton);
|
||||||
if (__p->_M_has_backref)
|
if (__p->_M_has_backref)
|
||||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
|
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
|
||||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
|
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, __flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
_GLIBCXX_END_NAMESPACE_VERSION
|
_GLIBCXX_END_NAMESPACE_VERSION
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
// { dg-options "-std=gnu++11" }
|
||||||
|
|
||||||
|
//
|
||||||
|
// 2013-08-22 Tim Shen <timshen91@gmail.com>
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
//
|
||||||
|
// This file is part of the GNU ISO C++ Library. This library is free
|
||||||
|
// software; you can redistribute it and/or modify it under the
|
||||||
|
// terms of the GNU General Public License as published by the
|
||||||
|
// Free Software Foundation; either version 3, or (at your option)
|
||||||
|
// any later version.
|
||||||
|
//
|
||||||
|
// This library is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License along
|
||||||
|
// with this library; see the file COPYING3. If not see
|
||||||
|
// <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// 28.11.2 regex_match
|
||||||
|
// Tests ECMAScript empty-grouping against a C-string.
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <testsuite_hooks.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void
|
||||||
|
test01()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
{
|
||||||
|
regex re("()*\\1");
|
||||||
|
cmatch m;
|
||||||
|
const char s[] = "";
|
||||||
|
VERIFY( regex_match(s, m, re) );
|
||||||
|
VERIFY( m.size() == 2 );
|
||||||
|
VERIFY( m[0].matched );
|
||||||
|
VERIFY( m[1].matched );
|
||||||
|
}
|
||||||
|
{
|
||||||
|
regex re("()*");
|
||||||
|
cmatch m;
|
||||||
|
const char s[] = "";
|
||||||
|
VERIFY( regex_match(s, m, re) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main()
|
||||||
|
{
|
||||||
|
test01();
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue