regex.h: Remove virtual class _Automaton.
2013-10-20 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: Remove virtual class _Automaton. * include/bits/regex_automaton.h: Likewise. * include/bits/regex.tcc: Adjust comment for policy changing. * include/bits/regex_executor.h: Update comments of complexity. * include/bits/regex_executor.tcc: Adjust executor choosing policy. Now DFS executor is the default one. * testsuite/util/testsuite_regex.h (regex_match_debug, regex_search_debug): Adjust for policy changing. From-SVN: r203875
This commit is contained in:
parent
f5b9d5e877
commit
603b781be4
|
@ -1,3 +1,14 @@
|
|||
2013-10-20 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex.h: Remove virtual class _Automaton.
|
||||
* include/bits/regex_automaton.h: Likewise.
|
||||
* include/bits/regex.tcc: Adjust comment for policy changing.
|
||||
* include/bits/regex_executor.h: Update comments of complexity.
|
||||
* include/bits/regex_executor.tcc: Adjust executor choosing
|
||||
policy. Now DFS executor is the default one.
|
||||
* testsuite/util/testsuite_regex.h (regex_match_debug,
|
||||
regex_search_debug): Adjust for policy changing.
|
||||
|
||||
2013-10-20 Chris Jefferson <chris@bubblescope.net>
|
||||
Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
|
|
|
@ -727,7 +727,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
|||
#endif
|
||||
|
||||
protected:
|
||||
typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>>
|
||||
typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
|
||||
_AutomatonPtr;
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
|
|
|
@ -38,8 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
|
||||
// Result of merging regex_match and regex_search.
|
||||
//
|
||||
// __policy now can be _S_auto(auto dispatch by checking back-references)
|
||||
// and _S_force_dfs(just use _DFSExecutor).
|
||||
// __policy can now be _S_auto (auto dispatch) or _S_alternate (use
|
||||
// the other one if possible, for testing purposes).
|
||||
//
|
||||
// That __match_mode is true means regex_match, else regex_search.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
|
|
|
@ -104,31 +104,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
#endif
|
||||
};
|
||||
|
||||
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
class _Automaton
|
||||
{
|
||||
public:
|
||||
typedef size_t _SizeT;
|
||||
|
||||
public:
|
||||
virtual
|
||||
~_Automaton()
|
||||
{ }
|
||||
|
||||
virtual _SizeT
|
||||
_M_sub_count() const = 0;
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
virtual std::ostream&
|
||||
_M_dot(std::ostream& __ostr) const = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
class _NFA
|
||||
: public _Automaton<_CharT, _TraitsT>,
|
||||
public std::vector<_State<_CharT, _TraitsT>>
|
||||
: public std::vector<_State<_CharT, _TraitsT>>
|
||||
{
|
||||
public:
|
||||
typedef _State<_CharT, _TraitsT> _StateT;
|
||||
|
|
|
@ -179,8 +179,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
// TODO: This approach is exponentially slow for certain input.
|
||||
// Try to compile the NFA to a DFA.
|
||||
//
|
||||
// Time complexity: exponential
|
||||
// Space complexity: O(__end - __begin)
|
||||
// Time complexity: o(match_length), O(2^(_M_nfa->size()))
|
||||
// Space complexity: \theta(match_results.size() + match_length)
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _DFSExecutor
|
||||
|
@ -200,16 +200,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton)),
|
||||
_M_start_state(_M_nfa._M_start())
|
||||
_M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start())
|
||||
{ }
|
||||
|
||||
private:
|
||||
void
|
||||
_M_init(_BiIter __cur)
|
||||
{
|
||||
_M_cur_results.resize(_M_nfa._M_sub_count() + 2);
|
||||
_M_cur_results.resize(_M_nfa->_M_sub_count() + 2);
|
||||
this->_M_current = __cur;
|
||||
}
|
||||
|
||||
|
@ -235,9 +233,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
|
||||
// To record current solution.
|
||||
_ResultsVec _M_cur_results;
|
||||
const _NFAT& _M_nfa;
|
||||
_StateIdT _M_start_state;
|
||||
std::shared_ptr<_NFAT> _M_nfa;
|
||||
_ResultsVec _M_cur_results;
|
||||
_StateIdT _M_start_state;
|
||||
};
|
||||
|
||||
// Like the DFS approach, it tries every possible state transition; unlike DFS,
|
||||
|
@ -251,8 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
// matching head. When states transit, solutions will be compared and
|
||||
// deduplicated(based on which greedy mode we have).
|
||||
//
|
||||
// Time complexity: O((__end - __begin) * _M_nfa.size())
|
||||
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
|
||||
// Time complexity: o(match_length * (quantifier_number
|
||||
// + match_results.size()))
|
||||
// O(match_length * _M_nfa->size()
|
||||
// * (quantifier_number + match_results.size()))
|
||||
// Space complexity: o(quantifier_number + match_results.size())
|
||||
// O(_M_nfa->size()
|
||||
// * (quantifier_number + match_results.size()))
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _BFSExecutor
|
||||
|
@ -382,11 +385,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton)),
|
||||
_M_match_stack(_M_nfa.size()),
|
||||
_M_stack(_M_nfa.size()),
|
||||
_M_start_state(_M_nfa._M_start())
|
||||
_M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()),
|
||||
_M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start())
|
||||
{ }
|
||||
|
||||
private:
|
||||
|
@ -398,7 +398,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_ResultsVec& __res(this->_M_results);
|
||||
_M_covered[this->_M_start_state] =
|
||||
_ResultsPtr(new _ResultsEntry(__res.size(),
|
||||
_M_nfa._M_quant_count));
|
||||
_M_nfa->_M_quant_count));
|
||||
_M_stack._M_push(this->_M_start_state);
|
||||
}
|
||||
|
||||
|
@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
this->_M_flags));
|
||||
}
|
||||
|
||||
const _NFAT& _M_nfa;
|
||||
std::shared_ptr<_NFAT> _M_nfa;
|
||||
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
||||
_TodoList _M_match_stack;
|
||||
_TodoList _M_stack;
|
||||
|
|
|
@ -28,6 +28,13 @@
|
|||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// See below __get_executor to get what this is talking about. The default
|
||||
// value 1 indicates a conservative optimization without giving up worst-case
|
||||
// performance.
|
||||
#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
|
||||
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
|
||||
#endif
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
|
@ -60,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_M_dfs(_StateIdT __i)
|
||||
{
|
||||
auto& __current = this->_M_current;
|
||||
const auto& __state = _M_nfa[__i];
|
||||
const auto& __state = (*_M_nfa)[__i];
|
||||
bool __ret = false;
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
|
@ -216,7 +223,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{
|
||||
auto __u = _M_stack._M_pop();
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
|
||||
const auto& __state = _M_nfa[__u];
|
||||
const auto& __state = (*_M_nfa)[__u];
|
||||
|
||||
// Can be implemented using method, but there will be too many
|
||||
// arguments. I would use macro function before C++11, but lambda is
|
||||
|
@ -314,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
while (!_M_match_stack._M_empty())
|
||||
{
|
||||
auto __u = _M_match_stack._M_pop();
|
||||
const auto& __state = _M_nfa[__u];
|
||||
const auto& __state = (*_M_nfa)[__u];
|
||||
auto& __cu = _M_covered[__u];
|
||||
if (__state._M_matches(*this->_M_current)
|
||||
&& (__next.count(__state._M_next) == 0
|
||||
|
@ -333,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_M_includes_some()
|
||||
{
|
||||
bool __succ = false;
|
||||
for (auto __u : _M_nfa._M_final_states())
|
||||
for (auto __u : _M_nfa->_M_final_states())
|
||||
if (_M_covered.count(__u))
|
||||
{
|
||||
__succ = true;
|
||||
|
@ -380,8 +387,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
|
||||
enum class _RegexExecutorPolicy : int
|
||||
{ _S_auto, _S_force_dfs };
|
||||
{ _S_auto, _S_alternate };
|
||||
|
||||
// This function decides which executor to use under given circumstances.
|
||||
// The _S_auto policy now is the following: if an NFA has no back-references
|
||||
// and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers
|
||||
// (*, +, ?), the _BFSExecutor will be used, otherwise _DFSExecutor. This is
|
||||
// because _DFSExecutor has an exponential upper bound, but better best-case
|
||||
// performance. Meanwhile, _BFSExecutor can effectively prevent
|
||||
// exponential-time matching (which must contain many quantifiers), but
|
||||
// it's slower on average.
|
||||
//
|
||||
// For simple regex, _BFSExecutor could be 2 or more times slower than
|
||||
// _DFSExecutor.
|
||||
//
|
||||
// Of course, _BFSExecutor cannot handle back-references.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT,
|
||||
_RegexExecutorPolicy __policy>
|
||||
|
@ -396,12 +416,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_ExecutorPtr;
|
||||
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
|
||||
typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
|
||||
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton);
|
||||
if (__policy == _RegexExecutorPolicy::_S_force_dfs
|
||||
|| (__policy == _RegexExecutorPolicy::_S_auto && __p->_M_has_backref))
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
if (!__re._M_automaton->_M_has_backref
|
||||
&& (__policy == _RegexExecutorPolicy::_S_alternate
|
||||
|| __re._M_automaton->_M_quant_count
|
||||
> _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
|
||||
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
|
|
|
@ -148,7 +148,7 @@ namespace __gnu_test
|
|||
(__s, __e, __m, __re, __flags);
|
||||
match_results<_Bi_iter, _Alloc> __mm;
|
||||
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
|
||||
_RegexExecutorPolicy::_S_force_dfs, true>
|
||||
_RegexExecutorPolicy::_S_alternate, true>
|
||||
(__s, __e, __mm, __re, __flags);
|
||||
if (__res1 == __res2 && __m == __mm)
|
||||
return __res1;
|
||||
|
@ -234,7 +234,7 @@ namespace __gnu_test
|
|||
(__s, __e, __m, __re, __flags);
|
||||
match_results<_Bi_iter, _Alloc> __mm;
|
||||
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
|
||||
_RegexExecutorPolicy::_S_force_dfs, false>
|
||||
_RegexExecutorPolicy::_S_alternate, false>
|
||||
(__s, __e, __mm, __re, __flags);
|
||||
if (__res1 == __res2 && __m == __mm)
|
||||
return __res1;
|
||||
|
|
Loading…
Reference in New Issue