regex.h: Remove unnecessary friends.
2013-10-26 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: Remove unnecessary friends. * include/bits/regex.tcc (__regex_algo_impl<>): Move __get_executor to here. * include/bits/regex_executor.h: Remove _DFSExecutor and _BFSExecutor; they are merged into _Executor. Eliminate quantifier tracking part, so it's faster. * include/bits/regex_executor.tcc: Implement _Executor. * testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc: New. * testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: Adjust duplicate testcases. * testsuite/performance/28_regex/split.h: New. * testsuite/performance/28_regex/split_bfs.cc: New. * testsuite/util/testsuite_regex.h: Adjust behavior of two-executors agreement judger: do not compare match_results when executor return false. From-SVN: r204093
This commit is contained in:
parent
5d905bb613
commit
9f0d9611e7
|
@ -1,3 +1,21 @@
|
|||
2013-10-26 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex.h: Remove unnecessary friends.
|
||||
* include/bits/regex.tcc (__regex_algo_impl<>): Move __get_executor
|
||||
to here.
|
||||
* include/bits/regex_executor.h: Remove _DFSExecutor and _BFSExecutor;
|
||||
they are merged into _Executor. Eliminate quantifier tracking part, so
|
||||
it's faster.
|
||||
* include/bits/regex_executor.tcc: Implement _Executor.
|
||||
* testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc: New.
|
||||
* testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: Adjust
|
||||
duplicate testcases.
|
||||
* testsuite/performance/28_regex/split.h: New.
|
||||
* testsuite/performance/28_regex/split_bfs.cc: New.
|
||||
* testsuite/util/testsuite_regex.h: Adjust behavior of two-executors
|
||||
agreement judger: do not compare match_results when executor return
|
||||
false.
|
||||
|
||||
2013-10-25 François Dumont <fdumont@gcc.gnu.org>
|
||||
|
||||
* include/debug/formatter.h (__check_singular): Add const on
|
||||
|
|
|
@ -36,6 +36,9 @@ namespace __detail
|
|||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
enum class _RegexExecutorPolicy : int
|
||||
{ _S_auto, _S_alternate };
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT,
|
||||
_RegexExecutorPolicy __policy,
|
||||
|
@ -730,17 +733,6 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
|||
typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
|
||||
_AutomatonPtr;
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT,
|
||||
__detail::_RegexExecutorPolicy __policy>
|
||||
friend std::unique_ptr<
|
||||
__detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
__detail::__get_executor(_BiIter,
|
||||
_BiIter,
|
||||
std::vector<sub_match<_BiIter>, _Alloc>&,
|
||||
const basic_regex<_CharT, _TraitsT>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
|
||||
__detail::_RegexExecutorPolicy, bool>
|
||||
friend bool
|
||||
|
@ -748,15 +740,9 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
|||
const basic_regex<_Cp, _Rp>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
template<typename, typename, typename, bool>
|
||||
friend class __detail::_Executor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_DFSExecutor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_BFSExecutor;
|
||||
|
||||
flag_type _M_flags;
|
||||
_Rx_traits _M_traits;
|
||||
_AutomatonPtr _M_automaton;
|
||||
|
@ -1851,15 +1837,9 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
|||
//@}
|
||||
|
||||
private:
|
||||
template<typename, typename, typename, typename>
|
||||
template<typename, typename, typename, bool>
|
||||
friend class __detail::_Executor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_DFSExecutor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_BFSExecutor;
|
||||
|
||||
template<typename, typename, typename>
|
||||
friend class regex_iterator;
|
||||
|
||||
|
|
|
@ -28,6 +28,13 @@
|
|||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// See below __regex_algo_impl to get what this is talking about. The default
|
||||
// value 1 indicated a conservative optimization without giving up worst case
|
||||
// performance.
|
||||
#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
|
||||
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
|
||||
#endif
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
@ -61,14 +68,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
for (auto& __it : __res)
|
||||
__it.matched = false;
|
||||
|
||||
auto __executor = __get_executor<_BiIter, _Alloc, _CharT, _TraitsT,
|
||||
__policy>(__s, __e, __res, __re, __flags);
|
||||
|
||||
// This function decide which executor to use under given circumstances.
|
||||
// The _S_auto policy now is the following: if a NFA has no
|
||||
// back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
|
||||
// quantifiers (*, +, ?), the BFS executor will be used, other wise
|
||||
// DFS executor. This is because DFS executor has a exponential upper
|
||||
// bound, but better best-case performace. Meanwhile, BFS executor can
|
||||
// effectively prevent from exponential-long time matching (which must
|
||||
// contains many quantifiers), but it's slower in average.
|
||||
//
|
||||
// For simple regex, BFS executor could be 2 or more times slower than
|
||||
// DFS executor.
|
||||
//
|
||||
// Of course, BFS executor cannot handle back-references.
|
||||
bool __ret;
|
||||
if (__match_mode)
|
||||
__ret = __executor->_M_match();
|
||||
if (!__re._M_automaton->_M_has_backref
|
||||
&& (__policy == _RegexExecutorPolicy::_S_alternate
|
||||
|| __re._M_automaton->_M_quant_count
|
||||
> _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
|
||||
{
|
||||
_Executor<_BiIter, _Alloc, _TraitsT, false>
|
||||
__executor(__s, __e, __m, __re, __flags);
|
||||
if (__match_mode)
|
||||
__ret = __executor._M_match();
|
||||
else
|
||||
__ret = __executor._M_search();
|
||||
}
|
||||
else
|
||||
__ret = __executor->_M_search();
|
||||
{
|
||||
_Executor<_BiIter, _Alloc, _TraitsT, true>
|
||||
__executor(__s, __e, __m, __re, __flags);
|
||||
if (__match_mode)
|
||||
__ret = __executor._M_match();
|
||||
else
|
||||
__ret = __executor._M_search();
|
||||
}
|
||||
if (__ret)
|
||||
{
|
||||
for (auto __it : __res)
|
||||
|
|
|
@ -30,10 +30,6 @@
|
|||
|
||||
// FIXME convert comments to doxygen format.
|
||||
|
||||
// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
|
||||
// much more similar. Also, make grouping seperated. The
|
||||
// regex_constants::nosubs enables much more simpler execution.
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
@ -56,15 +52,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* @{
|
||||
*/
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||
bool __dfs_mode>
|
||||
class _Executor
|
||||
{
|
||||
public:
|
||||
typedef basic_regex<_CharT, _TraitsT> _RegexT;
|
||||
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
typedef typename _TraitsT::char_class_type _ClassT;
|
||||
typedef typename iterator_traits<_BiIter>::value_type _CharT;
|
||||
typedef basic_regex<_CharT, _TraitsT> _RegexT;
|
||||
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
typedef typename _TraitsT::char_class_type _ClassT;
|
||||
typedef _NFA<_CharT, _TraitsT> _NFAT;
|
||||
|
||||
public:
|
||||
_Executor(_BiIter __begin,
|
||||
|
@ -75,39 +73,47 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
: _M_begin(__begin),
|
||||
_M_end(__end),
|
||||
_M_re(__re),
|
||||
_M_nfa(*__re._M_automaton),
|
||||
_M_results(__results),
|
||||
_M_match_queue(__dfs_mode ? nullptr
|
||||
: new queue<pair<_StateIdT, _ResultsVec>>()),
|
||||
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
|
||||
_M_flags((__flags & regex_constants::match_prev_avail)
|
||||
? (__flags
|
||||
& ~regex_constants::match_not_bol
|
||||
& ~regex_constants::match_not_bow)
|
||||
: __flags)
|
||||
{ }
|
||||
|
||||
virtual
|
||||
~_Executor()
|
||||
: __flags),
|
||||
_M_start_state(_M_nfa._M_start())
|
||||
{ }
|
||||
|
||||
// Set matched when string exactly match the pattern.
|
||||
bool
|
||||
_M_match()
|
||||
{
|
||||
_M_match_mode = true;
|
||||
_M_init(_M_begin);
|
||||
return _M_main();
|
||||
_M_current = _M_begin;
|
||||
return _M_main<true>();
|
||||
}
|
||||
|
||||
// Set matched when some prefix of the string matches the pattern.
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{
|
||||
_M_match_mode = false;
|
||||
_M_init(_M_begin);
|
||||
return _M_main();
|
||||
_M_current = _M_begin;
|
||||
return _M_main<false>();
|
||||
}
|
||||
|
||||
bool
|
||||
_M_search();
|
||||
|
||||
private:
|
||||
template<bool __match_mode>
|
||||
void
|
||||
_M_dfs(_StateIdT __start);
|
||||
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_main();
|
||||
|
||||
bool
|
||||
_M_is_word(_CharT __ch) const
|
||||
{
|
||||
|
@ -134,307 +140,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
bool
|
||||
_M_word_boundry(_State<_CharT, _TraitsT> __state) const;
|
||||
|
||||
virtual std::unique_ptr<_Executor>
|
||||
_M_clone() const = 0;
|
||||
|
||||
// Return whether now match the given sub-NFA.
|
||||
bool
|
||||
_M_lookahead(_State<_CharT, _TraitsT> __state) const
|
||||
{
|
||||
auto __sub = this->_M_clone();
|
||||
__sub->_M_set_start(__state._M_alt);
|
||||
return __sub->_M_search_from_first();
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_results(_ResultsVec& __cur_results);
|
||||
_M_lookahead(_State<_CharT, _TraitsT> __state);
|
||||
|
||||
public:
|
||||
virtual void
|
||||
_M_init(_BiIter __cur) = 0;
|
||||
|
||||
virtual void
|
||||
_M_set_start(_StateIdT __start) = 0;
|
||||
|
||||
virtual bool
|
||||
_M_main() = 0;
|
||||
|
||||
_BiIter _M_current;
|
||||
const _BiIter _M_begin;
|
||||
const _BiIter _M_end;
|
||||
const _RegexT& _M_re;
|
||||
_ResultsVec& _M_results;
|
||||
_FlagT _M_flags;
|
||||
bool _M_match_mode;
|
||||
};
|
||||
|
||||
// A _DFSExecutor perform a DFS on given NFA and input string. At the very
|
||||
// beginning the executor stands in the start state, then it try every
|
||||
// possible state transition in current state recursively. Some state
|
||||
// transitions consume input string, say, a single-char-matcher or a
|
||||
// back-reference matcher; some not, like assertion or other anchor nodes.
|
||||
// When the input is exhausted and the current state is an accepting state,
|
||||
// the whole executor return true.
|
||||
//
|
||||
// TODO: This approach is exponentially slow for certain input.
|
||||
// Try to compile the NFA to a DFA.
|
||||
//
|
||||
// Time complexity: o(match_length), O(2^(_M_nfa->size()))
|
||||
// Space complexity: \theta(match_results.size() + match_length)
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _DFSExecutor
|
||||
: public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
|
||||
{
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _NFAT;
|
||||
typedef typename _BaseT::_RegexT _RegexT;
|
||||
typedef typename _BaseT::_ResultsVec _ResultsVec;
|
||||
typedef typename _BaseT::_FlagT _FlagT;
|
||||
|
||||
public:
|
||||
_DFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsVec& __results,
|
||||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start())
|
||||
{ }
|
||||
|
||||
private:
|
||||
void
|
||||
_M_init(_BiIter __cur)
|
||||
{
|
||||
_M_cur_results.resize(_M_nfa->_M_sub_count() + 2);
|
||||
this->_M_current = __cur;
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_start(_StateIdT __start)
|
||||
{ _M_start_state = __start; }
|
||||
|
||||
bool
|
||||
_M_main()
|
||||
{ return _M_dfs(this->_M_start_state); }
|
||||
|
||||
bool
|
||||
_M_dfs(_StateIdT __start);
|
||||
|
||||
std::unique_ptr<_BaseT>
|
||||
_M_clone() const
|
||||
{
|
||||
return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current,
|
||||
this->_M_end,
|
||||
this->_M_results,
|
||||
this->_M_re,
|
||||
this->_M_flags));
|
||||
}
|
||||
|
||||
_ResultsVec _M_cur_results;
|
||||
_BiIter _M_current;
|
||||
const _BiIter _M_begin;
|
||||
const _BiIter _M_end;
|
||||
const _RegexT& _M_re;
|
||||
const _NFAT& _M_nfa;
|
||||
_ResultsVec& _M_results;
|
||||
// Used in BFS, saving states that need to be considered for the next
|
||||
// character.
|
||||
std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
|
||||
// Used in BFS, indicating that which state is already visited.
|
||||
std::unique_ptr<vector<bool>> _M_visited;
|
||||
_FlagT _M_flags;
|
||||
// To record current solution.
|
||||
std::shared_ptr<_NFAT> _M_nfa;
|
||||
_ResultsVec _M_cur_results;
|
||||
_StateIdT _M_start_state;
|
||||
};
|
||||
|
||||
// Like the DFS approach, it try every possible state transition; Unlike DFS,
|
||||
// it uses a queue instead of a stack to store matching states. It's a BFS
|
||||
// approach.
|
||||
//
|
||||
// Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained
|
||||
// this algorithm clearly.
|
||||
//
|
||||
// Every entry of _M_covered saves the solution(grouping status) for every
|
||||
// matching head. When states transit, solutions will be compared and
|
||||
// deduplicated(based on which greedy mode we have).
|
||||
//
|
||||
// Time complexity: o(match_length * (quantifier_number
|
||||
// + match_results.size())
|
||||
// O(match_length * _M_nfa->size()
|
||||
// * (quantifier_number + match_results.size())
|
||||
// Space complexity: o(quantifier_number + match_results.size())
|
||||
// O(_M_nfa->size()
|
||||
// * (quantifier_number + match_results.size())
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _BFSExecutor
|
||||
: public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
|
||||
{
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _NFAT;
|
||||
typedef typename _BaseT::_RegexT _RegexT;
|
||||
typedef typename _BaseT::_ResultsVec _ResultsVec;
|
||||
typedef typename _BaseT::_FlagT _FlagT;
|
||||
// Here's a solution for greedy/ungreedy mode in BFS approach. We need to
|
||||
// carefully work out how to compare to conflict matching states.
|
||||
//
|
||||
// A matching state is a pair(where, when); `where` is a NFA node; `when`
|
||||
// is a _BiIter, indicating which char is the next to be matched. Two
|
||||
// matching states conflict if they have equivalent `where` and `when`.
|
||||
//
|
||||
// Now we need to drop one and keep another, because at most one of them
|
||||
// could be the final optimal solution. This behavior is affected by
|
||||
// greedy policy.
|
||||
//
|
||||
// The definition of `greedy`:
|
||||
// For the sequence of quantifiers in NFA sorted by their start positions,
|
||||
// now maintain a vector in every matching state, with length equal to
|
||||
// quantifier seq, recording repeating times of every quantifier. Now to
|
||||
// compare two matching states, we just lexically compare these two
|
||||
// vectors. To win the compare(to survive), one matching state needs to
|
||||
// make its greedy quantifier count larger, and ungreedy quantifiers
|
||||
// count smaller.
|
||||
//
|
||||
// In the implementation, we recorded negtive counts for greedy
|
||||
// quantifiers and positive counts of ungreedy ones. Now the implicit
|
||||
// operator<() for lexicographical_compare will emit the answer.
|
||||
//
|
||||
// When two vectors equal, it means the `where`, `when` and quantifier
|
||||
// counts are identical, and indicates the same solution; so
|
||||
// _ResultsEntry::operator<() just return false.
|
||||
struct _ResultsEntry
|
||||
: private _ResultsVec
|
||||
{
|
||||
public:
|
||||
_ResultsEntry(size_t __res_sz, size_t __sz)
|
||||
: _ResultsVec(__res_sz), _M_quant_keys(__sz)
|
||||
{ }
|
||||
|
||||
void
|
||||
resize(size_t __n)
|
||||
{ _ResultsVec::resize(__n); }
|
||||
|
||||
size_t
|
||||
size()
|
||||
{ return _ResultsVec::size(); }
|
||||
|
||||
sub_match<_BiIter>&
|
||||
operator[](size_t __idx)
|
||||
{ return _ResultsVec::operator[](__idx); }
|
||||
|
||||
bool
|
||||
operator<(const _ResultsEntry& __rhs) const
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
|
||||
== __rhs._M_quant_keys.size());
|
||||
return lexicographical_compare(_M_quant_keys.begin(),
|
||||
_M_quant_keys.end(),
|
||||
__rhs._M_quant_keys.begin(),
|
||||
__rhs._M_quant_keys.end());
|
||||
}
|
||||
|
||||
void
|
||||
_M_inc(size_t __idx, bool __neg)
|
||||
{ _M_quant_keys[__idx] += __neg ? 1 : -1; }
|
||||
|
||||
_ResultsVec&
|
||||
_M_get()
|
||||
{ return *this; }
|
||||
|
||||
public:
|
||||
std::vector<int> _M_quant_keys;
|
||||
};
|
||||
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
|
||||
|
||||
class _TodoList
|
||||
{
|
||||
public:
|
||||
explicit
|
||||
_TodoList(size_t __sz)
|
||||
: _M_states(), _M_exists(__sz, false)
|
||||
{ }
|
||||
|
||||
void _M_push(_StateIdT __u)
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
|
||||
if (!_M_exists[__u])
|
||||
{
|
||||
_M_exists[__u] = true;
|
||||
_M_states.push_back(__u);
|
||||
}
|
||||
}
|
||||
|
||||
_StateIdT _M_pop()
|
||||
{
|
||||
auto __ret = _M_states.back();
|
||||
_M_states.pop_back();
|
||||
_M_exists[__ret] = false;
|
||||
return __ret;
|
||||
}
|
||||
|
||||
bool _M_empty() const
|
||||
{ return _M_states.empty(); }
|
||||
|
||||
void _M_clear()
|
||||
{
|
||||
_M_states.clear();
|
||||
_M_exists.assign(_M_exists.size(), false);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<_StateIdT> _M_states;
|
||||
std::vector<bool> _M_exists;
|
||||
};
|
||||
|
||||
public:
|
||||
_BFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsVec& __results,
|
||||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()),
|
||||
_M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start())
|
||||
{ }
|
||||
|
||||
private:
|
||||
void
|
||||
_M_init(_BiIter __cur)
|
||||
{
|
||||
this->_M_current = __cur;
|
||||
_M_covered.clear();
|
||||
_ResultsVec& __res(this->_M_results);
|
||||
_M_covered[this->_M_start_state] =
|
||||
_ResultsPtr(new _ResultsEntry(__res.size(),
|
||||
_M_nfa->_M_quant_count));
|
||||
_M_stack._M_push(this->_M_start_state);
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_start(_StateIdT __start)
|
||||
{ _M_start_state = __start; }
|
||||
|
||||
bool
|
||||
_M_main();
|
||||
|
||||
void
|
||||
_M_e_closure();
|
||||
|
||||
void
|
||||
_M_move();
|
||||
|
||||
bool
|
||||
_M_includes_some();
|
||||
|
||||
std::unique_ptr<_BaseT>
|
||||
_M_clone() const
|
||||
{
|
||||
return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current,
|
||||
this->_M_end,
|
||||
this->_M_results,
|
||||
this->_M_re,
|
||||
this->_M_flags));
|
||||
}
|
||||
|
||||
std::shared_ptr<_NFAT> _M_nfa;
|
||||
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
||||
_TodoList _M_match_stack;
|
||||
_TodoList _M_stack;
|
||||
_StateIdT _M_start_state;
|
||||
// To record global optimal solution.
|
||||
_ResultsPtr _M_cur_results;
|
||||
_StateIdT _M_start_state;
|
||||
// Do we have a solution so far?
|
||||
bool _M_has_sol;
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
|
|
|
@ -28,22 +28,15 @@
|
|||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// See below __get_executor to get what this is talking about. The default
|
||||
// value 1 indicated a conservative optimization without giving up worst case
|
||||
// performance.
|
||||
#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
|
||||
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
|
||||
#endif
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||
bool __dfs_mode>
|
||||
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||
_M_search()
|
||||
{
|
||||
if (_M_flags & regex_constants::match_continuous)
|
||||
|
@ -51,9 +44,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
auto __cur = _M_begin;
|
||||
do
|
||||
{
|
||||
_M_match_mode = false;
|
||||
_M_init(__cur);
|
||||
if (_M_main())
|
||||
_M_current = __cur;
|
||||
if (_M_main<false>())
|
||||
return true;
|
||||
}
|
||||
// Continue when __cur == _M_end
|
||||
|
@ -61,24 +53,141 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
return false;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||
bool __dfs_mode>
|
||||
template<bool __match_mode>
|
||||
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||
_M_main()
|
||||
{
|
||||
if (__dfs_mode)
|
||||
{
|
||||
_M_has_sol = false;
|
||||
_M_cur_results = _M_results;
|
||||
_M_dfs<__match_mode>(_M_start_state);
|
||||
return _M_has_sol;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Like the DFS approach, it try every possible state transition;
|
||||
// Unlike DFS, it uses a queue instead of a stack to store matching
|
||||
// states. It's a BFS approach.
|
||||
//
|
||||
// Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html)
|
||||
// explained this algorithm clearly.
|
||||
//
|
||||
// Time complexity: o(match_length * match_results.size())
|
||||
// O(match_length * _M_nfa.size()
|
||||
// * match_results.size())
|
||||
// Space complexity: o(_M_nfa.size() + match_results.size())
|
||||
// O(_M_nfa.size() * match_results.size())
|
||||
_M_match_queue->push(make_pair(_M_start_state, _M_results));
|
||||
bool __ret = false;
|
||||
while (1)
|
||||
{
|
||||
_M_has_sol = false;
|
||||
if (_M_match_queue->empty())
|
||||
break;
|
||||
_M_visited->assign(_M_visited->size(), false);
|
||||
auto _M_old_queue = std::move(*_M_match_queue);
|
||||
while (!_M_old_queue.empty())
|
||||
{
|
||||
auto __task = _M_old_queue.front();
|
||||
_M_old_queue.pop();
|
||||
_M_cur_results = __task.second;
|
||||
_M_dfs<__match_mode>(__task.first);
|
||||
}
|
||||
if (!__match_mode)
|
||||
__ret |= _M_has_sol;
|
||||
if (_M_current == _M_end)
|
||||
break;
|
||||
++_M_current;
|
||||
}
|
||||
if (__match_mode)
|
||||
__ret = _M_has_sol;
|
||||
return __ret;
|
||||
}
|
||||
}
|
||||
|
||||
// Return whether now match the given sub-NFA.
|
||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||
bool __dfs_mode>
|
||||
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||
_M_lookahead(_State<_Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||
_CharT, _TraitsT> __state)
|
||||
{
|
||||
_ResultsVec __what(_M_cur_results.size());
|
||||
auto __sub = std::unique_ptr<_Executor>(new _Executor(_M_current,
|
||||
_M_end,
|
||||
__what,
|
||||
_M_re,
|
||||
_M_flags));
|
||||
__sub->_M_start_state = __state._M_alt;
|
||||
if (__sub->_M_search_from_first())
|
||||
{
|
||||
for (size_t __i = 0; __i < __what.size(); __i++)
|
||||
if (__what[__i].matched)
|
||||
_M_cur_results[__i] = __what[__i];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// A _DFSExecutor perform a DFS on given NFA and input string. At the very
|
||||
// beginning the executor stands in the start state, then it try every
|
||||
// possible state transition in current state recursively. Some state
|
||||
// transitions consume input string, say, a single-char-matcher or a
|
||||
// back-reference matcher; some not, like assertion or other anchor nodes.
|
||||
// When the input is exhausted and the current state is an accepting state,
|
||||
// the whole executor return true.
|
||||
//
|
||||
// TODO: This approach is exponentially slow for certain input.
|
||||
// Try to compile the NFA to a DFA.
|
||||
//
|
||||
// Time complexity: o(match_length), O(2^(_M_nfa.size()))
|
||||
// Space complexity: \theta(match_results.size() + match_length)
|
||||
//
|
||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||
bool __dfs_mode>
|
||||
template<bool __match_mode>
|
||||
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||
_M_dfs(_StateIdT __i)
|
||||
{
|
||||
auto& __current = this->_M_current;
|
||||
const auto& __state = (*_M_nfa)[__i];
|
||||
bool __ret = false;
|
||||
if (!__dfs_mode)
|
||||
{
|
||||
if ((*_M_visited)[__i])
|
||||
return;
|
||||
(*_M_visited)[__i] = true;
|
||||
}
|
||||
|
||||
const auto& __state = _M_nfa[__i];
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
// Greedy or not, this is a question ;)
|
||||
if (!__state._M_neg)
|
||||
__ret = _M_dfs(__state._M_alt)
|
||||
|| _M_dfs(__state._M_next);
|
||||
{
|
||||
_M_dfs<__match_mode>(__state._M_alt);
|
||||
if (!__dfs_mode || !_M_has_sol)
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
}
|
||||
else
|
||||
__ret = _M_dfs(__state._M_next)
|
||||
|| _M_dfs(__state._M_alt);
|
||||
{
|
||||
if (__dfs_mode)
|
||||
{
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
if (!_M_has_sol)
|
||||
_M_dfs<__match_mode>(__state._M_alt);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!_M_has_sol)
|
||||
{
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
if (!_M_has_sol)
|
||||
_M_dfs<__match_mode>(__state._M_alt);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
// Here's the critical part: if there's nothing changed since last
|
||||
|
@ -88,273 +197,128 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
// Every change on _M_cur_results will be roll back after the
|
||||
// recursion step finished.
|
||||
if (!_M_cur_results[__state._M_subexpr].matched
|
||||
|| _M_cur_results[__state._M_subexpr].first != __current)
|
||||
|| _M_cur_results[__state._M_subexpr].first != _M_current)
|
||||
{
|
||||
auto __back = _M_cur_results[__state._M_subexpr].first;
|
||||
_M_cur_results[__state._M_subexpr].first = __current;
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
_M_cur_results[__state._M_subexpr].first = __back;
|
||||
auto& __res = _M_cur_results[__state._M_subexpr];
|
||||
auto __back = __res.first;
|
||||
__res.first = _M_current;
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
__res.first = __back;
|
||||
}
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
if (_M_cur_results[__state._M_subexpr].second != __current
|
||||
if (_M_cur_results[__state._M_subexpr].second != _M_current
|
||||
|| _M_cur_results[__state._M_subexpr].matched != true)
|
||||
{
|
||||
auto __back = _M_cur_results[__state._M_subexpr];
|
||||
_M_cur_results[__state._M_subexpr].second = __current;
|
||||
_M_cur_results[__state._M_subexpr].matched = true;
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
_M_cur_results[__state._M_subexpr] = __back;
|
||||
auto& __res = _M_cur_results[__state._M_subexpr];
|
||||
auto __back = __res;
|
||||
__res.second = _M_current;
|
||||
__res.matched = true;
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
__res = __back;
|
||||
}
|
||||
else
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_line_begin_assertion:
|
||||
if (this->_M_at_begin())
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
if (_M_at_begin())
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_line_end_assertion:
|
||||
if (this->_M_at_end())
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
if (_M_at_end())
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_word_boundry:
|
||||
if (this->_M_word_boundry(__state) == !__state._M_neg)
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
if (_M_word_boundry(__state) == !__state._M_neg)
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
// Here __state._M_alt offers a single start node for a sub-NFA.
|
||||
// We recursivly invoke our algorithm to match the sub-NFA.
|
||||
case _S_opcode_subexpr_lookahead:
|
||||
if (this->_M_lookahead(__state) == !__state._M_neg)
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
if (_M_lookahead(__state) == !__state._M_neg)
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
if (__current != this->_M_end && __state._M_matches(*__current))
|
||||
if (__dfs_mode)
|
||||
{
|
||||
++__current;
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
--__current;
|
||||
if (_M_current != _M_end && __state._M_matches(*_M_current))
|
||||
{
|
||||
++_M_current;
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
--_M_current;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (__state._M_matches(*_M_current))
|
||||
_M_match_queue->push(make_pair(__state._M_next, _M_cur_results));
|
||||
break;
|
||||
// First fetch the matched result from _M_cur_results as __submatch;
|
||||
// then compare it with
|
||||
// (__current, __current + (__submatch.second - __submatch.first))
|
||||
// (_M_current, _M_current + (__submatch.second - __submatch.first))
|
||||
// If matched, keep going; else just return to try another state.
|
||||
case _S_opcode_backref:
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(__dfs_mode);
|
||||
auto& __submatch = _M_cur_results[__state._M_backref_index];
|
||||
if (!__submatch.matched)
|
||||
break;
|
||||
auto __last = __current;
|
||||
auto __last = _M_current;
|
||||
for (auto __tmp = __submatch.first;
|
||||
__last != this->_M_end && __tmp != __submatch.second;
|
||||
__last != _M_end && __tmp != __submatch.second;
|
||||
++__tmp)
|
||||
++__last;
|
||||
if (this->_M_re._M_traits.transform(__submatch.first,
|
||||
if (_M_re._M_traits.transform(__submatch.first,
|
||||
__submatch.second)
|
||||
== this->_M_re._M_traits.transform(__current, __last))
|
||||
== _M_re._M_traits.transform(_M_current, __last))
|
||||
{
|
||||
if (__last != __current)
|
||||
if (__last != _M_current)
|
||||
{
|
||||
auto __backup = __current;
|
||||
__current = __last;
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
__current = __backup;
|
||||
auto __backup = _M_current;
|
||||
_M_current = __last;
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
_M_current = __backup;
|
||||
}
|
||||
else
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
_M_dfs<__match_mode>(__state._M_next);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
if (this->_M_match_mode)
|
||||
__ret = __current == this->_M_end;
|
||||
if (__dfs_mode)
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(!_M_has_sol);
|
||||
if (__match_mode)
|
||||
_M_has_sol = _M_current == _M_end;
|
||||
else
|
||||
_M_has_sol = true;
|
||||
if (_M_current == _M_begin
|
||||
&& (_M_flags & regex_constants::match_not_null))
|
||||
_M_has_sol = false;
|
||||
if (_M_has_sol)
|
||||
_M_results = _M_cur_results;
|
||||
}
|
||||
else
|
||||
__ret = true;
|
||||
if (__current == this->_M_begin
|
||||
&& (this->_M_flags & regex_constants::match_not_null))
|
||||
__ret = false;
|
||||
if (__ret)
|
||||
this->_M_set_results(_M_cur_results);
|
||||
{
|
||||
if (_M_current == _M_begin
|
||||
&& (_M_flags & regex_constants::match_not_null))
|
||||
break;
|
||||
if (!__match_mode || _M_current == _M_end)
|
||||
if (!_M_has_sol)
|
||||
{
|
||||
_M_has_sol = true;
|
||||
_M_results = _M_cur_results;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
return __ret;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_main()
|
||||
{
|
||||
_M_e_closure();
|
||||
bool __ret = false;
|
||||
if (!this->_M_match_mode
|
||||
&& !(this->_M_flags & regex_constants::match_not_null))
|
||||
__ret = _M_includes_some() || __ret;
|
||||
while (this->_M_current != this->_M_end)
|
||||
{
|
||||
_M_move();
|
||||
++this->_M_current;
|
||||
if (_M_stack._M_empty())
|
||||
break;
|
||||
_M_e_closure();
|
||||
if (!this->_M_match_mode)
|
||||
// To keep regex_search greedy, no "return true" here.
|
||||
__ret = _M_includes_some() || __ret;
|
||||
}
|
||||
if (this->_M_match_mode)
|
||||
__ret = _M_includes_some();
|
||||
if (__ret)
|
||||
this->_M_set_results(_M_cur_results->_M_get());
|
||||
_M_match_stack._M_clear();
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
|
||||
return __ret;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_e_closure()
|
||||
{
|
||||
auto& __current = this->_M_current;
|
||||
|
||||
while (!_M_stack._M_empty())
|
||||
{
|
||||
auto __u = _M_stack._M_pop();
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
|
||||
const auto& __state = (*_M_nfa)[__u];
|
||||
|
||||
// Can be implemented using method, but there will be too many
|
||||
// arguments. I would use macro function before C++11, but lambda is
|
||||
// a better choice, since hopefully compiler can inline it.
|
||||
auto __add_visited_state = [=](_StateIdT __v)
|
||||
{
|
||||
if (_M_covered.count(__v) == 0)
|
||||
{
|
||||
_M_covered[__v] =
|
||||
_ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
|
||||
_M_stack._M_push(__v);
|
||||
return;
|
||||
}
|
||||
auto& __cu = _M_covered[__u];
|
||||
auto& __cv = _M_covered[__v];
|
||||
if (*__cu < *__cv)
|
||||
{
|
||||
__cv = _ResultsPtr(new _ResultsEntry(*__cu));
|
||||
// if a state is updated, it's outgoing neighbors should be
|
||||
// reconsidered too. Push them to the queue.
|
||||
_M_stack._M_push(__v);
|
||||
}
|
||||
};
|
||||
|
||||
// Identical to DFS's switch part.
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
// Needs to maintain quantifier count vector here. A quantifier
|
||||
// must be concerned with a alt node.
|
||||
case _S_opcode_alternative:
|
||||
{
|
||||
__add_visited_state(__state._M_next);
|
||||
auto& __cu = *_M_covered[__u];
|
||||
auto __back = __cu._M_quant_keys[__state._M_quant_index];
|
||||
__cu._M_inc(__state._M_quant_index, __state._M_neg);
|
||||
__add_visited_state(__state._M_alt);
|
||||
__cu._M_quant_keys[__state._M_quant_index] = __back;
|
||||
}
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
{
|
||||
auto& __sub = (*_M_covered[__u])[__state._M_subexpr];
|
||||
if (!__sub.matched || __sub.first != __current)
|
||||
{
|
||||
auto __back = __sub.first;
|
||||
__sub.first = __current;
|
||||
__add_visited_state(__state._M_next);
|
||||
__sub.first = __back;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
{
|
||||
auto& __cu = *_M_covered[__u];
|
||||
auto __back = __cu[__state._M_subexpr];
|
||||
__cu[__state._M_subexpr].second = __current;
|
||||
__cu[__state._M_subexpr].matched = true;
|
||||
__add_visited_state(__state._M_next);
|
||||
__cu[__state._M_subexpr] = __back;
|
||||
}
|
||||
break;
|
||||
case _S_opcode_line_begin_assertion:
|
||||
if (this->_M_at_begin())
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_line_end_assertion:
|
||||
if (this->_M_at_end())
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_word_boundry:
|
||||
if (this->_M_word_boundry(__state) == !__state._M_neg)
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_lookahead:
|
||||
if (this->_M_lookahead(__state) == !__state._M_neg)
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
_M_match_stack._M_push(__u);
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_move()
|
||||
{
|
||||
decltype(_M_covered) __next;
|
||||
while (!_M_match_stack._M_empty())
|
||||
{
|
||||
auto __u = _M_match_stack._M_pop();
|
||||
const auto& __state = (*_M_nfa)[__u];
|
||||
auto& __cu = _M_covered[__u];
|
||||
if (__state._M_matches(*this->_M_current)
|
||||
&& (__next.count(__state._M_next) == 0
|
||||
|| *__cu < *__next[__state._M_next]))
|
||||
{
|
||||
__next[__state._M_next] = std::move(__cu);
|
||||
_M_stack._M_push(__state._M_next);
|
||||
}
|
||||
}
|
||||
_M_covered = move(__next);
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_includes_some()
|
||||
{
|
||||
bool __succ = false;
|
||||
for (auto __u : _M_nfa->_M_final_states())
|
||||
if (_M_covered.count(__u))
|
||||
{
|
||||
__succ = true;
|
||||
auto& __cu = _M_covered[__u];
|
||||
if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
|
||||
_M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
|
||||
}
|
||||
return __succ;
|
||||
}
|
||||
|
||||
// Return whether now is at some word boundry.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
||||
bool __dfs_mode>
|
||||
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
|
||||
_M_word_boundry(_State<_CharT, _TraitsT> __state) const
|
||||
{
|
||||
// By definition.
|
||||
|
@ -376,54 +340,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
return __ans;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
void _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_set_results(_ResultsVec& __cur_results)
|
||||
{
|
||||
for (size_t __i = 0; __i < __cur_results.size(); ++__i)
|
||||
if (__cur_results[__i].matched)
|
||||
_M_results[__i] = __cur_results[__i];
|
||||
}
|
||||
|
||||
enum class _RegexExecutorPolicy : int
|
||||
{ _S_auto, _S_alternate };
|
||||
|
||||
// This function decide which executor to use under given circumstances.
|
||||
// The _S_auto policy now is the following: if a NFA has no back-references
|
||||
// and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers
|
||||
// (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is
|
||||
// because _DFSExecutor has a exponential upper bound, but better best-case
|
||||
// performace. Meanwhile, _BFSExecutor can effectively prevent from
|
||||
// exponential-long time matching (which must contains many quantifiers), but
|
||||
// it's slower in average.
|
||||
//
|
||||
// For simple regex, _BFSExecutor could be 2 or more times slower than
|
||||
// _DFSExecutor.
|
||||
//
|
||||
// Of course, _BFSExecutor cannot handle back-references.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT,
|
||||
_RegexExecutorPolicy __policy>
|
||||
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
__get_executor(_BiIter __b,
|
||||
_BiIter __e,
|
||||
std::vector<sub_match<_BiIter>, _Alloc>& __m,
|
||||
const basic_regex<_CharT, _TraitsT>& __re,
|
||||
regex_constants::match_flag_type __flags)
|
||||
{
|
||||
typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
_ExecutorPtr;
|
||||
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
|
||||
typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
|
||||
if (!__re._M_automaton->_M_has_backref
|
||||
&& (__policy == _RegexExecutorPolicy::_S_alternate
|
||||
|| __re._M_automaton->_M_quant_count
|
||||
> _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
|
||||
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-10-24 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests ECMAScript ungreedy match.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
#include <testsuite_regex.h>
|
||||
|
||||
using namespace __gnu_test;
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
regex re("(a*?)*?");
|
||||
cmatch m;
|
||||
VERIFY(regex_match("a", m, re));
|
||||
VERIFY(m.size() == 2);
|
||||
VERIFY(string(m[0].first, m[0].second) == "a");
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
|
@ -1,72 +0,0 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-07-29 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests Extended automatic matcher dispatching against a std::string target.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
template<typename _Bi_iter, typename _Alloc,
|
||||
typename _Ch_type, typename _Rx_traits>
|
||||
void
|
||||
fake_match(_Bi_iter __s,
|
||||
_Bi_iter __e,
|
||||
match_results<_Bi_iter, _Alloc>& __m,
|
||||
const basic_regex<_Ch_type, _Rx_traits>& __re,
|
||||
regex_constants::match_flag_type __flags
|
||||
= regex_constants::match_default)
|
||||
{
|
||||
using namespace __detail;
|
||||
auto& __res = (vector<sub_match<_Bi_iter>, _Alloc>&)(__m);
|
||||
VERIFY( (dynamic_cast
|
||||
<_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
|
||||
(&*__get_executor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
|
||||
_RegexExecutorPolicy::_S_auto>(__s, __e, __res, __re, __flags))
|
||||
!= nullptr) );
|
||||
}
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
regex re("()(one(.*))abc\\1"); // backref cause DFS
|
||||
const string target("onetwoabc");
|
||||
smatch m;
|
||||
fake_match(target.begin(), target.end(), m, re);
|
||||
|
||||
regex_match(target, m, re);
|
||||
VERIFY( m[2].matched );
|
||||
VERIFY( m[3].matched );
|
||||
VERIFY( std::string(m[2].first, m[2].second) == "onetwo" );
|
||||
VERIFY( std::string(m[3].first, m[3].second) == "two" );
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
|
@ -54,9 +54,9 @@ test01()
|
|||
VERIFY(regex_search_debug("aaaa", m, regex("(a+)(a+)")));
|
||||
TEST(1, "aaa");
|
||||
TEST(2, "a");
|
||||
VERIFY(regex_search_debug("aaaa", m, regex("(a+?)(a+)")));
|
||||
TEST(1, "a");
|
||||
TEST(2, "aaa");
|
||||
VERIFY(regex_search_debug("aaaa", m, regex("(a+)(a+?)")));
|
||||
TEST(1, "aaa");
|
||||
TEST(2, "a");
|
||||
VERIFY(regex_search_debug("aaaa", m, regex("(a+?)(a+)")));
|
||||
TEST(1, "a");
|
||||
TEST(2, "aaa");
|
||||
|
|
|
@ -18,82 +18,12 @@
|
|||
// 2013-10-08 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
#include <testsuite_performance.h>
|
||||
#include <regex>
|
||||
#include "split.h"
|
||||
|
||||
using namespace __gnu_test;
|
||||
using namespace std;
|
||||
|
||||
void split(string s)
|
||||
{
|
||||
regex re("\\s+");
|
||||
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
|
||||
it != sregex_token_iterator();
|
||||
++it)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
string source = "\
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.\n\
|
||||
//\n\
|
||||
// This file is part of the GNU ISO C++ Library. This library is free\n\
|
||||
// software; you can redistribute it and/or modify it under the\n\
|
||||
// terms of the GNU General Public License as published by the\n\
|
||||
// Free Software Foundation; either version 3, or (at your option)\n\
|
||||
// any later version.\n\
|
||||
\n\
|
||||
// This library is distributed in the hope that it will be useful,\n\
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
|
||||
// GNU General Public License for more details.\n\
|
||||
\n\
|
||||
// You should have received a copy of the GNU General Public License along\n\
|
||||
// with this library; see the file COPYING3. If not see\n\
|
||||
// <http://www.gnu.org/licenses/>.\n\
|
||||
\n\
|
||||
// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
|
||||
\n\
|
||||
#include <testsuite_performance.h>\n\
|
||||
#include <regex>\n\
|
||||
\n\
|
||||
using namespace __gnu_test;\n\
|
||||
using namespace std;\n\
|
||||
\n\
|
||||
void split(string s)\n\
|
||||
{\n\
|
||||
regex re(\"\\s+\");\n\
|
||||
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
|
||||
it != sregex_token_iterator();\n\
|
||||
++it)\n\
|
||||
{\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
int main()\n\
|
||||
{\n\
|
||||
string source = \"\";\n\
|
||||
time_counter time;\n\
|
||||
resource_counter resource;\n\
|
||||
\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
\n\
|
||||
start_counters(time, resource);\n\
|
||||
split(source);\n\
|
||||
stop_counters(time, resource);\n\
|
||||
report_performance(__FILE__, \"\", time, resource);\n\
|
||||
\n\
|
||||
return 0;\n\
|
||||
}\n";
|
||||
|
||||
time_counter time;
|
||||
resource_counter resource;
|
||||
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 2013-10-26 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
#include <regex>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void split(string s)
|
||||
{
|
||||
regex re("\\s+");
|
||||
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
|
||||
it != sregex_token_iterator();
|
||||
++it)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
string source = "\
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.\n\
|
||||
//\n\
|
||||
// This file is part of the GNU ISO C++ Library. This library is free\n\
|
||||
// software; you can redistribute it and/or modify it under the\n\
|
||||
// terms of the GNU General Public License as published by the\n\
|
||||
// Free Software Foundation; either version 3, or (at your option)\n\
|
||||
// any later version.\n\
|
||||
\n\
|
||||
// This library is distributed in the hope that it will be useful,\n\
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
|
||||
// GNU General Public License for more details.\n\
|
||||
\n\
|
||||
// You should have received a copy of the GNU General Public License along\n\
|
||||
// with this library; see the file COPYING3. If not see\n\
|
||||
// <http://www.gnu.org/licenses/>.\n\
|
||||
\n\
|
||||
// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
|
||||
\n\
|
||||
#include <testsuite_performance.h>\n\
|
||||
#include <regex>\n\
|
||||
\n\
|
||||
using namespace __gnu_test;\n\
|
||||
using namespace std;\n\
|
||||
\n\
|
||||
void split(string s)\n\
|
||||
{\n\
|
||||
regex re(\"\\s+\");\n\
|
||||
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
|
||||
it != sregex_token_iterator();\n\
|
||||
++it)\n\
|
||||
{\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
int main()\n\
|
||||
{\n\
|
||||
string source = \"\";\n\
|
||||
time_counter time;\n\
|
||||
resource_counter resource;\n\
|
||||
\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
\n\
|
||||
start_counters(time, resource);\n\
|
||||
split(source);\n\
|
||||
stop_counters(time, resource);\n\
|
||||
report_performance(__FILE__, \"\", time, resource);\n\
|
||||
\n\
|
||||
return 0;\n\
|
||||
}\n";
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 2013-10-26 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
#include <testsuite_performance.h>
|
||||
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 0
|
||||
#include "split.h"
|
||||
|
||||
using namespace __gnu_test;
|
||||
|
||||
int main()
|
||||
{
|
||||
time_counter time;
|
||||
resource_counter resource;
|
||||
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
|
||||
start_counters(time, resource);
|
||||
split(source);
|
||||
stop_counters(time, resource);
|
||||
report_performance(__FILE__, "", time, resource);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -150,7 +150,8 @@ namespace __gnu_test
|
|||
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
|
||||
_RegexExecutorPolicy::_S_alternate, true>
|
||||
(__s, __e, __mm, __re, __flags);
|
||||
if (__res1 == __res2 && __m == __mm)
|
||||
// __m is unspecified if return value is false.
|
||||
if (__res1 == __res2 && (!__res1 || __m == __mm))
|
||||
return __res1;
|
||||
throw(std::exception());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue