Revert last commit.

From-SVN: r201360
This commit is contained in:
Tim Shen 2013-07-31 03:36:37 +00:00 committed by Tim Shen
parent a167b052df
commit 1151053225
10 changed files with 211 additions and 419 deletions

View File

@ -1,3 +1,7 @@
2013-07-31 Tim Shen <timshen91@gmail.com>
Revert last commit.
2013-07-31 Tim Shen <timshen91@gmail.com> 2013-07-31 Tim Shen <timshen91@gmail.com>
Thompson matcher refactored. Fix grouping problem. Thompson matcher refactored. Fix grouping problem.

View File

@ -2175,7 +2175,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
regex_match(_Bi_iter __s, regex_match(_Bi_iter __s,
_Bi_iter __e, _Bi_iter __e,
match_results<_Bi_iter, _Alloc>& __m, match_results<_Bi_iter, _Alloc>& __m,
const basic_regex<_Ch_type, _Rx_traits>& __re, const basic_regex<_Ch_type, _Rx_traits>& __re,
regex_constants::match_flag_type __flags regex_constants::match_flag_type __flags
= regex_constants::match_default) = regex_constants::match_default)
@ -2184,7 +2184,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match(); __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
return __matcher._M_dfs_match();
} }
/** /**
@ -2335,8 +2336,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo? for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
{ {
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last); __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags); __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
if (__matcher->_M_search_from_first()) if (__matcher._M_dfs_search_from_first())
{ {
__r._M_set_range(__m.size(), __r._M_set_range(__m.size(),
__detail::_SpecializedCursor<_Bi_iter> __detail::_SpecializedCursor<_Bi_iter>

View File

@ -936,8 +936,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_match_token(_ScannerT::_S_token_backref)) if (_M_match_token(_ScannerT::_S_token_backref))
{ {
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
_M_state_store._M_set_back_ref(true); return true;
//return true;
} }
if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{ {

View File

@ -60,19 +60,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _SpecializedCursor<_FwdIterT>& __cursor, const _SpecializedCursor<_FwdIterT>& __cursor,
match_results<_FwdIterT, _Alloc>& __m); match_results<_FwdIterT, _Alloc>& __m);
~_SpecializedResults()
{
if (_M_managed)
delete &_M_results;
}
private:
_SpecializedResults(const _SpecializedResults& __rhs)
: _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
_M_managed(true)
{ }
public:
void void
_M_set_pos(int __i, int __j, const _PatternCursor& __pc); _M_set_pos(int __i, int __j, const _PatternCursor& __pc);
@ -89,20 +76,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_set_matched(int __i, bool __is_matched) _M_set_matched(int __i, bool __is_matched)
{ _M_results.at(__i).matched = __is_matched; } { _M_results.at(__i).matched = __is_matched; }
std::unique_ptr<_Results>
_M_clone() const
{ return unique_ptr<_Results>(new _SpecializedResults(*this)); }
void
_M_assign(const _Results& __rhs)
{
auto __r = static_cast<const _SpecializedResults*>(&__rhs);
_M_results = __r->_M_results;
}
private: private:
match_results<_FwdIterT, _Alloc>& _M_results; match_results<_FwdIterT, _Alloc>& _M_results;
bool _M_managed;
}; };
template<typename _FwdIterT, typename _Alloc> template<typename _FwdIterT, typename _Alloc>
@ -110,7 +85,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_SpecializedResults(const _Automaton::_SizeT __size, _SpecializedResults(const _Automaton::_SizeT __size,
const _SpecializedCursor<_FwdIterT>& __cursor, const _SpecializedCursor<_FwdIterT>& __cursor,
match_results<_FwdIterT, _Alloc>& __m) match_results<_FwdIterT, _Alloc>& __m)
: _M_results(__m), _M_managed(false) : _M_results(__m)
{ {
_M_results.clear(); _M_results.clear();
_M_results.reserve(__size + 2); _M_results.reserve(__size + 2);
@ -130,11 +105,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef const _SpecializedCursor<_FwdIterT>& _CursorT; typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc); _CursorT __c = static_cast<_CursorT>(__pc);
if (__j == 0) if (__j == 0)
_M_results.at(__i).first = __c._M_pos(); _M_results.at(__i).first = __c._M_pos();
else else
_M_results.at(__i).second = __c._M_pos(); _M_results.at(__i).second = __c._M_pos();
} }
/// A stack of states used in evaluating the NFA.
typedef std::stack<_StateIdT, std::vector<_StateIdT> > _StateStack;
/// Executes a regular expression NFA/DFA over a range using a /// Executes a regular expression NFA/DFA over a range using a
/// variant of the parallel execution algorithm featured in the grep /// variant of the parallel execution algorithm featured in the grep
/// utility, modified to use Laurikari tags. /// utility, modified to use Laurikari tags.
@ -146,110 +124,45 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _AutomatonPtr& __automaton, const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags) regex_constants::match_flag_type __flags)
: _M_nfa(static_pointer_cast<_Nfa>(__automaton)), : _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
_M_str_cur(__p), _M_results(__r) _M_pattern(__p), _M_results(__r)
{ }
virtual
~_Grep_matcher()
{ } { }
// Set matched when string exactly match the pattern. // Set matched when string exactly match the pattern.
virtual bool void
_M_match() = 0; _M_match();
// Set matched when some prefix of the string matches the pattern. // Set matched when some prefix of the string matches the pattern.
virtual bool void
_M_search_from_first() = 0; _M_search_from_first();
protected:
const std::shared_ptr<_Nfa> _M_nfa;
_PatternCursor& _M_str_cur;
_Results& _M_results;
};
// Time complexity: exponential
// Space complexity: O(_M_str_cur.size())
// _M_dfs() take a state, along with current string cursor(_M_str_cur),
// trying to match current state with current character.
// Only _S_opcode_match will consume a character.
class _DFSMatcher
: public _Grep_matcher
{
public:
_DFSMatcher(_PatternCursor& __p,
_Results& __r,
const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags)
: _Grep_matcher(__p, __r, __automaton, __flags)
{ }
// TODO: in the future this function will be _M_match, in another class.
bool bool
_M_match() _M_dfs_match()
{ return _M_dfs<true>(_M_nfa->_M_start()); } { return _M_dfs<true>(_M_nfa->_M_start()); }
// TODO: in the future this function will be _M_search_from_first,
// in another class.
bool bool
_M_search_from_first() _M_dfs_search_from_first()
{ return _M_dfs<false>(_M_nfa->_M_start()); } { return _M_dfs<false>(_M_nfa->_M_start()); }
private: private:
_StateSet
_M_e_closure(_StateIdT __i);
_StateSet
_M_e_closure(const _StateSet& __s);
_StateSet
_M_e_closure(_StateStack& __stack, const _StateSet& __s);
template<bool __match_mode> template<bool __match_mode>
bool bool
_M_dfs(_StateIdT __i); _M_dfs(_StateIdT __i);
};
// It's essentially a variant of Single-Source-Shortest-Path problem, where, const std::shared_ptr<_Nfa> _M_nfa;
// the matching results is the final distance and should be minimized. _PatternCursor& _M_pattern;
// Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed _Results& _M_results;
// (BFS-like) Bellman-Ford algorithm,
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
//
// Every entry of _M_current saves the solution(grouping status) for every
// matching head. When states transfer, solutions will be compared and
// deduplicated(based on which greedy mode we have).
//
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
class _BFSMatcher
: public _Grep_matcher
{
public:
_BFSMatcher(_PatternCursor& __p,
_Results& __r,
const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags)
: _Grep_matcher(__p, __r, __automaton, __flags)
{
if (_M_nfa->_M_start() != _S_invalid_state_id)
_M_current[_M_nfa->_M_start()] = _M_results._M_clone();
_M_e_closure();
}
bool
_M_match()
{ return _M_main_loop<true>(); }
bool
_M_search_from_first()
{ return _M_main_loop<false>(); }
private:
template<bool __match_mode>
bool
_M_main_loop();
void
_M_e_closure();
void
_M_move();
bool
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
bool
_M_includes_some() const;
std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
}; };
//@} regex-detail //@} regex-detail

View File

@ -32,13 +32,83 @@
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
namespace
{
// A stack of states used in evaluating the NFA.
typedef std::stack<std::__detail::_StateIdT,
std::vector<std::__detail::_StateIdT>
> _StateStack;
// Obtains the next state set given the current state set __s and the current
// input character.
inline std::__detail::_StateSet
__move(const std::__detail::_PatternCursor& __p,
const std::__detail::_Nfa& __nfa,
const std::__detail::_StateSet& __s)
{
std::__detail::_StateSet __m;
for (std::__detail::_StateSet::const_iterator __i = __s.begin();
__i != __s.end(); ++__i)
{
if (*__i == std::__detail::_S_invalid_state_id)
continue;
const std::__detail::_State& __state = __nfa[*__i];
if (__state._M_opcode == std::__detail::_S_opcode_match
&& __state._M_matches(__p))
__m.insert(__state._M_next);
}
return __m;
}
// returns true if (__s intersect __t) is not empty
inline bool
__includes_some(const std::__detail::_StateSet& __s,
const std::__detail::_StateSet& __t)
{
if (__s.size() > 0 && __t.size() > 0)
{
std::__detail::_StateSet::const_iterator __first = __s.begin();
std::__detail::_StateSet::const_iterator __second = __t.begin();
while (__first != __s.end() && __second != __t.end())
{
if (*__first < *__second)
++__first;
else if (*__second < *__first)
++__second;
else
return true;
}
}
return false;
}
// If an identified state __u is not already in the current state set __e,
// insert it and push it on the current state stack __s.
inline void
__add_visited_state(const std::__detail::_StateIdT __u,
_StateStack& __s,
std::__detail::_StateSet& __e)
{
if (__e.count(__u) == 0)
{
__e.insert(__u);
__s.push(__u);
}
}
} // anonymous namespace
namespace __detail namespace __detail
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
// _M_dfs() take a state, along with current string cursor(_M_pattern),
// trying to match current state with current character.
// Only _S_opcode_match will consume a character.
// TODO: This is too slow. Try to compile the NFA to a DFA. // TODO: This is too slow. Try to compile the NFA to a DFA.
template<bool __match_mode> template<bool __match_mode>
bool _DFSMatcher:: bool _Grep_matcher::
_M_dfs(_StateIdT __i) _M_dfs(_StateIdT __i)
{ {
if (__i == _S_invalid_state_id) if (__i == _S_invalid_state_id)
@ -56,186 +126,116 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|| _M_dfs<__match_mode>(__state._M_next); || _M_dfs<__match_mode>(__state._M_next);
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
__state._M_tagger(_M_str_cur, _M_results); __state._M_tagger(_M_pattern, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs<__match_mode>(__state._M_next);
break; break;
case _S_opcode_subexpr_end: case _S_opcode_subexpr_end:
__state._M_tagger(_M_str_cur, _M_results); __state._M_tagger(_M_pattern, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs<__match_mode>(__state._M_next);
_M_results._M_set_matched(__state._M_subexpr, __ret); _M_results._M_set_matched(__state._M_subexpr, __ret);
break; break;
case _S_opcode_match: case _S_opcode_match:
if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur)) if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
{ {
_M_str_cur._M_next(); _M_pattern._M_next();
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs<__match_mode>(__state._M_next);
_M_str_cur._M_prev(); _M_pattern._M_prev();
} }
break; break;
case _S_opcode_accept: case _S_opcode_accept:
if (__match_mode) if (__match_mode)
__ret = _M_str_cur._M_at_end(); __ret = _M_pattern._M_at_end();
else else
__ret = true; __ret = true;
break; break;
default: default:
_GLIBCXX_DEBUG_ASSERT(false); _GLIBCXX_DEBUG_ASSERT( false );
} }
return __ret; return __ret;
} }
template<bool __match_mode> inline void _Grep_matcher::
bool _BFSMatcher:: _M_match()
_M_main_loop()
{
while (!_M_str_cur._M_at_end())
{
if (!__match_mode)
if (_M_includes_some())
return true;
_M_move();
_M_str_cur._M_next();
_M_e_closure();
}
return _M_includes_some();
}
// The SPFA approach.
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
inline
void _BFSMatcher::
_M_e_closure()
{ {
std::queue<_StateIdT> __q; __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
std::vector<bool> __in_q(_M_nfa->size(), false); for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
for (auto& __it : _M_current) __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
{
__in_q[__it.first] = true;
__q.push(__it.first);
}
while (!__q.empty())
{
auto __u = __q.front();
__q.pop();
__in_q[__u] = false;
const auto& __state = (*_M_nfa)[__u];
// Can be implemented using method, but there're too much arguments. _M_results._M_set_matched(0,
auto __add_visited_state = [&](_StateIdT __v) __includes_some(_M_nfa->_M_final_states(), __t));
{ }
if (__v == _S_invalid_state_id)
return;
if (_M_match_less_than(__u, __v))
{
_M_current[__v] = _M_current[__u]->_M_clone();
// if a state is updated, it's outgoing neighbors should be
// reconsidered too. Push them to the queue.
if (!__in_q[__v])
{
__in_q[__v] = true;
__q.push(__v);
}
}
};
switch (__state._M_opcode) inline void _Grep_matcher::
_M_search_from_first()
{
__detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
{
if (__includes_some(_M_nfa->_M_final_states(), __t)) // KISS
{ {
case _S_opcode_alternative: _M_results._M_set_matched(0, true);
__add_visited_state(__state._M_next); return;
__add_visited_state(__state._M_alt);
break;
case _S_opcode_subexpr_begin:
__state._M_tagger(_M_str_cur, *_M_current[__u]);
__add_visited_state(__state._M_next);
break;
case _S_opcode_subexpr_end:
__state._M_tagger(_M_str_cur, *_M_current[__u]);
_M_current[__u]->_M_set_matched(__state._M_subexpr, true);
__add_visited_state(__state._M_next);
break;
case _S_opcode_match:
break;
case _S_opcode_accept:
__add_visited_state(__state._M_next);
break;
default:
_GLIBCXX_DEBUG_ASSERT(false);
} }
__t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
} }
_M_results._M_set_matched(0, false);
} }
// FIXME: move it to src/c++11 when it's stable, and make it not inlined. // Creates the e-closure set for the initial state __i.
inline inline _StateSet _Grep_matcher::
void _BFSMatcher:: _M_e_closure(_StateIdT __i)
_M_move()
{ {
decltype(_M_current) __next; _StateSet __s;
for (auto& __it : _M_current) __s.insert(__i);
_StateStack __stack;
__stack.push(__i);
return this->_M_e_closure(__stack, __s);
}
// Creates the e-closure set for an arbitrary state set __s.
inline _StateSet _Grep_matcher::
_M_e_closure(const _StateSet& __s)
{
_StateStack __stack;
for (_StateSet::const_iterator __i = __s.begin(); __i != __s.end(); ++__i)
__stack.push(*__i);
return this->_M_e_closure(__stack, __s);
}
inline _StateSet _Grep_matcher::
_M_e_closure(_StateStack& __stack, const _StateSet& __s)
{
_StateSet __e = __s;
while (!__stack.empty())
{ {
const auto& __state = (*_M_nfa)[__it.first]; _StateIdT __t = __stack.top(); __stack.pop();
if (__state._M_opcode == _S_opcode_match if (__t == _S_invalid_state_id)
&& __state._M_matches(_M_str_cur)) continue;
if (_M_match_less_than(__it.first, __state._M_next) // for each __u with edge from __t to __u labeled e do ...
&& __state._M_next != _S_invalid_state_id) const _State& __state = _M_nfa->operator[](__t);
__next[__state._M_next] = __it.second->_M_clone(); switch (__state._M_opcode)
}
_M_current = move(__next);
}
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
inline
bool _BFSMatcher::
_M_match_less_than(_StateIdT __u, _StateIdT __v) const
{
if (_M_current.count(__u) == 0)
return false;
if (_M_current.count(__v) > 0)
return true;
// TODO: Greedy and Non-greedy support
return true;
}
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
inline
bool _BFSMatcher::
_M_includes_some() const
{
auto& __s = _M_nfa->_M_final_states();
auto& __t = _M_current;
if (__s.size() > 0 && __t.size() > 0)
{
auto __first = __s.begin();
auto __second = __t.begin();
while (__first != __s.end() && __second != __t.end())
{ {
if (*__first < __second->first) case _S_opcode_alternative:
++__first; __add_visited_state(__state._M_next, __stack, __e);
else if (__second->first < *__first) __add_visited_state(__state._M_alt, __stack, __e);
++__second; break;
else case _S_opcode_subexpr_begin:
{ __add_visited_state(__state._M_next, __stack, __e);
_M_results._M_assign(*__second->second); __state._M_tagger(_M_pattern, _M_results);
return true; break;
} case _S_opcode_subexpr_end:
__add_visited_state(__state._M_next, __stack, __e);
__state._M_tagger(_M_pattern, _M_results);
_M_results._M_set_matched(__state._M_subexpr, true);
break;
case _S_opcode_accept:
__add_visited_state(__state._M_next, __stack, __e);
break;
default:
break;
} }
} }
return false; return __e;
}
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
inline
std::unique_ptr<_Grep_matcher> _Nfa::
_M_get_matcher(_PatternCursor& __p,
_Results& __r,
const _AutomatonPtr& __a,
regex_constants::match_flag_type __flags)
{
if (_M_has_back_ref)
return unique_ptr<_Grep_matcher>(
new _DFSMatcher(__p, __r, __a, __flags));
else
return unique_ptr<_Grep_matcher>(
new _BFSMatcher(__p, __r, __a, __flags));
} }
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION

View File

@ -39,24 +39,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{ * @{
*/ */
/// Provides a generic facade for a templated match_results.
struct _Results
{
virtual
~_Results()
{ }
virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
virtual void _M_set_matched(int __i, bool __is_matched) = 0;
virtual std::unique_ptr<_Results> _M_clone() const = 0;
virtual void _M_assign(const _Results& __rhs) = 0;
};
class _Grep_matcher;
class _Automaton;
/// Generic shared pointer to an automaton.
typedef std::shared_ptr<_Automaton> _AutomatonPtr;
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice. /// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
class _Automaton class _Automaton
{ {
@ -70,18 +52,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
virtual _SizeT virtual _SizeT
_M_sub_count() const = 0; _M_sub_count() const = 0;
virtual std::unique_ptr<_Grep_matcher>
_M_get_matcher(_PatternCursor& __p,
_Results& __r,
const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags) = 0;
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
virtual std::ostream& virtual std::ostream&
_M_dot(std::ostream& __ostr) const = 0; _M_dot(std::ostream& __ostr) const = 0;
#endif #endif
}; };
/// Generic shared pointer to an automaton.
typedef std::shared_ptr<_Automaton> _AutomatonPtr;
/// Operation codes that define the type of transitions within the base NFA /// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression. /// that represents the regular expression.
enum _Opcode enum _Opcode
@ -94,6 +73,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_opcode_accept = 255 _S_opcode_accept = 255
}; };
/// Provides a generic facade for a templated match_results.
struct _Results
{
virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
virtual void _M_set_matched(int __i, bool __is_matched) = 0;
};
/// Tags current state (for subexpr begin/end). /// Tags current state (for subexpr begin/end).
typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger; typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
@ -127,6 +113,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ __r._M_set_pos(_M_index, 1, __pc); } { __r._M_set_pos(_M_index, 1, __pc); }
int _M_index; int _M_index;
_FwdIterT _M_pos;
}; };
/// Indicates if current state matches cursor current. /// Indicates if current state matches cursor current.
@ -288,9 +275,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef regex_constants::syntax_option_type _FlagT; typedef regex_constants::syntax_option_type _FlagT;
_Nfa(_FlagT __f) _Nfa(_FlagT __f)
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
// TODO: BFS by default. Your choice. Need to be set by the compiler.
_M_has_back_ref(false)
{ } { }
~_Nfa() ~_Nfa()
@ -349,16 +334,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return this->size()-1; return this->size()-1;
} }
void
_M_set_back_ref(bool __b)
{ _M_has_back_ref = __b; }
std::unique_ptr<_Grep_matcher>
_M_get_matcher(_PatternCursor& __p,
_Results& __r,
const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags);
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
std::ostream& std::ostream&
_M_dot(std::ostream& __ostr) const; _M_dot(std::ostream& __ostr) const;
@ -369,7 +344,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateIdT _M_start_state; _StateIdT _M_start_state;
_StateSet _M_accepting_states; _StateSet _M_accepting_states;
_SizeT _M_subexpr_count; _SizeT _M_subexpr_count;
bool _M_has_back_ref;
}; };
/// Describes a sequence of one or more %_State, its current start /// Describes a sequence of one or more %_State, its current start

View File

@ -44,8 +44,6 @@
#include <iterator> #include <iterator>
#include <locale> #include <locale>
#include <memory> #include <memory>
#include <map>
#include <queue>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <stack> #include <stack>

View File

@ -32,31 +32,16 @@ test01()
{ {
bool test __attribute__((unused)) = true; bool test __attribute__((unused)) = true;
{ std::regex re("zxcv/(one.*)abc", std::regex::extended);
std::regex re("zxcv/(one.*)abc", std::regex::extended); std::string target("zxcv/onetwoabc");
std::string target("zxcv/onetwoabc"); std::smatch m;
std::smatch m;
VERIFY( std::regex_search(target, m, re) ); VERIFY( std::regex_search(target, m, re) );
VERIFY( m.size() == 2 ); VERIFY( m.size() == 2 );
VERIFY( m[0].matched == true ); VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
VERIFY( m[1].matched == true ); VERIFY( m[1].matched == true );
VERIFY( std::string(m[1].first, m[1].second) == "onetwo" ); VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
}
{
std::regex re("zxcv/(one.*)abc()\\2", std::regex::extended);
std::string target("zxcv/onetwoabc");
std::smatch m;
VERIFY( std::regex_search(target, m, re) );
VERIFY( m.size() == 3 );
VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
VERIFY( m[1].matched == true );
VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
}
} }
int int

View File

@ -33,24 +33,13 @@ test01()
{ {
bool test __attribute__((unused)) = true; bool test __attribute__((unused)) = true;
{ std::regex re("/asdf(/.*)", std::regex::extended);
std::regex re("/asdf(/.*)", std::regex::extended); std::string target("/asdf/qwerty");
std::string target("/asdf/qwerty"); std::smatch m;
std::smatch m;
VERIFY( std::regex_match(target, m, re) ); VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 2 ); VERIFY( m.size() == 2 );
VERIFY( std::string(m[1].first, m[1].second) == "/qwerty"); VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
}
{
std::regex re("/asdf(/.*)()\\2", std::regex::extended);
std::string target("/asdf/qwerty");
std::smatch m;
VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 3 );
VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
}
} }
int int

View File

@ -1,71 +0,0 @@
// { dg-options "-std=gnu++11" }
//
// 2013-07-29 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests Extended automatic matcher dispatching against a std::string target.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits>
void
fake_match(_Bi_iter __s,
_Bi_iter __e,
match_results<_Bi_iter, _Alloc>& __m,
const basic_regex<_Ch_type, _Rx_traits>& __re,
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
__detail::_AutomatonPtr __a = __re._M_get_automaton();
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
&*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
}
void
test01()
{
bool test __attribute__((unused)) = true;
regex re("()(one(.*))abc\\1"); // backref cause DFS
const string target("onetwoabc");
smatch m;
fake_match(target.begin(), target.end(), m, re);
regex_match(target, m, re);
VERIFY( m[2].matched );
VERIFY( m[3].matched );
VERIFY( std::string(m[2].first, m[2].second) == "onetwo" );
VERIFY( std::string(m[3].first, m[3].second) == "two" );
}
int
main()
{
test01();
return 0;
}