regex_automaton.h: Add dummy node type.

2013-09-05  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_automaton.h: Add dummy node type. Rewrite
	_StateSeq.
	* include/bits/regex_automaton.tcc: Implement them.
	* include/bits/regex_compiler.h: Rewrite _Compiler to use new
	_StateSeq interfaces.
	* include/bits/regex_compiler.tcc: Implement them.
	* include/bits/regex_scanner.h: Add word boundary assertion token.
	* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
	Support word boundary.
	* testsuite/28_regex/algorithms/regex_match/basic/
	string_range_02_03.cc: Remove "xfail".
	* testsuite/28_regex/algorithms/regex_match/extended/cstring_plus.cc:
	Likewise.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_range_02_03.cc: Likewise.
	* testsuite/28_regex/algorithms/regex_match/extended/
	cstring_questionmark.cc: Remove xfail and get correct length of
	c-string.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_range_00_03.cc: Likewise.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc:
	New.
	* testsuite/28_regex/algorithms/regex_match/extended/cstring_range.cc:
	New.
	* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: New.

From-SVN: r202290
This commit is contained in:
Tim Shen 2013-09-05 15:20:39 +00:00
parent e6523306de
commit 7c812a2a57
15 changed files with 553 additions and 311 deletions

View File

@ -1,8 +1,36 @@
2013-09-05 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.h: Add dummy node type. Rewrite
_StateSeq.
* include/bits/regex_automaton.tcc: Implement them.
* include/bits/regex_compiler.h: Rewrite _Compiler to use new
_StateSeq interfaces.
* include/bits/regex_compiler.tcc: Implement them.
* include/bits/regex_scanner.h: Add word boundary assertion token.
* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
Support word boundary.
* testsuite/28_regex/algorithms/regex_match/basic/
string_range_02_03.cc: Remove "xfail".
* testsuite/28_regex/algorithms/regex_match/extended/cstring_plus.cc:
Likewise.
* testsuite/28_regex/algorithms/regex_match/extended/
string_range_02_03.cc: Likewise.
* testsuite/28_regex/algorithms/regex_match/extended/
cstring_questionmark.cc: Remove xfail and get correct length of
c-string.
* testsuite/28_regex/algorithms/regex_match/extended/
string_range_00_03.cc: Likewise.
* testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc:
New.
* testsuite/28_regex/algorithms/regex_match/extended/cstring_range.cc:
New.
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: New.
2013-09-03 Paolo Carlini <paolo.carlini@oracle.com> 2013-09-03 Paolo Carlini <paolo.carlini@oracle.com>
PR libstdc++/58302 PR libstdc++/58302
* include/bits/random.tcc (negative_binomial_distribution<>:: * include/bits/random.tcc (negative_binomial_distribution<>::
operator()(_UniformRandomNumberGenerator&, const param_type&): operator()(_UniformRandomNumberGenerator&, const param_type&)):
Fix typo in template argument. Fix typo in template argument.
* testsuite/26_numerics/random/negative_binomial_distribution/ * testsuite/26_numerics/random/negative_binomial_distribution/
operators/58302.cc: New. operators/58302.cc: New.

View File

@ -56,6 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_opcode_backref = 2, _S_opcode_backref = 2,
_S_opcode_subexpr_begin = 4, _S_opcode_subexpr_begin = 4,
_S_opcode_subexpr_end = 5, _S_opcode_subexpr_end = 5,
_S_opcode_dummy = 6,
_S_opcode_match = 100, _S_opcode_match = 100,
_S_opcode_accept = 255 _S_opcode_accept = 255
}; };
@ -69,7 +70,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_OpcodeT _M_opcode; // type of outgoing transition _OpcodeT _M_opcode; // type of outgoing transition
_StateIdT _M_next; // outgoing transition _StateIdT _M_next; // outgoing transition
union // Since they are mutual exclusive. union // Since they are mutually exclusive.
{ {
_StateIdT _M_alt; // for _S_opcode_alternative _StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_* unsigned int _M_subexpr; // for _S_opcode_subexpr_*
@ -201,6 +202,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateIdT _StateIdT
_M_insert_backref(unsigned int __index); _M_insert_backref(unsigned int __index);
// Appends a new state whose opcode is _S_opcode_dummy to this NFA and
// returns the index of that state (the last element after insertion).
_StateIdT
_M_insert_dummy()
{
  const _StateT __dummy(_S_opcode_dummy);
  this->push_back(__dummy);
  return this->size() - 1;
}
// Appends a copy of the state __s to this NFA and returns the index at
// which it was stored (the last element after insertion).
_StateIdT
_M_insert_state(_StateT __s)
{
  this->push_back(__s);
  const auto __index = this->size() - 1;
  return __index;
}
// Eliminate dummy nodes from the transitions of this NFA to make it compact.
// For every state, _M_next is re-pointed past any chain of states whose
// opcode is _S_opcode_dummy; for _S_opcode_alternative states the same is
// done for _M_alt.
// NOTE(review): the visible implementation only relinks transitions and
// does not appear to erase the dummy states themselves -- confirm.
void
_M_eliminate_dummy();
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
std::ostream& std::ostream&
_M_dot(std::ostream& __ostr) const; _M_dot(std::ostream& __ostr) const;
@ -222,58 +241,40 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
public: public:
typedef _NFA<_CharT, _TraitsT> _RegexT; typedef _NFA<_CharT, _TraitsT> _RegexT;
public: public:
// Constructs a single-node sequence _StateSeq(_RegexT& __nfa, _StateIdT __s)
_StateSeq(_RegexT& __ss, _StateIdT __s, : _StateSeq(__nfa, __s, __s)
_StateIdT __e = _S_invalid_state_id)
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
{ }
// Constructs a split sequence from two other sequencces
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
: _M_nfa(__e1._M_nfa),
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
{ } { }
// Constructs a split sequence from a single sequence _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
_StateSeq(const _StateSeq& __e, _StateIdT __id) : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
: _M_nfa(__e._M_nfa),
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
_M_end1(__id), _M_end2(__e._M_end1)
{ } { }
// Constructs a copy of a %_StateSeq // Append a state on *this and change *this to the new sequence.
_StateSeq(const _StateSeq& __rhs)
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
{ }
_StateSeq& operator=(const _StateSeq& __rhs);
_StateIdT
_M_front() const
{ return _M_start; }
// Extends a sequence by one.
void void
_M_push_back(_StateIdT __id); _M_append(_StateIdT __id)
{
_M_nfa[_M_end]._M_next = __id;
_M_end = __id;
}
// Extends and maybe joins a sequence. // Append a sequence on *this and change *this to the new sequence.
void void
_M_append(_StateIdT __id); _M_append(const _StateSeq& __s)
{
void _M_nfa[_M_end]._M_next = __s._M_start;
_M_append(_StateSeq& __rhs); _M_end = __s._M_end;
}
// Clones an entire sequence. // Clones an entire sequence.
_StateIdT _StateSeq
_M_clone(); _M_clone();
private: public:
_RegexT& _M_nfa; _RegexT& _M_nfa;
_StateIdT _M_start; _StateIdT _M_start;
_StateIdT _M_end1; _StateIdT _M_end;
_StateIdT _M_end2;
}; };
//@} regex-detail //@} regex-detail

View File

@ -102,9 +102,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_accept: case _S_opcode_accept:
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
break; break;
case _S_opcode_dummy:
break;
default: default:
__ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n" _GLIBCXX_DEBUG_ASSERT(false);
<< __id << " -> " << _M_next << " [label=\"?\"];\n";
break; break;
} }
return __ostr; return __ostr;
@ -145,65 +146,61 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
template<typename _CharT, typename _TraitsT> template<typename _CharT, typename _TraitsT>
_StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>:: void _NFA<_CharT, _TraitsT>::
operator=(const _StateSeq& __rhs) _M_eliminate_dummy()
{ {
_M_start = __rhs._M_start; for (auto& __it : *this)
_M_end1 = __rhs._M_end1; {
_M_end2 = __rhs._M_end2; while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
return *this; == _S_opcode_dummy)
__it._M_next = (*this)[__it._M_next]._M_next;
if (__it._M_opcode == _S_opcode_alternative)
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
== _S_opcode_dummy)
__it._M_alt = (*this)[__it._M_alt]._M_next;
}
} }
// Just apply DFS on the sequence and re-link their links.
template<typename _CharT, typename _TraitsT> template<typename _CharT, typename _TraitsT>
void _StateSeq<_CharT, _TraitsT>:: _StateSeq<_CharT, _TraitsT> _StateSeq<_CharT, _TraitsT>::
_M_push_back(_StateIdT __id)
{
if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __id;
_M_end1 = __id;
}
template<typename _CharT, typename _TraitsT>
void _StateSeq<_CharT, _TraitsT>::
_M_append(_StateIdT __id)
{
if (_M_end2 != _S_invalid_state_id)
{
if (_M_end2 == _M_end1)
_M_nfa[_M_end2]._M_alt = __id;
else
_M_nfa[_M_end2]._M_next = __id;
_M_end2 = _S_invalid_state_id;
}
if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __id;
_M_end1 = __id;
}
template<typename _CharT, typename _TraitsT>
void _StateSeq<_CharT, _TraitsT>::
_M_append(_StateSeq& __rhs)
{
if (_M_end2 != _S_invalid_state_id)
{
if (_M_end2 == _M_end1)
_M_nfa[_M_end2]._M_alt = __rhs._M_start;
else
_M_nfa[_M_end2]._M_next = __rhs._M_start;
_M_end2 = _S_invalid_state_id;
}
if (__rhs._M_end2 != _S_invalid_state_id)
_M_end2 = __rhs._M_end2;
if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __rhs._M_start;
_M_end1 = __rhs._M_end1;
}
// @todo implement this function.
template<typename _CharT, typename _TraitsT>
_StateIdT _StateSeq<_CharT, _TraitsT>::
_M_clone() _M_clone()
{ return 0; } {
std::map<_StateIdT, _StateIdT> __m;
std::stack<_StateIdT> __stack;
__stack.push(_M_start);
while (!__stack.empty())
{
auto __u = __stack.top();
__stack.pop();
auto __dup = _M_nfa[__u];
auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id;
if (__u == _M_end)
continue;
if (__m.count(__dup._M_next) == 0)
__stack.push(__dup._M_next);
if (__dup._M_opcode == _S_opcode_alternative)
if (__m.count(__dup._M_alt) == 0)
__stack.push(__dup._M_alt);
}
for (auto __it : __m)
{
auto& __ref = _M_nfa[__it.second];
if (__ref._M_next != -1)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next));
__ref._M_next = __m[__ref._M_next];
}
if (__ref._M_opcode == _S_opcode_alternative)
if (__ref._M_alt != -1)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt));
__ref._M_alt = __m[__ref._M_alt];
}
}
return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
}
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail } // namespace __detail

View File

@ -56,7 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::shared_ptr<_RegexT> std::shared_ptr<_RegexT>
_M_get_nfa() const _M_get_nfa() const
{ return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); } { return std::shared_ptr<_RegexT>(new _RegexT(_M_nfa)); }
private: private:
typedef _Scanner<_FwdIter> _ScannerT; typedef _Scanner<_FwdIter> _ScannerT;
@ -64,6 +64,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _StateSeq<_CharT, _TraitsT> _StateSeqT; typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT; typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT; typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT;
typedef std::ctype<_CharT> _CtypeT;
// accepts a specific token or returns false. // accepts a specific token or returns false.
bool bool
@ -90,21 +91,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
_M_bracket_expression(); _M_bracket_expression();
void
_M_bracket_list(_BMatcherT& __matcher);
bool
_M_follow_list(_BMatcherT& __matcher);
void void
_M_expression_term(_BMatcherT& __matcher); _M_expression_term(_BMatcherT& __matcher);
bool bool
_M_range_expression(_BMatcherT& __matcher); _M_range_expression(_BMatcherT& __matcher);
bool
_M_start_range(_BMatcherT& __matcher);
bool bool
_M_collating_symbol(_BMatcherT& __matcher); _M_collating_symbol(_BMatcherT& __matcher);
@ -120,12 +112,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
_M_try_char(); _M_try_char();
_CharT _StateSeqT
_M_get_char(); _M_pop()
{
auto ret = _M_stack.top();
_M_stack.pop();
return ret;
}
const _TraitsT& _M_traits; const _TraitsT& _M_traits;
const _CtypeT& _M_ctype;
_ScannerT _M_scanner; _ScannerT _M_scanner;
_RegexT _M_state_store; _RegexT _M_nfa;
_StringT _M_value; _StringT _M_value;
_StackT _M_stack; _StackT _M_stack;
_FlagT _M_flags; _FlagT _M_flags;

View File

@ -28,6 +28,31 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO make comments doxygen format.
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
// (http://swtch.com/~rsc/regexp/regexp1.html),
// but doesn't strictly follow it.
//
// When compiling, states are *chained* instead of tree- or graph-constructed.
// It's more like a structured program: there are "if" statements and "loop"
// statements.
//
// For an alternative structure (say "a|b"), aka an "if statement", two
// branches should be constructed. However, these two shall merge into an
// "end_tag" at the end of this operator:
//
// branch1
// / \
// => begin_tag end_tag =>
// \ /
// branch2
//
// This is the difference between this implementation and that in Russ's
// article.
//
// That's why we introduce a dummy node here: "end_tag" is a dummy node.
// All dummy nodes will be eliminated at the end of the compiling process.
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
namespace __detail namespace __detail
@ -39,32 +64,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler(_FwdIter __b, _FwdIter __e, _Compiler(_FwdIter __b, _FwdIter __e,
const _TraitsT& __traits, _FlagT __flags) const _TraitsT& __traits, _FlagT __flags)
: _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
_M_state_store(__flags), _M_flags(__flags) _M_ctype(std::use_facet<std::ctype<_CharT>>(_M_traits.getloc())),
_M_nfa(__flags), _M_flags(__flags)
{ {
_StateSeqT __r(_M_state_store, _StateSeqT __r(_M_nfa, _M_nfa._M_start());
_M_state_store._M_insert_subexpr_begin()); __r._M_append(_M_nfa._M_insert_subexpr_begin());
_M_disjunction(); this->_M_disjunction();
if (!_M_stack.empty()) if (!_M_match_token(_ScannerT::_S_token_eof))
{ __throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_stack.top()); __r._M_append(_M_pop());
_M_stack.pop(); _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
} __r._M_append(_M_nfa._M_insert_subexpr_end());
__r._M_append(_M_state_store._M_insert_subexpr_end()); __r._M_append(_M_nfa._M_insert_accept());
__r._M_append(_M_state_store._M_insert_accept()); _M_nfa._M_eliminate_dummy();
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
bool
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_match_token(_TokenT token)
{
if (token == _M_scanner._M_get_token())
{
_M_value = _M_scanner._M_get_value();
_M_scanner._M_advance();
return true;
}
return false;
} }
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -73,12 +85,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_disjunction() _M_disjunction()
{ {
this->_M_alternative(); this->_M_alternative();
if (_M_match_token(_ScannerT::_S_token_or)) // TODO empty alternative like, um, "(|asdf)"
while (_M_match_token(_ScannerT::_S_token_or))
{ {
_StateSeqT __alt1 = _M_stack.top(); _M_stack.pop(); _StateSeqT __alt1 = _M_pop();
this->_M_disjunction(); this->_M_alternative();
_StateSeqT __alt2 = _M_stack.top(); _M_stack.pop(); _StateSeqT __alt2 = _M_pop();
_M_stack.push(_StateSeqT(__alt1, __alt2)); auto __end = _M_nfa._M_insert_dummy();
__alt1._M_append(__end);
__alt2._M_append(__end);
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_alt(__alt1._M_start,
__alt2._M_start),
__end));
} }
} }
@ -89,15 +108,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (this->_M_term()) if (this->_M_term())
{ {
_StateSeqT __re = _M_stack.top(); _M_stack.pop(); _StateSeqT __re = _M_pop();
this->_M_alternative(); this->_M_alternative();
if (!_M_stack.empty()) __re._M_append(_M_pop());
{
__re._M_append(_M_stack.top());
_M_stack.pop();
}
_M_stack.push(__re); _M_stack.push(__re);
} }
else
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
} }
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -121,7 +138,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler<_FwdIter, _CharT, _TraitsT>:: _Compiler<_FwdIter, _CharT, _TraitsT>::
_M_assertion() _M_assertion()
{ {
return false; // temporary place holders.
if (_M_match_token(_ScannerT::_S_token_line_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_line_end))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_word_bound))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_neg_word_bound))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_subexpr_neg_lookahead_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else
return false;
return true;
} }
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -133,67 +165,70 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (_M_stack.empty()) if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat); __throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_stack.top(), -1); auto __e = _M_pop();
__r._M_append(__r._M_front()); _StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
_M_stack.pop(); __e._M_start));
__e._M_append(__r);
_M_stack.push(__r); _M_stack.push(__r);
return;
} }
if (_M_match_token(_ScannerT::_S_token_closure1)) else if (_M_match_token(_ScannerT::_S_token_closure1))
{ {
if (_M_stack.empty()) if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat); __throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_state_store, auto __e = _M_pop();
_M_state_store. __e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start));
_M_insert_alt(_S_invalid_state_id, _M_stack.push(__e);
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
return;
} }
if (_M_match_token(_ScannerT::_S_token_opt)) else if (_M_match_token(_ScannerT::_S_token_opt))
{ {
if (_M_stack.empty()) if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat); __throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_stack.top(), -1); auto __e = _M_pop();
_M_stack.pop(); auto __end = _M_nfa._M_insert_dummy();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
__e._M_start));
__e._M_append(__end);
__r._M_append(__end);
_M_stack.push(__r); _M_stack.push(__r);
return;
} }
if (_M_match_token(_ScannerT::_S_token_interval_begin)) else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{ {
if (_M_stack.empty()) if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat); __throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count)) if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_stack.top()); _StateSeqT __r(_M_pop());
_StateSeqT __e(_M_nfa, _M_nfa._M_insert_dummy());
int __min_rep = _M_cur_int_value(10); int __min_rep = _M_cur_int_value(10);
for (int __i = 1; __i < __min_rep; ++__i) // {3
_M_stack.top()._M_append(__r._M_clone()); for (int __i = 0; __i < __min_rep; ++__i)
__e._M_append(__r._M_clone());
if (_M_match_token(_ScannerT::_S_token_comma)) if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count)) if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7}
{ {
int __n = _M_cur_int_value(10) - __min_rep; int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0) if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
for (int __i = 0; __i < __n; ++__i) auto __end = _M_nfa._M_insert_dummy();
{ for (int __i = 0; __i < __n; ++__i)
_StateSeqT __r(_M_state_store, {
_M_state_store. auto __tmp = __r._M_clone();
_M_insert_alt(_S_invalid_state_id, __e._M_append(_StateSeqT(_M_nfa, _M_nfa.
_M_stack.top()._M_front())); _M_insert_alt(__tmp._M_start, __end), __tmp._M_end));
_M_stack.top()._M_append(__r); }
} __e._M_append(__end);
} }
else else // {3,}
{ {
_StateSeqT __r(_M_stack.top(), -1); auto __tmp = __r._M_clone();
__r._M_push_back(__r._M_front()); _StateSeqT __s(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
_M_stack.pop(); __tmp._M_start));
_M_stack.push(__r); __tmp._M_append(__s);
__e._M_append(__s);
} }
if (!_M_match_token(_ScannerT::_S_token_interval_end)) if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace); __throw_regex_error(regex_constants::error_brace);
return; _M_stack.push(__e);
} }
} }
@ -203,46 +238,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_atom() _M_atom()
{ {
if (_M_match_token(_ScannerT::_S_token_anychar)) if (_M_match_token(_ScannerT::_S_token_anychar))
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_CharT, _TraitsT>(_M_traits))));
else if (_M_try_char())
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_CharT, _TraitsT>(_M_value[0],
_M_traits,
_M_flags))));
else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10))));
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{ {
_M_stack.push(_StateSeqT(_M_state_store, _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_M_state_store._M_insert_matcher _BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
(_AnyMatcher<_CharT, _TraitsT>(_M_traits)))); _M_traits, _M_flags);
return true; __matcher._M_add_character_class(_M_value);
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(__matcher)));
} }
if (_M_try_char()) else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
{ {
_M_stack.push(_StateSeqT(_M_state_store, _StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
_M_state_store._M_insert_matcher
(_CharMatcher<_CharT, _TraitsT>(_M_value[0],
_M_traits,
_M_flags))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_backref))
{
_M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
_M_insert_backref(_M_cur_int_value(10))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{
int __mark = _M_state_store._M_sub_count();
_StateSeqT __r(_M_state_store,
_M_state_store.
_M_insert_subexpr_begin());
this->_M_disjunction(); this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren); __throw_regex_error(regex_constants::error_paren);
if (!_M_stack.empty()) __r._M_append(_M_pop());
{
__r._M_append(_M_stack.top());
_M_stack.pop();
}
__r._M_append(_M_state_store._M_insert_subexpr_end());
_M_stack.push(__r); _M_stack.push(__r);
return true;
} }
return _M_bracket_expression(); else if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{
int __mark = _M_nfa._M_sub_count();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
__r._M_append(_M_nfa._M_insert_subexpr_end());
_M_stack.push(__r);
}
else if (!_M_bracket_expression())
return false;
return true;
} }
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -255,51 +294,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin))) if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
return false; return false;
_BMatcherT __matcher(__neg, _M_traits, _M_flags); _BMatcherT __matcher(__neg, _M_traits, _M_flags);
_M_bracket_list(__matcher); while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_stack.push(_StateSeqT(_M_state_store, _M_expression_term(__matcher);
_M_state_store._M_insert_matcher(__matcher))); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_matcher(__matcher)));
return true; return true;
} }
template<typename _FwdIter, typename _CharT, typename _TraitsT>
void
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_bracket_list(_BMatcherT& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return;
_M_expression_term(__matcher);
_M_bracket_list(__matcher);
return;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
void void
_Compiler<_FwdIter, _CharT, _TraitsT>:: _Compiler<_FwdIter, _CharT, _TraitsT>::
_M_expression_term(_BMatcherT& __matcher) _M_expression_term(_BMatcherT& __matcher)
{ {
if (_M_match_token(_ScannerT::_S_token_collsymbol)) if (_M_match_token(_ScannerT::_S_token_collsymbol))
{ __matcher._M_add_collating_element(_M_value);
__matcher._M_add_collating_element(_M_value); else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
return; __matcher._M_add_equivalence_class(_M_value);
} else if (_M_match_token(_ScannerT::_S_token_char_class_name))
if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) __matcher._M_add_character_class(_M_value);
{ else if (_M_try_char()) // [a
__matcher._M_add_equivalence_class(_M_value);
return;
}
if (_M_match_token(_ScannerT::_S_token_char_class_name))
{
__matcher._M_add_character_class(_M_value);
return;
}
if (_M_try_char()) // [a
{ {
auto __ch = _M_value[0]; auto __ch = _M_value[0];
if (_M_try_char()) if (_M_try_char())
{ {
if (_M_value[0] == std::use_facet<std::ctype<_CharT>> if (_M_value[0] == '-') // [a-
(_M_traits.getloc()).widen('-')) // [a-
{ {
if (_M_try_char()) // [a-z] if (_M_try_char()) // [a-z]
{ {
@ -315,9 +332,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__matcher._M_add_char(_M_value[0]); __matcher._M_add_char(_M_value[0]);
} }
__matcher._M_add_char(__ch); __matcher._M_add_char(__ch);
return;
} }
__throw_regex_error(regex_constants::error_brack); else
__throw_regex_error(regex_constants::error_brack);
} }
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -341,6 +358,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __is_char; return __is_char;
} }
// Tries to consume one token from the scanner.
// If the scanner's current token equals TOKEN, stores the scanner's current
// value in _M_value, advances the scanner and returns true. Otherwise
// leaves the scanner and _M_value untouched and returns false.
template<typename _FwdIter, typename _CharT, typename _TraitsT>
bool
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_match_token(_TokenT token)
{
  const bool __matched = (token == _M_scanner._M_get_token());
  if (!__matched)
    return false;

  // Capture the value before advancing, since advancing moves the scanner
  // on to the next token.
  _M_value = _M_scanner._M_get_value();
  _M_scanner._M_advance();
  return true;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
int int
_Compiler<_FwdIter, _CharT, _TraitsT>:: _Compiler<_FwdIter, _CharT, _TraitsT>::

View File

@ -86,6 +86,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_closure1, _S_token_closure1,
_S_token_line_begin, _S_token_line_begin,
_S_token_line_end, _S_token_line_end,
_S_token_word_bound,
_S_token_neg_word_bound,
_S_token_comma, _S_token_comma,
_S_token_dup_count, _S_token_dup_count,
_S_token_eof, _S_token_eof,

View File

@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO make comments doxygen format // TODO make comments doxygen format.
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk // and awk
@ -370,10 +370,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_token = _S_token_ord_char; _M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c)); _M_value.assign(1, _M_escape_map.at(__c));
} }
else if (__c == 'b')
_M_token = _S_token_word_bound;
else if (__c == 'B')
_M_token = _S_token_neg_word_bound;
// N3376 28.13 // N3376 28.13
else if (__c == 'b' else if (__c == 'd'
|| __c == 'B'
|| __c == 'd'
|| __c == 'D' || __c == 'D'
|| __c == 's' || __c == 's'
|| __c == 'S' || __c == 'S'

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" } // { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
// //
// 2010-06-16 Stephen M. Webb <stephen.webb@bregmasoft.ca> // 2010-06-16 Stephen M. Webb <stephen.webb@bregmasoft.ca>

View File

@ -0,0 +1,52 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests ECMAScript \d \D \s \S \w \W
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Verifies regex_match against the ECMAScript class escapes
// \d \D \s \S \w \W, each applied with a trailing '*'.
void
test01()
{
  bool test __attribute__((unused)) = true;

  // One row per check: subject text, pattern, and whether the whole
  // subject is expected to match the pattern.
  const struct
  {
    const char* text;
    const char* pattern;
    bool matches;
  } cases[] = {
    { "01", "\\d*", true },
    { "asdfjkl", "\\D*", true },
    { "asdfjkl0", "\\D*", false },
    { "\r\t\v\f ", "\\s*", true },
    { "asdfjkl", "\\S*", true },
    { "asdfjkl\r", "\\S*", false },
    { "_az", "\\w*", true },
    { "!@#$%", "\\W*", true },
    { "_01234", "\\W*", false },
  };

  for (const auto& c : cases)
    VERIFY(regex_match(c.text, regex(c.pattern)) == c.matches);
}
// Test driver: runs test01. Falling off the end of main yields exit
// status 0, the same as an explicit "return 0".
int
main()
{
  test01();
}

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" } // { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
// //
// 2010-06-21 Stephen M. Webb <stephen.webb@bregmasoft.ca> // 2010-06-21 Stephen M. Webb <stephen.webb@bregmasoft.ca>
@ -32,27 +31,31 @@ test01()
{ {
bool test __attribute__((unused)) = true; bool test __attribute__((unused)) = true;
std::regex re("(a+)", std::regex::extended); std::regex re("(a+)", std::regex::extended);
const char target[] = "aa"; const char target[] = "aa";
std::cmatch m; std::cmatch m;
VERIFY( std::regex_match(target, m, re) ); VERIFY( std::regex_match(target, m, re) );
VERIFY( re.mark_count() == 1 ); VERIFY( re.mark_count() == 1 );
VERIFY( m.size() == re.mark_count()+1 ); VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false ); VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target ); VERIFY( m.prefix().first == target );
VERIFY( m.prefix().second == target ); VERIFY( m.prefix().second == target );
VERIFY( m.prefix().matched == false ); VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target+sizeof(target) ); VERIFY( m.suffix().first == target+sizeof(target)-1 );
VERIFY( m.suffix().second == target+sizeof(target) ); VERIFY( m.suffix().second == target+sizeof(target)-1 );
VERIFY( m.suffix().matched == false ); VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target ); VERIFY( m[0].first == target );
VERIFY( m[0].second == target+sizeof(target) ); VERIFY( m[0].second == target+sizeof(target)-1 );
VERIFY( m[0].matched == true ); VERIFY( m[0].matched == true );
VERIFY( m[1].first == target ); VERIFY( m[1].first == target );
VERIFY( m[1].second == target+sizeof(target) ); VERIFY( m[1].second == target+sizeof(target)-1 );
VERIFY( m[1].matched == true ); VERIFY( m[1].matched == true );
VERIFY(!std::regex_match("", std::regex("a+", std::regex::extended)));
VERIFY(std::regex_match("a", std::regex("a+", std::regex::extended)));
VERIFY(std::regex_match("aa", std::regex("a+", std::regex::extended)));
} }

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" } // { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
// //
// 2010-06-21 Stephen M. Webb <stephen.webb@bregmasoft.ca> // 2010-06-21 Stephen M. Webb <stephen.webb@bregmasoft.ca>
@ -32,27 +31,31 @@ test01()
{ {
bool test __attribute__((unused)) = true; bool test __attribute__((unused)) = true;
std::regex re("(aa?)", std::regex::extended); std::regex re("(aa?)", std::regex::extended);
char target[] = "a"; char target[] = "a";
std::cmatch m; std::cmatch m;
VERIFY( std::regex_match(target, m, re) ); VERIFY( std::regex_match(target, m, re) );
VERIFY( re.mark_count() == 1 ); VERIFY( re.mark_count() == 1 );
VERIFY( m.size() == re.mark_count()+1 ); VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false ); VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target ); VERIFY( m.prefix().first == target );
VERIFY( m.prefix().second == target ); VERIFY( m.prefix().second == target );
VERIFY( m.prefix().matched == false ); VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target+sizeof(target) ); VERIFY( m.suffix().first == target+sizeof(target)-1 );
VERIFY( m.suffix().second == target+sizeof(target) ); VERIFY( m.suffix().second == target+sizeof(target)-1 );
VERIFY( m.suffix().matched == false ); VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target ); VERIFY( m[0].first == target );
VERIFY( m[0].second == target+sizeof(target) ); VERIFY( m[0].second == target+sizeof(target)-1 );
VERIFY( m[0].matched == true ); VERIFY( m[0].matched == true );
VERIFY( m[1].first == target ); VERIFY( m[1].first == target );
VERIFY( m[1].second == target+sizeof(target) ); VERIFY( m[1].second == target+sizeof(target)-1 );
VERIFY( m[1].matched == true ); VERIFY( m[1].matched == true );
VERIFY(std::regex_match("", std::regex("a?", std::regex::extended)));
VERIFY(std::regex_match("a", std::regex("a?", std::regex::extended)));
VERIFY(!std::regex_match("aa", std::regex("a?", std::regex::extended)));
} }

View File

@ -0,0 +1,68 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests Extended interval range.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Exercises interval (brace) quantifiers applied to a parenthesised group
// under std::regex::extended, using whole-string matching (regex_match) on
// C strings.  Each call to `check` recompiles the pattern and then verifies,
// in order, whether every listed subject matches in full.
void
test01()
{
  // Presumably referenced by the VERIFY macro from testsuite_hooks.h in some
  // configurations -- confirm before removing.
  bool test __attribute__((unused)) = true;

  // One subject string together with the expected regex_match outcome.
  struct sample
  {
    const char* text;
    bool        should_match;
  };

  std::regex pattern;

  // Re-assign `pattern` from `expression` (extended grammar) and verify each
  // sample against it, preserving the order in which samples are listed.
  const auto check = [&](const char* expression,
                         std::initializer_list<sample> samples)
  {
    pattern.assign(expression, std::regex::extended);
    for (const sample& s : samples)
      VERIFY(std::regex_match(s.text, pattern) == s.should_match);
  };

  // {3}: exactly three repetitions of "ab".
  check("(ab){3}",
        { { "abab", false },
          { "ababab", true },
          { "abababab", false } });

  // {3,}: three or more repetitions.
  check("(ab){3,}",
        { { "abab", false },
          { "ababab", true },
          { "abababab", true },
          { "ababababab", true } });

  // {0,3}: from zero up to three repetitions; the empty string matches.
  check("(ab){0,3}",
        { { "", true },
          { "ab", true },
          { "abab", true },
          { "ababab", true },
          { "abababab", false } });

  // {0,2} over an alternation: any string of at most two 'a'/'b' characters.
  check("(a|b){0,2}",
        { { "", true },
          { "a", true },
          { "b", true },
          { "aa", true },
          { "ab", true },
          { "ba", true },
          { "bb", true },
          { "aaa", false } });
}
// Entry point: runs the single test case.  Falling off the end of main
// is equivalent to returning 0.
int
main()
{
  test01();
}

View File

@ -31,23 +31,23 @@ test01()
{ {
bool test __attribute__((unused)) = true; bool test __attribute__((unused)) = true;
std::regex re("a{0,3}", std::regex::extended); std::regex re("a{0,3}", std::regex::extended);
std::string target("aa"); std::string target("aa");
std::smatch m; std::smatch m;
VERIFY( std::regex_match(target, m, re) ); VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == re.mark_count()+1 ); VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false ); VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target.begin() ); VERIFY( m.prefix().first == target.begin() );
VERIFY( m.prefix().second == target.begin() ); VERIFY( m.prefix().second == target.begin() );
VERIFY( m.prefix().matched == false ); VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target.end() ); VERIFY( m.suffix().first == target.end() );
VERIFY( m.suffix().second == target.end() ); VERIFY( m.suffix().second == target.end() );
VERIFY( m.suffix().matched == false ); VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target.begin() ); VERIFY( m[0].first == target.begin() );
VERIFY( m[0].second == target.end() ); VERIFY( m[0].second == target.end() );
VERIFY( m[0].matched == true ); VERIFY( m[0].matched == true );
} }

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" } // { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
// //
// 2010-06-16 Stephen M. Webb <stephen.webb@bregmasoft.ca> // 2010-06-16 Stephen M. Webb <stephen.webb@bregmasoft.ca>

View File

@ -0,0 +1,59 @@
// { dg-options "-std=gnu++11" }
// { dg-require-namedlocale "en_US.UTF-8" }
// { dg-do run { xfail *-*-* } }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.12.1 regex_iterator
// Tests regex_iterator class
#include <regex>
#include <testsuite_hooks.h>
// Builds a wide-character regex under the en_US.UTF-8 locale, iterates over
// a non-ASCII subject string with wsregex_iterator, and verifies that the
// iterator compares unequal to a copy of itself after one increment.
void
test01()
{
  // Presumably referenced by the VERIFY macro from testsuite_hooks.h in some
  // configurations -- confirm before removing.
  bool test __attribute__((unused)) = true;

  // NOTE(review): std::setlocale is declared in <clocale>, which this file
  // does not include directly; presumably it arrives via <regex> -- confirm.
  std::setlocale(LC_ALL, "en_US.UTF-8");

  // Lower-case/upper-case umlaut letters separated by U+2009 characters.
  const std::wstring subject = L"ä\u2009Ä\u2009ö\u2009Ö\u2009ü\u2009Ü";

  // The locale is imbued before the pattern is assigned; imbue() leaves the
  // regex matching nothing, so the order of these two calls matters.
  const std::locale utf8("en_US.UTF-8");
  std::wregex pattern;
  pattern.imbue(utf8);
  pattern.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");

  // Snapshot the iterator at the first match, then advance the original.
  std::wsregex_iterator cursor(subject.begin(), subject.end(), pattern);
  const std::wsregex_iterator start = cursor;
  ++cursor;
  VERIFY(start != cursor);

  // Debugging aid (disabled): print the first capture of every match.
  //for (std::wsregex_iterator it(subject.begin(), subject.end(), pattern);
  //     it != std::wsregex_iterator{}; ++it)
  //  std::wcout << (*it)[1] << std::endl;
}
// Entry point: runs the single test case.  Falling off the end of main
// is equivalent to returning 0.
int
main()
{
  test01();
}