regex_automaton.h: Add dummy node type.

2013-09-05  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_automaton.h: Add dummy node type. Rewrite
	_StateSeq.
	* include/bits/regex_automaton.tcc: Implement them.
	* include/bits/regex_compiler.h: Rewrite _Compiler to use new
	_StateSeq interfaces.
	* include/bits/regex_compiler.tcc: Implement them.
	* include/bits/regex_scanner.h: Add word boundry assertion token.
	* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
	Support word boundry.
	* testsuite/28_regex/algorithms/regex_match/basic/
	string_range_02_03.cc: Remove "xfail".
	* testsuite/28_regex/algorithms/regex_match/extended/cstring_plus.cc:
	Likewise.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_range_02_03.cc: Likewise.
	* testsuite/28_regex/algorithms/regex_match/extended/
	cstring_questionmark.cc: Remove xfail and get correct length of
	c-string.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_range_00_03.cc: Likewise.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc:
	New.
	* testsuite/28_regex/algorithms/regex_match/extended/cstring_range.cc:
	New.
	* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: New.

From-SVN: r202290
This commit is contained in:
Tim Shen 2013-09-05 15:20:39 +00:00
parent e6523306de
commit 7c812a2a57
15 changed files with 553 additions and 311 deletions

View File

@ -1,8 +1,36 @@
2013-09-05 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.h: Add dummy node type. Rewrite
_StateSeq.
* include/bits/regex_automaton.tcc: Implement them.
* include/bits/regex_compiler.h: Rewrite _Compiler to use new
_StateSeq interfaces.
* include/bits/regex_compiler.tcc: Implement them.
* include/bits/regex_scanner.h: Add word boundry assertion token.
* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
Support word boundry.
* testsuite/28_regex/algorithms/regex_match/basic/
string_range_02_03.cc: Remove "xfail".
* testsuite/28_regex/algorithms/regex_match/extended/cstring_plus.cc:
Likewise.
* testsuite/28_regex/algorithms/regex_match/extended/
string_range_02_03.cc: Likewise.
* testsuite/28_regex/algorithms/regex_match/extended/
cstring_questionmark.cc: Remove xfail and get correct length of
c-string.
* testsuite/28_regex/algorithms/regex_match/extended/
string_range_00_03.cc: Likewise.
* testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc:
New.
* testsuite/28_regex/algorithms/regex_match/extended/cstring_range.cc:
New.
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: New.
2013-09-03 Paolo Carlini <paolo.carlini@oracle.com>
PR libstdc++/58302
* include/bits/random.tcc (negative_binomial_distribution<>::
operator()(_UniformRandomNumberGenerator&, const param_type&):
operator()(_UniformRandomNumberGenerator&, const param_type&)):
Fix typo in template argument.
* testsuite/26_numerics/random/negative_binomial_distribution/
operators/58302.cc: New.

View File

@ -56,6 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_opcode_backref = 2,
_S_opcode_subexpr_begin = 4,
_S_opcode_subexpr_end = 5,
_S_opcode_dummy = 6,
_S_opcode_match = 100,
_S_opcode_accept = 255
};
@ -69,7 +70,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_OpcodeT _M_opcode; // type of outgoing transition
_StateIdT _M_next; // outgoing transition
union // Since they are mutual exclusive.
union // Since they are mutually exclusive.
{
_StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
@ -201,6 +202,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateIdT
_M_insert_backref(unsigned int __index);
_StateIdT
_M_insert_dummy()
{
this->push_back(_StateT(_S_opcode_dummy));
return this->size()-1;
}
_StateIdT
_M_insert_state(_StateT __s)
{
this->push_back(__s);
return this->size()-1;
}
// Eliminate dummy node in this NFA to make it compact.
void
_M_eliminate_dummy();
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_dot(std::ostream& __ostr) const;
@ -222,58 +241,40 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
public:
typedef _NFA<_CharT, _TraitsT> _RegexT;
public:
// Constructs a single-node sequence
_StateSeq(_RegexT& __ss, _StateIdT __s,
_StateIdT __e = _S_invalid_state_id)
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
{ }
// Constructs a split sequence from two other sequencces
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
: _M_nfa(__e1._M_nfa),
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
_StateSeq(_RegexT& __nfa, _StateIdT __s)
: _StateSeq(__nfa, __s, __s)
{ }
// Constructs a split sequence from a single sequence
_StateSeq(const _StateSeq& __e, _StateIdT __id)
: _M_nfa(__e._M_nfa),
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
_M_end1(__id), _M_end2(__e._M_end1)
_StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
: _M_nfa(__nfa), _M_start(__s), _M_end(__end)
{ }
// Constructs a copy of a %_StateSeq
_StateSeq(const _StateSeq& __rhs)
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
{ }
_StateSeq& operator=(const _StateSeq& __rhs);
_StateIdT
_M_front() const
{ return _M_start; }
// Extends a sequence by one.
// Append a state on *this and change *this to the new sequence.
void
_M_push_back(_StateIdT __id);
_M_append(_StateIdT __id)
{
_M_nfa[_M_end]._M_next = __id;
_M_end = __id;
}
// Extends and maybe joins a sequence.
// Append a sequence on *this and change *this to the new sequence.
void
_M_append(_StateIdT __id);
void
_M_append(_StateSeq& __rhs);
_M_append(const _StateSeq& __s)
{
_M_nfa[_M_end]._M_next = __s._M_start;
_M_end = __s._M_end;
}
// Clones an entire sequence.
_StateIdT
_StateSeq
_M_clone();
private:
public:
_RegexT& _M_nfa;
_StateIdT _M_start;
_StateIdT _M_end1;
_StateIdT _M_end2;
_StateIdT _M_end;
};
//@} regex-detail

View File

@ -102,9 +102,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_accept:
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
break;
case _S_opcode_dummy:
break;
default:
__ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n"
<< __id << " -> " << _M_next << " [label=\"?\"];\n";
_GLIBCXX_DEBUG_ASSERT(false);
break;
}
return __ostr;
@ -145,65 +146,61 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
template<typename _CharT, typename _TraitsT>
_StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
operator=(const _StateSeq& __rhs)
void _NFA<_CharT, _TraitsT>::
_M_eliminate_dummy()
{
_M_start = __rhs._M_start;
_M_end1 = __rhs._M_end1;
_M_end2 = __rhs._M_end2;
return *this;
for (auto& __it : *this)
{
while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
== _S_opcode_dummy)
__it._M_next = (*this)[__it._M_next]._M_next;
if (__it._M_opcode == _S_opcode_alternative)
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
== _S_opcode_dummy)
__it._M_alt = (*this)[__it._M_alt]._M_next;
}
}
// Just apply DFS on the sequence and re-link their links.
template<typename _CharT, typename _TraitsT>
void _StateSeq<_CharT, _TraitsT>::
_M_push_back(_StateIdT __id)
{
if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __id;
_M_end1 = __id;
}
template<typename _CharT, typename _TraitsT>
void _StateSeq<_CharT, _TraitsT>::
_M_append(_StateIdT __id)
{
if (_M_end2 != _S_invalid_state_id)
{
if (_M_end2 == _M_end1)
_M_nfa[_M_end2]._M_alt = __id;
else
_M_nfa[_M_end2]._M_next = __id;
_M_end2 = _S_invalid_state_id;
}
if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __id;
_M_end1 = __id;
}
template<typename _CharT, typename _TraitsT>
void _StateSeq<_CharT, _TraitsT>::
_M_append(_StateSeq& __rhs)
{
if (_M_end2 != _S_invalid_state_id)
{
if (_M_end2 == _M_end1)
_M_nfa[_M_end2]._M_alt = __rhs._M_start;
else
_M_nfa[_M_end2]._M_next = __rhs._M_start;
_M_end2 = _S_invalid_state_id;
}
if (__rhs._M_end2 != _S_invalid_state_id)
_M_end2 = __rhs._M_end2;
if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __rhs._M_start;
_M_end1 = __rhs._M_end1;
}
// @todo implement this function.
template<typename _CharT, typename _TraitsT>
_StateIdT _StateSeq<_CharT, _TraitsT>::
_StateSeq<_CharT, _TraitsT> _StateSeq<_CharT, _TraitsT>::
_M_clone()
{ return 0; }
{
std::map<_StateIdT, _StateIdT> __m;
std::stack<_StateIdT> __stack;
__stack.push(_M_start);
while (!__stack.empty())
{
auto __u = __stack.top();
__stack.pop();
auto __dup = _M_nfa[__u];
auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id;
if (__u == _M_end)
continue;
if (__m.count(__dup._M_next) == 0)
__stack.push(__dup._M_next);
if (__dup._M_opcode == _S_opcode_alternative)
if (__m.count(__dup._M_alt) == 0)
__stack.push(__dup._M_alt);
}
for (auto __it : __m)
{
auto& __ref = _M_nfa[__it.second];
if (__ref._M_next != -1)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next));
__ref._M_next = __m[__ref._M_next];
}
if (__ref._M_opcode == _S_opcode_alternative)
if (__ref._M_alt != -1)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt));
__ref._M_alt = __m[__ref._M_alt];
}
}
return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
}
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail

View File

@ -56,7 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::shared_ptr<_RegexT>
_M_get_nfa() const
{ return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
{ return std::shared_ptr<_RegexT>(new _RegexT(_M_nfa)); }
private:
typedef _Scanner<_FwdIter> _ScannerT;
@ -64,6 +64,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT;
typedef std::ctype<_CharT> _CtypeT;
// accepts a specific token or returns false.
bool
@ -90,21 +91,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_bracket_expression();
void
_M_bracket_list(_BMatcherT& __matcher);
bool
_M_follow_list(_BMatcherT& __matcher);
void
_M_expression_term(_BMatcherT& __matcher);
bool
_M_range_expression(_BMatcherT& __matcher);
bool
_M_start_range(_BMatcherT& __matcher);
bool
_M_collating_symbol(_BMatcherT& __matcher);
@ -120,12 +112,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_try_char();
_CharT
_M_get_char();
_StateSeqT
_M_pop()
{
auto ret = _M_stack.top();
_M_stack.pop();
return ret;
}
const _TraitsT& _M_traits;
const _CtypeT& _M_ctype;
_ScannerT _M_scanner;
_RegexT _M_state_store;
_RegexT _M_nfa;
_StringT _M_value;
_StackT _M_stack;
_FlagT _M_flags;

View File

@ -28,6 +28,31 @@
* Do not attempt to use it directly. @headername{regex}
*/
// TODO make comments doxygen format.
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
// (http://swtch.com/~rsc/regexp/regexp1.html"),
// but doesn't strictly follow it.
//
// When compiling, states are *chained* instead of tree- or graph-constructed.
// It's more like structured programs: there's if statement and loop statement.
//
// For alternative structure(say "a|b"), aka "if statement", two branchs should
// be constructed. However, these two shall merge to an "end_tag" at the end of
// this operator:
//
// branch1
// / \
// => begin_tag end_tag =>
// \ /
// branch2
//
// This is the difference between this implementation and that in Russ's
// article.
//
// That's why we introduced dummy node here ------ "end_tag" is a dummy node.
// All dummy node will be eliminated at the end of compiling process.
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
@ -39,32 +64,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler(_FwdIter __b, _FwdIter __e,
const _TraitsT& __traits, _FlagT __flags)
: _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
_M_state_store(__flags), _M_flags(__flags)
_M_ctype(std::use_facet<std::ctype<_CharT>>(_M_traits.getloc())),
_M_nfa(__flags), _M_flags(__flags)
{
_StateSeqT __r(_M_state_store,
_M_state_store._M_insert_subexpr_begin());
_M_disjunction();
if (!_M_stack.empty())
{
__r._M_append(_M_stack.top());
_M_stack.pop();
}
__r._M_append(_M_state_store._M_insert_subexpr_end());
__r._M_append(_M_state_store._M_insert_accept());
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
bool
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_match_token(_TokenT token)
{
if (token == _M_scanner._M_get_token())
{
_M_value = _M_scanner._M_get_value();
_M_scanner._M_advance();
return true;
}
return false;
_StateSeqT __r(_M_nfa, _M_nfa._M_start());
__r._M_append(_M_nfa._M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_eof))
__throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
_GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
__r._M_append(_M_nfa._M_insert_subexpr_end());
__r._M_append(_M_nfa._M_insert_accept());
_M_nfa._M_eliminate_dummy();
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -73,12 +85,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_disjunction()
{
this->_M_alternative();
if (_M_match_token(_ScannerT::_S_token_or))
// TODO empty alternative like, um, "(|asdf)"
while (_M_match_token(_ScannerT::_S_token_or))
{
_StateSeqT __alt1 = _M_stack.top(); _M_stack.pop();
this->_M_disjunction();
_StateSeqT __alt2 = _M_stack.top(); _M_stack.pop();
_M_stack.push(_StateSeqT(__alt1, __alt2));
_StateSeqT __alt1 = _M_pop();
this->_M_alternative();
_StateSeqT __alt2 = _M_pop();
auto __end = _M_nfa._M_insert_dummy();
__alt1._M_append(__end);
__alt2._M_append(__end);
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_alt(__alt1._M_start,
__alt2._M_start),
__end));
}
}
@ -89,15 +108,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (this->_M_term())
{
_StateSeqT __re = _M_stack.top(); _M_stack.pop();
_StateSeqT __re = _M_pop();
this->_M_alternative();
if (!_M_stack.empty())
{
__re._M_append(_M_stack.top());
_M_stack.pop();
}
__re._M_append(_M_pop());
_M_stack.push(__re);
}
else
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -121,7 +138,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_assertion()
{
return false;
// temporary place holders.
if (_M_match_token(_ScannerT::_S_token_line_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_line_end))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_word_bound))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_neg_word_bound))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else if (_M_match_token(_ScannerT::_S_token_subexpr_neg_lookahead_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
else
return false;
return true;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -133,67 +165,70 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_stack.top(), -1);
__r._M_append(__r._M_front());
_M_stack.pop();
auto __e = _M_pop();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
__e._M_start));
__e._M_append(__r);
_M_stack.push(__r);
return;
}
if (_M_match_token(_ScannerT::_S_token_closure1))
else if (_M_match_token(_ScannerT::_S_token_closure1))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_state_store,
_M_state_store.
_M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
return;
auto __e = _M_pop();
__e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start));
_M_stack.push(__e);
}
if (_M_match_token(_ScannerT::_S_token_opt))
else if (_M_match_token(_ScannerT::_S_token_opt))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_stack.top(), -1);
_M_stack.pop();
__throw_regex_error(regex_constants::error_badrepeat);
auto __e = _M_pop();
auto __end = _M_nfa._M_insert_dummy();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
__e._M_start));
__e._M_append(__end);
__r._M_append(__end);
_M_stack.push(__r);
return;
}
if (_M_match_token(_ScannerT::_S_token_interval_begin))
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_stack.top());
_StateSeqT __r(_M_pop());
_StateSeqT __e(_M_nfa, _M_nfa._M_insert_dummy());
int __min_rep = _M_cur_int_value(10);
for (int __i = 1; __i < __min_rep; ++__i)
_M_stack.top()._M_append(__r._M_clone());
// {3
for (int __i = 0; __i < __min_rep; ++__i)
__e._M_append(__r._M_clone());
if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count))
if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7}
{
int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
for (int __i = 0; __i < __n; ++__i)
{
_StateSeqT __r(_M_state_store,
_M_state_store.
_M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
}
int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy();
for (int __i = 0; __i < __n; ++__i)
{
auto __tmp = __r._M_clone();
__e._M_append(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_alt(__tmp._M_start, __end), __tmp._M_end));
}
__e._M_append(__end);
}
else
else // {3,}
{
_StateSeqT __r(_M_stack.top(), -1);
__r._M_push_back(__r._M_front());
_M_stack.pop();
_M_stack.push(__r);
auto __tmp = __r._M_clone();
_StateSeqT __s(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
__tmp._M_start));
__tmp._M_append(__s);
__e._M_append(__s);
}
if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace);
return;
_M_stack.push(__e);
}
}
@ -203,46 +238,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_atom()
{
if (_M_match_token(_ScannerT::_S_token_anychar))
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_CharT, _TraitsT>(_M_traits))));
else if (_M_try_char())
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_CharT, _TraitsT>(_M_value[0],
_M_traits,
_M_flags))));
else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10))));
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{
_M_stack.push(_StateSeqT(_M_state_store,
_M_state_store._M_insert_matcher
(_AnyMatcher<_CharT, _TraitsT>(_M_traits))));
return true;
_GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
_M_traits, _M_flags);
__matcher._M_add_character_class(_M_value);
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(__matcher)));
}
if (_M_try_char())
else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
{
_M_stack.push(_StateSeqT(_M_state_store,
_M_state_store._M_insert_matcher
(_CharMatcher<_CharT, _TraitsT>(_M_value[0],
_M_traits,
_M_flags))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_backref))
{
_M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
_M_insert_backref(_M_cur_int_value(10))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{
int __mark = _M_state_store._M_sub_count();
_StateSeqT __r(_M_state_store,
_M_state_store.
_M_insert_subexpr_begin());
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren);
if (!_M_stack.empty())
{
__r._M_append(_M_stack.top());
_M_stack.pop();
}
__r._M_append(_M_state_store._M_insert_subexpr_end());
__r._M_append(_M_pop());
_M_stack.push(__r);
return true;
}
return _M_bracket_expression();
else if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{
int __mark = _M_nfa._M_sub_count();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
__r._M_append(_M_nfa._M_insert_subexpr_end());
_M_stack.push(__r);
}
else if (!_M_bracket_expression())
return false;
return true;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -255,51 +294,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
return false;
_BMatcherT __matcher(__neg, _M_traits, _M_flags);
_M_bracket_list(__matcher);
_M_stack.push(_StateSeqT(_M_state_store,
_M_state_store._M_insert_matcher(__matcher)));
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher);
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_matcher(__matcher)));
return true;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
void
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_bracket_list(_BMatcherT& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return;
_M_expression_term(__matcher);
_M_bracket_list(__matcher);
return;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
void
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_expression_term(_BMatcherT& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_collsymbol))
{
__matcher._M_add_collating_element(_M_value);
return;
}
if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
{
__matcher._M_add_equivalence_class(_M_value);
return;
}
if (_M_match_token(_ScannerT::_S_token_char_class_name))
{
__matcher._M_add_character_class(_M_value);
return;
}
if (_M_try_char()) // [a
__matcher._M_add_collating_element(_M_value);
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
__matcher._M_add_equivalence_class(_M_value);
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
__matcher._M_add_character_class(_M_value);
else if (_M_try_char()) // [a
{
auto __ch = _M_value[0];
if (_M_try_char())
{
if (_M_value[0] == std::use_facet<std::ctype<_CharT>>
(_M_traits.getloc()).widen('-')) // [a-
if (_M_value[0] == '-') // [a-
{
if (_M_try_char()) // [a-z]
{
@ -315,9 +332,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__matcher._M_add_char(_M_value[0]);
}
__matcher._M_add_char(__ch);
return;
}
__throw_regex_error(regex_constants::error_brack);
else
__throw_regex_error(regex_constants::error_brack);
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
@ -341,6 +358,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __is_char;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
bool
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_match_token(_TokenT token)
{
if (token == _M_scanner._M_get_token())
{
_M_value = _M_scanner._M_get_value();
_M_scanner._M_advance();
return true;
}
return false;
}
template<typename _FwdIter, typename _CharT, typename _TraitsT>
int
_Compiler<_FwdIter, _CharT, _TraitsT>::

View File

@ -86,6 +86,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_closure1,
_S_token_line_begin,
_S_token_line_end,
_S_token_word_bound,
_S_token_neg_word_bound,
_S_token_comma,
_S_token_dup_count,
_S_token_eof,

View File

@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex}
*/
// TODO make comments doxygen format
// TODO make comments doxygen format.
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk
@ -370,10 +370,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c));
}
else if (__c == 'b')
_M_token = _S_token_word_bound;
else if (__c == 'B')
_M_token = _S_token_neg_word_bound;
// N3376 28.13
else if (__c == 'b'
|| __c == 'B'
|| __c == 'd'
else if (__c == 'd'
|| __c == 'D'
|| __c == 's'
|| __c == 'S'

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
//
// 2010-06-16 Stephen M. Webb <stephen.webb@bregmasoft.ca>

View File

@ -0,0 +1,52 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests ECMAScript \d \D \s \S \w \W
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
bool test __attribute__((unused)) = true;
VERIFY(regex_match("01", regex("\\d*")));
VERIFY(regex_match("asdfjkl", regex("\\D*")));
VERIFY(!regex_match("asdfjkl0", regex("\\D*")));
VERIFY(regex_match("\r\t\v\f ", regex("\\s*")));
VERIFY(regex_match("asdfjkl", regex("\\S*")));
VERIFY(!regex_match("asdfjkl\r", regex("\\S*")));
VERIFY(regex_match("_az", regex("\\w*")));
VERIFY(regex_match("!@#$%", regex("\\W*")));
VERIFY(!regex_match("_01234", regex("\\W*")));
}
int
main()
{
test01();
return 0;
}

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
//
// 2010-06-21 Stephen M. Webb <stephen.webb@bregmasoft.ca>
@ -32,27 +31,31 @@ test01()
{
bool test __attribute__((unused)) = true;
std::regex re("(a+)", std::regex::extended);
const char target[] = "aa";
std::cmatch m;
std::regex re("(a+)", std::regex::extended);
const char target[] = "aa";
std::cmatch m;
VERIFY( std::regex_match(target, m, re) );
VERIFY( std::regex_match(target, m, re) );
VERIFY( re.mark_count() == 1 );
VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target );
VERIFY( m.prefix().second == target );
VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target+sizeof(target) );
VERIFY( m.suffix().second == target+sizeof(target) );
VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target );
VERIFY( m[0].second == target+sizeof(target) );
VERIFY( m[0].matched == true );
VERIFY( m[1].first == target );
VERIFY( m[1].second == target+sizeof(target) );
VERIFY( m[1].matched == true );
VERIFY( re.mark_count() == 1 );
VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target );
VERIFY( m.prefix().second == target );
VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target+sizeof(target)-1 );
VERIFY( m.suffix().second == target+sizeof(target)-1 );
VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target );
VERIFY( m[0].second == target+sizeof(target)-1 );
VERIFY( m[0].matched == true );
VERIFY( m[1].first == target );
VERIFY( m[1].second == target+sizeof(target)-1 );
VERIFY( m[1].matched == true );
VERIFY(!std::regex_match("", std::regex("a+", std::regex::extended)));
VERIFY(std::regex_match("a", std::regex("a+", std::regex::extended)));
VERIFY(std::regex_match("aa", std::regex("a+", std::regex::extended)));
}

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
//
// 2010-06-21 Stephen M. Webb <stephen.webb@bregmasoft.ca>
@ -32,27 +31,31 @@ test01()
{
bool test __attribute__((unused)) = true;
std::regex re("(aa?)", std::regex::extended);
char target[] = "a";
std::cmatch m;
std::regex re("(aa?)", std::regex::extended);
char target[] = "a";
std::cmatch m;
VERIFY( std::regex_match(target, m, re) );
VERIFY( std::regex_match(target, m, re) );
VERIFY( re.mark_count() == 1 );
VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target );
VERIFY( m.prefix().second == target );
VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target+sizeof(target) );
VERIFY( m.suffix().second == target+sizeof(target) );
VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target );
VERIFY( m[0].second == target+sizeof(target) );
VERIFY( m[0].matched == true );
VERIFY( m[1].first == target );
VERIFY( m[1].second == target+sizeof(target) );
VERIFY( m[1].matched == true );
VERIFY( re.mark_count() == 1 );
VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target );
VERIFY( m.prefix().second == target );
VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target+sizeof(target)-1 );
VERIFY( m.suffix().second == target+sizeof(target)-1 );
VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target );
VERIFY( m[0].second == target+sizeof(target)-1 );
VERIFY( m[0].matched == true );
VERIFY( m[1].first == target );
VERIFY( m[1].second == target+sizeof(target)-1 );
VERIFY( m[1].matched == true );
VERIFY(std::regex_match("", std::regex("a?", std::regex::extended)));
VERIFY(std::regex_match("a", std::regex("a?", std::regex::extended)));
VERIFY(!std::regex_match("aa", std::regex("a?", std::regex::extended)));
}

View File

@ -0,0 +1,68 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests Extended interval range.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
bool test __attribute__((unused)) = true;
regex re;
re.assign("(ab){3}", std::regex::extended);
VERIFY(!regex_match("abab", re));
VERIFY(regex_match("ababab", re));
VERIFY(!regex_match("abababab", re));
re.assign("(ab){3,}", std::regex::extended);
VERIFY(!regex_match("abab", re));
VERIFY(regex_match("ababab", re));
VERIFY(regex_match("abababab", re));
VERIFY(regex_match("ababababab", re));
re.assign("(ab){0,3}", std::regex::extended);
VERIFY(regex_match("", re));
VERIFY(regex_match("ab", re));
VERIFY(regex_match("abab", re));
VERIFY(regex_match("ababab", re));
VERIFY(!regex_match("abababab", re));
re.assign("(a|b){0,2}", std::regex::extended);
VERIFY(regex_match("", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("b", re));
VERIFY(regex_match("aa", re));
VERIFY(regex_match("ab", re));
VERIFY(regex_match("ba", re));
VERIFY(regex_match("bb", re));
VERIFY(!regex_match("aaa", re));
}
int
main()
{
test01();
return 0;
}

View File

@ -31,23 +31,23 @@ test01()
{
bool test __attribute__((unused)) = true;
std::regex re("a{0,3}", std::regex::extended);
std::string target("aa");
std::smatch m;
std::regex re("a{0,3}", std::regex::extended);
std::string target("aa");
std::smatch m;
VERIFY( std::regex_match(target, m, re) );
VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target.begin() );
VERIFY( m.prefix().second == target.begin() );
VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target.end() );
VERIFY( m.suffix().second == target.end() );
VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target.begin() );
VERIFY( m[0].second == target.end() );
VERIFY( m[0].matched == true );
VERIFY( m.size() == re.mark_count()+1 );
VERIFY( m.empty() == false );
VERIFY( m.prefix().first == target.begin() );
VERIFY( m.prefix().second == target.begin() );
VERIFY( m.prefix().matched == false );
VERIFY( m.suffix().first == target.end() );
VERIFY( m.suffix().second == target.end() );
VERIFY( m.suffix().matched == false );
VERIFY( m[0].first == target.begin() );
VERIFY( m[0].second == target.end() );
VERIFY( m[0].matched == true );
}

View File

@ -1,5 +1,4 @@
// { dg-options "-std=c++0x" }
// { dg-do run { xfail *-*-* } }
//
// 2010-06-16 Stephen M. Webb <stephen.webb@bregmasoft.ca>

View File

@ -0,0 +1,59 @@
// { dg-options "-std=gnu++11" }
// { dg-require-namedlocale "en_US.UTF-8" }
// { dg-do run { xfail *-*-* } }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.12.1 regex_iterator
// Tests regex_iterator class
#include <regex>
#include <testsuite_hooks.h>
void
test01()
{
bool test __attribute__((unused)) = true;
std::setlocale(LC_ALL, "en_US.UTF-8");
std::wstring str2 = L"ä\u2009Ä\u2009ö\u2009Ö\u2009ü\u2009Ü";
std::wregex re2;
re2.imbue(std::locale("en_US.UTF-8"));
re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
std::wsregex_iterator p(str2.begin(), str2.end(), re2);
auto a = p;
++p;
VERIFY(a != p);
//for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
// p != std::wsregex_iterator{}; ++p)
// std::wcout << (*p)[1] << std::endl;
}
int
main()
{
test01();
return 0;
}