Implement bracket expression.

2013-08-03  Tim Shen  <timshen91@gmail.com>

	Implement bracket expression.
	* include/bits/regex.h: Remove constexpr from "|=", etc.
	* include/bits/regex_compiler.h: Parse bracket expression.
	* include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old
	_RangeMatcher).
	* include/bits/regex_nfa.tcc: Implement them.
	* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
	from regex_search to regex_match.
	* testsuite/28_regex/algorithms/regex_match/extended/
	cstring_bracket_01.cc: New.

From-SVN: r201465
This commit is contained in:
Tim Shen 2013-08-03 14:32:54 +00:00 committed by Tim Shen
parent cdaa808aed
commit 399eeef978
7 changed files with 332 additions and 193 deletions

View File

@ -1,3 +1,16 @@
2013-08-03 Tim Shen <timshen91@gmail.com>
Implement bracket expression.
* include/bits/regex.h: Remove constexpr from "|=", etc.
* include/bits/regex_compiler.h: Parse bracket expression.
* include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old
_RangeMatcher).
* include/bits/regex_nfa.tcc: Implement them.
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
from regex_search to regex_match.
* testsuite/28_regex/algorithms/regex_match/extended/
cstring_bracket_01.cc: New.
2013-08-02 Paolo Carlini <paolo.carlini@oracle.com> 2013-08-02 Paolo Carlini <paolo.carlini@oracle.com>
* include/debug/functions.h (__foreign_iterator_aux4): * include/debug/functions.h (__foreign_iterator_aux4):

View File

@ -95,15 +95,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
operator~() const operator~() const
{ return _RegexMask(~_M_base, ~_M_extended); } { return _RegexMask(~_M_base, ~_M_extended); }
constexpr _RegexMask& _RegexMask&
operator&=(_RegexMask __other) operator&=(_RegexMask __other)
{ return *this = (*this) & __other; } { return *this = (*this) & __other; }
constexpr _RegexMask& _RegexMask&
operator|=(_RegexMask __other) operator|=(_RegexMask __other)
{ return *this = (*this) | __other; } { return *this = (*this) | __other; }
constexpr _RegexMask& _RegexMask&
operator^=(_RegexMask __other) operator^=(_RegexMask __other)
{ return *this = (*this) ^ __other; } { return *this = (*this) ^ __other; }
@ -228,7 +228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__fctyp.tolower(&*__v.begin(), &*__v.end()); __fctyp.tolower(&*__v.begin(), &*__v.end());
return this->transform(&*__v.begin(), &*__v.end()); return this->transform(&*__v.begin(), &*__v.end());
} }
__catch (...) __catch (std::bad_cast)
{ {
} }
return string_type(); return string_type();
@ -519,7 +519,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}; };
std::string __s(__last - __first, '?'); std::string __s(__last - __first, '?');
string_type a(__first, __last);
__fctyp.narrow(__first, __last, '?', &*__s.begin()); __fctyp.narrow(__first, __last, '?', &*__s.begin());
for (unsigned int __i = 0; *__collatenames[__i]; __i++) for (unsigned int __i = 0; *__collatenames[__i]; __i++)

View File

@ -44,9 +44,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
typedef unsigned int _StateT; typedef unsigned int _StateT;
static constexpr _StateT _S_state_at_start = 1 << 0; static constexpr _StateT _S_state_in_brace = 1 << 0;
static constexpr _StateT _S_state_in_brace = 1 << 2; static constexpr _StateT _S_state_in_bracket = 1 << 1;
static constexpr _StateT _S_state_in_bracket = 1 << 3;
virtual ~_Scanner_base() { }; virtual ~_Scanner_base() { };
}; };
@ -77,8 +76,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_anychar, _S_token_anychar,
_S_token_backref, _S_token_backref,
_S_token_bracket_begin, _S_token_bracket_begin,
_S_token_bracket_inverse_begin,
_S_token_bracket_end, _S_token_bracket_end,
_S_token_inverse_class,
_S_token_char_class_name, _S_token_char_class_name,
_S_token_closure0, _S_token_closure0,
_S_token_closure1, _S_token_closure1,
@ -97,7 +96,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_opt, _S_token_opt,
_S_token_or, _S_token_or,
_S_token_ord_char, _S_token_ord_char,
_S_token_quoted_char,
_S_token_subexpr_begin, _S_token_subexpr_begin,
_S_token_subexpr_end, _S_token_subexpr_end,
_S_token_word_begin, _S_token_word_begin,
@ -108,7 +106,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
std::locale __loc) std::locale __loc)
: _M_current(__begin) , _M_end(__end) , _M_flags(__flags), : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
_M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start) _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
{ _M_advance(); } { _M_advance(); }
void void
@ -219,9 +217,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
else if (__c == _M_ctype.widen('[')) else if (__c == _M_ctype.widen('['))
{ {
_M_curToken = _S_token_bracket_begin; if (*++_M_current == _M_ctype.widen('^'))
_M_state |= (_S_state_in_bracket | _S_state_at_start); {
++_M_current; _M_curToken = _S_token_bracket_inverse_begin;
++_M_current;
}
else
_M_curToken = _S_token_bracket_begin;
_M_state |= _S_state_in_bracket;
return; return;
} }
else if (__c == _M_ctype.widen('\\')) else if (__c == _M_ctype.widen('\\'))
@ -304,14 +307,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Scanner<_InputIterator>:: _Scanner<_InputIterator>::
_M_scan_in_bracket() _M_scan_in_bracket()
{ {
if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^')) if (*_M_current == _M_ctype.widen('['))
{
_M_curToken = _S_token_inverse_class;
_M_state &= ~_S_state_at_start;
++_M_current;
return;
}
else if (*_M_current == _M_ctype.widen('['))
{ {
++_M_current; ++_M_current;
if (_M_current == _M_end) if (_M_current == _M_end)
@ -347,21 +343,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
else if (*_M_current == _M_ctype.widen(']')) else if (*_M_current == _M_ctype.widen(']'))
{ {
if (!(_M_flags & regex_constants::ECMAScript) _M_curToken = _S_token_bracket_end;
|| !(_M_state & _S_state_at_start)) _M_state &= ~_S_state_in_bracket;
{ ++_M_current;
// special case: only if _not_ chr first after return;
// '[' or '[^' and if not ECMAscript
_M_curToken = _S_token_bracket_end;
++_M_current;
return;
}
} }
else if (*_M_current == _M_ctype.widen('\\'))
{
_M_eat_escape();
return;
}
_M_curToken = _S_token_collelem_single; _M_curToken = _S_token_collelem_single;
_M_curValue.assign(1, *_M_current); _M_curValue.assign(1, *_M_current);
++_M_current; ++_M_current;
} }
// TODO implement it.
template<typename _InputIterator> template<typename _InputIterator>
void void
_Scanner<_InputIterator>:: _Scanner<_InputIterator>::
@ -463,11 +460,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_curToken = _S_token_backref; _M_curToken = _S_token_backref;
_M_curValue.assign(1, __c); _M_curValue.assign(1, __c);
} }
else if (_M_state & _S_state_in_bracket)
{
if (__c == _M_ctype.widen('-')
|| __c == _M_ctype.widen('[')
|| __c == _M_ctype.widen(']'))
{
_M_curToken = _S_token_ord_char;
_M_curValue.assign(1, __c);
}
else if ((_M_flags & regex_constants::ECMAScript)
&& __c == _M_ctype.widen('b'))
{
_M_curToken = _S_token_ord_char;
_M_curValue.assign(1, _M_ctype.widen(' '));
}
else
__throw_regex_error(regex_constants::error_escape);
}
else else
__throw_regex_error(regex_constants::error_escape); __throw_regex_error(regex_constants::error_escape);
} }
// Eats a character class or throws an exception. // Eats a character class or throws an exception.
// current point to ':' delimiter on entry, char after ']' on return // current point to ':' delimiter on entry, char after ']' on return
template<typename _InputIterator> template<typename _InputIterator>
@ -549,6 +563,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_token_bracket_begin: case _S_token_bracket_begin:
ostr << "bracket-begin\n"; ostr << "bracket-begin\n";
break; break;
case _S_token_bracket_inverse_begin:
ostr << "bracket-inverse-begin\n";
break;
case _S_token_bracket_end: case _S_token_bracket_end:
ostr << "bracket-end\n"; ostr << "bracket-end\n";
break; break;
@ -606,9 +623,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_token_ord_char: case _S_token_ord_char:
ostr << "ordinary character: \"" << _M_value() << "\"\n"; ostr << "ordinary character: \"" << _M_value() << "\"\n";
break; break;
case _S_token_quoted_char:
ostr << "quoted char\n";
break;
case _S_token_subexpr_begin: case _S_token_subexpr_begin:
ostr << "subexpr begin\n"; ostr << "subexpr begin\n";
break; break;
@ -624,6 +638,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_token_unknown: case _S_token_unknown:
ostr << "-- unknown token --\n"; ostr << "-- unknown token --\n";
break; break;
default:
_GLIBCXX_DEBUG_ASSERT(false);
} }
return ostr; return ostr;
} }
@ -650,7 +666,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _Scanner<_InIter> _ScannerT; typedef _Scanner<_InIter> _ScannerT;
typedef typename _ScannerT::_TokenT _TokenT; typedef typename _ScannerT::_TokenT _TokenT;
typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT; typedef _BracketMatcher<_InIter, _TraitsT> _BMatcherT;
// accepts a specific token or returns false. // accepts a specific token or returns false.
bool bool
@ -659,7 +675,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_disjunction(); _M_disjunction();
bool void
_M_alternative(); _M_alternative();
bool bool
@ -668,7 +684,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
_M_assertion(); _M_assertion();
bool void
_M_quantifier(); _M_quantifier();
bool bool
@ -678,31 +694,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_bracket_expression(); _M_bracket_expression();
bool bool
_M_bracket_list(_RMatcherT& __matcher); _M_bracket_list(_BMatcherT& __matcher);
bool bool
_M_follow_list(_RMatcherT& __matcher); _M_follow_list(_BMatcherT& __matcher);
void
_M_expression_term(_BMatcherT& __matcher);
bool bool
_M_follow_list2(_RMatcherT& __matcher); _M_range_expression(_BMatcherT& __matcher);
bool bool
_M_expression_term(_RMatcherT& __matcher); _M_start_range(_BMatcherT& __matcher);
bool bool
_M_range_expression(_RMatcherT& __matcher); _M_collating_symbol(_BMatcherT& __matcher);
bool bool
_M_start_range(_RMatcherT& __matcher); _M_equivalence_class(_BMatcherT& __matcher);
bool bool
_M_collating_symbol(_RMatcherT& __matcher); _M_character_class(_BMatcherT& __matcher);
bool
_M_equivalence_class(_RMatcherT& __matcher);
bool
_M_character_class(_RMatcherT& __matcher);
int int
_M_cur_int_value(int __radix); _M_cur_int_value(int __radix);
@ -712,6 +725,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StringT _M_cur_value; _StringT _M_cur_value;
_Nfa _M_state_store; _Nfa _M_state_store;
_StackT _M_stack; _StackT _M_stack;
_FlagT _M_flags;
}; };
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
@ -719,7 +733,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits, _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
_Compiler<_InIter, _TraitsT>::_FlagT __flags) _Compiler<_InIter, _TraitsT>::_FlagT __flags)
: _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
_M_state_store(__flags) _M_state_store(__flags), _M_flags(__flags)
{ {
typedef _StartTagger<_InIter, _TraitsT> _Start; typedef _StartTagger<_InIter, _TraitsT> _Start;
typedef _EndTagger<_InIter, _TraitsT> _End; typedef _EndTagger<_InIter, _TraitsT> _End;
@ -743,8 +757,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (token == _M_scanner._M_token()) if (token == _M_scanner._M_token())
{ {
_M_cur_value = _M_scanner._M_value(); _M_cur_value = _M_scanner._M_value();
_M_scanner._M_advance(); _M_scanner._M_advance();
return true; return true;
} }
return false; return false;
@ -766,7 +780,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
bool void
_Compiler<_InIter, _TraitsT>:: _Compiler<_InIter, _TraitsT>::
_M_alternative() _M_alternative()
{ {
@ -780,9 +794,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_stack.pop(); _M_stack.pop();
} }
_M_stack.push(__re); _M_stack.push(__re);
return true;
} }
return false;
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
@ -829,7 +841,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
bool void
_Compiler<_InIter, _TraitsT>:: _Compiler<_InIter, _TraitsT>::
_M_quantifier() _M_quantifier()
{ {
@ -841,7 +853,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__r._M_append(__r._M_front()); __r._M_append(__r._M_front());
_M_stack.pop(); _M_stack.pop();
_M_stack.push(__r); _M_stack.push(__r);
return true; return;
} }
if (_M_match_token(_ScannerT::_S_token_closure1)) if (_M_match_token(_ScannerT::_S_token_closure1))
{ {
@ -852,7 +864,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_alt(_S_invalid_state_id, _M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front())); _M_stack.top()._M_front()));
_M_stack.top()._M_append(__r); _M_stack.top()._M_append(__r);
return true; return;
} }
if (_M_match_token(_ScannerT::_S_token_opt)) if (_M_match_token(_ScannerT::_S_token_opt))
{ {
@ -861,7 +873,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeq __r(_M_stack.top(), -1); _StateSeq __r(_M_stack.top(), -1);
_M_stack.pop(); _M_stack.pop();
_M_stack.push(__r); _M_stack.push(__r);
return true; return;
} }
if (_M_match_token(_ScannerT::_S_token_interval_begin)) if (_M_match_token(_ScannerT::_S_token_interval_begin))
{ {
@ -897,9 +909,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
if (!_M_match_token(_ScannerT::_S_token_interval_end)) if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace); __throw_regex_error(regex_constants::error_brace);
return true; return;
} }
return false;
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
@ -922,15 +933,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
_M_stack.push(_StateSeq(_M_state_store, _M_stack.push(_StateSeq(_M_state_store,
_M_state_store._M_insert_matcher _M_state_store._M_insert_matcher
(_CMatcher(_M_cur_value[0], _M_traits)))); (_CMatcher(_M_cur_value[0], _M_flags, _M_traits))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_quoted_char))
{
// note that in the ECMA grammar, this case covers backrefs.
_M_stack.push(_StateSeq(_M_state_store,
_M_state_store._M_insert_matcher
(_CMatcher(_M_cur_value[0], _M_traits))));
return true; return true;
} }
if (_M_match_token(_ScannerT::_S_token_backref)) if (_M_match_token(_ScannerT::_S_token_backref))
@ -966,124 +969,74 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler<_InIter, _TraitsT>:: _Compiler<_InIter, _TraitsT>::
_M_bracket_expression() _M_bracket_expression()
{ {
if (_M_match_token(_ScannerT::_S_token_bracket_begin)) bool __inverse =
{ _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
_RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin), if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
_M_traits); return false;
if (!_M_bracket_list(__matcher) _BMatcherT __matcher( __inverse, _M_flags, _M_traits);
|| !_M_match_token(_ScannerT::_S_token_bracket_end)) // special case: only if _not_ chr first after
__throw_regex_error(regex_constants::error_brack); // '[' or '[^' or if ECMAscript
_M_stack.push(_StateSeq(_M_state_store, if (!_M_bracket_list(__matcher) // list is empty
_M_state_store._M_insert_matcher(__matcher))); && !(_M_flags & regex_constants::ECMAScript))
return true; __throw_regex_error(regex_constants::error_brack);
} _M_stack.push(_StateSeq(_M_state_store,
return false; _M_state_store._M_insert_matcher(__matcher)));
}
// If the dash is the last character in the bracket expression, it is not
// special.
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_bracket_list(_RMatcherT& __matcher)
{
if (_M_follow_list(__matcher))
{
if (_M_match_token(_ScannerT::_S_token_dash))
__matcher._M_add_char(_M_cur_value[0]);
return true;
}
return false;
}
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_follow_list(_RMatcherT& __matcher)
{ return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_follow_list2(_RMatcherT& __matcher)
{
if (_M_expression_term(__matcher))
return _M_follow_list2(__matcher);
return true; return true;
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
bool bool // list is non-empty
_Compiler<_InIter, _TraitsT>:: _Compiler<_InIter, _TraitsT>::
_M_expression_term(_RMatcherT& __matcher) _M_bracket_list(_BMatcherT& __matcher)
{ {
return (_M_collating_symbol(__matcher) if (_M_match_token(_ScannerT::_S_token_bracket_end))
|| _M_character_class(__matcher) return false;
|| _M_equivalence_class(__matcher) _M_expression_term(__matcher);
|| (_M_start_range(__matcher) _M_bracket_list(__matcher);
&& _M_range_expression(__matcher)));
}
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_range_expression(_RMatcherT& __matcher)
{
if (!_M_collating_symbol(__matcher))
if (!_M_match_token(_ScannerT::_S_token_dash))
__throw_regex_error(regex_constants::error_range);
__matcher._M_make_range();
return true; return true;
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>
bool void
_Compiler<_InIter, _TraitsT>:: _Compiler<_InIter, _TraitsT>::
_M_start_range(_RMatcherT& __matcher) _M_expression_term(_BMatcherT& __matcher)
{ return _M_match_token(_ScannerT::_S_token_dash); }
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_collating_symbol(_RMatcherT& __matcher)
{ {
if (_M_match_token(_ScannerT::_S_token_collelem_single))
{
__matcher._M_add_char(_M_cur_value[0]);
return true;
}
if (_M_match_token(_ScannerT::_S_token_collsymbol)) if (_M_match_token(_ScannerT::_S_token_collsymbol))
{ {
__matcher._M_add_collating_element(_M_cur_value); __matcher._M_add_collating_element(_M_cur_value);
return true; return;
} }
return false;
}
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_equivalence_class(_RMatcherT& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
{ {
__matcher._M_add_equivalence_class(_M_cur_value); __matcher._M_add_equivalence_class(_M_cur_value);
return true; return;
} }
return false;
}
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_character_class(_RMatcherT& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_char_class_name)) if (_M_match_token(_ScannerT::_S_token_char_class_name))
{ {
__matcher._M_add_character_class(_M_cur_value); __matcher._M_add_character_class(_M_cur_value);
return true; return;
} }
return false; if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
{
auto __ch = _M_cur_value[0];
if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
{
// If the dash is the last character in the bracket expression,
// it is not special.
if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
__matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
else // [a-z]
{
if (!_M_match_token(_ScannerT::_S_token_collelem_single))
__throw_regex_error(regex_constants::error_range);
__matcher._M_make_range(__ch, _M_cur_value[0]);
}
}
else // [a]
__matcher._M_add_char(__ch);
return;
}
__throw_regex_error(regex_constants::error_brack);
} }
template<typename _InIter, typename _TraitsT> template<typename _InIter, typename _TraitsT>

View File

@ -129,6 +129,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
int _M_index; int _M_index;
}; };
// Character comparison policy used as a base class by the matchers.
//
// Holds the syntax flags and a copy of the traits object, and exposes two
// predicates whose behaviour depends on those flags:
//   _M_equ(a, b) - equality test between two characters;
//   _M_le(a, b)  - "less than or equal" ordering test, used for ranges.
//
// TODO For now this is an all-in-one comparator. In the future there may be
// optimizations based on regex_traits::translate and
// regex_traits::transform.
template<typename _InIterT, typename _TraitsT>
  struct _Comparator
  {
    typedef typename _TraitsT::char_type          _CharT;
    typedef std::basic_string<_CharT>             _StringT;
    typedef regex_constants::syntax_option_type   _FlagT;

    // Stores the flags and copies the traits object.
    _Comparator(_FlagT __flags, const _TraitsT& __traits)
    : _M_flags(__flags), _M_traits(__traits)
    { }

    // True if __a and __b compare equal under the current flags.
    bool
    _M_equ(_CharT __a, _CharT __b) const;

    // True if __a sorts before or equal to __b under the current flags.
    bool
    _M_le(_CharT __a, _CharT __b) const;

    _FlagT   _M_flags;   // syntax options given at construction
    _TraitsT _M_traits;  // traits object, held by value
  };
/// Indicates if current state matches cursor current. /// Indicates if current state matches cursor current.
typedef std::function<bool (const _PatternCursor&)> _Matcher; typedef std::function<bool (const _PatternCursor&)> _Matcher;
@ -140,12 +163,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
/// Matches a single character /// Matches a single character
template<typename _InIterT, typename _TraitsT> template<typename _InIterT, typename _TraitsT>
struct _CharMatcher struct _CharMatcher
: public _Comparator<_InIterT, _TraitsT>
{ {
typedef typename _TraitsT::char_type char_type; typedef _Comparator<_InIterT, _TraitsT> _BaseT;
typedef typename _TraitsT::char_type _CharT;
typedef regex_constants::syntax_option_type _FlagT;
explicit explicit
_CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT()) _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
: _M_traits(__t), _M_c(_M_traits.translate(__c)) : _BaseT(__flags, __t), _M_c(__c)
{ } { }
bool bool
@ -153,55 +179,79 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
typedef const _SpecializedCursor<_InIterT>& _CursorT; typedef const _SpecializedCursor<_InIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc); _CursorT __c = static_cast<_CursorT>(__pc);
return _M_traits.translate(__c._M_current()) == _M_c; return this->_M_equ(__c._M_current(), _M_c);
} }
const _TraitsT& _M_traits; _CharT _M_c;
char_type _M_c;
}; };
/// Matches a character range (bracket expression) /// Matches a character range (bracket expression)
template<typename _InIterT, typename _TraitsT> template<typename _InIterT, typename _TraitsT>
struct _RangeMatcher struct _BracketMatcher
: public _Comparator<_InIterT, _TraitsT>
{ {
typedef typename _TraitsT::char_type _CharT; typedef _Comparator<_InIterT, _TraitsT> _BaseT;
typedef std::basic_string<_CharT> _StringT; typedef typename _TraitsT::char_class_type _CharClassT;
typedef regex_constants::syntax_option_type _FlagT;
typedef typename _TraitsT::char_type _CharT;
typedef std::basic_string<_CharT> _StringT;
explicit explicit
_RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT()) _BracketMatcher(bool __is_non_matching,
: _M_traits(__t), _M_is_non_matching(__is_non_matching) _FlagT __flags,
const _TraitsT& __t)
: _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
_M_is_non_matching(__is_non_matching), _M_class_set(0)
{ } { }
bool bool
operator()(const _PatternCursor& __pc) const operator()(const _PatternCursor& __pc) const;
{
typedef const _SpecializedCursor<_InIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc);
return true;
}
void void
_M_add_char(_CharT __c) _M_add_char(_CharT __c)
{ } { _M_char_set.push_back(__c); }
void void
_M_add_collating_element(const _StringT& __s) _M_add_collating_element(const _StringT& __s)
{ } {
auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
// TODO: digraph
_M_char_set.push_back(__st[0]);
}
void void
_M_add_equivalence_class(const _StringT& __s) _M_add_equivalence_class(const _StringT& __s)
{ } {
_M_add_character_class(
_M_traits.transform_primary(&*__s.begin(), &*__s.end()));
}
void void
_M_add_character_class(const _StringT& __s) _M_add_character_class(const _StringT& __s)
{ } {
auto __st = _M_traits.lookup_classname(
&*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase));
if (__st == 0)
__throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __st;
}
void void
_M_make_range() _M_make_range(_CharT __l, _CharT __r)
{ } {
if (!this->_M_le(__l, __r))
__throw_regex_error(regex_constants::error_range);
_M_range_set.push_back(make_pair(__l, __r));
}
const _TraitsT& _M_traits; _FlagT _M_flags;
bool _M_is_non_matching; _TraitsT _M_traits;
bool _M_is_non_matching;
std::vector<_CharT> _M_char_set;
std::vector<pair<_CharT, _CharT>> _M_range_set;
_CharClassT _M_class_set;
}; };
/// Identifies a state in the NFA. /// Identifies a state in the NFA.

View File

@ -35,6 +35,64 @@ namespace __detail
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Tests whether the character under the cursor is accepted by this
// bracket expression.
//
// The character is accepted when it matches, in this order of checking:
//   1. one of the literal characters in _M_char_set (via _M_equ),
//   2. the accumulated character-class mask _M_class_set (via isctype),
//   3. one of the [first, second] pairs in _M_range_set (via _M_le).
// Each later check runs only if the earlier ones failed.  The outcome is
// inverted when _M_is_non_matching is set (a "[^...]" expression).
//
// __pc is downcast to _SpecializedCursor<_InIterT>; this assumes the caller
// always passes a cursor of that dynamic type -- TODO confirm.
template<typename _InIterT, typename _TraitsT>
  bool _BracketMatcher<_InIterT, _TraitsT>::
  operator()(const _PatternCursor& __pc) const
  {
    typedef const _SpecializedCursor<_InIterT>& _CursorT;
    _CursorT __cursor = static_cast<_CursorT>(__pc);
    const _CharT __ch = __cursor._M_current();

    bool __ret = false;

    // 1. Literal characters.
    for (auto __elem : _M_char_set)
      if (this->_M_equ(__elem, __ch))
	{
	  __ret = true;
	  break;
	}

    // 2. Character classes.
    if (!__ret && _M_traits.isctype(__ch, _M_class_set))
      __ret = true;

    // 3. Ranges; skipped once a match has already been established.
    if (!__ret)
      for (auto& __range : _M_range_set)
	if (this->_M_le(__range.first, __ch)
	    && this->_M_le(__ch, __range.second))
	  {
	    __ret = true;
	    break;
	  }

    if (_M_is_non_matching)
      __ret = !__ret;
    return __ret;
  }
// Equality test between two characters, chosen by the syntax flags:
//   icase   -> compare the translate_nocase() images;
//   collate -> compare the translate() images;
//   neither -> compare the raw character values.
// icase takes precedence when both flags are set.
template<typename _InIterT, typename _TraitsT>
  bool _Comparator<_InIterT, _TraitsT>::
  _M_equ(_CharT __a, _CharT __b) const
  {
    bool __same;
    if (_M_flags & regex_constants::icase)
      __same = (_M_traits.translate_nocase(__a)
		== _M_traits.translate_nocase(__b));
    else if (_M_flags & regex_constants::collate)
      __same = (_M_traits.translate(__a) == _M_traits.translate(__b));
    else
      __same = (__a == __b);
    return __same;
  }
// Ordering test: true if __a sorts before or equal to __b.
//
// Each character is first mapped through translate_nocase() when icase is
// set, or translate() otherwise.  The mapped characters are wrapped in
// one-character strings and their traits transform() keys are compared
// with operator<=.
template<typename _InIterT, typename _TraitsT>
  bool _Comparator<_InIterT, _TraitsT>::
  _M_le(_CharT __a, _CharT __b) const
  {
    const _CharT __lhs = (_M_flags & regex_constants::icase)
			 ? _M_traits.translate_nocase(__a)
			 : _M_traits.translate(__a);
    _StringT __key1(1, __lhs);

    const _CharT __rhs = (_M_flags & regex_constants::icase)
			 ? _M_traits.translate_nocase(__b)
			 : _M_traits.translate(__b);
    _StringT __key2(1, __rhs);

    return _M_traits.transform(__key1.begin(), __key1.end())
	   <= _M_traits.transform(__key2.begin(), __key2.end());
  }
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
inline std::ostream& _State:: inline std::ostream& _State::
_M_print(std::ostream& ostr) const _M_print(std::ostream& ostr) const

View File

@ -37,7 +37,7 @@ test01()
std::string target("zxcv/onetwoabc"); std::string target("zxcv/onetwoabc");
std::smatch m; std::smatch m;
VERIFY( std::regex_search(target, m, re) ); VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 2 ); VERIFY( m.size() == 2 );
VERIFY( m[0].matched == true ); VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
@ -50,7 +50,7 @@ test01()
std::string target("zxcv/onetwoabc"); std::string target("zxcv/onetwoabc");
std::smatch m; std::smatch m;
VERIFY( std::regex_search(target, m, re) ); VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 3 ); VERIFY( m.size() == 3 );
VERIFY( m[0].matched == true ); VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );

View File

@ -0,0 +1,66 @@
// { dg-options "-std=gnu++11" }
//
// 2013-08-01 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests Extended bracket expression against a C-string.
#include <regex>
#include <testsuite_hooks.h>
// Exercises POSIX-extended bracket expressions through std::regex_match on
// C-string targets: literal members, ranges, character classes (with and
// without icase) and a collating symbol.
void
test01()
{
// NOTE(review): presumably consumed by the VERIFY macro from
// testsuite_hooks.h -- confirm before renaming or removing.
bool test __attribute__((unused)) = true;
{
// Literal 'z' plus the range a-x: 'z' and 'a' are accepted, while 'y' is
// neither listed nor inside the range.
std::regex re("pre/[za-x]", std::regex::extended);
VERIFY( std::regex_match("pre/z", re) );
VERIFY( std::regex_match("pre/a", re) );
VERIFY( !std::regex_match("pre/y", re) );
}
{
// Class name written in mixed case: it is still expected to resolve to
// "upper", so only the upper-case letter matches.
std::regex re("pre/[[:uPPer:]]", std::regex::extended);
VERIFY( std::regex_match("pre/Z", re) );
VERIFY( !std::regex_match("pre/_", re) );
VERIFY( !std::regex_match("pre/a", re) );
VERIFY( !std::regex_match("pre/0", re) );
}
{
// With icase, the "lower" class is expected to accept letters of either
// case.
std::regex re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase);
VERIFY( std::regex_match("pre/Z", re) );
VERIFY( std::regex_match("pre/a", re) );
}
{
// The "w" class combined with the collating symbol [.tilde.]: '~', '_',
// a letter and a digit are all expected to match.
std::regex re("pre/[[:w:][.tilde.]]", std::regex::extended);
VERIFY( std::regex_match("pre/~", re) );
VERIFY( std::regex_match("pre/_", re) );
VERIFY( std::regex_match("pre/a", re) );
VERIFY( std::regex_match("pre/0", re) );
}
}
// Test driver: runs the bracket-expression checks.  Falling off the end of
// main yields exit status 0, the same as an explicit "return 0;".
int
main()
{
  test01();
}