regex_automaton.h: Rearrange _NFA's layout.

2013-09-02  Tim Shen  <timshen91@gmail.com>

	* regex_automaton.h: Rearrange _NFA's layout.
	* include/bits/regex_compiler.h: Add _AnyMatcher and _CharMatcher.
	  Rearrange _BracketMatcher's layout.
	  (_BracketMatcher<>::_M_add_char): Use set instead of vector for
	  _M_char_set.
	  (_BracketMatcher<>::_M_add_collating_element): Likewise.
	  (_BracketMatcher<>::_M_make_range): Likewise.
	* include/bits/regex_compiler.tcc (_Compiler<>::_M_atom): Use
	  appropriate constructors of matchers above.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc:
	  New.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc: New.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc:
	  New.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc:
	  New.
	* testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc: New.
	* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc:
	  New.
	* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc: New.

From-SVN: r202189
This commit is contained in:
Tim Shen 2013-09-02 22:20:56 +00:00 committed by Tim Shen
parent ce96d37253
commit e350969184
11 changed files with 255 additions and 60 deletions

View File

@ -1,3 +1,26 @@
2013-09-02 Tim Shen <timshen91@gmail.com>
* regex_automaton.h: Rearrange _NFA's layout.
* include/bits/regex_compiler.h: Add _AnyMatcher and _CharMatcher.
Rearrange _BracketMatcher's layout.
(_BracketMatcher<>::_M_add_char): Use set instead of vector for
_M_char_set.
(_BracketMatcher<>::_M_add_collating_element): Likewise.
(_BracketMatcher<>::_M_make_range): Likewise.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_atom): Use
appropriate constructors of matchers above.
* testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc: New.
2013-08-30 François Dumont <fdumont@gcc.gnu.org>
PR libstdc++/58148

View File

@ -206,12 +206,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_dot(std::ostream& __ostr) const;
#endif
std::vector<unsigned int> _M_paren_stack;
_StateSet _M_accepting_states;
_FlagT _M_flags;
_StateIdT _M_start_state;
_StateSet _M_accepting_states;
_SizeT _M_subexpr_count;
bool _M_has_backref;
std::vector<unsigned int> _M_paren_stack;
};
/// Describes a sequence of one or more %_State, its current start

View File

@ -125,12 +125,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _TraitsT& _M_traits;
_ScannerT _M_scanner;
_StringT _M_value;
_RegexT _M_state_store;
_StringT _M_value;
_StackT _M_stack;
_FlagT _M_flags;
};
template<typename _CharT, typename _TraitsT>
struct _AnyMatcher
{
explicit
_AnyMatcher(const _TraitsT& __traits)
: _M_traits(__traits)
{ }
bool
operator()(_CharT __ch) const
{
return _M_traits.translate(__ch) != '\n'
&& _M_traits.translate(__ch) != '\r'
&& _M_traits.translate(__ch) != u'\u2028'
&& _M_traits.translate(__ch) != u'\u2029';
}
const _TraitsT& _M_traits;
};
template<typename _CharT, typename _TraitsT>
struct _CharMatcher
{
typedef regex_constants::syntax_option_type _FlagT;
explicit
_CharMatcher(_CharT __ch, const _TraitsT& __traits, _FlagT __flags)
: _M_ch(_M_translate(__ch)), _M_traits(__traits), _M_flags(__flags)
{ }
bool
operator()(_CharT __ch) const
{ return _M_ch == _M_translate(__ch); }
_CharT
_M_translate(_CharT __ch) const
{
if (_M_flags & regex_constants::icase)
return _M_traits.translate_nocase(__ch);
else
return _M_traits.translate(__ch);
}
const _TraitsT& _M_traits;
_FlagT _M_flags;
_CharT _M_ch;
};
/// Matches a character range (bracket expression)
template<typename _CharT, typename _TraitsT>
struct _BracketMatcher
@ -141,9 +189,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
explicit
_BracketMatcher(bool __is_non_matching,
const _TraitsT& __t,
const _TraitsT& __traits,
_FlagT __flags)
: _M_is_non_matching(__is_non_matching), _M_traits(__t),
: _M_is_non_matching(__is_non_matching), _M_traits(__traits),
_M_flags(__flags), _M_class_set(0)
{ }
@ -152,7 +200,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_add_char(_CharT __c)
{ _M_char_set.push_back(_M_translate(__c)); }
{ _M_char_set.insert(_M_translate(__c)); }
void
_M_add_collating_element(const _StringT& __s)
@ -162,7 +210,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
// TODO: digraph
_M_char_set.push_back(__st[0]);
_M_char_set.insert(_M_translate(__st[0]));
}
void
@ -186,21 +234,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_make_range(_CharT __l, _CharT __r)
{
_M_range_set.push_back(
make_pair(_M_get_str(_M_translate(__l)),
_M_get_str(_M_translate(__r))));
if (_M_flags & regex_constants::collate)
_M_range_set.insert(
make_pair(_M_get_str(_M_translate(__l)),
_M_get_str(_M_translate(__r))));
else
_M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
}
_CharT
_M_translate(_CharT __c) const
{
if (_M_flags & regex_constants::collate)
if (_M_is_icase())
return _M_traits.translate_nocase(__c);
else
return _M_traits.translate(__c);
if (_M_is_icase())
return _M_traits.translate_nocase(__c);
else
return __c;
return _M_traits.translate(__c);
}
bool
@ -214,12 +262,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return _M_traits.transform(__s.begin(), __s.end());
}
const _TraitsT& _M_traits;
_FlagT _M_flags;
bool _M_is_non_matching;
std::vector<_CharT> _M_char_set;
std::vector<pair<_StringT, _StringT>> _M_range_set;
_CharClassT _M_class_set;
std::set<_CharT> _M_char_set;
std::set<pair<_StringT, _StringT>> _M_range_set;
const _TraitsT& _M_traits;
_CharClassT _M_class_set;
_FlagT _M_flags;
bool _M_is_non_matching;
};
//@} regex-detail

View File

@ -204,32 +204,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (_M_match_token(_ScannerT::_S_token_anychar))
{
const static auto&
__any_matcher = [](_CharT __ch) -> bool
{ return true; };
_M_stack.push(_StateSeqT(_M_state_store,
_M_state_store._M_insert_matcher
(__any_matcher)));
(_AnyMatcher<_CharT, _TraitsT>(_M_traits))));
return true;
}
if (_M_try_char())
{
_CharT __c = _M_value[0];
__detail::_Matcher<_CharT> f;
if (_M_flags & regex_constants::icase)
{
auto __traits = this->_M_traits;
__c = __traits.translate_nocase(__c);
f = [__traits, __c](_CharT __ch) -> bool
{ return __traits.translate_nocase(__ch) == __c; };
}
else
f = [__c](_CharT __ch) -> bool
{ return __ch == __c; };
_M_stack.push(_StateSeqT(_M_state_store,
_M_state_store._M_insert_matcher(f)));
_M_state_store._M_insert_matcher
(_CharMatcher<_CharT, _TraitsT>(_M_value[0],
_M_traits,
_M_flags))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_backref))
@ -374,26 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool __ret = false;
if (_M_traits.isctype(__ch, _M_class_set))
__ret = true;
else if (_M_char_set.count(_M_translate(__ch)))
__ret = true;
else
{
__ch = _M_translate(__ch);
for (auto __c : _M_char_set)
if (__c == __ch)
_StringT __s = _M_get_str(_M_flags & regex_constants::collate
? _M_translate(__ch) : __ch);
for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second)
{
__ret = true;
break;
}
if (!__ret)
{
_StringT __s = _M_get_str(__ch);
for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second)
{
__ret = true;
break;
}
}
}
if (_M_is_non_matching)
return !__ret;

View File

@ -0,0 +1,52 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests ECMAScript "." against a std::string.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks inputs that the ECMAScript pattern "." must fail to match.
void
test01()
{
bool test __attribute__((unused)) = true;
// TEST(res, s): compile res as a regex and verify that it does NOT match
// the whole of string s.
#define TEST(res, s) \
{\
regex re(res);\
string st(s);\
VERIFY(!regex_match(st, re));\
}
// NOTE(review): string st("\0") is built from a C string, so st is empty
// here; this case exercises the empty string, not a NUL character.
TEST(".", "\0");
// Line feed.
TEST(".", "\n");
// Carriage return.
TEST(".", "\r");
}
// Entry point: run the only test case, then exit with status 0.
int
main()
{
  test01();
  return 0;
}

View File

@ -1,7 +1,7 @@
// { dg-options "-std=gnu++11" }
//
// 2013-08-10 Tim Shen <timshen91@gmail.com>
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//

View File

@ -1,7 +1,7 @@
// { dg-options "-std=gnu++11" }
//
// 2013-08-26 Tim Shen <timshen91@gmail.com>
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//

View File

@ -1,7 +1,7 @@
// { dg-options "-std=gnu++11" }
//
// 2013-08-22 Tim Shen <timshen91@gmail.com>
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//

View File

@ -1,7 +1,7 @@
// { dg-options "-std=gnu++11" }
//
// 2013-08-26 Tim Shen <timshen91@gmail.com>
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
@ -34,7 +34,6 @@ test01()
bool test __attribute__((unused)) = true;
VERIFY(regex_match(":", regex("\\x3a")));
VERIFY(regex_match(L"\u1234", wregex(L"\\u1234")));
try
{
regex("\\u400x");

View File

@ -0,0 +1,51 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests ECMAScript "." against a std::wstring.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks that the ECMAScript pattern "." rejects the Unicode line and
// paragraph separators when matching wide strings.
void
test01()
{
bool test __attribute__((unused)) = true;
// TESTL(res, s): compile res as a wregex and verify that it does NOT match
// the whole of wide string s.
#define TESTL(res, s) \
{\
wregex re(res);\
wstring st(s);\
VERIFY(!regex_match(st, re));\
}
// U+2028 LINE SEPARATOR.
TESTL(L".", L"\u2028");
// U+2029 PARAGRAPH SEPARATOR.
TESTL(L".", L"\u2029");
}
// Entry point: run the only test case, then exit with status 0.
int
main()
{
  test01();
  return 0;
}

View File

@ -0,0 +1,44 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-02 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests ECMAScript \x and \u.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks that a four-hex-digit Unicode escape in a wide pattern matches
// the single wide character with that code point (U+1234).
void
test01()
{
bool test __attribute__((unused)) = true;
// The pattern holds the escape sequence as text (note the doubled
// backslash); the subject string holds the character itself.
VERIFY(regex_match(L"\u1234", wregex(L"\\u1234")));
}
// Entry point: run the only test case, then exit with status 0.
int
main()
{
  test01();
  return 0;
}