regex_compiler.h (_AnyMatcher<>::_AnyMatcher(), [...]): Fix _AnyMatcher behavior of POSIX style and move _M_flags to template parameter...

2014-01-07  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_compiler.h (_AnyMatcher<>::_AnyMatcher(),
	_AnyMatcher<>::operator(), _AnyMatcher<>::_M_apply(),
	_CharMatcher<>::_CharMatcher(), _CharMatcher<>::_M_translate(),
	_BracketMatcher<>::_BracketMatcher(), _BracketMatcher<>::operator(),
	_BracketMatcher<>::_M_add_char(),
	_BracketMatcher<>::_M_add_collating_element(),
	_BracketMatcher<>::_M_add_equivalence_class(),
	_BracketMatcher<>::_M_add_character_class(),
	_BracketMatcher<>::_M_make_range(), _BracketMatcher<>::_M_ready(),
	_BracketMatcher<>::_M_apply(), _BracketMatcher<>::_M_make_cache()):
	Fix _AnyMatcher behavior of POSIX style and move _M_flags
	to template parameter; Add cache for _BracketMatcher. Adjust
	declarations from here...
	* include/bits/regex.h (basic_regex<>::imbue()): ...to here. Also,
	imbuing a regex will trigger a recompilation to rebuild the cache.
	* include/bits/regex_compiler.tcc (_Compiler<>::_M_atom(),
	_Compiler<>::_M_bracket_expression()): Adjust matchers' caller for
	different template bool parameters.
	* include/bits/regex_executor.h: Remove unnecessary declarations.
	* include/std/regex: Adjust including orders.
	* testsuite/28_regex/traits/char/user_defined.cc: New.
	* testsuite/28_regex/traits/wchar_t/user_defined.cc: New.

From-SVN: r206400
This commit is contained in:
Tim Shen 2014-01-07 19:17:50 +00:00 committed by Tim Shen
parent 8b2721dab0
commit f43cc2a68d
8 changed files with 362 additions and 81 deletions

View File

@ -1,3 +1,28 @@
2014-01-07 Tim Shen <timshen91@gmail.com>
* include/bits/regex_compiler.h (_AnyMatcher<>::_AnyMatcher(),
_AnyMatcher<>::operator(), _AnyMatcher<>::_M_apply(),
_CharMatcher<>::_CharMatcher(), _CharMatcher<>::_M_translate(),
_BracketMatcher<>::_BracketMatcher(), _BracketMatcher<>::operator(),
_BracketMatcher<>::_M_add_char(),
_BracketMatcher<>::_M_add_collating_element(),
_BracketMatcher<>::_M_add_equivalence_class(),
_BracketMatcher<>::_M_add_character_class(),
_BracketMatcher<>::_M_make_range(), _BracketMatcher<>::_M_ready(),
_BracketMatcher<>::_M_apply(), _BracketMatcher<>::_M_make_cache()):
Fix _AnyMatcher behavior of POSIX style and move _M_flags
to template parameter; Add cache for _BracketMatcher. Adjust
declarations from here...
* include/bits/regex.h (basic_regex<>::imbue()): ...to here. Also,
imbuing a regex will trigger a recompilation to rebuild the cache.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_atom(),
_Compiler<>::_M_bracket_expression()): Adjust matchers' caller for
different template bool parameters.
* include/bits/regex_executor.h: Remove unnecessary declarations.
* include/std/regex: Adjust including orders.
* testsuite/28_regex/traits/char/user_defined.cc: New.
* testsuite/28_regex/traits/wchar_t/user_defined.cc: New.
2014-01-07 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* config/abi/post/solaris2.9/baseline_symbols.txt: Regenerate.

View File

@ -30,6 +30,15 @@
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename>
class basic_regex;
template<typename, typename>
class match_results;
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
@ -48,6 +57,56 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags);
template<typename, typename, typename, bool>
class _Executor;
template<typename _Tp>
struct __has_contiguous_iter : std::false_type { };
template<typename _Ch, typename _Tr, typename _Alloc>
struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
: std::true_type // string<Ch> storage is contiguous
{ };
template<typename _Tp, typename _Alloc>
struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
: std::true_type // vector<Tp> storage is contiguous
{ };
template<typename _Alloc>
struct __has_contiguous_iter<std::vector<bool, _Alloc>>
: std::false_type // vector<bool> storage is not contiguous
{ };
template<typename _Tp>
struct __is_contiguous_normal_iter : std::false_type { };
template<typename _Tp, typename _Cont>
struct
__is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
: __has_contiguous_iter<_Cont>::type
{ };
template<typename _Iter, typename _TraitsT>
using __enable_if_contiguous_normal_iter
= typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _Iter, typename _TraitsT>
using __disable_if_contiguous_normal_iter
= typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _FwdIter, typename _TraitsT>
__disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
template<typename _Iter, typename _TraitsT>
__enable_if_contiguous_normal_iter<_Iter, _TraitsT>
__compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
_GLIBCXX_END_NAMESPACE_VERSION
}
@ -501,6 +560,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
basic_regex(_FwdIter __first, _FwdIter __last,
flag_type __f = ECMAScript)
: _M_flags(__f),
_M_original_str(__first, __last),
_M_automaton(__detail::__compile_nfa(__first, __last, _M_traits,
_M_flags))
{ }
@ -637,6 +697,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
flag_type __flags = ECMAScript)
{
_M_flags = __flags;
_M_original_str.assign(__s.begin(), __s.end());
_M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(),
_M_traits, _M_flags);
return *this;
@ -701,7 +762,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
locale_type
imbue(locale_type __loc)
{ return _M_traits.imbue(__loc); }
{
auto __ret = _M_traits.imbue(__loc);
this->assign(_M_original_str, _M_flags);
return __ret;
}
/**
* @brief Gets the locale currently imbued in the regular expression
@ -744,9 +809,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, bool>
friend class __detail::_Executor;
flag_type _M_flags;
_Rx_traits _M_traits;
_AutomatonPtr _M_automaton;
flag_type _M_flags;
_Rx_traits _M_traits;
basic_string<_Ch_type> _M_original_str;
_AutomatonPtr _M_automaton;
};
/** @brief Standard regular expressions. */

View File

@ -129,43 +129,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StackT _M_stack;
};
template<typename _Tp>
struct __has_contiguous_iter : std::false_type { };
template<typename _Ch, typename _Tr, typename _Alloc>
struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
: std::true_type // string<Ch> storage is contiguous
{ };
template<typename _Tp, typename _Alloc>
struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
: std::true_type // vector<Tp> storage is contiguous
{ };
template<typename _Alloc>
struct __has_contiguous_iter<std::vector<bool, _Alloc>>
: std::false_type // vector<bool> storage is not contiguous
{ };
template<typename _Tp>
struct __is_contiguous_normal_iter : std::false_type { };
template<typename _Tp, typename _Cont>
struct
__is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
: __has_contiguous_iter<_Cont>::type
{ };
template<typename _Iter, typename _TraitsT>
using __enable_if_contiguous_normal_iter
= typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _Iter, typename _TraitsT>
using __disable_if_contiguous_normal_iter
= typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _FwdIter, typename _TraitsT>
inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
@ -185,7 +148,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags);
}
template<typename _TraitsT>
template<typename _TraitsT, bool __is_ecma>
struct _AnyMatcher
{
typedef typename _TraitsT::char_type _CharT;
@ -197,25 +160,55 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
operator()(_CharT __ch) const
{ return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
bool
_M_apply(_CharT __ch, true_type) const
{
return _M_traits.translate(__ch) != '\n'
&& _M_traits.translate(__ch) != '\r'
&& _M_traits.translate(__ch) != u'\u2028'
&& _M_traits.translate(__ch) != u'\u2029';
auto __c = _M_traits.translate(__ch);
if (__is_ecma)
{
static auto __n = _M_traits.translate('\n');
static auto __r = _M_traits.translate('\r');
return __c != __n && __c != __r;
}
else
{
static auto __nul = _M_traits.translate('\0');
return __c != __nul;
}
}
bool
_M_apply(_CharT __ch, false_type) const
{
auto __c = _M_traits.translate(__ch);
if (__is_ecma)
{
static auto __n = _M_traits.translate('\n');
static auto __r = _M_traits.translate('\r');
static auto __u2028 = _M_traits.translate(u'\u2028');
static auto __u2029 = _M_traits.translate(u'\u2029');
return __c != __n && __c != __r && __c != __u2028
&& __c != __u2029;
}
else
{
static auto __nul = _M_traits.translate('\0');
return __c != __nul;
}
}
const _TraitsT& _M_traits;
};
template<typename _TraitsT>
template<typename _TraitsT, bool __icase>
struct _CharMatcher
{
typedef typename _TraitsT::char_type _CharT;
typedef regex_constants::syntax_option_type _FlagT;
explicit
_CharMatcher(_CharT __ch, const _TraitsT& __traits, _FlagT __flags)
: _M_traits(__traits), _M_flags(__flags), _M_ch(_M_translate(__ch))
_CharMatcher(_CharT __ch, const _TraitsT& __traits)
: _M_traits(__traits), _M_ch(_M_translate(__ch))
{ }
bool
@ -225,44 +218,56 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_CharT
_M_translate(_CharT __ch) const
{
if (_M_flags & regex_constants::icase)
if (__icase)
return _M_traits.translate_nocase(__ch);
else
return _M_traits.translate(__ch);
}
const _TraitsT& _M_traits;
_FlagT _M_flags;
_CharT _M_ch;
};
/// Matches a character range (bracket expression)
// TODO: Convert used _M_flags fields to template parameters, including
// collate and icase. Avoid using std::set, could use flat_set
// (sorted vector and binary search) instead; use an fixed sized (256)
// vector<bool> for char specialization if necessary.
// (sorted vector and binary search) instead.
template<typename _TraitsT>
struct _BracketMatcher
{
public:
typedef typename _TraitsT::char_type _CharT;
typedef typename _TraitsT::char_class_type _CharClassT;
typedef typename _TraitsT::string_type _StringT;
typedef regex_constants::syntax_option_type _FlagT;
explicit
public:
_BracketMatcher(bool __is_non_matching,
const _TraitsT& __traits,
_FlagT __flags)
: _M_traits(__traits), _M_class_set(0), _M_flags(__flags),
:
#ifdef _GLIBCXX_DEBUG
_M_is_ready(false),
#endif
_M_traits(__traits), _M_class_set(0), _M_flags(__flags),
_M_is_non_matching(__is_non_matching)
{ }
bool
operator()(_CharT) const;
operator()(_CharT __ch) const
{
_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
return _M_apply(__ch, _IsChar());
}
void
_M_add_char(_CharT __c)
{ _M_char_set.insert(_M_translate(__c)); }
{
_M_char_set.insert(_M_translate(__c));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
}
void
_M_add_collating_element(const _StringT& __s)
@ -272,6 +277,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
_M_char_set.insert(_M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
}
void
@ -284,6 +292,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
_M_equiv_set.insert(__st);
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
}
void
@ -295,6 +306,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__mask == 0)
__throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __mask;
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
}
void
@ -306,8 +320,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_get_str(_M_translate(__r))));
else
_M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
}
void
_M_ready()
{
_M_make_cache(_IsChar());
#ifdef _GLIBCXX_DEBUG
_M_is_ready = true;
#endif
}
private:
typedef typename is_same<_CharT, char>::type _IsChar;
struct _Dummy { };
typedef typename conditional<_IsChar::value,
std::bitset<1 << (8 * sizeof(_CharT))>,
_Dummy>::type _CacheT;
typedef typename make_unsigned<_CharT>::type _UnsignedCharT;
private:
bool
_M_apply(_CharT __ch, false_type) const;
bool
_M_apply(_CharT __ch, true_type) const
{ return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
_CharT
_M_translate(_CharT __c) const
{
@ -328,6 +370,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return _M_traits.transform(__s.begin(), __s.end());
}
void
_M_make_cache(true_type)
{
for (int __i = 0; __i < _M_cache.size(); __i++)
_M_cache[static_cast<_UnsignedCharT>(__i)] =
_M_apply(__i, false_type());
}
void
_M_make_cache(false_type)
{ }
private:
_CacheT _M_cache;
std::set<_CharT> _M_char_set;
std::set<_StringT> _M_equiv_set;
std::set<pair<_StringT, _StringT>> _M_range_set;
@ -335,6 +391,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_CharClassT _M_class_set;
_FlagT _M_flags;
bool _M_is_non_matching;
#ifdef _GLIBCXX_DEBUG
bool _M_is_ready;
#endif
};
//@} regex-detail

View File

@ -284,15 +284,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_atom()
{
if (_M_match_token(_ScannerT::_S_token_anychar))
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT>(_M_traits))));
{
if (_M_flags & regex_constants::ECMAScript)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
true>(_M_traits))));
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
false>(_M_traits))));
}
else if (_M_try_char())
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT>(_M_value[0],
_M_traits,
_M_flags))));
{
if (_M_flags & regex_constants::icase)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
true>(_M_value[0],
_M_traits))));
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
false>(_M_value[0],
_M_traits))));
}
else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10))));
@ -302,6 +320,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
_M_traits, _M_flags);
__matcher._M_add_character_class(_M_value);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
}
@ -341,6 +360,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BMatcherT __matcher(__neg, _M_traits, _M_flags);
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
return true;
@ -432,7 +452,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _TraitsT>
bool
_BracketMatcher<_TraitsT>::operator()(_CharT __ch) const
_BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const
{
bool __ret = false;
if (_M_traits.isctype(__ch, _M_class_set)

View File

@ -32,17 +32,6 @@
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename>
class basic_regex;
template<typename>
class sub_match;
template<typename, typename>
class match_results;
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

View File

@ -56,11 +56,11 @@
#include <bits/regex_constants.h>
#include <bits/regex_error.h>
#include <bits/regex_scanner.h>
#include <bits/regex_automaton.h>
#include <bits/regex.h>
#include <bits/regex_scanner.h>
#include <bits/regex_compiler.h>
#include <bits/regex_executor.h>
#include <bits/regex.h>
#endif // C++11

View File

@ -0,0 +1,60 @@
// { dg-options "-std=gnu++11" }
// { dg-do run }
//
// 2014-01-07 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2010-2014 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.3 Requirements [re.req]
// 28.2 (4) Table 127 - Regular expression traits class requirements
// 28.7 Class template regex_traits [re.traits]
#include <regex>
#include <string>
#include <testsuite_hooks.h>
using namespace std;
template<typename CharT>
class MyRegexTraits
: public regex_traits<CharT>
{
public:
CharT
translate(CharT c) const
{
return c+1;
}
};
void
test01()
{
bool test __attribute__((unused)) = true;
basic_regex<char, MyRegexTraits<char>> re(".");
VERIFY(!regex_match("\n", re));
VERIFY(!regex_match("\r", re));
}
int main()
{
test01();
return 0;
}

View File

@ -0,0 +1,62 @@
// { dg-options "-std=gnu++11" }
// { dg-do run }
//
// 2014-01-07 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2010-2014 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.3 Requirements [re.req]
// 28.2 (4) Table 127 - Regular expression traits class requirements
// 28.7 Class template regex_traits [re.traits]
#include <regex>
#include <string>
#include <testsuite_hooks.h>
using namespace std;
template<typename CharT>
class MyRegexTraits
: public regex_traits<CharT>
{
public:
CharT
translate(CharT c) const
{
return c+1;
}
};
void
test01()
{
bool test __attribute__((unused)) = true;
basic_regex<wchar_t, MyRegexTraits<wchar_t>> re(L".");
VERIFY(!regex_match(L"\n", re));
VERIFY(!regex_match(L"\r", re));
VERIFY(!regex_match(L"\u2028", re));
VERIFY(!regex_match(L"\u2029", re));
}
int main()
{
test01();
return 0;
}