regex.h: Add friend classes.

2013-09-18  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h: Add friend classes.
	(match_results<>::position, regex_iterator<>::operator++):
	Implement position specification in regex_iterator.
	(regex_match<>, regex_search<>):
	Move match_results initializations to these functions. Remove `todo`.
	* include/bits/regex_compiler.tcc:
	(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
	* include/bits/regex_constants.h:
	Fix indentation. Change match_flag_type to enum type.
	* include/bits/regex_executor.h:
	Merge identical code to the base class _Executor.
	Support flags in regex_constants.
	* include/bits/regex_executor.tcc: Likewise.
	* include/bits/regex_scanner.h: Add comments.
	* include/bits/regex_scanner.tcc: Same.
	* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
	Add a testcase.
	* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
	* testsuite/28_regex/iterators/regex_iterator/char/
	string_position_01.cc: Remove `xfail`.
	* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
	Remove `xfail` and make the case really work.

From-SVN: r202706
This commit is contained in:
Tim Shen 2013-09-18 15:56:20 +00:00 committed by Tim Shen
parent 64bc8861e9
commit b21abceec3
12 changed files with 637 additions and 401 deletions

View File

@ -1,3 +1,28 @@
2013-09-18 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Add friend classes.
(match_results<>::position, regex_iterator<>::operator++):
Implement position specification in regex_iterator.
(regex_match<>, regex_search<>):
Move match_results initializations to these functions. Remove `todo`.
* include/bits/regex_compiler.tcc:
(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
* include/bits/regex_constants.h:
Fix indentation. Change match_flag_type to enum type.
* include/bits/regex_executor.h:
Merge identical code to the base class _Executor.
Support flags in regex_constants.
* include/bits/regex_executor.tcc: Likewise.
* include/bits/regex_scanner.h: Add comments.
* include/bits/regex_scanner.tcc: Same.
* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
Add a testcase.
* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
* testsuite/28_regex/iterators/regex_iterator/char/
string_position_01.cc: Remove `xfail`.
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
Remove `xfail` and make the case really work.
2013-09-18 Paolo Carlini <paolo.carlini@oracle.com> 2013-09-18 Paolo Carlini <paolo.carlini@oracle.com>
* testsuite/performance/25_algorithms/search_n.cc: Fix typo. * testsuite/performance/25_algorithms/search_n.cc: Fix typo.

View File

@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_Cp, _Rp>&, const basic_regex<_Cp, _Rp>&,
regex_constants::match_flag_type); regex_constants::match_flag_type);
template<typename, typename, typename, typename>
friend class __detail::_Executor;
template<typename, typename, typename, typename>
friend class __detail::_DFSExecutor;
template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor;
flag_type _M_flags; flag_type _M_flags;
_Rx_traits _M_traits; _Rx_traits _M_traits;
_AutomatonPtr _M_automaton; _AutomatonPtr _M_automaton;
@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/ */
explicit explicit
match_results(const _Alloc& __a = _Alloc()) match_results(const _Alloc& __a = _Alloc())
: _Base_type(__a) : _Base_type(__a), _M_in_iterator(false)
{ } { }
/** /**
* @brief Copy constructs a %match_results. * @brief Copy constructs a %match_results.
*/ */
match_results(const match_results& __rhs) match_results(const match_results& __rhs)
: _Base_type(__rhs) : _Base_type(__rhs), _M_in_iterator(false)
{ } { }
/** /**
* @brief Move constructs a %match_results. * @brief Move constructs a %match_results.
*/ */
match_results(match_results&& __rhs) noexcept match_results(match_results&& __rhs) noexcept
: _Base_type(std::move(__rhs)) : _Base_type(std::move(__rhs)), _M_in_iterator(false)
{ } { }
/** /**
@ -1905,6 +1914,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
difference_type difference_type
position(size_type __sub = 0) const position(size_type __sub = 0) const
{ {
// [28.12.1.4.5]
if (_M_in_iterator)
return __sub < size() ? std::distance(_M_begin,
(*this)[__sub].first) : -1;
else
return __sub < size() ? std::distance(this->prefix().first, return __sub < size() ? std::distance(this->prefix().first,
(*this)[__sub].first) : -1; (*this)[__sub].first) : -1;
} }
@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, typename> template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor; friend class __detail::_BFSExecutor;
template<typename, typename, typename>
friend class regex_iterator;
template<typename _Bp, typename _Ap, template<typename _Bp, typename _Ap,
typename _Ch_type, typename _Rx_traits> typename _Ch_type, typename _Rx_traits>
friend bool friend bool
@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_Ch_type, const basic_regex<_Ch_type,
_Rx_traits>&, _Rx_traits>&,
regex_constants::match_flag_type); regex_constants::match_flag_type);
_Bi_iter _M_begin;
bool _M_in_iterator;
}; };
typedef match_results<const char*> cmatch; typedef match_results<const char*> cmatch;
@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @retval false Otherwise. * @retval false Otherwise.
* *
* @throws an exception of type regex_error. * @throws an exception of type regex_error.
*
* @todo Implement this function.
*/ */
template<typename _Bi_iter, typename _Alloc, template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits> typename _Ch_type, typename _Rx_traits>
@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
auto __size = __re._M_automaton->_M_sub_count();
__size += 2;
__m.resize(__size);
for (decltype(__size) __i = 0; __i < __size; ++__i)
__m.at(__i).matched = false;
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match()) if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
{ {
for (auto __it : __m) for (auto __it : __m)
@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* undefined. * undefined.
* *
* @throws an exception of type regex_error. * @throws an exception of type regex_error.
*
* @todo Implement this function.
*/ */
template<typename _Bi_iter, typename _Alloc, template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits> typename _Ch_type, typename _Rx_traits>
@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
auto __size = __re._M_automaton->_M_sub_count();
__size += 2;
__m.resize(__size);
for (decltype(__size) __i = 0; __i < __size; ++__i)
__m.at(__i).matched = false;
if (__detail::__get_executor(__first, __last, __m, __re, __flags) if (__detail::__get_executor(__first, __last, __m, __re, __flags)
->_M_search()) ->_M_search())
{ {
@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator++() operator++()
{ {
// FIXME: In all cases in which the call to regex_search returns true, // In all cases in which the call to regex_search returns true,
// match.prefix().first shall be equal to the previous value of // match.prefix().first shall be equal to the previous value of
// match[0].second, and for each index i in the half-open range // match[0].second, and for each index i in the half-open range
// [0, match.size()) for which match[i].matched is true, // [0, match.size()) for which match[i].matched is true,
@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
| regex_constants::match_not_null | regex_constants::match_not_null
| regex_constants::match_continuous)) | regex_constants::match_continuous))
{
_M_match._M_in_iterator = true;
_M_match._M_begin = _M_begin;
return *this; return *this;
}
else else
++__start; ++__start;
} }
_M_flags |= regex_constants::match_prev_avail; _M_flags |= regex_constants::match_prev_avail;
if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
{
_M_match._M_in_iterator = true;
_M_match._M_begin = _M_begin;
}
else
_M_match = value_type(); _M_match = value_type();
} }
return *this; return *this;

View File

@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO make comments doxygen format. // FIXME make comments doxygen format.
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast" // This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
// (http://swtch.com/~rsc/regexp/regexp1.html"), // (http://swtch.com/~rsc/regexp/regexp1.html"),
@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__n < 0) if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy(); auto __end = _M_nfa._M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
// nodes. This is a hack, but IMO it works well.
std::stack<_StateIdT> __stack;
for (int __i = 0; __i < __n; ++__i) for (int __i = 0; __i < __n; ++__i)
{ {
auto __tmp = __r._M_clone(); auto __tmp = __r._M_clone();
__e._M_append auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
(_StateSeqT(_M_nfa, __end, __neg);
_M_nfa._M_insert_alt(__tmp._M_start, __stack.push(__alt);
__end, __neg), __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
__tmp._M_end));
} }
__e._M_append(__end); __e._M_append(__end);
while (!__stack.empty())
{
auto& __tmp = _M_nfa[__stack.top()];
__stack.pop();
swap(__tmp._M_next, __tmp._M_alt);
}
} }
else // {3,} else // {3,}
{ {

View File

@ -233,61 +233,61 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* perform bitwise operations on these values and expect the right thing to * perform bitwise operations on these values and expect the right thing to
* happen. * happen.
*/ */
typedef std::bitset<_S_match_flag_last> match_flag_type; enum match_flag_type : unsigned int
{
/** /**
* The default matching rules. * The default matching rules.
*/ */
constexpr match_flag_type match_default = 0; match_default = 0,
/** /**
* The first character in the sequence [first, last) is treated as though it * The first character in the sequence [first, last) is treated as though it
* is not at the beginning of a line, so the character (^) in the regular * is not at the beginning of a line, so the character (^) in the regular
* expression shall not match [first, first). * expression shall not match [first, first).
*/ */
constexpr match_flag_type match_not_bol = 1 << _S_not_bol; match_not_bol = 1 << _S_not_bol,
/** /**
* The last character in the sequence [first, last) is treated as though it * The last character in the sequence [first, last) is treated as though it
* is not at the end of a line, so the character ($) in the regular * is not at the end of a line, so the character ($) in the regular
* expression shall not match [last, last). * expression shall not match [last, last).
*/ */
constexpr match_flag_type match_not_eol = 1 << _S_not_eol; match_not_eol = 1 << _S_not_eol,
/** /**
* The expression \\b is not matched against the sub-sequence * The expression \\b is not matched against the sub-sequence
* [first,first). * [first,first).
*/ */
constexpr match_flag_type match_not_bow = 1 << _S_not_bow; match_not_bow = 1 << _S_not_bow,
/** /**
* The expression \\b should not be matched against the sub-sequence * The expression \\b should not be matched against the sub-sequence
* [last,last). * [last,last).
*/ */
constexpr match_flag_type match_not_eow = 1 << _S_not_eow; match_not_eow = 1 << _S_not_eow,
/** /**
* If more than one match is possible then any match is an acceptable * If more than one match is possible then any match is an acceptable
* result. * result.
*/ */
constexpr match_flag_type match_any = 1 << _S_any; match_any = 1 << _S_any,
/** /**
* The expression does not match an empty sequence. * The expression does not match an empty sequence.
*/ */
constexpr match_flag_type match_not_null = 1 << _S_not_null; match_not_null = 1 << _S_not_null,
/** /**
* The expression only matches a sub-sequence that begins at first . * The expression only matches a sub-sequence that begins at first .
*/ */
constexpr match_flag_type match_continuous = 1 << _S_continuous; match_continuous = 1 << _S_continuous,
/** /**
* --first is a valid iterator position. When this flag is set then the * --first is a valid iterator position. When this flag is set then the
* flags match_not_bol and match_not_bow are ignored by the regular * flags match_not_bol and match_not_bow are ignored by the regular
* expression algorithms 28.11 and iterators 28.12. * expression algorithms 28.11 and iterators 28.12.
*/ */
constexpr match_flag_type match_prev_avail = 1 << _S_prev_avail; match_prev_avail = 1 << _S_prev_avail,
/** /**
* When a regular expression match is to be replaced by a new string, the * When a regular expression match is to be replaced by a new string, the
@ -315,7 +315,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* undefined, use the empty string instead. If * undefined, use the empty string instead. If
* nn > match_results::size(), the result is implementation-defined. * nn > match_results::size(), the result is implementation-defined.
*/ */
constexpr match_flag_type format_default = 0; format_default = 0,
/** /**
* When a regular expression match is to be replaced by a new string, the * When a regular expression match is to be replaced by a new string, the
@ -323,20 +323,58 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
* Operating System Interface (POSIX), IEEE Standard 1003.1-2001]. * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
*/ */
constexpr match_flag_type format_sed = 1 << _S_sed; format_sed = 1 << _S_sed,
/** /**
* During a search and replace operation, sections of the character * During a search and replace operation, sections of the character
* container sequence being searched that do not match the regular * container sequence being searched that do not match the regular
* expression shall not be copied to the output string. * expression shall not be copied to the output string.
*/ */
constexpr match_flag_type format_no_copy = 1 << _S_no_copy; format_no_copy = 1 << _S_no_copy,
/** /**
* When specified during a search and replace operation, only the first * When specified during a search and replace operation, only the first
* occurrence of the regular expression shall be replaced. * occurrence of the regular expression shall be replaced.
*/ */
constexpr match_flag_type format_first_only = 1 << _S_first_only; format_first_only = 1 << _S_first_only,
};
constexpr inline match_flag_type
operator&(match_flag_type __a, match_flag_type __b)
{
return (match_flag_type)(static_cast<unsigned int>(__a)
& static_cast<unsigned int>(__b));
}
constexpr inline match_flag_type
operator|(match_flag_type __a, match_flag_type __b)
{
return (match_flag_type)(static_cast<unsigned int>(__a)
| static_cast<unsigned int>(__b));
}
constexpr inline match_flag_type
operator^(match_flag_type __a, match_flag_type __b)
{
return (match_flag_type)(static_cast<unsigned int>(__a)
^ static_cast<unsigned int>(__b));
}
constexpr inline match_flag_type
operator~(match_flag_type __a)
{ return (match_flag_type)(~static_cast<unsigned int>(__a)); }
inline match_flag_type&
operator&=(match_flag_type& __a, match_flag_type __b)
{ return __a = __a & __b; }
inline match_flag_type&
operator|=(match_flag_type& __a, match_flag_type __b)
{ return __a = __a | __b; }
inline match_flag_type&
operator^=(match_flag_type& __a, match_flag_type __b)
{ return __a = __a ^ __b; }
//@} //@}

View File

@ -28,7 +28,11 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO: convert comments to doxygen format. // FIXME convert comments to doxygen format.
// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
// much more similar. Also, make grouping separated. The
// regex_constants::nosubs enables much simpler execution.
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
@ -57,55 +61,107 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
class _Executor class _Executor
{ {
public: public:
typedef basic_regex<_CharT, _TraitsT> _RegexT;
typedef match_results<_BiIter, _Alloc> _ResultsT; typedef match_results<_BiIter, _Alloc> _ResultsT;
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
typedef regex_constants::match_flag_type _FlagT; typedef regex_constants::match_flag_type _FlagT;
virtual
~_Executor()
{ }
// Set matched when string exactly match the pattern.
virtual bool
_M_match() = 0;
// Set matched when some prefix of the string matches the pattern.
virtual bool
_M_search() = 0;
protected:
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
typedef typename _TraitsT::char_class_type _ClassT; typedef typename _TraitsT::char_class_type _ClassT;
public:
_Executor(_BiIter __begin, _Executor(_BiIter __begin,
_BiIter __end, _BiIter __end,
_ResultsT& __results, _ResultsT& __results,
_FlagT __flags, const _RegexT& __re,
_SizeT __size, _FlagT __flags)
const _TraitsT& __traits) : _M_begin(__begin),
: _M_current(__begin), _M_begin(__begin), _M_end(__end), _M_end(__end),
_M_results(__results), _M_flags(__flags), _M_traits(__traits) _M_results(__results),
_M_re(__re),
_M_flags(__flags)
{ }
// Set matched when string exactly match the pattern.
bool
_M_match()
{ {
__size += 2; _M_match_mode = true;
_M_results.resize(__size); _M_init(_M_begin);
for (_SizeT __i = 0; __i < __size; ++__i) return _M_main();
_M_results[__i].matched = false; }
// Set matched when some prefix of the string matches the pattern.
bool
_M_search_from_first()
{
_M_match_mode = false;
_M_init(_M_begin);
return _M_main();
} }
bool bool
_M_is_word(_CharT __ch) _M_search()
{
if (_M_flags & regex_constants::match_continuous)
return _M_search_from_first();
auto __cur = _M_begin;
do
{
_M_match_mode = false;
_M_init(__cur);
if (_M_main())
return true;
}
// Continue when __cur == _M_end
while (__cur++ != _M_end);
return false;
}
bool
_M_is_word(_CharT __ch) const
{ {
static const _CharT __s = 'w'; static const _CharT __s = 'w';
return _M_traits.isctype(__ch, return _M_re._M_traits.isctype
_M_traits.lookup_classname(&__s, &__s+1)); (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
} }
bool
_M_at_begin() const
{
return _M_current == _M_begin
&& !(_M_flags & (regex_constants::match_not_bol
| regex_constants::match_prev_avail));
}
bool
_M_at_end() const
{
return _M_current == _M_end
&& !(_M_flags & regex_constants::match_not_eol);
}
bool
_M_word_boundry(_State<_CharT, _TraitsT> __state) const;
bool
_M_lookahead(_State<_CharT, _TraitsT> __state) const;
public:
virtual void
_M_init(_BiIter __cur) = 0;
virtual void
_M_set_start(_StateIdT __start) = 0;
virtual bool
_M_main() = 0;
_BiIter _M_current; _BiIter _M_current;
const _BiIter _M_begin; const _BiIter _M_begin;
const _BiIter _M_end; const _BiIter _M_end;
_ResultsVec& _M_results; const _RegexT& _M_re;
const _TraitsT& _M_traits; _ResultsT& _M_results;
_FlagT _M_flags; const _FlagT _M_flags;
bool _M_match_mode;
}; };
// A _DFSExecutor perform a DFS on given NFA and input string. At the very // A _DFSExecutor perform a DFS on given NFA and input string. At the very
@ -128,61 +184,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
public: public:
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT; typedef _NFA<_CharT, _TraitsT> _NFAT;
typedef typename _BaseT::_RegexT _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsT _ResultsT;
typedef typename _BaseT::_ResultsVec _ResultsVec; typedef typename _BaseT::_ResultsVec _ResultsVec;
typedef regex_constants::match_flag_type _FlagT; typedef typename _BaseT::_FlagT _FlagT;
public:
_DFSExecutor(_BiIter __begin, _DFSExecutor(_BiIter __begin,
_BiIter __end, _BiIter __end,
_ResultsT& __results, _ResultsT& __results,
const _RegexT& __nfa, const _RegexT& __re,
const _TraitsT& __traits,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(), : _BaseT(__begin, __end, __results, __re, __flags),
__traits), _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
_M_traits(__traits), _M_nfa(__nfa), _M_cur_results(this->_M_results), (__re._M_automaton)),
_M_start_state(__nfa._M_start()) _M_start_state(_M_nfa._M_start())
{ } { }
bool
_M_match()
{
this->_M_current = this->_M_begin;
return _M_dfs<true>(_M_start_state);
}
bool
_M_search_from_first()
{
this->_M_current = this->_M_begin;
return _M_dfs<false>(_M_start_state);
}
bool
_M_search()
{
auto __cur = this->_M_begin;
do
{
this->_M_current = __cur;
if (_M_dfs<false>(_M_start_state))
return true;
}
// Continue when __cur == _M_end
while (__cur++ != this->_M_end);
return false;
}
private: private:
template<bool __match_mode> void
_M_init(_BiIter __cur)
{
_M_cur_results.resize(_M_nfa._M_sub_count() + 2);
this->_M_current = __cur;
}
void
_M_set_start(_StateIdT __start)
{ _M_start_state = __start; }
bool bool
_M_dfs(_StateIdT __i); _M_main()
{ return _M_dfs(this->_M_start_state); }
bool
_M_dfs(_StateIdT __start);
// To record current solution. // To record current solution.
_ResultsVec _M_cur_results; _ResultsVec _M_cur_results;
const _TraitsT& _M_traits; const _NFAT& _M_nfa;
const _RegexT& _M_nfa;
_StateIdT _M_start_state; _StateIdT _M_start_state;
}; };
@ -206,47 +247,57 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
public: public:
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT; typedef _NFA<_CharT, _TraitsT> _NFAT;
typedef typename _BaseT::_RegexT _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsT _ResultsT;
typedef typename _BaseT::_ResultsVec _ResultsVec;
typedef typename _BaseT::_FlagT _FlagT;
// Here's a solution for greedy/ungreedy mode in BFS approach. We need to // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
// carefully work out how to compare to conflict matching states. // carefully work out how to compare to conflict matching states.
// //
// A matching state is a pair(where, when); `where` is a NFA node; `when` // A matching state is a pair(where, when); `where` is a NFA node; `when`
// is a _BiIter, indicating which char is the next to be mathed one. Two // is a _BiIter, indicating which char is the next to be matched. Two
// matching states conflict means that they have equivalent `where` and // matching states conflict if they have equivalent `where` and `when`.
// `when`.
// //
// Now since we need to drop one and keep another, because at most one of // Now we need to drop one and keep another, because at most one of them
// them could be the final optimal solution. This behavior is affected by // could be the final optimal solution. This behavior is affected by
// greedy policy. // greedy policy.
// //
// The definition of `greedy`: // The definition of `greedy`:
// For the sequence of quantifiers in NFA sorted by their start position, // For the sequence of quantifiers in NFA sorted by their start position,
// now maintain a vector in a matching state, with equal length to // now maintain a vector in every matching state, with equal length to
// quantifier seq, recording repeating times of every quantifier. Now to // quantifier seq, recording repeating times of every quantifier. Now to
// compare two matching states, we just lexically compare these two // compare two matching states, we just lexically compare these two
// vectors. To win the compare(to survive), one matching state needs to // vectors. To win the compare(to survive), one matching state needs to
// make its greedy quantifier count larger, and ungreedy quantifiers // make its greedy quantifier count larger, and ungreedy quantifiers
// count smaller. // count smaller.
// //
// In the implementation, we recorded negative numbers for greedy // In the implementation, we recorded negative counts for greedy
// quantifiers and positive numbers of ungreedy ones. Now a simple // quantifiers and positive counts of ungreedy ones. Now the implicit
// operator<() for lexicographical_compare will emit the answer. // operator<() for lexicographical_compare will emit the answer.
// //
// When two vectors equal, it means the `where`, `when` and quantifier // When two vectors equal, it means the `where`, `when` and quantifier
// counts are identical, it indicates the same answer, so just return // counts are identical, and indicates the same solution; so just return
// false. // false.
struct _ResultsEntry struct _ResultsEntry
: private _BaseT::_ResultsVec : private _ResultsVec
{ {
public: public:
_ResultsEntry(unsigned int __res_sz, unsigned int __sz) _ResultsEntry(unsigned int __res_sz, unsigned int __sz)
: _BaseT::_ResultsVec(__res_sz), _M_quant_keys(__sz) : _ResultsVec(__res_sz), _M_quant_keys(__sz)
{ } { }
void
resize(unsigned int __n)
{ _ResultsVec::resize(__n); }
unsigned int
size()
{ return _ResultsVec::size(); }
sub_match<_BiIter>& sub_match<_BiIter>&
operator[](unsigned int __idx) operator[](unsigned int __idx)
{ return this->_BaseT::_ResultsVec::operator[](__idx); } { return _ResultsVec::operator[](__idx); }
bool bool
operator<(const _ResultsEntry& __rhs) const operator<(const _ResultsEntry& __rhs) const
@ -263,75 +314,47 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_inc(unsigned int __idx, bool __neg) _M_inc(unsigned int __idx, bool __neg)
{ _M_quant_keys[__idx] += __neg ? 1 : -1; } { _M_quant_keys[__idx] += __neg ? 1 : -1; }
typename _BaseT::_ResultsVec _ResultsVec
_M_get() _M_get()
{ return *this; } { return *this; }
public: public:
std::vector<int> _M_quant_keys; std::vector<int> _M_quant_keys;
}; };
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr; typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
typedef regex_constants::match_flag_type _FlagT;
public:
_BFSExecutor(_BiIter __begin, _BFSExecutor(_BiIter __begin,
_BiIter __end, _BiIter __end,
_ResultsT& __results, _ResultsT& __results,
const _RegexT& __nfa, const _RegexT& __re,
const _TraitsT& __traits,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(), : _BaseT(__begin, __end, __results, __re, __flags),
__traits), _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
_M_nfa(__nfa), (__re._M_automaton)),
_M_cur_results(nullptr), _M_start_state(_M_nfa._M_start())
_M_start_state(__nfa._M_start())
{ } { }
bool
_M_match()
{
_M_init(this->_M_begin);
return _M_main_loop<true>();
}
bool
_M_search_from_first()
{
_M_init(this->_M_begin);
return _M_main_loop<false>();
}
bool
_M_search()
{
auto __cur = this->_M_begin;
do
{
_M_init(__cur);
if (_M_main_loop<false>())
return true;
}
// Continue when __cur == _M_end
while (__cur++ != this->_M_end);
return false;
}
private: private:
void void
_M_init(_BiIter __cur) _M_init(_BiIter __cur)
{ {
_GLIBCXX_DEBUG_ASSERT(_M_start_state != _S_invalid_state_id); _GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
this->_M_current = __cur; this->_M_current = __cur;
_M_covered.clear(); _M_covered.clear();
_M_covered[_M_start_state] = _ResultsVec& __res(this->_M_results);
_ResultsPtr(new _ResultsEntry(this->_M_results.size(), _M_covered[this->_M_start_state] =
_ResultsPtr(new _ResultsEntry(__res.size(),
_M_nfa._M_quant_count)); _M_nfa._M_quant_count));
_M_e_closure(); _M_e_closure();
} }
template<bool __match_mode> void
_M_set_start(_StateIdT __start)
{ _M_start_state = __start; }
bool bool
_M_main_loop(); _M_main();
void void
_M_e_closure(); _M_e_closure();
@ -345,10 +368,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::map<_StateIdT, _ResultsPtr> _M_covered; std::map<_StateIdT, _ResultsPtr> _M_covered;
// To record global optimal solution. // To record global optimal solution.
_ResultsPtr _M_cur_results; _ResultsPtr _M_cur_results;
const _RegexT& _M_nfa; const _NFAT& _M_nfa;
_StateIdT _M_start_state; _StateIdT _M_start_state;
}; };
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
__get_executor(_BiIter __b,
_BiIter __e,
match_results<_BiIter, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags);
//@} regex-detail //@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail } // namespace __detail

View File

@ -36,7 +36,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
template<bool __match_mode>
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_dfs(_StateIdT __i) _M_dfs(_StateIdT __i)
{ {
@ -44,9 +43,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// This is not that certain. Need deeper investigate. // This is not that certain. Need deeper investigate.
return false; return false;
auto& __current = this->_M_current; auto& __current = this->_M_current;
auto& __begin = this->_M_begin;
auto& __end = this->_M_end;
auto& __results = _M_cur_results;
const auto& __state = _M_nfa[__i]; const auto& __state = _M_nfa[__i];
bool __ret = false; bool __ret = false;
switch (__state._M_opcode) switch (__state._M_opcode)
@ -54,129 +50,115 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_alternative: case _S_opcode_alternative:
// Greedy or not, this is a question ;) // Greedy or not, this is a question ;)
if (!__state._M_neg) if (!__state._M_neg)
__ret = _M_dfs<__match_mode>(__state._M_alt) __ret = _M_dfs(__state._M_alt)
|| _M_dfs<__match_mode>(__state._M_next); || _M_dfs(__state._M_next);
else else
__ret = _M_dfs<__match_mode>(__state._M_next) __ret = _M_dfs(__state._M_next)
|| _M_dfs<__match_mode>(__state._M_alt); || _M_dfs(__state._M_alt);
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
// Here's the critical part: if there's nothing changed since last // Here's the critical part: if there's nothing changed since last
// visit, do NOT continue. This prevents the executor from get into // visit, do NOT continue. This prevents the executor from get into
// infinite loop when use "()*" to match "". // infinite loop when use "()*" to match "".
// //
// Every change on __results will be rolled back after the recursion // Every change on _M_cur_results will be rolled back after the
// step finished. // recursion step finished.
if (!__results[__state._M_subexpr].matched if (!_M_cur_results[__state._M_subexpr].matched
|| __results[__state._M_subexpr].first != __current) || _M_cur_results[__state._M_subexpr].first != __current)
{ {
auto __back = __current; auto __back = __current;
__results[__state._M_subexpr].first = __current; _M_cur_results[__state._M_subexpr].first = __current;
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
__results[__state._M_subexpr].first = __back; _M_cur_results[__state._M_subexpr].first = __back;
} }
break; break;
case _S_opcode_subexpr_end: case _S_opcode_subexpr_end:
if (__results[__state._M_subexpr].second != __current if (_M_cur_results[__state._M_subexpr].second != __current
|| __results[__state._M_subexpr].matched != true) || _M_cur_results[__state._M_subexpr].matched != true)
{ {
auto __back = __results[__state._M_subexpr]; auto __back = _M_cur_results[__state._M_subexpr];
__results[__state._M_subexpr].second = __current; _M_cur_results[__state._M_subexpr].second = __current;
__results[__state._M_subexpr].matched = true; _M_cur_results[__state._M_subexpr].matched = true;
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
__results[__state._M_subexpr] = __back; _M_cur_results[__state._M_subexpr] = __back;
} }
else else
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
break; break;
case _S_opcode_line_begin_assertion: case _S_opcode_line_begin_assertion:
if (__current == __begin) if (this->_M_at_begin())
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
break; break;
case _S_opcode_line_end_assertion: case _S_opcode_line_end_assertion:
if (__current == __end) if (this->_M_at_end())
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
break; break;
// By definition.
case _S_opcode_word_boundry: case _S_opcode_word_boundry:
{ if (this->_M_word_boundry(__state) == !__state._M_neg)
bool __ans = false; __ret = _M_dfs(__state._M_next);
if (__current == __begin && this->_M_is_word(*__current))
__ans = true;
else if (__current == __end && this->_M_is_word(*__current))
__ans = true;
else
{
auto __pre = __current;
--__pre;
if (this->_M_is_word(*__current)
!= this->_M_is_word(*__pre))
__ans = true;
}
if (__ans == !__state._M_neg)
__ret = _M_dfs<__match_mode>(__state._M_next);
}
break; break;
// Here __state._M_alt offers a single start node for a sub-NFA. // Here __state._M_alt offers a single start node for a sub-NFA.
// We recursivly invoke our algorithm to match the sub-NFA. // We recursivly invoke our algorithm to match the sub-NFA.
case _S_opcode_subexpr_lookahead: case _S_opcode_subexpr_lookahead:
{ if (this->_M_lookahead(__state) == !__state._M_neg)
_ResultsT __m; __ret = _M_dfs(__state._M_next);
// FIXME Here's not necessarily a DFSExecutor. But we need to
// refactor the whole NFA to a recursive tree structure first.
_DFSExecutor __sub(this->_M_current,
this->_M_end,
__m,
this->_M_nfa,
this->_M_traits,
this->_M_flags);
__sub._M_start_state = __state._M_alt;
if (__sub._M_search_from_first() == !__state._M_neg)
__ret = _M_dfs<__match_mode>(__state._M_next);
}
break; break;
case _S_opcode_match: case _S_opcode_match:
if (__current != __end && __state._M_matches(*__current)) if (__current != this->_M_end && __state._M_matches(*__current))
{ {
++__current; ++__current;
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
--__current; --__current;
} }
break; break;
// First fetch the matched result from __results as __submatch; // First fetch the matched result from _M_cur_results as __submatch;
// then compare it with // then compare it with
// (__current, __current + (__submatch.second - __submatch.first)) // (__current, __current + (__submatch.second - __submatch.first))
// If matched, keep going; else just return to try another state. // If matched, keep going; else just return to try another state.
case _S_opcode_backref: case _S_opcode_backref:
{ {
auto& __submatch = __results[__state._M_backref_index]; auto& __submatch = _M_cur_results[__state._M_backref_index];
if (!__submatch.matched) if (!__submatch.matched)
break; break;
auto __last = __current; auto __last = __current;
for (auto __tmp = __submatch.first; for (auto __tmp = __submatch.first;
__last != __end && __tmp != __submatch.second; __last != this->_M_end && __tmp != __submatch.second;
++__tmp) ++__tmp)
++__last; ++__last;
if (_M_traits.transform(__submatch.first, __submatch.second) if (this->_M_re._M_traits.transform(__submatch.first,
== _M_traits.transform(__current, __last)) __submatch.second)
== this->_M_re._M_traits.transform(__current, __last))
if (__last != __current) if (__last != __current)
{ {
auto __backup = __current; auto __backup = __current;
__current = __last; __current = __last;
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
__current = __backup; __current = __backup;
} }
else else
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs(__state._M_next);
} }
break; break;
case _S_opcode_accept: case _S_opcode_accept:
if (__match_mode) if (this->_M_match_mode)
__ret = __current == __end; __ret = __current == this->_M_end;
else else
__ret = true; __ret = true;
if (__current == this->_M_begin
&& (this->_M_flags & regex_constants::match_not_null))
__ret = false;
if (__ret) if (__ret)
this->_M_results = __results; {
_ResultsVec& __res(this->_M_results);
if (this->_M_re.flags() & regex_constants::nosubs)
{
_M_cur_results.resize(3); // truncate
__res.resize(3);
}
for (unsigned int __i = 0; __i < _M_cur_results.size(); ++__i)
if (_M_cur_results[__i].matched)
__res[__i] = _M_cur_results[__i];
}
break; break;
default: default:
_GLIBCXX_DEBUG_ASSERT(false); _GLIBCXX_DEBUG_ASSERT(false);
@ -186,23 +168,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
template<bool __match_mode>
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_main_loop() _M_main()
{ {
bool __ret = false; bool __ret = false;
if (!this->_M_match_mode
&& !(this->_M_flags & regex_constants::match_not_null))
__ret = _M_includes_some() || __ret;
while (this->_M_current != this->_M_end) while (this->_M_current != this->_M_end)
{ {
if (!__match_mode)
// To keep regex_search greedy, no "return true" here.
__ret = _M_includes_some() || __ret;
_M_move(); _M_move();
++this->_M_current; ++this->_M_current;
_M_e_closure(); _M_e_closure();
} if (!this->_M_match_mode)
// To keep regex_search greedy, no "return true" here.
__ret = _M_includes_some() || __ret; __ret = _M_includes_some() || __ret;
}
if (this->_M_match_mode)
__ret = _M_includes_some();
if (__ret) if (__ret)
this->_M_results = _M_cur_results->_M_get(); {
_ResultsVec& __res(this->_M_results);
if (this->_M_re.flags() & regex_constants::nosubs)
{
// truncate
_M_cur_results->resize(3);
__res.resize(3);
}
for (unsigned int __i = 0; __i < _M_cur_results->size(); ++__i)
if ((*_M_cur_results)[__i].matched)
__res[__i] = (*_M_cur_results)[__i];
}
return __ret; return __ret;
} }
@ -211,11 +207,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_e_closure() _M_e_closure()
{ {
auto& __current = this->_M_current;
std::queue<_StateIdT> __q; std::queue<_StateIdT> __q;
std::vector<bool> __in_q(_M_nfa.size(), false); std::vector<bool> __in_q(_M_nfa.size(), false);
auto& __begin = this->_M_begin; auto& __current = this->_M_current;
auto& __end = this->_M_end;
for (auto& __it : _M_covered) for (auto& __it : _M_covered)
{ {
@ -292,46 +286,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
break; break;
case _S_opcode_line_begin_assertion: case _S_opcode_line_begin_assertion:
if (__current == __begin) if (this->_M_at_begin())
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
break; break;
case _S_opcode_line_end_assertion: case _S_opcode_line_end_assertion:
if (__current == __end) if (this->_M_at_end())
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
break; break;
case _S_opcode_word_boundry: case _S_opcode_word_boundry:
{ if (this->_M_word_boundry(__state) == !__state._M_neg)
bool __ans = false;
if (__current == __begin && this->_M_is_word(*__current))
__ans = true;
else if (__current == __end && this->_M_is_word(*__current))
__ans = true;
else
{
auto __pre = __current;
--__pre;
if (this->_M_is_word(*__current)
!= this->_M_is_word(*__pre))
__ans = true;
}
if (__ans == !__state._M_neg)
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
}
break; break;
case _S_opcode_subexpr_lookahead: case _S_opcode_subexpr_lookahead:
{ if (this->_M_lookahead(__state) == !__state._M_neg)
_ResultsT __m;
// Same comment as in DFS.
_BFSExecutor __sub(this->_M_current,
this->_M_end,
__m,
this->_M_nfa,
this->_M_traits,
this->_M_flags);
__sub._M_start_state = __state._M_alt;
if (__sub._M_search_from_first() == !__state._M_neg)
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
}
break; break;
case _S_opcode_match: case _S_opcode_match:
break; break;
@ -395,6 +363,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __succ; return __succ;
} }
// Return whether the current position (_M_current) is at a word boundary.
//
// __state is not read by this function; callers apply the state's negation
// flag themselves.
//
// The result is:
//   - false when the position is both at the begin and at the end;
//   - at the begin: true iff the current character is a word character and
//     match_not_bow is not set;
//   - at the end: true iff the previous character is a word character and
//     match_not_eow is not set;
//   - otherwise: true iff exactly one of the previous and the current
//     character is a word character.
template<typename _BiIter, typename _Alloc,
         typename _CharT, typename _TraitsT>
  bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
  _M_word_boundry(_State<_CharT, _TraitsT> __state) const
  {
    // By definition, a position that is simultaneously begin and end is
    // never a boundary.
    if (_M_at_begin() && _M_at_end())
      return false;

    // At the begin only the current character is inspected, so no
    // predecessor is computed on this path.
    if (_M_at_begin())
      return _M_is_word(*_M_current)
        && !(_M_flags & regex_constants::match_not_bow);

    // Only the remaining paths dereference the predecessor; form it here
    // instead of decrementing unconditionally on entry.
    // NOTE(review): assumes a false _M_at_begin() implies a previous
    // character is available -- confirm against _M_at_begin().
    auto __pre = _M_current;
    --__pre;

    if (_M_at_end())
      return _M_is_word(*__pre)
        && !(_M_flags & regex_constants::match_not_eow);

    return _M_is_word(*_M_current) != _M_is_word(*__pre);
  }
// Return whether the sub-NFA entered at __state._M_alt matches when searched
// from the current position.  Negation is not handled here; the caller
// compares the returned value against the state's negation flag.
template<typename _BiIter, typename _Alloc,
         typename _CharT, typename _TraitsT>
  bool
  _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
  _M_lookahead(_State<_CharT, _TraitsT> __state) const
  {
    // Build a nested executor over [_M_current, _M_end) that is given this
    // executor's _M_results, regex and match flags.
    auto __nested = __get_executor(this->_M_current, this->_M_end,
                                   this->_M_results, this->_M_re,
                                   this->_M_flags);

    // Start the nested run at the lookahead's entry state rather than at the
    // automaton's normal start.
    __nested->_M_set_start(__state._M_alt);

    const bool __matched = __nested->_M_search_from_first();
    return __matched;
  }
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
@ -411,10 +417,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>> auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
(__re._M_automaton); (__re._M_automaton);
if (__p->_M_has_backref) if (__p->_M_has_backref)
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
__re._M_traits, __flags)); return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p,
__re._M_traits, __flags));
} }
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION

View File

@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_backref, _S_token_backref,
_S_token_subexpr_begin, _S_token_subexpr_begin,
_S_token_subexpr_no_group_begin, _S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin, _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
_S_token_subexpr_end, _S_token_subexpr_end,
_S_token_bracket_begin, _S_token_bracket_begin,
_S_token_bracket_neg_begin, _S_token_bracket_neg_begin,
@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_ungreedy, _S_token_ungreedy,
_S_token_line_begin, _S_token_line_begin,
_S_token_line_end, _S_token_line_end,
_S_token_word_bound, _S_token_word_bound, // neg if _M_value[0] == 'n'
_S_token_comma, _S_token_comma,
_S_token_dup_count, _S_token_dup_count,
_S_token_eof, _S_token_eof,
@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StringT _M_value; _StringT _M_value;
bool _M_at_bracket_start; bool _M_at_bracket_start;
public: public:
// TODO: make them static when this file is stable. // FIXME: make them static when this file is stable.
const std::map<char, _TokenT> _M_token_map; const std::map<char, _TokenT> _M_token_map;
const std::map<char, char> _M_ecma_escape_map; const std::map<char, char> _M_ecma_escape_map;
const std::map<char, char> _M_awk_escape_map; const std::map<char, char> _M_awk_escape_map;

View File

@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO make comments doxygen format. // FIXME make comments doxygen format.
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk // and awk

View File

@ -1,5 +1,4 @@
// { dg-options "-std=gnu++11" } // { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
// //
// 2013-09-14 Tim Shen <timshen91@gmail.com> // 2013-09-14 Tim Shen <timshen91@gmail.com>
@ -54,22 +53,37 @@ test01()
string sol[] = string sol[] =
{ {
"This", "This",
"",
"is", "is",
"",
"a", "a",
"",
"regular", "regular",
"",
"expression", "expression",
"",
}; };
regex re("\\b\\w*\\b"); regex re("\\b\\w*\\b");
int i = 0; int i = 0;
for (auto it = sregex_iterator(s.begin(), s.end(), re); for (auto it = sregex_iterator(s.begin(), s.end(), re);
it != sregex_iterator() && i < 5; it != sregex_iterator();
++it) ++it)
{ {
string s((*it)[0].first, (*it)[0].second); string s((*it)[0].first, (*it)[0].second);
VERIFY(s == sol[i++]); VERIFY(s == sol[i++]);
} }
VERIFY(i == 5); VERIFY(i == 10);
{
cmatch m;
regex re("(?=(as)df)as(df)");
regex_search("asdf", m, re);
VERIFY(m.size() == 3);
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
}
} }
int int

View File

@ -0,0 +1,71 @@
// { dg-options "-std=gnu++11" }
//
// 2013-09-18 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
// Tests ECMAScript flags.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks regex_search under the nosubs syntax option and under each match_*
// flag from regex_constants.  Most checks come in pairs: a baseline call that
// must match, then the same call with one flag that must make it fail.
void
test01()
{
// Presumably referenced by the VERIFY macro from testsuite_hooks.h; marked
// unused so builds where the macro ignores it stay warning-free.
bool test __attribute__((unused)) = true;
cmatch m;
// nosubs: the pattern has three groups, yet only the whole-match entry is
// expected in the results (size() == 1).
regex re("((as)(df))", regex_constants::ECMAScript | regex_constants::nosubs);
VERIFY(regex_search("asdf", m, re));
VERIFY(m.size() == 1);
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
// match_not_bol: '^' must not match at the first character.
VERIFY( regex_search("a", regex("^a")));
VERIFY(!regex_search("a", regex("^a"), regex_constants::match_not_bol));
// match_not_eol: '$' must not match at the last character.
VERIFY( regex_search("a", regex("a$")));
VERIFY(!regex_search("a", regex("a$"), regex_constants::match_not_eol));
// match_not_bow: '\b' must not match at the first character.
VERIFY( regex_search("a", regex("\\ba")));
VERIFY(!regex_search("a", regex("\\ba"), regex_constants::match_not_bow));
// match_not_eow: '\b' must not match at the last character.
VERIFY( regex_search("a", regex("a\\b")));
VERIFY(!regex_search("a", regex("a\\b"), regex_constants::match_not_eow));
// match_not_null: an empty match is rejected, both for an empty pattern and
// for "^$" against an empty subject.
VERIFY( regex_search("", regex("")));
VERIFY(!regex_search("", regex(""), regex_constants::match_not_null));
VERIFY( regex_search("", regex("^$")));
VERIFY(!regex_search("", regex("^$"), regex_constants::match_not_null));
// With match_not_null the non-greedy "a*?" cannot settle for the empty
// match, so the expected result is the single character "a".
VERIFY( regex_search("aaa", m, regex("a*?"),
regex_constants::match_not_null));
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "a");
// match_continuous: the match must start at the first character, so "sdf"
// is no longer found inside "asdf".
VERIFY( regex_search("asdf", regex("sdf")));
VERIFY(!regex_search("asdf", regex("sdf"),
regex_constants::match_continuous));
// match_prev_avail: the subject pointer is advanced by one, so the character
// before it is readable.  After ' ' the 'a' starts a word (\b matches);
// after 'b' it does not (\B matches).
VERIFY( regex_search(" a"+1, regex("\\ba"),
regex_constants::match_prev_avail));
VERIFY( regex_search("ba"+1, regex("\\Ba"),
regex_constants::match_prev_avail));
}
// Test driver: runs the flag checks once and reports success to the harness.
int main()
{
  test01();
  return 0;
}

View File

@ -1,5 +1,4 @@
// { dg-options "-std=gnu++11" } // { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
// //
// 2013-07-25 Tim Shen <timshen91@gmail.com> // 2013-07-25 Tim Shen <timshen91@gmail.com>

View File

@ -1,6 +1,5 @@
// { dg-options "-std=gnu++11" } // { dg-options "-std=gnu++11" }
// { dg-require-namedlocale "en_US.UTF-8" } // { dg-require-namedlocale "en_US.UTF-8" }
// { dg-do run { xfail *-*-* } }
// //
// 2013-09-05 Tim Shen <timshen91@gmail.com> // 2013-09-05 Tim Shen <timshen91@gmail.com>
@ -42,13 +41,19 @@ test01()
re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})"); re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
std::wsregex_iterator p(str2.begin(), str2.end(), re2); std::wstring sol[] =
auto a = p; {
++p; L"ä\u2009Ä",
VERIFY(a != p); L"\u2009",
//for (std::wsregex_iterator p(str2.begin(), str2.end(), re2); L"ö\u2009Ö",
// p != std::wsregex_iterator{}; ++p) L"\u2009",
// std::wcout << (*p)[1] << std::endl; L"ü\u2009Ü",
L"",
};
int i = 0;
for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
p != std::wsregex_iterator{}; ++p)
VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
} }
int int