Makefile.am: Adjust to new files.
2013-08-07 Tim Shen <timshen91@gmail.com> * include/Makefile.am: Adjust to new files. * include/Makefile.in: Regenerate. * include/bits/regex.h: Adjust to new interfaces. * include/bits/regex_automaton.h: New. * include/bits/regex_automaton.tcc: New. * include/bits/regex_compiler.h: Adjust to new files. * include/bits/regex_compiler.tcc: New. * include/bits/regex_constants.h: Tail spaces. * include/bits/regex_error.h: Likewise. * include/bits/regex_executor.h: New. * include/bits/regex_executor.tcc: New. * include/std/regex: Adjust to new files. * testsuite/28_regex/algorithms/regex_match/extended/ string_dispatch_01.cc: Adjust to new interfaces. From-SVN: r201573
This commit is contained in:
parent
5ee5b32cb2
commit
6cb784b639
|
@ -1,3 +1,20 @@
|
|||
2013-08-07 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/Makefile.am: Adjust to new files.
|
||||
* include/Makefile.in: Regenerate.
|
||||
* include/bits/regex.h: Adjust to new interfaces.
|
||||
* include/bits/regex_automaton.h: New.
|
||||
* include/bits/regex_automaton.tcc: New.
|
||||
* include/bits/regex_compiler.h: Adjust to new files.
|
||||
* include/bits/regex_compiler.tcc: New.
|
||||
* include/bits/regex_constants.h: Tail spaces.
|
||||
* include/bits/regex_error.h: Likewise.
|
||||
* include/bits/regex_executor.h: New.
|
||||
* include/bits/regex_executor.tcc: New.
|
||||
* include/std/regex: Adjust to new files.
|
||||
* testsuite/28_regex/algorithms/regex_match/extended/
|
||||
string_dispatch_01.cc: Adjust to new interfaces.
|
||||
|
||||
2013-08-07 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
* include/ext/atomicity.h: Add #pragma GCC system_header.
|
||||
|
|
|
@ -126,14 +126,14 @@ bits_headers = \
|
|||
${bits_srcdir}/random.tcc \
|
||||
${bits_srcdir}/range_access.h \
|
||||
${bits_srcdir}/regex.h \
|
||||
${bits_srcdir}/regex_compiler.h \
|
||||
${bits_srcdir}/regex_constants.h \
|
||||
${bits_srcdir}/regex_cursor.h \
|
||||
${bits_srcdir}/regex_error.h \
|
||||
${bits_srcdir}/regex_grep_matcher.h \
|
||||
${bits_srcdir}/regex_grep_matcher.tcc \
|
||||
${bits_srcdir}/regex_nfa.h \
|
||||
${bits_srcdir}/regex_nfa.tcc \
|
||||
${bits_srcdir}/regex_automaton.h \
|
||||
${bits_srcdir}/regex_automaton.tcc \
|
||||
${bits_srcdir}/regex_compiler.h \
|
||||
${bits_srcdir}/regex_compiler.tcc \
|
||||
${bits_srcdir}/regex_executor.h \
|
||||
${bits_srcdir}/regex_executor.tcc \
|
||||
${bits_srcdir}/stream_iterator.h \
|
||||
${bits_srcdir}/streambuf_iterator.h \
|
||||
${bits_srcdir}/shared_ptr.h \
|
||||
|
|
|
@ -393,14 +393,14 @@ bits_headers = \
|
|||
${bits_srcdir}/random.tcc \
|
||||
${bits_srcdir}/range_access.h \
|
||||
${bits_srcdir}/regex.h \
|
||||
${bits_srcdir}/regex_compiler.h \
|
||||
${bits_srcdir}/regex_constants.h \
|
||||
${bits_srcdir}/regex_cursor.h \
|
||||
${bits_srcdir}/regex_error.h \
|
||||
${bits_srcdir}/regex_grep_matcher.h \
|
||||
${bits_srcdir}/regex_grep_matcher.tcc \
|
||||
${bits_srcdir}/regex_nfa.h \
|
||||
${bits_srcdir}/regex_nfa.tcc \
|
||||
${bits_srcdir}/regex_automaton.h \
|
||||
${bits_srcdir}/regex_automaton.tcc \
|
||||
${bits_srcdir}/regex_compiler.h \
|
||||
${bits_srcdir}/regex_compiler.tcc \
|
||||
${bits_srcdir}/regex_executor.h \
|
||||
${bits_srcdir}/regex_executor.tcc \
|
||||
${bits_srcdir}/stream_iterator.h \
|
||||
${bits_srcdir}/streambuf_iterator.h \
|
||||
${bits_srcdir}/shared_ptr.h \
|
||||
|
|
|
@ -40,7 +40,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
/**
|
||||
* @brief Class regex_traits. Describes aspects of a regular expression.
|
||||
*
|
||||
* A regular expression traits class that satisfies the requirements of
|
||||
* A regular expression traits class that satisfies the requirements of
|
||||
* section [28.7].
|
||||
*
|
||||
* The class %regex is parameterized around a set of related types and
|
||||
|
@ -61,7 +61,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_BaseType _M_base;
|
||||
unsigned char _M_extended;
|
||||
static constexpr unsigned char _S_under = 1 << 0;
|
||||
// FIXME: _S_blank should be removed in the future, when locale's complete.
|
||||
// FIXME: _S_blank should be removed in the future,
|
||||
// when locale's complete.
|
||||
static constexpr unsigned char _S_blank = 1 << 1;
|
||||
static constexpr unsigned char _S_valid_mask = 0x3;
|
||||
|
||||
|
@ -128,7 +129,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* @brief Constructs a default traits object.
|
||||
*/
|
||||
regex_traits() { }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gives the length of a C-style string starting at @p __p.
|
||||
*
|
||||
|
@ -153,7 +154,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
char_type
|
||||
translate(char_type __c) const
|
||||
{ return __c; }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Translates a character into a case-insensitive equivalent.
|
||||
*
|
||||
|
@ -165,12 +166,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
char_type
|
||||
translate_nocase(char_type __c) const
|
||||
{
|
||||
{
|
||||
typedef std::ctype<char_type> __ctype_type;
|
||||
const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
|
||||
return __fctyp.tolower(__c);
|
||||
return __fctyp.tolower(__c);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets a sort key for a character sequence.
|
||||
*
|
||||
|
@ -209,7 +210,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*
|
||||
* Effects: if typeid(use_facet<collate<_Ch_type> >) ==
|
||||
* typeid(collate_byname<_Ch_type>) and the form of the sort key
|
||||
* returned by collate_byname<_Ch_type>::transform(__first, __last)
|
||||
* returned by collate_byname<_Ch_type>::transform(__first, __last)
|
||||
* is known and can be converted into a primary sort key
|
||||
* then returns that key, otherwise returns an empty string.
|
||||
*
|
||||
|
@ -239,7 +240,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*
|
||||
* @param __first beginning of the collation element name.
|
||||
* @param __last one-past-the-end of the collation element name.
|
||||
*
|
||||
*
|
||||
* @returns a sequence of one or more characters that represents the
|
||||
* collating element consisting of the character sequence designated by
|
||||
* the iterator range [__first, __last). Returns an empty string if the
|
||||
|
@ -312,13 +313,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* @param __ch a character representing a digit.
|
||||
* @param __radix the radix of the numeric conversion (limited to 8, 10,
|
||||
* or 16).
|
||||
*
|
||||
*
|
||||
* @returns the value represented by the digit __ch in base radix if the
|
||||
* character __ch is a valid digit in base radix; otherwise returns -1.
|
||||
*/
|
||||
int
|
||||
value(_Ch_type __ch, int __radix) const;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Imbues the regex_traits object with a copy of a new locale.
|
||||
*
|
||||
|
@ -336,7 +337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
std::swap(_M_locale, __loc);
|
||||
return __loc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets a copy of the current locale in use by the regex_traits
|
||||
* object.
|
||||
|
@ -344,7 +345,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
locale_type
|
||||
getloc() const
|
||||
{ return _M_locale; }
|
||||
|
||||
|
||||
protected:
|
||||
locale_type _M_locale;
|
||||
};
|
||||
|
@ -579,7 +580,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
if (__s == __it->first)
|
||||
{
|
||||
if (__icase
|
||||
&& ((__it->second & (ctype_base::lower | ctype_base::upper)) != 0))
|
||||
&& ((__it->second
|
||||
& (ctype_base::lower | ctype_base::upper)) != 0))
|
||||
return ctype_base::alpha;
|
||||
return __it->second;
|
||||
}
|
||||
|
@ -594,7 +596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{
|
||||
typedef std::ctype<char_type> __ctype_type;
|
||||
const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
|
||||
|
||||
|
||||
return __fctyp.is(__f._M_base, __c)
|
||||
// [[:w:]]
|
||||
|| ((__f._M_extended & _RegexMask::_S_under)
|
||||
|
@ -662,9 +664,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* character sequence.
|
||||
*/
|
||||
basic_regex()
|
||||
: _M_flags(ECMAScript),
|
||||
_M_automaton(__detail::__compile<const _Ch_type*, _Rx_traits>(0, 0,
|
||||
_M_traits, _M_flags))
|
||||
: _M_flags(ECMAScript), _M_automaton(nullptr)
|
||||
{ }
|
||||
|
||||
/**
|
||||
|
@ -680,9 +680,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
explicit
|
||||
basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript)
|
||||
: _M_flags(__f),
|
||||
_M_automaton(__detail::__compile(__p, __p + _Rx_traits::length(__p),
|
||||
_M_traits, _M_flags))
|
||||
: basic_regex(__p, __p + _Rx_traits::length(__p), __f)
|
||||
{ }
|
||||
|
||||
/**
|
||||
|
@ -697,9 +695,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*
|
||||
* @throws regex_error if @p __p is not a valid regular expression.
|
||||
*/
|
||||
basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f)
|
||||
: _M_flags(__f),
|
||||
_M_automaton(__detail::__compile(__p, __p + __len, _M_traits, _M_flags))
|
||||
basic_regex(const _Ch_type* __p,
|
||||
std::size_t __len, flag_type __f = ECMAScript)
|
||||
: basic_regex(__p, __p + __len, __f)
|
||||
{ }
|
||||
|
||||
/**
|
||||
|
@ -707,10 +705,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*
|
||||
* @param __rhs A @p regex object.
|
||||
*/
|
||||
basic_regex(const basic_regex& __rhs)
|
||||
: _M_flags(__rhs._M_flags), _M_traits(__rhs._M_traits),
|
||||
_M_automaton(__rhs._M_automaton)
|
||||
{ }
|
||||
basic_regex(const basic_regex& __rhs) = default;
|
||||
|
||||
/**
|
||||
* @brief Move-constructs a basic regular expression.
|
||||
|
@ -733,12 +728,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
template<typename _Ch_traits, typename _Ch_alloc>
|
||||
explicit
|
||||
basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
|
||||
basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
|
||||
_Ch_alloc>& __s,
|
||||
flag_type __f = ECMAScript)
|
||||
: _M_flags(__f),
|
||||
_M_automaton(__detail::__compile(__s.begin(), __s.end(),
|
||||
_M_traits, _M_flags))
|
||||
: basic_regex(__s.begin(), __s.end(), __f)
|
||||
{ }
|
||||
|
||||
/**
|
||||
|
@ -755,10 +748,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* expression.
|
||||
*/
|
||||
template<typename _InputIterator>
|
||||
basic_regex(_InputIterator __first, _InputIterator __last,
|
||||
basic_regex(_InputIterator __first, _InputIterator __last,
|
||||
flag_type __f = ECMAScript)
|
||||
: _M_flags(__f),
|
||||
_M_automaton(__detail::__compile(__first, __last, _M_traits, _M_flags))
|
||||
_M_automaton(__detail::_Compiler<_InputIterator, _Ch_type, _Rx_traits>
|
||||
(__first, __last, _M_traits, _M_flags)._M_get_nfa())
|
||||
{ }
|
||||
|
||||
/**
|
||||
|
@ -771,9 +765,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
basic_regex(initializer_list<_Ch_type> __l,
|
||||
flag_type __f = ECMAScript)
|
||||
: _M_flags(__f),
|
||||
_M_automaton(__detail::__compile(__l.begin(), __l.end(),
|
||||
_M_traits, _M_flags))
|
||||
: basic_regex(__l.begin(), __l.end(), __f)
|
||||
{ }
|
||||
|
||||
/**
|
||||
|
@ -781,7 +773,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
~basic_regex()
|
||||
{ }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Assigns one regular expression to another.
|
||||
*/
|
||||
|
@ -806,7 +798,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
basic_regex&
|
||||
operator=(const _Ch_type* __p)
|
||||
{ return this->assign(__p, flags()); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Replaces a regular expression with a new one constructed from
|
||||
* a string.
|
||||
|
@ -831,7 +823,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
this->swap(__tmp);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief The move-assignment operator.
|
||||
*
|
||||
|
@ -880,7 +872,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{ return this->assign(string_type(__p, __len), __flags); }
|
||||
|
||||
/**
|
||||
* @brief Assigns a new regular expression to a regex object from a
|
||||
* @brief Assigns a new regular expression to a regex object from a
|
||||
* string containing a regular expression pattern.
|
||||
*
|
||||
* @param __s A string containing a regular expression pattern.
|
||||
|
@ -894,7 +886,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
basic_regex&
|
||||
assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
|
||||
flag_type __flags = ECMAScript)
|
||||
{
|
||||
{
|
||||
basic_regex __tmp(__s, __flags);
|
||||
this->swap(__tmp);
|
||||
return *this;
|
||||
|
@ -942,7 +934,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
unsigned int
|
||||
mark_count() const
|
||||
{ return _M_automaton->_M_sub_count() - 1; }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets the flags used to construct the regular expression
|
||||
* or in the last call to assign().
|
||||
|
@ -950,7 +942,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
flag_type
|
||||
flags() const
|
||||
{ return _M_flags; }
|
||||
|
||||
|
||||
// [7.8.5] locale
|
||||
/**
|
||||
* @brief Imbues the regular expression object with the given locale.
|
||||
|
@ -960,7 +952,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
locale_type
|
||||
imbue(locale_type __loc)
|
||||
{ return _M_traits.imbue(__loc); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets the locale currently imbued in the regular expression
|
||||
* object.
|
||||
|
@ -968,7 +960,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
locale_type
|
||||
getloc() const
|
||||
{ return _M_traits.getloc(); }
|
||||
|
||||
|
||||
// [7.8.6] swap
|
||||
/**
|
||||
* @brief Swaps the contents of two regular expression objects.
|
||||
|
@ -988,17 +980,40 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_M_dot(std::ostream& __ostr)
|
||||
{ _M_automaton->_M_dot(__ostr); }
|
||||
#endif
|
||||
|
||||
const __detail::_AutomatonPtr&
|
||||
_M_get_automaton() const
|
||||
{ return _M_automaton; }
|
||||
|
||||
protected:
|
||||
flag_type _M_flags;
|
||||
_Rx_traits _M_traits;
|
||||
__detail::_AutomatonPtr _M_automaton;
|
||||
typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>>
|
||||
_AutomatonPtr;
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
friend std::unique_ptr<
|
||||
__detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
__detail::__get_executor(_BiIter,
|
||||
_BiIter,
|
||||
match_results<_BiIter, _Alloc>&,
|
||||
const basic_regex<_CharT, _TraitsT>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
template<typename _B, typename _A, typename _C, typename _R>
|
||||
friend bool
|
||||
regex_match(_B, _B,
|
||||
match_results<_B, _A>&,
|
||||
const basic_regex<_C, _R>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
template<typename _B, typename _A, typename _C, typename _R>
|
||||
friend bool
|
||||
regex_search(_B, _B,
|
||||
match_results<_B, _A>&,
|
||||
const basic_regex<_C, _R>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
flag_type _M_flags;
|
||||
_Rx_traits _M_traits;
|
||||
_AutomatonPtr _M_automaton;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Standard regular expressions. */
|
||||
typedef basic_regex<char> regex;
|
||||
|
||||
|
@ -1046,7 +1061,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
typedef std::basic_string<value_type> string_type;
|
||||
|
||||
bool matched;
|
||||
|
||||
|
||||
constexpr sub_match() : matched() { }
|
||||
|
||||
/**
|
||||
|
@ -1072,7 +1087,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
? string_type(this->first, this->second)
|
||||
: string_type();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets the matching sequence as a string.
|
||||
*
|
||||
|
@ -1085,7 +1100,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
? string_type(this->first, this->second)
|
||||
: string_type();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Compares this and another matched sequence.
|
||||
*
|
||||
|
@ -1111,7 +1126,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
int
|
||||
compare(const string_type& __s) const
|
||||
{ return this->str().compare(__s); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Compares this sub_match to a C-style string.
|
||||
*
|
||||
|
@ -1125,8 +1140,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
compare(const value_type* __s) const
|
||||
{ return this->str().compare(__s); }
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/** @brief Standard regex submatch over a C-style null-terminated string. */
|
||||
typedef sub_match<const char*> csub_match;
|
||||
|
||||
|
@ -1142,7 +1157,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
#endif
|
||||
|
||||
// [7.9.2] sub_match non-member operators
|
||||
|
||||
|
||||
/**
|
||||
* @brief Tests the equivalence of two regular expression submatches.
|
||||
* @param __lhs First regular expression submatch.
|
||||
|
@ -1760,7 +1775,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
|
||||
typedef std::basic_string<char_type> string_type;
|
||||
//@}
|
||||
|
||||
|
||||
public:
|
||||
/**
|
||||
* @name 28.10.1 Construction, Copying, and Destruction
|
||||
|
@ -1815,7 +1830,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
~match_results()
|
||||
{ }
|
||||
|
||||
|
||||
//@}
|
||||
|
||||
// 28.10.2, state:
|
||||
|
@ -1846,7 +1861,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
size_type __size = _Base_type::size();
|
||||
return (__size && _Base_type::operator[](0).matched) ? __size - 2 : 0;
|
||||
}
|
||||
|
||||
|
||||
size_type
|
||||
max_size() const
|
||||
{ return _Base_type::max_size(); }
|
||||
|
@ -1859,7 +1874,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
bool
|
||||
empty() const
|
||||
{ return size() == 0; }
|
||||
|
||||
|
||||
//@}
|
||||
|
||||
/**
|
||||
|
@ -1911,7 +1926,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
string_type
|
||||
str(size_type __sub = 0) const
|
||||
{ return (*this)[__sub].str(); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets a %sub_match reference for the match or submatch.
|
||||
* @param __sub indicates the submatch.
|
||||
|
@ -1925,7 +1940,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
const_reference
|
||||
operator[](size_type __sub) const
|
||||
{
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT( ready() );
|
||||
return __sub < size()
|
||||
? _Base_type::operator[](__sub)
|
||||
|
@ -1972,7 +1987,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
const_iterator
|
||||
begin() const
|
||||
{ return _Base_type::begin(); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets an iterator to the start of the %sub_match collection.
|
||||
*/
|
||||
|
@ -1986,7 +2001,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
const_iterator
|
||||
end() const
|
||||
{ return !empty() ? _Base_type::end() - 2 : _Base_type::end(); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets an iterator to one-past-the-end of the collection.
|
||||
*/
|
||||
|
@ -2047,21 +2062,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
string_type
|
||||
format(const char_type* __fmt,
|
||||
match_flag_type __flags = regex_constants::format_default) const
|
||||
match_flag_type __flags = regex_constants::format_default) const
|
||||
{
|
||||
string_type __result;
|
||||
format(std::back_inserter(__result),
|
||||
__fmt,
|
||||
__fmt + char_traits<char_type>::length(__fmt),
|
||||
__flags);
|
||||
return __result;
|
||||
}
|
||||
|
||||
//@}
|
||||
//@}
|
||||
|
||||
/**
|
||||
* @name 10.5 Allocator
|
||||
*/
|
||||
//@{
|
||||
//@{
|
||||
|
||||
/**
|
||||
* @brief Gets a copy of the allocator.
|
||||
|
@ -2069,13 +2085,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
allocator_type
|
||||
get_allocator() const
|
||||
{ return _Base_type::get_allocator(); }
|
||||
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
/**
|
||||
* @name 10.6 Swap
|
||||
*/
|
||||
//@{
|
||||
//@{
|
||||
|
||||
/**
|
||||
* @brief Swaps the contents of two match_results.
|
||||
|
@ -2083,12 +2099,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
void
|
||||
swap(match_results& __that)
|
||||
{ _Base_type::swap(__that); }
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
private:
|
||||
friend class __detail::_SpecializedResults<_Bi_iter, _Alloc>;
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_Executor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_DFSExecutor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_BFSExecutor;
|
||||
|
||||
template<typename _B, typename _A, typename _Ch_type, typename _Rx_traits>
|
||||
friend bool
|
||||
regex_match(_B, _B, match_results<_B, _A>&,
|
||||
const basic_regex<_Ch_type,
|
||||
_Rx_traits>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
template<typename _B, typename _A, typename _Ch_type, typename _Rx_traits>
|
||||
friend bool
|
||||
regex_search(_B, _B, match_results<_B, _A>&,
|
||||
const basic_regex<_Ch_type,
|
||||
_Rx_traits>&,
|
||||
regex_constants::match_flag_type);
|
||||
};
|
||||
|
||||
|
||||
typedef match_results<const char*> cmatch;
|
||||
typedef match_results<string::const_iterator> smatch;
|
||||
#ifdef _GLIBCXX_USE_WCHAR_T
|
||||
|
@ -2179,11 +2216,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
regex_constants::match_flag_type __flags
|
||||
= regex_constants::match_default)
|
||||
{
|
||||
__detail::_AutomatonPtr __a = __re._M_get_automaton();
|
||||
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
|
||||
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
|
||||
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
|
||||
return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match();
|
||||
if (__re._M_automaton == nullptr)
|
||||
return false;
|
||||
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
|
||||
{
|
||||
for (auto __it : __m)
|
||||
if (!__it.matched)
|
||||
__it.first = __it.second = __e;
|
||||
__m.at(__m.size()).matched = false;
|
||||
__m.at(__m.size()).first = __s;
|
||||
__m.at(__m.size()).second = __s;
|
||||
__m.at(__m.size()+1).matched = false;
|
||||
__m.at(__m.size()+1).first = __e;
|
||||
__m.at(__m.size()+1).second = __e;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2206,7 +2254,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
const basic_regex<_Ch_type, _Rx_traits>& __re,
|
||||
regex_constants::match_flag_type __flags
|
||||
= regex_constants::match_default)
|
||||
{
|
||||
{
|
||||
match_results<_Bi_iter> __what;
|
||||
return regex_match(__first, __last, __what, __re, __flags);
|
||||
}
|
||||
|
@ -2252,7 +2300,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
typename _Alloc, typename _Ch_type, typename _Rx_traits>
|
||||
inline bool
|
||||
regex_match(const basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s,
|
||||
match_results<typename basic_string<_Ch_type,
|
||||
match_results<typename basic_string<_Ch_type,
|
||||
_Ch_traits, _Ch_alloc>::const_iterator, _Alloc>& __m,
|
||||
const basic_regex<_Ch_type, _Rx_traits>& __re,
|
||||
regex_constants::match_flag_type __flags
|
||||
|
@ -2327,29 +2375,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
regex_constants::match_flag_type __flags
|
||||
= regex_constants::match_default)
|
||||
{
|
||||
__detail::_AutomatonPtr __a = __re._M_get_automaton();
|
||||
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
|
||||
__detail::_SpecializedCursor<_Bi_iter> __cs(__first, __last);
|
||||
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
|
||||
if (__re._M_automaton == nullptr)
|
||||
return false;
|
||||
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
|
||||
{
|
||||
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
|
||||
auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags);
|
||||
if (__matcher->_M_search_from_first())
|
||||
{
|
||||
__r._M_set_range(__m.size(),
|
||||
__detail::_SpecializedCursor<_Bi_iter>
|
||||
{__first, __m[0].first});
|
||||
__r._M_set_range(__m.size()+1,
|
||||
__detail::_SpecializedCursor<_Bi_iter>
|
||||
{__m[0].second, __last});
|
||||
__r._M_set_matched(__m.size(),
|
||||
__m.prefix().first != __m.prefix().second);
|
||||
__r._M_set_matched(__m.size()+1,
|
||||
__m.suffix().first != __m.suffix().second);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (__detail::__get_executor(__cur, __last, __m, __re, __flags)
|
||||
->_M_search_from_first())
|
||||
{
|
||||
for (auto __it : __m)
|
||||
if (!__it.matched)
|
||||
__it.first = __it.second = __last;
|
||||
__m.at(__m.size()).first = __first;
|
||||
__m.at(__m.size()).second = __m[0].first;
|
||||
__m.at(__m.size()+1).first = __m[0].second;
|
||||
__m.at(__m.size()+1).second = __last;
|
||||
__m.at(__m.size()).matched =
|
||||
(__m.prefix().first != __m.prefix().second);
|
||||
__m.at(__m.size()+1).matched =
|
||||
(__m.suffix().first != __m.suffix().second);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2513,7 +2557,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
|
||||
// std [28.12] Class template regex_iterator
|
||||
/**
|
||||
* An iterator adaptor that will provide repeated calls of regex_search over
|
||||
* An iterator adaptor that will provide repeated calls of regex_search over
|
||||
* a range until no more matches remain.
|
||||
*/
|
||||
template<typename _Bi_iter,
|
||||
|
@ -2536,7 +2580,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
regex_iterator()
|
||||
: _M_match()
|
||||
{ }
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a %regex_iterator...
|
||||
* @param __a [IN] The start of a text range to search.
|
||||
|
@ -2554,46 +2598,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* Copy constructs a %regex_iterator.
|
||||
*/
|
||||
regex_iterator(const regex_iterator& __rhs) = default;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Assigns one %regex_iterator to another.
|
||||
*/
|
||||
regex_iterator&
|
||||
operator=(const regex_iterator& __rhs) = default;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Tests the equivalence of two regex iterators.
|
||||
*/
|
||||
bool
|
||||
operator==(const regex_iterator& __rhs) const;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Tests the inequivalence of two regex iterators.
|
||||
*/
|
||||
bool
|
||||
operator!=(const regex_iterator& __rhs) const
|
||||
{ return !(*this == __rhs); }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Dereferences a %regex_iterator.
|
||||
*/
|
||||
const value_type&
|
||||
operator*() const
|
||||
{ return _M_match; }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Selects a %regex_iterator member.
|
||||
*/
|
||||
const value_type*
|
||||
operator->() const
|
||||
{ return &_M_match; }
|
||||
|
||||
|
||||
/**
|
||||
* @brief Increments a %regex_iterator.
|
||||
*/
|
||||
regex_iterator&
|
||||
operator++();
|
||||
|
||||
|
||||
/**
|
||||
* @brief Postincrements a %regex_iterator.
|
||||
*/
|
||||
|
@ -2604,7 +2648,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
++(*this);
|
||||
return __tmp;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
_Bi_iter _M_begin;
|
||||
_Bi_iter _M_end;
|
||||
|
@ -2665,7 +2709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
typedef regex_iterator<const char*> cregex_iterator;
|
||||
typedef regex_iterator<string::const_iterator> sregex_iterator;
|
||||
#ifdef _GLIBCXX_USE_WCHAR_T
|
||||
|
@ -2693,11 +2737,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
typedef const value_type* pointer;
|
||||
typedef const value_type& reference;
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Default constructs a %regex_token_iterator.
|
||||
*
|
||||
*
|
||||
* A default-constructed %regex_token_iterator is a singular iterator
|
||||
* that will compare equal to the one-past-the-end value for any
|
||||
* iterator of the same type.
|
||||
|
@ -2705,7 +2749,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
regex_token_iterator()
|
||||
: _M_position(), _M_result(nullptr), _M_suffix(), _M_n(0), _M_subs()
|
||||
{ }
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a %regex_token_iterator...
|
||||
* @param __a [IN] The start of the text to search.
|
||||
|
@ -2987,7 +3031,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
/** @brief Token iterator for standard wide-character strings. */
|
||||
typedef regex_token_iterator<wstring::const_iterator> wsregex_token_iterator;
|
||||
#endif
|
||||
|
||||
|
||||
//@} // group regex
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace
|
||||
|
|
|
@ -0,0 +1,274 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_automaton.h
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
/**
|
||||
* @defgroup regex-detail Base and Implementation Classes
|
||||
* @ingroup regex
|
||||
* @{
|
||||
*/
|
||||
|
||||
/// Identifier of a state: states are referred to by integer id.
using _StateIdT = int;

/// Ordered collection of state ids.
using _StateSet = std::set<_StateIdT>;

/// Id value that denotes "no state"; used as the initial value of a
/// transition target that has not been linked yet.
static const _StateIdT _S_invalid_state_id = -1;

/// Callable predicate over a single character; stored in a state whose
/// opcode is _S_opcode_match.
template<typename _CharT>
  using _Matcher = std::function<bool (_CharT)>;
|
||||
|
||||
/// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression.  The numeric values are spelled
/// out explicitly and must not be renumbered.
enum _Opcode
{
  _S_opcode_unknown       =   0,
  _S_opcode_alternative   =   1,  // state carries a second target (_M_alt)
  _S_opcode_subexpr_begin =   4,  // state carries a sub-expression index
  _S_opcode_subexpr_end   =   5,  // state carries a sub-expression index
  _S_opcode_match         = 100,  // state carries a character matcher
  _S_opcode_accept        = 255   // state is recorded as accepting
};
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
class _State
|
||||
{
|
||||
public:
|
||||
typedef int _OpcodeT;
|
||||
typedef _Matcher<_CharT> _MatcherT;
|
||||
|
||||
_OpcodeT _M_opcode; // type of outgoing transition
|
||||
_StateIdT _M_next; // outgoing transition
|
||||
_StateIdT _M_alt; // for _S_opcode_alternative
|
||||
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
||||
_MatcherT _M_matches; // for _S_opcode_match
|
||||
|
||||
explicit _State(_OpcodeT __opcode)
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
||||
{ }
|
||||
|
||||
_State(const _MatcherT& __m)
|
||||
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id),
|
||||
_M_matches(__m)
|
||||
{ }
|
||||
|
||||
_State(_OpcodeT __opcode, unsigned __index)
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__index)
|
||||
{ }
|
||||
|
||||
_State(_StateIdT __next, _StateIdT __alt)
|
||||
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
|
||||
{ }
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_print(std::ostream& ostr) const;
|
||||
|
||||
// Prints graphviz dot commands for state.
|
||||
std::ostream&
|
||||
_M_dot(std::ostream& __ostr, _StateIdT __id) const;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Abstract base class for automata (per the original note, the concrete
/// automaton could be an NFA or a DFA).
///
/// Objects are handled through pointers to this base, so the destructor is
/// virtual: destroying a derived automaton through a base pointer is then
/// well defined regardless of how the owning pointer was created.
template<typename _CharT, typename _TraitsT>
  class _Automaton
  {
  public:
    typedef unsigned int _SizeT;

  public:
    virtual
    ~_Automaton()
    { }

    /// Returns the sub-expression count of the automaton.
    virtual _SizeT
    _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
    // Debug-only: writes the automaton to @p __ostr (graphviz dot output,
    // judging by the name and the matching _State helper -- defined by the
    // derived class).
    virtual std::ostream&
    _M_dot(std::ostream& __ostr) const = 0;
#endif
  };
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
class _NFA
|
||||
: public _Automaton<_CharT, _TraitsT>,
|
||||
public std::vector<_State<_CharT, _TraitsT>>
|
||||
{
|
||||
public:
|
||||
typedef _State<_CharT, _TraitsT> _StateT;
|
||||
typedef const _Matcher<_CharT>& _MatcherT;
|
||||
typedef unsigned int _SizeT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
|
||||
_NFA(_FlagT __f)
|
||||
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
|
||||
_M_has_backref(false)
|
||||
{ }
|
||||
|
||||
_FlagT
|
||||
_M_options() const
|
||||
{ return _M_flags; }
|
||||
|
||||
_StateIdT
|
||||
_M_start() const
|
||||
{ return _M_start_state; }
|
||||
|
||||
const _StateSet&
|
||||
_M_final_states() const
|
||||
{ return _M_accepting_states; }
|
||||
|
||||
_SizeT
|
||||
_M_sub_count() const
|
||||
{ return _M_subexpr_count; }
|
||||
|
||||
_StateIdT
|
||||
_M_insert_accept()
|
||||
{
|
||||
this->push_back(_StateT(_S_opcode_accept));
|
||||
_M_accepting_states.insert(this->size()-1);
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_alt(_StateIdT __next, _StateIdT __alt)
|
||||
{
|
||||
this->push_back(_StateT(__next, __alt));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_matcher(_MatcherT __m)
|
||||
{
|
||||
this->push_back(_StateT(__m));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_subexpr_begin()
|
||||
{
|
||||
auto __id = _M_subexpr_count++;
|
||||
_M_paren_stack.push(__id);
|
||||
this->push_back(_StateT(_S_opcode_subexpr_begin, __id));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_subexpr_end()
|
||||
{
|
||||
this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.top()));
|
||||
_M_paren_stack.pop();
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_backref(bool __b)
|
||||
{ _M_has_backref = __b; }
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_dot(std::ostream& __ostr) const;
|
||||
#endif
|
||||
|
||||
_FlagT _M_flags;
|
||||
_StateIdT _M_start_state;
|
||||
_StateSet _M_accepting_states;
|
||||
_SizeT _M_subexpr_count;
|
||||
bool _M_has_backref;
|
||||
std::stack<unsigned int> _M_paren_stack;
|
||||
};
|
||||
|
||||
/// Describes a sequence of one or more %_State, its current start
|
||||
/// and end(s). This structure contains fragments of an NFA during
|
||||
/// construction.
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
class _StateSeq
|
||||
{
|
||||
public:
|
||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
public:
|
||||
// Constructs a single-node sequence
|
||||
_StateSeq(_RegexT& __ss, _StateIdT __s,
|
||||
_StateIdT __e = _S_invalid_state_id)
|
||||
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
|
||||
{ }
|
||||
// Constructs a split sequence from two other sequencces
|
||||
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
|
||||
: _M_nfa(__e1._M_nfa),
|
||||
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
|
||||
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
|
||||
{ }
|
||||
|
||||
// Constructs a split sequence from a single sequence
|
||||
_StateSeq(const _StateSeq& __e, _StateIdT __id)
|
||||
: _M_nfa(__e._M_nfa),
|
||||
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
|
||||
_M_end1(__id), _M_end2(__e._M_end1)
|
||||
{ }
|
||||
|
||||
// Constructs a copy of a %_StateSeq
|
||||
_StateSeq(const _StateSeq& __rhs)
|
||||
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
|
||||
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
|
||||
{ }
|
||||
|
||||
_StateSeq& operator=(const _StateSeq& __rhs);
|
||||
|
||||
_StateIdT
|
||||
_M_front() const
|
||||
{ return _M_start; }
|
||||
|
||||
// Extends a sequence by one.
|
||||
void
|
||||
_M_push_back(_StateIdT __id);
|
||||
|
||||
// Extends and maybe joins a sequence.
|
||||
void
|
||||
_M_append(_StateIdT __id);
|
||||
|
||||
void
|
||||
_M_append(_StateSeq& __rhs);
|
||||
|
||||
// Clones an entire sequence.
|
||||
_StateIdT
|
||||
_M_clone();
|
||||
|
||||
private:
|
||||
_RegexT& _M_nfa;
|
||||
_StateIdT _M_start;
|
||||
_StateIdT _M_end1;
|
||||
_StateIdT _M_end2;
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace std
|
||||
|
||||
#include <bits/regex_automaton.tcc>
|
|
@ -0,0 +1,181 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_automaton.tcc
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
std::ostream& _State<_CharT, _TraitsT>::
|
||||
_M_print(std::ostream& ostr) const
|
||||
{
|
||||
switch (_M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
ostr << "alt next=" << _M_next << " alt=" << _M_alt;
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
ostr << "match next=" << _M_next;
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
ostr << "accept next=" << _M_next;
|
||||
break;
|
||||
default:
|
||||
ostr << "unknown next=" << _M_next;
|
||||
break;
|
||||
}
|
||||
return ostr;
|
||||
}
|
||||
|
||||
// Prints graphviz dot commands for state.
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
std::ostream& _State<_CharT, _TraitsT>::
|
||||
_M_dot(std::ostream& __ostr, _StateIdT __id) const
|
||||
{
|
||||
switch (_M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
|
||||
<< __id << " -> " << _M_next
|
||||
<< " [label=\"epsilon\", tailport=\"s\"];\n"
|
||||
<< __id << " -> " << _M_alt
|
||||
<< " [label=\"epsilon\", tailport=\"n\"];\n";
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
|
||||
<< _M_subexpr << "\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nSEND "
|
||||
<< _M_subexpr << "\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
|
||||
break;
|
||||
default:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"?\"];\n";
|
||||
break;
|
||||
}
|
||||
return __ostr;
|
||||
}
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
std::ostream& _NFA<_CharT, _TraitsT>::
|
||||
_M_dot(std::ostream& __ostr) const
|
||||
{
|
||||
__ostr << "digraph _Nfa {\n"
|
||||
<< " rankdir=LR;\n";
|
||||
for (unsigned int __i = 0; __i < this->size(); ++__i)
|
||||
{ this->at(__i)._M_dot(__ostr, __i); }
|
||||
__ostr << "}\n";
|
||||
return __ostr;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
_StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
|
||||
operator=(const _StateSeq& __rhs)
|
||||
{
|
||||
_M_start = __rhs._M_start;
|
||||
_M_end1 = __rhs._M_end1;
|
||||
_M_end2 = __rhs._M_end2;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
void _StateSeq<_CharT, _TraitsT>::
|
||||
_M_push_back(_StateIdT __id)
|
||||
{
|
||||
if (_M_end1 != _S_invalid_state_id)
|
||||
_M_nfa[_M_end1]._M_next = __id;
|
||||
_M_end1 = __id;
|
||||
}
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
void _StateSeq<_CharT, _TraitsT>::
|
||||
_M_append(_StateIdT __id)
|
||||
{
|
||||
if (_M_end2 != _S_invalid_state_id)
|
||||
{
|
||||
if (_M_end2 == _M_end1)
|
||||
_M_nfa[_M_end2]._M_alt = __id;
|
||||
else
|
||||
_M_nfa[_M_end2]._M_next = __id;
|
||||
_M_end2 = _S_invalid_state_id;
|
||||
}
|
||||
if (_M_end1 != _S_invalid_state_id)
|
||||
_M_nfa[_M_end1]._M_next = __id;
|
||||
_M_end1 = __id;
|
||||
}
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
void _StateSeq<_CharT, _TraitsT>::
|
||||
_M_append(_StateSeq& __rhs)
|
||||
{
|
||||
if (_M_end2 != _S_invalid_state_id)
|
||||
{
|
||||
if (_M_end2 == _M_end1)
|
||||
_M_nfa[_M_end2]._M_alt = __rhs._M_start;
|
||||
else
|
||||
_M_nfa[_M_end2]._M_next = __rhs._M_start;
|
||||
_M_end2 = _S_invalid_state_id;
|
||||
}
|
||||
if (__rhs._M_end2 != _S_invalid_state_id)
|
||||
_M_end2 = __rhs._M_end2;
|
||||
if (_M_end1 != _S_invalid_state_id)
|
||||
_M_nfa[_M_end1]._M_next = __rhs._M_start;
|
||||
_M_end1 = __rhs._M_end1;
|
||||
}
|
||||
|
||||
// @todo implement this function.
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
_StateIdT _StateSeq<_CharT, _TraitsT>::
|
||||
_M_clone()
|
||||
{ return 0; }
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,896 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_compiler.tcc
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_advance()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
|
||||
_CharT __c = *_M_current;
|
||||
if (_M_state & _S_state_in_bracket)
|
||||
{
|
||||
_M_scan_in_bracket();
|
||||
return;
|
||||
}
|
||||
if (_M_state & _S_state_in_brace)
|
||||
{
|
||||
_M_scan_in_brace();
|
||||
return;
|
||||
}
|
||||
#if 0
|
||||
// TODO: re-enable line anchors when _M_assertion is implemented.
|
||||
// See PR libstdc++/47724
|
||||
else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
|
||||
{
|
||||
_M_curToken = _S_token_line_begin;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('$'))
|
||||
{
|
||||
_M_curToken = _S_token_line_end;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
else if (__c == _M_ctype.widen('.'))
|
||||
{
|
||||
_M_curToken = _S_token_anychar;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('*'))
|
||||
{
|
||||
_M_curToken = _S_token_closure0;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('+'))
|
||||
{
|
||||
_M_curToken = _S_token_closure1;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('|'))
|
||||
{
|
||||
_M_curToken = _S_token_or;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('['))
|
||||
{
|
||||
if (*++_M_current == _M_ctype.widen('^'))
|
||||
{
|
||||
_M_curToken = _S_token_bracket_inverse_begin;
|
||||
++_M_current;
|
||||
}
|
||||
else
|
||||
_M_curToken = _S_token_bracket_begin;
|
||||
_M_state |= _S_state_in_bracket;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('\\'))
|
||||
{
|
||||
_M_eat_escape();
|
||||
return;
|
||||
}
|
||||
else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
if (__c == _M_ctype.widen('('))
|
||||
{
|
||||
_M_curToken = _S_token_subexpr_begin;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen(')'))
|
||||
{
|
||||
_M_curToken = _S_token_subexpr_end;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('{'))
|
||||
{
|
||||
_M_curToken = _S_token_interval_begin;
|
||||
_M_state |= _S_state_in_brace;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
++_M_current;
|
||||
}
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_scan_in_brace()
|
||||
{
|
||||
if (_M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curToken = _S_token_dup_count;
|
||||
_M_curValue.assign(1, *_M_current);
|
||||
++_M_current;
|
||||
while (_M_current != _M_end
|
||||
&& _M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curValue += *_M_current;
|
||||
++_M_current;
|
||||
}
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen(','))
|
||||
{
|
||||
_M_curToken = _S_token_comma;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
if (_M_flags & (regex_constants::basic | regex_constants::grep))
|
||||
{
|
||||
if (*_M_current == _M_ctype.widen('\\'))
|
||||
_M_eat_escape();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (*_M_current == _M_ctype.widen('}'))
|
||||
{
|
||||
_M_curToken = _S_token_interval_end;
|
||||
_M_state &= ~_S_state_in_brace;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_scan_in_bracket()
|
||||
{
|
||||
if (*_M_current == _M_ctype.widen('['))
|
||||
{
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
|
||||
if (*_M_current == _M_ctype.widen('.'))
|
||||
{
|
||||
_M_curToken = _S_token_collsymbol;
|
||||
_M_eat_collsymbol();
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen(':'))
|
||||
{
|
||||
_M_curToken = _S_token_char_class_name;
|
||||
_M_eat_charclass();
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen('='))
|
||||
{
|
||||
_M_curToken = _S_token_equiv_class_name;
|
||||
_M_eat_equivclass();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen('-'))
|
||||
{
|
||||
_M_curToken = _S_token_dash;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen(']'))
|
||||
{
|
||||
_M_curToken = _S_token_bracket_end;
|
||||
_M_state &= ~_S_state_in_bracket;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen('\\'))
|
||||
{
|
||||
_M_eat_escape();
|
||||
return;
|
||||
}
|
||||
_M_curToken = _S_token_collelem_single;
|
||||
_M_curValue.assign(1, *_M_current);
|
||||
++_M_current;
|
||||
}
|
||||
|
||||
// TODO Complete it.
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_escape()
|
||||
{
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
_CharT __c = *_M_current;
|
||||
++_M_current;
|
||||
|
||||
if (__c == _M_ctype.widen('('))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
_M_curToken = _S_token_subexpr_begin;
|
||||
}
|
||||
else if (__c == _M_ctype.widen(')'))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
_M_curToken = _S_token_subexpr_end;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('{'))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
{
|
||||
_M_curToken = _S_token_interval_begin;
|
||||
_M_state |= _S_state_in_brace;
|
||||
}
|
||||
}
|
||||
else if (__c == _M_ctype.widen('}'))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!(_M_state && _S_state_in_brace))
|
||||
__throw_regex_error(regex_constants::error_badbrace);
|
||||
_M_state &= ~_S_state_in_brace;
|
||||
_M_curToken = _S_token_interval_end;
|
||||
}
|
||||
}
|
||||
else if (__c == _M_ctype.widen('x'))
|
||||
{
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
if (_M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curValue.assign(1, *_M_current);
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
if (_M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curValue += *_M_current;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (__c == _M_ctype.widen('^')
|
||||
|| __c == _M_ctype.widen('.')
|
||||
|| __c == _M_ctype.widen('*')
|
||||
|| __c == _M_ctype.widen('$')
|
||||
|| __c == _M_ctype.widen('\\'))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else if (_M_ctype.is(_CtypeT::digit, __c))
|
||||
{
|
||||
_M_curToken = _S_token_backref;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else if (_M_state & _S_state_in_bracket)
|
||||
{
|
||||
if (__c == _M_ctype.widen('-')
|
||||
|| __c == _M_ctype.widen('[')
|
||||
|| __c == _M_ctype.widen(']'))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else if ((_M_flags & regex_constants::ECMAScript)
|
||||
&& __c == _M_ctype.widen('b'))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, _M_ctype.widen(' '));
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
}
|
||||
|
||||
// Eats a character class or throwns an exception.
|
||||
// current point to ':' delimiter on entry, char after ']' on return
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_charclass()
|
||||
{
|
||||
++_M_current; // skip ':'
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
for (_M_curValue.clear();
|
||||
_M_current != _M_end && *_M_current != _M_ctype.widen(':');
|
||||
++_M_current)
|
||||
_M_curValue += *_M_current;
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
++_M_current; // skip ':'
|
||||
if (*_M_current != _M_ctype.widen(']'))
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
++_M_current; // skip ']'
|
||||
}
|
||||
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_equivclass()
|
||||
{
|
||||
++_M_current; // skip '='
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
for (_M_curValue.clear();
|
||||
_M_current != _M_end && *_M_current != _M_ctype.widen('=');
|
||||
++_M_current)
|
||||
_M_curValue += *_M_current;
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip '='
|
||||
if (*_M_current != _M_ctype.widen(']'))
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip ']'
|
||||
}
|
||||
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_collsymbol()
|
||||
{
|
||||
++_M_current; // skip '.'
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
for (_M_curValue.clear();
|
||||
_M_current != _M_end && *_M_current != _M_ctype.widen('.');
|
||||
++_M_current)
|
||||
_M_curValue += *_M_current;
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip '.'
|
||||
if (*_M_current != _M_ctype.widen(']'))
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip ']'
|
||||
}
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
template<typename _BiIter>
|
||||
std::ostream&
|
||||
_Scanner<_BiIter>::
|
||||
_M_print(std::ostream& ostr)
|
||||
{
|
||||
switch (_M_curToken)
|
||||
{
|
||||
case _S_token_anychar:
|
||||
ostr << "any-character\n";
|
||||
break;
|
||||
case _S_token_backref:
|
||||
ostr << "backref\n";
|
||||
break;
|
||||
case _S_token_bracket_begin:
|
||||
ostr << "bracket-begin\n";
|
||||
break;
|
||||
case _S_token_bracket_inverse_begin:
|
||||
ostr << "bracket-inverse-begin\n";
|
||||
break;
|
||||
case _S_token_bracket_end:
|
||||
ostr << "bracket-end\n";
|
||||
break;
|
||||
case _S_token_char_class_name:
|
||||
ostr << "char-class-name \"" << _M_curValue << "\"\n";
|
||||
break;
|
||||
case _S_token_closure0:
|
||||
ostr << "closure0\n";
|
||||
break;
|
||||
case _S_token_closure1:
|
||||
ostr << "closure1\n";
|
||||
break;
|
||||
case _S_token_collelem_multi:
|
||||
ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
|
||||
break;
|
||||
case _S_token_collelem_single:
|
||||
ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
|
||||
break;
|
||||
case _S_token_collsymbol:
|
||||
ostr << "collsymbol \"" << _M_curValue << "\"\n";
|
||||
break;
|
||||
case _S_token_comma:
|
||||
ostr << "comma\n";
|
||||
break;
|
||||
case _S_token_dash:
|
||||
ostr << "dash\n";
|
||||
break;
|
||||
case _S_token_dup_count:
|
||||
ostr << "dup count: " << _M_curValue << "\n";
|
||||
break;
|
||||
case _S_token_eof:
|
||||
ostr << "EOF\n";
|
||||
break;
|
||||
case _S_token_equiv_class_name:
|
||||
ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
|
||||
break;
|
||||
case _S_token_interval_begin:
|
||||
ostr << "interval begin\n";
|
||||
break;
|
||||
case _S_token_interval_end:
|
||||
ostr << "interval end\n";
|
||||
break;
|
||||
case _S_token_line_begin:
|
||||
ostr << "line begin\n";
|
||||
break;
|
||||
case _S_token_line_end:
|
||||
ostr << "line end\n";
|
||||
break;
|
||||
case _S_token_opt:
|
||||
ostr << "opt\n";
|
||||
break;
|
||||
case _S_token_or:
|
||||
ostr << "or\n";
|
||||
break;
|
||||
case _S_token_ord_char:
|
||||
ostr << "ordinary character: \"" << _M_value() << "\"\n";
|
||||
break;
|
||||
case _S_token_subexpr_begin:
|
||||
ostr << "subexpr begin\n";
|
||||
break;
|
||||
case _S_token_subexpr_end:
|
||||
ostr << "subexpr end\n";
|
||||
break;
|
||||
case _S_token_word_begin:
|
||||
ostr << "word begin\n";
|
||||
break;
|
||||
case _S_token_word_end:
|
||||
ostr << "word end\n";
|
||||
break;
|
||||
case _S_token_unknown:
|
||||
ostr << "-- unknown token --\n";
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
return ostr;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler(_InputIter __b, _InputIter __e,
|
||||
const _TraitsT& __traits, _FlagT __flags)
|
||||
: _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
|
||||
_M_state_store(__flags), _M_flags(__flags)
|
||||
{
|
||||
_StateSeqT __r(_M_state_store,
|
||||
_M_state_store._M_insert_subexpr_begin());
|
||||
_M_disjunction();
|
||||
if (!_M_stack.empty())
|
||||
{
|
||||
__r._M_append(_M_stack.top());
|
||||
_M_stack.pop();
|
||||
}
|
||||
__r._M_append(_M_state_store._M_insert_subexpr_end());
|
||||
__r._M_append(_M_state_store._M_insert_accept());
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_match_token(_Compiler<_InputIter, _CharT, _TraitsT>::_TokenT token)
|
||||
{
|
||||
if (token == _M_scanner._M_token())
|
||||
{
|
||||
_M_cur_value = _M_scanner._M_value();
|
||||
_M_scanner._M_advance();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_disjunction()
|
||||
{
|
||||
this->_M_alternative();
|
||||
if (_M_match_token(_ScannerT::_S_token_or))
|
||||
{
|
||||
_StateSeqT __alt1 = _M_stack.top(); _M_stack.pop();
|
||||
this->_M_disjunction();
|
||||
_StateSeqT __alt2 = _M_stack.top(); _M_stack.pop();
|
||||
_M_stack.push(_StateSeqT(__alt1, __alt2));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_alternative()
|
||||
{
|
||||
if (this->_M_term())
|
||||
{
|
||||
_StateSeqT __re = _M_stack.top(); _M_stack.pop();
|
||||
this->_M_alternative();
|
||||
if (!_M_stack.empty())
|
||||
{
|
||||
__re._M_append(_M_stack.top());
|
||||
_M_stack.pop();
|
||||
}
|
||||
_M_stack.push(__re);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_term()
|
||||
{
|
||||
if (this->_M_assertion())
|
||||
return true;
|
||||
if (this->_M_atom())
|
||||
{
|
||||
this->_M_quantifier();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_assertion()
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_line_begin))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_line_begin);
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_line_end))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_line_end);
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_word_begin))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_word_begin);
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_word_end))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_word_end);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_quantifier()
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_closure0))
|
||||
{
|
||||
if (_M_stack.empty())
|
||||
__throw_regex_error(regex_constants::error_badrepeat);
|
||||
_StateSeqT __r(_M_stack.top(), -1);
|
||||
__r._M_append(__r._M_front());
|
||||
_M_stack.pop();
|
||||
_M_stack.push(__r);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_closure1))
|
||||
{
|
||||
if (_M_stack.empty())
|
||||
__throw_regex_error(regex_constants::error_badrepeat);
|
||||
_StateSeqT __r(_M_state_store,
|
||||
_M_state_store.
|
||||
_M_insert_alt(_S_invalid_state_id,
|
||||
_M_stack.top()._M_front()));
|
||||
_M_stack.top()._M_append(__r);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_opt))
|
||||
{
|
||||
if (_M_stack.empty())
|
||||
__throw_regex_error(regex_constants::error_badrepeat);
|
||||
_StateSeqT __r(_M_stack.top(), -1);
|
||||
_M_stack.pop();
|
||||
_M_stack.push(__r);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_interval_begin))
|
||||
{
|
||||
if (_M_stack.empty())
|
||||
__throw_regex_error(regex_constants::error_badrepeat);
|
||||
if (!_M_match_token(_ScannerT::_S_token_dup_count))
|
||||
__throw_regex_error(regex_constants::error_badbrace);
|
||||
_StateSeqT __r(_M_stack.top());
|
||||
int __min_rep = _M_cur_int_value(10);
|
||||
for (int __i = 1; __i < __min_rep; ++__i)
|
||||
_M_stack.top()._M_append(__r._M_clone());
|
||||
if (_M_match_token(_ScannerT::_S_token_comma))
|
||||
if (_M_match_token(_ScannerT::_S_token_dup_count))
|
||||
{
|
||||
int __n = _M_cur_int_value(10) - __min_rep;
|
||||
if (__n < 0)
|
||||
__throw_regex_error(regex_constants::error_badbrace);
|
||||
for (int __i = 0; __i < __n; ++__i)
|
||||
{
|
||||
_StateSeqT __r(_M_state_store,
|
||||
_M_state_store.
|
||||
_M_insert_alt(_S_invalid_state_id,
|
||||
_M_stack.top()._M_front()));
|
||||
_M_stack.top()._M_append(__r);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
_StateSeqT __r(_M_stack.top(), -1);
|
||||
__r._M_push_back(__r._M_front());
|
||||
_M_stack.pop();
|
||||
_M_stack.push(__r);
|
||||
}
|
||||
if (!_M_match_token(_ScannerT::_S_token_interval_end))
|
||||
__throw_regex_error(regex_constants::error_brace);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_atom()
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_anychar))
|
||||
{
|
||||
const static auto&
|
||||
__any_matcher = [](_CharT) -> bool
|
||||
{ return true; };
|
||||
|
||||
_M_stack.push(_StateSeqT(_M_state_store,
|
||||
_M_state_store._M_insert_matcher
|
||||
(__any_matcher)));
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_ord_char))
|
||||
{
|
||||
auto __c = _M_cur_value[0];
|
||||
__detail::_Matcher<_CharT> f;
|
||||
if (_M_flags & regex_constants::icase)
|
||||
{
|
||||
auto __traits = this->_M_traits;
|
||||
__c = __traits.translate_nocase(__c);
|
||||
f = [__traits, __c](_CharT __ch) -> bool
|
||||
{ return __traits.translate_nocase(__ch) == __c; };
|
||||
}
|
||||
else
|
||||
f = [__c](_CharT __ch) -> bool
|
||||
{ return __ch == __c; };
|
||||
|
||||
_M_stack.push(_StateSeqT(_M_state_store,
|
||||
_M_state_store._M_insert_matcher(f)));
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_backref))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
||||
_M_state_store._M_set_backref(true);
|
||||
//return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
|
||||
{
|
||||
int __mark = _M_state_store._M_sub_count();
|
||||
_StateSeqT __r(_M_state_store,
|
||||
_M_state_store.
|
||||
_M_insert_subexpr_begin());
|
||||
this->_M_disjunction();
|
||||
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
|
||||
__throw_regex_error(regex_constants::error_paren);
|
||||
if (!_M_stack.empty())
|
||||
{
|
||||
__r._M_append(_M_stack.top());
|
||||
_M_stack.pop();
|
||||
}
|
||||
__r._M_append(_M_state_store._M_insert_subexpr_end());
|
||||
_M_stack.push(__r);
|
||||
return true;
|
||||
}
|
||||
return _M_bracket_expression();
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_bracket_expression()
|
||||
{
|
||||
bool __inverse =
|
||||
_M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
|
||||
if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
|
||||
return false;
|
||||
_BMatcherT __matcher( __inverse, _M_traits, _M_flags);
|
||||
// special case: only if _not_ chr first after
|
||||
// '[' or '[^' or if ECMAscript
|
||||
if (!_M_bracket_list(__matcher) // list is empty
|
||||
&& !(_M_flags & regex_constants::ECMAScript))
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
_M_stack.push(_StateSeqT(_M_state_store,
|
||||
_M_state_store._M_insert_matcher(__matcher)));
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool // list is non-empty
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_bracket_list(_BMatcherT& __matcher)
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||
return false;
|
||||
_M_expression_term(__matcher);
|
||||
_M_bracket_list(__matcher);
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_expression_term(_BMatcherT& __matcher)
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
||||
{
|
||||
__matcher._M_add_collating_element(_M_cur_value);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
|
||||
{
|
||||
__matcher._M_add_equivalence_class(_M_cur_value);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
||||
{
|
||||
__matcher._M_add_character_class(_M_cur_value);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
|
||||
{
|
||||
auto __ch = _M_cur_value[0];
|
||||
if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
|
||||
{
|
||||
// If the dash is the last character in the bracket expression,
|
||||
// it is not special.
|
||||
if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
|
||||
__matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
|
||||
else // [a-z]
|
||||
{
|
||||
if (!_M_match_token(_ScannerT::_S_token_collelem_single))
|
||||
__throw_regex_error(regex_constants::error_range);
|
||||
__matcher._M_make_range(__ch, _M_cur_value[0]);
|
||||
}
|
||||
}
|
||||
else // [a]
|
||||
__matcher._M_add_char(__ch);
|
||||
return;
|
||||
}
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
int
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_cur_int_value(int __radix)
|
||||
{
|
||||
int __v = 0;
|
||||
for (typename _StringT::size_type __i = 0;
|
||||
__i < _M_cur_value.length(); ++__i)
|
||||
__v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
|
||||
return __v;
|
||||
}
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
bool _BracketMatcher<_CharT, _TraitsT>::
|
||||
operator()(_CharT __ch) const
|
||||
{
|
||||
auto __oldch = __ch;
|
||||
if (_M_flags & regex_constants::collate)
|
||||
if (_M_is_icase())
|
||||
__ch = _M_traits.translate_nocase(__ch);
|
||||
else
|
||||
__ch = _M_traits.translate(__ch);
|
||||
|
||||
bool __ret = false;
|
||||
for (auto __c : _M_char_set)
|
||||
if (__c == __ch)
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
if (!__ret && _M_traits.isctype(__oldch, _M_class_set))
|
||||
__ret = true;
|
||||
else
|
||||
{
|
||||
_StringT __s = _M_get_str(__ch);
|
||||
for (auto& __it : _M_range_set)
|
||||
if (__it.first <= __s && __s <= __it.second)
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (_M_is_non_matching)
|
||||
__ret = !__ret;
|
||||
return __ret;
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
|
@ -79,7 +79,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
*/
|
||||
typedef unsigned int syntax_option_type;
|
||||
|
||||
/**
|
||||
/**
|
||||
* Specifies that the matching of regular expressions against a character
|
||||
* sequence shall be performed without regard to case.
|
||||
*/
|
||||
|
@ -139,7 +139,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* identical to syntax_option_type extended, except that C-style escape
|
||||
* sequences are supported. These sequences are:
|
||||
* \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\', ',
|
||||
* and \\ddd (where ddd is one, two, or three octal digits).
|
||||
* and \\ddd (where ddd is one, two, or three octal digits).
|
||||
*/
|
||||
constexpr syntax_option_type awk = 1 << _S_awk;
|
||||
|
||||
|
@ -154,7 +154,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
/**
|
||||
* Specifies that the grammar recognized by the regular expression engine is
|
||||
* that used by POSIX utility grep when given the -E option in
|
||||
* IEEE Std 1003.1-2001. This option is identical to syntax_option_type
|
||||
* IEEE Std 1003.1-2001. This option is identical to syntax_option_type
|
||||
* extended, except that newlines are treated as whitespace.
|
||||
*/
|
||||
constexpr syntax_option_type egrep = 1 << _S_egrep;
|
||||
|
@ -215,35 +215,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* expression shall not match [last, last).
|
||||
*/
|
||||
constexpr match_flag_type match_not_eol = 1 << _S_not_eol;
|
||||
|
||||
|
||||
/**
|
||||
* The expression \\b is not matched against the sub-sequence
|
||||
* [first,first).
|
||||
*/
|
||||
constexpr match_flag_type match_not_bow = 1 << _S_not_bow;
|
||||
|
||||
|
||||
/**
|
||||
* The expression \\b should not be matched against the sub-sequence
|
||||
* [last,last).
|
||||
*/
|
||||
constexpr match_flag_type match_not_eow = 1 << _S_not_eow;
|
||||
|
||||
|
||||
/**
|
||||
* If more than one match is possible then any match is an acceptable
|
||||
* result.
|
||||
*/
|
||||
constexpr match_flag_type match_any = 1 << _S_any;
|
||||
|
||||
|
||||
/**
|
||||
* The expression does not match an empty sequence.
|
||||
*/
|
||||
constexpr match_flag_type match_not_null = 1 << _S_not_null;
|
||||
|
||||
|
||||
/**
|
||||
* The expression only matches a sub-sequence that begins at first .
|
||||
*/
|
||||
constexpr match_flag_type match_continuous = 1 << _S_continuous;
|
||||
|
||||
|
||||
/**
|
||||
* --first is a valid iterator position. When this flag is set then the
|
||||
* flags match_not_bol and match_not_bow are ignored by the regular
|
||||
|
@ -260,7 +260,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* operations all non-overlapping occurrences of the regular expression
|
||||
* are located and replaced, and sections of the input that did not match
|
||||
* the expression are copied unchanged to the output string.
|
||||
*
|
||||
*
|
||||
* Format strings (from ECMA-262 [15.5.4.11]):
|
||||
* @li $$ The dollar-sign itself ($)
|
||||
* @li $& The matched substring.
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_cursor.h
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
/**
|
||||
* @defgroup regex-detail Base and Implementation Classes
|
||||
* @ingroup regex
|
||||
* @{
|
||||
*/
|
||||
|
||||
/// Abstract base class for the cursors used during pattern matching.
/// It only declares the stepping and end-test operations; derived
/// classes supply the behaviour.
struct _PatternCursor
{
  virtual ~_PatternCursor() { }

  virtual void _M_next() = 0;

  virtual void _M_prev() = 0;

  virtual bool _M_at_end() const = 0;
};
|
||||
|
||||
/// Provides a cursor into the specific target string.
|
||||
template<typename _FwdIterT>
|
||||
class _SpecializedCursor
|
||||
: public _PatternCursor
|
||||
{
|
||||
public:
|
||||
_SpecializedCursor(const _FwdIterT& __b, const _FwdIterT __e)
|
||||
: _M_b(__b), _M_c(__b), _M_e(__e)
|
||||
{ }
|
||||
|
||||
typename std::iterator_traits<_FwdIterT>::value_type
|
||||
_M_current() const
|
||||
{ return *_M_c; }
|
||||
|
||||
void
|
||||
_M_next()
|
||||
{ ++_M_c; }
|
||||
|
||||
void
|
||||
_M_prev()
|
||||
{ --_M_c; }
|
||||
|
||||
_FwdIterT
|
||||
_M_pos() const
|
||||
{ return _M_c; }
|
||||
|
||||
const _FwdIterT&
|
||||
_M_begin() const
|
||||
{ return _M_b; }
|
||||
|
||||
const _FwdIterT&
|
||||
_M_end() const
|
||||
{ return _M_e; }
|
||||
|
||||
bool
|
||||
_M_at_end() const
|
||||
{ return _M_c == _M_e; }
|
||||
|
||||
private:
|
||||
_FwdIterT _M_b;
|
||||
_FwdIterT _M_c;
|
||||
_FwdIterT _M_e;
|
||||
};
|
||||
|
||||
// Helper function to create a cursor specialized for an iterator class.
|
||||
template<typename _FwdIterT>
|
||||
inline _SpecializedCursor<_FwdIterT>
|
||||
__cursor(const _FwdIterT& __b, const _FwdIterT __e)
|
||||
{ return _SpecializedCursor<_FwdIterT>(__b, __e); }
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
|
@ -45,7 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* @name 5.3 Error Types
|
||||
*/
|
||||
//@{
|
||||
|
||||
|
||||
enum error_type
|
||||
{
|
||||
_S_error_collate,
|
||||
|
|
|
@ -0,0 +1,199 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_executor.h
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
template<typename, typename>
|
||||
class basic_regex;
|
||||
|
||||
template<typename, typename>
|
||||
class match_results;
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
/**
|
||||
* @addtogroup regex-detail
|
||||
* @{
|
||||
*/
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _Executor
|
||||
{
|
||||
public:
|
||||
typedef match_results<_BiIter, _Alloc> _ResultsT;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
|
||||
virtual
|
||||
~_Executor()
|
||||
{ }
|
||||
|
||||
// Set matched when string exactly match the pattern.
|
||||
virtual bool
|
||||
_M_match() = 0;
|
||||
|
||||
// Set matched when some prefix of the string matches the pattern.
|
||||
virtual bool
|
||||
_M_search_from_first() = 0;
|
||||
|
||||
protected:
|
||||
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
|
||||
_Executor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
_FlagT __flags,
|
||||
_SizeT __size)
|
||||
: _M_current(__begin), _M_end(__end),
|
||||
_M_results(__results), _M_flags(__flags)
|
||||
{
|
||||
__results.resize(__size + 2);
|
||||
for (auto __it : __results)
|
||||
__it.matched = false;
|
||||
}
|
||||
|
||||
_BiIter _M_current;
|
||||
_BiIter _M_end;
|
||||
_ResultsT& _M_results;
|
||||
_FlagT _M_flags;
|
||||
};
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _DFSExecutor
|
||||
: public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
|
||||
{
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
|
||||
_DFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
const _RegexT& __nfa,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
||||
_M_nfa(__nfa)
|
||||
{ }
|
||||
|
||||
bool
|
||||
_M_match()
|
||||
{ return _M_dfs<true>(_M_nfa._M_start()); }
|
||||
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{ return _M_dfs<false>(_M_nfa._M_start()); }
|
||||
|
||||
private:
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_dfs(_StateIdT __i);
|
||||
|
||||
const _RegexT& _M_nfa;
|
||||
};
|
||||
|
||||
// It's essentially a variant of Single-Source-Shortest-Path problem, where,
|
||||
// the matching results is the final distance and should be minimized.
|
||||
// Instead of using Dijkstra Algorithm, I pick up the queue-optimized
|
||||
// (BFS-like) Bellman-Ford algorithm,
|
||||
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
|
||||
//
|
||||
// Every entry of _M_covered saves the solution(grouping status) for every
|
||||
// matching head. When states transfer, solutions will be compared and
|
||||
// deduplicated(based on which greedy mode we have).
|
||||
//
|
||||
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
|
||||
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
class _BFSExecutor
|
||||
: public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
|
||||
{
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||
typedef std::unique_ptr<_ResultsT> _ResultsPtr;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
|
||||
_BFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
const _RegexT& __nfa,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
||||
_M_nfa(__nfa)
|
||||
{
|
||||
if (_M_nfa._M_start() != _S_invalid_state_id)
|
||||
_M_covered[_M_nfa._M_start()] =
|
||||
_ResultsPtr(new _ResultsT(this->_M_results));
|
||||
_M_e_closure();
|
||||
}
|
||||
|
||||
bool
|
||||
_M_match()
|
||||
{ return _M_main_loop<true>(); }
|
||||
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{ return _M_main_loop<false>(); }
|
||||
|
||||
private:
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_main_loop();
|
||||
|
||||
void
|
||||
_M_e_closure();
|
||||
|
||||
void
|
||||
_M_move();
|
||||
|
||||
bool
|
||||
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
|
||||
|
||||
bool
|
||||
_M_includes_some() const;
|
||||
|
||||
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
||||
const _RegexT& _M_nfa;
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace std
|
||||
|
||||
#include <bits/regex_executor.tcc>
|
|
@ -0,0 +1,252 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_executor.tcc
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
// TODO: This is too slow. Try to compile the NFA to a DFA.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
template<bool __match_mode>
|
||||
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_dfs(_StateIdT __i)
|
||||
{
|
||||
auto& __current = this->_M_current;
|
||||
auto& __end = this->_M_end;
|
||||
auto& __results = this->_M_results;
|
||||
if (__i == _S_invalid_state_id)
|
||||
// This is not that certain. Need deeper investigate.
|
||||
return false;
|
||||
const auto& __state = _M_nfa[__i];
|
||||
bool __ret = false;
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
// Greedy mode by default. For non-greedy mode,
|
||||
// swap _M_alt and _M_next.
|
||||
// TODO: Add greedy mode option.
|
||||
__ret = _M_dfs<__match_mode>(__state._M_alt)
|
||||
|| _M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
__results.at(__state._M_subexpr).first = __current;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__results.at(__state._M_subexpr).second = __current;
|
||||
__results.at(__state._M_subexpr).matched = __ret;
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
if (__current != __end && __state._M_matches(*__current))
|
||||
{
|
||||
++__current;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
--__current;
|
||||
}
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
if (__match_mode)
|
||||
__ret = __current == __end;
|
||||
else
|
||||
__ret = true;
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
return __ret;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
template<bool __match_mode>
|
||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_main_loop()
|
||||
{
|
||||
while (this->_M_current != this->_M_end)
|
||||
{
|
||||
if (!__match_mode)
|
||||
if (_M_includes_some())
|
||||
return true;
|
||||
_M_move();
|
||||
++this->_M_current;
|
||||
_M_e_closure();
|
||||
}
|
||||
return _M_includes_some();
|
||||
}
|
||||
|
||||
// The SPFA approach.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_e_closure()
|
||||
{
|
||||
auto& __current = this->_M_current;
|
||||
std::queue<_StateIdT> __q;
|
||||
std::vector<bool> __in_q(_M_nfa.size(), false);
|
||||
for (auto& __it : _M_covered)
|
||||
{
|
||||
__in_q[__it.first] = true;
|
||||
__q.push(__it.first);
|
||||
}
|
||||
while (!__q.empty())
|
||||
{
|
||||
auto __u = __q.front();
|
||||
__q.pop();
|
||||
__in_q[__u] = false;
|
||||
const auto& __state = _M_nfa[__u];
|
||||
|
||||
// Can be implemented using method, but there're too much arguments.
|
||||
auto __add_visited_state = [&](_StateIdT __v)
|
||||
{
|
||||
if (__v == _S_invalid_state_id)
|
||||
return;
|
||||
if (_M_match_less_than(__u, __v))
|
||||
{
|
||||
_M_covered[__v] = _ResultsPtr(new _ResultsT(*_M_covered[__u]));
|
||||
// if a state is updated, it's outgoing neighbors should be
|
||||
// reconsidered too. Push them to the queue.
|
||||
if (!__in_q[__v])
|
||||
{
|
||||
__in_q[__v] = true;
|
||||
__q.push(__v);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
__add_visited_state(__state._M_next);
|
||||
__add_visited_state(__state._M_alt);
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
_M_covered[__u]->at(__state._M_subexpr).first = __current;
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
_M_covered[__u]->at(__state._M_subexpr).second = __current;
|
||||
_M_covered[__u]->at(__state._M_subexpr).matched = true;
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_move()
|
||||
{
|
||||
decltype(_M_covered) __next;
|
||||
for (auto& __it : _M_covered)
|
||||
{
|
||||
const auto& __state = _M_nfa[__it.first];
|
||||
if (__state._M_opcode == _S_opcode_match
|
||||
&& __state._M_matches(*this->_M_current))
|
||||
if (_M_match_less_than(__it.first, __state._M_next)
|
||||
&& __state._M_next != _S_invalid_state_id)
|
||||
__next[__state._M_next] = move(__it.second);
|
||||
}
|
||||
_M_covered = move(__next);
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_match_less_than(_StateIdT __u, _StateIdT __v) const
|
||||
{
|
||||
if (_M_covered.count(__u) == 0)
|
||||
return false;
|
||||
if (_M_covered.count(__v) > 0)
|
||||
return true;
|
||||
// TODO: Greedy and Non-greedy support
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_includes_some() const
|
||||
{
|
||||
auto& __s = _M_nfa._M_final_states();
|
||||
auto& __t = _M_covered;
|
||||
if (__s.size() > 0 && __t.size() > 0)
|
||||
{
|
||||
auto __first = __s.begin();
|
||||
auto __second = __t.begin();
|
||||
while (__first != __s.end() && __second != __t.end())
|
||||
{
|
||||
if (*__first < __second->first)
|
||||
++__first;
|
||||
else if (__second->first < *__first)
|
||||
++__second;
|
||||
else
|
||||
{
|
||||
this->_M_results = *__second->second;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
__get_executor(_BiIter __b,
|
||||
_BiIter __e,
|
||||
match_results<_BiIter, _Alloc>& __m,
|
||||
const basic_regex<_CharT, _TraitsT>& __re,
|
||||
regex_constants::match_flag_type __flags)
|
||||
{
|
||||
typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
_ExecutorPtr;
|
||||
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
|
||||
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton);
|
||||
if (__p->_M_has_backref)
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
|
@ -1,260 +0,0 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_grep_matcher.h
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter>
|
||||
class sub_match;
|
||||
|
||||
template<typename _Bi_iter, typename _Allocator>
|
||||
class match_results;
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
/**
|
||||
* @defgroup regex-detail Base and Implementation Classes
|
||||
* @ingroup regex
|
||||
* @{
|
||||
*/
|
||||
|
||||
/// A _Results facade specialized for wrapping a templated match_results.
|
||||
template<typename _FwdIterT, typename _Alloc>
|
||||
class _SpecializedResults
|
||||
: public _Results
|
||||
{
|
||||
public:
|
||||
_SpecializedResults(const _Automaton::_SizeT __size,
|
||||
const _SpecializedCursor<_FwdIterT>& __cursor,
|
||||
match_results<_FwdIterT, _Alloc>& __m);
|
||||
|
||||
~_SpecializedResults()
|
||||
{
|
||||
if (_M_managed)
|
||||
delete &_M_results;
|
||||
}
|
||||
|
||||
private:
|
||||
_SpecializedResults(const _SpecializedResults& __rhs)
|
||||
: _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
|
||||
_M_managed(true)
|
||||
{ }
|
||||
|
||||
public:
|
||||
void
|
||||
_M_set_pos(int __i, int __j, const _PatternCursor& __pc);
|
||||
|
||||
void
|
||||
_M_set_range(int __i, const _PatternCursor& __pc)
|
||||
{
|
||||
typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
|
||||
_CursorT __c = static_cast<_CursorT>(__pc);
|
||||
_M_results.at(__i).first = __c._M_begin();
|
||||
_M_results.at(__i).second = __c._M_end();
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_matched(int __i, bool __is_matched)
|
||||
{ _M_results.at(__i).matched = __is_matched; }
|
||||
|
||||
std::unique_ptr<_Results>
|
||||
_M_clone() const
|
||||
{ return unique_ptr<_Results>(new _SpecializedResults(*this)); }
|
||||
|
||||
void
|
||||
_M_assign(const _Results& __rhs)
|
||||
{
|
||||
auto __r = static_cast<const _SpecializedResults*>(&__rhs);
|
||||
_M_results = __r->_M_results;
|
||||
}
|
||||
|
||||
private:
|
||||
match_results<_FwdIterT, _Alloc>& _M_results;
|
||||
bool _M_managed;
|
||||
};
|
||||
|
||||
template<typename _FwdIterT, typename _Alloc>
|
||||
_SpecializedResults<_FwdIterT, _Alloc>::
|
||||
_SpecializedResults(const _Automaton::_SizeT __size,
|
||||
const _SpecializedCursor<_FwdIterT>& __cursor,
|
||||
match_results<_FwdIterT, _Alloc>& __m)
|
||||
: _M_results(__m), _M_managed(false)
|
||||
{
|
||||
_M_results.clear();
|
||||
_M_results.reserve(__size + 2);
|
||||
_M_results.resize(__size);
|
||||
typename match_results<_FwdIterT, _Alloc>::value_type __sm;
|
||||
__sm.first = __sm.second = __cursor._M_begin();
|
||||
_M_results.push_back(__sm);
|
||||
__sm.first = __sm.second = __cursor._M_end();
|
||||
_M_results.push_back(__sm);
|
||||
}
|
||||
|
||||
template<typename _FwdIterT, typename _Alloc>
|
||||
void
|
||||
_SpecializedResults<_FwdIterT, _Alloc>::
|
||||
_M_set_pos(int __i, int __j, const _PatternCursor& __pc)
|
||||
{
|
||||
typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
|
||||
_CursorT __c = static_cast<_CursorT>(__pc);
|
||||
if (__j == 0)
|
||||
_M_results.at(__i).first = __c._M_pos();
|
||||
else
|
||||
_M_results.at(__i).second = __c._M_pos();
|
||||
}
|
||||
|
||||
/// Executes a regular expression NFA/DFA over a range using a
|
||||
/// variant of the parallel execution algorithm featured in the grep
|
||||
/// utility, modified to use Laurikari tags.
|
||||
class _Grep_matcher
|
||||
{
|
||||
public:
|
||||
_Grep_matcher(_PatternCursor& __p,
|
||||
_Results& __r,
|
||||
const _AutomatonPtr& __automaton,
|
||||
regex_constants::match_flag_type __flags)
|
||||
: _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
|
||||
_M_str_cur(__p), _M_results(__r)
|
||||
{ }
|
||||
|
||||
virtual
|
||||
~_Grep_matcher()
|
||||
{ }
|
||||
|
||||
// Set matched when string exactly match the pattern.
|
||||
virtual bool
|
||||
_M_match() = 0;
|
||||
|
||||
// Set matched when some prefix of the string matches the pattern.
|
||||
virtual bool
|
||||
_M_search_from_first() = 0;
|
||||
|
||||
protected:
|
||||
const std::shared_ptr<_Nfa> _M_nfa;
|
||||
_PatternCursor& _M_str_cur;
|
||||
_Results& _M_results;
|
||||
};
|
||||
|
||||
// Time complexity: exponential
|
||||
// Space complexity: O(_M_str_cur.size())
|
||||
// _M_dfs() take a state, along with current string cursor(_M_str_cur),
|
||||
// trying to match current state with current character.
|
||||
// Only _S_opcode_match will consume a character.
|
||||
class _DFSMatcher
|
||||
: public _Grep_matcher
|
||||
{
|
||||
public:
|
||||
_DFSMatcher(_PatternCursor& __p,
|
||||
_Results& __r,
|
||||
const _AutomatonPtr& __automaton,
|
||||
regex_constants::match_flag_type __flags)
|
||||
: _Grep_matcher(__p, __r, __automaton, __flags)
|
||||
{ }
|
||||
|
||||
bool
|
||||
_M_match()
|
||||
{ return _M_dfs<true>(_M_nfa->_M_start()); }
|
||||
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{ return _M_dfs<false>(_M_nfa->_M_start()); }
|
||||
|
||||
private:
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_dfs(_StateIdT __i);
|
||||
};
|
||||
|
||||
// It's essentially a variant of Single-Source-Shortest-Path problem, where,
|
||||
// the matching results is the final distance and should be minimized.
|
||||
// Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed
|
||||
// (BFS-like) Bellman-Ford algorithm,
|
||||
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
|
||||
//
|
||||
// Every entry of _M_current saves the solution(grouping status) for every
|
||||
// matching head. When states transfer, solutions will be compared and
|
||||
// deduplicated(based on which greedy mode we have).
|
||||
//
|
||||
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
|
||||
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
|
||||
class _BFSMatcher
|
||||
: public _Grep_matcher
|
||||
{
|
||||
public:
|
||||
_BFSMatcher(_PatternCursor& __p,
|
||||
_Results& __r,
|
||||
const _AutomatonPtr& __automaton,
|
||||
regex_constants::match_flag_type __flags)
|
||||
: _Grep_matcher(__p, __r, __automaton, __flags)
|
||||
{
|
||||
if (_M_nfa->_M_start() != _S_invalid_state_id)
|
||||
_M_current[_M_nfa->_M_start()] = _M_results._M_clone();
|
||||
_M_e_closure();
|
||||
}
|
||||
|
||||
bool
|
||||
_M_match()
|
||||
{ return _M_main_loop<true>(); }
|
||||
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{ return _M_main_loop<false>(); }
|
||||
|
||||
private:
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_main_loop();
|
||||
|
||||
void
|
||||
_M_e_closure();
|
||||
|
||||
void
|
||||
_M_move();
|
||||
|
||||
bool
|
||||
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
|
||||
|
||||
bool
|
||||
_M_includes_some() const;
|
||||
|
||||
std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace std
|
||||
|
||||
#include <bits/regex_grep_matcher.tcc>
|
|
@ -1,243 +0,0 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_grep_matcher.tcc
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
#include <regex>
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
// TODO: This is too slow. Try to compile the NFA to a DFA.
|
||||
template<bool __match_mode>
|
||||
bool _DFSMatcher::
|
||||
_M_dfs(_StateIdT __i)
|
||||
{
|
||||
if (__i == _S_invalid_state_id)
|
||||
// This is not that certain. Need deeper investigate.
|
||||
return false;
|
||||
const auto& __state = (*_M_nfa)[__i];
|
||||
bool __ret = false;
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
// Greedy mode by default. For non-greedy mode,
|
||||
// swap _M_alt and _M_next.
|
||||
// TODO: Add greedy mode option.
|
||||
__ret = _M_dfs<__match_mode>(__state._M_alt)
|
||||
|| _M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
__state._M_tagger(_M_str_cur, _M_results);
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
__state._M_tagger(_M_str_cur, _M_results);
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
_M_results._M_set_matched(__state._M_subexpr, __ret);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur))
|
||||
{
|
||||
_M_str_cur._M_next();
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
_M_str_cur._M_prev();
|
||||
}
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
if (__match_mode)
|
||||
__ret = _M_str_cur._M_at_end();
|
||||
else
|
||||
__ret = true;
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
return __ret;
|
||||
}
|
||||
|
||||
template<bool __match_mode>
|
||||
bool _BFSMatcher::
|
||||
_M_main_loop()
|
||||
{
|
||||
while (!_M_str_cur._M_at_end())
|
||||
{
|
||||
if (!__match_mode)
|
||||
if (_M_includes_some())
|
||||
return true;
|
||||
_M_move();
|
||||
_M_str_cur._M_next();
|
||||
_M_e_closure();
|
||||
}
|
||||
return _M_includes_some();
|
||||
}
|
||||
|
||||
// The SPFA approach.
|
||||
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||
inline
|
||||
void _BFSMatcher::
|
||||
_M_e_closure()
|
||||
{
|
||||
std::queue<_StateIdT> __q;
|
||||
std::vector<bool> __in_q(_M_nfa->size(), false);
|
||||
for (auto& __it : _M_current)
|
||||
{
|
||||
__in_q[__it.first] = true;
|
||||
__q.push(__it.first);
|
||||
}
|
||||
while (!__q.empty())
|
||||
{
|
||||
auto __u = __q.front();
|
||||
__q.pop();
|
||||
__in_q[__u] = false;
|
||||
const auto& __state = (*_M_nfa)[__u];
|
||||
|
||||
// Can be implemented using method, but there're too much arguments.
|
||||
auto __add_visited_state = [&](_StateIdT __v)
|
||||
{
|
||||
if (__v == _S_invalid_state_id)
|
||||
return;
|
||||
if (_M_match_less_than(__u, __v))
|
||||
{
|
||||
_M_current[__v] = _M_current[__u]->_M_clone();
|
||||
// if a state is updated, it's outgoing neighbors should be
|
||||
// reconsidered too. Push them to the queue.
|
||||
if (!__in_q[__v])
|
||||
{
|
||||
__in_q[__v] = true;
|
||||
__q.push(__v);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
switch (__state._M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
__add_visited_state(__state._M_next);
|
||||
__add_visited_state(__state._M_alt);
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
__state._M_tagger(_M_str_cur, *_M_current[__u]);
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
__state._M_tagger(_M_str_cur, *_M_current[__u]);
|
||||
_M_current[__u]->_M_set_matched(__state._M_subexpr, true);
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||
inline
|
||||
void _BFSMatcher::
|
||||
_M_move()
|
||||
{
|
||||
decltype(_M_current) __next;
|
||||
for (auto& __it : _M_current)
|
||||
{
|
||||
const auto& __state = (*_M_nfa)[__it.first];
|
||||
if (__state._M_opcode == _S_opcode_match
|
||||
&& __state._M_matches(_M_str_cur))
|
||||
if (_M_match_less_than(__it.first, __state._M_next)
|
||||
&& __state._M_next != _S_invalid_state_id)
|
||||
__next[__state._M_next] = __it.second->_M_clone();
|
||||
}
|
||||
_M_current = move(__next);
|
||||
}
|
||||
|
||||
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||
inline
|
||||
bool _BFSMatcher::
|
||||
_M_match_less_than(_StateIdT __u, _StateIdT __v) const
|
||||
{
|
||||
if (_M_current.count(__u) == 0)
|
||||
return false;
|
||||
if (_M_current.count(__v) > 0)
|
||||
return true;
|
||||
// TODO: Greedy and Non-greedy support
|
||||
return true;
|
||||
}
|
||||
|
||||
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||
inline
|
||||
bool _BFSMatcher::
|
||||
_M_includes_some() const
|
||||
{
|
||||
auto& __s = _M_nfa->_M_final_states();
|
||||
auto& __t = _M_current;
|
||||
if (__s.size() > 0 && __t.size() > 0)
|
||||
{
|
||||
auto __first = __s.begin();
|
||||
auto __second = __t.begin();
|
||||
while (__first != __s.end() && __second != __t.end())
|
||||
{
|
||||
if (*__first < __second->first)
|
||||
++__first;
|
||||
else if (__second->first < *__first)
|
||||
++__second;
|
||||
else
|
||||
{
|
||||
_M_results._M_assign(*__second->second);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// FIXME: move it to src/c++11 when it's stable, and make it not inlined.
|
||||
inline
|
||||
std::unique_ptr<_Grep_matcher> _Nfa::
|
||||
_M_get_matcher(_PatternCursor& __p,
|
||||
_Results& __r,
|
||||
const _AutomatonPtr& __a,
|
||||
regex_constants::match_flag_type __flags)
|
||||
{
|
||||
if (_M_has_back_ref)
|
||||
return unique_ptr<_Grep_matcher>(
|
||||
new _DFSMatcher(__p, __r, __a, __flags));
|
||||
else
|
||||
return unique_ptr<_Grep_matcher>(
|
||||
new _BFSMatcher(__p, __r, __a, __flags));
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
|
@ -1,491 +0,0 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_nfa.h
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
/**
|
||||
* @addtogroup regex-detail
|
||||
* @{
|
||||
*/
|
||||
|
||||
/// Provides a generic facade for a templated match_results.
|
||||
struct _Results
|
||||
{
|
||||
virtual
|
||||
~_Results()
|
||||
{ }
|
||||
virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
|
||||
virtual void _M_set_matched(int __i, bool __is_matched) = 0;
|
||||
virtual std::unique_ptr<_Results> _M_clone() const = 0;
|
||||
virtual void _M_assign(const _Results& __rhs) = 0;
|
||||
};
|
||||
|
||||
class _Grep_matcher;
|
||||
class _Automaton;
|
||||
|
||||
/// Generic shared pointer to an automaton.
|
||||
typedef std::shared_ptr<_Automaton> _AutomatonPtr;
|
||||
|
||||
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
|
||||
class _Automaton
|
||||
{
|
||||
public:
|
||||
typedef unsigned int _SizeT;
|
||||
|
||||
public:
|
||||
virtual
|
||||
~_Automaton() { }
|
||||
|
||||
virtual _SizeT
|
||||
_M_sub_count() const = 0;
|
||||
|
||||
virtual std::unique_ptr<_Grep_matcher>
|
||||
_M_get_matcher(_PatternCursor& __p,
|
||||
_Results& __r,
|
||||
const _AutomatonPtr& __automaton,
|
||||
regex_constants::match_flag_type __flags) = 0;
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
virtual std::ostream&
|
||||
_M_dot(std::ostream& __ostr) const = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Operation codes describing the kind of transition leaving a state of
/// the base NFA that represents the regular expression.
enum _Opcode
{
  _S_opcode_unknown       =   0,  // not a recognised transition
  _S_opcode_alternative   =   1,  // branch: has both a next and an alt
  _S_opcode_subexpr_begin =   4,  // tags the begin of a sub-expression
  _S_opcode_subexpr_end   =   5,  // tags the end of a sub-expression
  _S_opcode_match         = 100,  // tests the current character
  _S_opcode_accept        = 255   // accepting state
};
|
||||
|
||||
/// Tags current state (for subexpr begin/end).
|
||||
typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
|
||||
|
||||
/// Start state tag.
|
||||
template<typename _FwdIterT, typename _TraitsT>
|
||||
struct _StartTagger
|
||||
{
|
||||
explicit
|
||||
_StartTagger(int __i)
|
||||
: _M_index(__i)
|
||||
{ }
|
||||
|
||||
void
|
||||
operator()(const _PatternCursor& __pc, _Results& __r)
|
||||
{ __r._M_set_pos(_M_index, 0, __pc); }
|
||||
|
||||
int _M_index;
|
||||
};
|
||||
|
||||
/// End state tag.
|
||||
template<typename _FwdIterT, typename _TraitsT>
|
||||
struct _EndTagger
|
||||
{
|
||||
explicit
|
||||
_EndTagger(int __i)
|
||||
: _M_index(__i)
|
||||
{ }
|
||||
|
||||
void
|
||||
operator()(const _PatternCursor& __pc, _Results& __r)
|
||||
{ __r._M_set_pos(_M_index, 1, __pc); }
|
||||
|
||||
int _M_index;
|
||||
};
|
||||
|
||||
// TODO For now we use an all-in-one comparator. In the future there may be
|
||||
// optimizations based on regex_traits::translate and regex_transform.
|
||||
template<typename _InIterT, typename _TraitsT>
|
||||
struct _Comparator
|
||||
{
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
typedef typename _TraitsT::char_type _CharT;
|
||||
typedef std::basic_string<_CharT> _StringT;
|
||||
|
||||
_Comparator(_FlagT __flags, const _TraitsT& __traits)
|
||||
: _M_flags(__flags), _M_traits(__traits)
|
||||
{ }
|
||||
|
||||
bool
|
||||
_M_equ(_CharT __a, _CharT __b) const;
|
||||
|
||||
bool
|
||||
_M_le(_CharT __a, _CharT __b) const;
|
||||
|
||||
_FlagT _M_flags;
|
||||
_TraitsT _M_traits;
|
||||
};
|
||||
|
||||
/// Indicates if current state matches cursor current.
|
||||
typedef std::function<bool (const _PatternCursor&)> _Matcher;
|
||||
|
||||
/// Matches any character
|
||||
inline bool
|
||||
_AnyMatcher(const _PatternCursor&)
|
||||
{ return true; }
|
||||
|
||||
/// Matches a single character
|
||||
template<typename _InIterT, typename _TraitsT>
|
||||
struct _CharMatcher
|
||||
: public _Comparator<_InIterT, _TraitsT>
|
||||
{
|
||||
typedef _Comparator<_InIterT, _TraitsT> _BaseT;
|
||||
typedef typename _TraitsT::char_type _CharT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
|
||||
explicit
|
||||
_CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
|
||||
: _BaseT(__flags, __t), _M_c(__c)
|
||||
{ }
|
||||
|
||||
bool
|
||||
operator()(const _PatternCursor& __pc) const
|
||||
{
|
||||
typedef const _SpecializedCursor<_InIterT>& _CursorT;
|
||||
_CursorT __c = static_cast<_CursorT>(__pc);
|
||||
return this->_M_equ(__c._M_current(), _M_c);
|
||||
}
|
||||
|
||||
_CharT _M_c;
|
||||
};
|
||||
|
||||
/// Matches a character range (bracket expression)
|
||||
template<typename _InIterT, typename _TraitsT>
|
||||
struct _BracketMatcher
|
||||
: public _Comparator<_InIterT, _TraitsT>
|
||||
{
|
||||
typedef _Comparator<_InIterT, _TraitsT> _BaseT;
|
||||
typedef typename _TraitsT::char_class_type _CharClassT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
typedef typename _TraitsT::char_type _CharT;
|
||||
typedef std::basic_string<_CharT> _StringT;
|
||||
|
||||
explicit
|
||||
_BracketMatcher(bool __is_non_matching,
|
||||
_FlagT __flags,
|
||||
const _TraitsT& __t)
|
||||
: _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
|
||||
_M_is_non_matching(__is_non_matching), _M_class_set(0)
|
||||
{ }
|
||||
|
||||
bool
|
||||
operator()(const _PatternCursor& __pc) const;
|
||||
|
||||
void
|
||||
_M_add_char(_CharT __c)
|
||||
{ _M_char_set.push_back(__c); }
|
||||
|
||||
void
|
||||
_M_add_collating_element(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
|
||||
if (__st.empty())
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
// TODO: digraph
|
||||
_M_char_set.push_back(__st[0]);
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_equivalence_class(const _StringT& __s)
|
||||
{
|
||||
_M_add_character_class(
|
||||
_M_traits.transform_primary(&*__s.begin(), &*__s.end()));
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_character_class(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.lookup_classname(
|
||||
&*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase));
|
||||
if (__st == 0)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
_M_class_set |= __st;
|
||||
}
|
||||
|
||||
void
|
||||
_M_make_range(_CharT __l, _CharT __r)
|
||||
{
|
||||
if (!this->_M_le(__l, __r))
|
||||
__throw_regex_error(regex_constants::error_range);
|
||||
_M_range_set.push_back(make_pair(__l, __r));
|
||||
}
|
||||
|
||||
_FlagT _M_flags;
|
||||
_TraitsT _M_traits;
|
||||
bool _M_is_non_matching;
|
||||
std::vector<_CharT> _M_char_set;
|
||||
std::vector<pair<_CharT, _CharT>> _M_range_set;
|
||||
_CharClassT _M_class_set;
|
||||
};
|
||||
|
||||
/// Identifies a state in the NFA.
|
||||
typedef int _StateIdT;
|
||||
|
||||
/// The special case in which a state identifier is not an index.
|
||||
static const _StateIdT _S_invalid_state_id = -1;
|
||||
|
||||
|
||||
/**
|
||||
* @brief struct _State
|
||||
*
|
||||
* An individual state in an NFA
|
||||
*
|
||||
* In this case a "state" is an entry in the NFA definition coupled
|
||||
* with its outgoing transition(s). All states have a single outgoing
|
||||
* transition, except for accepting states (which have no outgoing
|
||||
* transitions) and alt states, which have two outgoing transitions.
|
||||
*/
|
||||
struct _State
|
||||
{
|
||||
typedef int _OpcodeT;
|
||||
|
||||
_OpcodeT _M_opcode; // type of outgoing transition
|
||||
_StateIdT _M_next; // outgoing transition
|
||||
_StateIdT _M_alt; // for _S_opcode_alternative
|
||||
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
||||
_Tagger _M_tagger; // for _S_opcode_subexpr_*
|
||||
_Matcher _M_matches; // for _S_opcode_match
|
||||
|
||||
explicit _State(_OpcodeT __opcode)
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
||||
{ }
|
||||
|
||||
_State(const _Matcher& __m)
|
||||
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
|
||||
{ }
|
||||
|
||||
_State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
|
||||
_M_tagger(__t)
|
||||
{ }
|
||||
|
||||
_State(_StateIdT __next, _StateIdT __alt)
|
||||
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
|
||||
{ }
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_print(std::ostream& ostr) const;
|
||||
|
||||
// Prints graphviz dot commands for state.
|
||||
std::ostream&
|
||||
_M_dot(std::ostream& __ostr, _StateIdT __id) const;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/// The Grep Matcher works on sets of states. Here are sets of states.
|
||||
typedef std::set<_StateIdT> _StateSet;
|
||||
|
||||
/**
|
||||
* @brief struct _Nfa
|
||||
*
|
||||
* A collection of all states making up an NFA.
|
||||
*
|
||||
* An NFA is a 4-tuple M = (K, S, s, F), where
|
||||
* K is a finite set of states,
|
||||
* S is the alphabet of the NFA,
|
||||
* s is the initial state,
|
||||
* F is a set of final (accepting) states.
|
||||
*
|
||||
* This NFA class is templated on S, a type that will hold values of the
|
||||
* underlying alphabet (without regard to semantics of that alphabet). The
|
||||
* other elements of the tuple are generated during construction of the NFA
|
||||
* and are available through accessor member functions.
|
||||
*/
|
||||
class _Nfa
|
||||
: public _Automaton, public std::vector<_State>
|
||||
{
|
||||
public:
|
||||
typedef _State _StateT;
|
||||
typedef unsigned int _SizeT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
|
||||
_Nfa(_FlagT __f)
|
||||
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
|
||||
// TODO: BFS by default. Your choice. Need to be set by the compiler.
|
||||
_M_has_back_ref(false)
|
||||
{ }
|
||||
|
||||
~_Nfa()
|
||||
{ }
|
||||
|
||||
_FlagT
|
||||
_M_options() const
|
||||
{ return _M_flags; }
|
||||
|
||||
_StateIdT
|
||||
_M_start() const
|
||||
{ return _M_start_state; }
|
||||
|
||||
const _StateSet&
|
||||
_M_final_states() const
|
||||
{ return _M_accepting_states; }
|
||||
|
||||
_SizeT
|
||||
_M_sub_count() const
|
||||
{ return _M_subexpr_count; }
|
||||
|
||||
_StateIdT
|
||||
_M_insert_accept()
|
||||
{
|
||||
this->push_back(_StateT(_S_opcode_accept));
|
||||
_M_accepting_states.insert(this->size()-1);
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_alt(_StateIdT __next, _StateIdT __alt)
|
||||
{
|
||||
this->push_back(_StateT(__next, __alt));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_matcher(_Matcher __m)
|
||||
{
|
||||
this->push_back(_StateT(__m));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_subexpr_begin(const _Tagger& __t)
|
||||
{
|
||||
this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
|
||||
__t));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
_StateIdT
|
||||
_M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
|
||||
{
|
||||
this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_back_ref(bool __b)
|
||||
{ _M_has_back_ref = __b; }
|
||||
|
||||
std::unique_ptr<_Grep_matcher>
|
||||
_M_get_matcher(_PatternCursor& __p,
|
||||
_Results& __r,
|
||||
const _AutomatonPtr& __automaton,
|
||||
regex_constants::match_flag_type __flags);
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_dot(std::ostream& __ostr) const;
|
||||
#endif
|
||||
|
||||
private:
|
||||
_FlagT _M_flags;
|
||||
_StateIdT _M_start_state;
|
||||
_StateSet _M_accepting_states;
|
||||
_SizeT _M_subexpr_count;
|
||||
bool _M_has_back_ref;
|
||||
};
|
||||
|
||||
/// Describes a sequence of one or more %_State, its current start
|
||||
/// and end(s). This structure contains fragments of an NFA during
|
||||
/// construction.
|
||||
class _StateSeq
|
||||
{
|
||||
public:
|
||||
// Constructs a single-node sequence
|
||||
_StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
|
||||
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
|
||||
{ }
|
||||
// Constructs a split sequence from two other sequencces
|
||||
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
|
||||
: _M_nfa(__e1._M_nfa),
|
||||
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
|
||||
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
|
||||
{ }
|
||||
|
||||
// Constructs a split sequence from a single sequence
|
||||
_StateSeq(const _StateSeq& __e, _StateIdT __id)
|
||||
: _M_nfa(__e._M_nfa),
|
||||
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
|
||||
_M_end1(__id), _M_end2(__e._M_end1)
|
||||
{ }
|
||||
|
||||
// Constructs a copy of a %_StateSeq
|
||||
_StateSeq(const _StateSeq& __rhs)
|
||||
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
|
||||
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
|
||||
{ }
|
||||
|
||||
|
||||
_StateSeq& operator=(const _StateSeq& __rhs);
|
||||
|
||||
_StateIdT
|
||||
_M_front() const
|
||||
{ return _M_start; }
|
||||
|
||||
// Extends a sequence by one.
|
||||
void
|
||||
_M_push_back(_StateIdT __id);
|
||||
|
||||
// Extends and maybe joins a sequence.
|
||||
void
|
||||
_M_append(_StateIdT __id);
|
||||
|
||||
void
|
||||
_M_append(_StateSeq& __rhs);
|
||||
|
||||
// Clones an entire sequence.
|
||||
_StateIdT
|
||||
_M_clone();
|
||||
|
||||
private:
|
||||
_Nfa& _M_nfa;
|
||||
_StateIdT _M_start;
|
||||
_StateIdT _M_end1;
|
||||
_StateIdT _M_end2;
|
||||
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace std
|
||||
|
||||
#include <bits/regex_nfa.tcc>
|
||||
|
|
@ -1,232 +0,0 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_nfa.tcc
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
#include <regex>
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _InIterT, typename _TraitsT>
|
||||
bool _BracketMatcher<_InIterT, _TraitsT>::
|
||||
operator()(const _PatternCursor& __pc) const
|
||||
{
|
||||
typedef const _SpecializedCursor<_InIterT>& _CursorT;
|
||||
_CursorT __c = static_cast<_CursorT>(__pc);
|
||||
_CharT __ch = __c._M_current();
|
||||
bool __ret = false;
|
||||
for (auto __c : _M_char_set)
|
||||
if (this->_M_equ(__c, __ch))
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
if (!__ret && _M_traits.isctype(__ch, _M_class_set))
|
||||
__ret = true;
|
||||
else
|
||||
{
|
||||
for (auto& __it : _M_range_set)
|
||||
if (this->_M_le(__it.first, __ch) && this->_M_le(__ch, __it.second))
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (_M_is_non_matching)
|
||||
__ret = !__ret;
|
||||
return __ret;
|
||||
}
|
||||
|
||||
template<typename _InIterT, typename _TraitsT>
|
||||
bool _Comparator<_InIterT, _TraitsT>::
|
||||
_M_equ(_CharT __a, _CharT __b) const
|
||||
{
|
||||
if (_M_flags & regex_constants::icase)
|
||||
return _M_traits.translate_nocase(__a)
|
||||
== _M_traits.translate_nocase(__b);
|
||||
if (_M_flags & regex_constants::collate)
|
||||
return _M_traits.translate(__a) == _M_traits.translate(__b);
|
||||
return __a == __b;
|
||||
}
|
||||
|
||||
template<typename _InIterT, typename _TraitsT>
|
||||
bool _Comparator<_InIterT, _TraitsT>::
|
||||
_M_le(_CharT __a, _CharT __b) const
|
||||
{
|
||||
_StringT __str1 = _StringT(1,
|
||||
_M_flags & regex_constants::icase
|
||||
? _M_traits.translate_nocase(__a)
|
||||
: _M_traits.translate(__a));
|
||||
_StringT __str2 = _StringT(1,
|
||||
_M_flags & regex_constants::icase
|
||||
? _M_traits.translate_nocase(__b)
|
||||
: _M_traits.translate(__b));
|
||||
return _M_traits.transform(__str1.begin(), __str1.end())
|
||||
<= _M_traits.transform(__str2.begin(), __str2.end());
|
||||
}
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
inline std::ostream& _State::
|
||||
_M_print(std::ostream& ostr) const
|
||||
{
|
||||
switch (_M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
ostr << "alt next=" << _M_next << " alt=" << _M_alt;
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
ostr << "match next=" << _M_next;
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
ostr << "accept next=" << _M_next;
|
||||
break;
|
||||
default:
|
||||
ostr << "unknown next=" << _M_next;
|
||||
break;
|
||||
}
|
||||
return ostr;
|
||||
}
|
||||
|
||||
// Prints graphviz dot commands for state.
|
||||
inline std::ostream& _State::
|
||||
_M_dot(std::ostream& __ostr, _StateIdT __id) const
|
||||
{
|
||||
switch (_M_opcode)
|
||||
{
|
||||
case _S_opcode_alternative:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
|
||||
<< __id << " -> " << _M_next
|
||||
<< " [label=\"epsilon\", tailport=\"s\"];\n"
|
||||
<< __id << " -> " << _M_alt
|
||||
<< " [label=\"epsilon\", tailport=\"n\"];\n";
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
|
||||
<< _M_subexpr << "\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nSEND "
|
||||
<< _M_subexpr << "\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
|
||||
break;
|
||||
default:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"?\"];\n";
|
||||
break;
|
||||
}
|
||||
return __ostr;
|
||||
}
|
||||
|
||||
inline std::ostream& _Nfa::
|
||||
_M_dot(std::ostream& __ostr) const
|
||||
{
|
||||
__ostr << "digraph _Nfa {\n"
|
||||
<< " rankdir=LR;\n";
|
||||
for (unsigned int __i = 0; __i < this->size(); ++__i)
|
||||
{ this->at(__i)._M_dot(__ostr, __i); }
|
||||
__ostr << "}\n";
|
||||
return __ostr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Copies the three state ids (_M_start, _M_end1, _M_end2) from __rhs.
// No other member is assigned here; in particular _M_nfa is left as it is.
inline _StateSeq& _StateSeq::
operator=(const _StateSeq& __rhs)
{
  this->_M_end2 = __rhs._M_end2;
  this->_M_end1 = __rhs._M_end1;
  this->_M_start = __rhs._M_start;
  return *this;
}
|
||||
|
||||
inline void _StateSeq::
|
||||
_M_push_back(_StateIdT __id)
|
||||
{
|
||||
if (_M_end1 != _S_invalid_state_id)
|
||||
_M_nfa[_M_end1]._M_next = __id;
|
||||
_M_end1 = __id;
|
||||
}
|
||||
|
||||
inline void _StateSeq::
|
||||
_M_append(_StateIdT __id)
|
||||
{
|
||||
if (_M_end2 != _S_invalid_state_id)
|
||||
{
|
||||
if (_M_end2 == _M_end1)
|
||||
_M_nfa[_M_end2]._M_alt = __id;
|
||||
else
|
||||
_M_nfa[_M_end2]._M_next = __id;
|
||||
_M_end2 = _S_invalid_state_id;
|
||||
}
|
||||
if (_M_end1 != _S_invalid_state_id)
|
||||
_M_nfa[_M_end1]._M_next = __id;
|
||||
_M_end1 = __id;
|
||||
}
|
||||
|
||||
inline void _StateSeq::
|
||||
_M_append(_StateSeq& __rhs)
|
||||
{
|
||||
if (_M_end2 != _S_invalid_state_id)
|
||||
{
|
||||
if (_M_end2 == _M_end1)
|
||||
_M_nfa[_M_end2]._M_alt = __rhs._M_start;
|
||||
else
|
||||
_M_nfa[_M_end2]._M_next = __rhs._M_start;
|
||||
_M_end2 = _S_invalid_state_id;
|
||||
}
|
||||
if (__rhs._M_end2 != _S_invalid_state_id)
|
||||
_M_end2 = __rhs._M_end2;
|
||||
if (_M_end1 != _S_invalid_state_id)
|
||||
_M_nfa[_M_end1]._M_next = __rhs._M_start;
|
||||
_M_end1 = __rhs._M_end1;
|
||||
}
|
||||
|
||||
// @todo Implement this function.
// Placeholder: the current body copies nothing and unconditionally
// returns the state id 0.
|
||||
inline _StateIdT _StateSeq::
|
||||
_M_clone()
|
||||
{ return 0; }
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
|
@ -54,13 +54,11 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <bits/range_access.h>
|
||||
#include <bits/regex_constants.h>
|
||||
#include <bits/regex_error.h>
|
||||
#include <bits/regex_cursor.h>
|
||||
#include <bits/regex_nfa.h>
|
||||
#include <bits/regex_automaton.h>
|
||||
#include <bits/regex_compiler.h>
|
||||
#include <bits/regex_grep_matcher.h>
|
||||
#include <bits/regex_executor.h>
|
||||
#include <bits/regex.h>
|
||||
|
||||
#endif // C++11
|
||||
|
|
|
@ -38,12 +38,10 @@ template<typename _Bi_iter, typename _Alloc,
|
|||
regex_constants::match_flag_type __flags
|
||||
= regex_constants::match_default)
|
||||
{
|
||||
__detail::_AutomatonPtr __a = __re._M_get_automaton();
|
||||
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
|
||||
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
|
||||
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
|
||||
VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
|
||||
&*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
|
||||
VERIFY( (dynamic_cast
|
||||
<__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
|
||||
(&*__detail::__get_executor(__s, __e, __m, __re, __flags))
|
||||
!= nullptr) );
|
||||
}
|
||||
|
||||
void
|
||||
|
|
Loading…
Reference in New Issue