regex_automaton.h: _S_opcode_backref.
2013-08-18 Tim Shen <timshen91@gmail.com> * include/bits/regex_automaton.h: _S_opcode_backref. * include/bits/regex_automaton.tcc: Backref automaton support. * include/bits/regex_compiler.tcc: Parsing support. * include/bits/regex_executor.h: Add _M_traits for _DFSExecutor. * include/bits/regex_executor.tcc: Add _S_opcode_backref support. * testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc: New. From-SVN: r201825
This commit is contained in:
parent
1d5755efee
commit
ce645eb091
|
@ -1,3 +1,12 @@
|
||||||
|
2013-08-18 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
|
* include/bits/regex_automaton.h: _S_opcode_backref.
|
||||||
|
* include/bits/regex_automaton.tcc: Backref automaton support.
|
||||||
|
* include/bits/regex_compiler.tcc: Parsing support.
|
||||||
|
* include/bits/regex_executor.h: Add _M_traits for _DFSExecutor.
|
||||||
|
* include/bits/regex_executor.tcc: Add _S_opcode_backref support.
|
||||||
|
* testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc: New.
|
||||||
|
|
||||||
2013-08-16 Tim Shen <timshen91@gmail.com>
|
2013-08-16 Tim Shen <timshen91@gmail.com>
|
||||||
|
|
||||||
* include/bits/regex.h (regex_traits<>::transform_primary):
|
* include/bits/regex.h (regex_traits<>::transform_primary):
|
||||||
|
|
|
@ -53,6 +53,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
_S_opcode_unknown = 0,
|
_S_opcode_unknown = 0,
|
||||||
_S_opcode_alternative = 1,
|
_S_opcode_alternative = 1,
|
||||||
|
_S_opcode_backref = 2,
|
||||||
_S_opcode_subexpr_begin = 4,
|
_S_opcode_subexpr_begin = 4,
|
||||||
_S_opcode_subexpr_end = 5,
|
_S_opcode_subexpr_end = 5,
|
||||||
_S_opcode_match = 100,
|
_S_opcode_match = 100,
|
||||||
|
@ -66,11 +67,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
typedef int _OpcodeT;
|
typedef int _OpcodeT;
|
||||||
typedef _Matcher<_CharT> _MatcherT;
|
typedef _Matcher<_CharT> _MatcherT;
|
||||||
|
|
||||||
_OpcodeT _M_opcode; // type of outgoing transition
|
_OpcodeT _M_opcode; // type of outgoing transition
|
||||||
_StateIdT _M_next; // outgoing transition
|
_StateIdT _M_next; // outgoing transition
|
||||||
_StateIdT _M_alt; // for _S_opcode_alternative
|
union // Since they are mutually exclusive.
|
||||||
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
{
|
||||||
_MatcherT _M_matches; // for _S_opcode_match
|
_StateIdT _M_alt; // for _S_opcode_alternative
|
||||||
|
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
||||||
|
unsigned int _M_backref_index; // for _S_opcode_backref
|
||||||
|
};
|
||||||
|
_MatcherT _M_matches; // for _S_opcode_match
|
||||||
|
|
||||||
explicit _State(_OpcodeT __opcode)
|
explicit _State(_OpcodeT __opcode)
|
||||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
||||||
|
@ -82,8 +87,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
_State(_OpcodeT __opcode, unsigned __index)
|
_State(_OpcodeT __opcode, unsigned __index)
|
||||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__index)
|
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
||||||
{ }
|
{
|
||||||
|
if (__opcode == _S_opcode_subexpr_begin
|
||||||
|
|| __opcode == _S_opcode_subexpr_end)
|
||||||
|
_M_subexpr = __index;
|
||||||
|
else if (__opcode == _S_opcode_backref)
|
||||||
|
_M_backref_index = __index;
|
||||||
|
}
|
||||||
|
|
||||||
_State(_StateIdT __next, _StateIdT __alt)
|
_State(_StateIdT __next, _StateIdT __alt)
|
||||||
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
|
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
|
||||||
|
@ -174,7 +185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_M_insert_subexpr_begin()
|
_M_insert_subexpr_begin()
|
||||||
{
|
{
|
||||||
auto __id = _M_subexpr_count++;
|
auto __id = _M_subexpr_count++;
|
||||||
_M_paren_stack.push(__id);
|
_M_paren_stack.push_back(__id);
|
||||||
this->push_back(_StateT(_S_opcode_subexpr_begin, __id));
|
this->push_back(_StateT(_S_opcode_subexpr_begin, __id));
|
||||||
return this->size()-1;
|
return this->size()-1;
|
||||||
}
|
}
|
||||||
|
@ -182,26 +193,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_StateIdT
|
_StateIdT
|
||||||
_M_insert_subexpr_end()
|
_M_insert_subexpr_end()
|
||||||
{
|
{
|
||||||
this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.top()));
|
this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.back()));
|
||||||
_M_paren_stack.pop();
|
_M_paren_stack.pop_back();
|
||||||
return this->size()-1;
|
return this->size()-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
_StateIdT
|
||||||
_M_set_backref(bool __b)
|
_M_insert_backref(unsigned int __index);
|
||||||
{ _M_has_backref = __b; }
|
|
||||||
|
|
||||||
#ifdef _GLIBCXX_DEBUG
|
#ifdef _GLIBCXX_DEBUG
|
||||||
std::ostream&
|
std::ostream&
|
||||||
_M_dot(std::ostream& __ostr) const;
|
_M_dot(std::ostream& __ostr) const;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
_FlagT _M_flags;
|
_FlagT _M_flags;
|
||||||
_StateIdT _M_start_state;
|
_StateIdT _M_start_state;
|
||||||
_StateSet _M_accepting_states;
|
_StateSet _M_accepting_states;
|
||||||
_SizeT _M_subexpr_count;
|
_SizeT _M_subexpr_count;
|
||||||
bool _M_has_backref;
|
bool _M_has_backref;
|
||||||
std::stack<unsigned int> _M_paren_stack;
|
std::vector<unsigned int> _M_paren_stack;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Describes a sequence of one or more %_State, its current start
|
/// Describes a sequence of one or more %_State, its current start
|
||||||
|
|
|
@ -50,6 +50,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
|
ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
|
||||||
break;
|
break;
|
||||||
|
case _S_opcode_backref:
|
||||||
|
ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
|
||||||
|
break;
|
||||||
case _S_opcode_match:
|
case _S_opcode_match:
|
||||||
ostr << "match next=" << _M_next;
|
ostr << "match next=" << _M_next;
|
||||||
break;
|
break;
|
||||||
|
@ -87,6 +90,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
<< _M_subexpr << "\"];\n"
|
<< _M_subexpr << "\"];\n"
|
||||||
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
||||||
break;
|
break;
|
||||||
|
case _S_opcode_backref:
|
||||||
|
__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
|
||||||
|
<< _M_subexpr << "\"];\n"
|
||||||
|
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
||||||
|
break;
|
||||||
case _S_opcode_match:
|
case _S_opcode_match:
|
||||||
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
|
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
|
||||||
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
||||||
|
@ -115,6 +123,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<typename _CharT, typename _TraitsT>
|
||||||
|
_StateIdT _NFA<_CharT, _TraitsT>::
|
||||||
|
_M_insert_backref(unsigned int __index)
|
||||||
|
{
|
||||||
|
// To figure out whether a backref is valid, a stack is used to store
|
||||||
|
// unfinished sub-expressions. For example, when parsing
|
||||||
|
// "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
|
||||||
|
// sub-expressions are parsed or partially parsed (in the stack), namely
|
||||||
|
// "(a..", "(b)" and "(c..".
|
||||||
|
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
|
||||||
|
// time, "\\2" is valid, but "\\1" and "\\3" are not.
|
||||||
|
if (__index >= _M_subexpr_count)
|
||||||
|
__throw_regex_error(regex_constants::error_backref);
|
||||||
|
for (auto __it : _M_paren_stack)
|
||||||
|
if (__index == __it)
|
||||||
|
__throw_regex_error(regex_constants::error_backref);
|
||||||
|
_M_has_backref = true;
|
||||||
|
this->push_back(_StateT(_S_opcode_backref, __index));
|
||||||
|
return this->size()-1;
|
||||||
|
}
|
||||||
|
|
||||||
template<typename _CharT, typename _TraitsT>
|
template<typename _CharT, typename _TraitsT>
|
||||||
_StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
|
_StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
|
||||||
operator=(const _StateSeq& __rhs)
|
operator=(const _StateSeq& __rhs)
|
||||||
|
|
|
@ -745,8 +745,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
if (_M_match_token(_ScannerT::_S_token_backref))
|
if (_M_match_token(_ScannerT::_S_token_backref))
|
||||||
{
|
{
|
||||||
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
||||||
_M_state_store._M_set_backref(true);
|
_M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
|
||||||
//return true;
|
_M_insert_backref(_M_cur_int_value(10))));
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
|
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
|
||||||
{
|
{
|
||||||
|
|
|
@ -82,10 +82,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__it.matched = false;
|
__it.matched = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
_BiIter _M_current;
|
_BiIter _M_current;
|
||||||
_BiIter _M_end;
|
_BiIter _M_end;
|
||||||
_ResultsT& _M_results;
|
_ResultsT& _M_results;
|
||||||
_FlagT _M_flags;
|
_FlagT _M_flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename _BiIter, typename _Alloc,
|
template<typename _BiIter, typename _Alloc,
|
||||||
|
@ -96,16 +96,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
public:
|
public:
|
||||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||||
typedef regex_constants::match_flag_type _FlagT;
|
typedef regex_constants::match_flag_type _FlagT;
|
||||||
|
|
||||||
_DFSExecutor(_BiIter __begin,
|
_DFSExecutor(_BiIter __begin,
|
||||||
_BiIter __end,
|
_BiIter __end,
|
||||||
_ResultsT& __results,
|
_ResultsT& __results,
|
||||||
const _RegexT& __nfa,
|
const _RegexT& __nfa,
|
||||||
_FlagT __flags)
|
_FlagT __flags)
|
||||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
||||||
_M_nfa(__nfa)
|
_M_traits(_TraitsT()), _M_nfa(__nfa)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@ -121,6 +121,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
bool
|
bool
|
||||||
_M_dfs(_StateIdT __i);
|
_M_dfs(_StateIdT __i);
|
||||||
|
|
||||||
|
_TraitsT _M_traits;
|
||||||
const _RegexT& _M_nfa;
|
const _RegexT& _M_nfa;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -63,8 +63,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
break;
|
break;
|
||||||
case _S_opcode_subexpr_end:
|
case _S_opcode_subexpr_end:
|
||||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
|
||||||
__results.at(__state._M_subexpr).second = __current;
|
__results.at(__state._M_subexpr).second = __current;
|
||||||
|
__results.at(__state._M_subexpr).matched = true;
|
||||||
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
__results.at(__state._M_subexpr).matched = __ret;
|
__results.at(__state._M_subexpr).matched = __ret;
|
||||||
break;
|
break;
|
||||||
case _S_opcode_match:
|
case _S_opcode_match:
|
||||||
|
@ -75,6 +76,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
--__current;
|
--__current;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
// First fetch the matched result from __results as __submatch;
|
||||||
|
// then compare it with
|
||||||
|
// (__current, __current + (__submatch.second - __submatch.first))
|
||||||
|
// If matched, keep going; else just return to try another state.
|
||||||
|
case _S_opcode_backref:
|
||||||
|
{
|
||||||
|
auto& __submatch = __results.at(__state._M_backref_index);
|
||||||
|
if (!__submatch.matched)
|
||||||
|
break;
|
||||||
|
auto __last = __current;
|
||||||
|
for (auto __tmp = __submatch.first;
|
||||||
|
__last != __end && __tmp != __submatch.second;
|
||||||
|
++__tmp)
|
||||||
|
++__last;
|
||||||
|
if (_M_traits.transform(__submatch.first, __submatch.second)
|
||||||
|
== _M_traits.transform(__current, __last))
|
||||||
|
{
|
||||||
|
auto __backup = __current;
|
||||||
|
__current = __last;
|
||||||
|
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||||
|
__current = __backup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
case _S_opcode_accept:
|
case _S_opcode_accept:
|
||||||
if (__match_mode)
|
if (__match_mode)
|
||||||
__ret = __current == __end;
|
__ret = __current == __end;
|
||||||
|
|
|
@ -0,0 +1,78 @@
|
||||||
|
// { dg-options "-std=gnu++11" }
|
||||||
|
|
||||||
|
//
|
||||||
|
// 2013-08-10 Tim Shen <timshen91@gmail.com>
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
//
|
||||||
|
// This file is part of the GNU ISO C++ Library. This library is free
|
||||||
|
// software; you can redistribute it and/or modify it under the
|
||||||
|
// terms of the GNU General Public License as published by the
|
||||||
|
// Free Software Foundation; either version 3, or (at your option)
|
||||||
|
// any later version.
|
||||||
|
//
|
||||||
|
// This library is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License along
|
||||||
|
// with this library; see the file COPYING3. If not see
|
||||||
|
// <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// 28.11.2 regex_match
|
||||||
|
// Tests ECMAScript back-reference against a std::string.
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <testsuite_hooks.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void
|
||||||
|
test01()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
regex re("([A-Z])\\1*");
|
||||||
|
smatch m;
|
||||||
|
{
|
||||||
|
string s = "AAAA";
|
||||||
|
regex_match(s, m, re);
|
||||||
|
VERIFY( m[0].matched );
|
||||||
|
VERIFY( m[1].matched );
|
||||||
|
VERIFY( std::string(m[0].first, m[0].second) == "AAAA" );
|
||||||
|
VERIFY( std::string(m[1].first, m[1].second) == "A" );
|
||||||
|
}
|
||||||
|
{
|
||||||
|
string s = "BBBB";
|
||||||
|
regex_match(s, m, re);
|
||||||
|
VERIFY( m[0].matched );
|
||||||
|
VERIFY( m[1].matched );
|
||||||
|
VERIFY( std::string(m[0].first, m[0].second) == "BBBB" );
|
||||||
|
VERIFY( std::string(m[1].first, m[1].second) == "B" );
|
||||||
|
}
|
||||||
|
{
|
||||||
|
string s = "BBBA";
|
||||||
|
regex_match(s, m, re);
|
||||||
|
VERIFY( !m[0].matched );
|
||||||
|
VERIFY( !m[1].matched );
|
||||||
|
}
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
regex re("(a(b)(c\\1(d)))");
|
||||||
|
VERIFY( false );
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
VERIFY( true );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main()
|
||||||
|
{
|
||||||
|
test01();
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue