regex_automaton.h: _S_opcode_backref.
2013-08-18 Tim Shen <timshen91@gmail.com> * include/bits/regex_automaton.h: _S_opcode_backref. * include/bits/regex_automaton.tcc: Backref automaton support. * include/bits/regex_compiler.tcc: Parsing support. * include/bits/regex_executor.h: Add _M_traits for _DFSExecutor. * include/bits/regex_executor.tcc: Add _S_opcode_backref support. * testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc: New. From-SVN: r201825
This commit is contained in:
parent
1d5755efee
commit
ce645eb091
@ -1,3 +1,12 @@
|
||||
2013-08-18 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex_automaton.h: _S_opcode_backref.
|
||||
* include/bits/regex_automaton.tcc: Backref automaton support.
|
||||
* include/bits/regex_compiler.tcc: Parsing support.
|
||||
* include/bits/regex_executor.h: Add _M_traits for _DFSExecutor.
|
||||
* include/bits/regex_executor.tcc: Add _S_opcode_backref support.
|
||||
* testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc: New.
|
||||
|
||||
2013-08-16 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex.h (regex_traits<>::transform_primary):
|
||||
|
@ -53,6 +53,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
{
|
||||
_S_opcode_unknown = 0,
|
||||
_S_opcode_alternative = 1,
|
||||
_S_opcode_backref = 2,
|
||||
_S_opcode_subexpr_begin = 4,
|
||||
_S_opcode_subexpr_end = 5,
|
||||
_S_opcode_match = 100,
|
||||
@ -66,11 +67,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
typedef int _OpcodeT;
|
||||
typedef _Matcher<_CharT> _MatcherT;
|
||||
|
||||
_OpcodeT _M_opcode; // type of outgoing transition
|
||||
_StateIdT _M_next; // outgoing transition
|
||||
_StateIdT _M_alt; // for _S_opcode_alternative
|
||||
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
||||
_MatcherT _M_matches; // for _S_opcode_match
|
||||
_OpcodeT _M_opcode; // type of outgoing transition
|
||||
_StateIdT _M_next; // outgoing transition
|
||||
union // Since they are mutual exclusive.
|
||||
{
|
||||
_StateIdT _M_alt; // for _S_opcode_alternative
|
||||
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
||||
unsigned int _M_backref_index; // for _S_opcode_backref
|
||||
};
|
||||
_MatcherT _M_matches; // for _S_opcode_match
|
||||
|
||||
explicit _State(_OpcodeT __opcode)
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
||||
@ -82,8 +87,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
{ }
|
||||
|
||||
_State(_OpcodeT __opcode, unsigned __index)
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__index)
|
||||
{ }
|
||||
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
||||
{
|
||||
if (__opcode == _S_opcode_subexpr_begin
|
||||
|| __opcode == _S_opcode_subexpr_end)
|
||||
_M_subexpr = __index;
|
||||
else if (__opcode == _S_opcode_backref)
|
||||
_M_backref_index = __index;
|
||||
}
|
||||
|
||||
_State(_StateIdT __next, _StateIdT __alt)
|
||||
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
|
||||
@ -174,7 +185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_M_insert_subexpr_begin()
|
||||
{
|
||||
auto __id = _M_subexpr_count++;
|
||||
_M_paren_stack.push(__id);
|
||||
_M_paren_stack.push_back(__id);
|
||||
this->push_back(_StateT(_S_opcode_subexpr_begin, __id));
|
||||
return this->size()-1;
|
||||
}
|
||||
@ -182,26 +193,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_StateIdT
|
||||
_M_insert_subexpr_end()
|
||||
{
|
||||
this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.top()));
|
||||
_M_paren_stack.pop();
|
||||
this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.back()));
|
||||
_M_paren_stack.pop_back();
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_backref(bool __b)
|
||||
{ _M_has_backref = __b; }
|
||||
_StateIdT
|
||||
_M_insert_backref(unsigned int __index);
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_dot(std::ostream& __ostr) const;
|
||||
#endif
|
||||
|
||||
_FlagT _M_flags;
|
||||
_StateIdT _M_start_state;
|
||||
_StateSet _M_accepting_states;
|
||||
_SizeT _M_subexpr_count;
|
||||
bool _M_has_backref;
|
||||
std::stack<unsigned int> _M_paren_stack;
|
||||
_FlagT _M_flags;
|
||||
_StateIdT _M_start_state;
|
||||
_StateSet _M_accepting_states;
|
||||
_SizeT _M_subexpr_count;
|
||||
bool _M_has_backref;
|
||||
std::vector<unsigned int> _M_paren_stack;
|
||||
};
|
||||
|
||||
/// Describes a sequence of one or more %_State, its current start
|
||||
|
@ -50,6 +50,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
case _S_opcode_subexpr_end:
|
||||
ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
|
||||
break;
|
||||
case _S_opcode_backref:
|
||||
ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
ostr << "match next=" << _M_next;
|
||||
break;
|
||||
@ -87,6 +90,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
<< _M_subexpr << "\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
|
||||
break;
|
||||
case _S_opcode_backref:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
|
||||
<< _M_subexpr << "\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
|
||||
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
|
||||
@ -115,6 +123,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
_StateIdT _NFA<_CharT, _TraitsT>::
|
||||
_M_insert_backref(unsigned int __index)
|
||||
{
|
||||
// To figure out whether a backref is valid, a stack is used to store
|
||||
// unfinished sub-expressions. For example, when parsing
|
||||
// "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
|
||||
// sub expressions are parsed or partially parsed(in the stack), aka,
|
||||
// "(a..", "(b)" and "(c..").
|
||||
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
|
||||
// time, "\\2" is valid, but "\\1" and "\\3" are not.
|
||||
if (__index >= _M_subexpr_count)
|
||||
__throw_regex_error(regex_constants::error_backref);
|
||||
for (auto __it : _M_paren_stack)
|
||||
if (__index == __it)
|
||||
__throw_regex_error(regex_constants::error_backref);
|
||||
_M_has_backref = true;
|
||||
this->push_back(_StateT(_S_opcode_backref, __index));
|
||||
return this->size()-1;
|
||||
}
|
||||
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
_StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
|
||||
operator=(const _StateSeq& __rhs)
|
||||
|
@ -745,8 +745,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
if (_M_match_token(_ScannerT::_S_token_backref))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
||||
_M_state_store._M_set_backref(true);
|
||||
//return true;
|
||||
_M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
|
||||
_M_insert_backref(_M_cur_int_value(10))));
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
|
||||
{
|
||||
|
@ -82,10 +82,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
__it.matched = false;
|
||||
}
|
||||
|
||||
_BiIter _M_current;
|
||||
_BiIter _M_end;
|
||||
_BiIter _M_current;
|
||||
_BiIter _M_end;
|
||||
_ResultsT& _M_results;
|
||||
_FlagT _M_flags;
|
||||
_FlagT _M_flags;
|
||||
};
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
@ -96,16 +96,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
|
||||
_DFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
_ResultsT& __results,
|
||||
const _RegexT& __nfa,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
|
||||
_M_nfa(__nfa)
|
||||
_M_traits(_TraitsT()), _M_nfa(__nfa)
|
||||
{ }
|
||||
|
||||
bool
|
||||
@ -121,6 +121,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
bool
|
||||
_M_dfs(_StateIdT __i);
|
||||
|
||||
_TraitsT _M_traits;
|
||||
const _RegexT& _M_nfa;
|
||||
};
|
||||
|
||||
|
@ -63,8 +63,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__results.at(__state._M_subexpr).second = __current;
|
||||
__results.at(__state._M_subexpr).matched = true;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__results.at(__state._M_subexpr).matched = __ret;
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
@ -75,6 +76,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
--__current;
|
||||
}
|
||||
break;
|
||||
// First fetch the matched result from __results as __submatch;
|
||||
// then compare it with
|
||||
// (__current, __current + (__submatch.second - __submatch.first))
|
||||
// If matched, keep going; else just return to try another state.
|
||||
case _S_opcode_backref:
|
||||
{
|
||||
auto& __submatch = __results.at(__state._M_backref_index);
|
||||
if (!__submatch.matched)
|
||||
break;
|
||||
auto __last = __current;
|
||||
for (auto __tmp = __submatch.first;
|
||||
__last != __end && __tmp != __submatch.second;
|
||||
++__tmp)
|
||||
++__last;
|
||||
if (_M_traits.transform(__submatch.first, __submatch.second)
|
||||
== _M_traits.transform(__current, __last))
|
||||
{
|
||||
auto __backup = __current;
|
||||
__current = __last;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__current = __backup;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
if (__match_mode)
|
||||
__ret = __current == __end;
|
||||
|
@ -0,0 +1,78 @@
|
||||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-08-10 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests ECMAScript back-reference against a std::string.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
regex re("([A-Z])\\1*");
|
||||
smatch m;
|
||||
{
|
||||
string s = "AAAA";
|
||||
regex_match(s, m, re);
|
||||
VERIFY( m[0].matched );
|
||||
VERIFY( m[1].matched );
|
||||
VERIFY( std::string(m[0].first, m[0].second) == "AAAA" );
|
||||
VERIFY( std::string(m[1].first, m[1].second) == "A" );
|
||||
}
|
||||
{
|
||||
string s = "BBBB";
|
||||
regex_match(s, m, re);
|
||||
VERIFY( m[0].matched );
|
||||
VERIFY( m[1].matched );
|
||||
VERIFY( std::string(m[0].first, m[0].second) == "BBBB" );
|
||||
VERIFY( std::string(m[1].first, m[1].second) == "B" );
|
||||
}
|
||||
{
|
||||
string s = "BBBA";
|
||||
regex_match(s, m, re);
|
||||
VERIFY( !m[0].matched );
|
||||
VERIFY( !m[1].matched );
|
||||
}
|
||||
{
|
||||
try
|
||||
{
|
||||
regex re("(a(b)(c\\1(d)))");
|
||||
VERIFY( false );
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
VERIFY( true );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user