ad9ec7b3c5
PR libstdc++/61424 * include/bits/regex.tcc (__regex_algo_impl<>): Use DFS for ECMAScript, not just regex containing back-references. * include/bits/regex_compiler.tcc (_Compiler<>::_M_disjunction): exchange _M_next and _M_alt for alternative operator, making matching from left to right. * include/bits/regex_executor.h (_State_info<>::_M_get_sol_pos): Add position tracking from DFS. * include/bits/regex_executor.tcc (_Executor<>::_M_main_dispatch, _Executor<>::_M_dfs): Likewise. * include/bits/regex_scanner.h: Remove unused enum entry. * testsuite/28_regex/algorithms/regex_search/61424.cc: New testcase from PR. From-SVN: r212184
263 lines
6.5 KiB
C++
263 lines
6.5 KiB
C++
// class template regex -*- C++ -*-
|
|
|
|
// Copyright (C) 2013-2014 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
|
|
|
|
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
|
|
|
|
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
|
|
|
|
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
|
|
|
|
/**
 * @file bits/regex_scanner.h
 * This is an internal header file, included by other library headers.
 * Do not attempt to use it directly. @headername{regex}
 */
|
|
|
|
namespace std _GLIBCXX_VISIBILITY(default)
|
|
{
|
|
namespace __detail
|
|
{
|
|
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|
|
|
/**
|
|
* @addtogroup regex-detail
|
|
* @{
|
|
*/
|
|
|
|
struct _ScannerBase
|
|
{
|
|
public:
|
|
/// Token types returned from the scanner.
|
|
enum _TokenT
|
|
{
|
|
_S_token_anychar,
|
|
_S_token_ord_char,
|
|
_S_token_oct_num,
|
|
_S_token_hex_num,
|
|
_S_token_backref,
|
|
_S_token_subexpr_begin,
|
|
_S_token_subexpr_no_group_begin,
|
|
_S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
|
|
_S_token_subexpr_end,
|
|
_S_token_bracket_begin,
|
|
_S_token_bracket_neg_begin,
|
|
_S_token_bracket_end,
|
|
_S_token_interval_begin,
|
|
_S_token_interval_end,
|
|
_S_token_quoted_class,
|
|
_S_token_char_class_name,
|
|
_S_token_collsymbol,
|
|
_S_token_equiv_class_name,
|
|
_S_token_opt,
|
|
_S_token_or,
|
|
_S_token_closure0,
|
|
_S_token_closure1,
|
|
_S_token_line_begin,
|
|
_S_token_line_end,
|
|
_S_token_word_bound, // neg if _M_value[0] == 'n'
|
|
_S_token_comma,
|
|
_S_token_dup_count,
|
|
_S_token_eof,
|
|
_S_token_unknown
|
|
};
|
|
|
|
protected:
|
|
typedef regex_constants::syntax_option_type _FlagT;
|
|
|
|
enum _StateT
|
|
{
|
|
_S_state_normal,
|
|
_S_state_in_brace,
|
|
_S_state_in_bracket,
|
|
};
|
|
|
|
protected:
|
|
_ScannerBase(_FlagT __flags)
|
|
: _M_state(_S_state_normal),
|
|
_M_flags(__flags),
|
|
_M_escape_tbl(_M_is_ecma()
|
|
? _M_ecma_escape_tbl
|
|
: _M_awk_escape_tbl),
|
|
_M_spec_char(_M_is_ecma()
|
|
? _M_ecma_spec_char
|
|
: _M_is_basic()
|
|
? _M_basic_spec_char
|
|
: _M_extended_spec_char),
|
|
_M_at_bracket_start(false)
|
|
{ }
|
|
|
|
protected:
|
|
const char*
|
|
_M_find_escape(char __c)
|
|
{
|
|
auto __it = _M_escape_tbl;
|
|
for (; __it->first != '\0'; ++__it)
|
|
if (__it->first == __c)
|
|
return &__it->second;
|
|
return nullptr;
|
|
}
|
|
|
|
bool
|
|
_M_is_ecma() const
|
|
{ return _M_flags & regex_constants::ECMAScript; }
|
|
|
|
bool
|
|
_M_is_basic() const
|
|
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
|
|
|
|
bool
|
|
_M_is_extended() const
|
|
{
|
|
return _M_flags & (regex_constants::extended
|
|
| regex_constants::egrep
|
|
| regex_constants::awk);
|
|
}
|
|
|
|
bool
|
|
_M_is_grep() const
|
|
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
|
|
|
|
bool
|
|
_M_is_awk() const
|
|
{ return _M_flags & regex_constants::awk; }
|
|
|
|
protected:
|
|
const std::pair<char, _TokenT> _M_token_tbl[9] =
|
|
{
|
|
{'^', _S_token_line_begin},
|
|
{'$', _S_token_line_end},
|
|
{'.', _S_token_anychar},
|
|
{'*', _S_token_closure0},
|
|
{'+', _S_token_closure1},
|
|
{'?', _S_token_opt},
|
|
{'|', _S_token_or},
|
|
{'\n', _S_token_or}, // grep and egrep
|
|
{'\0', _S_token_or},
|
|
};
|
|
const std::pair<char, char> _M_ecma_escape_tbl[8] =
|
|
{
|
|
{'0', '\0'},
|
|
{'b', '\b'},
|
|
{'f', '\f'},
|
|
{'n', '\n'},
|
|
{'r', '\r'},
|
|
{'t', '\t'},
|
|
{'v', '\v'},
|
|
{'\0', '\0'},
|
|
};
|
|
const std::pair<char, char> _M_awk_escape_tbl[11] =
|
|
{
|
|
{'"', '"'},
|
|
{'/', '/'},
|
|
{'\\', '\\'},
|
|
{'a', '\a'},
|
|
{'b', '\b'},
|
|
{'f', '\f'},
|
|
{'n', '\n'},
|
|
{'r', '\r'},
|
|
{'t', '\t'},
|
|
{'v', '\v'},
|
|
{'\0', '\0'},
|
|
};
|
|
const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
|
|
const char* _M_basic_spec_char = ".[\\*^$";
|
|
const char* _M_extended_spec_char = ".[\\()*+?{|^$";
|
|
|
|
_StateT _M_state;
|
|
_FlagT _M_flags;
|
|
_TokenT _M_token;
|
|
const std::pair<char, char>* _M_escape_tbl;
|
|
const char* _M_spec_char;
|
|
bool _M_at_bracket_start;
|
|
};
|
|
|
|
/**
|
|
* @brief Scans an input range for regex tokens.
|
|
*
|
|
* The %_Scanner class interprets the regular expression pattern in
|
|
* the input range passed to its constructor as a sequence of parse
|
|
* tokens passed to the regular expression compiler. The sequence
|
|
* of tokens provided depends on the flag settings passed to the
|
|
* constructor: different regular expression grammars will interpret
|
|
* the same input pattern in syntactically different ways.
|
|
*/
|
|
template<typename _CharT>
|
|
class _Scanner
|
|
: public _ScannerBase
|
|
{
|
|
public:
|
|
typedef const _CharT* _IterT;
|
|
typedef std::basic_string<_CharT> _StringT;
|
|
typedef regex_constants::syntax_option_type _FlagT;
|
|
typedef const std::ctype<_CharT> _CtypeT;
|
|
|
|
_Scanner(_IterT __begin, _IterT __end,
|
|
_FlagT __flags, std::locale __loc);
|
|
|
|
void
|
|
_M_advance();
|
|
|
|
_TokenT
|
|
_M_get_token() const
|
|
{ return _M_token; }
|
|
|
|
const _StringT&
|
|
_M_get_value() const
|
|
{ return _M_value; }
|
|
|
|
#ifdef _GLIBCXX_DEBUG
|
|
std::ostream&
|
|
_M_print(std::ostream&);
|
|
#endif
|
|
|
|
private:
|
|
void
|
|
_M_scan_normal();
|
|
|
|
void
|
|
_M_scan_in_bracket();
|
|
|
|
void
|
|
_M_scan_in_brace();
|
|
|
|
void
|
|
_M_eat_escape_ecma();
|
|
|
|
void
|
|
_M_eat_escape_posix();
|
|
|
|
void
|
|
_M_eat_escape_awk();
|
|
|
|
void
|
|
_M_eat_class(char);
|
|
|
|
_IterT _M_current;
|
|
_IterT _M_end;
|
|
_CtypeT& _M_ctype;
|
|
_StringT _M_value;
|
|
void (_Scanner::* _M_eat_escape)();
|
|
};
|
|
|
|
//@} regex-detail
|
|
_GLIBCXX_END_NAMESPACE_VERSION
|
|
} // namespace __detail
|
|
} // namespace std
|
|
|
|
#include <bits/regex_scanner.tcc>
|