849cab7b75
2010-06-25 Stephen M. Webb <stephen.webb@bregmasoft.ca> Initial regex implementation. * include/std/regex: Modified to use bits/regex_* headers. * include/bits/regex_compiler.h: New file. * include/bits/regex_constants.h: New file. * include/bits/regex_cursor.h: New file. * include/bits/regex_error.h: New file. * include/bits/regex_grep_matcher.h: New file. * include/bits/regex_grep_matcher.tcc: New file. * include/bits/regex.h: New file. * include/bits/regex_nfa.h: New file. * include/bits/regex_nfa.tcc: New file. * include/Makefile.am: Added above new files. * include/Makefile.in: Regenerated. * testsuite/28_regex/02_definitions: New file. * testsuite/28_regex/03_requirements: New file. * testsuite/28_regex/03_requirements/typedefs.cc: New file. * testsuite/28_regex/04_header: New file. * testsuite/28_regex/04_header/regex: New file. * testsuite/28_regex/04_header/regex/std_c++0x_neg.cc: New file. * testsuite/28_regex/05_constants: New file. * testsuite/28_regex/05_constants/error_type.cc: New file. * testsuite/28_regex/05_constants/match_flag_type.cc: New file. * testsuite/28_regex/05_constants/syntax_option_type.cc: New file. * testsuite/28_regex/06_exception_type: New file. * testsuite/28_regex/06_exception_type/regex_error.cc: New file. * testsuite/28_regex/07_traits: New file. * testsuite/28_regex/07_traits/char: New file. * testsuite/28_regex/07_traits/char/ctor.cc: New file. * testsuite/28_regex/07_traits/char/isctype.cc: New file. * testsuite/28_regex/07_traits/char/length.cc: New file. * testsuite/28_regex/07_traits/char/lookup_classname.cc: New file. * testsuite/28_regex/07_traits/char/lookup_collatename.cc: New file. * testsuite/28_regex/07_traits/char/transform.cc: New file. * testsuite/28_regex/07_traits/char/transform_primary.cc: New file. * testsuite/28_regex/07_traits/char/translate.cc: New file. * testsuite/28_regex/07_traits/char/translate_nocase.cc: New file. * testsuite/28_regex/07_traits/char/value.cc: New file. 
* testsuite/28_regex/07_traits/wchar_t: New file. * testsuite/28_regex/07_traits/wchar_t/ctor.cc: New file. * testsuite/28_regex/07_traits/wchar_t/length.cc: New file. * testsuite/28_regex/07_traits/wchar_t/transform.cc: New file. * testsuite/28_regex/07_traits/wchar_t/translate.cc: New file. * testsuite/28_regex/07_traits/wchar_t/translate_nocase.cc: New file. * testsuite/28_regex/07_traits/wchar_t/value.cc: New file. * testsuite/28_regex/08_basic_regex: New file. * testsuite/28_regex/08_basic_regex/assign: New file. * testsuite/28_regex/08_basic_regex/assign/char: New file. * testsuite/28_regex/08_basic_regex/assign/char/cstring.cc: New file. * testsuite/28_regex/08_basic_regex/assign/char/cstring_op.cc: New file. * testsuite/28_regex/08_basic_regex/assign/char/moveable.cc: New file. * testsuite/28_regex/08_basic_regex/assign/char/pstring.cc: New file. * testsuite/28_regex/08_basic_regex/assign/char/range.cc: New file. * testsuite/28_regex/08_basic_regex/assign/char/string.cc: New file. * testsuite/28_regex/08_basic_regex/assign/char/string_op.cc: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t/cstring.cc: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t/cstring_op.cc: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t/pstring.cc: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t/range.cc: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t/string.cc: New file. * testsuite/28_regex/08_basic_regex/assign/wchar_t/string_op.cc: New file. * testsuite/28_regex/08_basic_regex/ctors: New file. * testsuite/28_regex/08_basic_regex/ctors/basic: New file. * testsuite/28_regex/08_basic_regex/ctors/basic/cstring.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/basic/pstring_char.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/basic/pstring_wchar_t.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/basic/string_range_01_02_03.cc: New file. 
* testsuite/28_regex/08_basic_regex/ctors/char: New file. * testsuite/28_regex/08_basic_regex/ctors/char/cstring_awk.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/char/cstring.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/char/cstring_ecma.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/char/cstring_egrep.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/char/cstring_grep.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/char/default.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/char/range.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/copy_char.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/extended: New file. * testsuite/28_regex/08_basic_regex/ctors/extended/cstring.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/extended/string_range_01_02_03.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/move_char.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/string_char.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/string_wchar_t.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/wchar_t: New file. * testsuite/28_regex/08_basic_regex/ctors/wchar_t/cstring.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/wchar_t/default.cc: New file. * testsuite/28_regex/08_basic_regex/ctors/wchar_t/range.cc: New file. * testsuite/28_regex/08_basic_regex/regex.cc: New file. * testsuite/28_regex/09_sub_match: New file. * testsuite/28_regex/09_sub_match/cast_char.cc: New file. * testsuite/28_regex/09_sub_match/cast_wchar_t.cc: New file. * testsuite/28_regex/09_sub_match/length.cc: New file. * testsuite/28_regex/09_sub_match/typedefs.cc: New file. * testsuite/28_regex/10_match_results: New file. * testsuite/28_regex/10_match_results/ctors: New file. * testsuite/28_regex/10_match_results/ctors/char: New file. * testsuite/28_regex/10_match_results/ctors/char/default.cc: New file. * testsuite/28_regex/10_match_results/ctors/wchar_t: New file. 
* testsuite/28_regex/10_match_results/ctors/wchar_t/default.cc: New file. * testsuite/28_regex/10_match_results/typedefs.cc: New file. * testsuite/28_regex/11_algorithms: New file. * testsuite/28_regex/11_algorithms/02_match: New file. * testsuite/28_regex/11_algorithms/02_match/basic: New file. * testsuite/28_regex/11_algorithms/02_match/basic/string_01.cc: New file. * testsuite/28_regex/11_algorithms/02_match/basic/string_range_00_03.cc: New file. * testsuite/28_regex/11_algorithms/02_match/basic/string_range_01_03.cc: New file. * testsuite/28_regex/11_algorithms/02_match/basic/string_range_02_03.cc: New file. * testsuite/28_regex/11_algorithms/02_match/extended: New file. * testsuite/28_regex/11_algorithms/02_match/extended/cstring_plus.cc: New file. * testsuite/28_regex/11_algorithms/02_match/extended/cstring_questionmark.cc: New file. * testsuite/28_regex/11_algorithms/02_match/extended/string_any.cc: New file. * testsuite/28_regex/11_algorithms/02_match/extended/string_range_00_03.cc: New file. * testsuite/28_regex/11_algorithms/02_match/extended/string_range_01_03.cc: New file. * testsuite/28_regex/11_algorithms/02_match/extended/string_range_02_03.cc: New file. * testsuite/28_regex/12_iterators: New file. * testsuite/28_regex/12_iterators/regex_iterator: New file. * testsuite/28_regex/12_iterators/regex_iterator/ctors: New file. * testsuite/28_regex/12_iterators/regex_iterator/ctors/char: New file. * testsuite/28_regex/12_iterators/regex_iterator/ctors/char/default.cc: New file. * testsuite/28_regex/12_iterators/regex_iterator/ctors/wchar_t: New file. * testsuite/28_regex/12_iterators/regex_iterator/ctors/wchar_t/default.cc: New file. * testsuite/28_regex/12_iterators/regex_iterator/typedefs.cc: New file. * testsuite/28_regex/12_iterators/regex_token_iterator: New file. * testsuite/28_regex/12_iterators/regex_token_iterator/ctors: New file. * testsuite/28_regex/12_iterators/regex_token_iterator/ctors/char: New file. 
* testsuite/28_regex/12_iterators/regex_token_iterator/ctors/char/default.cc: New file. * testsuite/28_regex/12_iterators/regex_token_iterator/ctors/wchar_t: New file. * testsuite/28_regex/12_iterators/regex_token_iterator/ctors/wchar_t/default.cc: New file. * testsuite/28_regex/12_iterators/regex_token_iterator/typedefs.cc: New file. * testsuite/28_regex/13_ecmascript: New file. From-SVN: r161410
404 lines
10 KiB
C++
404 lines
10 KiB
C++
// class template regex -*- C++ -*-
|
|
|
|
// Copyright (C) 2010 Free Software Foundation, Inc.
|
|
//
|
|
// This file is part of the GNU ISO C++ Library. This library is free
|
|
// software; you can redistribute it and/or modify it under the
|
|
// terms of the GNU General Public License as published by the
|
|
// Free Software Foundation; either version 3, or (at your option)
|
|
// any later version.
|
|
|
|
// This library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
|
|
// Under Section 7 of GPL version 3, you are granted additional
|
|
// permissions described in the GCC Runtime Library Exception, version
|
|
// 3.1, as published by the Free Software Foundation.
|
|
|
|
// You should have received a copy of the GNU General Public License and
|
|
// a copy of the GCC Runtime Library Exception along with this program;
|
|
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
// <http://www.gnu.org/licenses/>.
|
|
|
|
/**
|
|
* @file bits/regex_nfa.h
|
|
* This is an internal header file, included by other library headers.
|
|
* You should not attempt to use it directly.
|
|
*/
|
|
|
|
namespace std
|
|
{
|
|
namespace __regex
|
|
{
|
|
|
|
// Abstract base class for automata.  A concrete automaton (an NFA, a DFA,
// ...) derives from it and reports how many subexpressions it tracks.
class _Automaton
{
public:
  // Type used for the subexpression count.
  typedef unsigned int _SizeT;

  // Virtual so that a derived automaton can be destroyed through a pointer
  // or reference to this base.
  virtual
  ~_Automaton()
  { }

  // Returns the number of subexpressions; supplied by the derived class.
  virtual _SizeT
  _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
  // Debug builds only: writes a description of the automaton to __ostr and
  // returns a stream reference.  (Presumably graphviz "dot" output, as for
  // _State::_M_dot -- confirm against the implementation.)
  virtual std::ostream&
  _M_dot(std::ostream& __ostr) const = 0;
#endif
};
|
|
|
|
// Generic shared pointer to an automaton.
|
|
typedef std::shared_ptr<_Automaton> _AutomatonPtr;
|
|
|
|
// Operation codes giving the type of the transition(s) leaving a state of
// the base NFA that represents the regular expression.  The numeric values
// are assigned explicitly.
enum _Opcode
{
  _S_opcode_unknown       =   0,  // no known operation
  _S_opcode_alternative   =   1,  // state with a second (alt) transition
  _S_opcode_subexpr_begin =   4,  // start of a subexpression (uses a tagger)
  _S_opcode_subexpr_end   =   5,  // end of a subexpression (uses a tagger)
  _S_opcode_match         = 100,  // state guarded by a matcher
  _S_opcode_accept        = 255   // accepting state
};
|
|
|
|
// Provides a generic facade for a templated match_results.
|
|
struct _Results
|
|
{
|
|
virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
|
|
virtual void _M_set_matched(int __i, bool __is_matched) = 0;
|
|
};
|
|
|
|
// Tags current state (for subexpr begin/end).  A tagger is a callable that
// receives the current cursor and the results facade.
|
|
typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
|
|
|
|
template<typename _FwdIterT, typename _TraitsT>
|
|
struct _StartTagger
|
|
: public _Tagger
|
|
{
|
|
explicit
|
|
_StartTagger(int __i)
|
|
: _M_index(__i)
|
|
{ }
|
|
|
|
void
|
|
operator()(const _PatternCursor& __pc, _Results& __r)
|
|
{ __r._M_set_pos(_M_index, 0, __pc); }
|
|
|
|
int _M_index;
|
|
};
|
|
|
|
template<typename _FwdIterT, typename _TraitsT>
|
|
struct _EndTagger
|
|
: public _Tagger
|
|
{
|
|
explicit
|
|
_EndTagger(int __i)
|
|
: _M_index(__i)
|
|
{ }
|
|
|
|
void
|
|
operator()(const _PatternCursor& __pc, _Results& __r)
|
|
{ __r._M_set_pos(_M_index, 1, __pc); }
|
|
|
|
int _M_index;
|
|
_FwdIterT _M_pos;
|
|
};
|
|
// Indicates if current state matches cursor current.  A matcher is a
// callable that receives the cursor and returns true on a match.
|
|
typedef std::function<bool (const _PatternCursor&)> _Matcher;
|
|
|
|
// Matches any character
|
|
inline bool
|
|
_AnyMatcher(const _PatternCursor&)
|
|
{ return true; }
|
|
|
|
// Matches a single character
|
|
template<typename _InIterT, typename _TraitsT>
|
|
struct _CharMatcher
|
|
: public _Matcher
|
|
{
|
|
typedef typename _TraitsT::char_type char_type;
|
|
|
|
explicit
|
|
_CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
|
|
: _M_traits(__t), _M_c(_M_traits.translate(__c))
|
|
{ }
|
|
|
|
bool
|
|
operator()(const _PatternCursor& __pc) const
|
|
{
|
|
typedef const _SpecializedCursor<_InIterT>& _CursorT;
|
|
_CursorT __c = static_cast<_CursorT>(__pc);
|
|
return _M_traits.translate(__c._M_current()) == _M_c;
|
|
}
|
|
|
|
const _TraitsT& _M_traits;
|
|
char_type _M_c;
|
|
};
|
|
|
|
// Matches a character range (bracket expression)
|
|
template<typename _InIterT, typename _TraitsT>
|
|
struct _RangeMatcher
|
|
: public _Matcher
|
|
{
|
|
typedef typename _TraitsT::char_type _CharT;
|
|
typedef std::basic_string<_CharT> _StringT;
|
|
|
|
explicit
|
|
_RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
|
|
: _M_traits(__t), _M_is_non_matching(__is_non_matching)
|
|
{ }
|
|
|
|
bool
|
|
operator()(const _PatternCursor& __pc) const
|
|
{
|
|
typedef const _SpecializedCursor<_InIterT>& _CursorT;
|
|
_CursorT __c = static_cast<_CursorT>(__pc);
|
|
return true;
|
|
}
|
|
|
|
void
|
|
_M_add_char(_CharT __c)
|
|
{ }
|
|
|
|
void
|
|
_M_add_collating_element(const _StringT& __s)
|
|
{ }
|
|
|
|
void
|
|
_M_add_equivalence_class(const _StringT& __s)
|
|
{ }
|
|
|
|
void
|
|
_M_add_character_class(const _StringT& __s)
|
|
{ }
|
|
|
|
void
|
|
_M_make_range()
|
|
{ }
|
|
|
|
const _TraitsT& _M_traits;
|
|
bool _M_is_non_matching;
|
|
};
|
|
|
|
// Identifies a state in the NFA.  The _M_insert_* members of _Nfa return
// the index of the state they appended as a value of this type.
|
|
typedef int _StateIdT;
|
|
|
|
// The special case in which a state identifier is not an index.  It is the
// initial _M_next of a newly constructed _State and the default second end
// of a single-node _StateSeq.
|
|
static const _StateIdT _S_invalid_state_id = -1;
|
|
|
|
|
|
// An individual state in an NFA
|
|
//
|
|
// In this case a "state" is an entry in the NFA definition coupled with its
|
|
// outgoing transition(s). All states have a single outgoing transition,
|
|
// except for accepting states (which have no outgoing transitions) and alt
|
|
// states, which have two outgoing transitions.
|
|
//
|
|
struct _State
|
|
{
|
|
typedef int _OpcodeT;
|
|
|
|
_OpcodeT _M_opcode; // type of outgoing transition
|
|
_StateIdT _M_next; // outgoing tranition
|
|
_StateIdT _M_alt; // for _S_opcode_alternative
|
|
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
|
|
_Tagger _M_tagger; // for _S_opcode_subexpr_*
|
|
_Matcher _M_matches; // for _S_opcode_match
|
|
|
|
explicit _State(_OpcodeT __opcode)
|
|
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
|
|
{ }
|
|
|
|
_State(const _Matcher& __m)
|
|
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
|
|
{ }
|
|
|
|
_State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
|
|
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
|
|
_M_tagger(__t)
|
|
{ }
|
|
|
|
_State(_StateIdT __next, _StateIdT __alt)
|
|
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
|
|
{ }
|
|
|
|
#ifdef _GLIBCXX_DEBUG
|
|
std::ostream&
|
|
_M_print(std::ostream& ostr) const;
|
|
|
|
// Prints graphviz dot commands for state.
|
|
std::ostream&
|
|
_M_dot(std::ostream& __ostr, _StateIdT __id) const;
|
|
#endif
|
|
};
|
|
|
|
|
|
// The Grep Matcher works on sets of states.  Here are sets of states,
// represented as an ordered set of state identifiers.
|
|
typedef std::set<_StateIdT> _StateSet;
|
|
|
|
// A collection of all states making up an NFA
|
|
//
|
|
// An NFA is a 4-tuple M = (K, S, s, F), where
|
|
// K is a finite set of states,
|
|
// S is the alphabet of the NFA,
|
|
// s is the initial state,
|
|
// F is a set of final (accepting) states.
|
|
//
|
|
// This NFA class is templated on S, a type that will hold values of the
|
|
// underlying alphabet (without regard to semantics of that alphabet). The
|
|
// other elements of the tuple are generated during construction of the NFA
|
|
// and are available through accessor member functions.
|
|
//
|
|
class _Nfa
|
|
: public _Automaton, public std::vector<_State>
|
|
{
|
|
public:
|
|
typedef _State _StateT;
|
|
typedef unsigned int _SizeT;
|
|
typedef regex_constants::syntax_option_type _FlagT;
|
|
|
|
public:
|
|
_Nfa(_FlagT __f)
|
|
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
|
|
{ }
|
|
|
|
~_Nfa()
|
|
{ }
|
|
|
|
_FlagT
|
|
_M_options() const
|
|
{ return _M_flags; }
|
|
|
|
_StateIdT
|
|
_M_start() const
|
|
{ return _M_start_state; }
|
|
|
|
const _StateSet&
|
|
_M_final_states() const
|
|
{ return _M_accepting_states; }
|
|
|
|
_SizeT
|
|
_M_sub_count() const
|
|
{ return _M_subexpr_count; }
|
|
|
|
_StateIdT
|
|
_M_insert_accept()
|
|
{
|
|
this->push_back(_StateT(_S_opcode_accept));
|
|
_M_accepting_states.insert(this->size()-1);
|
|
return this->size()-1;
|
|
}
|
|
|
|
_StateIdT
|
|
_M_insert_alt(_StateIdT __next, _StateIdT __alt)
|
|
{
|
|
this->push_back(_StateT(__next, __alt));
|
|
return this->size()-1;
|
|
}
|
|
|
|
_StateIdT
|
|
_M_insert_matcher(_Matcher __m)
|
|
{
|
|
this->push_back(_StateT(__m));
|
|
return this->size()-1;
|
|
}
|
|
|
|
_StateIdT
|
|
_M_insert_subexpr_begin(const _Tagger& __t)
|
|
{
|
|
this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, __t));
|
|
return this->size()-1;
|
|
}
|
|
|
|
_StateIdT
|
|
_M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
|
|
{
|
|
this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
|
|
return this->size()-1;
|
|
}
|
|
|
|
#ifdef _GLIBCXX_DEBUG
|
|
std::ostream&
|
|
_M_dot(std::ostream& __ostr) const;
|
|
#endif
|
|
|
|
private:
|
|
_FlagT _M_flags;
|
|
_StateIdT _M_start_state;
|
|
_StateSet _M_accepting_states;
|
|
_SizeT _M_subexpr_count;
|
|
};
|
|
|
|
// Describes a sequence of one or more %_State, its current start and end(s).
|
|
//
|
|
// This structure contains fragments of an NFA during construction.
|
|
class _StateSeq
|
|
{
|
|
public:
|
|
// Constructs a single-node sequence
|
|
_StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
|
|
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
|
|
{ }
|
|
// Constructs a split sequence from two other sequencces
|
|
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
|
|
: _M_nfa(__e1._M_nfa),
|
|
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
|
|
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
|
|
{ }
|
|
|
|
// Constructs a split sequence from a single sequence
|
|
_StateSeq(const _StateSeq& __e, _StateIdT __id)
|
|
: _M_nfa(__e._M_nfa),
|
|
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
|
|
_M_end1(__id), _M_end2(__e._M_end1)
|
|
{ }
|
|
|
|
// Constructs a copy of a %_StateSeq
|
|
_StateSeq(const _StateSeq& __rhs)
|
|
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
|
|
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
|
|
{ }
|
|
|
|
|
|
_StateSeq& operator=(const _StateSeq& __rhs);
|
|
|
|
_StateIdT
|
|
_M_front() const
|
|
{ return _M_start; }
|
|
|
|
// Extends a sequence by one.
|
|
void
|
|
_M_push_back(_StateIdT __id);
|
|
|
|
// Extends and maybe joins a sequence.
|
|
void
|
|
_M_append(_StateIdT __id);
|
|
|
|
void
|
|
_M_append(_StateSeq& __rhs);
|
|
|
|
// Clones an entire sequence.
|
|
_StateIdT
|
|
_M_clone();
|
|
|
|
private:
|
|
_Nfa& _M_nfa;
|
|
_StateIdT _M_start;
|
|
_StateIdT _M_end1;
|
|
_StateIdT _M_end2;
|
|
|
|
};
|
|
|
|
} // namespace __regex
|
|
} // namespace std
|
|
|
|
#include <bits/regex_nfa.tcc>
|
|
|