regex_compiler.h (__detail::_BracketMatcher): Reorder members to avoid wasted space when not using a cache.

* include/bits/regex_compiler.h (__detail::_BracketMatcher): Reorder members to avoid wasted space when not using a cache. (__detail::_BracketMatcher::_M_ready()): Sort and deduplicate set. * include/bits/regex_compiler.tcc (__detail::_BracketMatcher::_M_apply(_CharT, false_type)): Use binary search on set. * include/bits/regex_executor.h (__detail::_Executor::_Match_mode): New enumeration type to indicate match mode. (__detail::_Executor::_State_info): New type holding members only needed in BFS-mode. Replace unique_ptr<vector<bool>> with unique_ptr<bool[]>. (__detail::_Executor::_M_rep_once_more, __detail::_Executor::_M_dfs): Replace template parameter with run-time function parameter. (__detail::_Executor::_M_main): Likewise. Dispatch to ... (__detail::_Executor::_M_main_dispatch): New overloaded functions to implement DFS and BFS mode. * include/bits/regex_executor.tcc (__detail::_Executor::_M_main): Split implementation into ... (__detail::_Executor::_M_main_dispatch): New overloaded functions. (__detail::_Executor::_M_lookahead): Create nested executor on stack. (__detail::_Executor::_M_rep_once_more): Pass match mode as function argument instead of template argument. (__detail::_Executor::_M_dfs): Likewise. * include/bits/regex_scanner.tcc: Fix typos in comments. * testsuite/performance/28_regex/range.cc: New. From-SVN: r211143
2014-06-02 20:36:14 +01:00 · 2014-06-02 20:36:14 +01:00 · 097f0bcf65
parent 0d732cca67
commit 097f0bcf65
8 changed files with 254 additions and 146 deletions
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@ -9,6 +9,32 @@
 	* testsuite/30_threads/async/forced_unwind.cc: New.
 	* testsuite/30_threads/packaged_task/forced_unwind.cc: New.

+	* include/bits/regex_compiler.h (__detail::_BracketMatcher): Reorder
+	members to avoid wasted space when not using a cache.
+	(__detail::_BracketMatcher::_M_ready()): Sort and deduplicate set.
+	* include/bits/regex_compiler.tcc
+	(__detail::_BracketMatcher::_M_apply(_CharT, false_type)): Use binary
+	search on set.
+	* include/bits/regex_executor.h (__detail::_Executor::_Match_mode):
+	New enumeration type to indicate match mode.
+	(__detail::_Executor::_State_info): New type holding members only
+	needed in BFS-mode. Replace unique_ptr<vector<bool>> with
+	unique_ptr<bool[]>.
+	(__detail::_Executor::_M_rep_once_more, __detail::_Executor::_M_dfs):
+	Replace template parameter with run-time function parameter.
+	(__detail::_Executor::_M_main): Likewise. Dispatch to ...
+	(__detail::_Executor::_M_main_dispatch): New overloaded functions to
+	implement DFS and BFS mode.
+	* include/bits/regex_executor.tcc (__detail::_Executor::_M_main):
+	Split implementation into ...
+	(__detail::_Executor::_M_main_dispatch): New overloaded functions.
+	(__detail::_Executor::_M_lookahead): Create nested executor on stack.
+	(__detail::_Executor::_M_rep_once_more): Pass match mode as function
+	argument instead of template argument.
+	(__detail::_Executor::_M_dfs): Likewise.
+	* include/bits/regex_scanner.tcc: Fix typos in comments.
+	* testsuite/performance/28_regex/range.cc: New.
+
 2014-06-02  Rüdiger Sonderfeld  <ruediger@c-plusplus.de>
 	    Jonathan Wakely  <jwakely@redhat.com>

--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@ -32,7 +32,7 @@
 // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
 // algorithm will be used. This algorithm is not enabled by default,
 // and cannot be used if the regex contains back-references, but has better
-// (polynomial instead of exponential) worst case performace.
+// (polynomial instead of exponential) worst case performance.
 // See __regex_algo_impl below.

 namespace std _GLIBCXX_VISIBILITY(default)
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@ -43,7 +43,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    struct _BracketMatcher;

  /**
-   * @brief Builds an NFA from an input iterator interval.
+   * @brief Builds an NFA from an input iterator range.
   *
   * The %_TraitsT type should fulfill requirements [28.3].
   */
@ -329,7 +329,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      operator()(_CharT __ch) const
      {
 	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
-	return _M_apply(__ch, _IsChar());
+	return _M_apply(__ch, _UseCache());
      }

      void
@ -400,21 +400,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      void
      _M_ready()
      {
-	_M_make_cache(_IsChar());
+	std::sort(_M_char_set.begin(), _M_char_set.end());
+	auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
+	_M_char_set.erase(__end, _M_char_set.end());
+	_M_make_cache(_UseCache());
 #ifdef _GLIBCXX_DEBUG
 	_M_is_ready = true;
 #endif
      }

    private:
-      typedef typename is_same<_CharT, char>::type _IsChar;
-      struct _Dummy { };
-      typedef typename conditional<_IsChar::value,
-				   std::bitset<1 << (8 * sizeof(_CharT))>,
-				   _Dummy>::type _CacheT;
-      typedef typename make_unsigned<_CharT>::type _UnsignedCharT;
+      // Currently we only use the cache for char
+      typedef typename std::is_same<_CharT, char>::type _UseCache;
+
+      static constexpr size_t
+      _S_cache_size() { return 1ul << (sizeof(_CharT) * __CHAR_BIT__); }
+
+      struct _Dummy { };
+      typedef typename std::conditional<_UseCache::value,
+					std::bitset<_S_cache_size()>,
+					_Dummy>::type _CacheT;
+      typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT;

-    private:
      bool
      _M_apply(_CharT __ch, false_type) const;

@ -425,9 +432,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      void
      _M_make_cache(true_type)
      {
-	for (size_t __i = 0; __i < _M_cache.size(); __i++)
-	  _M_cache[static_cast<_UnsignedCharT>(__i)] =
-	    _M_apply(__i, false_type());
+	for (unsigned __i = 0; __i < _M_cache.size(); __i++)
+	  _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
      }

      void
@ -435,7 +441,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      { }

    private:
-      _CacheT                                   _M_cache;
      std::vector<_CharT>                       _M_char_set;
      std::vector<_StringT>                     _M_equiv_set;
      std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
@ -444,6 +449,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _TransT                                   _M_translator;
      const _TraitsT&                           _M_traits;
      bool                                      _M_is_non_matching;
+      _CacheT					_M_cache;
 #ifdef _GLIBCXX_DEBUG
      bool                                      _M_is_ready;
 #endif
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@ -37,9 +37,9 @@
 // When compiling, states are *chained* instead of tree- or graph-constructed.
 // It's more like structured programs: there's if statement and loop statement.
 //
-// For alternative structure(say "a|b"), aka "if statement", two branchs should
-// be constructed. However, these two shall merge to an "end_tag" at the end of
-// this operator:
+// For alternative structure (say "a|b"), aka "if statement", two branches
+// should be constructed. However, these two shall merge to an "end_tag" at
+// the end of this operator:
 //
 //                branch1
 //              /        \
@ -151,7 +151,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      else if (_M_match_token(_ScannerT::_S_token_line_end))
 	_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_line_end()));
      else if (_M_match_token(_ScannerT::_S_token_word_bound))
-	// _M_value[0] == 'n' means it's negtive, say "not word boundary".
+	// _M_value[0] == 'n' means it's negative, say "not word boundary".
 	_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
 	      _M_insert_word_bound(_M_value[0] == 'n')));
      else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
@ -256,7 +256,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      auto __end = _M_nfa._M_insert_dummy();
 	      // _M_alt is the "match more" branch, and _M_next is the
 	      // "match less" one. Switch _M_alt and _M_next of all created
-	      // nodes. This is a hacking but IMO works well.
+	      // nodes. This is a hack but IMO works well.
 	      std::stack<_StateIdT> __stack;
 	      for (long __i = 0; __i < __n; ++__i)
 		{
@ -511,12 +511,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    _BracketMatcher<_TraitsT, __icase, __collate>::
    _M_apply(_CharT __ch, false_type) const
    {
-      bool __ret = false;
-      if (std::find(_M_char_set.begin(), _M_char_set.end(),
-		    _M_translator._M_translate(__ch))
-	  != _M_char_set.end())
-	__ret = true;
-      else
+      bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(),
+				      _M_translator._M_translate(__ch));
+      if (!__ret)
 	{
 	  auto __s = _M_translator._M_transform(__ch);
 	  for (auto& __it : _M_range_set)
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@ -42,8 +42,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   */

  /**
-   * @brief Takes a regex and an input string in and
-   * do the matching.
+   * @brief Takes a regex and an input string and does the matching.
   *
   * The %_Executor class has two modes: DFS mode and BFS mode, controlled
   * by the template parameter %__dfs_mode.
@ -52,6 +51,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   bool __dfs_mode>
    class _Executor
    {
+      using __search_mode = integral_constant<bool, __dfs_mode>;
+      using __dfs = true_type;
+      using __bfs = false_type;
+
+      enum class _Match_mode : unsigned char { _Exact, _Prefix };
+
    public:
      typedef typename iterator_traits<_BiIter>::value_type _CharT;
      typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
@ -71,24 +76,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _M_re(__re),
      _M_nfa(*__re._M_automaton),
      _M_results(__results),
-      _M_match_queue(__dfs_mode ? nullptr
-		     : new vector<pair<_StateIdT, _ResultsVec>>()),
      _M_rep_count(_M_nfa.size()),
-      _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
+      _M_states(_M_nfa._M_start(), _M_nfa.size()),
      _M_flags((__flags & regex_constants::match_prev_avail)
 	       ? (__flags
 		  & ~regex_constants::match_not_bol
 		  & ~regex_constants::match_not_bow)
-	       : __flags),
-      _M_start_state(_M_nfa._M_start())
+	       : __flags)
      { }

-      // Set matched when string exactly match the pattern.
+      // Set matched when string exactly matches the pattern.
      bool
      _M_match()
      {
 	_M_current = _M_begin;
-	return _M_main<true>();
+	return _M_main(_Match_mode::_Exact);
      }

      // Set matched when some prefix of the string matches the pattern.
@ -96,24 +98,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _M_search_from_first()
      {
 	_M_current = _M_begin;
-	return _M_main<false>();
+	return _M_main(_Match_mode::_Prefix);
      }

      bool
      _M_search();

    private:
-      template<bool __match_mode>
-	void
-	_M_rep_once_more(_StateIdT);
+      void
+      _M_rep_once_more(_Match_mode __match_mode, _StateIdT);

-      template<bool __match_mode>
-	void
-	_M_dfs(_StateIdT __start);
+      void
+      _M_dfs(_Match_mode __match_mode, _StateIdT __start);

-      template<bool __match_mode>
-	bool
-	_M_main();
+      bool
+      _M_main(_Match_mode __match_mode)
+      { return _M_main_dispatch(__match_mode, __search_mode{}); }
+
+      bool
+      _M_main_dispatch(_Match_mode __match_mode, __dfs);
+
+      bool
+      _M_main_dispatch(_Match_mode __match_mode, __bfs);

      bool
      _M_is_word(_CharT __ch) const
@ -144,6 +150,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      bool
      _M_lookahead(_State<_TraitsT> __state);

+       // Holds additional information used in BFS-mode.
+      template<typename _SearchMode, typename _ResultsVec>
+	struct _State_info;
+
+      template<typename _ResultsVec>
+	struct _State_info<__bfs, _ResultsVec>
+	{
+	  explicit
+	  _State_info(_StateIdT __start, size_t __n)
+	  : _M_start(__start), _M_visited_states(new bool[__n]())
+	  { }
+
+	  bool _M_visited(_StateIdT __i)
+	  {
+	    if (_M_visited_states[__i])
+	      return true;
+	    _M_visited_states[__i] = true;
+	    return false;
+	  }
+
+	  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
+	  { _M_match_queue.emplace_back(__i, __res); }
+
+	  // Saves states that need to be considered for the next character.
+	  vector<pair<_StateIdT, _ResultsVec>>	_M_match_queue;
+	  // Indicates which states are already visited.
+	  unique_ptr<bool[]>			_M_visited_states;
+	  // To record current solution.
+	  _StateIdT _M_start;
+	};
+
+      template<typename _ResultsVec>
+	struct _State_info<__dfs, _ResultsVec>
+	{
+	  explicit
+	  _State_info(_StateIdT __start, size_t) : _M_start(__start)
+	  { }
+
+	  // Dummy implementations for DFS mode.
+	  bool _M_visited(_StateIdT) const { return false; }
+	  void _M_queue(_StateIdT, const _ResultsVec&) { }
+
+	  // To record current solution.
+	  _StateIdT _M_start;
+	};
+
+
    public:
      _ResultsVec                                           _M_cur_results;
      _BiIter                                               _M_current;
@ -152,15 +205,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      const _RegexT&                                        _M_re;
      const _NFAT&                                          _M_nfa;
      _ResultsVec&                                          _M_results;
-      // Used in BFS, saving states that need to be considered for the next
-      // character.
-      unique_ptr<vector<pair<_StateIdT, _ResultsVec>>>      _M_match_queue;
-      // Used in BFS, indicating that which state is already visited.
      vector<pair<_BiIter, int>>                            _M_rep_count;
-      unique_ptr<vector<bool>>                              _M_visited;
+      _State_info<__search_mode, _ResultsVec>		    _M_states;
      _FlagT                                                _M_flags;
-      // To record current solution.
-      _StateIdT                                             _M_start_state;
      // Do we have a solution so far?
      bool                                                  _M_has_sol;
    };
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@ -35,7 +35,7 @@ namespace __detail
 _GLIBCXX_BEGIN_NAMESPACE_VERSION

  template<typename _BiIter, typename _Alloc, typename _TraitsT,
-    bool __dfs_mode>
+	   bool __dfs_mode>
    bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
    _M_search()
    {
@ -45,7 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      do
 	{
 	  _M_current = __cur;
-	  if (_M_main<false>())
+	  if (_M_main(_Match_mode::_Prefix))
 	    return true;
 	}
      // Continue when __cur == _M_end
@ -53,8 +53,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      return false;
    }

-  // This function operates in different modes, DFS mode or BFS mode, indicated
-  // by template parameter __dfs_mode. See _M_main for details.
+  // The _M_main function operates in different modes, DFS mode or BFS mode,
+  // indicated by template parameter __dfs_mode, and dispatches to one of the
+  // _M_main_dispatch overloads.
  //
  // ------------------------------------------------------------
  //
@ -62,12 +63,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  //
  // It applies a Depth-First-Search (aka backtracking) on given NFA and input
  // string.
-  // At the very beginning the executor stands in the start state, then it tries
-  // every possible state transition in current state recursively. Some state
-  // transitions consume input string, say, a single-char-matcher or a
+  // At the very beginning the executor stands in the start state, then it
+  // tries every possible state transition in current state recursively. Some
+  // state transitions consume input string, say, a single-char-matcher or a
  // back-reference matcher; some don't, like assertion or other anchor nodes.
-  // When the input is exhausted and/or the current state is an accepting state,
-  // the whole executor returns true.
+  // When the input is exhausted and/or the current state is an accepting
+  // state, the whole executor returns true.
  //
  // TODO: This approach is exponentially slow for certain input.
  //       Try to compile the NFA to a DFA.
@ -75,6 +76,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  // Time complexity: \Omega(match_length), O(2^(_M_nfa.size()))
  // Space complexity: \theta(match_results.size() + match_length)
  //
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_main_dispatch(_Match_mode __match_mode, __dfs)
+    {
+      _M_has_sol = false;
+      _M_cur_results = _M_results;
+      _M_dfs(__match_mode, _M_states._M_start);
+      return _M_has_sol;
+    }
+
  // ------------------------------------------------------------
  //
  // BFS mode:
@ -84,11 +96,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  //
  // It first computes epsilon closure (states that can be achieved without
  // consuming characters) for every state that's still matching,
-  // using the same DFS algorithm, but doesn't re-enter states (find a true in
-  // _M_visited), nor follows _S_opcode_match.
+  // using the same DFS algorithm, but doesn't re-enter states (using
+  // _M_states._M_visited to check), nor follow _S_opcode_match.
  //
-  // Then apply DFS using every _S_opcode_match (in _M_match_queue) as the start
-  // state.
+  // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
+  // as the start state.
  //
  // It significantly reduces potential duplicate states, so has a better
  // upper bound; but it requires more overhead.
@ -98,60 +110,45 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  // Space complexity: \Omega(_M_nfa.size() + match_results.size())
  //                   O(_M_nfa.size() * match_results.size())
  template<typename _BiIter, typename _Alloc, typename _TraitsT,
-    bool __dfs_mode>
-  template<bool __match_mode>
+	   bool __dfs_mode>
    bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_main()
+    _M_main_dispatch(_Match_mode __match_mode, __bfs)
    {
-      if (__dfs_mode)
+      _M_states._M_queue(_M_states._M_start, _M_results);
+      bool __ret = false;
+      while (1)
 	{
 	  _M_has_sol = false;
-	  _M_cur_results = _M_results;
-	  _M_dfs<__match_mode>(_M_start_state);
-	  return _M_has_sol;
-	}
-      else
-	{
-	  _M_match_queue->push_back(make_pair(_M_start_state, _M_results));
-	  bool __ret = false;
-	  while (1)
+	  if (_M_states._M_match_queue.empty())
+	    break;
+	  std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false);
+	  auto __old_queue = std::move(_M_states._M_match_queue);
+	  for (auto& __task : __old_queue)
 	    {
-	      _M_has_sol = false;
-	      if (_M_match_queue->empty())
-		break;
-	      _M_visited->assign(_M_visited->size(), false);
-	      auto _M_old_queue = std::move(*_M_match_queue);
-	      for (auto __task : _M_old_queue)
-		{
-		  _M_cur_results = __task.second;
-		  _M_dfs<__match_mode>(__task.first);
-		}
-	      if (!__match_mode)
-		__ret |= _M_has_sol;
-	      if (_M_current == _M_end)
-		break;
-	      ++_M_current;
+	      _M_cur_results = std::move(__task.second);
+	      _M_dfs(__match_mode, __task.first);
 	    }
-	  if (__match_mode)
-	    __ret = _M_has_sol;
-	  return __ret;
+	  if (__match_mode == _Match_mode::_Prefix)
+	    __ret |= _M_has_sol;
+	  if (_M_current == _M_end)
+	    break;
+	  ++_M_current;
 	}
+      if (__match_mode == _Match_mode::_Exact)
+	__ret = _M_has_sol;
+      return __ret;
    }

  // Return whether now match the given sub-NFA.
  template<typename _BiIter, typename _Alloc, typename _TraitsT,
-    bool __dfs_mode>
+	   bool __dfs_mode>
    bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
    _M_lookahead(_State<_TraitsT> __state)
    {
      _ResultsVec __what(_M_cur_results.size());
-      auto __sub = std::unique_ptr<_Executor>(new _Executor(_M_current,
-							    _M_end,
-							    __what,
-							    _M_re,
-							    _M_flags));
-      __sub->_M_start_state = __state._M_alt;
-      if (__sub->_M_search_from_first())
+      _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
+      __sub._M_states._M_start = __state._M_alt;
+      if (__sub._M_search_from_first())
 	{
 	  for (size_t __i = 0; __i < __what.size(); __i++)
 	    if (__what[__i].matched)
@ -169,9 +166,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  // we need to spare one more time for potential group capture.
  template<typename _BiIter, typename _Alloc, typename _TraitsT,
    bool __dfs_mode>
-  template<bool __match_mode>
    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_rep_once_more(_StateIdT __i)
+    _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i)
    {
      const auto& __state = _M_nfa[__i];
      auto& __rep_count = _M_rep_count[__i];
@ -180,7 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  auto __back = __rep_count;
 	  __rep_count.first = _M_current;
 	  __rep_count.second = 1;
-	  _M_dfs<__match_mode>(__state._M_alt);
+	  _M_dfs(__match_mode, __state._M_alt);
 	  __rep_count = __back;
 	}
      else
@ -188,24 +184,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  if (__rep_count.second < 2)
 	    {
 	      __rep_count.second++;
-	      _M_dfs<__match_mode>(__state._M_alt);
+	      _M_dfs(__match_mode, __state._M_alt);
 	      __rep_count.second--;
 	    }
 	}
    };

  template<typename _BiIter, typename _Alloc, typename _TraitsT,
-    bool __dfs_mode>
-  template<bool __match_mode>
+	   bool __dfs_mode>
    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_dfs(_StateIdT __i)
+    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
    {
-      if (!__dfs_mode)
-	{
-	  if ((*_M_visited)[__i])
-	    return;
-	  (*_M_visited)[__i] = true;
-	}
+      if (_M_states._M_visited(__i))
+	return;

      const auto& __state = _M_nfa[__i];
      // Every change on _M_cur_results and _M_current will be rolled back after
@ -221,33 +212,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    // Greedy.
 	    if (!__state._M_neg)
 	      {
-		_M_rep_once_more<__match_mode>(__i);
+		_M_rep_once_more(__match_mode, __i);
 		// If it's DFS executor and already accepted, we're done.
 		if (!__dfs_mode || !_M_has_sol)
-		  _M_dfs<__match_mode>(__state._M_next);
+		  _M_dfs(__match_mode, __state._M_next);
 	      }
 	    else // Non-greedy mode
 	      {
 		if (__dfs_mode)
 		  {
 		    // vice-versa.
-		    _M_dfs<__match_mode>(__state._M_next);
+		    _M_dfs(__match_mode, __state._M_next);
 		    if (!_M_has_sol)
-		      _M_rep_once_more<__match_mode>(__i);
+		      _M_rep_once_more(__match_mode, __i);
 		  }
 		else
 		  {
 		    // DON'T attempt anything, because there's already another
-		    // state with higher priority accepted. This state cannot be
-		    // better by attempting its next node.
+		    // state with higher priority accepted. This state cannot
+		    // be better by attempting its next node.
 		    if (!_M_has_sol)
 		      {
-			_M_dfs<__match_mode>(__state._M_next);
+			_M_dfs(__match_mode, __state._M_next);
 			// DON'T attempt anything if it's already accepted. An
 			// accepted state *must* be better than a solution that
 			// matches a non-greedy quantifier one more time.
 			if (!_M_has_sol)
-			  _M_rep_once_more<__match_mode>(__i);
+			  _M_rep_once_more(__match_mode, __i);
 		      }
 		  }
 	      }
@ -258,7 +249,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    auto& __res = _M_cur_results[__state._M_subexpr];
 	    auto __back = __res.first;
 	    __res.first = _M_current;
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	    __res.first = __back;
 	  }
 	  break;
@ -268,27 +259,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    auto __back = __res;
 	    __res.second = _M_current;
 	    __res.matched = true;
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	    __res = __back;
 	  }
 	  break;
 	case _S_opcode_line_begin_assertion:
 	  if (_M_at_begin())
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	  break;
 	case _S_opcode_line_end_assertion:
 	  if (_M_at_end())
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	  break;
 	case _S_opcode_word_boundary:
 	  if (_M_word_boundary(__state) == !__state._M_neg)
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	  break;
 	// Here __state._M_alt offers a single start node for a sub-NFA.
 	// We recursively invoke our algorithm to match the sub-NFA.
 	case _S_opcode_subexpr_lookahead:
 	  if (_M_lookahead(__state) == !__state._M_neg)
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	  break;
 	case _S_opcode_match:
 	  if (__dfs_mode)
@ -296,14 +287,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      if (_M_current != _M_end && __state._M_matches(*_M_current))
 		{
 		  ++_M_current;
-		  _M_dfs<__match_mode>(__state._M_next);
+		  _M_dfs(__match_mode, __state._M_next);
 		  --_M_current;
 		}
 	    }
 	  else
 	    if (__state._M_matches(*_M_current))
-	      _M_match_queue->push_back(make_pair(__state._M_next,
-						  _M_cur_results));
+	      _M_states._M_queue(__state._M_next, _M_cur_results);
 	  break;
 	// First fetch the matched result from _M_cur_results as __submatch;
 	// then compare it with
@ -328,11 +318,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		  {
 		    auto __backup = _M_current;
 		    _M_current = __last;
-		    _M_dfs<__match_mode>(__state._M_next);
+		    _M_dfs(__match_mode, __state._M_next);
 		    _M_current = __backup;
 		  }
 		else
-		  _M_dfs<__match_mode>(__state._M_next);
+		  _M_dfs(__match_mode, __state._M_next);
 	      }
 	  }
 	  break;
@ -340,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  if (__dfs_mode)
 	    {
 	      _GLIBCXX_DEBUG_ASSERT(!_M_has_sol);
-	      if (__match_mode)
+	      if (__match_mode == _Match_mode::_Exact)
 		_M_has_sol = _M_current == _M_end;
 	      else
 		_M_has_sol = true;
@ -355,7 +345,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      if (_M_current == _M_begin
 		  && (_M_flags & regex_constants::match_not_null))
 		break;
-	      if (!__match_mode || _M_current == _M_end)
+	      if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
 		if (!_M_has_sol)
 		  {
 		    _M_has_sol = true;
@ -364,9 +354,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    }
 	  break;
 	case _S_opcode_alternative:
-	  _M_dfs<__match_mode>(__state._M_alt);
+	  _M_dfs(__match_mode, __state._M_alt);
 	  if (!__dfs_mode || !_M_has_sol)
-	    _M_dfs<__match_mode>(__state._M_next);
+	    _M_dfs(__match_mode, __state._M_next);
 	  break;
 	default:
 	  _GLIBCXX_DEBUG_ASSERT(false);
@ -375,7 +365,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

  // Return whether now is at some word boundary.
  template<typename _BiIter, typename _Alloc, typename _TraitsT,
-    bool __dfs_mode>
+	   bool __dfs_mode>
    bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
    _M_word_boundary(_State<_TraitsT> __state) const
    {
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@ -227,14 +227,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    }
 	}
      // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
-      // literally. So "[]]" or "[^]]" is valid regex. See the testcases
+      // literally. So "[]]" and "[^]]" are valid regexes. See the testcases
      // `*/empty_range.cc`.
      else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
 	{
 	  _M_token = _S_token_bracket_end;
 	  _M_state = _S_state_normal;
 	}
-      // ECMAScirpt and awk permmits escaping in bracket.
+      // ECMAScript and awk permits escaping in bracket.
      else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
 	(this->*_M_eat_escape)();
      else
@ -344,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    }
 	  _M_token = _S_token_hex_num;
 	}
-      // ECMAScript recongnizes multi-digit back-references.
+      // ECMAScript recognizes multi-digit back-references.
      else if (_M_ctype.is(_CtypeT::digit, __c))
 	{
 	  _M_value.assign(1, __c);
@ -392,6 +392,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      else
 	{
 #ifdef __STRICT_ANSI__
+	  // POSIX says it is undefined to escape ordinary characters
 	  __throw_regex_error(regex_constants::error_escape);
 #else
 	  _M_token = _S_token_ord_char;
@ -435,8 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__throw_regex_error(regex_constants::error_escape);
    }

-  // Eats a character class or throwns an exception.
-  // __ch cound be ':', '.' or '=', _M_current is the char after ']' when
+  // Eats a character class or throws an exception.
+  // __ch could be ':', '.' or '=', _M_current is the char after ']' when
  // returning.
  template<typename _CharT>
    void
--- a/libstdc++-v3/testsuite/performance/28_regex/range.cc
+++ b/libstdc++-v3/testsuite/performance/28_regex/range.cc
@ -0,0 +1,41 @@
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <regex>
+#include <testsuite_performance.h>
+
+using namespace __gnu_test;
+
+int main()
+{
+  time_counter time;
+  resource_counter resource;
+
+  start_counters(time, resource);
+
+  // this should get compiled to just L"[abcd]"
+  auto re = std::wregex(L'[' + std::wstring(300, L'a') + L"bc"
+                        + std::wstring(1000, 'a') + L"d]");
+  bool ok = true;
+  for (int i = 0; i < 100000; ++i)
+    ok = ok && (std::regex_match(L"b", re) && std::regex_match(L"d", re));
+
+  stop_counters(time, resource);
+  report_performance(__FILE__, "", time, resource);
+
+  return ok ? 0 : 1;
+}