regex_executor.h: Add _TodoList class.

2013-10-08  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_executor.h: Add _TodoList class.
	* include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
	_M_match_stack and _M_stack to make everything faster. Break if
	_M_stack is empty, to reduce unnecessary idling.
	* testsuite/performance/28_regex/split.cc: New.

From-SVN: r203261
This commit is contained in:
Tim Shen 2013-10-08 03:41:14 +00:00 committed by Tim Shen
parent 59a2a4e2de
commit 18971f1fc3
4 changed files with 234 additions and 83 deletions

View File

@ -1,3 +1,11 @@
2013-10-08 Tim Shen <timshen91@gmail.com>
* include/bits/regex_executor.h: Add _TodoList class.
* include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
_M_match_stack and _M_stack to make everything faster. Break if
_M_stack is empty, to reduce unnecessary idling.
* testsuite/performance/28_regex/split.cc: New.
2013-10-06 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator):

View File

@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
bool
_M_search()
{
if (_M_flags & regex_constants::match_continuous)
return _M_search_from_first();
auto __cur = _M_begin;
do
{
_M_match_mode = false;
_M_init(__cur);
if (_M_main())
return true;
}
// Continue when __cur == _M_end
while (__cur++ != _M_end);
return false;
}
_M_search();
bool
_M_is_word(_CharT __ch) const
@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
};
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
class _TodoList
{
public:
explicit
_TodoList(size_t __sz)
: _M_states(), _M_exists(__sz, false)
{ }
void _M_push(_StateIdT __u)
{
_GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
if (!_M_exists[__u])
{
_M_exists[__u] = true;
_M_states.push_back(__u);
}
}
_StateIdT _M_pop()
{
auto __ret = _M_states.back();
_M_states.pop_back();
_M_exists[__ret] = false;
return __ret;
}
bool _M_empty() const
{ return _M_states.empty(); }
void _M_clear()
{
_M_states.clear();
_M_exists.assign(_M_exists.size(), false);
}
private:
std::vector<_StateIdT> _M_states;
std::vector<bool> _M_exists;
};
public:
_BFSExecutor(_BiIter __begin,
_BiIter __end,
@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
: _BaseT(__begin, __end, __results, __re, __flags),
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
(__re._M_automaton)),
_M_match_stack(_M_nfa.size()),
_M_stack(_M_nfa.size()),
_M_start_state(_M_nfa._M_start())
{ }
@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_init(_BiIter __cur)
{
_GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
this->_M_current = __cur;
_M_covered.clear();
_ResultsVec& __res(this->_M_results);
_M_covered[this->_M_start_state] =
_ResultsPtr(new _ResultsEntry(__res.size(),
_M_nfa._M_quant_count));
_M_e_closure();
_M_stack._M_push(this->_M_start_state);
}
void
@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
this->_M_flags));
}
const _NFAT& _M_nfa;
std::map<_StateIdT, _ResultsPtr> _M_covered;
_TodoList _M_match_stack;
_TodoList _M_stack;
_StateIdT _M_start_state;
// To record global optimal solution.
_ResultsPtr _M_cur_results;
const _NFAT& _M_nfa;
_StateIdT _M_start_state;
};
//@} regex-detail

View File

@ -34,14 +34,31 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_search()
{
if (_M_flags & regex_constants::match_continuous)
return _M_search_from_first();
auto __cur = _M_begin;
do
{
_M_match_mode = false;
_M_init(__cur);
if (_M_main())
return true;
}
// Continue when __cur == _M_end
while (__cur++ != _M_end);
return false;
}
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_dfs(_StateIdT __i)
{
if (__i == _S_invalid_state_id)
// This is not that certain. Need deeper investigate.
return false;
auto& __current = this->_M_current;
const auto& __state = _M_nfa[__i];
bool __ret = false;
@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_main()
{
_M_e_closure();
bool __ret = false;
if (!this->_M_match_mode
&& !(this->_M_flags & regex_constants::match_not_null))
@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
_M_move();
++this->_M_current;
if (_M_stack._M_empty())
break;
_M_e_closure();
if (!this->_M_match_mode)
// To keep regex_search greedy, no "return true" here.
@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__ret = _M_includes_some();
if (__ret)
this->_M_set_results(_M_cur_results->_M_get());
_M_match_stack._M_clear();
_GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
return __ret;
}
@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_e_closure()
{
std::queue<_StateIdT> __q;
std::vector<bool> __in_q(_M_nfa.size(), false);
auto& __current = this->_M_current;
for (auto& __it : _M_covered)
while (!_M_stack._M_empty())
{
__in_q[__it.first] = true;
__q.push(__it.first);
}
while (!__q.empty())
{
auto __u = __q.front();
__q.pop();
__in_q[__u] = false;
auto __u = _M_stack._M_pop();
_GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
const auto& __state = _M_nfa[__u];
// Can be implemented using method, but there will be too many
// arguments. I would use macro function before C++11, but lambda is
// a better choice, since hopefully compiler can inline it.
auto __add_visited_state = [&](_StateIdT __v)
auto __add_visited_state = [=](_StateIdT __v)
{
if (__v == _S_invalid_state_id)
return;
if (_M_covered.count(__u) != 0
&& (_M_covered.count(__v) == 0
|| *_M_covered[__u] < *_M_covered[__v]))
if (_M_covered.count(__v) == 0)
{
_M_covered[__v] =
_ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
_M_stack._M_push(__v);
return;
}
auto& __cu = _M_covered[__u];
auto& __cv = _M_covered[__v];
if (*__cu < *__cv)
{
__cv = _ResultsPtr(new _ResultsEntry(*__cu));
// if a state is updated, it's outgoing neighbors should be
// reconsidered too. Push them to the queue.
if (!__in_q[__v])
{
__in_q[__v] = true;
__q.push(__v);
}
_M_stack._M_push(__v);
}
};
@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_alternative:
{
__add_visited_state(__state._M_next);
auto __back =
_M_covered[__u]->_M_quant_keys[__state._M_quant_index];
_M_covered[__u]->_M_inc(__state._M_quant_index,
__state._M_neg);
auto& __cu = *_M_covered[__u];
auto __back = __cu._M_quant_keys[__state._M_quant_index];
__cu._M_inc(__state._M_quant_index, __state._M_neg);
__add_visited_state(__state._M_alt);
_M_covered[__u]->_M_quant_keys[__state._M_quant_index]
= __back;
__cu._M_quant_keys[__state._M_quant_index] = __back;
}
break;
case _S_opcode_subexpr_begin:
@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__add_visited_state(__state._M_next);
break;
case _S_opcode_match:
_M_match_stack._M_push(__u);
break;
case _S_opcode_accept:
break;
@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_move()
{
decltype(_M_covered) __next;
for (auto& __it : _M_covered)
while (!_M_match_stack._M_empty())
{
const auto& __state = _M_nfa[__it.first];
if (__state._M_opcode == _S_opcode_match
&& __state._M_matches(*this->_M_current))
if (__state._M_next != _S_invalid_state_id)
if (__next.count(__state._M_next) == 0
|| *__it.second < *__next[__state._M_next])
__next[__state._M_next] = move(__it.second);
auto __u = _M_match_stack._M_pop();
const auto& __state = _M_nfa[__u];
auto& __cu = _M_covered[__u];
if (__state._M_matches(*this->_M_current)
&& (__next.count(__state._M_next) == 0
|| *__cu < *__next[__state._M_next]))
{
__next[__state._M_next] = std::move(__cu);
_M_stack._M_push(__state._M_next);
}
}
_M_covered = move(__next);
}
@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_includes_some()
{
auto& __s = _M_nfa._M_final_states();
auto& __t = _M_covered;
bool __succ = false;
if (__s.size() > 0 && __t.size() > 0)
{
auto __first = __s.begin();
auto __second = __t.begin();
while (__first != __s.end() && __second != __t.end())
{
if (*__first < __second->first)
++__first;
else if (*__first > __second->first)
++__second;
else
{
if (_M_cur_results == nullptr
|| *__second->second < *_M_cur_results)
_M_cur_results =
_ResultsPtr(new _ResultsEntry(*__second->second));
__succ = true;
++__first;
++__second;
}
}
}
for (auto __u : _M_nfa._M_final_states())
if (_M_covered.count(__u))
{
__succ = true;
auto& __cu = _M_covered[__u];
if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
_M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
}
return __succ;
}

View File

@ -0,0 +1,115 @@
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 2013-10-08 Tim Shen <timshen91@gmail.com>
#include <testsuite_performance.h>
#include <regex>
using namespace __gnu_test;
using namespace std;
void split(string s)
{
regex re("\\s+");
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
it != sregex_token_iterator();
++it)
{
}
}
int main()
{
string source = "\
// Copyright (C) 2013 Free Software Foundation, Inc.\n\
//\n\
// This file is part of the GNU ISO C++ Library. This library is free\n\
// software; you can redistribute it and/or modify it under the\n\
// terms of the GNU General Public License as published by the\n\
// Free Software Foundation; either version 3, or (at your option)\n\
// any later version.\n\
\n\
// This library is distributed in the hope that it will be useful,\n\
// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
// GNU General Public License for more details.\n\
\n\
// You should have received a copy of the GNU General Public License along\n\
// with this library; see the file COPYING3. If not see\n\
// <http://www.gnu.org/licenses/>.\n\
\n\
// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
\n\
#include <testsuite_performance.h>\n\
#include <regex>\n\
\n\
using namespace __gnu_test;\n\
using namespace std;\n\
\n\
void split(string s)\n\
{\n\
regex re(\"\\s+\");\n\
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
it != sregex_token_iterator();\n\
++it)\n\
{\n\
}\n\
}\n\
\n\
int main()\n\
{\n\
string source = \"\";\n\
time_counter time;\n\
resource_counter resource;\n\
\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
\n\
start_counters(time, resource);\n\
split(source);\n\
stop_counters(time, resource);\n\
report_performance(__FILE__, \"\", time, resource);\n\
\n\
return 0;\n\
}\n";
time_counter time;
resource_counter resource;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
start_counters(time, resource);
split(source);
stop_counters(time, resource);
report_performance(__FILE__, "", time, resource);
return 0;
}