regex_executor.h: Add _TodoList class.
2013-10-08 Tim Shen <timshen91@gmail.com> * include/bits/regex_executor.h: Add _TodoList class. * include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add _M_match_stack and _M_stack to make everything faster. Break if _M_stack is empty, to reduce unnecessary idling. * testsuite/performance/28_regex/split.cc: New. From-SVN: r203261
This commit is contained in:
parent
59a2a4e2de
commit
18971f1fc3
|
@ -1,3 +1,11 @@
|
|||
2013-10-08 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex_executor.h: Add _TodoList class.
|
||||
* include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
|
||||
_M_match_stack and _M_stack to make everything faster. Break if
|
||||
_M_stack is empty, to reduce unnecessary idling.
|
||||
* testsuite/performance/28_regex/split.cc: New.
|
||||
|
||||
2013-10-06 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator):
|
||||
|
|
|
@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
|
||||
bool
|
||||
_M_search()
|
||||
{
|
||||
if (_M_flags & regex_constants::match_continuous)
|
||||
return _M_search_from_first();
|
||||
auto __cur = _M_begin;
|
||||
do
|
||||
{
|
||||
_M_match_mode = false;
|
||||
_M_init(__cur);
|
||||
if (_M_main())
|
||||
return true;
|
||||
}
|
||||
// Continue when __cur == _M_end
|
||||
while (__cur++ != _M_end);
|
||||
return false;
|
||||
}
|
||||
_M_search();
|
||||
|
||||
bool
|
||||
_M_is_word(_CharT __ch) const
|
||||
|
@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
};
|
||||
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
|
||||
|
||||
class _TodoList
|
||||
{
|
||||
public:
|
||||
explicit
|
||||
_TodoList(size_t __sz)
|
||||
: _M_states(), _M_exists(__sz, false)
|
||||
{ }
|
||||
|
||||
void _M_push(_StateIdT __u)
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
|
||||
if (!_M_exists[__u])
|
||||
{
|
||||
_M_exists[__u] = true;
|
||||
_M_states.push_back(__u);
|
||||
}
|
||||
}
|
||||
|
||||
_StateIdT _M_pop()
|
||||
{
|
||||
auto __ret = _M_states.back();
|
||||
_M_states.pop_back();
|
||||
_M_exists[__ret] = false;
|
||||
return __ret;
|
||||
}
|
||||
|
||||
bool _M_empty() const
|
||||
{ return _M_states.empty(); }
|
||||
|
||||
void _M_clear()
|
||||
{
|
||||
_M_states.clear();
|
||||
_M_exists.assign(_M_exists.size(), false);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<_StateIdT> _M_states;
|
||||
std::vector<bool> _M_exists;
|
||||
};
|
||||
|
||||
public:
|
||||
_BFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
|
@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton)),
|
||||
_M_match_stack(_M_nfa.size()),
|
||||
_M_stack(_M_nfa.size()),
|
||||
_M_start_state(_M_nfa._M_start())
|
||||
{ }
|
||||
|
||||
|
@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
void
|
||||
_M_init(_BiIter __cur)
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
|
||||
this->_M_current = __cur;
|
||||
_M_covered.clear();
|
||||
_ResultsVec& __res(this->_M_results);
|
||||
_M_covered[this->_M_start_state] =
|
||||
_ResultsPtr(new _ResultsEntry(__res.size(),
|
||||
_M_nfa._M_quant_count));
|
||||
_M_e_closure();
|
||||
_M_stack._M_push(this->_M_start_state);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
this->_M_flags));
|
||||
}
|
||||
|
||||
const _NFAT& _M_nfa;
|
||||
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
||||
_TodoList _M_match_stack;
|
||||
_TodoList _M_stack;
|
||||
_StateIdT _M_start_state;
|
||||
// To record global optimal solution.
|
||||
_ResultsPtr _M_cur_results;
|
||||
const _NFAT& _M_nfa;
|
||||
_StateIdT _M_start_state;
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
|
|
|
@ -34,14 +34,31 @@ namespace __detail
|
|||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_search()
|
||||
{
|
||||
if (_M_flags & regex_constants::match_continuous)
|
||||
return _M_search_from_first();
|
||||
auto __cur = _M_begin;
|
||||
do
|
||||
{
|
||||
_M_match_mode = false;
|
||||
_M_init(__cur);
|
||||
if (_M_main())
|
||||
return true;
|
||||
}
|
||||
// Continue when __cur == _M_end
|
||||
while (__cur++ != _M_end);
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_dfs(_StateIdT __i)
|
||||
{
|
||||
if (__i == _S_invalid_state_id)
|
||||
// This is not that certain. Need deeper investigate.
|
||||
return false;
|
||||
auto& __current = this->_M_current;
|
||||
const auto& __state = _M_nfa[__i];
|
||||
bool __ret = false;
|
||||
|
@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_main()
|
||||
{
|
||||
_M_e_closure();
|
||||
bool __ret = false;
|
||||
if (!this->_M_match_mode
|
||||
&& !(this->_M_flags & regex_constants::match_not_null))
|
||||
|
@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{
|
||||
_M_move();
|
||||
++this->_M_current;
|
||||
if (_M_stack._M_empty())
|
||||
break;
|
||||
_M_e_closure();
|
||||
if (!this->_M_match_mode)
|
||||
// To keep regex_search greedy, no "return true" here.
|
||||
|
@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
__ret = _M_includes_some();
|
||||
if (__ret)
|
||||
this->_M_set_results(_M_cur_results->_M_get());
|
||||
_M_match_stack._M_clear();
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
|
||||
return __ret;
|
||||
}
|
||||
|
||||
|
@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_e_closure()
|
||||
{
|
||||
std::queue<_StateIdT> __q;
|
||||
std::vector<bool> __in_q(_M_nfa.size(), false);
|
||||
auto& __current = this->_M_current;
|
||||
|
||||
for (auto& __it : _M_covered)
|
||||
while (!_M_stack._M_empty())
|
||||
{
|
||||
__in_q[__it.first] = true;
|
||||
__q.push(__it.first);
|
||||
}
|
||||
while (!__q.empty())
|
||||
{
|
||||
auto __u = __q.front();
|
||||
__q.pop();
|
||||
__in_q[__u] = false;
|
||||
auto __u = _M_stack._M_pop();
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
|
||||
const auto& __state = _M_nfa[__u];
|
||||
|
||||
// Can be implemented using method, but there will be too many
|
||||
// arguments. I would use macro function before C++11, but lambda is
|
||||
// a better choice, since hopefully compiler can inline it.
|
||||
auto __add_visited_state = [&](_StateIdT __v)
|
||||
auto __add_visited_state = [=](_StateIdT __v)
|
||||
{
|
||||
if (__v == _S_invalid_state_id)
|
||||
return;
|
||||
if (_M_covered.count(__u) != 0
|
||||
&& (_M_covered.count(__v) == 0
|
||||
|| *_M_covered[__u] < *_M_covered[__v]))
|
||||
if (_M_covered.count(__v) == 0)
|
||||
{
|
||||
_M_covered[__v] =
|
||||
_ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
|
||||
_M_stack._M_push(__v);
|
||||
return;
|
||||
}
|
||||
auto& __cu = _M_covered[__u];
|
||||
auto& __cv = _M_covered[__v];
|
||||
if (*__cu < *__cv)
|
||||
{
|
||||
__cv = _ResultsPtr(new _ResultsEntry(*__cu));
|
||||
// if a state is updated, it's outgoing neighbors should be
|
||||
// reconsidered too. Push them to the queue.
|
||||
if (!__in_q[__v])
|
||||
{
|
||||
__in_q[__v] = true;
|
||||
__q.push(__v);
|
||||
}
|
||||
_M_stack._M_push(__v);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
case _S_opcode_alternative:
|
||||
{
|
||||
__add_visited_state(__state._M_next);
|
||||
auto __back =
|
||||
_M_covered[__u]->_M_quant_keys[__state._M_quant_index];
|
||||
_M_covered[__u]->_M_inc(__state._M_quant_index,
|
||||
__state._M_neg);
|
||||
auto& __cu = *_M_covered[__u];
|
||||
auto __back = __cu._M_quant_keys[__state._M_quant_index];
|
||||
__cu._M_inc(__state._M_quant_index, __state._M_neg);
|
||||
__add_visited_state(__state._M_alt);
|
||||
_M_covered[__u]->_M_quant_keys[__state._M_quant_index]
|
||||
= __back;
|
||||
__cu._M_quant_keys[__state._M_quant_index] = __back;
|
||||
}
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
|
@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
_M_match_stack._M_push(__u);
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
break;
|
||||
|
@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_M_move()
|
||||
{
|
||||
decltype(_M_covered) __next;
|
||||
for (auto& __it : _M_covered)
|
||||
while (!_M_match_stack._M_empty())
|
||||
{
|
||||
const auto& __state = _M_nfa[__it.first];
|
||||
if (__state._M_opcode == _S_opcode_match
|
||||
&& __state._M_matches(*this->_M_current))
|
||||
if (__state._M_next != _S_invalid_state_id)
|
||||
if (__next.count(__state._M_next) == 0
|
||||
|| *__it.second < *__next[__state._M_next])
|
||||
__next[__state._M_next] = move(__it.second);
|
||||
auto __u = _M_match_stack._M_pop();
|
||||
const auto& __state = _M_nfa[__u];
|
||||
auto& __cu = _M_covered[__u];
|
||||
if (__state._M_matches(*this->_M_current)
|
||||
&& (__next.count(__state._M_next) == 0
|
||||
|| *__cu < *__next[__state._M_next]))
|
||||
{
|
||||
__next[__state._M_next] = std::move(__cu);
|
||||
_M_stack._M_push(__state._M_next);
|
||||
}
|
||||
}
|
||||
_M_covered = move(__next);
|
||||
}
|
||||
|
@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_includes_some()
|
||||
{
|
||||
auto& __s = _M_nfa._M_final_states();
|
||||
auto& __t = _M_covered;
|
||||
bool __succ = false;
|
||||
if (__s.size() > 0 && __t.size() > 0)
|
||||
{
|
||||
auto __first = __s.begin();
|
||||
auto __second = __t.begin();
|
||||
while (__first != __s.end() && __second != __t.end())
|
||||
{
|
||||
if (*__first < __second->first)
|
||||
++__first;
|
||||
else if (*__first > __second->first)
|
||||
++__second;
|
||||
else
|
||||
{
|
||||
if (_M_cur_results == nullptr
|
||||
|| *__second->second < *_M_cur_results)
|
||||
_M_cur_results =
|
||||
_ResultsPtr(new _ResultsEntry(*__second->second));
|
||||
__succ = true;
|
||||
++__first;
|
||||
++__second;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto __u : _M_nfa._M_final_states())
|
||||
if (_M_covered.count(__u))
|
||||
{
|
||||
__succ = true;
|
||||
auto& __cu = _M_covered[__u];
|
||||
if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
|
||||
_M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
|
||||
}
|
||||
return __succ;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 2013-10-08 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
#include <testsuite_performance.h>
|
||||
#include <regex>
|
||||
|
||||
using namespace __gnu_test;
|
||||
using namespace std;
|
||||
|
||||
void split(string s)
|
||||
{
|
||||
regex re("\\s+");
|
||||
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
|
||||
it != sregex_token_iterator();
|
||||
++it)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
string source = "\
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.\n\
|
||||
//\n\
|
||||
// This file is part of the GNU ISO C++ Library. This library is free\n\
|
||||
// software; you can redistribute it and/or modify it under the\n\
|
||||
// terms of the GNU General Public License as published by the\n\
|
||||
// Free Software Foundation; either version 3, or (at your option)\n\
|
||||
// any later version.\n\
|
||||
\n\
|
||||
// This library is distributed in the hope that it will be useful,\n\
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
|
||||
// GNU General Public License for more details.\n\
|
||||
\n\
|
||||
// You should have received a copy of the GNU General Public License along\n\
|
||||
// with this library; see the file COPYING3. If not see\n\
|
||||
// <http://www.gnu.org/licenses/>.\n\
|
||||
\n\
|
||||
// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
|
||||
\n\
|
||||
#include <testsuite_performance.h>\n\
|
||||
#include <regex>\n\
|
||||
\n\
|
||||
using namespace __gnu_test;\n\
|
||||
using namespace std;\n\
|
||||
\n\
|
||||
void split(string s)\n\
|
||||
{\n\
|
||||
regex re(\"\\s+\");\n\
|
||||
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
|
||||
it != sregex_token_iterator();\n\
|
||||
++it)\n\
|
||||
{\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
int main()\n\
|
||||
{\n\
|
||||
string source = \"\";\n\
|
||||
time_counter time;\n\
|
||||
resource_counter resource;\n\
|
||||
\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
source = source + source;\n\
|
||||
\n\
|
||||
start_counters(time, resource);\n\
|
||||
split(source);\n\
|
||||
stop_counters(time, resource);\n\
|
||||
report_performance(__FILE__, \"\", time, resource);\n\
|
||||
\n\
|
||||
return 0;\n\
|
||||
}\n";
|
||||
|
||||
time_counter time;
|
||||
resource_counter resource;
|
||||
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
source = source + source;
|
||||
|
||||
start_counters(time, resource);
|
||||
split(source);
|
||||
stop_counters(time, resource);
|
||||
report_performance(__FILE__, "", time, resource);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue