Implement C++17 string searchers.

* include/std/functional: (unordered_map, vector): New includes
	in C++17 mode.
	(array, bits/stl_algo.h): Likewise.
	(default_searcher, __boyer_moore_map_base): New.
	(__boyer_moore_array_base, __is_std_equal_to): Likewise.
	(__boyer_moore_base_t, boyer_moore_searcher): Likewise.
	(boyer_moore_horspool_searcher, make_default_searcher): Likewise.
	(make_boyer_moore_searcher): Likewise.
	(make_boyer_moore_horspool_searcher): Likewise.
	* testsuite/20_util/function_objects/searchers.cc: New.

From-SVN: r240093
This commit is contained in:
Ville Voutilainen 2016-09-12 18:48:07 +03:00 committed by Ville Voutilainen
parent 8f3675f855
commit f82dfb8d4e
3 changed files with 461 additions and 0 deletions

View File

@ -1,3 +1,17 @@
2016-09-12 Ville Voutilainen <ville.voutilainen@gmail.com>
Implement C++17 string searchers.
* include/std/functional: (unordered_map, vector): New includes
in C++17 mode.
(array, bits/stl_algo.h): Likewise.
(default_searcher, __boyer_moore_map_base): New.
(__boyer_moore_array_base, __is_std_equal_to): Likewise.
(__boyer_moore_base_t, boyer_moore_searcher): Likewise.
(boyer_moore_horspool_searcher, make_default_searcher): Likewise.
(make_boyer_moore_searcher): Likewise.
(make_boyer_moore_horspool_searcher): Likewise.
* testsuite/20_util/function_objects/searchers.cc: New.
2016-09-12 Matthew Wahab <matthew.wahab@arm.com>
* testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc:

View File

@ -58,6 +58,13 @@
#include <bits/functional_hash.h>
#include <bits/invoke.h>
#if __cplusplus > 201402L
#include <unordered_map>
#include <vector>
#include <array>
#include <bits/stl_algo.h>
#endif
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
@ -2197,6 +2204,308 @@ _GLIBCXX_MEM_FN_TRAITS(&&, false_type, true_type)
return _Not_fn<std::decay_t<_Fn>>{std::forward<_Fn>(__fn)};
}
// Searchers
/**
 *  @brief Searcher that delegates to std::search.
 *
 *  Keeps a pattern range and a binary predicate; invoking the object
 *  looks for that pattern inside the range it is given.
 */
template<typename _ForwardIterator1,
         typename _BinaryPredicate = std::equal_to<>>
  class default_searcher
  {
  public:
    /// Store the pattern [__pat_first, __pat_last) and move the predicate in.
    default_searcher(_ForwardIterator1 __pat_first,
                     _ForwardIterator1 __pat_last,
                     _BinaryPredicate __pred = _BinaryPredicate())
    : _M_m(__pat_first, __pat_last, std::move(__pred))
    { }

    /**
     *  @brief  Look for the stored pattern in [__first, __last).
     *  @return The result of std::search called with the haystack range,
     *          the stored pattern range and the stored predicate.
     */
    template<typename _ForwardIterator2>
      _ForwardIterator2
      operator()(_ForwardIterator2 __first, _ForwardIterator2 __last) const
      {
        const auto& __pat_begin = std::get<0>(_M_m);
        const auto& __pat_finish = std::get<1>(_M_m);
        const auto& __eq = std::get<2>(_M_m);
        return std::search(__first, __last, __pat_begin, __pat_finish, __eq);
      }

  private:
    // Slots 0 and 1 hold the pattern bounds, slot 2 holds the predicate.
    std::tuple<_ForwardIterator1, _ForwardIterator1, _BinaryPredicate> _M_m;
  };
/**
 *  @brief Bad-character table kept in a hash map.
 *
 *  Each recorded pattern element maps to the distance between its last
 *  recorded position and the final position of the pattern.  The map's
 *  key-equality functor is also handed out as the searcher's predicate.
 */
template<typename _Key, typename _Tp, typename _Hash, typename _Pred>
  struct __boyer_moore_map_base
  {
    using __diff_type = _Tp;

    /// Build the table for the pattern starting at __pat of length __patlen.
    template<typename _RAIter>
      __boyer_moore_map_base(_RAIter __pat, size_t __patlen,
                             _Hash&& __hf, _Pred&& __pred)
      : _M_bad_char{ __patlen, std::move(__hf), std::move(__pred) }
      {
        if (__patlen == 0)
          return;
        // Only positions [0, __patlen - 1) are recorded; the final element
        // gets no entry from this loop.  Later positions overwrite earlier
        // ones for equal keys.
        __diff_type __idx = 0;
        while (__idx < __patlen - 1)
          {
            _M_bad_char[__pat[__idx]] = __patlen - 1 - __idx;
            ++__idx;
          }
      }

    /// Return the shift recorded for __key, or __not_found if there is none.
    __diff_type
    _M_lookup(_Key __key, __diff_type __not_found) const
    {
      const auto __hit = _M_bad_char.find(__key);
      return __hit != _M_bad_char.end() ? __hit->second : __not_found;
    }

    /// Copy of the map's key-equality functor.
    _Pred
    _M_pred() const
    { return _M_bad_char.key_eq(); }

    std::unordered_map<_Key, _Tp, _Hash, _Pred> _M_bad_char;
  };
/**
 *  @brief Bad-character table kept in a fixed-size array.
 *
 *  The array has _Len slots indexed by the unsigned value of an element.
 *  The predicate is stored next to the array in the same tuple.
 */
template<typename _Tp, size_t _Len, typename _Pred>
  struct __boyer_moore_array_base
  {
    using __diff_type = _Tp;

    /// Build the table for the pattern starting at __pat of length __patlen.
    /// The third argument (a hash function for the map variant) is ignored.
    template<typename _RAIter, typename _Unused>
      __boyer_moore_array_base(_RAIter __pat, size_t __patlen,
                               _Unused&&, _Pred&& __pred)
      : _M_bad_char{ std::array<_Tp, _Len>{}, std::move(__pred) }
      {
        auto& __table = std::get<0>(_M_bad_char);
        // Every slot starts out as the full pattern length.
        __table.fill(__patlen);
        if (__patlen == 0)
          return;
        // Positions [0, __patlen - 1) overwrite their slot; the final
        // pattern element is not visited.
        for (__diff_type __i = 0; __i < __patlen - 1; ++__i)
          {
            auto __ch = __pat[__i];
            using _UCh = std::make_unsigned_t<decltype(__ch)>;
            const auto __slot = static_cast<_UCh>(__ch);
            __table[__slot] = __patlen - 1 - __i;
          }
      }

    /// Return the table entry for __key, or __not_found when the unsigned
    /// value of __key does not index into the table.
    template<typename _Key>
      __diff_type
      _M_lookup(_Key __key, __diff_type __not_found) const
      {
        using _UKey = std::make_unsigned_t<_Key>;
        const auto __ukey = static_cast<_UKey>(__key);
        return __ukey < _Len ? std::get<0>(_M_bad_char)[__ukey] : __not_found;
      }

    /// The stored predicate.
    const _Pred&
    _M_pred() const
    { return std::get<1>(_M_bad_char); }

    std::tuple<std::array<_Tp, _Len>, _Pred> _M_bad_char;
  };
/// Trait that is std::true_type only for std::equal_to<void> and
/// std::false_type for every other type.
template<typename _Pred>
  struct __is_std_equal_to
  : std::is_same<_Pred, std::equal_to<void>>::type
  { };
// Use __boyer_moore_array_base when the pattern consists of narrow
// characters (one-byte integral values other than bool) and uses
// std::equal_to<> as the predicate; otherwise use the hash-map based
// __boyer_moore_map_base.
// bool is excluded even though it is a one-byte integral type on common
// targets: the array base converts elements with std::make_unsigned_t,
// which cannot be instantiated for bool, so selecting it would make the
// searcher fail to compile for patterns of bool.
template<typename _RAIter, typename _Hash, typename _Pred,
         typename _Val = typename iterator_traits<_RAIter>::value_type,
         typename _Diff = typename iterator_traits<_RAIter>::difference_type>
  using __boyer_moore_base_t
    = std::conditional_t<sizeof(_Val) == 1 && is_integral<_Val>::value
                         && !std::is_same<std::remove_cv_t<_Val>, bool>::value
                         && __is_std_equal_to<_Pred>::value,
                         __boyer_moore_array_base<_Diff, 256, _Pred>,
                         __boyer_moore_map_base<_Val, _Diff, _Hash, _Pred>>;
/**
 *  @brief Searcher combining a bad-character table with a good-suffix table.
 *
 *  The bad-character lookup and the predicate come from the private base
 *  selected by __boyer_moore_base_t; the good-suffix shifts live in
 *  _M_good_suffix.  The constructor and the call operator are only
 *  declared here.
 */
template<typename _RAIter, typename _Hash
           = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
         typename _BinaryPredicate = std::equal_to<>>
  class boyer_moore_searcher
  : __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>
  {
    using _Base = __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>;
    using typename _Base::__diff_type;

  public:
    /// Prepare a searcher for the pattern [__pat_first, __pat_last).
    boyer_moore_searcher(_RAIter __pat_first, _RAIter __pat_last,
                         _Hash __hf = _Hash(),
                         _BinaryPredicate __pred = _BinaryPredicate());

    /// Search [__first, __last) for the pattern.
    template<typename _RandomAccessIterator2>
      _RandomAccessIterator2
      operator()(_RandomAccessIterator2 __first,
                 _RandomAccessIterator2 __last) const;

  private:
    /// True if the elements of __word from __pos up to __len compare equal,
    /// one by one, to the elements at the start of __word.
    bool
    _M_is_prefix(_RAIter __word, __diff_type __len, __diff_type __pos)
    {
      const auto& __eq = this->_M_pred();
      const __diff_type __suffixlen = __len - __pos;
      __diff_type __k = 0;
      while (__k < __suffixlen && __eq(__word[__k], __word[__pos + __k]))
        ++__k;
      // Stopping early means a mismatch was found.
      return __k >= __suffixlen;
    }

    /// Number of elements, counted backwards from __pos, that match the
    /// elements counted backwards from the end of __word; at most __pos.
    __diff_type
    _M_suffix_length(_RAIter __word, __diff_type __len, __diff_type __pos)
    {
      const auto& __eq = this->_M_pred();
      __diff_type __count = 0;
      // The predicate is evaluated before the bound check, as written.
      for (; __eq(__word[__pos - __count], __word[__len - 1 - __count])
             && __count < __pos; ++__count)
        { }
      return __count;
    }

    /// Bad-character shift for __c; the pattern length when __c has no entry.
    template<typename _Tp>
      __diff_type
      _M_bad_char_shift(_Tp __c) const
      {
        const __diff_type __patlen = _M_pat_end - _M_pat;
        return this->_M_lookup(__c, __patlen);
      }

    _RAIter _M_pat;
    _RAIter _M_pat_end;
    std::vector<__diff_type> _M_good_suffix;
  };
/**
 *  @brief Searcher that shifts using the bad-character table only.
 *
 *  The table and the predicate come from the private base selected by
 *  __boyer_moore_base_t.
 */
template<typename _RAIter, typename _Hash
           = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
         typename _BinaryPredicate = std::equal_to<>>
  class boyer_moore_horspool_searcher
  : __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>
  {
    using _Base = __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>;
    using typename _Base::__diff_type;

  public:
    /// Prepare a searcher for the pattern [__pat, __pat_end).
    boyer_moore_horspool_searcher(_RAIter __pat,
                                  _RAIter __pat_end,
                                  _Hash __hf = _Hash(),
                                  _BinaryPredicate __pred
                                  = _BinaryPredicate())
    : _Base(__pat, __pat_end - __pat, std::move(__hf), std::move(__pred)),
      _M_pat(__pat), _M_pat_end(__pat_end)
    { }

    /**
     *  @brief  Search [__first, __last) for the pattern.
     *  @return Iterator to the start of the first match, __first for an
     *          empty pattern, or __last when there is no match.
     */
    template<typename _RandomAccessIterator2>
      _RandomAccessIterator2
      operator()(_RandomAccessIterator2 __first,
                 _RandomAccessIterator2 __last) const
      {
        const auto& __eq = this->_M_pred();
        const auto __patlen = _M_pat_end - _M_pat;
        if (__patlen == 0)
          return __first;

        // __remaining tracks how much haystack is left from __first.
        for (auto __remaining = __last - __first; __remaining >= __patlen; )
          {
            // Compare the window against the pattern from its last element
            // towards its first.
            auto __scan = __patlen - 1;
            while (__eq(__first[__scan], _M_pat[__scan]))
              {
                if (__scan == 0)
                  return __first;
                --__scan;
              }
            // Shift by the table entry of the window's last element.
            const auto __shift = _M_bad_char_shift(__first[__patlen - 1]);
            __remaining -= __shift;
            __first += __shift;
          }
        return __last;
      }

  private:
    /// Bad-character shift for __c; the pattern length when __c has no entry.
    template<typename _Tp>
      __diff_type
      _M_bad_char_shift(_Tp __c) const
      {
        const __diff_type __patlen = _M_pat_end - _M_pat;
        return this->_M_lookup(__c, __patlen);
      }

    _RAIter _M_pat;
    _RAIter _M_pat_end;
  };
/// Generator function for default_searcher
///
/// Returns a default_searcher for the pattern [__pat_first, __pat_last).
/// __pred is taken by value and moved into the searcher, matching the
/// other searcher generator functions, so the predicate is not copied a
/// second time.
template<typename _ForwardIterator,
         typename _BinaryPredicate = std::equal_to<>>
  inline default_searcher<_ForwardIterator, _BinaryPredicate>
  make_default_searcher(_ForwardIterator __pat_first,
                        _ForwardIterator __pat_last,
                        _BinaryPredicate __pred = _BinaryPredicate())
  { return { __pat_first, __pat_last, std::move(__pred) }; }
/// Generator function for boyer_moore_searcher
///
/// Returns a boyer_moore_searcher for the pattern [__pat_first, __pat_last);
/// the hash function and the predicate are moved into it.
template<typename _RAIter, typename _Hash
           = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
         typename _BinaryPredicate = equal_to<>>
  inline boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>
  make_boyer_moore_searcher(_RAIter __pat_first, _RAIter __pat_last,
                            _Hash __hf = _Hash(),
                            _BinaryPredicate __pred = _BinaryPredicate())
  {
    using _Searcher = boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>;
    return _Searcher(__pat_first, __pat_last,
                     std::move(__hf), std::move(__pred));
  }
/// Generator function for boyer_moore_horspool_searcher
///
/// Returns a boyer_moore_horspool_searcher for the pattern
/// [__pat_first, __pat_last); the hash function and the predicate are
/// moved into it.
template<typename _RAIter, typename _Hash
           = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
         typename _BinaryPredicate = equal_to<>>
  inline boyer_moore_horspool_searcher<_RAIter, _Hash, _BinaryPredicate>
  make_boyer_moore_horspool_searcher(_RAIter __pat_first, _RAIter __pat_last,
                                     _Hash __hf = _Hash(),
                                     _BinaryPredicate __pred
                                     = _BinaryPredicate())
  {
    using _Searcher
      = boyer_moore_horspool_searcher<_RAIter, _Hash, _BinaryPredicate>;
    return _Searcher(__pat_first, __pat_last,
                     std::move(__hf), std::move(__pred));
  }
/**
 *  @brief Build the bad-character and good-suffix tables for a pattern.
 *  @param __pat      Start of the pattern.
 *  @param __pat_end  End of the pattern.
 *  @param __hf       Hash function, forwarded to the base.
 *  @param __pred     Equality predicate, forwarded to the base.
 *
 *  The base constructor fills the bad-character table.  This body fills
 *  _M_good_suffix (one entry per pattern position) in two passes.
 */
template<typename _RAIter, typename _Hash, typename _BinaryPredicate>
  boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>::
  boyer_moore_searcher(_RAIter __pat, _RAIter __pat_end,
                       _Hash __hf, _BinaryPredicate __pred)
  : _Base(__pat, __pat_end - __pat, std::move(__hf), std::move(__pred)),
    _M_pat(__pat), _M_pat_end(__pat_end), _M_good_suffix(__pat_end - __pat)
  {
    auto __patlen = __pat_end - __pat;
    if (__patlen == 0)
      return;

    // __pred was moved into the base by the mem-initializer above, so the
    // parameter must not be called any more.  Use the predicate the base
    // now owns, exactly as _M_is_prefix and _M_suffix_length do.
    const auto& __eq = this->_M_pred();

    // Pass 1, from the last position down to the first: remember the
    // most recent position whose following suffix is also a prefix of the
    // pattern, and derive the shift for each position from it.
    __diff_type __last_prefix = __patlen - 1;
    for (__diff_type __p = __patlen - 1; __p >= 0; --__p)
      {
        if (_M_is_prefix(__pat, __patlen, __p + 1))
          __last_prefix = __p + 1;
        _M_good_suffix[__p] = __last_prefix + (__patlen - 1 - __p);
      }

    // Pass 2: for every position but the last, measure how far the
    // elements ending there match the end of the pattern.  If the
    // elements just before the two matching runs differ, overwrite the
    // shift for the corresponding position.
    for (__diff_type __p = 0; __p < __patlen - 1; ++__p)
      {
        auto __slen = _M_suffix_length(__pat, __patlen, __p);
        auto __pos = __patlen - 1 - __slen;
        if (!__eq(__pat[__p - __slen], __pat[__pos]))
          _M_good_suffix[__pos] = __patlen - 1 - __p + __slen;
      }
  }
/**
 *  @brief  Search [__first, __last) for the pattern.
 *  @return Iterator to the start of the first match, __first for an empty
 *          pattern, or __last when there is no match.
 */
template<typename _RAIter, typename _Hash, typename _BinaryPredicate>
  template<typename _RandomAccessIterator2>
    _RandomAccessIterator2
    boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>::
    operator()(_RandomAccessIterator2 __first,
               _RandomAccessIterator2 __last) const
    {
      const auto __patlen = _M_pat_end - _M_pat;
      if (__patlen == 0)
        return __first;

      const auto& __eq = this->_M_pred();
      const auto __stringlen = __last - __first;

      // __i indexes the haystack, __j the pattern; both walk backwards
      // while the elements match.
      for (__diff_type __i = __patlen - 1; __i < __stringlen; )
        {
          __diff_type __j = __patlen - 1;
          for (; __j >= 0; --__i, --__j)
            if (!__eq(__first[__i], _M_pat[__j]))
              break;

          // Running off the front of the pattern means every element matched.
          if (__j < 0)
            return __first + __i + 1;

          // Advance by the larger of the two table shifts.
          const __diff_type __bad = _M_bad_char_shift(__first[__i]);
          const __diff_type __good = _M_good_suffix[__j];
          __i += std::max(__bad, __good);
        }
      return __last;
    }
#endif
_GLIBCXX_END_NAMESPACE_VERSION

View File

@ -0,0 +1,138 @@
// Copyright (C) 2014-2016 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// { dg-options "-std=gnu++17" }
#include <functional>
#include <cstring>
#include <cctype>
#ifdef _GLIBCXX_USE_WCHAR_T
# include <cwchar>
#endif
#include <algorithm>
#include <testsuite_hooks.h>
using std::make_default_searcher;
using std::make_boyer_moore_searcher;
using std::make_boyer_moore_horspool_searcher;
// Narrow-character patterns: each searcher must return the same position
// as std::search for every needle/haystack pair.
void
test01()
{
  // A string containing a char with a negative value (where char is signed).
  const char s[] = { 'a', static_cast<char>(-97), 'a', '\0' };
  const char* needles[] = {
    s, "", "a", "aa", "aaa", "ab", "cd", "abcd", "abcdabcd", "abcabcd"
  };
  const char* haystacks[] = {
    s, "", "a", "aa", "aaa", "ab", "cd", "abcd", "abcdabcd", "abcabcd",
    "aaaaaaa", "aabaa", "aaacab", "cdabcdab", "abcdabcd", "xyzabcdxyz"
  };

  for (const char* pat : needles)
    {
      const char* const pat_end = pat + std::strlen(pat);
      const auto plain = make_default_searcher(pat, pat_end);
      const auto bm = make_boyer_moore_searcher(pat, pat_end);
      const auto bmh = make_boyer_moore_horspool_searcher(pat, pat_end);

      for (const char* text : haystacks)
        {
          const char* const text_end = text + std::strlen(text);
          // Reference answer.
          const char* const expected
            = std::search(text, text_end, pat, pat_end);

          VERIFY( plain(text, text_end) == expected );
          VERIFY( bm(text, text_end) == expected );
          VERIFY( bmh(text, text_end) == expected );
        }
    }
}
// Wide-character patterns: each searcher must return the same position as
// std::search.  Compiled to an empty function without wchar_t support.
void
test02()
{
#ifdef _GLIBCXX_USE_WCHAR_T
  // A string containing the value -97 converted to wchar_t.
  const wchar_t s[] = { L'a', static_cast<wchar_t>(-97), L'a', L'\0' };
  const wchar_t* needles[] = {
    s, L"", L"a", L"aa", L"aaa", L"ab", L"cd", L"abcd", L"abcdabcd", L"abcabcd"
  };
  const wchar_t* haystacks[] = {
    s, L"", L"a", L"aa", L"aaa", L"ab", L"cd", L"abcd", L"abcdabcd", L"abcabcd",
    L"aaaaaaa", L"aabaa", L"aaacab", L"cdabcdab", L"abcdabcd", L"xyzabcdxyz"
  };

  for (const wchar_t* pat : needles)
    {
      const wchar_t* const pat_end = pat + std::wcslen(pat);
      const auto plain = make_default_searcher(pat, pat_end);
      const auto bm = make_boyer_moore_searcher(pat, pat_end);
      const auto bmh = make_boyer_moore_horspool_searcher(pat, pat_end);

      for (const wchar_t* text : haystacks)
        {
          const wchar_t* const text_end = text + std::wcslen(text);
          // Reference answer.
          const wchar_t* const expected
            = std::search(text, text_end, pat, pat_end);

          VERIFY( plain(text, text_end) == expected );
          VERIFY( bm(text, text_end) == expected );
          VERIFY( bmh(text, text_end) == expected );
        }
    }
#endif
}
// Custom predicate and hash: each searcher must return the same position
// as std::search called with the same predicate.
void
test03()
{
  // One functor serves as both the equality predicate (two arguments) and
  // the hash function (one argument).  Every non-alphanumeric character
  // is treated as '#', so such characters all compare equal to each other.
  struct fuzzy_char
  {
    static unsigned char
    norm(unsigned char c)
    {
      if (std::isalnum(c))
        return c;
      return '#';
    }

    // equality
    bool
    operator()(char l, char r) const
    { return norm(l) == norm(r); }

    // hash
    std::size_t
    operator()(char c) const
    { return std::hash<char>{}(norm(c)); }
  };
  fuzzy_char eq;

  const char* needle = " foo 123 ";
  const char* haystack = "*****foo*123******";
  const char* ne = needle + std::strlen(needle);
  const char* he = haystack + std::strlen(haystack);

  const auto plain = make_default_searcher(needle, ne, eq);
  const auto bm = make_boyer_moore_searcher(needle, ne, eq, eq);
  const auto bmh = make_boyer_moore_horspool_searcher(needle, ne, eq, eq);

  // Reference answer.
  const char* const expected = std::search(haystack, he, needle, ne, eq);

  VERIFY( plain(haystack, he) == expected );
  VERIFY( bm(haystack, he) == expected );
  VERIFY( bmh(haystack, he) == expected );
}
// Run the narrow-character, wide-character and custom-predicate tests
// in that order.
int
main()
{
  test01();
  test02();
  test03();
  return 0;
}