hash_bytes.cc: New file...

2010-09-13  Matt Austern  <austern@google.com>

	* src/hash_bytes.cc: New file, exports _Hash_bytes (a Murmur hash),
	and _Fnv_hash_bytes (based on a FNV algorithm).
	* src/compatibility-c++0x.cc (hash<string>::operator(),
	hash<const string&>::operator(), hash<wstring>::operator(),
	hash<const wstring&>::operator(), hash<error_code>::operator()):
	Adjust, use _Hash_bytes.
	* include/std/system_error (hash<error_code>::operator()): Likewise.
	* include/std/thread (hash<thread::id>operator()): Likewise.
	* include/std/bitset (hash<bitset>operator()): Likewise.
	* include/bits/basic_string.h (hash<string>::operator(),
	hash<wstring>::operator(), hash<u16string>::operator(),
	hash<u32string>::operator()): Adjust.
	* include/bits/vector.tcc (hash<vector<bool>>::operator()): Adjust.
	* include/bits/functional_hash.h (_Hash_bytes, _Fnv_hash_bytes):
	Declare.
	(struct _Hash_impl, struct _Fnv_hash_impl): Add, use _Hash_bytes
	and _Fnv_hash_bytes, respectively.
	(hash<float>::operator(), hash<double>::operator()): Adjust.
	* config/abi/pre/gnu.ver: Add exports.
	* src/Makefile.am: Add.
	* src/Makefile.in: Regenerate.

From-SVN: r164253
This commit is contained in:
Matt Austern 2010-09-13 18:23:56 +00:00 committed by Paolo Carlini
parent 2ea5ee06a9
commit e7f72940d2
12 changed files with 304 additions and 97 deletions

View File

@ -1,3 +1,27 @@
2010-09-13 Matt Austern <austern@google.com>
* src/hash_bytes.cc: New file, exports _Hash_bytes (a Murmur hash),
and _Fnv_hash_bytes (based on a FNV algorithm).
* src/compatibility-c++0x.cc (hash<string>::operator(),
hash<const string&>::operator(), hash<wstring>::operator(),
hash<const wstring&>::operator(), hash<error_code>::operator()):
Adjust, use _Hash_bytes.
* include/std/system_error (hash<error_code>::operator()): Likewise.
* include/std/thread (hash<thread::id>operator()): Likewise.
* include/std/bitset (hash<bitset>operator()): Likewise.
* include/bits/basic_string.h (hash<string>::operator(),
hash<wstring>::operator(), hash<u16string>::operator(),
hash<u32string>::operator()): Adjust.
* include/bits/vector.tcc (hash<vector<bool>>::operator()): Adjust.
* include/bits/functional_hash.h (_Hash_bytes, _Fnv_hash_bytes):
Declare.
(struct _Hash_impl, struct _Fnv_hash_impl): Add, use _Hash_bytes
and _Fnv_hash_bytes, respectively.
(hash<float>::operator(), hash<double>::operator()): Adjust.
* config/abi/pre/gnu.ver: Add exports.
* src/Makefile.am: Add.
* src/Makefile.in: Regenerate.
2010-09-13 Paolo Carlini <paolo.carlini@oracle.com>
* include/bits/forward_list.h (forward_list<>::resize(size_type,

View File

@ -1178,6 +1178,12 @@ GLIBCXX_3.4.15 {
_ZNSbIwSt11char_traitsIwESaIwEE4backEv;
_ZNKSbIwSt11char_traitsIwESaIwEE4backEv;
# Default function.
_ZSt11_Hash_bytesPKv*;
# FNV hash.
_ZSt15_Fnv_hash_bytesPKv*;
} GLIBCXX_3.4.14;
# Symbols in the support library (libsupc++) have their own tag.

View File

@ -2929,7 +2929,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
{
size_t
operator()(const string& __s) const
{ return std::_Fnv_hash::hash(__s.data(), __s.length()); }
{ return std::_Hash_impl::hash(__s.data(), __s.length()); }
};
#ifdef _GLIBCXX_USE_WCHAR_T
@ -2940,8 +2940,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
{
size_t
operator()(const wstring& __s) const
{ return std::_Fnv_hash::hash(__s.data(),
__s.length() * sizeof(wchar_t)); }
{ return std::_Hash_impl::hash(__s.data(),
__s.length() * sizeof(wchar_t)); }
};
#endif
#endif /* _GLIBCXX_COMPATIBILITY_CXX0X */
@ -2954,8 +2954,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
{
size_t
operator()(const u16string& __s) const
{ return std::_Fnv_hash::hash(__s.data(),
__s.length() * sizeof(char16_t)); }
{ return std::_Hash_impl::hash(__s.data(),
__s.length() * sizeof(char16_t)); }
};
/// std::hash specialization for u32string.
@ -2965,8 +2965,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
{
size_t
operator()(const u32string& __s) const
{ return std::_Fnv_hash::hash(__s.data(),
__s.length() * sizeof(char32_t)); }
{ return std::_Hash_impl::hash(__s.data(),
__s.length() * sizeof(char32_t)); }
};
#endif

View File

@ -116,74 +116,55 @@ namespace std
#undef _Cxx_hashtable_define_trivial_hash
// Fowler / Noll / Vo (FNV) Hash (type FNV-1a)
// Hash function implementation for the nontrivial specialization.
// All of them are based on a primitive that hashes a pointer to
// a byte array. The actual hash algorithm is not guaranteed to
// stay the same from release to release -- it may be updated or
// tuned to improve hash quality or speed.
size_t
_Hash_bytes(const void* __ptr, size_t __len, size_t __seed);
// Dummy generic implementation (for sizeof(size_t) != 4, 8).
template<size_t>
struct _Fnv_hash_base
{
template<typename _Tp>
static size_t
hash(const _Tp* __ptr, size_t __clength, size_t __hash = 0)
{
const char* __cptr = reinterpret_cast<const char*>(__ptr);
for (; __clength; --__clength)
__hash = (__hash * 131) + *__cptr++;
return __hash;
}
};
// A similar hash primitive, using the FNV hash algorithm. This
// algorithm is guaranteed to stay the same from release to release.
// (although it might not produce the same values on different machines.)
size_t
_Fnv_hash_bytes(const void* __ptr, size_t __len, size_t __seed);
template<>
struct _Fnv_hash_base<4>
{
template<typename _Tp>
static size_t
hash(const _Tp* __ptr, size_t __clength,
size_t __hash = static_cast<size_t>(2166136261UL))
{
const char* __cptr = reinterpret_cast<const char*>(__ptr);
for (; __clength; --__clength)
{
__hash ^= static_cast<size_t>(*__cptr++);
__hash *= static_cast<size_t>(16777619UL);
}
return __hash;
}
};
template<>
struct _Fnv_hash_base<8>
{
template<typename _Tp>
static size_t
hash(const _Tp* __ptr, size_t __clength,
size_t __hash = static_cast<size_t>(14695981039346656037ULL))
{
const char* __cptr = reinterpret_cast<const char*>(__ptr);
for (; __clength; --__clength)
{
__hash ^= static_cast<size_t>(*__cptr++);
__hash *= static_cast<size_t>(1099511628211ULL);
}
return __hash;
}
};
struct _Hash_impl
{
static size_t
hash(const void* __ptr, size_t __clength,
size_t __seed = static_cast<size_t>(0xc70f6907UL))
{ return _Hash_bytes(__ptr, __clength, __seed); }
struct _Fnv_hash
: public _Fnv_hash_base<sizeof(size_t)>
{
using _Fnv_hash_base<sizeof(size_t)>::hash;
template<typename _Tp>
static size_t
hash(const _Tp& __val)
{ return hash(&__val, sizeof(__val)); }
template<typename _Tp>
static size_t
hash(const _Tp& __val)
{ return hash(&__val, sizeof(__val)); }
template<typename _Tp>
static size_t
__hash_combine(const _Tp& __val, size_t __hash)
{ return hash(&__val, sizeof(__val), __hash); }
};
template<typename _Tp>
static size_t
__hash_combine(const _Tp& __val, size_t __hash)
{ return hash(&__val, sizeof(__val), __hash); }
};
struct _Fnv_hash_impl
{
static size_t
hash(const void* __ptr, size_t __clength,
size_t __seed = static_cast<size_t>(2166136261UL))
{ return _Fnv_hash_bytes(__ptr, __clength, __seed); }
template<typename _Tp>
static size_t
hash(const _Tp& __val)
{ return hash(&__val, sizeof(__val)); }
template<typename _Tp>
static size_t
__hash_combine(const _Tp& __val, size_t __hash)
{ return hash(&__val, sizeof(__val), __hash); }
};
/// Specialization for float.
template<>
@ -191,7 +172,7 @@ namespace std
hash<float>::operator()(float __val) const
{
// 0 and -0 both hash to zero.
return __val != 0.0f ? std::_Fnv_hash::hash(__val) : 0;
return __val != 0.0f ? std::_Hash_impl::hash(__val) : 0;
}
/// Specialization for double.
@ -200,7 +181,7 @@ namespace std
hash<double>::operator()(double __val) const
{
// 0 and -0 both hash to zero.
return __val != 0.0 ? std::_Fnv_hash::hash(__val) : 0;
return __val != 0.0 ? std::_Hash_impl::hash(__val) : 0;
}
/// Specialization for long double.

View File

@ -748,7 +748,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
if (__words)
{
const size_t __clength = __words * sizeof(_Bit_type);
__hash = std::_Fnv_hash::hash(__b._M_impl._M_start._M_p, __clength);
__hash = std::_Hash_impl::hash(__b._M_impl._M_start._M_p, __clength);
}
const size_t __extrabits = __b.size() % _S_word_bit;
@ -760,9 +760,9 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
const size_t __clength
= (__extrabits + __CHAR_BIT__ - 1) / __CHAR_BIT__;
if (__words)
__hash = std::_Fnv_hash::hash(&__hiword, __clength, __hash);
__hash = std::_Hash_impl::hash(&__hiword, __clength, __hash);
else
__hash = std::_Fnv_hash::hash(&__hiword, __clength);
__hash = std::_Hash_impl::hash(&__hiword, __clength);
}
return __hash;

View File

@ -1501,7 +1501,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
operator()(const _GLIBCXX_STD_D::bitset<_Nb>& __b) const
{
const size_t __clength = (_Nb + __CHAR_BIT__ - 1) / __CHAR_BIT__;
return std::_Fnv_hash::hash(__b._M_getdata(), __clength);
return std::_Hash_impl::hash(__b._M_getdata(), __clength);
}
};

View File

@ -355,8 +355,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
size_t
operator()(const error_code& __e) const
{
const size_t __tmp = std::_Fnv_hash::hash(__e._M_value);
return std::_Fnv_hash::__hash_combine(__e._M_cat, __tmp);
const size_t __tmp = std::_Hash_impl::hash(__e._M_value);
return std::_Hash_impl::__hash_combine(__e._M_cat, __tmp);
}
};
@ -367,4 +367,3 @@ _GLIBCXX_END_NAMESPACE
#endif // __GXX_EXPERIMENTAL_CXX0X__
#endif // _GLIBCXX_SYSTEM_ERROR

View File

@ -228,7 +228,7 @@ namespace std
{
size_t
operator()(const thread::id& __id) const
{ return std::_Fnv_hash::hash(__id._M_thread); }
{ return std::_Hash_impl::hash(__id._M_thread); }
};
template<class _CharT, class _Traits>

View File

@ -179,6 +179,7 @@ sources = \
hash_tr1.cc \
hashtable_c++0x.cc \
hashtable_tr1.cc \
hash_bytes.cc \
ios.cc \
ios_failure.cc \
ios_init.cc \
@ -309,6 +310,11 @@ hashtable_c++0x.lo: hashtable_c++0x.cc
hashtable_c++0x.o: hashtable_c++0x.cc
$(CXXCOMPILE) -std=gnu++0x -c $<
hash_bytes.lo: hash_bytes.cc
$(LTCXXCOMPILE) -std=gnu++0x -c $<
hash_bytes.o: hash_bytes.cc
$(CXXCOMPILE) -std=gnu++0x -c $<
limits.lo: limits.cc
$(LTCXXCOMPILE) -std=gnu++0x -c $<
limits.o: limits.cc

View File

@ -100,17 +100,18 @@ am__objects_5 = atomic.lo bitmap_allocator.lo pool_allocator.lo \
compatibility-c++0x.lo compatibility-debug_list.lo \
compatibility-list.lo complex_io.lo ctype.lo debug.lo \
functexcept.lo globals_io.lo hash_c++0x.lo hash_tr1.lo \
hashtable_c++0x.lo hashtable_tr1.lo ios.lo ios_failure.lo \
ios_init.lo ios_locale.lo limits.lo list.lo debug_list.lo \
locale.lo locale_init.lo locale_facets.lo localename.lo \
math_stubs_float.lo math_stubs_long_double.lo stdexcept.lo \
strstream.lo system_error.lo tree.lo allocator-inst.lo \
concept-inst.lo fstream-inst.lo ext-inst.lo ios-inst.lo \
iostream-inst.lo istream-inst.lo istream.lo locale-inst.lo \
misc-inst.lo ostream-inst.lo sstream-inst.lo streambuf-inst.lo \
streambuf.lo string-inst.lo valarray-inst.lo wlocale-inst.lo \
wstring-inst.lo mutex.lo condition_variable.lo chrono.lo \
thread.lo future.lo $(am__objects_1) $(am__objects_4)
hashtable_c++0x.lo hashtable_tr1.lo hash_bytes.lo ios.lo \
ios_failure.lo ios_init.lo ios_locale.lo limits.lo list.lo \
debug_list.lo locale.lo locale_init.lo locale_facets.lo \
localename.lo math_stubs_float.lo math_stubs_long_double.lo \
stdexcept.lo strstream.lo system_error.lo tree.lo \
allocator-inst.lo concept-inst.lo fstream-inst.lo ext-inst.lo \
ios-inst.lo iostream-inst.lo istream-inst.lo istream.lo \
locale-inst.lo misc-inst.lo ostream-inst.lo sstream-inst.lo \
streambuf-inst.lo streambuf.lo string-inst.lo valarray-inst.lo \
wlocale-inst.lo wstring-inst.lo mutex.lo condition_variable.lo \
chrono.lo thread.lo future.lo $(am__objects_1) \
$(am__objects_4)
am_libstdc___la_OBJECTS = $(am__objects_5)
libstdc___la_OBJECTS = $(am_libstdc___la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
@ -383,6 +384,7 @@ sources = \
hash_tr1.cc \
hashtable_c++0x.cc \
hashtable_tr1.cc \
hash_bytes.cc \
ios.cc \
ios_failure.cc \
ios_init.cc \
@ -885,6 +887,11 @@ hashtable_c++0x.lo: hashtable_c++0x.cc
hashtable_c++0x.o: hashtable_c++0x.cc
$(CXXCOMPILE) -std=gnu++0x -c $<
hash_bytes.lo: hash_bytes.cc
$(LTCXXCOMPILE) -std=gnu++0x -c $<
hash_bytes.o: hash_bytes.cc
$(CXXCOMPILE) -std=gnu++0x -c $<
limits.lo: limits.cc
$(LTCXXCOMPILE) -std=gnu++0x -c $<
limits.o: limits.cc

View File

@ -54,23 +54,23 @@ namespace std
template<>
size_t
hash<string>::operator()(string __s) const
{ return _Fnv_hash::hash(__s.data(), __s.length()); }
{ return _Hash_impl::hash(__s.data(), __s.length()); }
template<>
size_t
hash<const string&>::operator()(const string& __s) const
{ return _Fnv_hash::hash(__s.data(), __s.length()); }
{ return _Hash_impl::hash(__s.data(), __s.length()); }
#ifdef _GLIBCXX_USE_WCHAR_T
template<>
size_t
hash<wstring>::operator()(wstring __s) const
{ return _Fnv_hash::hash(__s.data(), __s.length() * sizeof(wchar_t)); }
{ return _Hash_impl::hash(__s.data(), __s.length() * sizeof(wchar_t)); }
template<>
size_t
hash<const wstring&>::operator()(const wstring& __s) const
{ return _Fnv_hash::hash(__s.data(), __s.length() * sizeof(wchar_t)); }
{ return _Hash_impl::hash(__s.data(), __s.length() * sizeof(wchar_t)); }
#endif
#endif
@ -78,7 +78,7 @@ namespace std
size_t
hash<error_code>::operator()(error_code __e) const
{
const size_t __tmp = std::_Fnv_hash::hash(__e._M_value);
return std::_Fnv_hash::__hash_combine(__e._M_cat, __tmp);
const size_t __tmp = std::_Hash_impl::hash(__e._M_value);
return std::_Hash_impl::__hash_combine(__e._M_cat, __tmp);
}
}

View File

@ -0,0 +1,184 @@
// Definition of _Hash_bytes. -*- C++ -*-
// Copyright (C) 2010 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
// This file defines Hash_bytes, a primitive used for defining hash
// functions. Based on public domain MurmurHashUnaligned2, by Austin
// Appleby. http://murmurhash.googlepages.com/
// This file also defines _Fnv_hash_bytes, another primitive with
// exactly the same interface but using a different hash algorithm,
// Fowler / Noll / Vo (FNV) Hash (type FNV-1a). The Murmur hash
// function apears to be better in both speed and hash quality, and
// FNV is provided primarily for backward compatibility.
#include <cstring>
#include <bits/functional_hash.h>
namespace
{
inline std::size_t
unaligned_load(const char* p)
{
std::size_t result;
std::memcpy(&result, p, sizeof(result));
return result;
}
// Loads n bytes, where 1 <= n < 8.
inline std::size_t
load_bytes(const char* p, int n)
{
size_t result = 0;
--n;
do
result = (result << 8) + static_cast<unsigned char>(p[n]);
while (--n >= 0);
return result;
}
inline std::size_t
shift_mix(std::size_t v)
{ return v ^ (v >> 47);}
}
namespace std
{
#if __SIZEOF_SIZE_T__ == 4
// Implementation of Murmur hash for 32-bit size_t.
size_t
_Hash_bytes(const void* ptr, size_t len, size_t seed)
{
const size_t m = 0x5bd1e995;
size_t hash = seed ^ len;
const char* buf = static_cast<const char*>(ptr);
// Mix 4 bytes at a time into the hash.
while(len >= 4)
{
size_t k = unaligned_load(buf);
k *= m;
k ^= k >> 24;
k *= m;
hash *= m;
hash ^= k;
buf += 4;
len -= 4;
}
// Handle the last few bytes of the input array.
switch(len)
{
case 3:
hash ^= static_cast<unsigned char>(buf[2]) << 16;
case 2:
hash ^= static_cast<unsigned char>(buf[1]) << 8;
case 1:
hash ^= static_cast<unsigned char>(buf[0]);
hash *= m;
};
// Do a few final mixes of the hash.
hash ^= hash >> 13;
hash *= m;
hash ^= hash >> 15;
return hash;
}
// Implementation of FNV hash for 32-bit size_t.
size_t
_Fnv_hash_bytes(const void* ptr, size_t len, size_t hash)
{
const char* cptr = static_cast<const char*>(ptr);
for (; len; --len)
{
hash ^= static_cast<size_t>(*cptr++);
hash *= static_cast<size_t>(16777619UL);
}
return hash;
}
#elif __SIZEOF_SIZE_T__ == 8
// Implementation of Murmur hash for 64-bit size_t.
size_t
_Hash_bytes(const void* ptr, size_t len, size_t seed)
{
static const size_t mul = (0xc6a4a793UL << 32UL) + 0x5bd1e995UL;
const char* const buf = static_cast<const char*>(ptr);
// Remove the bytes not divisible by the sizeof(size_t). This
// allows the main loop to process the data as 64-bit integers.
const int len_aligned = len & ~0x7;
const char* const end = buf + len_aligned;
size_t hash = seed ^ (len * mul);
for (const char* p = buf; p != end; p += 8)
{
const size_t data = shift_mix(unaligned_load(p) * mul) * mul;
hash ^= data;
hash *= mul;
}
if ((len & 0x7) != 0)
{
const size_t data = load_bytes(end, len & 0x7);
hash ^= data;
hash *= mul;
}
hash = shift_mix(hash) * mul;
hash = shift_mix(hash);
return hash;
}
// Implementation of FNV hash for 64-bit size_t.
size_t
_Fnv_hash_bytes(const void* ptr, size_t len, size_t hash)
{
const char* cptr = static_cast<const char*>(ptr);
for (; len; --len)
{
hash ^= static_cast<size_t>(*cptr++);
hash *= static_cast<size_t>(1099511628211ULL);
}
return hash;
}
#else
// Dummy hash implementation for unusual sizeof(size_t).
size_t
_Hash_bytes(const void* ptr, size_t len, size_t seed)
{
size_t hash = seed;
const char* cptr = reinterpret_cast<const char*>(ptr);
for (; clength; --clength)
hash = (hash * 131) + *cptr++;
return hash;
}
size_t
_Fnv_hash_bytes(const void* ptr, size_t len, size_t seed)
{ return _Hash_bytes(ptr, len, seed); }
#endif /* __SIZEOF_SIZE_T__ */
}