Small improvement to SipHasher
This commit is contained in:
parent
7bccb829d0
commit
a319d13a9b
|
@ -14,6 +14,8 @@
|
||||||
|
|
||||||
use marker::PhantomData;
|
use marker::PhantomData;
|
||||||
use ptr;
|
use ptr;
|
||||||
|
use cmp;
|
||||||
|
use mem;
|
||||||
|
|
||||||
/// An implementation of SipHash 1-3.
|
/// An implementation of SipHash 1-3.
|
||||||
///
|
///
|
||||||
|
@ -78,45 +80,6 @@ struct State {
|
||||||
v3: u64,
|
v3: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
// sadly, these macro definitions can't appear later,
|
|
||||||
// because they're needed in the following defs;
|
|
||||||
// this design could be improved.
|
|
||||||
|
|
||||||
macro_rules! u8to64_le {
|
|
||||||
($buf:expr, $i:expr) =>
|
|
||||||
($buf[0+$i] as u64 |
|
|
||||||
($buf[1+$i] as u64) << 8 |
|
|
||||||
($buf[2+$i] as u64) << 16 |
|
|
||||||
($buf[3+$i] as u64) << 24 |
|
|
||||||
($buf[4+$i] as u64) << 32 |
|
|
||||||
($buf[5+$i] as u64) << 40 |
|
|
||||||
($buf[6+$i] as u64) << 48 |
|
|
||||||
($buf[7+$i] as u64) << 56);
|
|
||||||
($buf:expr, $i:expr, $len:expr) =>
|
|
||||||
({
|
|
||||||
let mut t = 0;
|
|
||||||
let mut out = 0;
|
|
||||||
while t < $len {
|
|
||||||
out |= ($buf[t+$i] as u64) << t*8;
|
|
||||||
t += 1;
|
|
||||||
}
|
|
||||||
out
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Load a full u64 word from a byte stream, in LE order. Use
|
|
||||||
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
|
|
||||||
/// to load u64 from a possibly unaligned address.
|
|
||||||
///
|
|
||||||
/// Unsafe because: unchecked indexing at i..i+8
|
|
||||||
#[inline]
|
|
||||||
unsafe fn load_u64_le(buf: &[u8], i: usize) -> u64 {
|
|
||||||
debug_assert!(i + 8 <= buf.len());
|
|
||||||
let mut data = 0u64;
|
|
||||||
ptr::copy_nonoverlapping(buf.get_unchecked(i), &mut data as *mut _ as *mut u8, 8);
|
|
||||||
data.to_le()
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! compress {
|
macro_rules! compress {
|
||||||
($state:expr) => ({
|
($state:expr) => ({
|
||||||
compress!($state.v0, $state.v1, $state.v2, $state.v3)
|
compress!($state.v0, $state.v1, $state.v2, $state.v3)
|
||||||
|
@ -132,6 +95,47 @@ macro_rules! compress {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Load an integer of the desired type from a byte stream, in LE order. Uses
|
||||||
|
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
|
||||||
|
/// to load it from a possibly unaligned address.
|
||||||
|
///
|
||||||
|
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
|
||||||
|
macro_rules! load_int_le {
|
||||||
|
($buf:expr, $i:expr, $int_ty:ident) =>
|
||||||
|
({
|
||||||
|
debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
|
||||||
|
let mut data = 0 as $int_ty;
|
||||||
|
ptr::copy_nonoverlapping($buf.get_unchecked($i),
|
||||||
|
&mut data as *mut _ as *mut u8,
|
||||||
|
mem::size_of::<$int_ty>());
|
||||||
|
data.to_le()
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load an u64 using up to 7 bytes of a byte slice.
|
||||||
|
///
|
||||||
|
/// Unsafe because: unchecked indexing at start..start+len
|
||||||
|
#[inline]
|
||||||
|
unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
|
||||||
|
debug_assert!(len < 8);
|
||||||
|
let mut i = 0; // current byte index (from LSB) in the output u64
|
||||||
|
let mut out = 0;
|
||||||
|
if i + 3 < len {
|
||||||
|
out = load_int_le!(buf, start + i, u32) as u64;
|
||||||
|
i += 4;
|
||||||
|
}
|
||||||
|
if i + 1 < len {
|
||||||
|
out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
|
||||||
|
i += 2
|
||||||
|
}
|
||||||
|
if i < len {
|
||||||
|
out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
debug_assert_eq!(i, len);
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
impl SipHasher {
|
impl SipHasher {
|
||||||
/// Creates a new `SipHasher` with the two initial keys set to 0.
|
/// Creates a new `SipHasher` with the two initial keys set to 0.
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -220,6 +224,37 @@ impl<S: Sip> Hasher<S> {
|
||||||
self.state.v3 = self.k1 ^ 0x7465646279746573;
|
self.state.v3 = self.k1 ^ 0x7465646279746573;
|
||||||
self.ntail = 0;
|
self.ntail = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Specialized write function that is only valid for buffers with len <= 8.
|
||||||
|
// It's used to force inlining of write_u8 and write_usize, those would normally be inlined
|
||||||
|
// except for composite types (that includes slices and str hashing because of delimiter).
|
||||||
|
// Without this extra push the compiler is very reluctant to inline delimiter writes,
|
||||||
|
// degrading performance substantially for the most common use cases.
|
||||||
|
#[inline(always)]
|
||||||
|
fn short_write(&mut self, msg: &[u8]) {
|
||||||
|
debug_assert!(msg.len() <= 8);
|
||||||
|
let length = msg.len();
|
||||||
|
self.length += length;
|
||||||
|
|
||||||
|
let needed = 8 - self.ntail;
|
||||||
|
let fill = cmp::min(length, needed);
|
||||||
|
if fill == 8 {
|
||||||
|
self.tail = unsafe { load_int_le!(msg, 0, u64) };
|
||||||
|
} else {
|
||||||
|
self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
|
||||||
|
if length < needed {
|
||||||
|
self.ntail += length;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.state.v3 ^= self.tail;
|
||||||
|
S::c_rounds(&mut self.state);
|
||||||
|
self.state.v0 ^= self.tail;
|
||||||
|
|
||||||
|
// Buffered tail is now flushed, process new input.
|
||||||
|
self.ntail = length - needed;
|
||||||
|
self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
@ -262,6 +297,21 @@ impl super::Hasher for SipHasher24 {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<S: Sip> super::Hasher for Hasher<S> {
|
impl<S: Sip> super::Hasher for Hasher<S> {
|
||||||
|
// see short_write comment for explanation
|
||||||
|
#[inline]
|
||||||
|
fn write_usize(&mut self, i: usize) {
|
||||||
|
let bytes = unsafe {
|
||||||
|
::slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
|
||||||
|
};
|
||||||
|
self.short_write(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// see short_write comment for explanation
|
||||||
|
#[inline]
|
||||||
|
fn write_u8(&mut self, i: u8) {
|
||||||
|
self.short_write(&[i]);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn write(&mut self, msg: &[u8]) {
|
fn write(&mut self, msg: &[u8]) {
|
||||||
let length = msg.len();
|
let length = msg.len();
|
||||||
|
@ -271,19 +321,16 @@ impl<S: Sip> super::Hasher for Hasher<S> {
|
||||||
|
|
||||||
if self.ntail != 0 {
|
if self.ntail != 0 {
|
||||||
needed = 8 - self.ntail;
|
needed = 8 - self.ntail;
|
||||||
|
self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << 8 * self.ntail;
|
||||||
if length < needed {
|
if length < needed {
|
||||||
self.tail |= u8to64_le!(msg, 0, length) << 8 * self.ntail;
|
|
||||||
self.ntail += length;
|
self.ntail += length;
|
||||||
return
|
return
|
||||||
|
} else {
|
||||||
|
self.state.v3 ^= self.tail;
|
||||||
|
S::c_rounds(&mut self.state);
|
||||||
|
self.state.v0 ^= self.tail;
|
||||||
|
self.ntail = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let m = self.tail | u8to64_le!(msg, 0, needed) << 8 * self.ntail;
|
|
||||||
|
|
||||||
self.state.v3 ^= m;
|
|
||||||
S::c_rounds(&mut self.state);
|
|
||||||
self.state.v0 ^= m;
|
|
||||||
|
|
||||||
self.ntail = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Buffered tail is now flushed, process new input.
|
// Buffered tail is now flushed, process new input.
|
||||||
|
@ -292,7 +339,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
|
||||||
|
|
||||||
let mut i = needed;
|
let mut i = needed;
|
||||||
while i < len - left {
|
while i < len - left {
|
||||||
let mi = unsafe { load_u64_le(msg, i) };
|
let mi = unsafe { load_int_le!(msg, i, u64) };
|
||||||
|
|
||||||
self.state.v3 ^= mi;
|
self.state.v3 ^= mi;
|
||||||
S::c_rounds(&mut self.state);
|
S::c_rounds(&mut self.state);
|
||||||
|
@ -301,7 +348,7 @@ impl<S: Sip> super::Hasher for Hasher<S> {
|
||||||
i += 8;
|
i += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.tail = u8to64_le!(msg, i, left);
|
self.tail = unsafe { u8to64_le(msg, i, left) };
|
||||||
self.ntail = left;
|
self.ntail = left;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ use test::{Bencher, black_box};
|
||||||
|
|
||||||
use core::hash::{Hash, Hasher};
|
use core::hash::{Hash, Hasher};
|
||||||
use core::hash::{SipHasher, SipHasher13, SipHasher24};
|
use core::hash::{SipHasher, SipHasher13, SipHasher24};
|
||||||
|
use core::{slice, mem};
|
||||||
|
|
||||||
// Hash just the bytes of the slice, without length prefix
|
// Hash just the bytes of the slice, without length prefix
|
||||||
struct Bytes<'a>(&'a [u8]);
|
struct Bytes<'a>(&'a [u8]);
|
||||||
|
@ -327,6 +328,26 @@ fn test_hash_no_concat_alias() {
|
||||||
assert!(hash(&v) != hash(&w));
|
assert!(hash(&v) != hash(&w));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_write_short_works() {
|
||||||
|
let test_usize = 0xd0c0b0a0usize;
|
||||||
|
let mut h1 = SipHasher24::new();
|
||||||
|
h1.write_usize(test_usize);
|
||||||
|
h1.write(b"bytes");
|
||||||
|
h1.write(b"string");
|
||||||
|
h1.write_u8(0xFFu8);
|
||||||
|
h1.write_u8(0x01u8);
|
||||||
|
let mut h2 = SipHasher24::new();
|
||||||
|
h2.write(unsafe {
|
||||||
|
slice::from_raw_parts(&test_usize as *const _ as *const u8,
|
||||||
|
mem::size_of::<usize>())
|
||||||
|
});
|
||||||
|
h2.write(b"bytes");
|
||||||
|
h2.write(b"string");
|
||||||
|
h2.write(&[0xFFu8, 0x01u8]);
|
||||||
|
assert_eq!(h1.finish(), h2.finish());
|
||||||
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn bench_str_under_8_bytes(b: &mut Bencher) {
|
fn bench_str_under_8_bytes(b: &mut Bencher) {
|
||||||
let s = "foo";
|
let s = "foo";
|
||||||
|
|
Loading…
Reference in New Issue