auto merge of #15257 : erickt/rust/hashmap, r=alexcrichton

While maps created with `HashMap::new` and `HashMap::with_capacity` were initialized with a randomly keyed `SipHasher`, it turns out that maps built through `HashMap::from_iter` were just using the default instance of `SipHasher`, which isn't randomized. This fixes that bug, and also marks some important methods `#[inline]`.
This commit is contained in:
bors 2014-07-02 07:31:41 +00:00
commit 7c4d8e94ba
3 changed files with 121 additions and 16 deletions

View File

@ -16,15 +16,13 @@ use collections::{Collection, Mutable, Set, MutableSet, Map, MutableMap};
use default::Default; use default::Default;
use fmt::Show; use fmt::Show;
use fmt; use fmt;
use hash::{Hash, Hasher, sip}; use hash::{Hash, Hasher, RandomSipHasher};
use iter::{Iterator, FilterMap, Chain, Repeat, Zip, Extendable}; use iter::{Iterator, FilterMap, Chain, Repeat, Zip, Extendable};
use iter::{range, range_inclusive, FromIterator}; use iter::{range, range_inclusive, FromIterator};
use iter; use iter;
use mem::replace; use mem::replace;
use num; use num;
use option::{Some, None, Option}; use option::{Some, None, Option};
use rand::Rng;
use rand;
use result::{Ok, Err}; use result::{Ok, Err};
mod table { mod table {
@ -733,7 +731,7 @@ impl DefaultResizePolicy {
/// } /// }
/// ``` /// ```
#[deriving(Clone)] #[deriving(Clone)]
pub struct HashMap<K, V, H = sip::SipHasher> { pub struct HashMap<K, V, H = RandomSipHasher> {
// All hashes are keyed on these values, to prevent hash collision attacks. // All hashes are keyed on these values, to prevent hash collision attacks.
hasher: H, hasher: H,
@ -1033,18 +1031,17 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> MutableMap<K, V> for HashMap<K, V, H>
} }
impl<K: Hash + Eq, V> HashMap<K, V, sip::SipHasher> { impl<K: Hash + Eq, V> HashMap<K, V, RandomSipHasher> {
/// Create an empty HashMap. /// Create an empty HashMap.
pub fn new() -> HashMap<K, V, sip::SipHasher> { #[inline]
pub fn new() -> HashMap<K, V, RandomSipHasher> {
HashMap::with_capacity(INITIAL_CAPACITY) HashMap::with_capacity(INITIAL_CAPACITY)
} }
/// Creates an empty hash map with the given initial capacity. /// Creates an empty hash map with the given initial capacity.
pub fn with_capacity(capacity: uint) -> HashMap<K, V, sip::SipHasher> { #[inline]
let mut r = rand::task_rng(); pub fn with_capacity(capacity: uint) -> HashMap<K, V, RandomSipHasher> {
let r0 = r.gen(); let hasher = RandomSipHasher::new();
let r1 = r.gen();
let hasher = sip::SipHasher::new_with_keys(r0, r1);
HashMap::with_capacity_and_hasher(capacity, hasher) HashMap::with_capacity_and_hasher(capacity, hasher)
} }
} }
@ -1053,6 +1050,7 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
/// Creates an empty hashmap which will use the given hasher to hash keys. /// Creates an empty hashmap which will use the given hasher to hash keys.
/// ///
/// The created map has the default initial capacity. /// The created map has the default initial capacity.
#[inline]
pub fn with_hasher(hasher: H) -> HashMap<K, V, H> { pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher) HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
} }
@ -1064,6 +1062,7 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
/// is designed to allow HashMaps to be resistant to attacks that /// is designed to allow HashMaps to be resistant to attacks that
/// cause many collisions and very poor performance. Setting it /// cause many collisions and very poor performance. Setting it
/// manually using this function can expose a DoS attack vector. /// manually using this function can expose a DoS attack vector.
#[inline]
pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> { pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity)); let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
HashMap { HashMap {
@ -1489,7 +1488,7 @@ pub type SetMoveItems<K> =
/// HashMap where the value is (). As with the `HashMap` type, a `HashSet` /// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
/// requires that the elements implement the `Eq` and `Hash` traits. /// requires that the elements implement the `Eq` and `Hash` traits.
#[deriving(Clone)] #[deriving(Clone)]
pub struct HashSet<T, H = sip::SipHasher> { pub struct HashSet<T, H = RandomSipHasher> {
map: HashMap<T, (), H> map: HashMap<T, (), H>
} }
@ -1529,15 +1528,17 @@ impl<T: Eq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> {
fn remove(&mut self, value: &T) -> bool { self.map.remove(value) } fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
} }
impl<T: Hash + Eq> HashSet<T, sip::SipHasher> { impl<T: Hash + Eq> HashSet<T, RandomSipHasher> {
/// Create an empty HashSet /// Create an empty HashSet
pub fn new() -> HashSet<T, sip::SipHasher> { #[inline]
pub fn new() -> HashSet<T, RandomSipHasher> {
HashSet::with_capacity(INITIAL_CAPACITY) HashSet::with_capacity(INITIAL_CAPACITY)
} }
/// Create an empty HashSet with space for at least `capacity` elements in /// Create an empty HashSet with space for at least `capacity` elements in
/// the hash table. /// the hash table.
pub fn with_capacity(capacity: uint) -> HashSet<T, sip::SipHasher> { #[inline]
pub fn with_capacity(capacity: uint) -> HashSet<T, RandomSipHasher> {
HashSet { map: HashMap::with_capacity(capacity) } HashSet { map: HashMap::with_capacity(capacity) }
} }
} }
@ -1547,6 +1548,7 @@ impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
/// keys. /// keys.
/// ///
/// The hash set is also created with the default initial capacity. /// The hash set is also created with the default initial capacity.
#[inline]
pub fn with_hasher(hasher: H) -> HashSet<T, H> { pub fn with_hasher(hasher: H) -> HashSet<T, H> {
HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher) HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
} }
@ -1558,6 +1560,7 @@ impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
/// is designed to allow `HashSet`s to be resistant to attacks that /// is designed to allow `HashSet`s to be resistant to attacks that
/// cause many collisions and very poor performance. Setting it /// cause many collisions and very poor performance. Setting it
/// manually using this function can expose a DoS attack vector. /// manually using this function can expose a DoS attack vector.
#[inline]
pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> { pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> {
HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
} }

102
src/libstd/hash.rs Normal file
View File

@ -0,0 +1,102 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
/*!
* Generic hashing support.
*
* This module provides a generic way to compute the hash of a value. The
* simplest way to make a type hashable is to use `#[deriving(Hash)]`:
*
* # Example
*
* ```rust
* use std::hash;
* use std::hash::Hash;
*
* #[deriving(Hash)]
* struct Person {
* id: uint,
* name: String,
* phone: u64,
* }
*
* let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 };
* let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 };
*
* assert!(hash::hash(&person1) != hash::hash(&person2));
* ```
*
* If you need more control over how a value is hashed, you need to implement
* the trait `Hash`:
*
* ```rust
* use std::hash;
* use std::hash::Hash;
* use std::hash::sip::SipState;
*
* struct Person {
* id: uint,
* name: String,
* phone: u64,
* }
*
* impl Hash for Person {
* fn hash(&self, state: &mut SipState) {
* self.id.hash(state);
* self.phone.hash(state);
* }
* }
*
* let person1 = Person { id: 5, name: "Janet".to_string(), phone: 555_666_7777 };
* let person2 = Person { id: 5, name: "Bob".to_string(), phone: 555_666_7777 };
*
* assert!(hash::hash(&person1) == hash::hash(&person2));
* ```
*/
pub use core_collections::hash::{Hash, Hasher, Writer, hash, sip};
use default::Default;
use rand::Rng;
use rand;
/// `RandomSipHasher` computes the SipHash algorithm from a stream of bytes
/// initialized with random keys.
///
/// It is a thin wrapper around a `sip::SipHasher`: all hashing is delegated
/// to the wrapped hasher, and the only thing this type adds is that the
/// wrapped hasher is constructed with randomly generated keys rather than
/// fixed ones.
#[deriving(Clone)]
pub struct RandomSipHasher {
// The wrapped SipHash hasher that performs the actual hashing.
hasher: sip::SipHasher,
}
impl RandomSipHasher {
    /// Builds a `RandomSipHasher` whose underlying `sip::SipHasher` is keyed
    /// with two values drawn from the task-local random number generator.
    #[inline]
    pub fn new() -> RandomSipHasher {
        let mut rng = rand::task_rng();
        // Draw the keys one after the other, first key first, so the
        // generator is consumed in a well-defined order.
        let key0 = rng.gen();
        let key1 = rng.gen();
        let keyed = sip::SipHasher::new_with_keys(key0, key1);
        RandomSipHasher { hasher: keyed }
    }
}
impl Hasher<sip::SipState> for RandomSipHasher {
    /// Hashes `value` by delegating to the wrapped `sip::SipHasher` and
    /// returns the resulting 64-bit hash.
    #[inline]
    fn hash<T: Hash<sip::SipState>>(&self, value: &T) -> u64 {
        let digest = self.hasher.hash(value);
        digest
    }
}
impl Default for RandomSipHasher {
    /// Returns a hasher built by `RandomSipHasher::new()`, so a default
    /// value is keyed the same way as an explicitly constructed one.
    #[inline]
    fn default() -> RandomSipHasher {
        let fresh = RandomSipHasher::new();
        fresh
    }
}

View File

@ -167,7 +167,6 @@ pub use core::option;
pub use alloc::owned; pub use alloc::owned;
pub use alloc::rc; pub use alloc::rc;
pub use core_collections::hash;
pub use core_collections::slice; pub use core_collections::slice;
pub use core_collections::str; pub use core_collections::str;
pub use core_collections::string; pub use core_collections::string;
@ -237,6 +236,7 @@ pub mod to_str;
/* Common data structures */ /* Common data structures */
pub mod collections; pub mod collections;
pub mod hash;
/* Tasks and communication */ /* Tasks and communication */