diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index 56d95401a9a..ed797a7ee08 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -9,7 +9,6 @@ // except according to those terms. use self::Entry::*; -use self::SearchResult::*; use self::VacantEntryState::*; use borrow::Borrow; @@ -26,7 +25,6 @@ use super::table::{ Bucket, EmptyBucket, FullBucket, - FullBucketImm, FullBucketMut, RawTable, SafeHash @@ -342,10 +340,11 @@ pub struct HashMap { } /// Search for a pre-hashed key. +#[inline] fn search_hashed(table: M, hash: SafeHash, mut is_match: F) - -> SearchResult where + -> InternalEntry where M: Deref>, F: FnMut(&K) -> bool, { @@ -353,37 +352,50 @@ fn search_hashed(table: M, // undefined behavior when Bucket::new gets the raw bucket in this // case, immediately return the appropriate search result. if table.capacity() == 0 { - return TableRef(table); + return InternalEntry::TableIsEmpty; } - let size = table.size(); + let size = table.size() as isize; let mut probe = Bucket::new(table, hash); - let ib = probe.index(); + let ib = probe.index() as isize; - while probe.index() != ib + size { + loop { let full = match probe.peek() { - Empty(b) => return TableRef(b.into_table()), // hit an empty bucket - Full(b) => b + Empty(bucket) => { + // Found a hole! + return InternalEntry::Vacant { + hash: hash, + elem: NoElem(bucket), + }; + } + Full(bucket) => bucket }; - if full.distance() + ib < full.index() { + let robin_ib = full.index() as isize - full.displacement() as isize; + + if ib < robin_ib { + // Found a luckier bucket than me. // We can finish the search early if we hit any bucket // with a lower distance to initial bucket than we've probed. - return TableRef(full.into_table()); + return InternalEntry::Vacant { + hash: hash, + elem: NeqElem(full, robin_ib as usize), + }; } // If the hash doesn't match, it can't be this one.. if hash == full.hash() { // If the key doesn't match, it can't be this one.. if is_match(full.read().0) { - return FoundExisting(full); + return InternalEntry::Occupied { + elem: full + }; } } probe = full.next(); + debug_assert!(probe.index() as isize != ib + size + 1); } - - TableRef(probe.into_table()) } fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { @@ -393,7 +405,7 @@ fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { None => return (retkey, retval) }; - while gap.full().distance() != 0 { + while gap.full().displacement() != 0 { gap = match gap.shift() { Some(b) => b, None => break @@ -409,78 +421,63 @@ fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { /// to recalculate it. /// /// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. -fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, +fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, mut ib: usize, mut hash: SafeHash, - mut k: K, - mut v: V) + mut key: K, + mut val: V) -> &'a mut V { let starting_index = bucket.index(); let size = { let table = bucket.table(); // FIXME "lifetime too short". table.size() }; + // Save the *starting point*. + let mut bucket = bucket.stash(); // There can be at most `size - dib` buckets to displace, because // in the worst case, there are `size` elements and we already are - // `distance` buckets away from the initial one. - let idx_end = starting_index + size - bucket.distance(); + // `displacement` buckets away from the initial one. + let idx_end = starting_index + size - bucket.displacement(); loop { - let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); + let (old_hash, old_key, old_val) = bucket.replace(hash, key, val); + hash = old_hash; + key = old_key; + val = old_val; + loop { let probe = bucket.next(); - assert!(probe.index() != idx_end); + debug_assert!(probe.index() != idx_end); let full_bucket = match probe.peek() { Empty(bucket) => { // Found a hole! - let b = bucket.put(old_hash, old_key, old_val); + let bucket = bucket.put(hash, key, val); // Now that it's stolen, just read the value's pointer - // right out of the table! - return Bucket::at_index(b.into_table(), starting_index) - .peek() - .expect_full() - .into_mut_refs() - .1; + // right out of the table! Go back to the *starting point*. + // + // This use of `into_table` is misleading. It turns the + // bucket, which is a FullBucket on top of a + // FullBucketMut, into just one FullBucketMut. The "table" + // refers to the inner FullBucketMut in this context. + return bucket.into_table().into_mut_refs().1; }, Full(bucket) => bucket }; - let probe_ib = full_bucket.index() - full_bucket.distance(); + let probe_ib = full_bucket.index() - full_bucket.displacement(); bucket = full_bucket; // Robin hood! Steal the spot. if ib < probe_ib { ib = probe_ib; - hash = old_hash; - k = old_key; - v = old_val; break; } } } } -/// A result that works like Option> but preserves -/// the reference that grants us access to the table in any case. -enum SearchResult { - // This is an entry that holds the given key: - FoundExisting(FullBucket), - - // There was no such entry. The reference is given back: - TableRef(M) -} - -impl SearchResult { - fn into_option(self) -> Option> { - match self { - FoundExisting(bucket) => Some(bucket), - TableRef(_) => None - } - } -} - impl HashMap where K: Eq + Hash, S: BuildHasher { @@ -491,20 +488,20 @@ impl HashMap /// Search for a key, yielding the index if it's found in the hashtable. /// If you already have the hash for the key lying around, use /// search_hashed. - fn search<'a, Q: ?Sized>(&'a self, q: &Q) -> Option> + #[inline] + fn search<'a, Q: ?Sized>(&'a self, q: &Q) -> InternalEntry> where K: Borrow, Q: Eq + Hash { let hash = self.make_hash(q); search_hashed(&self.table, hash, |k| q.eq(k.borrow())) - .into_option() } - fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) -> Option> + #[inline] + fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) -> InternalEntry> where K: Borrow, Q: Eq + Hash { let hash = self.make_hash(q); search_hashed(&mut self.table, hash, |k| q.eq(k.borrow())) - .into_option() } // The caller should ensure that invariants by Robin Hood Hashing hold. @@ -711,7 +708,7 @@ impl HashMap loop { bucket = match bucket.peek() { Full(full) => { - if full.distance() == 0 { + if full.displacement() == 0 { // This bucket occupies its ideal spot. // It indicates the start of another "cluster". bucket = full.into_bucket(); @@ -804,53 +801,19 @@ impl HashMap /// /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. - fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { - self.insert_or_replace_with(hash, k, v, |_, _, _, _| ()) - } - - fn insert_or_replace_with<'a, F>(&'a mut self, - hash: SafeHash, - k: K, - v: V, - mut found_existing: F) - -> &'a mut V where - F: FnMut(&mut K, &mut V, K, V), - { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = self.table.size(); - let mut probe = Bucket::new(&mut self.table, hash); - let ib = probe.index(); - - loop { - let mut bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return bucket.put(hash, k, v).into_mut_refs().1; - } - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read_mut().0 { - let (bucket_k, bucket_v) = bucket.into_mut_refs(); - debug_assert!(k == *bucket_k); - // Key already exists. Get its reference. - found_existing(bucket_k, bucket_v, k, v); - return bucket_v; - } + fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { + let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k); + match entry { + Some(Occupied(mut elem)) => { + Some(elem.insert(v)) } - - let robin_ib = bucket.index() as isize - bucket.distance() as isize; - - if (ib as isize) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return robin_hood(bucket, robin_ib as usize, hash, k, v); + Some(Vacant(elem)) => { + elem.insert(v); + None + } + None => { + unreachable!() } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); } } @@ -977,9 +940,7 @@ impl HashMap pub fn entry(&mut self, key: K) -> Entry { // Gotta resize now. self.reserve(1); - - let hash = self.make_hash(&key); - search_entry_hashed(&mut self.table, hash, key) + self.search_mut(&key).into_entry(key).expect("unreachable") } /// Returns the number of elements in the map. @@ -1082,7 +1043,7 @@ impl HashMap pub fn get(&self, k: &Q) -> Option<&V> where K: Borrow, Q: Hash + Eq { - self.search(k).map(|bucket| bucket.into_refs().1) + self.search(k).into_occupied_bucket().map(|bucket| bucket.into_refs().1) } /// Returns true if the map contains a value for the specified key. @@ -1105,7 +1066,7 @@ impl HashMap pub fn contains_key(&self, k: &Q) -> bool where K: Borrow, Q: Hash + Eq { - self.search(k).is_some() + self.search(k).into_occupied_bucket().is_some() } /// Returns a mutable reference to the value corresponding to the key. @@ -1130,7 +1091,7 @@ impl HashMap pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> where K: Borrow, Q: Hash + Eq { - self.search_mut(k).map(|bucket| bucket.into_mut_refs().1) + self.search_mut(k).into_occupied_bucket().map(|bucket| bucket.into_mut_refs().1) } /// Inserts a key-value pair into the map. @@ -1161,12 +1122,7 @@ impl HashMap pub fn insert(&mut self, k: K, v: V) -> Option { let hash = self.make_hash(&k); self.reserve(1); - - let mut retval = None; - self.insert_or_replace_with(hash, k, v, |_, val_ref, _, val| { - retval = Some(replace(val_ref, val)); - }); - retval + self.insert_hashed_nocheck(hash, k, v) } /// Removes a key from the map, returning the value at the key if the key @@ -1194,54 +1150,7 @@ impl HashMap return None } - self.search_mut(k).map(|bucket| pop_internal(bucket).1) - } -} - -fn search_entry_hashed<'a, K: Eq, V>(table: &'a mut RawTable, hash: SafeHash, k: K) - -> Entry<'a, K, V> -{ - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = table.size(); - let mut probe = Bucket::new(table, hash); - let ib = probe.index(); - - loop { - let bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NoElem(bucket), - }); - }, - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read().0 { - return Occupied(OccupiedEntry{ - elem: bucket, - }); - } - } - - let robin_ib = bucket.index() as isize - bucket.distance() as isize; - - if (ib as isize) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NeqElem(bucket, robin_ib as usize), - }); - } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); + self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket).1) } } @@ -1362,18 +1271,47 @@ pub struct Drain<'a, K: 'a, V: 'a> { inner: iter::Map, fn((SafeHash, K, V)) -> (K, V)> } -/// A view into a single occupied location in a HashMap. -#[stable(feature = "rust1", since = "1.0.0")] -pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - elem: FullBucket>, +enum InternalEntry { + Occupied { + elem: FullBucket, + }, + Vacant { + hash: SafeHash, + elem: VacantEntryState, + }, + TableIsEmpty, } -/// A view into a single empty location in a HashMap. -#[stable(feature = "rust1", since = "1.0.0")] -pub struct VacantEntry<'a, K: 'a, V: 'a> { - hash: SafeHash, - key: K, - elem: VacantEntryState>, +impl InternalEntry { + #[inline] + fn into_occupied_bucket(self) -> Option> { + match self { + InternalEntry::Occupied { elem } => Some(elem), + _ => None, + } + } +} + +impl<'a, K, V> InternalEntry> { + #[inline] + fn into_entry(self, key: K) -> Option> { + match self { + InternalEntry::Occupied { elem } => { + Some(Occupied(OccupiedEntry { + key: Some(key), + elem: elem + })) + } + InternalEntry::Vacant { hash, elem } => { + Some(Vacant(VacantEntry { + hash: hash, + key: key, + elem: elem, + })) + } + InternalEntry::TableIsEmpty => None + } + } } /// A view into a single location in a map, which may be vacant or occupied. @@ -1392,6 +1330,21 @@ pub enum Entry<'a, K: 'a, V: 'a> { ), } +/// A view into a single occupied location in a HashMap. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + key: Option, + elem: FullBucket>, +} + +/// A view into a single empty location in a HashMap. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct VacantEntry<'a, K: 'a, V: 'a> { + hash: SafeHash, + key: K, + elem: VacantEntryState>, +} + /// Possible states of a VacantEntry. enum VacantEntryState { /// The index is occupied, but the key to insert has precedence, @@ -1592,6 +1545,12 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn remove(self) -> V { pop_internal(self.elem).1 } + /// Returns a key that was used for search. + /// + /// The key was retained for further use. + fn take_key(&mut self) -> Option { + self.key.take() + } } impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { @@ -1696,7 +1655,7 @@ impl super::Recover for HashMap type Key = K; fn get(&self, key: &Q) -> Option<&K> { - self.search(key).map(|bucket| bucket.into_refs().0) + self.search(key).into_occupied_bucket().map(|bucket| bucket.into_refs().0) } fn take(&mut self, key: &Q) -> Option { @@ -1704,18 +1663,22 @@ impl super::Recover for HashMap return None } - self.search_mut(key).map(|bucket| pop_internal(bucket).0) + self.search_mut(key).into_occupied_bucket().map(|bucket| pop_internal(bucket).0) } fn replace(&mut self, key: K) -> Option { - let hash = self.make_hash(&key); self.reserve(1); - let mut retkey = None; - self.insert_or_replace_with(hash, key, (), |key_ref, _, key, _| { - retkey = Some(replace(key_ref, key)); - }); - retkey + match self.entry(key) { + Occupied(mut occupied) => { + let key = occupied.take_key().unwrap(); + Some(mem::replace(occupied.elem.read_mut().0, key)) + } + Vacant(vacant) => { + vacant.insert(()); + None + } + } } } @@ -1750,6 +1713,20 @@ mod test_map { assert_eq!(*m.get(&2).unwrap(), 4); } + #[test] + fn test_clone() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + let m2 = m.clone(); + assert_eq!(*m2.get(&1).unwrap(), 2); + assert_eq!(*m2.get(&2).unwrap(), 4); + assert_eq!(m2.len(), 2); + } + thread_local! { static DROP_VECTOR: RefCell> = RefCell::new(Vec::new()) } #[derive(Hash, PartialEq, Eq)] @@ -1841,7 +1818,7 @@ mod test_map { } #[test] - fn test_move_iter_drops() { + fn test_into_iter_drops() { DROP_VECTOR.with(|v| { *v.borrow_mut() = vec![0; 200]; }); @@ -1906,11 +1883,35 @@ mod test_map { } #[test] - fn test_empty_pop() { + fn test_empty_remove() { let mut m: HashMap = HashMap::new(); assert_eq!(m.remove(&0), None); } + #[test] + fn test_empty_entry() { + let mut m: HashMap = HashMap::new(); + match m.entry(0) { + Occupied(_) => panic!(), + Vacant(_) => {} + } + assert!(*m.entry(0).or_insert(true)); + assert_eq!(m.len(), 1); + } + + #[test] + fn test_empty_iter() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.drain().next(), None); + assert_eq!(m.keys().next(), None); + assert_eq!(m.values().next(), None); + assert_eq!(m.iter().next(), None); + assert_eq!(m.iter_mut().next(), None); + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + assert_eq!(m.into_iter().next(), None); + } + #[test] fn test_lots_of_insertions() { let mut m = HashMap::new(); diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs index 97cab94b67b..5802a1fd265 100644 --- a/src/libstd/collections/hash/table.rs +++ b/src/libstd/collections/hash/table.rs @@ -201,23 +201,47 @@ impl EmptyBucket { pub fn table(&self) -> &M { &self.table } - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table - } } impl Bucket { - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table - } /// Get the raw index. pub fn index(&self) -> usize { self.idx } } +impl Deref for FullBucket where M: Deref> { + type Target = RawTable; + fn deref(&self) -> &RawTable { + &self.table + } +} + +/// `Put` is implemented for types which provide access to a table and cannot be invalidated +/// by filling a bucket. A similar implementation for `Take` is possible. +pub trait Put { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable; +} + + +impl<'t, K, V> Put for &'t mut RawTable { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + *self + } +} + +impl Put for Bucket where M: Put { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + self.table.borrow_table_mut() + } +} + +impl Put for FullBucket where M: Put { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + self.table.borrow_table_mut() + } +} + impl>> Bucket { pub fn new(table: M, hash: SafeHash) -> Bucket { Bucket::at_index(table, hash.inspect() as usize) @@ -268,22 +292,14 @@ impl>> Bucket { /// Modifies the bucket pointer in place to make it point to the next slot. pub fn next(&mut self) { - // Branchless bucket iteration step. - // As we reach the end of the table... - // We take the current idx: 0111111b - // Xor it by its increment: ^ 1000000b - // ------------ - // 1111111b - // Then AND with the capacity: & 1000000b - // ------------ - // to get the backwards offset: 1000000b - // ... and it's zero at all other times. - let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); - // Finally, we obtain the offset 1 or the offset -cap + 1. - let dist = 1 - (maybe_wraparound_dist as isize); - self.idx += 1; - + let range = self.table.capacity(); + // This code is branchless thanks to a conditional move. + let dist = if self.idx & (range - 1) == 0 { + 1 - range as isize + } else { + 1 + }; unsafe { self.raw = self.raw.offset(dist); } @@ -326,7 +342,7 @@ impl>> EmptyBucket { } } -impl> + DerefMut> EmptyBucket { +impl EmptyBucket where M: Put { /// Puts given key and value pair, along with the key's hash, /// into this bucket in the hashtable. Note how `self` is 'moved' into /// this function, because this slot will no longer be empty when @@ -340,9 +356,9 @@ impl> + DerefMut> EmptyBucket { *self.raw.hash = hash.inspect(); ptr::write(self.raw.key, key); ptr::write(self.raw.val, value); - } - self.table.size += 1; + self.table.borrow_table_mut().size += 1; + } FullBucket { raw: self.raw, idx: self.idx, table: self.table } } @@ -365,12 +381,22 @@ impl>> FullBucket { } } + /// Duplicates the current position. This can be useful for operations + /// on two or more buckets. + pub fn stash(self) -> FullBucket { + FullBucket { + raw: self.raw, + idx: self.idx, + table: self, + } + } + /// Get the distance between this bucket and the 'ideal' location /// as determined by the key's hash stored in it. /// /// In the cited blog posts above, this is called the "distance to /// initial bucket", or DIB. Also known as "probe count". - pub fn distance(&self) -> usize { + pub fn displacement(&self) -> usize { // Calculates the distance one has to travel when going from // `hash mod capacity` onwards to `idx mod capacity`, wrapping around // if the destination is not reached before the end of the table. @@ -395,12 +421,15 @@ impl>> FullBucket { } } -impl> + DerefMut> FullBucket { +// We take a mutable reference to the table instead of accepting anything that +// implements `DerefMut` to prevent fn `take` from being called on `stash`ed +// buckets. +impl<'t, K, V> FullBucket> { /// Removes this bucket's key and value from the hashtable. /// /// This works similarly to `put`, building an `EmptyBucket` out of the /// taken bucket. - pub fn take(mut self) -> (EmptyBucket, K, V) { + pub fn take(mut self) -> (EmptyBucket>, K, V) { self.table.size -= 1; unsafe { @@ -416,7 +445,11 @@ impl> + DerefMut> FullBucket { ) } } +} +// This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases +// where `M` is a full bucket or table reference type with mutable access to the table. +impl FullBucket where M: Put { pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { unsafe { let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); @@ -426,7 +459,9 @@ impl> + DerefMut> FullBucket { (old_hash, old_key, old_val) } } +} +impl FullBucket where M: Deref> + DerefMut { /// Gets mutable references to the key and value at a given index. pub fn read_mut(&mut self) -> (&mut K, &mut V) { unsafe { @@ -436,7 +471,7 @@ impl> + DerefMut> FullBucket { } } -impl<'t, K, V, M: Deref> + 't> FullBucket { +impl<'t, K, V, M> FullBucket where M: Deref> + 't { /// Exchange a bucket state for immutable references into the table. /// Because the underlying reference to the table is also consumed, /// no further changes to the structure of the table are possible; @@ -450,7 +485,7 @@ impl<'t, K, V, M: Deref> + 't> FullBucket { } } -impl<'t, K, V, M: Deref> + DerefMut + 't> FullBucket { +impl<'t, K, V, M> FullBucket where M: Deref> + DerefMut + 't { /// This works similarly to `into_refs`, exchanging a bucket state /// for mutable references into the table. pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { @@ -461,17 +496,7 @@ impl<'t, K, V, M: Deref> + DerefMut + 't> FullBucket BucketState { - // For convenience. - pub fn expect_full(self) -> FullBucket { - match self { - Full(full) => full, - Empty(..) => panic!("Expected full bucket") - } - } -} - -impl>> GapThenFull { +impl GapThenFull where M: Deref> { #[inline] pub fn full(&self) -> &FullBucket { &self.full