From 8e58f3088b5c86339f8d2cdbdb37d1c54af08bca Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Thu, 18 Sep 2014 17:05:22 -0400 Subject: [PATCH] implement entry API for HashMap Deprecates the `find_or_*` family of "internal mutation" methods on `HashMap` in favour of the "external mutation" Entry API as part of RFC 60. Part of #17320, although this still needs to be done on the rest of the maps; they don't have any internal mutation methods defined, so they can be done without deprecating or breaking anything. Work on `BTree` is part of the complete rewrite in #17334. The implemented API deviates from the API described in the RFC in two key places: * `VacantEntry.set` yields a mutable reference to the inserted element to avoid code duplication where complex logic needs to be done *regardless* of whether the entry was vacant or not. * `OccupiedEntry.into_mut` was added so that it is possible to return a reference into the map beyond the lifetime of the Entry itself, providing functional parity to `VacantEntry.set`. This allows the full find_or_insert functionality to be implemented using this API. A PR will be submitted to the RFC to amend this. 
[breaking-change] --- src/jemalloc | 2 +- src/libstd/collections/hashmap/map.rs | 288 +++++++++++++++++++++++++- src/libstd/collections/hashmap/mod.rs | 5 + 3 files changed, 288 insertions(+), 7 deletions(-) diff --git a/src/jemalloc b/src/jemalloc index aae04170ccb..2dba541881f 160000 --- a/src/jemalloc +++ b/src/jemalloc @@ -1 +1 @@ -Subproject commit aae04170ccbfeea620502106b581c3c216cd132a +Subproject commit 2dba541881fb8e35246d653bbe2e7c7088777a4a diff --git a/src/libstd/collections/hashmap/map.rs b/src/libstd/collections/hashmap/map.rs index e8c5eecc6f2..9cdf0facd56 100644 --- a/src/libstd/collections/hashmap/map.rs +++ b/src/libstd/collections/hashmap/map.rs @@ -20,16 +20,19 @@ use hash::{Hash, Hasher, RandomSipHasher}; use iter::{Iterator, FromIterator, Extendable}; use iter; use mem::replace; +use mem; use num; use ops::{Deref, DerefMut}; use option::{Some, None, Option}; use result::{Ok, Err}; use ops::Index; +use core::result::Result; use super::table; use super::table::{ Bucket, Empty, + EmptyBucket, Full, FullBucket, FullBucketImm, @@ -328,11 +331,11 @@ fn search_hashed>>(table: M, hash: &SafeHash, search_hashed_generic(table, hash, |k_| *k == *k_) } -fn pop_internal(starting_bucket: FullBucketMut) -> V { - let (empty, _k, retval) = starting_bucket.take(); +fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { + let (empty, retkey, retval) = starting_bucket.take(); let mut gap = match empty.gap_peek() { Some(b) => b, - None => return retval + None => return (retkey, retval) }; while gap.full().distance() != 0 { @@ -343,7 +346,7 @@ fn pop_internal(starting_bucket: FullBucketMut) -> V { } // Now we've done all our shifting. Return the value we grabbed earlier. - return retval; + return (retkey, retval); } /// Perform robin hood bucket stealing at the given `bucket`. 
You must @@ -567,7 +570,8 @@ impl, V, S, H: Hasher> MutableMap for HashMap self.make_some_room(potential_new_size); self.search_mut(k).map(|bucket| { - pop_internal(bucket) + let (_k, val) = pop_internal(bucket); + val }) } } @@ -852,12 +856,28 @@ impl, V, S, H: Hasher> HashMap { self.insert_hashed_nocheck(hash, k, v) } + /// Deprecated: use `entry` as follows instead: + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hashmap::{Occupied, Vacant}; + /// + /// let mut map = HashMap::new(); + /// + /// let result = match map.entry("a") { + /// Vacant(entry) => entry.set(1i), + /// Occupied(entry) => entry.into_mut(), + /// }; + /// assert_eq!(*result, 1); + /// ``` + /// /// Return the value corresponding to the key in the map, or insert /// and return the value if it doesn't exist. /// /// # Example /// /// ``` + /// #![allow(deprecated)] /// use std::collections::HashMap; /// let mut map = HashMap::new(); /// @@ -867,16 +887,34 @@ impl, V, S, H: Hasher> HashMap { /// // Find the existing key /// assert_eq!(*map.find_or_insert("a", -2), 1); /// ``` + #[deprecated = "use entry instead"] + #[allow(deprecated)] pub fn find_or_insert(&mut self, k: K, v: V) -> &mut V { self.find_with_or_insert_with(k, v, |_k, _v, _a| (), |_k, a| a) } + /// Deprecated: use `entry` as follows instead: + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hashmap::{Occupied, Vacant}; + /// + /// let mut map = HashMap::new(); + /// + /// let result = match map.entry("a") { + /// Vacant(entry) => entry.set(1i), + /// Occupied(entry) => entry.into_mut(), + /// }; + /// assert_eq!(*result, 1); + /// ``` + /// /// Return the value corresponding to the key in the map, or create, /// insert, and return a new value if it doesn't exist. 
/// /// # Example /// /// ``` + /// #![allow(deprecated)] /// use std::collections::HashMap; /// let mut map = HashMap::new(); /// @@ -886,11 +924,31 @@ impl, V, S, H: Hasher> HashMap { /// // Find the existing key /// assert_eq!(*map.find_or_insert_with(2, |&key| key as uint), 10); /// ``` + #[deprecated = "use entry instead"] + #[allow(deprecated)] pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V) -> &'a mut V { self.find_with_or_insert_with(k, (), |_k, _v, _a| (), |k, _a| f(k)) } + /// Deprecated: use `entry` as follows instead: + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hashmap::{Occupied, Vacant}; + /// + /// let mut map = HashMap::new(); + /// + /// let result = match map.entry("a") { + /// Vacant(entry) => entry.set(1u), + /// Occupied(mut entry) => { + /// *entry.get_mut() += 1; + /// entry.into_mut() + /// } + /// }; + /// assert_eq!(*result, 1); + /// ``` + /// /// Insert a key-value pair into the map if the key is not already present. /// Otherwise, modify the existing value for the key. /// Returns the new or modified value for the key. 
@@ -898,6 +956,7 @@ impl, V, S, H: Hasher> HashMap { /// # Example /// /// ``` + /// #![allow(deprecated)] /// use std::collections::HashMap; /// let mut map = HashMap::new(); /// @@ -908,6 +967,7 @@ impl, V, S, H: Hasher> HashMap { /// assert_eq!(*map.insert_or_update_with("a", 9, |_key, val| *val = 7), 7); /// assert_eq!(map["a"], 7); /// ``` + #[deprecated = "use entry instead"] pub fn insert_or_update_with<'a>( &'a mut self, k: K, @@ -921,6 +981,24 @@ impl, V, S, H: Hasher> HashMap { self.insert_or_replace_with(hash, k, v, |kref, vref, _v| f(kref, vref)) } + /// Deprecated: use `entry` as follows instead: + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hashmap::{Occupied, Vacant}; + /// + /// let mut map = HashMap::new(); + /// + /// let result = match map.entry("a") { + /// Vacant(entry) => entry.set(1u), + /// Occupied(mut entry) => { + /// *entry.get_mut() += 1; + /// entry.into_mut() + /// } + /// }; + /// assert_eq!(*result, 1); + /// ``` + /// /// Modify and return the value corresponding to the key in the map, or /// insert and return a new value if it doesn't exist. 
/// @@ -934,6 +1012,7 @@ impl, V, S, H: Hasher> HashMap { /// # Example /// /// ``` + /// #![allow(deprecated)] /// use std::collections::HashMap; /// /// // map some strings to vectors of strings @@ -965,6 +1044,7 @@ impl, V, S, H: Hasher> HashMap { /// assert_eq!(map["b key"], vec!["new value"]); /// assert_eq!(map["z key"], vec!["new value", "value"]); /// ``` + #[deprecated = "use entry instead"] pub fn find_with_or_insert_with<'a, A>(&'a mut self, k: K, a: A, @@ -1124,7 +1204,8 @@ impl, V, S, H: Hasher> HashMap { match self.search_equiv_mut(k) { Some(bucket) => { - Some(pop_internal(bucket)) + let (_k, val) = pop_internal(bucket); + Some(val) } _ => None } @@ -1254,6 +1335,68 @@ impl, V, S, H: Hasher> HashMap { inner: self.table.into_iter().map(|(_, k, v)| (k, v)) } } + + /// Gets the given key's corresponding entry in the map for in-place manipulation + pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> { + // Gotta resize now, and we don't know which direction, so try both? + let size = self.table.size(); + self.make_some_room(size + 1); + if size > 0 { + self.make_some_room(size - 1); + } + + let hash = self.make_hash(&key); + search_entry_hashed(&mut self.table, hash, key) + } +} + +fn search_entry_hashed<'a, K: Eq, V>(table: &'a mut RawTable, hash: SafeHash, k: K) + -> Entry<'a, K, V> { + // Worst case, we'll find one empty bucket among `size + 1` buckets. + let size = table.size(); + let mut probe = Bucket::new(table, &hash); + let ib = probe.index(); + + loop { + let bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! 
+ return Vacant(VacantEntry { + hash: hash, + key: k, + elem: NoElem(bucket), + }); + }, + Full(bucket) => bucket + }; + + if bucket.hash() == hash { + let is_eq = { + let (bucket_k, _) = bucket.read(); + k == *bucket_k + }; + + if is_eq { + return Occupied(OccupiedEntry{ + elem: bucket, + }); + } + } + + let robin_ib = bucket.index() as int - bucket.distance() as int; + + if (ib as int) < robin_ib { + // Found a luckier bucket than me. Better steal his spot. + return Vacant(VacantEntry { + hash: hash, + key: k, + elem: NeqElem(bucket, robin_ib as uint), + }); + } + + probe = bucket.next(); + assert!(probe.index() != ib + size + 1); + } } impl, V: Clone, S, H: Hasher> HashMap { @@ -1353,6 +1496,35 @@ pub struct MoveEntries { inner: iter::Map<'static, (SafeHash, K, V), (K, V), table::MoveEntries> } +/// A view into a single occupied location in a HashMap +pub struct OccupiedEntry<'a, K:'a, V:'a> { + elem: FullBucket>, +} + +/// A view into a single empty location in a HashMap +pub struct VacantEntry<'a, K:'a, V:'a> { + hash: SafeHash, + key: K, + elem: VacantEntryState>, +} + +/// A view into a single location in a map, which may be vacant or occupied +pub enum Entry<'a, K:'a, V:'a> { + /// An occupied Entry + Occupied(OccupiedEntry<'a, K, V>), + /// A vacant Entry + Vacant(VacantEntry<'a, K, V>), +} + +/// Possible states of a VacantEntry +enum VacantEntryState { + /// The index is occupied, but the key to insert has precedence, + /// and will kick the current one out on insertion + NeqElem(FullBucket, uint), + /// The index is genuinely vacant + NoElem(EmptyBucket), +} + impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { #[inline] fn next(&mut self) -> Option<(&'a K, &'a V)> { @@ -1386,6 +1558,57 @@ impl Iterator<(K, V)> for MoveEntries { } } +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the value in the entry + pub fn get(&self) -> &V { + let (_, v) = self.elem.read(); + v + } + + /// Gets a mutable reference to the value in the 
entry + pub fn get_mut(&mut self) -> &mut V { + let (_, v) = self.elem.read_mut(); + v + } + + /// Converts the OccupiedEntry into a mutable reference to the value in the entry + /// with a lifetime bound to the map itself + pub fn into_mut(self) -> &'a mut V { + let (_, v) = self.elem.into_mut_refs(); + v + } + + /// Sets the value of the entry, and returns the entry's old value + pub fn set(&mut self, mut value: V) -> V { + let old_value = self.get_mut(); + mem::swap(&mut value, old_value); + value + } + + /// Takes the value out of the entry, and returns it + pub fn take(self) -> V { + let (_, _, v) = self.elem.take(); + v + } +} + +impl<'a, K, V> VacantEntry<'a, K, V> { + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it + pub fn set(self, value: V) -> &'a mut V { + match self.elem { + NeqElem(bucket, ib) => { + robin_hood(bucket, ib, self.hash, self.key, value) + } + NoElem(bucket) => { + let full = bucket.put(self.hash, self.key, value); + let (_, v) = full.into_mut_refs(); + v + } + } + } +} + /// HashMap keys iterator pub type Keys<'a, K, V> = iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>; @@ -1416,6 +1639,7 @@ mod test_map { use prelude::*; use super::HashMap; + use super::{Occupied, Vacant}; use cmp::Equiv; use hash; use iter::{Iterator,range_inclusive,range_step_inclusive}; @@ -2026,4 +2250,56 @@ mod test_map { map[4]; } + + #[test] + fn test_entry(){ + let xs = [(1i, 10i), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; + + let mut map: HashMap = xs.iter().map(|&x| x).collect(); + + // Existing key (insert) + match map.entry(1) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + assert_eq!(view.get(), &10); + assert_eq!(view.set(100), 10); + } + } + assert_eq!(map.find(&1).unwrap(), &100); + assert_eq!(map.len(), 6); + + + // Existing key (update) + match map.entry(2) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + let v = view.get_mut(); + let new_v = (*v) * 10; + 
*v = new_v; + } + } + assert_eq!(map.find(&2).unwrap(), &200); + assert_eq!(map.len(), 6); + + // Existing key (take) + match map.entry(3) { + Vacant(_) => unreachable!(), + Occupied(view) => { + assert_eq!(view.take(), 30); + } + } + assert_eq!(map.find(&3), None); + assert_eq!(map.len(), 5); + + + // Inexistent key (insert) + match map.entry(10) { + Occupied(_) => unreachable!(), + Vacant(view) => { + assert_eq!(*view.set(1000), 1000); + } + } + assert_eq!(map.find(&10).unwrap(), &1000); + assert_eq!(map.len(), 6); + } } diff --git a/src/libstd/collections/hashmap/mod.rs b/src/libstd/collections/hashmap/mod.rs index b5612ce0f07..6508d4609f1 100644 --- a/src/libstd/collections/hashmap/mod.rs +++ b/src/libstd/collections/hashmap/mod.rs @@ -14,6 +14,11 @@ pub use self::map::HashMap; pub use self::map::Entries; pub use self::map::MutEntries; pub use self::map::MoveEntries; +pub use self::map::Entry; +pub use self::map::Occupied; +pub use self::map::Vacant; +pub use self::map::OccupiedEntry; +pub use self::map::VacantEntry; pub use self::map::Keys; pub use self::map::Values; pub use self::map::INITIAL_CAPACITY;