std: Remove implicit shrinking from hash_map.
Implements fn shrink_to_fit for HashMap.
This commit is contained in:
parent
29e928f2ba
commit
72c96badd2
@ -23,7 +23,7 @@ use hash::{Hash, Hasher, RandomSipHasher};
|
||||
use iter::{mod, Iterator, IteratorExt, FromIterator, Extend};
|
||||
use kinds::Sized;
|
||||
use mem::{mod, replace};
|
||||
use num::UnsignedInt;
|
||||
use num::{Int, UnsignedInt};
|
||||
use ops::{Deref, Index, IndexMut};
|
||||
use option::{Some, None, Option};
|
||||
use result::{Result, Ok, Err};
|
||||
@ -41,45 +41,53 @@ use super::table::{
|
||||
SafeHash
|
||||
};
|
||||
|
||||
// FIXME(conventions): update capacity management to match other collections (no auto-shrink)
|
||||
|
||||
const INITIAL_LOG2_CAP: uint = 5;
|
||||
pub const INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
|
||||
|
||||
/// The default behavior of HashMap implements a load factor of 90.9%.
|
||||
/// This behavior is characterized by the following conditions:
|
||||
/// This behavior is characterized by the following condition:
|
||||
///
|
||||
/// - if size > 0.909 * capacity: grow
|
||||
/// - if size < 0.25 * capacity: shrink (if this won't bring capacity lower
|
||||
/// than the minimum)
|
||||
/// - if size > 0.909 * capacity: grow the map
|
||||
#[deriving(Clone)]
|
||||
struct DefaultResizePolicy {
|
||||
/// Doubled minimal capacity. The capacity must never drop below
|
||||
/// the minimum capacity. (The check happens before the capacity
|
||||
/// is potentially halved.)
|
||||
minimum_capacity2: uint
|
||||
}
|
||||
struct DefaultResizePolicy;
|
||||
|
||||
impl DefaultResizePolicy {
|
||||
fn new(new_capacity: uint) -> DefaultResizePolicy {
|
||||
DefaultResizePolicy {
|
||||
minimum_capacity2: new_capacity << 1
|
||||
}
|
||||
fn new() -> DefaultResizePolicy {
|
||||
DefaultResizePolicy
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn capacity_range(&self, new_size: uint) -> (uint, uint) {
|
||||
// Here, we are rephrasing the logic by specifying the ranges:
|
||||
fn min_capacity(&self, usable_size: uint) -> uint {
|
||||
// Here, we are rephrasing the logic by specifying the lower limit
|
||||
// on capacity:
|
||||
//
|
||||
// - if `size * 1.1 < cap < size * 4`: don't resize
|
||||
// - if `cap < minimum_capacity * 2`: don't shrink
|
||||
// - otherwise, resize accordingly
|
||||
((new_size * 11) / 10, max(new_size << 2, self.minimum_capacity2))
|
||||
// - if `cap < size * 1.1`: grow the map
|
||||
usable_size * 11 / 10
|
||||
}
|
||||
|
||||
/// An inverse of `min_capacity`, approximately.
|
||||
#[inline]
|
||||
fn reserve(&mut self, new_capacity: uint) {
|
||||
self.minimum_capacity2 = new_capacity << 1;
|
||||
fn usable_capacity(&self, cap: uint) -> uint {
|
||||
// As the number of entries approaches usable capacity,
|
||||
// min_capacity(size) must be smaller than the internal capacity,
|
||||
// so that the map is not resized:
|
||||
// `min_capacity(usable_capacity(x)) <= x`.
|
||||
// The lef-hand side can only be smaller due to flooring by integer
|
||||
// division.
|
||||
//
|
||||
// This doesn't have to be checked for overflow since allocation size
|
||||
// in bytes will overflow earlier than multiplication by 10.
|
||||
cap * 10 / 11
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resize_policy() {
|
||||
use prelude::*;
|
||||
let rp = DefaultResizePolicy;
|
||||
for n in range(0u, 1000) {
|
||||
assert!(rp.min_capacity(rp.usable_capacity(n)) <= n);
|
||||
assert!(rp.usable_capacity(rp.min_capacity(n)) <= n);
|
||||
}
|
||||
}
|
||||
|
||||
@ -282,7 +290,6 @@ pub struct HashMap<K, V, H = RandomSipHasher> {
|
||||
|
||||
table: RawTable<K, V>,
|
||||
|
||||
// We keep this at the end since it might as well have tail padding.
|
||||
resize_policy: DefaultResizePolicy,
|
||||
}
|
||||
|
||||
@ -529,7 +536,7 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
|
||||
HashMap {
|
||||
hasher: hasher,
|
||||
resize_policy: DefaultResizePolicy::new(INITIAL_CAPACITY),
|
||||
resize_policy: DefaultResizePolicy::new(),
|
||||
table: RawTable::new(0),
|
||||
}
|
||||
}
|
||||
@ -554,17 +561,32 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
|
||||
let cap = max(INITIAL_CAPACITY, capacity).next_power_of_two();
|
||||
let resize_policy = DefaultResizePolicy::new();
|
||||
let min_cap = max(INITIAL_CAPACITY, resize_policy.min_capacity(capacity));
|
||||
let internal_cap = min_cap.checked_next_power_of_two().expect("capacity overflow");
|
||||
assert!(internal_cap >= capacity, "capacity overflow");
|
||||
HashMap {
|
||||
hasher: hasher,
|
||||
resize_policy: DefaultResizePolicy::new(cap),
|
||||
table: RawTable::new(cap),
|
||||
resize_policy: resize_policy,
|
||||
table: RawTable::new(internal_cap),
|
||||
}
|
||||
}
|
||||
|
||||
/// The hashtable will never try to shrink below this size. You can use
|
||||
/// this function to reduce reallocations if your hashtable frequently
|
||||
/// grows and shrinks by large amounts.
|
||||
/// Returns the number of elements the map can hold without reallocating.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use std::collections::HashMap;
|
||||
/// let map: HashMap<int, int> = HashMap::with_capacity(100);
|
||||
/// assert!(map.capacity() >= 100);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn capacity(&self) -> uint {
|
||||
self.resize_policy.usable_capacity(self.table.capacity())
|
||||
}
|
||||
|
||||
///
|
||||
/// This function has no effect on the operational semantics of the
|
||||
/// hashtable, only on performance.
|
||||
@ -579,8 +601,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
pub fn reserve(&mut self, new_minimum_capacity: uint) {
|
||||
let cap = max(INITIAL_CAPACITY, new_minimum_capacity).next_power_of_two();
|
||||
|
||||
self.resize_policy.reserve(cap);
|
||||
|
||||
if self.table.capacity() < cap {
|
||||
self.resize(cap);
|
||||
}
|
||||
@ -601,12 +621,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
return;
|
||||
}
|
||||
|
||||
if new_capacity < old_table.capacity() {
|
||||
// Shrink the table. Naive algorithm for resizing:
|
||||
for (h, k, v) in old_table.into_iter() {
|
||||
self.insert_hashed_nocheck(h, k, v);
|
||||
}
|
||||
} else {
|
||||
// Grow the table.
|
||||
// Specialization of the other branch.
|
||||
let mut bucket = Bucket::first(&mut old_table);
|
||||
@ -669,26 +683,44 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
};
|
||||
bucket.next();
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(self.table.size(), old_size);
|
||||
}
|
||||
|
||||
/// Performs any necessary resize operations, such that there's space for
|
||||
/// new_size elements.
|
||||
fn make_some_room(&mut self, new_size: uint) {
|
||||
let (grow_at, shrink_at) = self.resize_policy.capacity_range(new_size);
|
||||
let cap = self.table.capacity();
|
||||
/// Shrinks the capacity of the map as much as possible. It will drop
|
||||
/// down as much as possible while maintaining the internal rules
|
||||
/// and possibly leaving some space in accordance with the resize policy.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use std::collections::HashMap;
|
||||
///
|
||||
/// let mut map: HashMap<int, int> = HashMap::with_capacity(100);
|
||||
/// map.insert(1, 2);
|
||||
/// map.insert(3, 4);
|
||||
/// assert!(map.capacity() >= 100);
|
||||
/// map.shrink_to_fit();
|
||||
/// assert!(map.capacity() >= 2);
|
||||
/// ```
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn shrink_to_fit(&mut self) {
|
||||
let min_capacity = self.resize_policy.min_capacity(self.len());
|
||||
let min_capacity = max(min_capacity.next_power_of_two(), INITIAL_CAPACITY);
|
||||
|
||||
// An invalid value shouldn't make us run out of space.
|
||||
debug_assert!(grow_at >= new_size);
|
||||
debug_assert!(self.len() <= min_capacity);
|
||||
|
||||
if cap <= grow_at {
|
||||
let new_capacity = max(cap << 1, INITIAL_CAPACITY);
|
||||
self.resize(new_capacity);
|
||||
} else if shrink_at <= cap {
|
||||
let new_capacity = cap >> 1;
|
||||
self.resize(new_capacity);
|
||||
if self.table.capacity() != min_capacity {
|
||||
let old_table = replace(&mut self.table, RawTable::new(min_capacity));
|
||||
let old_size = old_table.size();
|
||||
|
||||
// Shrink the table. Naive algorithm for resizing:
|
||||
for (h, k, v) in old_table.into_iter() {
|
||||
self.insert_hashed_nocheck(h, k, v);
|
||||
}
|
||||
|
||||
debug_assert_eq!(self.table.size(), old_size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -775,8 +807,7 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
return None
|
||||
}
|
||||
|
||||
let potential_new_size = self.table.size() - 1;
|
||||
self.make_some_room(potential_new_size);
|
||||
self.reserve(1);
|
||||
|
||||
match self.search_equiv_mut(k) {
|
||||
Some(bucket) => {
|
||||
@ -907,12 +938,8 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
|
||||
/// Gets the given key's corresponding entry in the map for in-place manipulation
|
||||
pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> {
|
||||
// Gotta resize now, and we don't know which direction, so try both?
|
||||
let size = self.table.size();
|
||||
self.make_some_room(size + 1);
|
||||
if size > 0 {
|
||||
self.make_some_room(size - 1);
|
||||
}
|
||||
// Gotta resize now.
|
||||
self.reserve(1);
|
||||
|
||||
let hash = self.make_hash(&key);
|
||||
search_entry_hashed(&mut self.table, hash, key)
|
||||
@ -964,10 +991,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
/// ```
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn clear(&mut self) {
|
||||
// Prevent reallocations from happening from now on. Makes it possible
|
||||
// for the map to be reused but has a downside: reserves permanently.
|
||||
self.resize_policy.reserve(self.table.size());
|
||||
|
||||
let cap = self.table.capacity();
|
||||
let mut buckets = Bucket::first(&mut self.table);
|
||||
|
||||
@ -1100,8 +1123,7 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn insert(&mut self, k: K, v: V) -> Option<V> {
|
||||
let hash = self.make_hash(&k);
|
||||
let potential_new_size = self.table.size() + 1;
|
||||
self.make_some_room(potential_new_size);
|
||||
self.reserve(1);
|
||||
|
||||
let mut retval = None;
|
||||
self.insert_or_replace_with(hash, k, v, |_, val_ref, val| {
|
||||
@ -1141,9 +1163,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||
return None
|
||||
}
|
||||
|
||||
let potential_new_size = self.table.size() - 1;
|
||||
self.make_some_room(potential_new_size);
|
||||
|
||||
self.search_mut(k).map(|bucket| {
|
||||
let (_k, val) = pop_internal(bucket);
|
||||
val
|
||||
@ -1894,7 +1913,7 @@ mod test_map {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resize_policy() {
|
||||
fn test_behavior_resize_policy() {
|
||||
let mut m = HashMap::new();
|
||||
|
||||
assert_eq!(m.len(), 0);
|
||||
@ -1905,7 +1924,7 @@ mod test_map {
|
||||
m.remove(&0);
|
||||
assert!(m.is_empty());
|
||||
let initial_cap = m.table.capacity();
|
||||
m.reserve(initial_cap * 2);
|
||||
m.reserve(initial_cap);
|
||||
let cap = m.table.capacity();
|
||||
|
||||
assert_eq!(cap, initial_cap * 2);
|
||||
@ -1935,15 +1954,55 @@ mod test_map {
|
||||
assert_eq!(m.table.capacity(), new_cap);
|
||||
}
|
||||
// A little more than one quarter full.
|
||||
// Shrinking starts as we remove more elements:
|
||||
m.shrink_to_fit();
|
||||
assert_eq!(m.table.capacity(), cap);
|
||||
// again, a little more than half full
|
||||
for _ in range(0, cap / 2 - 1) {
|
||||
i -= 1;
|
||||
m.remove(&i);
|
||||
}
|
||||
m.shrink_to_fit();
|
||||
|
||||
assert_eq!(m.len(), i);
|
||||
assert!(!m.is_empty());
|
||||
assert_eq!(m.table.capacity(), cap);
|
||||
assert_eq!(m.table.capacity(), initial_cap);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_reserve_shrink_to_fit() {
|
||||
let mut m = HashMap::new();
|
||||
m.insert(0u, 0u);
|
||||
m.remove(&0);
|
||||
assert!(m.capacity() >= m.len());
|
||||
for i in range(0, 128) {
|
||||
m.insert(i, i);
|
||||
}
|
||||
m.reserve(256);
|
||||
|
||||
let usable_cap = m.capacity();
|
||||
for i in range(128, 128+256) {
|
||||
m.insert(i, i);
|
||||
assert_eq!(m.capacity(), usable_cap);
|
||||
}
|
||||
|
||||
for i in range(100, 128+256) {
|
||||
assert_eq!(m.remove(&i), Some(i));
|
||||
}
|
||||
m.shrink_to_fit();
|
||||
|
||||
assert_eq!(m.len(), 100);
|
||||
assert!(!m.is_empty());
|
||||
assert!(m.capacity() >= m.len());
|
||||
|
||||
for i in range(0, 100) {
|
||||
assert_eq!(m.remove(&i), Some(i));
|
||||
}
|
||||
m.shrink_to_fit();
|
||||
m.insert(0, 0);
|
||||
|
||||
assert_eq!(m.len(), 1);
|
||||
assert!(m.capacity() >= m.len());
|
||||
assert_eq!(m.remove(&0), Some(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -25,7 +25,6 @@ use result::{Ok, Err};
|
||||
use super::map::{HashMap, Entries, MoveEntries, INITIAL_CAPACITY};
|
||||
|
||||
// FIXME(conventions): implement BitOr, BitAnd, BitXor, and Sub
|
||||
// FIXME(conventions): update capacity management to match other collections (no auto-shrink)
|
||||
|
||||
|
||||
// Future Optimization (FIXME!)
|
||||
@ -172,7 +171,28 @@ impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
|
||||
HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
|
||||
}
|
||||
|
||||
/// Reserve space for at least `n` elements in the hash table.
|
||||
/// Returns the number of elements the set can hold without reallocating.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use std::collections::HashSet;
|
||||
/// let set: HashSet<int> = HashSet::with_capacity(100);
|
||||
/// assert!(set.capacity() >= 100);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn capacity(&self) -> uint {
|
||||
self.map.capacity()
|
||||
}
|
||||
|
||||
/// Reserves capacity for at least `additional` more elements to be inserted
|
||||
/// in the `HashSet`. The collection may reserve more space to avoid
|
||||
/// frequent reallocations.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new allocation size overflows `uint`.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
@ -181,8 +201,30 @@ impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
|
||||
/// let mut set: HashSet<int> = HashSet::new();
|
||||
/// set.reserve(10);
|
||||
/// ```
|
||||
pub fn reserve(&mut self, n: uint) {
|
||||
self.map.reserve(n)
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn reserve(&mut self, additional: uint) {
|
||||
self.map.reserve(additional)
|
||||
}
|
||||
|
||||
/// Shrinks the capacity of the set as much as possible. It will drop
|
||||
/// down as much as possible while maintaining the internal rules
|
||||
/// and possibly leaving some space in accordance with the resize policy.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use std::collections::HashSet;
|
||||
///
|
||||
/// let mut set: HashSet<int> = HashSet::with_capacity(100);
|
||||
/// set.insert(1);
|
||||
/// set.insert(2);
|
||||
/// assert!(set.capacity() >= 100);
|
||||
/// set.shrink_to_fit();
|
||||
/// assert!(set.capacity() >= 2);
|
||||
/// ```
|
||||
#[unstable = "matches collection reform specification, waiting for dust to settle"]
|
||||
pub fn shrink_to_fit(&mut self) {
|
||||
self.map.shrink_to_fit()
|
||||
}
|
||||
|
||||
/// Deprecated: use `contains` and `BorrowFrom`.
|
||||
|
Loading…
x
Reference in New Issue
Block a user