From b6edc59413f79016a1063c2ec6bc05516bc99cb6 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 16 Sep 2014 10:49:26 -0400 Subject: [PATCH] complete btree rewrite Replaces BTree with BTreeMap and BTreeSet, which are completely new implementations. BTreeMap's internal Node representation is particularly inefficient at the moment to make this first implementation easy to reason about and fairly safe. Both collections are also currently missing some of the tooling specific to sorted collections, which is planned as future work pending reform of these APIs. General implementation issues are discussed with TODOs internally Perf results on x86_64 Linux: test treemap::bench::find_rand_100 ... bench: 76 ns/iter (+/- 4) test treemap::bench::find_rand_10_000 ... bench: 163 ns/iter (+/- 6) test treemap::bench::find_seq_100 ... bench: 77 ns/iter (+/- 3) test treemap::bench::find_seq_10_000 ... bench: 115 ns/iter (+/- 1) test treemap::bench::insert_rand_100 ... bench: 111 ns/iter (+/- 1) test treemap::bench::insert_rand_10_000 ... bench: 996 ns/iter (+/- 18) test treemap::bench::insert_seq_100 ... bench: 486 ns/iter (+/- 20) test treemap::bench::insert_seq_10_000 ... bench: 800 ns/iter (+/- 15) test btree::map::bench::find_rand_100 ... bench: 74 ns/iter (+/- 4) test btree::map::bench::find_rand_10_000 ... bench: 153 ns/iter (+/- 5) test btree::map::bench::find_seq_100 ... bench: 82 ns/iter (+/- 1) test btree::map::bench::find_seq_10_000 ... bench: 108 ns/iter (+/- 0) test btree::map::bench::insert_rand_100 ... bench: 220 ns/iter (+/- 1) test btree::map::bench::insert_rand_10_000 ... bench: 620 ns/iter (+/- 16) test btree::map::bench::insert_seq_100 ... bench: 411 ns/iter (+/- 12) test btree::map::bench::insert_seq_10_000 ... bench: 534 ns/iter (+/- 14) BTreeMap still has a lot of room for optimization, but it's already beating out TreeMap on most access patterns. 
[breaking-change] --- src/libcollections/btree.rs | 919 ----------------------- src/libcollections/btree/map.rs | 1203 ++++++++++++++++++++++++++++++ src/libcollections/btree/mod.rs | 32 + src/libcollections/btree/node.rs | 552 ++++++++++++++ src/libcollections/btree/set.rs | 433 +++++++++++ src/libcollections/lib.rs | 2 +- src/libstd/collections/mod.rs | 2 +- 7 files changed, 2222 insertions(+), 921 deletions(-) delete mode 100644 src/libcollections/btree.rs create mode 100644 src/libcollections/btree/map.rs create mode 100644 src/libcollections/btree/mod.rs create mode 100644 src/libcollections/btree/node.rs create mode 100644 src/libcollections/btree/set.rs diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs deleted file mode 100644 index f6011976b65..00000000000 --- a/src/libcollections/btree.rs +++ /dev/null @@ -1,919 +0,0 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -// - -// NB. this is not deprecated for removal, just deprecating the -// current implementation. If the major pain-points are addressed -// (overuse of by-value self and .clone), this can be removed. -#![deprecated = "the current implementation is extremely inefficient, \ - prefer a HashMap, TreeMap or TrieMap"] -#![allow(deprecated)] - -//! Starting implementation of a B-tree for Rust. -//! Structure inspired by Github user davidhalperin's gist. - -// A B-tree contains a root node (which contains a vector of elements), -// a length (the height of the tree), and lower and upper bounds on the -// number of elements that a given node can contain. 
- -use core::prelude::*; - -use alloc::boxed::Box; -use core::fmt; -use core::fmt::Show; - -use MutableSeq; -use vec::Vec; - -#[allow(missing_doc)] -pub struct BTree { - root: Node, - len: uint, - lower_bound: uint, - upper_bound: uint -} - -impl BTree { - /// Returns new `BTree` with root node (leaf) and user-supplied lower bound - /// The lower bound applies to every node except the root node. - pub fn new(k: K, v: V, lb: uint) -> BTree { - BTree { - root: Node::new_leaf(vec!(LeafElt::new(k, v))), - len: 1, - lower_bound: lb, - upper_bound: 2 * lb - } - } - - /// Helper function for `clone`: returns new BTree with supplied root node, - /// length, and lower bound. For use when the length is known already. - fn new_with_node_len(n: Node, - length: uint, - lb: uint) -> BTree { - BTree { - root: n, - len: length, - lower_bound: lb, - upper_bound: 2 * lb - } - } -} - -// We would probably want to remove the dependence on the Clone trait in the future. -// It is here as a crutch to ensure values can be passed around through the tree's nodes -// especially during insertions and deletions. -impl BTree { - /// Returns the value of a given key, which may not exist in the tree. - /// Calls the root node's get method. - pub fn get(self, k: K) -> Option { - return self.root.get(k); - } - - /// An insert method that uses the `clone` method for support. 
- pub fn insert(mut self, k: K, v: V) -> BTree { - let (a, b) = self.root.clone().insert(k, v, self.upper_bound.clone()); - if b { - match a.clone() { - LeafNode(leaf) => { - self.root = Node::new_leaf(leaf.clone().elts); - } - BranchNode(branch) => { - self.root = Node::new_branch(branch.clone().elts, - branch.clone().rightmost_child); - } - } - } - self - } -} - -impl Clone for BTree { - fn clone(&self) -> BTree { - BTree::new_with_node_len(self.root.clone(), self.len, self.lower_bound) - } -} - -impl PartialEq for BTree { - fn eq(&self, other: &BTree) -> bool { - self.root.cmp(&other.root) == Equal - } -} - -impl Eq for BTree {} - -impl PartialOrd for BTree { - fn partial_cmp(&self, other: &BTree) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for BTree { - /// Returns an ordering based on the root nodes of each `BTree`. - fn cmp(&self, other: &BTree) -> Ordering { - self.root.cmp(&other.root) - } -} - -impl fmt::Show for BTree { - /// Returns a string representation of the `BTree`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.root.fmt(f) - } -} - - -// Node types -// -// A node is either a LeafNode or a BranchNode, which contain either a Leaf or a Branch. -// Branches contain BranchElts, which contain a left child (another node) and a key-value -// pair. Branches also contain the rightmost child of the elements in the array. -// Leaves contain LeafElts, which do not have children. -enum Node { - LeafNode(Leaf), - BranchNode(Branch) -} - - -impl Node { - /// Creates a new leaf node given a vector of elements. - fn new_leaf(vec: Vec>) -> Node { - LeafNode(Leaf::new(vec)) - } - - /// Creates a new branch node given a vector of an elements and a pointer to a rightmost child. - fn new_branch(vec: Vec>, right: Box>) - -> Node { - BranchNode(Branch::new(vec, right)) - } - - /// Determines whether the given Node contains a Branch or a Leaf. - /// Used in testing. - fn is_leaf(&self) -> bool { - match self { - &LeafNode(..) 
=> true, - &BranchNode(..) => false - } - } - - /// A binary search function for Nodes. - /// Calls either the Branch's or the Leaf's bsearch function. - fn bsearch_node(&self, k: K) -> Option { - match self { - &LeafNode(ref leaf) => leaf.bsearch_leaf(k), - &BranchNode(ref branch) => branch.bsearch_branch(k) - } - } -} - -impl Node { - /// Returns the corresponding value to the provided key. - /// `get()` is called in different ways on a branch or a leaf. - fn get(&self, k: K) -> Option { - match *self { - LeafNode(ref leaf) => return leaf.get(k), - BranchNode(ref branch) => return branch.get(k) - } - } - - /// Matches on the `Node`, then performs and returns the appropriate insert method. - fn insert(self, k: K, v: V, ub: uint) -> (Node, bool) { - match self { - LeafNode(leaf) => leaf.insert(k, v, ub), - BranchNode(branch) => branch.insert(k, v, ub) - } - } -} - -impl Clone for Node { - /// Returns a new `Node` based on whether or not it is a branch or a leaf. - fn clone(&self) -> Node { - match *self { - LeafNode(ref leaf) => { - Node::new_leaf(leaf.elts.clone()) - } - BranchNode(ref branch) => { - Node::new_branch(branch.elts.clone(), - branch.rightmost_child.clone()) - } - } - } -} - -impl PartialEq for Node { - fn eq(&self, other: &Node) -> bool { - match *self{ - BranchNode(ref branch) => { - if other.is_leaf() { - return false; - } - match *other { - BranchNode(ref branch2) => branch.cmp(branch2) == Equal, - LeafNode(..) => false - } - } - LeafNode(ref leaf) => { - match *other { - LeafNode(ref leaf2) => leaf.cmp(leaf2) == Equal, - BranchNode(..) => false - } - } - } - } -} - -impl Eq for Node {} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Node) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Node { - /// Implementation of `Ord` for `Node`s. 
- fn cmp(&self, other: &Node) -> Ordering { - match *self { - LeafNode(ref leaf) => { - match *other { - LeafNode(ref leaf2) => leaf.cmp(leaf2), - BranchNode(_) => Less - } - } - BranchNode(ref branch) => { - match *other { - BranchNode(ref branch2) => branch.cmp(branch2), - LeafNode(_) => Greater - } - } - } - } -} - -impl fmt::Show for Node { - /// Returns a string representation of a `Node`. - /// Will iterate over the Node and show `Key: x, value: y, child: ()` - /// for all elements in the `Node`. `child` only exists if the `Node` contains - /// a branch. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - LeafNode(ref leaf) => leaf.fmt(f), - BranchNode(ref branch) => branch.fmt(f), - } - } -} - - -// A leaf is a vector with elements that contain no children. A leaf also -// does not contain a rightmost child. -struct Leaf { - elts: Vec> -} - -// Vector of values with children, plus a rightmost child (greater than all) -struct Branch { - elts: Vec>, - rightmost_child: Box>, -} - - -impl Leaf { - /// Creates a new `Leaf` from a vector of `LeafElts`. - fn new(vec: Vec>) -> Leaf { - Leaf { - elts: vec - } - } - - /// Searches a leaf for a spot for a new element using a binary search. - /// Returns `None` if the element is already in the vector. 
- fn bsearch_leaf(&self, k: K) -> Option { - let mut high: uint = self.elts.len(); - let mut low: uint = 0; - let mut midpoint: uint = (high - low) / 2 ; - if midpoint == high { - midpoint = 0; - } - loop { - let order = self.elts[midpoint].key.cmp(&k); - match order { - Equal => { - return None; - } - Greater => { - if midpoint > 0 { - if self.elts[midpoint - 1].key.cmp(&k) == Less { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = midpoint / 2; - high = tmp; - continue; - } - } - else { - return Some(0); - } - } - Less => { - if midpoint + 1 < self.elts.len() { - if self.elts[midpoint + 1].key.cmp(&k) == Greater { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (high + low) / 2; - low = tmp; - } - } - else { - return Some(self.elts.len()); - } - } - } - } - } -} - - -impl Leaf { - /// Returns the corresponding value to the supplied key. - fn get(&self, k: K) -> Option { - for s in self.elts.iter() { - let order = s.key.cmp(&k); - match order { - Equal => return Some(s.value.clone()), - _ => {} - } - } - return None; - } - - /// Uses `clone()` to facilitate inserting new elements into a tree. - fn insert(mut self, k: K, v: V, ub: uint) -> (Node, bool) { - let to_insert = LeafElt::new(k, v); - let index: Option = self.bsearch_leaf(to_insert.clone().key); - //Check index to see whether we actually inserted the element into the vector. - match index { - //If the index is None, the new element already exists in the vector. - None => { - return (Node::new_leaf(self.clone().elts), false); - } - //If there is an index, insert at that index. - Some(i) => { - if i >= self.elts.len() { - self.elts.push(to_insert.clone()); - } - else { - self.elts.insert(i, to_insert.clone()); - } - } - } - //If we have overfilled the vector (by making its size greater than the - //upper bound), we return a new Branch with one element and two children. 
- if self.elts.len() > ub { - let midpoint_opt = self.elts.remove(ub / 2); - let midpoint = midpoint_opt.unwrap(); - let (left_leaf, right_leaf) = self.elts.partition(|le| - le.key.cmp(&midpoint.key.clone()) - == Less); - let branch_return = Node::new_branch(vec!(BranchElt::new(midpoint.key.clone(), - midpoint.value.clone(), - box Node::new_leaf(left_leaf))), - box Node::new_leaf(right_leaf)); - return (branch_return, true); - } - (Node::new_leaf(self.elts.clone()), true) - } -} - -impl Clone for Leaf { - /// Returns a new `Leaf` with the same elts. - fn clone(&self) -> Leaf { - Leaf::new(self.elts.clone()) - } -} - -impl PartialEq for Leaf { - fn eq(&self, other: &Leaf) -> bool { - self.elts == other.elts - } -} - -impl Eq for Leaf {} - -impl PartialOrd for Leaf { - fn partial_cmp(&self, other: &Leaf) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Leaf { - /// Returns an ordering based on the first element of each `Leaf`. - fn cmp(&self, other: &Leaf) -> Ordering { - if self.elts.len() > other.elts.len() { - return Greater; - } - if self.elts.len() < other.elts.len() { - return Less; - } - self.elts[0].cmp(&other.elts[0]) - } -} - - -impl fmt::Show for Leaf { - /// Returns a string representation of a `Leaf`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, s) in self.elts.iter().enumerate() { - if i != 0 { try!(write!(f, " // ")) } - try!(write!(f, "{}", *s)) - } - Ok(()) - } -} - - -impl Branch { - /// Creates a new `Branch` from a vector of `BranchElts` and a rightmost child (a node). 
- fn new(vec: Vec>, right: Box>) - -> Branch { - Branch { - elts: vec, - rightmost_child: right - } - } - - fn bsearch_branch(&self, k: K) -> Option { - let mut midpoint: uint = self.elts.len() / 2; - let mut high: uint = self.elts.len(); - let mut low: uint = 0u; - if midpoint == high { - midpoint = 0u; - } - loop { - let order = self.elts[midpoint].key.cmp(&k); - match order { - Equal => { - return None; - } - Greater => { - if midpoint > 0 { - if self.elts[midpoint - 1].key.cmp(&k) == Less { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (midpoint - low) / 2; - high = tmp; - continue; - } - } - else { - return Some(0); - } - } - Less => { - if midpoint + 1 < self.elts.len() { - if self.elts[midpoint + 1].key.cmp(&k) == Greater { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (high - midpoint) / 2; - low = tmp; - } - } - else { - return Some(self.elts.len()); - } - } - } - } - } -} - -impl Branch { - /// Returns the corresponding value to the supplied key. - /// If the key is not there, find the child that might hold it. - fn get(&self, k: K) -> Option { - for s in self.elts.iter() { - let order = s.key.cmp(&k); - match order { - Less => return s.left.get(k), - Equal => return Some(s.value.clone()), - _ => {} - } - } - self.rightmost_child.get(k) - } - - /// An insert method that uses `.clone()` for support. - fn insert(mut self, k: K, v: V, ub: uint) -> (Node, bool) { - let mut new_branch = Node::new_branch(self.clone().elts, self.clone().rightmost_child); - let mut outcome = false; - let index: Option = new_branch.bsearch_node(k.clone()); - //First, find which path down the tree will lead to the appropriate leaf - //for the key-value pair. 
- match index.clone() { - None => { - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - outcome); - } - Some(i) => { - if i == self.elts.len() { - let new_outcome = self.clone().rightmost_child.insert(k.clone(), - v.clone(), - ub.clone()); - new_branch = new_outcome.clone().val0(); - outcome = new_outcome.val1(); - } - else { - let new_outcome = self.elts[i].left.clone().insert(k.clone(), - v.clone(), - ub.clone()); - new_branch = new_outcome.clone().val0(); - outcome = new_outcome.val1(); - } - //Check to see whether a branch or a leaf was returned from the - //tree traversal. - match new_branch.clone() { - //If we have a leaf, we do not need to resize the tree, - //so we can return false. - LeafNode(..) => { - if i == self.elts.len() { - self.rightmost_child = box new_branch.clone(); - } - else { - self.elts.get_mut(i).left = box new_branch.clone(); - } - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - true); - } - //If we have a branch, we might need to refactor the tree. - BranchNode(..) => {} - } - } - } - //If we inserted something into the tree, do the following: - if outcome { - match new_branch.clone() { - //If we have a new leaf node, integrate it into the current branch - //and return it, saying we have inserted a new element. - LeafNode(..) => { - if index.unwrap() == self.elts.len() { - self.rightmost_child = box new_branch; - } - else { - self.elts.get_mut(index.unwrap()).left = box new_branch; - } - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - true); - } - //If we have a new branch node, attempt to insert it into the tree - //as with the key-value pair, then check to see if the node is overfull. 
- BranchNode(branch) => { - let new_elt = branch.elts[0].clone(); - let new_elt_index = self.bsearch_branch(new_elt.clone().key); - match new_elt_index { - None => { - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - false); - } - Some(i) => { - self.elts.insert(i, new_elt); - if i + 1 >= self.elts.len() { - self.rightmost_child = branch.clone().rightmost_child; - } - else { - self.elts.get_mut(i + 1).left = - branch.clone().rightmost_child; - } - } - } - } - } - //If the current node is overfilled, create a new branch with one element - //and two children. - if self.elts.len() > ub { - let midpoint = self.elts.remove(ub / 2).unwrap(); - let (new_left, new_right) = self.clone().elts.partition(|le| - midpoint.key.cmp(&le.key) - == Greater); - new_branch = Node::new_branch( - vec!(BranchElt::new(midpoint.clone().key, - midpoint.clone().value, - box Node::new_branch(new_left, - midpoint.clone().left))), - box Node::new_branch(new_right, self.clone().rightmost_child)); - return (new_branch, true); - } - } - (Node::new_branch(self.elts.clone(), self.rightmost_child.clone()), outcome) - } -} - -impl Clone for Branch { - /// Returns a new branch using the clone methods of the `Branch`'s internal variables. - fn clone(&self) -> Branch { - Branch::new(self.elts.clone(), self.rightmost_child.clone()) - } -} - -impl PartialEq for Branch { - fn eq(&self, other: &Branch) -> bool { - self.elts == other.elts - } -} - -impl Eq for Branch {} - -impl PartialOrd for Branch { - fn partial_cmp(&self, other: &Branch) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Branch { - /// Compares the first elements of two `Branch`es to determine an - /// `Ordering`. 
- fn cmp(&self, other: &Branch) -> Ordering { - if self.elts.len() > other.elts.len() { - return Greater; - } - if self.elts.len() < other.elts.len() { - return Less; - } - self.elts[0].cmp(&other.elts[0]) - } -} - -impl fmt::Show for Branch { - /// Returns a string representation of a `Branch`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, s) in self.elts.iter().enumerate() { - if i != 0 { try!(write!(f, " // ")) } - try!(write!(f, "{}", *s)) - } - write!(f, " // rightmost child: ({}) ", *self.rightmost_child) - } -} - -//A LeafElt contains no left child, but a key-value pair. -struct LeafElt { - key: K, - value: V -} - -//A BranchElt has a left child in insertion to a key-value pair. -struct BranchElt { - left: Box>, - key: K, - value: V -} - -impl LeafElt { - /// Creates a new `LeafElt` from a supplied key-value pair. - fn new(k: K, v: V) -> LeafElt { - LeafElt { - key: k, - value: v - } - } -} - -impl Clone for LeafElt { - /// Returns a new `LeafElt` by cloning the key and value. - fn clone(&self) -> LeafElt { - LeafElt::new(self.key.clone(), self.value.clone()) - } -} - -impl PartialEq for LeafElt { - fn eq(&self, other: &LeafElt) -> bool { - self.key == other.key && self.value == other.value - } -} - -impl Eq for LeafElt {} - -impl PartialOrd for LeafElt { - fn partial_cmp(&self, other: &LeafElt) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for LeafElt { - /// Returns an ordering based on the keys of the `LeafElt`s. - fn cmp(&self, other: &LeafElt) -> Ordering { - self.key.cmp(&other.key) - } -} - -impl fmt::Show for LeafElt { - /// Returns a string representation of a `LeafElt`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Key: {}, value: {};", self.key, self.value) - } -} - -impl BranchElt { - /// Creates a new `BranchElt` from a supplied key, value, and left child. 
- fn new(k: K, v: V, n: Box>) -> BranchElt { - BranchElt { - left: n, - key: k, - value: v - } - } -} - - -impl Clone for BranchElt { - /// Returns a new `BranchElt` by cloning the key, value, and left child. - fn clone(&self) -> BranchElt { - BranchElt::new(self.key.clone(), - self.value.clone(), - self.left.clone()) - } -} - -impl PartialEq for BranchElt{ - fn eq(&self, other: &BranchElt) -> bool { - self.key == other.key && self.value == other.value - } -} - -impl Eq for BranchElt{} - -impl PartialOrd for BranchElt { - fn partial_cmp(&self, other: &BranchElt) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for BranchElt { - /// Fulfills `Ord` for `BranchElts`. - fn cmp(&self, other: &BranchElt) -> Ordering { - self.key.cmp(&other.key) - } -} - -impl fmt::Show for BranchElt { - /// Formats as a string containing the key, value, and child (which should recur to a - /// leaf). Consider changing in future to be more readable. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Key: {}, value: {}, (child: {})", - self.key, self.value, *self.left) - } -} - -#[cfg(test)] -mod test_btree { - use std::prelude::*; - - use super::{BTree, Node, LeafElt}; - - use MutableSeq; - - //Tests the functionality of the insert methods (which are unfinished). 
- #[test] - fn insert_test_one() { - let b = BTree::new(1i, "abc".to_string(), 2); - let is_insert = b.insert(2i, "xyz".to_string()); - assert!(is_insert.root.is_leaf()); - } - - #[test] - fn insert_test_two() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3)); - let b = BTree::new_with_node_len(n, 3, 2); - //println!("{}", b.clone().insert(4, "ddd".to_string()).to_string()); - assert!(b.insert(4, "ddd".to_string()).root.is_leaf()); - } - - #[test] - fn insert_test_three() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(4i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - //println!("{}", b.clone().insert(5, "eee".to_string()).to_string()); - assert!(!b.insert(5, "eee".to_string()).root.is_leaf()); - } - - #[test] - fn insert_test_four() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(4i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let mut b = BTree::new_with_node_len(n, 3, 2); - b = b.clone().insert(5, "eee".to_string()); - b = b.clone().insert(6, "fff".to_string()); - b = b.clone().insert(7, "ggg".to_string()); - b = b.clone().insert(8, "hhh".to_string()); - b = b.clone().insert(0, "omg".to_string()); - //println!("{}", b.clone().to_string()); - assert!(!b.root.is_leaf()); - } - - #[test] - fn bsearch_test_one() { - let b = BTree::new(1i, "abc".to_string(), 2u); - assert_eq!(Some(1), b.root.bsearch_node(2)); - } - - #[test] - 
fn bsearch_test_two() { - let b = BTree::new(1i, "abc".to_string(), 2u); - assert_eq!(Some(0), b.root.bsearch_node(0)); - } - - #[test] - fn bsearch_test_three() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(4i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(5i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - assert_eq!(Some(2), b.root.bsearch_node(3)); - } - - #[test] - fn bsearch_test_four() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(4i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(5i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - assert_eq!(Some(4), b.root.bsearch_node(800)); - } - - //Tests the functionality of the get method. - #[test] - fn get_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - let val = b.get(1); - assert_eq!(val, Some("abc".to_string())); - } - - //Tests the BTree's clone() method. - #[test] - fn btree_clone_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = b.clone(); - assert!(b.root == b2.root) - } - - //Tests the BTree's cmp() method when one node is "less than" another. - #[test] - fn btree_cmp_test_less() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(2i, "bcd".to_string(), 2); - assert!(&b.cmp(&b2) == &Less) - } - - //Tests the BTree's cmp() method when two nodes are equal. - #[test] - fn btree_cmp_test_eq() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(1i, "bcd".to_string(), 2); - assert!(&b.cmp(&b2) == &Equal) - } - - //Tests the BTree's cmp() method when one node is "greater than" another. 
- #[test] - fn btree_cmp_test_greater() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(2i, "bcd".to_string(), 2); - assert!(&b2.cmp(&b) == &Greater) - } - - //Tests the BTree's to_string() method. - #[test] - fn btree_tostr_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - assert_eq!(b.to_string(), "Key: 1, value: abc;".to_string()) - } - -} diff --git a/src/libcollections/btree/map.rs b/src/libcollections/btree/map.rs new file mode 100644 index 00000000000..b0ba2254621 --- /dev/null +++ b/src/libcollections/btree/map.rs @@ -0,0 +1,1203 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This implementation is largely based on the high-level description and analysis of B-Trees +// found in *Open Data Structures* (ODS). Although our implementation does not use any of +// the source found in ODS, if one wishes to review the high-level design of this structure, it +// can be freely downloaded at http://opendatastructures.org/. Its contents are as of this +// writing (August 2014) freely licensed under the following Creative Commons Attribution +// License: [CC BY 2.5 CA](http://creativecommons.org/licenses/by/2.5/ca/). + +use core::prelude::*; + +use super::node::*; +use std::hash::{Writer, Hash}; +use core::default::Default; +use core::{iter, fmt, mem}; +use core::fmt::Show; + +use {Deque, Map, MutableMap, Mutable, MutableSeq}; +use ringbuf::RingBuf; + + + +/// A map based on a B-Tree. +#[deriving(Clone)] +pub struct BTreeMap { + root: Node, + length: uint, + depth: uint, + b: uint, +} + +/// An abstract base over-which all other BTree iterators are built. 
+struct AbsEntries { + lca: T, + left: RingBuf, + right: RingBuf, + size: uint, +} + +/// An iterator over a BTreeMap's entries. +pub struct Entries<'a, K, V> { + inner: AbsEntries> +} + +/// A mutable iterator over a BTreeMap's entries. +pub struct MutEntries<'a, K, V> { + inner: AbsEntries> +} + +/// An owning iterator over a BTreeMap's entries. +pub struct MoveEntries { + inner: AbsEntries> +} + +/// An iterator over a BTreeMap's keys. +pub type Keys<'a, K, V> = iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>; + +/// An iterator over a BTreeMap's values. +pub type Values<'a, K, V> = iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>; + +/// A view into a single entry in a map, which may either be vacant or occupied. +pub enum Entry<'a, K:'a, V:'a> { + /// A vacant Entry + Vacant(VacantEntry<'a, K, V>), + /// An occupied Entry + Occupied(OccupiedEntry<'a, K, V>), +} + +/// A vacant Entry. +pub struct VacantEntry<'a, K:'a, V:'a> { + key: K, + stack: stack::SearchStack<'a, K, V>, +} + +/// An occupied Entry. +pub struct OccupiedEntry<'a, K:'a, V:'a> { + stack: stack::SearchStack<'a, K, V>, +} + +impl BTreeMap { + /// Makes a new empty BTreeMap with a reasonable choice for B. + pub fn new() -> BTreeMap { + //FIXME(Gankro): Tune this as a function of size_of? + BTreeMap::with_b(6) + } + + /// Makes a new empty BTreeMap with the given B. + pub fn with_b(b: uint) -> BTreeMap { + assert!(b > 1, "B must be greater than 1"); + BTreeMap { + length: 0, + depth: 1, + root: Node::make_leaf_root(b), + b: b, + } + } +} + +impl Map for BTreeMap { + // Searching in a B-Tree is pretty straightforward. + // + // Start at the root. Try to find the key in the current node. If we find it, return it. + // If it's not in there, follow the edge *before* the smallest key larger than + // the search key. If no such key exists (they're *all* smaller), then just take the last + // edge in the node. 
If we're in a leaf and we don't find our key, then it's not + // in the tree. + fn find(&self, key: &K) -> Option<&V> { + let mut cur_node = &self.root; + loop { + match cur_node.search(key) { + Found(i) => return cur_node.val(i), + GoDown(i) => match cur_node.edge(i) { + None => return None, + Some(next_node) => { + cur_node = next_node; + continue; + } + } + } + } + } +} + +impl MutableMap for BTreeMap { + // See `find` for implementation notes, this is basically a copy-paste with mut's added + fn find_mut(&mut self, key: &K) -> Option<&mut V> { + // temp_node is a Borrowck hack for having a mutable value outlive a loop iteration + let mut temp_node = &mut self.root; + loop { + let cur_node = temp_node; + match cur_node.search(key) { + Found(i) => return cur_node.val_mut(i), + GoDown(i) => match cur_node.edge_mut(i) { + None => return None, + Some(next_node) => { + temp_node = next_node; + continue; + } + } + } + } + } + + // Insertion in a B-Tree is a bit complicated. + // + // First we do the same kind of search described in `find`. But we need to maintain a stack of + // all the nodes/edges in our search path. If we find a match for the key we're trying to + // insert, just swap the vals and return the old ones. However, when we bottom out in a leaf, + // we attempt to insert our key-value pair at the same location we would want to follow another + // edge. + // + // If the node has room, then this is done in the obvious way by shifting elements. However, + // if the node itself is full, we split node into two, and give its median key-value + // pair to its parent to insert the new node with. Of course, the parent may also be + // full, and insertion can propagate until we reach the root. If we reach the root, and + // it is *also* full, then we split the root and place the two nodes under a newly made root. + // + // Note that we subtly deviate from Open Data Structures in our implementation of split. 
+ // ODS describes inserting into the node *regardless* of its capacity, and then + // splitting *afterwards* if it happens to be overfull. However, this is inefficient. + // Instead, we split beforehand, and then insert the key-value pair into the appropriate + // result node. This has two consequences: + // + // 1) While ODS produces a left node of size B-1, and a right node of size B, + // we may potentially reverse this. However, this shouldn't affect the analysis. + // + // 2) While ODS may potentially return the pair we *just* inserted after + // the split, we will never do this. Again, this shouldn't affect the analysis. + + fn swap(&mut self, key: K, mut value: V) -> Option { + // This is a stack of rawptrs to nodes paired with indices, respectively + // representing the nodes and edges of our search path. We have to store rawptrs + // because as far as Rust is concerned, we can mutate aliased data with such a + // stack. It is of course correct, but what it doesn't know is that we will only + // be popping and using these ptrs one at a time in child-to-parent order. The alternative + // to doing this is to take the Nodes from their parents. This actually makes + // borrowck *really* happy and everything is pretty smooth. However, this creates + // *tons* of pointless writes, and requires us to always walk all the way back to + // the root after an insertion, even if we only needed to change a leaf. Therefore, + // we accept this potential unsafety and complexity in the name of performance. + // + // Regardless, the actual dangerous logic is completely abstracted away from BTreeMap + // by the stack module. All it can do is immutably read nodes, and ask the search stack + // to proceed down some edge by index. This makes the search logic we'll be reusing in a + // few different methods much neater, and of course drastically improves safety. 
+ let mut stack = stack::PartialSearchStack::new(self); + + loop { + // Same basic logic as found in `find`, but with PartialSearchStack mediating the + // actual nodes for us + match stack.next().search(&key) { + Found(i) => unsafe { + // Perfect match, swap the values and return the old one + let next = stack.into_next(); + mem::swap(next.unsafe_val_mut(i), &mut value); + return Some(value); + }, + GoDown(i) => { + // We need to keep searching, try to get the search stack + // to go down further + stack = match stack.push(i) { + stack::Done(new_stack) => { + // We've reached a leaf, perform the insertion here + new_stack.insert(key, value); + return None; + } + stack::Grew(new_stack) => { + // We've found the subtree to insert this key/value pair in, + // keep searching + new_stack + } + }; + } + } + } + } + + // Deletion is the most complicated operation for a B-Tree. + // + // First we do the same kind of search described in + // `find`. But we need to maintain a stack of all the nodes/edges in our search path. + // If we don't find the key, then we just return `None` and do nothing. If we do find the + // key, we perform two operations: remove the item, and then possibly handle underflow. + // + // # removing the item + // If the node is a leaf, we just remove the item, and shift + // any items after it back to fill the hole. + // + // If the node is an internal node, we *swap* the item with the smallest item + // in its right subtree (which must reside in a leaf), and then revert to the leaf + // case. + // + // # handling underflow + // After removing an item, there may be too few items in the node. We want nodes + // to be mostly full for efficiency, although we make an exception for the root, which + // may have as few as one item. If this is the case, we may first try to steal + // an item from our left or right neighbour. + // + // To steal from the left (right) neighbour, + // we take the largest (smallest) item and child from it.
We then swap the taken item + // with the item in their mutual parent that separates them, and then insert the + // parent's item and the taken child into the first (last) index of the underflowed node. + // + // However, stealing has the possibility of underflowing our neighbour. If this is the + // case, we instead *merge* with our neighbour. This of course reduces the number of + // children in the parent. Therefore, we also steal the item that separates the now + // merged nodes, and insert it into the merged node. + // + // Merging may cause the parent to underflow. If this is the case, then we must repeat + // the underflow handling process on the parent. If merging merges the last two children + // of the root, then we replace the root with the merged node. + + fn pop(&mut self, key: &K) -> Option { + // See `swap` for a more thorough description of the stuff going on in here + let mut stack = stack::PartialSearchStack::new(self); + loop { + match stack.next().search(key) { + Found(i) => { + // Perfect match. Terminate the stack here, and remove the entry + return Some(stack.seal(i).remove()); + }, + GoDown(i) => { + // We need to keep searching, try to go down the next edge + stack = match stack.push(i) { + stack::Done(_) => return None, // We're at a leaf; the key isn't in here + stack::Grew(new_stack) => { + new_stack + } + }; + } + } + } + } +} + +/// The stack module provides a safe interface for constructing and manipulating a stack of ptrs +/// to nodes. By using this module much better safety guarantees can be made, and more search +/// boilerplate gets cut out. +mod stack { + use core::prelude::*; + use super::BTreeMap; + use super::super::node::*; + use {MutableMap, MutableSeq}; + use vec::Vec; + + type StackItem = (*mut Node, uint); + type Stack = Vec>; + + /// A PartialSearchStack handles the construction of a search stack. 
+ pub struct PartialSearchStack<'a, K:'a, V:'a> { + map: &'a mut BTreeMap, + stack: Stack, + next: *mut Node, + } + + /// A SearchStack represents a full path to an element of interest. It provides methods + /// for manipulating the element at the top of its stack. + pub struct SearchStack<'a, K:'a, V:'a> { + map: &'a mut BTreeMap, + stack: Stack, + top: StackItem, + } + + /// The result of asking a PartialSearchStack to push another node onto itself. Either it + /// Grew, in which case it's still Partial, or it found its last node was actually a leaf, in + /// which case it seals itself and yields a complete SearchStack. + pub enum PushResult<'a, K:'a, V:'a> { + Grew(PartialSearchStack<'a, K, V>), + Done(SearchStack<'a, K, V>), + } + + impl<'a, K, V> PartialSearchStack<'a, K, V> { + /// Creates a new PartialSearchStack from a BTreeMap by initializing the stack with the + /// root of the tree. + pub fn new<'a>(map: &'a mut BTreeMap) -> PartialSearchStack<'a, K, V> { + let depth = map.depth; + + PartialSearchStack { + next: &mut map.root as *mut _, + map: map, + stack: Vec::with_capacity(depth), + } + } + + /// Pushes the requested child of the stack's current top on top of the stack. If the child + /// exists, then a new PartialSearchStack is yielded. Otherwise, a full SearchStack is + /// yielded. + pub fn push(self, edge: uint) -> PushResult<'a, K, V> { + let map = self.map; + let mut stack = self.stack; + let next_ptr = self.next; + let next_node = unsafe { + &mut *next_ptr + }; + let to_insert = (next_ptr, edge); + match next_node.edge_mut(edge) { + None => Done(SearchStack { + map: map, + stack: stack, + top: to_insert, + }), + Some(node) => { + stack.push(to_insert); + Grew(PartialSearchStack { + map: map, + stack: stack, + next: node as *mut _, + }) + }, + } + } + + /// Converts the stack into a mutable reference to its top. + pub fn into_next(self) -> &'a mut Node { + unsafe { + &mut *self.next + } + } + + /// Gets the top of the stack. 
+ pub fn next(&self) -> &Node { + unsafe { + &*self.next + } + } + + /// Converts the PartialSearchStack into a SearchStack. + pub fn seal(self, index: uint) -> SearchStack<'a, K, V> { + SearchStack { + map: self.map, + stack: self.stack, + top: (self.next as *mut _, index), + } + } + } + + impl<'a, K, V> SearchStack<'a, K, V> { + /// Gets a reference to the value the stack points to. + pub fn peek(&self) -> &V { + let (node_ptr, index) = self.top; + unsafe { + (*node_ptr).val(index).unwrap() + } + } + + /// Gets a mutable reference to the value the stack points to. + pub fn peek_mut(&mut self) -> &mut V { + let (node_ptr, index) = self.top; + unsafe { + (*node_ptr).val_mut(index).unwrap() + } + } + + /// Converts the stack into a mutable reference to the value it points to, with a lifetime + /// tied to the original tree. + pub fn into_top(self) -> &'a mut V { + let (node_ptr, index) = self.top; + unsafe { + (*node_ptr).val_mut(index).unwrap() + } + } + + /// Inserts the key and value into the top element in the stack, and if that node has to + /// split recursively inserts the split contents into the next element stack until + /// splits stop. + /// + /// Assumes that the stack represents a search path from the root to a leaf. + /// + /// An &mut V is returned to the inserted value, for callers that want a reference to this. + pub fn insert(self, key: K, val: V) -> &'a mut V { + unsafe { + let map = self.map; + map.length += 1; + + let mut stack = self.stack; + // Insert the key and value into the leaf at the top of the stack + let (node, index) = self.top; + let (mut insertion, inserted_ptr) = { + (*node).insert_as_leaf(index, key, val) + }; + + loop { + match insertion { + Fit => { + // The last insertion went off without a hitch, no splits! We can stop + // inserting now. 
+ return &mut *inserted_ptr; + } + Split(key, val, right) => match stack.pop() { + // The last insertion triggered a split, so get the next element on the + // stack to recursively insert the split node into. + None => { + // The stack was empty; we've split the root, and need to make a + // a new one. This is done in-place because we can't move the + // root out of a reference to the tree. + Node::make_internal_root(&mut map.root, map.b, key, val, right); + + map.depth += 1; + return &mut *inserted_ptr; + } + Some((node, index)) => { + // The stack wasn't empty, do the insertion and recurse + insertion = (*node).insert_as_internal(index, key, val, right); + continue; + } + } + } + } + } + } + + /// Removes the key and value in the top element of the stack, then handles underflows as + /// described in BTree's pop function. + pub fn remove(mut self) -> V { + // Ensure that the search stack goes to a leaf. This is necessary to perform deletion + // in a BTree. Note that this may put the tree in an inconsistent state (further + // described in leafify's comments), but this is immediately fixed by the + // removing the value we want to remove + self.leafify(); + + let map = self.map; + map.length -= 1; + + let mut stack = self.stack; + + // Remove the key-value pair from the leaf that this search stack points to. + // Then, note if the leaf is underfull, and promptly forget the leaf and its ptr + // to avoid ownership issues. + let (value, mut underflow) = unsafe { + let (leaf_ptr, index) = self.top; + let leaf = &mut *leaf_ptr; + let (_key, value) = leaf.remove_as_leaf(index); + let underflow = leaf.is_underfull(); + (value, underflow) + }; + + loop { + match stack.pop() { + None => { + // We've reached the root, so no matter what, we're done. We manually + // access the root via the tree itself to avoid creating any dangling + // pointers. + if map.root.len() == 0 && !map.root.is_leaf() { + // We've emptied out the root, so make its only child the new root. 
+ // If it's a leaf, we just let it become empty. + map.depth -= 1; + map.root = map.root.pop_edge().unwrap(); + } + return value; + } + Some((parent_ptr, index)) => { + if underflow { + // Underflow! Handle it! + unsafe { + let parent = &mut *parent_ptr; + parent.handle_underflow(index); + underflow = parent.is_underfull(); + } + } else { + // All done! + return value; + } + } + } + } + } + + /// Subroutine for removal. Takes a search stack for a key that might terminate at an + /// internal node, and mutates the tree and search stack to *make* it a search stack + /// for that same key that *does* terminates at a leaf. If the mutation occurs, then this + /// leaves the tree in an inconsistent state that must be repaired by the caller by + /// removing the entry in question. Specifically the key-value pair and its successor will + /// become swapped. + fn leafify(&mut self) { + unsafe { + let (node_ptr, index) = self.top; + // First, get ptrs to the found key-value pair + let node = &mut *node_ptr; + let (key_ptr, val_ptr) = { + (node.unsafe_key_mut(index) as *mut _, + node.unsafe_val_mut(index) as *mut _) + }; + + // Try to go into the right subtree of the found key to find its successor + match node.edge_mut(index + 1) { + None => { + // We're a proper leaf stack, nothing to do + } + Some(mut temp_node) => { + //We're not a proper leaf stack, let's get to work. 
+ self.stack.push((node_ptr, index + 1)); + loop { + // Walk into the smallest subtree of this node + let node = temp_node; + let node_ptr = node as *mut _; + + if node.is_leaf() { + // This node is a leaf, do the swap and return + self.top = (node_ptr, 0); + node.unsafe_swap(0, &mut *key_ptr, &mut *val_ptr); + break; + } else { + // This node is internal, go deeper + self.stack.push((node_ptr, 0)); + temp_node = node.unsafe_edge_mut(0); + } + } + } + } + } + } + } +} + +impl Collection for BTreeMap { + fn len(&self) -> uint { + self.length + } +} + +impl Mutable for BTreeMap { + fn clear(&mut self) { + let b = self.b; + // avoid recursive destructors by manually traversing the tree + for _ in mem::replace(self, BTreeMap::with_b(b)).into_iter() {}; + } +} + +impl FromIterator<(K, V)> for BTreeMap { + fn from_iter>(iter: T) -> BTreeMap { + let mut map = BTreeMap::new(); + map.extend(iter); + map + } +} + +impl Extendable<(K, V)> for BTreeMap { + #[inline] + fn extend>(&mut self, mut iter: T) { + for (k, v) in iter { + self.insert(k, v); + } + } +} + +impl, V: Hash> Hash for BTreeMap { + fn hash(&self, state: &mut S) { + for elt in self.iter() { + elt.hash(state); + } + } +} + +impl Default for BTreeMap { + fn default() -> BTreeMap { + BTreeMap::new() + } +} + +impl PartialEq for BTreeMap { + fn eq(&self, other: &BTreeMap) -> bool { + self.len() == other.len() && + self.iter().zip(other.iter()).all(|(a, b)| a == b) + } +} + +impl Eq for BTreeMap {} + +impl PartialOrd for BTreeMap { + #[inline] + fn partial_cmp(&self, other: &BTreeMap) -> Option { + iter::order::partial_cmp(self.iter(), other.iter()) + } +} + +impl Ord for BTreeMap { + #[inline] + fn cmp(&self, other: &BTreeMap) -> Ordering { + iter::order::cmp(self.iter(), other.iter()) + } +} + +impl Show for BTreeMap { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, (k, v)) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}: {}", 
*k, *v)); + } + + write!(f, "}}") + } +} + +impl Index for BTreeMap { + fn index(&self, key: &K) -> &V { + self.find(key).expect("no entry found for key") + } +} + +/// Genericises over how to get the correct type of iterator from the correct type +/// of Node ownership. +trait Traverse { + fn traverse(node: N) -> Self; +} + +impl<'a, K, V> Traverse<&'a Node> for Traversal<'a, K, V> { + fn traverse(node: &'a Node) -> Traversal<'a, K, V> { + node.iter() + } +} + +impl<'a, K, V> Traverse<&'a mut Node> for MutTraversal<'a, K, V> { + fn traverse(node: &'a mut Node) -> MutTraversal<'a, K, V> { + node.iter_mut() + } +} + +impl Traverse> for MoveTraversal { + fn traverse(node: Node) -> MoveTraversal { + node.into_iter() + } +} + +/// Represents an operation to perform inside the following iterator methods. +/// This is necessary to use in `next` because we want to modify self.left inside +/// a match that borrows it. Similarly, in `next_back` for self.right. Instead, we use this +/// enum to note what we want to do, and do it after the match. +enum StackOp { + Push(T), + Pop, +} + +impl + DoubleEndedIterator>> + Iterator<(K, V)> for AbsEntries { + // This function is pretty long, but only because there's a lot of cases to consider. + // Our iterator represents two search paths, left and right, to the smallest and largest + // elements we have yet to yield. lca represents the least common ancestor of these two paths, + // above-which we never walk, since everything outside it has already been consumed (or was + // never in the range to iterate). + // + // Note that the design of these iterators permits an *arbitrary* initial pair of min and max, + // making these arbitrary sub-range iterators. However the logic to construct these paths + // efficiently is fairly involved, so this is a FIXME. The sub-range iterators also wouldn't be + // able to accurately predict size, so those iterators can't implement ExactSize. 
+ fn next(&mut self) -> Option<(K, V)> { + loop { + // We want the smallest element, so try to get the top of the left stack + let op = match self.left.back_mut() { + // The left stack is empty, so try to get the next element of the two paths + // LCAs (the left search path is currently a subpath of the right one) + None => match self.lca.next() { + // The lca has been exhausted, walk further down the right path + None => match self.right.pop_front() { + // The right path is exhausted, so we're done + None => return None, + // The right path had something, make that the new LCA + // and restart the whole process + Some(right) => { + self.lca = right; + continue; + } + }, + // The lca yielded an edge, make that the new head of the left path + Some(Edge(next)) => Push(Traverse::traverse(next)), + // The lca yielded an entry, so yield that + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + }, + // The left stack wasn't empty, so continue along the node in its head + Some(iter) => match iter.next() { + // The head of the left path is empty, so Pop it off and restart the process + None => Pop, + // The head of the left path yielded an edge, so make that the new head + // of the left path + Some(Edge(next)) => Push(Traverse::traverse(next)), + // The head of the left path yielded entry, so yield that + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + } + }; + + // Handle any operation on the left stack as necessary + match op { + Push(item) => { self.left.push(item); }, + Pop => { self.left.pop(); }, + } + } + } + + fn size_hint(&self) -> (uint, Option) { + (self.size, Some(self.size)) + } +} + +impl + DoubleEndedIterator>> + DoubleEndedIterator<(K, V)> for AbsEntries { + // next_back is totally symmetric to next + fn next_back(&mut self) -> Option<(K, V)> { + loop { + let op = match self.right.back_mut() { + None => match self.lca.next_back() { + None => match self.left.pop_front() { + None => return None, + Some(left) => { + 
self.lca = left; + continue; + } + }, + Some(Edge(next)) => Push(Traverse::traverse(next)), + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + }, + Some(iter) => match iter.next_back() { + None => Pop, + Some(Edge(next)) => Push(Traverse::traverse(next)), + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + } + }; + + match op { + Push(item) => { self.right.push(item); }, + Pop => { self.right.pop(); } + } + } + } +} + +impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a V)> { self.inner.next() } + fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } +} +impl<'a, K, V> DoubleEndedIterator<(&'a K, &'a V)> for Entries<'a, K, V> { + fn next_back(&mut self) -> Option<(&'a K, &'a V)> { self.inner.next_back() } +} +impl<'a, K, V> ExactSize<(&'a K, &'a V)> for Entries<'a, K, V> {} + + +impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { self.inner.next() } + fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } +} +impl<'a, K, V> DoubleEndedIterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + fn next_back(&mut self) -> Option<(&'a K, &'a mut V)> { self.inner.next_back() } +} +impl<'a, K, V> ExactSize<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {} + + +impl Iterator<(K, V)> for MoveEntries { + fn next(&mut self) -> Option<(K, V)> { self.inner.next() } + fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } +} +impl DoubleEndedIterator<(K, V)> for MoveEntries { + fn next_back(&mut self) -> Option<(K, V)> { self.inner.next_back() } +} +impl ExactSize<(K, V)> for MoveEntries {} + + + +impl<'a, K: Ord, V> VacantEntry<'a, K, V> { + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it. 
+ pub fn set(self, value: V) -> &'a mut V { + self.stack.insert(self.key, value) + } +} + +impl<'a, K: Ord, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the value in the entry. + pub fn get(&self) -> &V { + self.stack.peek() + } + + /// Gets a mutable reference to the value in the entry. + pub fn get_mut(&mut self) -> &mut V { + self.stack.peek_mut() + } + + /// Converts the entry into a mutable reference to its value. + pub fn into_mut(self) -> &'a mut V { + self.stack.into_top() + } + + /// Sets the value of the entry with the OccupiedEntry's key, + /// and returns the entry's old value. + pub fn set(&mut self, mut value: V) -> V { + mem::swap(self.stack.peek_mut(), &mut value); + value + } + + /// Takes the value of the entry out of the map, and returns it. + pub fn take(self) -> V { + self.stack.remove() + } +} + +impl BTreeMap { + /// Gets an iterator over the entries of the map. + pub fn iter<'a>(&'a self) -> Entries<'a, K, V> { + let len = self.len(); + Entries { + inner: AbsEntries { + lca: Traverse::traverse(&self.root), + left: RingBuf::new(), + right: RingBuf::new(), + size: len, + } + } + } + + /// Gets a mutable iterator over the entries of the map. + pub fn iter_mut<'a>(&'a mut self) -> MutEntries<'a, K, V> { + let len = self.len(); + MutEntries { + inner: AbsEntries { + lca: Traverse::traverse(&mut self.root), + left: RingBuf::new(), + right: RingBuf::new(), + size: len, + } + } + } + + /// Gets an owning iterator over the entries of the map. + pub fn into_iter(self) -> MoveEntries { + let len = self.len(); + MoveEntries { + inner: AbsEntries { + lca: Traverse::traverse(self.root), + left: RingBuf::new(), + right: RingBuf::new(), + size: len, + } + } + } + + /// Gets an iterator over the keys of the map. + pub fn keys<'a>(&'a self) -> Keys<'a, K, V> { + self.iter().map(|(k, _)| k) + } + + /// Gets an iterator over the values of the map. 
+ pub fn values<'a>(&'a self) -> Values<'a, K, V> { + self.iter().map(|(_, v)| v) + } +} + +impl BTreeMap { + /// Gets the given key's corresponding entry in the map for in-place manipulation. + pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> { + // same basic logic of `swap` and `pop`, blended together + let mut stack = stack::PartialSearchStack::new(self); + loop { + match stack.next().search(&key) { + Found(i) => { + // Perfect match + return Occupied(OccupiedEntry { + stack: stack.seal(i) + }); + }, + GoDown(i) => { + stack = match stack.push(i) { + stack::Done(new_stack) => { + // Not in the tree, but we've found where it goes + return Vacant(VacantEntry { + stack: new_stack, + key: key, + }); + } + stack::Grew(new_stack) => { + // We've found the subtree this key must go in + new_stack + } + }; + } + } + } + } +} + + + + + +#[cfg(test)] +mod test { + use std::prelude::*; + + use {Map, MutableMap}; + use super::{BTreeMap, Occupied, Vacant}; + + #[test] + fn test_basic_large() { + let mut map = BTreeMap::new(); + let size = 10000u; + assert_eq!(map.len(), 0); + + for i in range(0, size) { + assert_eq!(map.swap(i, 10*i), None); + assert_eq!(map.len(), i + 1); + } + + for i in range(0, size) { + assert_eq!(map.find(&i).unwrap(), &(i*10)); + } + + for i in range(size, size*2) { + assert_eq!(map.find(&i), None); + } + + for i in range(0, size) { + assert_eq!(map.swap(i, 100*i), Some(10*i)); + assert_eq!(map.len(), size); + } + + for i in range(0, size) { + assert_eq!(map.find(&i).unwrap(), &(i*100)); + } + + for i in range(0, size/2) { + assert_eq!(map.pop(&(i*2)), Some(i*200)); + assert_eq!(map.len(), size - i - 1); + } + + for i in range(0, size/2) { + assert_eq!(map.find(&(2*i)), None); + assert_eq!(map.find(&(2*i+1)).unwrap(), &(i*200 + 100)); + } + + for i in range(0, size/2) { + assert_eq!(map.pop(&(2*i)), None); + assert_eq!(map.pop(&(2*i+1)), Some(i*200 + 100)); + assert_eq!(map.len(), size/2 - i - 1); + } + } + + #[test] + fn test_basic_small() { 
+ let mut map = BTreeMap::new(); + assert_eq!(map.pop(&1), None); + assert_eq!(map.find(&1), None); + assert_eq!(map.swap(1u, 1u), None); + assert_eq!(map.find(&1), Some(&1)); + assert_eq!(map.swap(1, 2), Some(1)); + assert_eq!(map.find(&1), Some(&2)); + assert_eq!(map.swap(2, 4), None); + assert_eq!(map.find(&2), Some(&4)); + assert_eq!(map.pop(&1), Some(2)); + assert_eq!(map.pop(&2), Some(4)); + assert_eq!(map.pop(&1), None); + } + + #[test] + fn test_iter() { + let size = 10000u; + + // Forwards + let mut map: BTreeMap = Vec::from_fn(size, |i| (i, i)).into_iter().collect(); + + { + let mut iter = map.iter(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&i, &i)); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.iter_mut(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&i, &mut (i + 0))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.into_iter(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (i, i)); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + } + + #[test] + fn test_iter_rev() { + let size = 10000u; + + // Forwards + let mut map: BTreeMap = Vec::from_fn(size, |i| (i, i)).into_iter().collect(); + + { + let mut iter = map.iter().rev(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&(size - i - 1), &(size - i - 1))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.iter_mut().rev(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&(size - i - 
1), &mut(size - i - 1))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.into_iter().rev(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (size - i - 1, size - i - 1)); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + } + + #[test] + fn test_entry(){ + let xs = [(1i, 10i), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; + + let mut map: BTreeMap = xs.iter().map(|&x| x).collect(); + + // Existing key (insert) + match map.entry(1) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + assert_eq!(view.get(), &10); + assert_eq!(view.set(100), 10); + } + } + assert_eq!(map.find(&1).unwrap(), &100); + assert_eq!(map.len(), 6); + + + // Existing key (update) + match map.entry(2) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + let v = view.get_mut(); + *v *= 10; + } + } + assert_eq!(map.find(&2).unwrap(), &200); + assert_eq!(map.len(), 6); + + // Existing key (take) + match map.entry(3) { + Vacant(_) => unreachable!(), + Occupied(view) => { + assert_eq!(view.take(), 30); + } + } + assert_eq!(map.find(&3), None); + assert_eq!(map.len(), 5); + + + // Inexistent key (insert) + match map.entry(10) { + Occupied(_) => unreachable!(), + Vacant(view) => { + assert_eq!(*view.set(1000), 1000); + } + } + assert_eq!(map.find(&10).unwrap(), &1000); + assert_eq!(map.len(), 6); + } +} + + + + + + +#[cfg(test)] +mod bench { + use test::Bencher; + + use super::BTreeMap; + use deque::bench::{insert_rand_n, insert_seq_n, find_rand_n, find_seq_n}; + + #[bench] + pub fn insert_rand_100(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + insert_rand_n(100, &mut m, b); + } + + #[bench] + pub fn insert_rand_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + insert_rand_n(10_000, &mut m, b); + } + + // Insert seq + #[bench] + pub fn insert_seq_100(b: &mut Bencher) { + 
let mut m : BTreeMap = BTreeMap::new(); + insert_seq_n(100, &mut m, b); + } + + #[bench] + pub fn insert_seq_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + insert_seq_n(10_000, &mut m, b); + } + + // Find rand + #[bench] + pub fn find_rand_100(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_rand_n(100, &mut m, b); + } + + #[bench] + pub fn find_rand_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_rand_n(10_000, &mut m, b); + } + + // Find seq + #[bench] + pub fn find_seq_100(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_seq_n(100, &mut m, b); + } + + #[bench] + pub fn find_seq_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_seq_n(10_000, &mut m, b); + } +} diff --git a/src/libcollections/btree/mod.rs b/src/libcollections/btree/mod.rs new file mode 100644 index 00000000000..435a91f217e --- /dev/null +++ b/src/libcollections/btree/mod.rs @@ -0,0 +1,32 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +pub use self::map::BTreeMap; +pub use self::map::Entries; +pub use self::map::MutEntries; +pub use self::map::MoveEntries; +pub use self::map::Keys; +pub use self::map::Values; +pub use self::map::Entry; +pub use self::map::OccupiedEntry; +pub use self::map::VacantEntry; + +pub use self::set::BTreeSet; +pub use self::set::Items; +pub use self::set::MoveItems; +pub use self::set::DifferenceItems; +pub use self::set::UnionItems; +pub use self::set::SymDifferenceItems; +pub use self::set::IntersectionItems; + + +mod node; +mod map; +mod set; diff --git a/src/libcollections/btree/node.rs b/src/libcollections/btree/node.rs new file mode 100644 index 00000000000..e30b29f8767 --- /dev/null +++ b/src/libcollections/btree/node.rs @@ -0,0 +1,552 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This module represents all the internal representation and logic for a B-Tree's node +// with a safe interface, so that BTreeMap itself does not depend on any of these details. + +use core::prelude::*; + +use core::{slice, mem, ptr}; +use core::iter::Zip; +use MutableSeq; + +use vec; +use vec::Vec; + +/// Represents the result of an Insertion: either the item fit, or the node had to split +pub enum InsertionResult { + /// The inserted element fit + Fit, + /// The inserted element did not fit, so the node was split + Split(K, V, Node), +} + +/// Represents the result of a search for a key in a single node +pub enum SearchResult { + /// The element was found at the given index + Found(uint), + /// The element wasn't found, but if it's anywhere, it must be beyond this edge + GoDown(uint), +} + +/// A B-Tree Node. 
We keep keys/edges/values separate to optimize searching for keys. +#[deriving(Clone)] +pub struct Node { + // FIXME(Gankro): This representation is super safe and easy to reason about, but painfully + // inefficient. As three Vecs, each node consists of *9* words: (ptr, cap, size) * 3. In + // theory, if we take full control of allocation like HashMap's RawTable does, + // and restrict leaves to max size 256 (not unreasonable for a btree node) we can cut + // this down to just (ptr, cap: u8, size: u8, is_leaf: bool). With generic + // integer arguments, cap can even move into the type, reducing this just to + // (ptr, size, is_leaf). This could also have cache benefits for very small nodes, as keys + // could bleed into edges and vals. + // + // However doing this would require a fair amount of code to reimplement all + // the Vec logic and iterators. It would also use *way* more unsafe code, which sucks and is + // hard. For now, we accept this cost in the name of correctness and simplicity. + // + // As a compromise, keys and vals could be merged into one Vec<(K, V)>, which would shave + // off 3 words, but possibly hurt our cache efficiency during search, which only cares about + // keys. This would also avoid the Zip we use in our iterator implementations. This is + // probably worth investigating. + // + // Note that this space waste is especially tragic since we store the Nodes by value in their + // parent's edges Vec, so unoccupied spaces in the edges Vec are quite large, and we have + // to shift around a lot more bits during insertion/removal. + + keys: Vec, + edges: Vec>, + vals: Vec, +} + +impl Node { + /// Searches for the given key in the node. If it finds an exact match, + /// `Found` will be yielded with the matching index. If it fails to find an exact match, + /// `GoDown` will be yielded with the index of the subtree the key must lie in.
+ pub fn search(&self, key: &K) -> SearchResult { + // FIXME(Gankro): Tune when to search linear or binary based on B (and maybe K/V). + // For the B configured as of this writing (B = 6), binary search was *significantly* + // worse for uints. + self.search_linear(key) + } + + fn search_linear(&self, key: &K) -> SearchResult { + for (i, k) in self.keys.iter().enumerate() { + match k.cmp(key) { + Less => {}, + Equal => return Found(i), + Greater => return GoDown(i), + } + } + GoDown(self.len()) + } +} + +// Public interface +impl Node { + /// Make a new internal node + pub fn new_internal(capacity: uint) -> Node { + Node { + keys: Vec::with_capacity(capacity), + vals: Vec::with_capacity(capacity), + edges: Vec::with_capacity(capacity + 1), + } + } + + /// Make a new leaf node + pub fn new_leaf(capacity: uint) -> Node { + Node { + keys: Vec::with_capacity(capacity), + vals: Vec::with_capacity(capacity), + edges: Vec::new(), + } + } + + /// Make a leaf root from scratch + pub fn make_leaf_root(b: uint) -> Node { + Node::new_leaf(capacity_from_b(b)) + } + + /// Make an internal root and swap it with an old root + pub fn make_internal_root(left_and_out: &mut Node, b: uint, key: K, value: V, + right: Node) { + let mut node = Node::new_internal(capacity_from_b(b)); + mem::swap(left_and_out, &mut node); + left_and_out.keys.push(key); + left_and_out.vals.push(value); + left_and_out.edges.push(node); + left_and_out.edges.push(right); + } + + + /// How many key-value pairs the node contains + pub fn len(&self) -> uint { + self.keys.len() + } + + /// How many key-value pairs the node can fit + pub fn capacity(&self) -> uint { + self.keys.capacity() + } + + /// Whether the node is a leaf, i.e. has no children + pub fn is_leaf(&self) -> bool { + self.edges.is_empty() + } + + /// if the node has too few elements + pub fn is_underfull(&self) -> bool { + self.len() < min_load_from_capacity(self.capacity()) + } + + /// if the node cannot fit any more elements + pub fn is_full(&self) -> bool { +
self.len() == self.capacity() + } + + /// Swap the given key-value pair with the key-value pair stored in the node's index, + /// without checking bounds. + pub unsafe fn unsafe_swap(&mut self, index: uint, key: &mut K, val: &mut V) { + mem::swap(self.keys.as_mut_slice().unsafe_mut(index), key); + mem::swap(self.vals.as_mut_slice().unsafe_mut(index), val); + } + + /// Get the node's key mutably without any bounds checks. + pub unsafe fn unsafe_key_mut(&mut self, index: uint) -> &mut K { + self.keys.as_mut_slice().unsafe_mut(index) + } + + /// Get the node's value at the given index + pub fn val(&self, index: uint) -> Option<&V> { + self.vals.as_slice().get(index) + } + + /// Get the node's value mutably at the given index + pub fn val_mut(&mut self, index: uint) -> Option<&mut V> { + self.vals.as_mut_slice().get_mut(index) + } + + /// Get the node's value mutably without any bounds checks. + pub unsafe fn unsafe_val_mut(&mut self, index: uint) -> &mut V { + self.vals.as_mut_slice().unsafe_mut(index) + } + + /// Get the node's edge at the given index + pub fn edge(&self, index: uint) -> Option<&Node> { + self.edges.as_slice().get(index) + } + + /// Get the node's edge mutably at the given index + pub fn edge_mut(&mut self, index: uint) -> Option<&mut Node> { + self.edges.as_mut_slice().get_mut(index) + } + + /// Get the node's edge mutably without any bounds checks. + pub unsafe fn unsafe_edge_mut(&mut self, index: uint) -> &mut Node { + self.edges.as_mut_slice().unsafe_mut(index) + } + + /// Pop an edge off the end of the node + pub fn pop_edge(&mut self) -> Option> { + self.edges.pop() + } + + /// Try to insert this key-value pair at the given index in this leaf node + /// If the node is full, we have to split it. + /// + /// Returns a *mut V to the inserted value, because the caller may want this when + /// they're done mutating the tree, but we don't want to borrow anything for now.
+ pub fn insert_as_leaf(&mut self, index: uint, key: K, value: V) -> + (InsertionResult, *mut V) { + if !self.is_full() { + // The element can fit, just insert it + self.insert_fit_as_leaf(index, key, value); + (Fit, unsafe { self.unsafe_val_mut(index) as *mut _ }) + } else { + // The element can't fit, this node is full. Split it into two nodes. + let (new_key, new_val, mut new_right) = self.split(); + let left_len = self.len(); + + let ptr = if index <= left_len { + self.insert_fit_as_leaf(index, key, value); + unsafe { self.unsafe_val_mut(index) as *mut _ } + } else { + new_right.insert_fit_as_leaf(index - left_len - 1, key, value); + unsafe { new_right.unsafe_val_mut(index - left_len - 1) as *mut _ } + }; + + (Split(new_key, new_val, new_right), ptr) + } + } + + /// Try to insert this key-value pair at the given index in this internal node + /// If the node is full, we have to split it. + pub fn insert_as_internal(&mut self, index: uint, key: K, value: V, right: Node) + -> InsertionResult { + if !self.is_full() { + // The element can fit, just insert it + self.insert_fit_as_internal(index, key, value, right); + Fit + } else { + // The element can't fit, this node is full. Split it into two nodes. + let (new_key, new_val, mut new_right) = self.split(); + let left_len = self.len(); + + if index <= left_len { + self.insert_fit_as_internal(index, key, value, right); + } else { + new_right.insert_fit_as_internal(index - left_len - 1, key, value, right); + } + + Split(new_key, new_val, new_right) + } + } + + /// Remove the key-value pair at the given index + pub fn remove_as_leaf(&mut self, index: uint) -> (K, V) { + match (self.keys.remove(index), self.vals.remove(index)) { + (Some(k), Some(v)) => (k, v), + _ => unreachable!(), + } + } + + /// Handle an underflow in this node's child. We favour handling "to the left" because we know + /// we're empty, but our neighbour can be full. 
Handling to the left means when we choose to + /// steal, we pop off the end of our neighbour (always fast) and "unshift" ourselves + /// (always slow, but at least faster since we know we're half-empty). + /// Handling "to the right" reverses these roles. Of course, we merge whenever possible + /// because we want dense nodes, and merging is about equal work regardless of direction. + pub fn handle_underflow(&mut self, underflowed_child_index: uint) { + assert!(underflowed_child_index <= self.len()); + unsafe { + if underflowed_child_index > 0 { + self.handle_underflow_to_left(underflowed_child_index); + } else { + self.handle_underflow_to_right(underflowed_child_index); + } + } + } + + pub fn iter<'a>(&'a self) -> Traversal<'a, K, V> { + let is_leaf = self.is_leaf(); + Traversal { + elems: self.keys.as_slice().iter().zip(self.vals.as_slice().iter()), + edges: self.edges.as_slice().iter(), + head_is_edge: true, + tail_is_edge: true, + has_edges: !is_leaf, + } + } + + pub fn iter_mut<'a>(&'a mut self) -> MutTraversal<'a, K, V> { + let is_leaf = self.is_leaf(); + MutTraversal { + elems: self.keys.as_slice().iter().zip(self.vals.as_mut_slice().iter_mut()), + edges: self.edges.as_mut_slice().iter_mut(), + head_is_edge: true, + tail_is_edge: true, + has_edges: !is_leaf, + } + } + + pub fn into_iter(self) -> MoveTraversal { + let is_leaf = self.is_leaf(); + MoveTraversal { + elems: self.keys.into_iter().zip(self.vals.into_iter()), + edges: self.edges.into_iter(), + head_is_edge: true, + tail_is_edge: true, + has_edges: !is_leaf, + } + } +} + +// Private implementation details +impl Node { + /// Make a node from its raw components + fn from_vecs(keys: Vec, vals: Vec, edges: Vec>) -> Node { + Node { + keys: keys, + vals: vals, + edges: edges, + } + } + + /// We have somehow verified that this key-value pair will fit in this internal node, + /// so insert under that assumption. 
+ fn insert_fit_as_leaf(&mut self, index: uint, key: K, val: V) { + self.keys.insert(index, key); + self.vals.insert(index, val); + } + + /// We have somehow verified that this key-value pair will fit in this internal node, + /// so insert under that assumption + fn insert_fit_as_internal(&mut self, index: uint, key: K, val: V, right: Node) { + self.keys.insert(index, key); + self.vals.insert(index, val); + self.edges.insert(index + 1, right); + } + + /// Node is full, so split it into two nodes, and yield the middle-most key-value pair + /// because we have one too many, and our parent now has one too few + fn split(&mut self) -> (K, V, Node) { + let r_keys = split(&mut self.keys); + let r_vals = split(&mut self.vals); + let r_edges = if self.edges.is_empty() { + Vec::new() + } else { + split(&mut self.edges) + }; + + let right = Node::from_vecs(r_keys, r_vals, r_edges); + // Pop it + let key = self.keys.pop().unwrap(); + let val = self.vals.pop().unwrap(); + + (key, val, right) + } + + /// Right is underflowed. Try to steal from left, + /// but merge left and right if left is low too. + unsafe fn handle_underflow_to_left(&mut self, underflowed_child_index: uint) { + let left_len = self.edges[underflowed_child_index - 1].len(); + if left_len > min_load_from_capacity(self.capacity()) { + self.steal_to_left(underflowed_child_index); + } else { + self.merge_children(underflowed_child_index - 1); + } + } + + /// Left is underflowed. Try to steal from the right, + /// but merge left and right if right is low too. + unsafe fn handle_underflow_to_right(&mut self, underflowed_child_index: uint) { + let right_len = self.edges[underflowed_child_index + 1].len(); + if right_len > min_load_from_capacity(self.capacity()) { + self.steal_to_right(underflowed_child_index); + } else { + self.merge_children(underflowed_child_index); + } + } + + /// Steal! Stealing is roughly analogous to a binary tree rotation. + /// In this case, we're "rotating" right.
+ unsafe fn steal_to_left(&mut self, underflowed_child_index: uint) { + // Take the biggest stuff off left + let (mut key, mut val, edge) = { + let left = self.unsafe_edge_mut(underflowed_child_index - 1); + match (left.keys.pop(), left.vals.pop(), left.edges.pop()) { + (Some(k), Some(v), e) => (k, v, e), + _ => unreachable!(), + } + }; + + // Swap the parent's separating key-value pair with left's + self.unsafe_swap(underflowed_child_index - 1, &mut key, &mut val); + + // Put them at the start of right + { + let right = self.unsafe_edge_mut(underflowed_child_index); + right.keys.insert(0, key); + right.vals.insert(0, val); + match edge { + None => {} + Some(e) => right.edges.insert(0, e) + } + } + } + + /// Steal! Stealing is roughly analogous to a binary tree rotation. + /// In this case, we're "rotating" left. + unsafe fn steal_to_right(&mut self, underflowed_child_index: uint) { + // Take the smallest stuff off right + let (mut key, mut val, edge) = { + let right = self.unsafe_edge_mut(underflowed_child_index + 1); + match (right.keys.remove(0), right.vals.remove(0), right.edges.remove(0)) { + (Some(k), Some(v), e) => (k, v, e), + _ => unreachable!(), + } + }; + + // Swap the parent's separating key-value pair with right's + self.unsafe_swap(underflowed_child_index, &mut key, &mut val); + + // Put them at the end of left + { + let left = self.unsafe_edge_mut(underflowed_child_index); + left.keys.push(key); + left.vals.push(val); + match edge { + None => {} + Some(e) => left.edges.push(e) + } + } + } + + /// Merge! Left and right will be smooshed into one node, along with the key-value + /// pair that separated them in their parent.
+ unsafe fn merge_children(&mut self, left_index: uint) { + // Permanently remove right's index, and the key-value pair that separates + // left and right + let (key, val, right) = { + match (self.keys.remove(left_index), + self.vals.remove(left_index), + self.edges.remove(left_index + 1)) { + (Some(k), Some(v), Some(e)) => (k, v, e), + _ => unreachable!(), + } + }; + + // Give left right's stuff. + let left = self.unsafe_edge_mut(left_index); + left.absorb(key, val, right); + } + + /// Take all the values from right, separated by the given key and value + fn absorb(&mut self, key: K, val: V, right: Node) { + // Just as a sanity check, make sure we can fit this guy in + debug_assert!(self.len() + right.len() <= self.capacity()) + + self.keys.push(key); + self.vals.push(val); + self.keys.extend(right.keys.into_iter()); + self.vals.extend(right.vals.into_iter()); + self.edges.extend(right.edges.into_iter()); + } +} + +/// Takes a Vec, and splits half the elements into a new one. +fn split(left: &mut Vec) -> Vec { + // This function is intended to be called on a full Vec of size 2B - 1 (keys, values), + // or 2B (edges). In the former case, left should get B elements, and right should get + // B - 1. In the latter case, both should get B. Therefore, we can just always take the last + // size / 2 elements from left, and put them on right. This also ensures this method is + // safe, even if the Vec isn't full. Just uninteresting for our purposes.
+ let len = left.len(); + let right_len = len / 2; + let left_len = len - right_len; + let mut right = Vec::with_capacity(left.capacity()); + unsafe { + let left_ptr = left.as_slice().unsafe_get(left_len) as *const _; + let right_ptr = right.as_mut_slice().as_mut_ptr(); + ptr::copy_nonoverlapping_memory(right_ptr, left_ptr, right_len); + left.set_len(left_len); + right.set_len(right_len); + } + right +} + +/// Get the capacity of a node from the order of the parent B-Tree +fn capacity_from_b(b: uint) -> uint { + 2 * b - 1 +} + +/// Get the minimum load of a node from its capacity +fn min_load_from_capacity(cap: uint) -> uint { + // B - 1 + cap / 2 +} + +/// An abstraction over all the different kinds of traversals a node supports +struct AbsTraversal { + elems: Elems, + edges: Edges, + head_is_edge: bool, + tail_is_edge: bool, + has_edges: bool, +} + +/// A single atomic step in a traversal. Either an element is visited, or an edge is followed +pub enum TraversalItem { + Elem(K, V), + Edge(E), +} + +/// A traversal over a node's entries and edges +pub type Traversal<'a, K, V> = AbsTraversal, slice::Items<'a, V>>, + slice::Items<'a, Node>>; + +/// A mutable traversal over a node's entries and edges +pub type MutTraversal<'a, K, V> = AbsTraversal, slice::MutItems<'a, V>>, + slice::MutItems<'a, Node>>; + +/// An owning traversal over a node's entries and edges +pub type MoveTraversal = AbsTraversal, vec::MoveItems>, + vec::MoveItems>>; + + +impl, Edges: Iterator> + Iterator> for AbsTraversal { + + fn next(&mut self) -> Option> { + let head_is_edge = self.head_is_edge; + self.head_is_edge = !head_is_edge; + + if head_is_edge && self.has_edges { + self.edges.next().map(|node| Edge(node)) + } else { + self.elems.next().map(|(k, v)| Elem(k, v)) + } + } +} + +impl, Edges: DoubleEndedIterator> + DoubleEndedIterator> for AbsTraversal { + + fn next_back(&mut self) -> Option> { + let tail_is_edge = self.tail_is_edge; + self.tail_is_edge = !tail_is_edge; + + if tail_is_edge && 
self.has_edges { + self.edges.next_back().map(|node| Edge(node)) + } else { + self.elems.next_back().map(|(k, v)| Elem(k, v)) + } + } +} diff --git a/src/libcollections/btree/set.rs b/src/libcollections/btree/set.rs new file mode 100644 index 00000000000..b21af89742c --- /dev/null +++ b/src/libcollections/btree/set.rs @@ -0,0 +1,433 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface +// to TreeMap + +use core::prelude::*; + +use super::{BTreeMap, Keys, MoveEntries}; +use std::hash::Hash; +use core::default::Default; +use core::{iter, fmt}; +use core::iter::Peekable; +use core::fmt::Show; + +use {Mutable, Set, MutableSet, MutableMap, Map}; + +/// A set based on a B-Tree. +#[deriving(Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] +pub struct BTreeSet{ + map: BTreeMap, +} + +/// An iterator over a BTreeSet's items. +pub type Items<'a, T> = Keys<'a, T, ()>; + +/// An owning iterator over a BTreeSet's items. +pub type MoveItems = iter::Map<'static, (T, ()), T, MoveEntries>; + +/// A lazy iterator producing elements in the set difference (in-order). +pub struct DifferenceItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +/// A lazy iterator producing elements in the set symmetric difference (in-order). +pub struct SymDifferenceItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +/// A lazy iterator producing elements in the set intersection (in-order). 
+pub struct IntersectionItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +/// A lazy iterator producing elements in the set union (in-order). +pub struct UnionItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +impl BTreeSet { + /// Makes a new BTreeSet with a reasonable choice of B. + pub fn new() -> BTreeSet { + BTreeSet { map: BTreeMap::new() } + } + + /// Makes a new BTreeSet with the given B. + pub fn with_b(b: uint) -> BTreeSet { + BTreeSet { map: BTreeMap::with_b(b) } + } +} + +impl BTreeSet { + /// Gets an iterator over the BTreeSet's contents. + pub fn iter<'a>(&'a self) -> Items<'a, T> { + self.map.keys() + } + + /// Gets an iterator for moving out the BtreeSet's contents. + pub fn into_iter(self) -> MoveItems { + self.map.into_iter().map(|(k, _)| k) + } +} + +impl BTreeSet { + /// Visits the values representing the difference, in ascending order. + pub fn difference<'a>(&'a self, other: &'a BTreeSet) -> DifferenceItems<'a, T> { + DifferenceItems{a: self.iter().peekable(), b: other.iter().peekable()} + } + + /// Visits the values representing the symmetric difference, in ascending order. + pub fn symmetric_difference<'a>(&'a self, other: &'a BTreeSet) + -> SymDifferenceItems<'a, T> { + SymDifferenceItems{a: self.iter().peekable(), b: other.iter().peekable()} + } + + /// Visits the values representing the intersection, in ascending order. + pub fn intersection<'a>(&'a self, other: &'a BTreeSet) + -> IntersectionItems<'a, T> { + IntersectionItems{a: self.iter().peekable(), b: other.iter().peekable()} + } + + /// Visits the values representing the union, in ascending order. 
+ pub fn union<'a>(&'a self, other: &'a BTreeSet) -> UnionItems<'a, T> { + UnionItems{a: self.iter().peekable(), b: other.iter().peekable()} + } +} + +impl Collection for BTreeSet { + fn len(&self) -> uint { + self.map.len() + } +} + +impl Mutable for BTreeSet { + fn clear(&mut self) { + self.map.clear() + } +} + +impl Set for BTreeSet { + fn contains(&self, value: &T) -> bool { + self.map.find(value).is_some() + } + + fn is_disjoint(&self, other: &BTreeSet) -> bool { + self.intersection(other).next().is_none() + } + + fn is_subset(&self, other: &BTreeSet) -> bool { + // Stolen from TreeMap + let mut x = self.iter(); + let mut y = other.iter(); + let mut a = x.next(); + let mut b = y.next(); + while a.is_some() { + if b.is_none() { + return false; + } + + let a1 = a.unwrap(); + let b1 = b.unwrap(); + + match b1.cmp(a1) { + Less => (), + Greater => return false, + Equal => a = x.next(), + } + + b = y.next(); + } + true + } +} + +impl MutableSet for BTreeSet{ + fn insert(&mut self, value: T) -> bool { + self.map.insert(value, ()) + } + + fn remove(&mut self, value: &T) -> bool { + self.map.remove(value) + } +} + +impl FromIterator for BTreeSet { + fn from_iter>(iter: Iter) -> BTreeSet { + let mut set = BTreeSet::new(); + set.extend(iter); + set + } +} + +impl Extendable for BTreeSet { + #[inline] + fn extend>(&mut self, mut iter: Iter) { + for elem in iter { + self.insert(elem); + } + } +} + +impl Default for BTreeSet { + fn default() -> BTreeSet { + BTreeSet::new() + } +} + +impl Show for BTreeSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, x) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}", *x)); + } + + write!(f, "}}") + } +} + +/// Compare `x` and `y`, but return `short` if x is None and `long` if y is None +fn cmp_opt(x: Option<&T>, y: Option<&T>, + short: Ordering, long: Ordering) -> Ordering { + match (x, y) { + (None , _ ) => short, + (_ , None ) => long, + 
(Some(x1), Some(y1)) => x1.cmp(y1), + } +} + +impl<'a, T: Ord> Iterator<&'a T> for DifferenceItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + match cmp_opt(self.a.peek(), self.b.peek(), Less, Less) { + Less => return self.a.next(), + Equal => { self.a.next(); self.b.next(); } + Greater => { self.b.next(); } + } + } + } +} + +impl<'a, T: Ord> Iterator<&'a T> for SymDifferenceItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + match cmp_opt(self.a.peek(), self.b.peek(), Greater, Less) { + Less => return self.a.next(), + Equal => { self.a.next(); self.b.next(); } + Greater => return self.b.next(), + } + } + } +} + +impl<'a, T: Ord> Iterator<&'a T> for IntersectionItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + let o_cmp = match (self.a.peek(), self.b.peek()) { + (None , _ ) => None, + (_ , None ) => None, + (Some(a1), Some(b1)) => Some(a1.cmp(b1)), + }; + match o_cmp { + None => return None, + Some(Less) => { self.a.next(); } + Some(Equal) => { self.b.next(); return self.a.next() } + Some(Greater) => { self.b.next(); } + } + } + } +} + +impl<'a, T: Ord> Iterator<&'a T> for UnionItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + match cmp_opt(self.a.peek(), self.b.peek(), Greater, Less) { + Less => return self.a.next(), + Equal => { self.b.next(); return self.a.next() } + Greater => return self.b.next(), + } + } + } +} + + +#[cfg(test)] +mod test { + use std::prelude::*; + + use {Set, MutableSet}; + use super::BTreeSet; + use std::hash; + + #[test] + fn test_clone_eq() { + let mut m = BTreeSet::new(); + + m.insert(1i); + m.insert(2); + + assert!(m.clone() == m); + } + + #[test] + fn test_hash() { + let mut x = BTreeSet::new(); + let mut y = BTreeSet::new(); + + x.insert(1i); + x.insert(2); + x.insert(3); + + y.insert(3i); + y.insert(2); + y.insert(1); + + assert!(hash::hash(&x) == hash::hash(&y)); + } + + fn check(a: &[int], + b: &[int], + expected: &[int], + f: |&BTreeSet, &BTreeSet, f: |&int| -> bool| -> 
bool) { + let mut set_a = BTreeSet::new(); + let mut set_b = BTreeSet::new(); + + for x in a.iter() { assert!(set_a.insert(*x)) } + for y in b.iter() { assert!(set_b.insert(*y)) } + + let mut i = 0; + f(&set_a, &set_b, |x| { + assert_eq!(*x, expected[i]); + i += 1; + true + }); + assert_eq!(i, expected.len()); + } + + #[test] + fn test_intersection() { + fn check_intersection(a: &[int], b: &[int], expected: &[int]) { + check(a, b, expected, |x, y, f| x.intersection(y).all(f)) + } + + check_intersection([], [], []); + check_intersection([1, 2, 3], [], []); + check_intersection([], [1, 2, 3], []); + check_intersection([2], [1, 2, 3], [2]); + check_intersection([1, 2, 3], [2], [2]); + check_intersection([11, 1, 3, 77, 103, 5, -5], + [2, 11, 77, -9, -42, 5, 3], + [3, 5, 11, 77]); + } + + #[test] + fn test_difference() { + fn check_difference(a: &[int], b: &[int], expected: &[int]) { + check(a, b, expected, |x, y, f| x.difference(y).all(f)) + } + + check_difference([], [], []); + check_difference([1, 12], [], [1, 12]); + check_difference([], [1, 2, 3, 9], []); + check_difference([1, 3, 5, 9, 11], + [3, 9], + [1, 5, 11]); + check_difference([-5, 11, 22, 33, 40, 42], + [-12, -5, 14, 23, 34, 38, 39, 50], + [11, 22, 33, 40, 42]); + } + + #[test] + fn test_symmetric_difference() { + fn check_symmetric_difference(a: &[int], b: &[int], + expected: &[int]) { + check(a, b, expected, |x, y, f| x.symmetric_difference(y).all(f)) + } + + check_symmetric_difference([], [], []); + check_symmetric_difference([1, 2, 3], [2], [1, 3]); + check_symmetric_difference([2], [1, 2, 3], [1, 3]); + check_symmetric_difference([1, 3, 5, 9, 11], + [-2, 3, 9, 14, 22], + [-2, 1, 5, 11, 14, 22]); + } + + #[test] + fn test_union() { + fn check_union(a: &[int], b: &[int], + expected: &[int]) { + check(a, b, expected, |x, y, f| x.union(y).all(f)) + } + + check_union([], [], []); + check_union([1, 2, 3], [2], [1, 2, 3]); + check_union([2], [1, 2, 3], [1, 2, 3]); + check_union([1, 3, 5, 9, 11, 16, 19, 24], 
+ [-2, 1, 5, 9, 13, 19], + [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]); + } + + #[test] + fn test_zip() { + let mut x = BTreeSet::new(); + x.insert(5u); + x.insert(12u); + x.insert(11u); + + let mut y = BTreeSet::new(); + y.insert("foo"); + y.insert("bar"); + + let x = x; + let y = y; + let mut z = x.iter().zip(y.iter()); + + // FIXME: #5801: this needs a type hint to compile... + let result: Option<(&uint, & &'static str)> = z.next(); + assert_eq!(result.unwrap(), (&5u, &("bar"))); + + let result: Option<(&uint, & &'static str)> = z.next(); + assert_eq!(result.unwrap(), (&11u, &("foo"))); + + let result: Option<(&uint, & &'static str)> = z.next(); + assert!(result.is_none()); + } + + #[test] + fn test_from_iter() { + let xs = [1i, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: BTreeSet = xs.iter().map(|&x| x).collect(); + + for x in xs.iter() { + assert!(set.contains(x)); + } + } + + #[test] + fn test_show() { + let mut set: BTreeSet = BTreeSet::new(); + let empty: BTreeSet = BTreeSet::new(); + + set.insert(1); + set.insert(2); + + let set_str = format!("{}", set); + + assert!(set_str == "{1, 2}".to_string()); + assert_eq!(format!("{}", empty), "{}".to_string()); + } +} diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index 9d3be0d14d3..8b9a0ec796e 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -37,7 +37,7 @@ extern crate alloc; use core::prelude::Option; pub use bitv::{Bitv, BitvSet}; -pub use btree::BTree; +pub use btree::{BTreeMap, BTreeSet}; pub use core::prelude::Collection; pub use dlist::DList; pub use enum_set::EnumSet; diff --git a/src/libstd/collections/mod.rs b/src/libstd/collections/mod.rs index d98d490a84b..324c0295971 100644 --- a/src/libstd/collections/mod.rs +++ b/src/libstd/collections/mod.rs @@ -16,7 +16,7 @@ pub use core_collections::{Collection, Mutable, Map, MutableMap}; pub use core_collections::{Set, MutableSet, Deque, MutableSeq}; -pub use core_collections::{Bitv, BitvSet, BTree, DList, EnumSet}; +pub use 
core_collections::{Bitv, BitvSet, BTreeMap, BTreeSet, DList, EnumSet}; pub use core_collections::{PriorityQueue, RingBuf, SmallIntMap}; pub use core_collections::{TreeMap, TreeSet, TrieMap, TrieSet}; pub use core_collections::{bitv, btree, dlist, enum_set};