Auto merge of #81094 - ssomers:btree_drainy_refactor_3, r=Mark-Simulacrum

BTreeMap: split up range_search into two stages

`range_search` expects the caller to pass the same root twice and starts searching a node for both bounds of a range. It's not very clear that in the early iterations, it searches twice in the same node. This PR splits that search up in an initial `find_leaf_edges_spanning_range` that postpones aliasing until the last second, and a second phase for continuing the search for the range in the each subtree independently (`find_lower_bound_edge` & `find_upper_bound_edge`), which greatly helps for use in #81075. It also moves those functions over to the search module.

r? `@Mark-Simulacrum`
This commit is contained in:
bors 2021-03-01 02:48:29 +00:00
commit 3b150b7a8f
5 changed files with 228 additions and 118 deletions

View File

@ -1,11 +1,8 @@
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::ops::Bound::{Excluded, Included, Unbounded};
use core::ops::RangeBounds;
use core::ptr;
use super::node::{marker, ForceResult::*, Handle, NodeRef};
use super::search::SearchResult;
pub struct LeafRange<BorrowType, K, V> {
pub front: Option<Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>>,
@ -30,100 +27,50 @@ impl<BorrowType, K, V> LeafRange<BorrowType, K, V> {
}
}
/// Finds the leaf edges delimiting a specified range in or underneath a node.
///
/// The result is meaningful only if the tree is ordered by key, like the tree
/// in a `BTreeMap` is.
fn range_search<BorrowType: marker::BorrowType, K, V, Q, R>(
root1: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
root2: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
range: R,
) -> LeafRange<BorrowType, K, V>
where
Q: ?Sized + Ord,
K: Borrow<Q>,
R: RangeBounds<Q>,
{
// WARNING: Inlining these variables would be unsound (#81138)
// We assume the bounds reported by `range` remain the same, but
// an adversarial implementation could change between calls
let start = range.start_bound();
let end = range.end_bound();
match (start, end) {
(Excluded(s), Excluded(e)) if s == e => {
panic!("range start and end are equal and excluded in BTreeMap")
}
(Included(s) | Excluded(s), Included(e) | Excluded(e)) if s > e => {
panic!("range start is greater than range end in BTreeMap")
}
_ => {}
};
let mut min_node = root1;
let mut max_node = root2;
let mut min_found = false;
let mut max_found = false;
loop {
// Using `range` again would be unsound (#81138)
let front = match (min_found, start) {
(false, Included(key)) => match min_node.search_node(key) {
SearchResult::Found(kv) => {
min_found = true;
kv.left_edge()
impl<BorrowType: marker::BorrowType, K, V> NodeRef<BorrowType, K, V, marker::LeafOrInternal> {
/// Finds the distinct leaf edges delimiting a specified range in a tree.
/// Returns either a pair of different handles into the same tree or a pair
/// of empty options.
/// # Safety
/// Unless `BorrowType` is `Immut`, do not use the duplicate handles to
/// visit the same KV twice.
unsafe fn find_leaf_edges_spanning_range<Q: ?Sized, R>(
self,
range: R,
) -> LeafRange<BorrowType, K, V>
where
Q: Ord,
K: Borrow<Q>,
R: RangeBounds<Q>,
{
match self.search_tree_for_bifurcation(&range) {
Err(_) => LeafRange::none(),
Ok((
node,
lower_edge_idx,
upper_edge_idx,
mut lower_child_bound,
mut upper_child_bound,
)) => {
let mut lower_edge = unsafe { Handle::new_edge(ptr::read(&node), lower_edge_idx) };
let mut upper_edge = unsafe { Handle::new_edge(node, upper_edge_idx) };
loop {
match (lower_edge.force(), upper_edge.force()) {
(Leaf(f), Leaf(b)) => return LeafRange { front: Some(f), back: Some(b) },
(Internal(f), Internal(b)) => {
(lower_edge, lower_child_bound) =
f.descend().find_lower_bound_edge(lower_child_bound);
(upper_edge, upper_child_bound) =
b.descend().find_upper_bound_edge(upper_child_bound);
}
_ => unreachable!("BTreeMap has different depths"),
}
}
SearchResult::GoDown(edge) => edge,
},
(false, Excluded(key)) => match min_node.search_node(key) {
SearchResult::Found(kv) => {
min_found = true;
kv.right_edge()
}
SearchResult::GoDown(edge) => edge,
},
(true, Included(_)) => min_node.last_edge(),
(true, Excluded(_)) => min_node.first_edge(),
(_, Unbounded) => min_node.first_edge(),
};
// Using `range` again would be unsound (#81138)
let back = match (max_found, end) {
(false, Included(key)) => match max_node.search_node(key) {
SearchResult::Found(kv) => {
max_found = true;
kv.right_edge()
}
SearchResult::GoDown(edge) => edge,
},
(false, Excluded(key)) => match max_node.search_node(key) {
SearchResult::Found(kv) => {
max_found = true;
kv.left_edge()
}
SearchResult::GoDown(edge) => edge,
},
(true, Included(_)) => max_node.first_edge(),
(true, Excluded(_)) => max_node.last_edge(),
(_, Unbounded) => max_node.last_edge(),
};
if front.partial_cmp(&back) == Some(Ordering::Greater) {
panic!("Ord is ill-defined in BTreeMap range");
}
match (front.force(), back.force()) {
(Leaf(f), Leaf(b)) => {
return LeafRange { front: Some(f), back: Some(b) };
}
(Internal(min_int), Internal(max_int)) => {
min_node = min_int.descend();
max_node = max_int.descend();
}
_ => unreachable!("BTreeMap has different depths"),
};
}
}
}
/// Equivalent to `range_search(root1, root2, ..)` but without the `Ord` bound.
/// Equivalent to `(root1.first_leaf_edge(), root2.last_leaf_edge())` but more efficient.
fn full_range<BorrowType: marker::BorrowType, K, V>(
root1: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
@ -158,7 +105,8 @@ impl<'a, K: 'a, V: 'a> NodeRef<marker::Immut<'a>, K, V, marker::LeafOrInternal>
K: Borrow<Q>,
R: RangeBounds<Q>,
{
range_search(self, self, range)
// SAFETY: our borrow type is immutable.
unsafe { self.find_leaf_edges_spanning_range(range) }
}
/// Finds the pair of leaf edges delimiting an entire tree.
@ -174,16 +122,16 @@ impl<'a, K: 'a, V: 'a> NodeRef<marker::ValMut<'a>, K, V, marker::LeafOrInternal>
///
/// The result is meaningful only if the tree is ordered by key, like the tree
/// in a `BTreeMap` is.
///
/// # Safety
/// Do not use the duplicate handles to visit the same KV twice.
pub fn range_search<Q, R>(self, range: R) -> LeafRange<marker::ValMut<'a>, K, V>
where
Q: ?Sized + Ord,
K: Borrow<Q>,
R: RangeBounds<Q>,
{
// We duplicate the root NodeRef here -- we will never visit the same KV
// twice, and never end up with overlapping value references.
let self2 = unsafe { ptr::read(&self) };
range_search(self, self2, range)
unsafe { self.find_leaf_edges_spanning_range(range) }
}
/// Splits a unique reference into a pair of leaf edges delimiting the full range of the tree.

View File

@ -31,7 +31,6 @@
// since leaf edges are empty and need no data representation. In an internal node,
// an edge both identifies a position and contains a pointer to a child node.
use core::cmp::Ordering;
use core::marker::PhantomData;
use core::mem::{self, MaybeUninit};
use core::ptr::{self, NonNull};
@ -742,15 +741,6 @@ impl<BorrowType, K, V, NodeType, HandleType> PartialEq
}
}
impl<BorrowType, K, V, NodeType, HandleType> PartialOrd
for Handle<NodeRef<BorrowType, K, V, NodeType>, HandleType>
{
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
let Self { node, idx, _marker } = self;
if node.eq(&other.node) { Some(idx.cmp(&other.idx)) } else { None }
}
}
impl<BorrowType, K, V, NodeType, HandleType>
Handle<NodeRef<BorrowType, K, V, NodeType>, HandleType>
{

View File

@ -2,7 +2,6 @@ use super::super::navigate;
use super::*;
use crate::fmt::Debug;
use crate::string::String;
use core::cmp::Ordering::*;
impl<'a, K: 'a, V: 'a> NodeRef<marker::Immut<'a>, K, V, marker::LeafOrInternal> {
// Asserts that the back pointer in each reachable node points to its parent.
@ -67,7 +66,7 @@ fn test_splitpoint() {
}
#[test]
fn test_partial_cmp_eq() {
fn test_partial_eq() {
let mut root1 = NodeRef::new_leaf();
root1.borrow_mut().push(1, ());
let mut root1 = NodeRef::new_internal(root1.forget_type()).forget_type();
@ -87,13 +86,6 @@ fn test_partial_cmp_eq() {
assert!(top_edge_1 == top_edge_1);
assert!(top_edge_1 != top_edge_2);
assert_eq!(leaf_edge_1a.partial_cmp(&leaf_edge_1a), Some(Equal));
assert_eq!(leaf_edge_1a.partial_cmp(&leaf_edge_1b), Some(Less));
assert_eq!(leaf_edge_1a.partial_cmp(&top_edge_1), None);
assert_eq!(leaf_edge_1a.partial_cmp(&top_edge_2), None);
assert_eq!(top_edge_1.partial_cmp(&top_edge_1), Some(Equal));
assert_eq!(top_edge_1.partial_cmp(&top_edge_2), None);
root1.pop_internal_level();
unsafe { root1.into_dying().deallocate_and_ascend() };
unsafe { root2.into_dying().deallocate_and_ascend() };

View File

@ -1,10 +1,33 @@
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::ops::{Bound, RangeBounds};
use super::node::{marker, ForceResult::*, Handle, NodeRef};
use SearchBound::*;
use SearchResult::*;
pub enum SearchBound<T> {
/// An inclusive bound to look for, just like `Bound::Included(T)`.
Included(T),
/// An exclusive bound to look for, just like `Bound::Excluded(T)`.
Excluded(T),
/// An unconditional inclusive bound, just like `Bound::Unbounded`.
AllIncluded,
/// An unconditional exclusive bound.
AllExcluded,
}
impl<T> SearchBound<T> {
pub fn from_range(range_bound: Bound<T>) -> Self {
match range_bound {
Bound::Included(t) => Included(t),
Bound::Excluded(t) => Excluded(t),
Bound::Unbounded => AllIncluded,
}
}
}
pub enum SearchResult<BorrowType, K, V, FoundType, GoDownType> {
Found(Handle<NodeRef<BorrowType, K, V, FoundType>, marker::KV>),
GoDown(Handle<NodeRef<BorrowType, K, V, GoDownType>, marker::Edge>),
@ -40,6 +63,112 @@ impl<BorrowType: marker::BorrowType, K, V> NodeRef<BorrowType, K, V, marker::Lea
}
}
}
/// Descends to the nearest node where the edge matching the lower bound
/// of the range is different from the edge matching the upper bound, i.e.,
/// the nearest node that has at least one key contained in the range.
///
/// If found, returns an `Ok` with that node, the pair of edge indices in it
/// delimiting the range, and the corresponding pair of bounds for
/// continuing the search in the child nodes, in case the node is internal.
///
/// If not found, returns an `Err` with the leaf edge matching the entire
/// range.
///
/// The result is meaningful only if the tree is ordered by key.
pub fn search_tree_for_bifurcation<'r, Q: ?Sized, R>(
mut self,
range: &'r R,
) -> Result<
(
NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
usize,
usize,
SearchBound<&'r Q>,
SearchBound<&'r Q>,
),
Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
>
where
Q: Ord,
K: Borrow<Q>,
R: RangeBounds<Q>,
{
// WARNING: Inlining these variables would be unsound (#81138)
// We assume the bounds reported by `range` remain the same, but
// an adversarial implementation could change between calls
let (start, end) = (range.start_bound(), range.end_bound());
match (start, end) {
(Bound::Excluded(s), Bound::Excluded(e)) if s == e => {
panic!("range start and end are equal and excluded in BTreeMap")
}
(Bound::Included(s) | Bound::Excluded(s), Bound::Included(e) | Bound::Excluded(e))
if s > e =>
{
panic!("range start is greater than range end in BTreeMap")
}
_ => {}
}
let mut lower_bound = SearchBound::from_range(start);
let mut upper_bound = SearchBound::from_range(end);
loop {
let (lower_edge_idx, lower_child_bound) = self.find_lower_bound_index(lower_bound);
let (upper_edge_idx, upper_child_bound) = self.find_upper_bound_index(upper_bound);
if lower_edge_idx > upper_edge_idx {
panic!("Ord is ill-defined in BTreeMap range")
}
if lower_edge_idx < upper_edge_idx {
return Ok((
self,
lower_edge_idx,
upper_edge_idx,
lower_child_bound,
upper_child_bound,
));
}
let common_edge = unsafe { Handle::new_edge(self, lower_edge_idx) };
match common_edge.force() {
Leaf(common_edge) => return Err(common_edge),
Internal(common_edge) => {
self = common_edge.descend();
lower_bound = lower_child_bound;
upper_bound = upper_child_bound;
}
}
}
}
/// Finds an edge in the node delimiting the lower bound of a range.
/// Also returns the lower bound to be used for continuing the search in
/// the matching child node, if `self` is an internal node.
///
/// The result is meaningful only if the tree is ordered by key.
pub fn find_lower_bound_edge<'r, Q>(
self,
bound: SearchBound<&'r Q>,
) -> (Handle<Self, marker::Edge>, SearchBound<&'r Q>)
where
Q: ?Sized + Ord,
K: Borrow<Q>,
{
let (edge_idx, bound) = self.find_lower_bound_index(bound);
let edge = unsafe { Handle::new_edge(self, edge_idx) };
(edge, bound)
}
/// Clone of `find_lower_bound_edge` for the upper bound.
pub fn find_upper_bound_edge<'r, Q>(
self,
bound: SearchBound<&'r Q>,
) -> (Handle<Self, marker::Edge>, SearchBound<&'r Q>)
where
Q: ?Sized + Ord,
K: Borrow<Q>,
{
let (edge_idx, bound) = self.find_upper_bound_index(bound);
let edge = unsafe { Handle::new_edge(self, edge_idx) };
(edge, bound)
}
}
impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
@ -55,7 +184,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
Q: Ord,
K: Borrow<Q>,
{
match self.find_index(key) {
match self.find_key_index(key) {
IndexResult::KV(idx) => Found(unsafe { Handle::new_kv(self, idx) }),
IndexResult::Edge(idx) => GoDown(unsafe { Handle::new_edge(self, idx) }),
}
@ -66,7 +195,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
///
/// The result is meaningful only if the tree is ordered by key, like the tree
/// in a `BTreeMap` is.
fn find_index<Q: ?Sized>(&self, key: &Q) -> IndexResult
fn find_key_index<Q: ?Sized>(&self, key: &Q) -> IndexResult
where
Q: Ord,
K: Borrow<Q>,
@ -82,4 +211,54 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
}
IndexResult::Edge(keys.len())
}
/// Finds an edge index in the node delimiting the lower bound of a range.
/// Also returns the lower bound to be used for continuing the search in
/// the matching child node, if `self` is an internal node.
///
/// The result is meaningful only if the tree is ordered by key.
fn find_lower_bound_index<'r, Q>(
&self,
bound: SearchBound<&'r Q>,
) -> (usize, SearchBound<&'r Q>)
where
Q: ?Sized + Ord,
K: Borrow<Q>,
{
match bound {
Included(key) => match self.find_key_index(key) {
IndexResult::KV(idx) => (idx, AllExcluded),
IndexResult::Edge(idx) => (idx, bound),
},
Excluded(key) => match self.find_key_index(key) {
IndexResult::KV(idx) => (idx + 1, AllIncluded),
IndexResult::Edge(idx) => (idx, bound),
},
AllIncluded => (0, AllIncluded),
AllExcluded => (self.len(), AllExcluded),
}
}
/// Clone of `find_lower_bound_index` for the upper bound.
fn find_upper_bound_index<'r, Q>(
&self,
bound: SearchBound<&'r Q>,
) -> (usize, SearchBound<&'r Q>)
where
Q: ?Sized + Ord,
K: Borrow<Q>,
{
match bound {
Included(key) => match self.find_key_index(key) {
IndexResult::KV(idx) => (idx + 1, AllExcluded),
IndexResult::Edge(idx) => (idx, bound),
},
Excluded(key) => match self.find_key_index(key) {
IndexResult::KV(idx) => (idx, AllIncluded),
IndexResult::Edge(idx) => (idx, bound),
},
AllIncluded => (self.len(), AllIncluded),
AllExcluded => (0, AllExcluded),
}
}
}

View File

@ -92,6 +92,7 @@
#![feature(const_fn)]
#![feature(cow_is_borrowed)]
#![feature(const_cow_is_borrowed)]
#![feature(destructuring_assignment)]
#![feature(dispatch_from_dyn)]
#![feature(core_intrinsics)]
#![feature(dropck_eyepatch)]