BTreeSet intersection, difference & is_subnet optimizations

This commit is contained in:
Stein Somers 2019-09-22 17:45:47 +02:00
parent 702b45e409
commit d132a70bf4
2 changed files with 248 additions and 94 deletions

View File

@ -122,13 +122,16 @@ pub struct Difference<'a, T: 'a> {
}
enum DifferenceInner<'a, T: 'a> {
Stitch {
// iterate all of self and some of other, spotting matches along the way
self_iter: Iter<'a, T>,
other_iter: Peekable<Iter<'a, T>>,
},
Search {
// iterate a small set, look up in the large set
self_iter: Iter<'a, T>,
other_set: &'a BTreeSet<T>,
},
Iterate(Iter<'a, T>), // simply stream self's elements
}
#[stable(feature = "collection_debug", since = "1.17.0")]
@ -147,6 +150,7 @@ impl<T: fmt::Debug> fmt::Debug for Difference<'_, T> {
self_iter,
other_set: _,
} => f.debug_tuple("Difference").field(&self_iter).finish(),
DifferenceInner::Iterate(iter) => f.debug_tuple("Difference").field(&iter).finish(),
}
}
}
@ -187,13 +191,16 @@ pub struct Intersection<'a, T: 'a> {
}
enum IntersectionInner<'a, T: 'a> {
Stitch {
// iterate similarly sized sets jointly, spotting matches along the way
a: Iter<'a, T>,
b: Iter<'a, T>,
},
Search {
// iterate a small set, look up in the large set
small_iter: Iter<'a, T>,
large_set: &'a BTreeSet<T>,
},
Answer(Option<&'a T>), // return a specific value or emptiness
}
#[stable(feature = "collection_debug", since = "1.17.0")]
@ -212,6 +219,9 @@ impl<T: fmt::Debug> fmt::Debug for Intersection<'_, T> {
small_iter,
large_set: _,
} => f.debug_tuple("Intersection").field(&small_iter).finish(),
IntersectionInner::Answer(answer) => {
f.debug_tuple("Intersection").field(&answer).finish()
}
}
}
}
@ -314,24 +324,51 @@ impl<T: Ord> BTreeSet<T> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn difference<'a>(&'a self, other: &'a BTreeSet<T>) -> Difference<'a, T> {
if self.len() > other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
// Self is bigger than or not much smaller than other set.
// Iterate both sets jointly, spotting matches along the way.
Difference {
inner: DifferenceInner::Stitch {
self_iter: self.iter(),
other_iter: other.iter().peekable(),
},
}
let (self_min, self_max) = if let (Some(self_min), Some(self_max)) =
(self.iter().next(), self.iter().next_back())
{
(self_min, self_max)
} else {
// Self is much smaller than other set, or both sets are empty.
// Iterate the small set, searching for matches in the large set.
Difference {
inner: DifferenceInner::Search {
self_iter: self.iter(),
other_set: other,
},
}
return Difference {
inner: DifferenceInner::Iterate(self.iter()),
};
};
let (other_min, other_max) = if let (Some(other_min), Some(other_max)) =
(other.iter().next(), other.iter().next_back())
{
(other_min, other_max)
} else {
return Difference {
inner: DifferenceInner::Iterate(self.iter()),
};
};
Difference {
inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
(Greater, _) | (_, Less) => DifferenceInner::Iterate(self.iter()),
(Equal, _) => {
let mut self_iter = self.iter();
self_iter.next();
DifferenceInner::Iterate(self_iter)
}
(_, Equal) => {
let mut self_iter = self.iter();
self_iter.next_back();
DifferenceInner::Iterate(self_iter)
}
_ => {
if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
DifferenceInner::Search {
self_iter: self.iter(),
other_set: other,
}
} else {
DifferenceInner::Stitch {
self_iter: self.iter(),
other_iter: other.iter().peekable(),
}
}
}
},
}
}
@ -387,29 +424,48 @@ impl<T: Ord> BTreeSet<T> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn intersection<'a>(&'a self, other: &'a BTreeSet<T>) -> Intersection<'a, T> {
let (small, other) = if self.len() <= other.len() {
(self, other)
let (self_min, self_max) = if let (Some(self_min), Some(self_max)) =
(self.iter().next(), self.iter().next_back())
{
(self_min, self_max)
} else {
(other, self)
return Intersection {
inner: IntersectionInner::Answer(None),
};
};
if small.len() > other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
// Small set is not much smaller than other set.
// Iterate both sets jointly, spotting matches along the way.
Intersection {
inner: IntersectionInner::Stitch {
a: small.iter(),
b: other.iter(),
},
}
let (other_min, other_max) = if let (Some(other_min), Some(other_max)) =
(other.iter().next(), other.iter().next_back())
{
(other_min, other_max)
} else {
// Big difference in number of elements, or both sets are empty.
// Iterate the small set, searching for matches in the large set.
Intersection {
inner: IntersectionInner::Search {
small_iter: small.iter(),
large_set: other,
},
}
return Intersection {
inner: IntersectionInner::Answer(None),
};
};
Intersection {
inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
(Greater, _) | (_, Less) => IntersectionInner::Answer(None),
(Equal, _) => IntersectionInner::Answer(Some(self_min)),
(_, Equal) => IntersectionInner::Answer(Some(self_max)),
_ => {
if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
IntersectionInner::Search {
small_iter: self.iter(),
large_set: other,
}
} else if other.len() <= self.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
IntersectionInner::Search {
small_iter: other.iter(),
large_set: self,
}
} else {
IntersectionInner::Stitch {
a: self.iter(),
b: other.iter(),
}
}
}
},
}
}
@ -544,43 +600,61 @@ impl<T: Ord> BTreeSet<T> {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn is_subset(&self, other: &BTreeSet<T>) -> bool {
// Same result as self.difference(other).next().is_none()
// but the 3 paths below are faster (in order: hugely, 20%, 5%).
// but the code below is faster (hugely in some cases).
if self.len() > other.len() {
false
} else if self.len() > other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
// Self is not much smaller than other set.
// Stolen from TreeMap
let mut x = self.iter();
let mut y = other.iter();
let mut a = x.next();
let mut b = y.next();
while a.is_some() {
if b.is_none() {
return false;
}
let a1 = a.unwrap();
let b1 = b.unwrap();
match b1.cmp(a1) {
Less => (),
Greater => return false,
Equal => a = x.next(),
}
b = y.next();
}
true
return false;
}
let (self_min, self_max) = if let (Some(self_min), Some(self_max)) =
(self.iter().next(), self.iter().next_back())
{
(self_min, self_max)
} else {
// Big difference in number of elements, or both sets are empty.
// Iterate the small set, searching for matches in the large set.
for next in self {
return true; // self is empty
};
let (other_min, other_max) = if let (Some(other_min), Some(other_max)) =
(other.iter().next(), other.iter().next_back())
{
(other_min, other_max)
} else {
return false; // other is empty
};
let mut self_iter = self.iter();
match self_min.cmp(other_min) {
Less => return false,
Equal => {
self_iter.next();
}
Greater => (),
}
match self_max.cmp(other_max) {
Greater => return false,
Equal => {
self_iter.next_back();
}
Less => (),
}
if self_iter.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
// Big difference in number of elements.
for next in self_iter {
if !other.contains(next) {
return false;
}
}
true
} else {
// Self is not much smaller than other set.
let mut other_iter = other.iter();
other_iter.next();
other_iter.next_back();
let mut self_next = self_iter.next();
while let Some(self1) = self_next {
match other_iter.next().map_or(Less, |other1| self1.cmp(other1)) {
Less => return false,
Equal => self_next = self_iter.next(),
Greater => (),
}
}
}
true
}
/// Returns `true` if the set is a superset of another,
@ -1120,6 +1194,7 @@ impl<T> Clone for Difference<'_, T> {
self_iter: self_iter.clone(),
other_set,
},
DifferenceInner::Iterate(iter) => DifferenceInner::Iterate(iter.clone()),
},
}
}
@ -1138,7 +1213,7 @@ impl<'a, T: Ord> Iterator for Difference<'a, T> {
loop {
match other_iter
.peek()
.map_or(Less, |other_next| Ord::cmp(self_next, other_next))
.map_or(Less, |other_next| self_next.cmp(other_next))
{
Less => return Some(self_next),
Equal => {
@ -1160,6 +1235,7 @@ impl<'a, T: Ord> Iterator for Difference<'a, T> {
return Some(self_next);
}
},
DifferenceInner::Iterate(iter) => iter.next(),
}
}
@ -1167,12 +1243,13 @@ impl<'a, T: Ord> Iterator for Difference<'a, T> {
let (self_len, other_len) = match &self.inner {
DifferenceInner::Stitch {
self_iter,
other_iter
other_iter,
} => (self_iter.len(), other_iter.len()),
DifferenceInner::Search {
self_iter,
other_set
other_set,
} => (self_iter.len(), other_set.len()),
DifferenceInner::Iterate(iter) => (iter.len(), 0),
};
(self_len.saturating_sub(other_len), Some(self_len))
}
@ -1234,6 +1311,7 @@ impl<T> Clone for Intersection<'_, T> {
small_iter: small_iter.clone(),
large_set,
},
IntersectionInner::Answer(answer) => IntersectionInner::Answer(answer.clone()),
},
}
}
@ -1251,7 +1329,7 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> {
let mut a_next = a.next()?;
let mut b_next = b.next()?;
loop {
match Ord::cmp(a_next, b_next) {
match a_next.cmp(b_next) {
Less => a_next = a.next()?,
Greater => b_next = b.next()?,
Equal => return Some(a_next),
@ -1267,15 +1345,17 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> {
return Some(small_next);
}
},
IntersectionInner::Answer(answer) => answer.take(),
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let min_len = match &self.inner {
IntersectionInner::Stitch { a, b } => min(a.len(), b.len()),
IntersectionInner::Search { small_iter, .. } => small_iter.len(),
};
(0, Some(min_len))
match &self.inner {
IntersectionInner::Stitch { a, b } => (0, Some(min(a.len(), b.len()))),
IntersectionInner::Search { small_iter, .. } => (0, Some(small_iter.len())),
IntersectionInner::Answer(None) => (0, Some(0)),
IntersectionInner::Answer(Some(_)) => (1, Some(1)),
}
}
}

View File

@ -48,7 +48,9 @@ fn check<F>(a: &[i32], b: &[i32], expected: &[i32], f: F)
f(&set_a,
&set_b,
&mut |&x| {
assert_eq!(x, expected[i]);
if i < expected.len() {
assert_eq!(x, expected[i]);
}
i += 1;
true
});
@ -74,20 +76,20 @@ fn test_intersection() {
return;
}
let large = (0..1000).collect::<Vec<_>>();
let large = (0..100).collect::<Vec<_>>();
check_intersection(&[], &large, &[]);
check_intersection(&large, &[], &[]);
check_intersection(&[-1], &large, &[]);
check_intersection(&large, &[-1], &[]);
check_intersection(&[0], &large, &[0]);
check_intersection(&large, &[0], &[0]);
check_intersection(&[999], &large, &[999]);
check_intersection(&large, &[999], &[999]);
check_intersection(&[1000], &large, &[]);
check_intersection(&large, &[1000], &[]);
check_intersection(&[11, 5000, 1, 3, 77, 8924, 103],
check_intersection(&[99], &large, &[99]);
check_intersection(&large, &[99], &[99]);
check_intersection(&[100], &large, &[]);
check_intersection(&large, &[100], &[]);
check_intersection(&[11, 5000, 1, 3, 77, 8924],
&large,
&[1, 3, 11, 77, 103]);
&[1, 3, 11, 77]);
}
#[test]
@ -95,10 +97,15 @@ fn test_intersection_size_hint() {
let x: BTreeSet<i32> = [3, 4].iter().copied().collect();
let y: BTreeSet<i32> = [1, 2, 3].iter().copied().collect();
let mut iter = x.intersection(&y);
assert_eq!(iter.size_hint(), (0, Some(2)));
assert_eq!(iter.size_hint(), (1, Some(1)));
assert_eq!(iter.next(), Some(&3));
assert_eq!(iter.size_hint(), (0, Some(0)));
assert_eq!(iter.next(), None);
iter = y.intersection(&y);
assert_eq!(iter.size_hint(), (0, Some(3)));
assert_eq!(iter.next(), Some(&1));
assert_eq!(iter.size_hint(), (0, Some(2)));
}
#[test]
@ -111,6 +118,9 @@ fn test_difference() {
check_difference(&[1, 12], &[], &[1, 12]);
check_difference(&[], &[1, 2, 3, 9], &[]);
check_difference(&[1, 3, 5, 9, 11], &[3, 9], &[1, 5, 11]);
check_difference(&[1, 3, 5, 9, 11], &[3, 6, 9], &[1, 5, 11]);
check_difference(&[1, 3, 5, 9, 11], &[0, 1], &[3, 5, 9, 11]);
check_difference(&[1, 3, 5, 9, 11], &[11, 12], &[1, 3, 5, 9]);
check_difference(&[-5, 11, 22, 33, 40, 42],
&[-12, -5, 14, 23, 34, 38, 39, 50],
&[11, 22, 33, 40, 42]);
@ -119,18 +129,82 @@ fn test_difference() {
return;
}
let large = (0..1000).collect::<Vec<_>>();
let large = (0..100).collect::<Vec<_>>();
check_difference(&[], &large, &[]);
check_difference(&[-1], &large, &[-1]);
check_difference(&[0], &large, &[]);
check_difference(&[999], &large, &[]);
check_difference(&[1000], &large, &[1000]);
check_difference(&[11, 5000, 1, 3, 77, 8924, 103],
check_difference(&[99], &large, &[]);
check_difference(&[100], &large, &[100]);
check_difference(&[11, 5000, 1, 3, 77, 8924],
&large,
&[5000, 8924]);
check_difference(&large, &[], &large);
check_difference(&large, &[-1], &large);
check_difference(&large, &[1000], &large);
check_difference(&large, &[100], &large);
}
#[test]
fn test_difference_size_hint() {
let s246: BTreeSet<i32> = [2, 4, 6].iter().copied().collect();
let s23456: BTreeSet<i32> = (2..=6).collect();
let mut iter = s246.difference(&s23456);
assert_eq!(iter.size_hint(), (0, Some(3)));
assert_eq!(iter.next(), None);
let s12345: BTreeSet<i32> = (1..=5).collect();
iter = s246.difference(&s12345);
assert_eq!(iter.size_hint(), (0, Some(3)));
assert_eq!(iter.next(), Some(&6));
assert_eq!(iter.size_hint(), (0, Some(0)));
assert_eq!(iter.next(), None);
let s34567: BTreeSet<i32> = (3..=7).collect();
iter = s246.difference(&s34567);
assert_eq!(iter.size_hint(), (0, Some(3)));
assert_eq!(iter.next(), Some(&2));
assert_eq!(iter.size_hint(), (0, Some(2)));
assert_eq!(iter.next(), None);
let s1: BTreeSet<i32> = (-9..=1).collect();
iter = s246.difference(&s1);
assert_eq!(iter.size_hint(), (3, Some(3)));
let s2: BTreeSet<i32> = (-9..=2).collect();
iter = s246.difference(&s2);
assert_eq!(iter.size_hint(), (2, Some(2)));
assert_eq!(iter.next(), Some(&4));
assert_eq!(iter.size_hint(), (1, Some(1)));
let s23: BTreeSet<i32> = (2..=3).collect();
iter = s246.difference(&s23);
assert_eq!(iter.size_hint(), (1, Some(3)));
assert_eq!(iter.next(), Some(&4));
assert_eq!(iter.size_hint(), (1, Some(1)));
let s4: BTreeSet<i32> = (4..=4).collect();
iter = s246.difference(&s4);
assert_eq!(iter.size_hint(), (2, Some(3)));
assert_eq!(iter.next(), Some(&2));
assert_eq!(iter.size_hint(), (1, Some(2)));
assert_eq!(iter.next(), Some(&6));
assert_eq!(iter.size_hint(), (0, Some(0)));
assert_eq!(iter.next(), None);
let s56: BTreeSet<i32> = (5..=6).collect();
iter = s246.difference(&s56);
assert_eq!(iter.size_hint(), (1, Some(3)));
assert_eq!(iter.next(), Some(&2));
assert_eq!(iter.size_hint(), (0, Some(2)));
let s6: BTreeSet<i32> = (6..=19).collect();
iter = s246.difference(&s6);
assert_eq!(iter.size_hint(), (2, Some(2)));
assert_eq!(iter.next(), Some(&2));
assert_eq!(iter.size_hint(), (1, Some(1)));
let s7: BTreeSet<i32> = (7..=19).collect();
iter = s246.difference(&s7);
assert_eq!(iter.size_hint(), (3, Some(3)));
}
#[test]
@ -188,23 +262,23 @@ fn test_is_subset() {
assert_eq!(is_subset(&[1, 2], &[1, 2]), true);
assert_eq!(is_subset(&[1, 2], &[2, 3]), false);
assert_eq!(is_subset(&[-5, 11, 22, 33, 40, 42],
&[-12, -5, 14, 23, 11, 34, 22, 38, 33, 42, 39, 40]),
&[-12, -5, 11, 14, 22, 23, 33, 34, 38, 39, 40, 42]),
true);
assert_eq!(is_subset(&[-5, 11, 22, 33, 40, 42],
&[-12, -5, 14, 23, 34, 38, 22, 11]),
&[-12, -5, 11, 14, 22, 23, 34, 38]),
false);
if cfg!(miri) { // Miri is too slow
return;
}
let large = (0..1000).collect::<Vec<_>>();
let large = (0..100).collect::<Vec<_>>();
assert_eq!(is_subset(&[], &large), true);
assert_eq!(is_subset(&large, &[]), false);
assert_eq!(is_subset(&[-1], &large), false);
assert_eq!(is_subset(&[0], &large), true);
assert_eq!(is_subset(&[1, 2], &large), true);
assert_eq!(is_subset(&[999, 1000], &large), false);
assert_eq!(is_subset(&[99, 100], &large), false);
}
#[test]