Rollup merge of #82191 - Soveu:dedup, r=nagisa
Vec::dedup_by optimization. `Vec::dedup_by` now drops duplicate items in place as it walks the vector. In my benchmarks it is around 10% faster when `T` is small, with no major regression otherwise. I used `ptr::copy` instead of a conditional `ptr::copy_nonoverlapping`, because the latter had some odd performance issues on my Ryzen laptop (it was 50% slower there than on an Intel/Sandy Bridge laptop). It would be good if someone could reproduce these results.
This commit is contained in:
commit
90797ef008
|
@ -4,6 +4,7 @@
|
||||||
#![feature(btree_drain_filter)]
|
#![feature(btree_drain_filter)]
|
||||||
#![feature(map_first_last)]
|
#![feature(map_first_last)]
|
||||||
#![feature(repr_simd)]
|
#![feature(repr_simd)]
|
||||||
|
#![feature(slice_partition_dedup)]
|
||||||
#![feature(test)]
|
#![feature(test)]
|
||||||
|
|
||||||
extern crate test;
|
extern crate test;
|
||||||
|
|
|
@ -671,3 +671,92 @@ fn bench_map_fast(b: &mut Bencher) {
|
||||||
let data = black_box([(0, 0); LEN]);
|
let data = black_box([(0, 0); LEN]);
|
||||||
b.iter(|| map_fast(&data));
|
b.iter(|| map_fast(&data));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fills `buf` with pseudo-random values from an xorshift32 generator seeded
/// with `seed`, then sorts it ascending so benchmarks get deterministic,
/// duplicate-rich input.
///
/// The mask scales the value range with the buffer length: small buffers get
/// a tiny range (lots of duplicates), huge buffers get the full `u32` range.
fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
    let mask = if buf.len() < 8192 {
        0xFF
    } else if buf.len() < 200_000 {
        0xFFFF
    } else {
        0xFFFF_FFFF
    };

    for item in buf.iter_mut() {
        // xorshift32 step (Marsaglia) — cheap, deterministic PRNG.
        seed ^= seed << 13;
        seed ^= seed >> 17;
        seed ^= seed << 5;

        *item = seed & mask;
    }

    // `u32` is totally ordered and equal values are indistinguishable, so the
    // non-allocating unstable sort produces the same result as `sort`, faster.
    buf.sort_unstable();
}
|
||||||
|
|
||||||
|
fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
|
||||||
|
let mut template = vec![0u32; sz];
|
||||||
|
b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
|
||||||
|
random_sorted_fill(0x43, &mut template);
|
||||||
|
|
||||||
|
let mut vec = template.clone();
|
||||||
|
b.iter(|| {
|
||||||
|
let len = {
|
||||||
|
let (dedup, _) = vec.partition_dedup();
|
||||||
|
dedup.len()
|
||||||
|
};
|
||||||
|
vec.truncate(len);
|
||||||
|
|
||||||
|
black_box(vec.first());
|
||||||
|
vec.clear();
|
||||||
|
vec.extend_from_slice(&template);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
|
||||||
|
let mut template = vec![0u32; sz];
|
||||||
|
b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
|
||||||
|
random_sorted_fill(0x43, &mut template);
|
||||||
|
|
||||||
|
let mut vec = template.clone();
|
||||||
|
b.iter(|| {
|
||||||
|
vec.dedup();
|
||||||
|
black_box(vec.first());
|
||||||
|
vec.clear();
|
||||||
|
vec.extend_from_slice(&template);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_old_100(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_old(b, 100);
|
||||||
|
}
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_new_100(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_new(b, 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_old_1000(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_old(b, 1000);
|
||||||
|
}
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_new_1000(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_new(b, 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_old_10000(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_old(b, 10000);
|
||||||
|
}
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_new_10000(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_new(b, 10000);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_old_100000(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_old(b, 100000);
|
||||||
|
}
|
||||||
|
#[bench]
|
||||||
|
fn bench_dedup_new_100000(b: &mut Bencher) {
|
||||||
|
bench_vec_dedup_new(b, 100000);
|
||||||
|
}
|
||||||
|
|
|
@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
|
||||||
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
|
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
|
||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "dedup_by", since = "1.16.0")]
|
#[stable(feature = "dedup_by", since = "1.16.0")]
|
||||||
pub fn dedup_by<F>(&mut self, same_bucket: F)
|
pub fn dedup_by<F>(&mut self, mut same_bucket: F)
|
||||||
where
|
where
|
||||||
F: FnMut(&mut T, &mut T) -> bool,
|
F: FnMut(&mut T, &mut T) -> bool,
|
||||||
{
|
{
|
||||||
let len = {
|
let len = self.len();
|
||||||
let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
|
if len <= 1 {
|
||||||
dedup.len()
|
return;
|
||||||
};
|
}
|
||||||
self.truncate(len);
|
|
||||||
|
/* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
|
||||||
|
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
|
||||||
|
/* Offset of the element we want to check if it is duplicate */
|
||||||
|
read: usize,
|
||||||
|
|
||||||
|
/* Offset of the place where we want to place the non-duplicate
|
||||||
|
* when we find it. */
|
||||||
|
write: usize,
|
||||||
|
|
||||||
|
/* The Vec that would need correction if `same_bucket` panicked */
|
||||||
|
vec: &'a mut Vec<T, A>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
/* This code gets executed when `same_bucket` panics */
|
||||||
|
|
||||||
|
/* SAFETY: invariant guarantees that `read - write`
|
||||||
|
* and `len - read` never overflow and that the copy is always
|
||||||
|
* in-bounds. */
|
||||||
|
unsafe {
|
||||||
|
let ptr = self.vec.as_mut_ptr();
|
||||||
|
let len = self.vec.len();
|
||||||
|
|
||||||
|
/* How many items were left when `same_bucket` paniced.
|
||||||
|
* Basically vec[read..].len() */
|
||||||
|
let items_left = len.wrapping_sub(self.read);
|
||||||
|
|
||||||
|
/* Pointer to first item in vec[write..write+items_left] slice */
|
||||||
|
let dropped_ptr = ptr.add(self.write);
|
||||||
|
/* Pointer to first item in vec[read..] slice */
|
||||||
|
let valid_ptr = ptr.add(self.read);
|
||||||
|
|
||||||
|
/* Copy `vec[read..]` to `vec[write..write+items_left]`.
|
||||||
|
* The slices can overlap, so `copy_nonoverlapping` cannot be used */
|
||||||
|
ptr::copy(valid_ptr, dropped_ptr, items_left);
|
||||||
|
|
||||||
|
/* How many items have been already dropped
|
||||||
|
* Basically vec[read..write].len() */
|
||||||
|
let dropped = self.read.wrapping_sub(self.write);
|
||||||
|
|
||||||
|
self.vec.set_len(len - dropped);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
|
||||||
|
let ptr = gap.vec.as_mut_ptr();
|
||||||
|
|
||||||
|
/* Drop items while going through Vec, it should be more efficient than
|
||||||
|
* doing slice partition_dedup + truncate */
|
||||||
|
|
||||||
|
/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
|
||||||
|
* are always in-bounds and read_ptr never aliases prev_ptr */
|
||||||
|
unsafe {
|
||||||
|
while gap.read < len {
|
||||||
|
let read_ptr = ptr.add(gap.read);
|
||||||
|
let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
|
||||||
|
|
||||||
|
if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
|
||||||
|
/* We have found duplicate, drop it in-place */
|
||||||
|
ptr::drop_in_place(read_ptr);
|
||||||
|
} else {
|
||||||
|
let write_ptr = ptr.add(gap.write);
|
||||||
|
|
||||||
|
/* Because `read_ptr` can be equal to `write_ptr`, we either
|
||||||
|
* have to use `copy` or conditional `copy_nonoverlapping`.
|
||||||
|
* Looks like the first option is faster. */
|
||||||
|
ptr::copy(read_ptr, write_ptr, 1);
|
||||||
|
|
||||||
|
/* We have filled that place, so go further */
|
||||||
|
gap.write += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
gap.read += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Technically we could let `gap` clean up with its Drop, but
|
||||||
|
* when `same_bucket` is guaranteed to not panic, this bloats a little
|
||||||
|
* the codegen, so we just do it manually */
|
||||||
|
gap.vec.set_len(gap.write);
|
||||||
|
mem::forget(gap);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Appends an element to the back of a collection.
|
/// Appends an element to the back of a collection.
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#![feature(int_bits_const)]
|
#![feature(int_bits_const)]
|
||||||
#![feature(vecdeque_binary_search)]
|
#![feature(vecdeque_binary_search)]
|
||||||
#![feature(slice_group_by)]
|
#![feature(slice_group_by)]
|
||||||
|
#![feature(slice_partition_dedup)]
|
||||||
#![feature(vec_extend_from_within)]
|
#![feature(vec_extend_from_within)]
|
||||||
#![feature(vec_spare_capacity)]
|
#![feature(vec_spare_capacity)]
|
||||||
|
|
||||||
|
|
|
@ -2102,6 +2102,132 @@ fn test_extend_from_within() {
|
||||||
assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
|
assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_vec_dedup_by() {
    let mut v: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];

    // Treat values with equal magnitude as duplicates of each other.
    v.dedup_by(|a, b| a.abs() == b.abs());

    assert_eq!(v, [1, 2, 3, 1, -5, -2]);
}

#[test]
fn test_vec_dedup_empty() {
    // Dedup on an empty vector is a no-op.
    let mut v: Vec<i32> = Vec::new();

    v.dedup();

    assert_eq!(v, []);
}

#[test]
fn test_vec_dedup_one() {
    // A single element is trivially unique.
    let mut v = vec![12i32];

    v.dedup();

    assert_eq!(v, [12]);
}

#[test]
fn test_vec_dedup_multiple_ident() {
    // Long runs of identical values collapse down to one element per run.
    let mut v = vec![12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];

    v.dedup();

    assert_eq!(v, [12, 11]);
}

#[test]
fn test_vec_dedup_partialeq() {
    // Equality looks only at the first field; the second field records which
    // concrete element survived the dedup.
    #[derive(Debug)]
    struct Foo(i32, i32);

    impl PartialEq for Foo {
        fn eq(&self, other: &Foo) -> bool {
            self.0 == other.0
        }
    }

    let mut v = vec![Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];

    v.dedup();
    // The first element of each equal run is the one that is kept.
    assert_eq!(v, [Foo(0, 1), Foo(1, 7)]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_vec_dedup() {
    // Exhaustively check every 8-bit boolean pattern: `Vec::dedup` must agree
    // with the reference implementation `slice::partition_dedup`.
    let mut vec: Vec<bool> = Vec::with_capacity(8);
    let mut template = vec.clone();

    for x in 0u8..255u8 {
        vec.clear();
        template.clear();

        // Expand the bits of `x` into a sequence of booleans.
        vec.extend((0..8).map(move |bit| (x >> bit) & 1 == 1));
        template.extend_from_slice(&vec);

        let (dedup, _) = template.partition_dedup();
        vec.dedup();

        assert_eq!(vec, dedup);
    }
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_vec_dedup_panicking() {
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct Panic {
|
||||||
|
drop_counter: &'static AtomicU32,
|
||||||
|
value: bool,
|
||||||
|
index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq for Panic {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
self.value == other.value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for Panic {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let x = self.drop_counter.fetch_add(1, Ordering::SeqCst);
|
||||||
|
assert!(x != 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static DROP_COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||||
|
let expected = [
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
|
||||||
|
];
|
||||||
|
let mut vec = vec![
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
|
||||||
|
// these elements get deduplicated
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 1 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 2 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 3 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 4 },
|
||||||
|
// here it panics
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
|
||||||
|
Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
|
||||||
|
];
|
||||||
|
|
||||||
|
let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||||
|
vec.dedup();
|
||||||
|
}));
|
||||||
|
|
||||||
|
let ok = vec.iter().zip(expected.iter()).all(|(x, y)| x.index == y.index);
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
panic!("expected: {:?}\ngot: {:?}\n", expected, vec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Regression test for issue #82533
|
// Regression test for issue #82533
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extend_from_within_panicing_clone() {
|
fn test_extend_from_within_panicing_clone() {
|
||||||
|
|
Loading…
Reference in New Issue