Rollup merge of #82191 - Soveu:dedup, r=nagisa

Vec::dedup_by optimization

With this change, `Vec::dedup_by` drops duplicate items in place as it walks the vector, instead of partitioning the slice and truncating afterwards.
From my benchmarks it is around 10% faster when `T` is small, with no major regression otherwise.

I used `ptr::copy` instead of a conditional `ptr::copy_nonoverlapping`, because the latter had some weird performance issues on my Ryzen laptop (it was 50% slower there than on an Intel/Sandy Bridge laptop).
It would be good if someone were able to reproduce these results.
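For readers skimming the diff below, here is a minimal safe sketch of the read/write-cursor idea (illustrative only; `dedup_sketch` is a made-up helper, and the actual implementation further down works through raw pointers, drops duplicates in place, and repairs the `Vec` if `same_bucket` panics):

```rust
// Two-cursor dedup sketch on a Vec<i32>: `write` is one past the last
// retained element, `read` scans ahead over the rest of the vector.
fn dedup_sketch(v: &mut Vec<i32>) {
    if v.len() <= 1 {
        return;
    }
    let mut write = 1;
    for read in 1..v.len() {
        if v[read] != v[write - 1] {
            // Not a duplicate of the last retained element: move it into the gap.
            v[write] = v[read];
            write += 1;
        }
        // Duplicates are simply skipped; for i32 there is nothing to drop,
        // while the real code calls `ptr::drop_in_place` at this point.
    }
    // Everything past `write` is stale; the sketch just cuts it off.
    v.truncate(write);
}
```

For example, `dedup_sketch` turns `[1, 1, 2, 3, 3, 3]` into `[1, 2, 3]`, the same result the old implementation produced via `partition_dedup_by` followed by `truncate`.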
Dylan DPC, 2021-03-18 00:28:04 +01:00, committed by GitHub
commit 90797ef008
5 changed files with 306 additions and 6 deletions


@ -4,6 +4,7 @@
#![feature(btree_drain_filter)]
#![feature(map_first_last)]
#![feature(repr_simd)]
#![feature(slice_partition_dedup)]
#![feature(test)]
extern crate test;


@ -671,3 +671,92 @@ fn bench_map_fast(b: &mut Bencher) {
    let data = black_box([(0, 0); LEN]);
    b.iter(|| map_fast(&data));
}

fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
    let mask = if buf.len() < 8192 {
        0xFF
    } else if buf.len() < 200_000 {
        0xFFFF
    } else {
        0xFFFF_FFFF
    };

    for item in buf.iter_mut() {
        seed ^= seed << 13;
        seed ^= seed >> 17;
        seed ^= seed << 5;
        *item = seed & mask;
    }

    buf.sort();
}

fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
    let mut template = vec![0u32; sz];
    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
    random_sorted_fill(0x43, &mut template);

    let mut vec = template.clone();
    b.iter(|| {
        let len = {
            let (dedup, _) = vec.partition_dedup();
            dedup.len()
        };
        vec.truncate(len);

        black_box(vec.first());
        vec.clear();
        vec.extend_from_slice(&template);
    });
}

fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
    let mut template = vec![0u32; sz];
    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
    random_sorted_fill(0x43, &mut template);

    let mut vec = template.clone();
    b.iter(|| {
        vec.dedup();
        black_box(vec.first());
        vec.clear();
        vec.extend_from_slice(&template);
    });
}

#[bench]
fn bench_dedup_old_100(b: &mut Bencher) {
    bench_vec_dedup_old(b, 100);
}
#[bench]
fn bench_dedup_new_100(b: &mut Bencher) {
    bench_vec_dedup_new(b, 100);
}

#[bench]
fn bench_dedup_old_1000(b: &mut Bencher) {
    bench_vec_dedup_old(b, 1000);
}
#[bench]
fn bench_dedup_new_1000(b: &mut Bencher) {
    bench_vec_dedup_new(b, 1000);
}

#[bench]
fn bench_dedup_old_10000(b: &mut Bencher) {
    bench_vec_dedup_old(b, 10000);
}
#[bench]
fn bench_dedup_new_10000(b: &mut Bencher) {
    bench_vec_dedup_new(b, 10000);
}

#[bench]
fn bench_dedup_old_100000(b: &mut Bencher) {
    bench_vec_dedup_old(b, 100000);
}
#[bench]
fn bench_dedup_new_100000(b: &mut Bencher) {
    bench_vec_dedup_new(b, 100000);
}


@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
/// ```
#[stable(feature = "dedup_by", since = "1.16.0")]
pub fn dedup_by<F>(&mut self, same_bucket: F)
pub fn dedup_by<F>(&mut self, mut same_bucket: F)
where
F: FnMut(&mut T, &mut T) -> bool,
{
let len = {
let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
dedup.len()
};
self.truncate(len);
let len = self.len();
if len <= 1 {
return;
}
/* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
/* Offset of the element we want to check if it is duplicate */
read: usize,
/* Offset of the place where we want to place the non-duplicate
* when we find it. */
write: usize,
/* The Vec that would need correction if `same_bucket` panicked */
vec: &'a mut Vec<T, A>,
}
impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
fn drop(&mut self) {
/* This code gets executed when `same_bucket` panics */
/* SAFETY: invariant guarantees that `read - write`
* and `len - read` never overflow and that the copy is always
* in-bounds. */
unsafe {
let ptr = self.vec.as_mut_ptr();
let len = self.vec.len();
/* How many items were left when `same_bucket` paniced.
* Basically vec[read..].len() */
let items_left = len.wrapping_sub(self.read);
/* Pointer to first item in vec[write..write+items_left] slice */
let dropped_ptr = ptr.add(self.write);
/* Pointer to first item in vec[read..] slice */
let valid_ptr = ptr.add(self.read);
/* Copy `vec[read..]` to `vec[write..write+items_left]`.
* The slices can overlap, so `copy_nonoverlapping` cannot be used */
ptr::copy(valid_ptr, dropped_ptr, items_left);
/* How many items have been already dropped
* Basically vec[read..write].len() */
let dropped = self.read.wrapping_sub(self.write);
self.vec.set_len(len - dropped);
}
}
}
let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
let ptr = gap.vec.as_mut_ptr();
/* Drop items while going through Vec, it should be more efficient than
* doing slice partition_dedup + truncate */
/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
* are always in-bounds and read_ptr never aliases prev_ptr */
unsafe {
while gap.read < len {
let read_ptr = ptr.add(gap.read);
let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
/* We have found duplicate, drop it in-place */
ptr::drop_in_place(read_ptr);
} else {
let write_ptr = ptr.add(gap.write);
/* Because `read_ptr` can be equal to `write_ptr`, we either
* have to use `copy` or conditional `copy_nonoverlapping`.
* Looks like the first option is faster. */
ptr::copy(read_ptr, write_ptr, 1);
/* We have filled that place, so go further */
gap.write += 1;
}
gap.read += 1;
}
/* Technically we could let `gap` clean up with its Drop, but
* when `same_bucket` is guaranteed to not panic, this bloats a little
* the codegen, so we just do it manually */
gap.vec.set_len(gap.write);
mem::forget(gap);
}
}
/// Appends an element to the back of a collection.


@ -19,6 +19,7 @@
#![feature(int_bits_const)]
#![feature(vecdeque_binary_search)]
#![feature(slice_group_by)]
#![feature(slice_partition_dedup)]
#![feature(vec_extend_from_within)]
#![feature(vec_spare_capacity)]


@ -2102,6 +2102,132 @@ fn test_extend_from_within() {
assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
}
#[test]
fn test_vec_dedup_by() {
let mut vec: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];
vec.dedup_by(|a, b| a.abs() == b.abs());
assert_eq!(vec, [1, 2, 3, 1, -5, -2]);
}
#[test]
fn test_vec_dedup_empty() {
let mut vec: Vec<i32> = Vec::new();
vec.dedup();
assert_eq!(vec, []);
}
#[test]
fn test_vec_dedup_one() {
let mut vec = vec![12i32];
vec.dedup();
assert_eq!(vec, [12]);
}
#[test]
fn test_vec_dedup_multiple_ident() {
let mut vec = vec![12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];
vec.dedup();
assert_eq!(vec, [12, 11]);
}
#[test]
fn test_vec_dedup_partialeq() {
#[derive(Debug)]
struct Foo(i32, i32);
impl PartialEq for Foo {
fn eq(&self, other: &Foo) -> bool {
self.0 == other.0
}
}
let mut vec = vec![Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];
vec.dedup();
assert_eq!(vec, [Foo(0, 1), Foo(1, 7)]);
}
#[test]
fn test_vec_dedup() {
let mut vec: Vec<bool> = Vec::with_capacity(8);
let mut template = vec.clone();
for x in 0u8..255u8 {
vec.clear();
template.clear();
let iter = (0..8).map(move |bit| (x >> bit) & 1 == 1);
vec.extend(iter);
template.extend_from_slice(&vec);
let (dedup, _) = template.partition_dedup();
vec.dedup();
assert_eq!(vec, dedup);
}
}
#[test]
fn test_vec_dedup_panicking() {
#[derive(Debug)]
struct Panic {
drop_counter: &'static AtomicU32,
value: bool,
index: usize,
}
impl PartialEq for Panic {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl Drop for Panic {
fn drop(&mut self) {
let x = self.drop_counter.fetch_add(1, Ordering::SeqCst);
assert!(x != 4);
}
}
static DROP_COUNTER: AtomicU32 = AtomicU32::new(0);
let expected = [
Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
];
let mut vec = vec![
Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
// these elements get deduplicated
Panic { drop_counter: &DROP_COUNTER, value: false, index: 1 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 2 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 3 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 4 },
// here it panics
Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
];
let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
vec.dedup();
}));
let ok = vec.iter().zip(expected.iter()).all(|(x, y)| x.index == y.index);
if !ok {
panic!("expected: {:?}\ngot: {:?}\n", expected, vec);
}
}
// Regression test for issue #82533
#[test]
fn test_extend_from_within_panicing_clone() {