Auto merge of #80824 - cuviper:heap-clones, r=kennytm

Try to avoid locals when cloning into Box/Rc/Arc

For generic `T: Clone`, we can allocate an uninitialized box beforehand,
which gives the optimizer a chance to create the clone directly in the
heap. For `T: Copy`, we can go further and do a simple memory copy,
regardless of optimization level.

The same applies to `Rc`/`Arc::make_mut` when they must clone the data.
This commit is contained in:
bors 2021-01-13 11:11:34 +00:00
commit 116d1a7056
4 changed files with 65 additions and 23 deletions

View File

@ -397,3 +397,26 @@ pub mod __alloc_error_handler {
unsafe { oom_impl(layout) }
}
}
/// Specialize clones into pre-allocated, uninitialized memory.
/// Used by `Box::clone` and `Rc`/`Arc::make_mut`.
pub(crate) trait WriteCloneIntoRaw: Sized {
unsafe fn write_clone_into_raw(&self, target: *mut Self);
}
impl<T: Clone> WriteCloneIntoRaw for T {
#[inline]
default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
// Having allocated *first* may allow the optimizer to create
// the cloned value in-place, skipping the local and move.
unsafe { target.write(self.clone()) };
}
}
impl<T: Copy> WriteCloneIntoRaw for T {
#[inline]
unsafe fn write_clone_into_raw(&self, target: *mut Self) {
// We can always copy in-place, without ever involving a local value.
unsafe { target.copy_from_nonoverlapping(self, 1) };
}
}

View File

@ -151,7 +151,7 @@ use core::pin::Pin;
use core::ptr::{self, Unique};
use core::task::{Context, Poll};
use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout};
use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw};
use crate::borrow::Cow;
use crate::raw_vec::RawVec;
use crate::str::from_boxed_utf8_unchecked;
@ -1014,10 +1014,14 @@ impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> {
/// // But they are unique objects
/// assert_ne!(&*x as *const i32, &*y as *const i32);
/// ```
#[rustfmt::skip]
#[inline]
fn clone(&self) -> Self {
Self::new_in((**self).clone(), self.1.clone())
// Pre-allocate memory to allow writing the cloned value directly.
let mut boxed = Self::new_uninit_in(self.1.clone());
unsafe {
(**self).write_clone_into_raw(boxed.as_mut_ptr());
boxed.assume_init()
}
}
/// Copies `source`'s contents into `self` without creating a new allocation.

View File

@ -263,7 +263,9 @@ use core::pin::Pin;
use core::ptr::{self, NonNull};
use core::slice::from_raw_parts_mut;
use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
use crate::alloc::{
box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
};
use crate::borrow::{Cow, ToOwned};
use crate::string::String;
use crate::vec::Vec;
@ -1037,18 +1039,26 @@ impl<T: Clone> Rc<T> {
#[stable(feature = "rc_unique", since = "1.4.0")]
pub fn make_mut(this: &mut Self) -> &mut T {
if Rc::strong_count(this) != 1 {
// Gotta clone the data, there are other Rcs
*this = Rc::new((**this).clone())
// Gotta clone the data, there are other Rcs.
// Pre-allocate memory to allow writing the cloned value directly.
let mut rc = Self::new_uninit();
unsafe {
let data = Rc::get_mut_unchecked(&mut rc);
(**this).write_clone_into_raw(data.as_mut_ptr());
*this = rc.assume_init();
}
} else if Rc::weak_count(this) != 0 {
// Can just steal the data, all that's left is Weaks
let mut rc = Self::new_uninit();
unsafe {
let mut swap = Rc::new(ptr::read(&this.ptr.as_ref().value));
mem::swap(this, &mut swap);
swap.inner().dec_strong();
let data = Rc::get_mut_unchecked(&mut rc);
data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
this.inner().dec_strong();
// Remove implicit strong-weak ref (no need to craft a fake
// Weak here -- we know other Weaks can clean up for us)
swap.inner().dec_weak();
forget(swap);
this.inner().dec_weak();
ptr::write(this, rc.assume_init());
}
}
// This unsafety is ok because we're guaranteed that the pointer

View File

@ -22,7 +22,9 @@ use core::slice::from_raw_parts_mut;
use core::sync::atomic;
use core::sync::atomic::Ordering::{Acquire, Relaxed, Release, SeqCst};
use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
use crate::alloc::{
box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
};
use crate::borrow::{Cow, ToOwned};
use crate::boxed::Box;
use crate::rc::is_dangling;
@ -1369,8 +1371,14 @@ impl<T: Clone> Arc<T> {
// weak count, there's no chance the ArcInner itself could be
// deallocated.
if this.inner().strong.compare_exchange(1, 0, Acquire, Relaxed).is_err() {
// Another strong pointer exists; clone
*this = Arc::new((**this).clone());
// Another strong pointer exists, so we must clone.
// Pre-allocate memory to allow writing the cloned value directly.
let mut arc = Self::new_uninit();
unsafe {
let data = Arc::get_mut_unchecked(&mut arc);
(**this).write_clone_into_raw(data.as_mut_ptr());
*this = arc.assume_init();
}
} else if this.inner().weak.load(Relaxed) != 1 {
// Relaxed suffices in the above because this is fundamentally an
// optimization: we are always racing with weak pointers being
@ -1386,17 +1394,14 @@ impl<T: Clone> Arc<T> {
// Materialize our own implicit weak pointer, so that it can clean
// up the ArcInner as needed.
let weak = Weak { ptr: this.ptr };
let _weak = Weak { ptr: this.ptr };
// mark the data itself as already deallocated
// Can just steal the data, all that's left is Weaks
let mut arc = Self::new_uninit();
unsafe {
// there is no data race in the implicit write caused by `read`
// here (due to zeroing) because data is no longer accessed by
// other threads (due to there being no more strong refs at this
// point).
let mut swap = Arc::new(ptr::read(&weak.ptr.as_ref().data));
mem::swap(this, &mut swap);
mem::forget(swap);
let data = Arc::get_mut_unchecked(&mut arc);
data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
ptr::write(this, arc.assume_init());
}
} else {
// We were the sole reference of either kind; bump back up the