Auto merge of #80824 - cuviper:heap-clones, r=kennytm
Try to avoid locals when cloning into Box/Rc/Arc. For generic `T: Clone`, we can allocate an uninitialized box beforehand, which gives the optimizer a chance to create the clone directly in the heap. For `T: Copy`, we can go further and do a simple memory copy, regardless of optimization level. The same applies to `Rc`/`Arc::make_mut` when they must clone the data.
This commit is contained in:
commit
116d1a7056
@ -397,3 +397,26 @@ pub mod __alloc_error_handler {
|
||||
unsafe { oom_impl(layout) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialize clones into pre-allocated, uninitialized memory.
|
||||
/// Used by `Box::clone` and `Rc`/`Arc::make_mut`.
///
/// Crate-private helper trait with a single unsafe method; it is implemented
/// below for every `T: Clone`, with a specialized implementation for `T: Copy`.
|
||||
pub(crate) trait WriteCloneIntoRaw: Sized {
|
||||
/// Writes a clone of `self` into the memory pointed to by `target`.
///
/// # Safety
///
/// NOTE(review): no contract is written down for this method. Both
/// implementations in this file store exactly one `Self` through `target`
/// (via `write` / `copy_from_nonoverlapping`) without dropping whatever was
/// there before, so callers presumably must pass a pointer that is valid for
/// writes of one `Self`, properly aligned, and not overlapping `self`.
/// TODO confirm and document the intended contract.
unsafe fn write_clone_into_raw(&self, target: *mut Self);
|
||||
}
|
||||
|
||||
// Blanket implementation for every `Clone` type. The method is declared
// `default`, so a more specific impl can override it (the `T: Copy` impl
// further down does exactly that).
impl<T: Clone> WriteCloneIntoRaw for T {
|
||||
#[inline]
|
||||
default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
|
||||
// Having allocated *first* may allow the optimizer to create
|
||||
// the cloned value in-place, skipping the local and move.
|
||||
// SAFETY (review note): `write` stores the freshly cloned value through
// `target` without dropping any previous contents. Nothing here checks the
// pointer; soundness relies on the caller passing a `target` that is valid
// for writing one `Self`.
unsafe { target.write(self.clone()) };
|
||||
}
|
||||
}
|
||||
|
||||
// Specialization for `Copy` types: replaces the `default` method of the
// blanket `T: Clone` impl with a plain memory copy, so `Clone::clone` is not
// called on this path.
impl<T: Copy> WriteCloneIntoRaw for T {
|
||||
#[inline]
|
||||
unsafe fn write_clone_into_raw(&self, target: *mut Self) {
|
||||
// We can always copy in-place, without ever involving a local value.
|
||||
// SAFETY (review note): copies exactly one `Self` from `self` to `target`.
// This requires `target` to be valid for writes and not to overlap `self`.
// The callers visible in this diff pass storage they have just allocated
// as uninitialized memory; confirm the same holds for any other caller.
unsafe { target.copy_from_nonoverlapping(self, 1) };
|
||||
}
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ use core::pin::Pin;
|
||||
use core::ptr::{self, Unique};
|
||||
use core::task::{Context, Poll};
|
||||
|
||||
use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout};
|
||||
use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw};
|
||||
use crate::borrow::Cow;
|
||||
use crate::raw_vec::RawVec;
|
||||
use crate::str::from_boxed_utf8_unchecked;
|
||||
@ -1014,10 +1014,14 @@ impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> {
|
||||
/// // But they are unique objects
|
||||
/// assert_ne!(&*x as *const i32, &*y as *const i32);
|
||||
/// ```
|
||||
#[rustfmt::skip]
|
||||
#[inline]
|
||||
// NOTE(review): this is a diff hunk rendered without +/- markers. The
// `Self::new_in(...)` line directly inside the body appears to be the removed
// one-line implementation; the pre-allocating sequence after it is its
// replacement. Only one of the two belongs to any real version of the function.
fn clone(&self) -> Self {
|
||||
Self::new_in((**self).clone(), self.1.clone())
|
||||
// Pre-allocate memory to allow writing the cloned value directly.
|
||||
let mut boxed = Self::new_uninit_in(self.1.clone());
|
||||
// SAFETY (review note): `boxed` was just created by `new_uninit_in`, and
// `write_clone_into_raw` is called on its pointer before `assume_init`.
// This assumes that call fully initializes the value — confirm. If
// `clone()` panics, `assume_init` is never reached.
unsafe {
|
||||
(**self).write_clone_into_raw(boxed.as_mut_ptr());
|
||||
boxed.assume_init()
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies `source`'s contents into `self` without creating a new allocation.
|
||||
|
@ -263,7 +263,9 @@ use core::pin::Pin;
|
||||
use core::ptr::{self, NonNull};
|
||||
use core::slice::from_raw_parts_mut;
|
||||
|
||||
use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
|
||||
use crate::alloc::{
|
||||
box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
|
||||
};
|
||||
use crate::borrow::{Cow, ToOwned};
|
||||
use crate::string::String;
|
||||
use crate::vec::Vec;
|
||||
@ -1037,18 +1039,26 @@ impl<T: Clone> Rc<T> {
|
||||
#[stable(feature = "rc_unique", since = "1.4.0")]
|
||||
pub fn make_mut(this: &mut Self) -> &mut T {
|
||||
if Rc::strong_count(this) != 1 {
|
||||
// Gotta clone the data, there are other Rcs
|
||||
*this = Rc::new((**this).clone())
|
||||
// Gotta clone the data, there are other Rcs.
|
||||
// Pre-allocate memory to allow writing the cloned value directly.
|
||||
let mut rc = Self::new_uninit();
|
||||
unsafe {
|
||||
let data = Rc::get_mut_unchecked(&mut rc);
|
||||
(**this).write_clone_into_raw(data.as_mut_ptr());
|
||||
*this = rc.assume_init();
|
||||
}
|
||||
} else if Rc::weak_count(this) != 0 {
|
||||
// Can just steal the data, all that's left is Weaks
|
||||
let mut rc = Self::new_uninit();
|
||||
unsafe {
|
||||
let mut swap = Rc::new(ptr::read(&this.ptr.as_ref().value));
|
||||
mem::swap(this, &mut swap);
|
||||
swap.inner().dec_strong();
|
||||
let data = Rc::get_mut_unchecked(&mut rc);
|
||||
data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
|
||||
|
||||
this.inner().dec_strong();
|
||||
// Remove implicit strong-weak ref (no need to craft a fake
|
||||
// Weak here -- we know other Weaks can clean up for us)
|
||||
swap.inner().dec_weak();
|
||||
forget(swap);
|
||||
this.inner().dec_weak();
|
||||
ptr::write(this, rc.assume_init());
|
||||
}
|
||||
}
|
||||
// This unsafety is ok because we're guaranteed that the pointer
|
||||
|
@ -22,7 +22,9 @@ use core::slice::from_raw_parts_mut;
|
||||
use core::sync::atomic;
|
||||
use core::sync::atomic::Ordering::{Acquire, Relaxed, Release, SeqCst};
|
||||
|
||||
use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
|
||||
use crate::alloc::{
|
||||
box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
|
||||
};
|
||||
use crate::borrow::{Cow, ToOwned};
|
||||
use crate::boxed::Box;
|
||||
use crate::rc::is_dangling;
|
||||
@ -1369,8 +1371,14 @@ impl<T: Clone> Arc<T> {
|
||||
// weak count, there's no chance the ArcInner itself could be
|
||||
// deallocated.
|
||||
if this.inner().strong.compare_exchange(1, 0, Acquire, Relaxed).is_err() {
|
||||
// Another strong pointer exists; clone
|
||||
*this = Arc::new((**this).clone());
|
||||
// Another strong pointer exists, so we must clone.
|
||||
// Pre-allocate memory to allow writing the cloned value directly.
|
||||
let mut arc = Self::new_uninit();
|
||||
unsafe {
|
||||
let data = Arc::get_mut_unchecked(&mut arc);
|
||||
(**this).write_clone_into_raw(data.as_mut_ptr());
|
||||
*this = arc.assume_init();
|
||||
}
|
||||
} else if this.inner().weak.load(Relaxed) != 1 {
|
||||
// Relaxed suffices in the above because this is fundamentally an
|
||||
// optimization: we are always racing with weak pointers being
|
||||
@ -1386,17 +1394,14 @@ impl<T: Clone> Arc<T> {
|
||||
|
||||
// Materialize our own implicit weak pointer, so that it can clean
|
||||
// up the ArcInner as needed.
|
||||
let weak = Weak { ptr: this.ptr };
|
||||
let _weak = Weak { ptr: this.ptr };
|
||||
|
||||
// mark the data itself as already deallocated
|
||||
// Can just steal the data, all that's left is Weaks
|
||||
let mut arc = Self::new_uninit();
|
||||
unsafe {
|
||||
// there is no data race in the implicit write caused by `read`
|
||||
// here (due to zeroing) because data is no longer accessed by
|
||||
// other threads (due to there being no more strong refs at this
|
||||
// point).
|
||||
let mut swap = Arc::new(ptr::read(&weak.ptr.as_ref().data));
|
||||
mem::swap(this, &mut swap);
|
||||
mem::forget(swap);
|
||||
let data = Arc::get_mut_unchecked(&mut arc);
|
||||
data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
|
||||
ptr::write(this, arc.assume_init());
|
||||
}
|
||||
} else {
|
||||
// We were the sole reference of either kind; bump back up the
|
||||
|
Loading…
Reference in New Issue
Block a user