Change slice::to_vec to not use extend_from_slice

This also required adding a loop guard in case clone panics Add specialization for copy There is a better version for copy, so I've added specialization for that function and hopefully that should speed it up even more. Switch FromIter<slice::Iter> to use `to_vec` Test different unrolling version for to_vec Revert to impl From benchmarking, it appears this version is faster
2020-11-18 22:59:47 +00:00 · 2020-11-18 22:59:47 +00:00 · a9915581d7
commit a9915581d7
parent a1a13b2bc4
3 changed files with 85 additions and 17 deletions
--- a/library/alloc/src/slice.rs
+++ b/library/alloc/src/slice.rs
@ -155,13 +155,65 @@ mod hack {
    }

    #[inline]
-    pub fn to_vec<T, A: AllocRef>(s: &[T], alloc: A) -> Vec<T, A>
-    where
-        T: Clone,
-    {
-        let mut vec = Vec::with_capacity_in(s.len(), alloc);
-        vec.extend_from_slice(s);
-        vec
+    pub fn to_vec<T: ConvertVec, A: AllocRef>(s: &[T], alloc: A) -> Vec<T, A> {
+        T::to_vec(s, alloc)
+    }
+
+    pub trait ConvertVec {
+        fn to_vec<A: AllocRef>(s: &[Self], alloc: A) -> Vec<Self, A>
+        where
+            Self: Sized;
+    }
+
+    impl<T: Clone> ConvertVec for T {
+        #[inline]
+        default fn to_vec<A: AllocRef>(s: &[Self], alloc: A) -> Vec<Self, A> {
+            struct DropGuard<'a, T, A: AllocRef> {
+                vec: &'a mut Vec<T, A>,
+                num_init: usize,
+            }
+            impl<'a, T, A: AllocRef> Drop for DropGuard<'a, T, A> {
+                #[inline]
+                fn drop(&mut self) {
+                    // SAFETY:
+                    // items were marked initialized in the loop below
+                    unsafe {
+                        self.vec.set_len(self.num_init);
+                    }
+                }
+            }
+            let mut vec = Vec::with_capacity_in(s.len(), alloc);
+            let mut guard = DropGuard { vec: &mut vec, num_init: 0 };
+            let slots = guard.vec.spare_capacity_mut();
+            // .take(slots.len()) is necessary for LLVM to remove bounds checks
+            // and has better codegen than zip.
+            for (i, b) in s.iter().enumerate().take(slots.len()) {
+                guard.num_init = i;
+                slots[i].write(b.clone());
+            }
+            core::mem::forget(guard);
+            // SAFETY:
+            // the vec was allocated and initialized above to at least this length.
+            unsafe {
+                vec.set_len(s.len());
+            }
+            vec
+        }
+    }
+
+    impl<T: Copy> ConvertVec for T {
+        #[inline]
+        fn to_vec<A: AllocRef>(s: &[Self], alloc: A) -> Vec<Self, A> {
+            let mut v = Vec::with_capacity_in(s.len(), alloc);
+            // SAFETY:
+            // allocated above with the capacity of `s`, and initialize to `s.len()` in
+            // ptr::copy_to_non_overlapping below.
+            unsafe {
+                s.as_ptr().copy_to_nonoverlapping(v.as_mut_ptr(), s.len());
+                v.set_len(s.len());
+            }
+            v
+        }
    }
 }

--- a/library/alloc/src/vec.rs
+++ b/library/alloc/src/vec.rs
@ -2508,17 +2508,23 @@ where
    }
 }

-impl<'a, T: 'a> SpecFromIter<&'a T, slice::Iter<'a, T>> for Vec<T>
-where
-    T: Copy,
-{
-    // reuses the extend specialization for T: Copy
+// This utilizes `iterator.as_slice().to_vec()` since spec_extend
+// must take more steps to reason about the final capacity + length
+// and thus do more work. `to_vec()` directly allocates the correct amount
+// and fills it exactly.
+impl<'a, T: 'a + Clone> SpecFromIter<&'a T, slice::Iter<'a, T>> for Vec<T> {
+    #[cfg(not(test))]
    fn from_iter(iterator: slice::Iter<'a, T>) -> Self {
-        let mut vec = Vec::new();
-        // must delegate to spec_extend() since extend() itself delegates
-        // to spec_from for empty Vecs
-        vec.spec_extend(iterator);
-        vec
+        iterator.as_slice().to_vec()
+    }
+
+    // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is
+    // required for this method definition, is not available. Instead use the
+    // `slice::to_vec`  function which is only available with cfg(test)
+    // NB see the slice::hack module in slice.rs for more information
+    #[cfg(test)]
+    fn from_iter(iterator: slice::Iter<'a, T>) -> Self {
+        crate::slice::to_vec(iterator.as_slice(), Global)
    }
 }

--- a/src/test/codegen/to_vec.rs
+++ b/src/test/codegen/to_vec.rs
@ -0,0 +1,10 @@
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @copy_to_vec
+#[no_mangle]
+fn copy_to_vec(s: &[u64]) -> Vec<u64> {
+  s.to_vec()
+  // CHECK: call void @llvm.memcpy
+}