diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs index 161493f3892..90bc2f9769c 100644 --- a/src/liballoc/slice.rs +++ b/src/liballoc/slice.rs @@ -566,15 +566,17 @@ impl> SliceConcatExt for [V] { } fn join(&self, sep: &T) -> Vec { + let mut iter = self.iter(); + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; let size = self.iter().fold(0, |acc, v| acc + v.borrow().len()); let mut result = Vec::with_capacity(size + self.len()); - let mut first = true; - for v in self { - if first { - first = false - } else { - result.push(sep.clone()) - } + result.extend_from_slice(first.borrow()); + + for v in iter { + result.push(sep.clone()); result.extend_from_slice(v.borrow()) } result diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 823e56b64e3..32ca8d1fa5e 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -86,47 +86,13 @@ impl> SliceConcatExt for [S] { type Output = String; fn concat(&self) -> String { - if self.is_empty() { - return String::new(); - } - - // `len` calculation may overflow but push_str will check boundaries - let len = self.iter().map(|s| s.borrow().len()).sum(); - let mut result = String::with_capacity(len); - - for s in self { - result.push_str(s.borrow()) - } - - result + self.join("") } fn join(&self, sep: &str) -> String { - if self.is_empty() { - return String::new(); + unsafe { + String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) ) } - - // concat is faster - if sep.is_empty() { - return self.concat(); - } - - // this is wrong without the guarantee that `self` is non-empty - // `len` calculation may overflow but push_str but will check boundaries - let len = sep.len() * (self.len() - 1) + - self.iter().map(|s| s.borrow().len()).sum::(); - let mut result = String::with_capacity(len); - let mut first = true; - - for s in self { - if first { - first = false; - } else { - result.push_str(sep); - } - result.push_str(s.borrow()); - } - result } fn connect(&self, sep: &str) -> String { @@ -134,6 +100,96 @@ impl> SliceConcatExt for [S] { } } +macro_rules! spezialize_for_lengths { + ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => { + let mut target = $target; + let iter = $iter; + let sep_bytes = $separator; + match $separator.len() { + $( + // loops with hardcoded sizes run much faster + // specialize the cases with small separator lengths + $num => { + for s in iter { + copy_slice_and_advance!(target, sep_bytes); + copy_slice_and_advance!(target, s.borrow().as_ref()); + } + }, + )* + _ => { + // arbitrary non-zero size fallback + for s in iter { + copy_slice_and_advance!(target, sep_bytes); + copy_slice_and_advance!(target, s.borrow().as_ref()); + } + } + } + }; +} + +macro_rules! copy_slice_and_advance { + ($target:expr, $bytes:expr) => { + let len = $bytes.len(); + let (head, tail) = {$target}.split_at_mut(len); + head.copy_from_slice($bytes); + $target = tail; + } +} + +// Optimized join implementation that works for both Vec (T: Copy) and String's inner vec +// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262) +// For this reason SliceConcatExt is not specialized for T: Copy and SliceConcatExt is the +// only user of this function. It is left in place for the time when that is fixed. +// +// the bounds for String-join are S: Borrow and for Vec-join Borrow<[T]> +// [T] and str both impl AsRef<[T]> for some T +// => s.borrow().as_ref() and we always have slices +fn join_generic_copy(slice: &[S], sep: &[T]) -> Vec +where + T: Copy, + B: AsRef<[T]> + ?Sized, + S: Borrow, +{ + let sep_len = sep.len(); + let mut iter = slice.iter(); + + // the first slice is the only one without a separator preceding it + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + + // compute the exact total length of the joined Vec + // if the `len` calculation overflows, we'll panic + // we would have run out of memory anyway and the rest of the function requires + // the entire Vec pre-allocated for safety + let len = sep_len.checked_mul(iter.len()).and_then(|n| { + slice.iter() + .map(|s| s.borrow().as_ref().len()) + .try_fold(n, usize::checked_add) + }).expect("attempt to join into collection with len > usize::MAX"); + + // crucial for safety + let mut result = Vec::with_capacity(len); + assert!(result.capacity() >= len); + + result.extend_from_slice(first.borrow().as_ref()); + + unsafe { + { + let pos = result.len(); + let target = result.get_unchecked_mut(pos..len); + + // copy separator and slices over without bounds checks + // generate loops with hardcoded offsets for small separators + // massive improvements possible (~ x2) + spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4); + } + result.set_len(len); + } + result +} + #[stable(feature = "rust1", since = "1.0.0")] impl Borrow for String { #[inline] diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 6fd0b33f02a..3b7eec38609 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -609,6 +609,15 @@ fn test_join() { assert_eq!(v.join(&0), [1, 0, 2, 0, 3]); } +#[test] +fn test_join_nocopy() { + let v: [String; 0] = []; + assert_eq!(v.join(","), ""); + assert_eq!(["a".to_string(), "ab".into()].join(","), "a,ab"); + assert_eq!(["a".to_string(), "ab".into(), "abc".into()].join(","), "a,ab,abc"); + assert_eq!(["a".to_string(), "ab".into(), "".into()].join(","), "a,ab,"); +} + #[test] fn test_insert() { let mut a = vec![1, 2, 4]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index d11bf5dc3e9..03d295d16e6 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -162,6 +162,19 @@ fn test_join_for_different_lengths() { test_join!("-a-bc", ["", "a", "bc"], "-"); } +// join has fast paths for small separators up to 4 bytes +// this tests the slow paths. +#[test] +fn test_join_for_different_lengths_with_long_separator() { + assert_eq!("~~~~~".len(), 15); + + let empty: &[&str] = &[]; + test_join!("", empty, "~~~~~"); + test_join!("a", ["a"], "~~~~~"); + test_join!("a~~~~~b", ["a", "b"], "~~~~~"); + test_join!("~~~~~a~~~~~bc", ["", "a", "bc"], "~~~~~"); +} + #[test] fn test_unsafe_slice() { assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});