Auto merge of #50340 - Emerentius:master, r=alexcrichton
optimize joining for slices This improves the speed of string joining up to 3x. It removes the boolean flag check every iteration, eliminates repeated bounds checks and adds fast paths for small separators up to a length of 4 bytes. These optimizations gave me ~10%, ~50% and ~80% improvements respectively over the previous speed. Those are multiplicative. The 3x improvement happens for the optimal case of joining many small strings together in my microbenchmarks. Improvements flatten out for larger strings, of course, as more time is spent copying bits around. I've run a few benchmarks [with this code](https://github.com/Emerentius/join_bench). They are pretty noisy despite high iteration counts, but in total one can see the trends. ``` len_separator len_string n_strings speedup 4 10 10 2.38 4 10 100 3.41 4 10 1000 3.43 4 10 10000 3.25 4 100 10 2.23 4 100 100 2.73 4 100 1000 1.33 4 100 10000 1.14 4 1000 10 1.33 4 1000 100 1.15 4 1000 1000 1.08 4 1000 10000 1.04 10 10 10 1.61 10 10 100 1.74 10 10 1000 1.77 10 10 10000 1.75 10 100 10 1.58 10 100 100 1.65 10 100 1000 1.24 10 100 10000 1.12 10 1000 10 1.23 10 1000 100 1.11 10 1000 1000 1.05 10 1000 10000 0.997 100 10 10 1.66 100 10 100 1.78 100 10 1000 1.28 100 10 10000 1.16 100 100 10 1.37 100 100 100 1.26 100 100 1000 1.09 100 100 10000 1.0 100 1000 10 1.19 100 1000 100 1.12 100 1000 1000 1.05 100 1000 10000 1.12 ``` String joining with small or empty separators is now ~50% faster than the old concatenation (small strings). The same approach can also improve the performance of joining into vectors. If this approach is acceptable, I can apply it for concatenation and for vectors as well. Alternatively, concat could just call `.join("")`.
This commit is contained in:
commit
747e655010
@ -566,15 +566,17 @@ impl<T: Clone, V: Borrow<[T]>> SliceConcatExt<T> for [V] {
|
||||
}
|
||||
|
||||
fn join(&self, sep: &T) -> Vec<T> {
|
||||
let mut iter = self.iter();
|
||||
let first = match iter.next() {
|
||||
Some(first) => first,
|
||||
None => return vec![],
|
||||
};
|
||||
let size = self.iter().fold(0, |acc, v| acc + v.borrow().len());
|
||||
let mut result = Vec::with_capacity(size + self.len());
|
||||
let mut first = true;
|
||||
for v in self {
|
||||
if first {
|
||||
first = false
|
||||
} else {
|
||||
result.push(sep.clone())
|
||||
}
|
||||
result.extend_from_slice(first.borrow());
|
||||
|
||||
for v in iter {
|
||||
result.push(sep.clone());
|
||||
result.extend_from_slice(v.borrow())
|
||||
}
|
||||
result
|
||||
|
@ -86,47 +86,13 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
|
||||
type Output = String;
|
||||
|
||||
fn concat(&self) -> String {
|
||||
if self.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
// `len` calculation may overflow but push_str will check boundaries
|
||||
let len = self.iter().map(|s| s.borrow().len()).sum();
|
||||
let mut result = String::with_capacity(len);
|
||||
|
||||
for s in self {
|
||||
result.push_str(s.borrow())
|
||||
}
|
||||
|
||||
result
|
||||
self.join("")
|
||||
}
|
||||
|
||||
fn join(&self, sep: &str) -> String {
|
||||
if self.is_empty() {
|
||||
return String::new();
|
||||
unsafe {
|
||||
String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) )
|
||||
}
|
||||
|
||||
// concat is faster
|
||||
if sep.is_empty() {
|
||||
return self.concat();
|
||||
}
|
||||
|
||||
// this is wrong without the guarantee that `self` is non-empty
|
||||
// `len` calculation may overflow but push_str but will check boundaries
|
||||
let len = sep.len() * (self.len() - 1) +
|
||||
self.iter().map(|s| s.borrow().len()).sum::<usize>();
|
||||
let mut result = String::with_capacity(len);
|
||||
let mut first = true;
|
||||
|
||||
for s in self {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
result.push_str(sep);
|
||||
}
|
||||
result.push_str(s.borrow());
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn connect(&self, sep: &str) -> String {
|
||||
@ -134,6 +100,96 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
|
||||
}
|
||||
}
|
||||
|
||||
// Expands to the copy loop of `join_generic_copy`.
//
// `$separator` is the separator slice, `$target` the mutable destination slice and
// `$iter` an iterator over the remaining pieces (each one a slice with the same element
// type as the destination). For every piece the separator is copied first, then the piece.
//
// One `match` arm is generated per listed `$num` so that the separator length is a
// compile-time constant inside that arm; loops with hardcoded sizes run much faster.
// Any other length falls through to the generic arm.
//
// The macro evaluates to the part of `$target` that was NOT written, so the caller
// can tell exactly how many elements were produced.
macro_rules! spezialize_for_lengths {
    ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{
        let mut target = $target;
        let iter = $iter;
        let sep_bytes = $separator;
        match sep_bytes.len() {
            $(
                // specialize the cases with small separator lengths
                $num => {
                    for piece in iter {
                        copy_slice_and_advance!(target, sep_bytes);
                        copy_slice_and_advance!(target, piece);
                    }
                },
            )*
            _ => {
                // arbitrary non-zero size fallback
                for piece in iter {
                    copy_slice_and_advance!(target, sep_bytes);
                    copy_slice_and_advance!(target, piece);
                }
            }
        }
        target
    }};
}

// Copies `$bytes` to the front of the mutable slice `$target` and then rebinds
// `$target` to the remainder behind the copied region.
//
// `split_at_mut` panics if `$target` is shorter than `$bytes`, so this can never
// write past the end of the destination.
macro_rules! copy_slice_and_advance {
    ($target:expr, $bytes:expr) => {
        let len = $bytes.len();
        // `{ $target }` moves the `&mut` slice instead of reborrowing it, which lets
        // `tail` keep the full lifetime and be assigned back to `$target`.
        let (head, tail) = { $target }.split_at_mut(len);
        head.copy_from_slice($bytes);
        $target = tail;
    };
}

// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
// only user of this function. It is left in place for the time when that is fixed.
//
// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
// [T] and str both impl AsRef<[T]> for some T
// => s.borrow().as_ref() and we always have slices
//
// Returns an empty Vec for an empty `slice`; otherwise the pieces of `slice` with `sep`
// copied between each adjacent pair.
//
// Panics if the total length overflows `usize`, or if a `Borrow` implementation hands out
// more data while copying than it reported while the length was being computed.
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
    T: Copy,
    B: AsRef<[T]> + ?Sized,
    S: Borrow<B>,
{
    let sep_len = sep.len();
    let mut iter = slice.iter();

    // the first slice is the only one without a separator preceding it
    let first = match iter.next() {
        Some(first) => first,
        None => return vec![],
    };

    // compute the exact total length of the joined Vec
    // if the `len` calculation overflows, we'll panic
    // we would have run out of memory anyway and the rest of the function requires
    // the entire Vec pre-allocated for safety
    let reserved_len = sep_len
        .checked_mul(iter.len())
        .and_then(|n| {
            slice
                .iter()
                .map(|s| s.borrow().as_ref().len())
                .try_fold(n, usize::checked_add)
        })
        .expect("attempt to join into collection with len > usize::MAX");

    // crucial for safety
    let mut result = Vec::with_capacity(reserved_len);
    assert!(result.capacity() >= reserved_len);

    result.extend_from_slice(first.borrow().as_ref());

    let pos = result.len();
    // A misbehaving `Borrow` impl may have returned more data for `first` than it did
    // during the length calculation; saturate so the window below is simply empty.
    let target_len = reserved_len.saturating_sub(pos);

    let written = {
        // Write into the allocated-but-uninitialized tail of the Vec. It is exposed as
        // `MaybeUninit<T>`, so no `&mut [T]` to uninitialized memory is ever created.
        let target = &mut result.spare_capacity_mut()[..target_len];

        // SAFETY: `MaybeUninit<T>` has the same size and alignment as `T`, and every
        // initialized `T` is a valid `MaybeUninit<T>`; the pointer and length come from a
        // live shared slice that is only read through the new view.
        let sep_uninit: &[core::mem::MaybeUninit<T>] = unsafe {
            core::slice::from_raw_parts(
                sep.as_ptr() as *const core::mem::MaybeUninit<T>,
                sep.len(),
            )
        };
        let pieces = iter.map(|s| {
            let piece: &[T] = s.borrow().as_ref();
            // SAFETY: same reasoning as for `sep_uninit` above.
            unsafe {
                core::slice::from_raw_parts(
                    piece.as_ptr() as *const core::mem::MaybeUninit<T>,
                    piece.len(),
                )
            }
        });

        // copy separator and slices over
        // generate loops with hardcoded offsets for small separators
        // massive improvements possible (~ x2)
        let remain = spezialize_for_lengths!(sep_uninit, target, pieces; 0, 1, 2, 3, 4);

        // A weird borrow implementation may return different slices for the length
        // calculation and the actual copy. Only count what was really written so that
        // no uninitialized elements are exposed to the caller.
        target_len - remain.len()
    };

    // SAFETY: `pos + written` never exceeds the capacity because the copy window was cut
    // out of the spare capacity. The first `pos` elements were initialized by
    // `extend_from_slice`, and the following `written` elements were initialized
    // front-to-back by the copy loop.
    unsafe {
        result.set_len(pos + written);
    }
    result
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl Borrow<str> for String {
|
||||
#[inline]
|
||||
|
@ -609,6 +609,15 @@ fn test_join() {
|
||||
assert_eq!(v.join(&0), [1, 0, 2, 0, 3]);
|
||||
}
|
||||
|
||||
/// Joins slices of owned `String`s (a non-`Copy` element type) with a `","` separator:
/// the empty slice, two and three elements, and a trailing empty element.
#[test]
fn test_join_nocopy() {
    // Nothing to join: the result is the empty string.
    let no_strings: [String; 0] = [];
    assert_eq!(no_strings.join(","), "");

    let pair = [String::from("a"), String::from("ab")];
    assert_eq!(pair.join(","), "a,ab");

    let triple = [String::from("a"), String::from("ab"), String::from("abc")];
    assert_eq!(triple.join(","), "a,ab,abc");

    // A trailing empty element still gets its separator.
    let trailing_empty = [String::from("a"), String::from("ab"), String::new()];
    assert_eq!(trailing_empty.join(","), "a,ab,");
}
|
||||
|
||||
#[test]
|
||||
fn test_insert() {
|
||||
let mut a = vec![1, 2, 4];
|
||||
|
@ -162,6 +162,19 @@ fn test_join_for_different_lengths() {
|
||||
test_join!("-a-bc", ["", "a", "bc"], "-");
|
||||
}
|
||||
|
||||
// join has fast paths for small separators up to 4 bytes
// this tests the slow paths.
//
// The separator is five fullwidth tildes (U+FF5E), each 3 bytes in UTF-8, so it is
// 15 bytes long. An ASCII `~~~~~` would only be 5 bytes and the length check below
// would fail.
#[test]
fn test_join_for_different_lengths_with_long_separator() {
    // Guard against the separator being transliterated to a shorter ASCII form.
    assert_eq!("～～～～～".len(), 15);

    let empty: &[&str] = &[];
    test_join!("", empty, "～～～～～");
    test_join!("a", ["a"], "～～～～～");
    test_join!("a～～～～～b", ["a", "b"], "～～～～～");
    test_join!("～～～～～a～～～～～bc", ["", "a", "bc"], "～～～～～");
}
|
||||
|
||||
#[test]
|
||||
fn test_unsafe_slice() {
|
||||
assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
|
||||
|
Loading…
Reference in New Issue
Block a user