Auto merge of #50340 - Emerentius:master, r=alexcrichton

optimize joining for slices

This improves the speed of string joining up to 3x.
It removes the boolean flag check every iteration, eliminates repeated bounds checks and adds a fast paths for small separators up to a len of 4 bytes
These optimizations gave me ~10%, ~50% and ~80% improvements respectively over the previous speed. Those are multiplicative.

3x improvement happens for the optimal case of joining many small strings together in my microbenchmarks. Improvements flatten out for larger strings of course as more time is spent copying bits around. I've run a few benchmarks [with this code](https://github.com/Emerentius/join_bench). They are pretty noise despite high iteration counts, but in total one can see the trends.

```
len_separator  len_string   n_strings     speedup
           4          10          10        2.38
           4          10         100        3.41
           4          10        1000        3.43
           4          10       10000        3.25
           4         100          10        2.23
           4         100         100        2.73
           4         100        1000        1.33
           4         100       10000        1.14
           4        1000          10        1.33
           4        1000         100        1.15
           4        1000        1000        1.08
           4        1000       10000        1.04
          10          10          10        1.61
          10          10         100        1.74
          10          10        1000        1.77
          10          10       10000        1.75
          10         100          10        1.58
          10         100         100        1.65
          10         100        1000        1.24
          10         100       10000        1.12
          10        1000          10        1.23
          10        1000         100        1.11
          10        1000        1000        1.05
          10        1000       10000       0.997
         100          10          10        1.66
         100          10         100        1.78
         100          10        1000        1.28
         100          10       10000        1.16
         100         100          10        1.37
         100         100         100        1.26
         100         100        1000        1.09
         100         100       10000         1.0
         100        1000          10        1.19
         100        1000         100        1.12
         100        1000        1000        1.05
         100        1000       10000        1.12
```

The string joining with small or empty separators is now ~50% faster than the old concatenation (small strings). The same approach can also improve the performance of joining into vectors.

If this approach is acceptable, I can apply it for concatenation and for vectors as well. Alternatively, concat could just call `.join("")`.
This commit is contained in:
bors 2018-06-01 16:16:30 +00:00
commit 747e655010
4 changed files with 124 additions and 44 deletions

View File

@ -566,15 +566,17 @@ impl<T: Clone, V: Borrow<[T]>> SliceConcatExt<T> for [V] {
}
fn join(&self, sep: &T) -> Vec<T> {
let mut iter = self.iter();
let first = match iter.next() {
Some(first) => first,
None => return vec![],
};
let size = self.iter().fold(0, |acc, v| acc + v.borrow().len());
let mut result = Vec::with_capacity(size + self.len());
let mut first = true;
for v in self {
if first {
first = false
} else {
result.push(sep.clone())
}
result.extend_from_slice(first.borrow());
for v in iter {
result.push(sep.clone());
result.extend_from_slice(v.borrow())
}
result

View File

@ -86,47 +86,13 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
type Output = String;
fn concat(&self) -> String {
if self.is_empty() {
return String::new();
}
// `len` calculation may overflow but push_str will check boundaries
let len = self.iter().map(|s| s.borrow().len()).sum();
let mut result = String::with_capacity(len);
for s in self {
result.push_str(s.borrow())
}
result
self.join("")
}
fn join(&self, sep: &str) -> String {
if self.is_empty() {
return String::new();
unsafe {
String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) )
}
// concat is faster
if sep.is_empty() {
return self.concat();
}
// this is wrong without the guarantee that `self` is non-empty
// `len` calculation may overflow but push_str but will check boundaries
let len = sep.len() * (self.len() - 1) +
self.iter().map(|s| s.borrow().len()).sum::<usize>();
let mut result = String::with_capacity(len);
let mut first = true;
for s in self {
if first {
first = false;
} else {
result.push_str(sep);
}
result.push_str(s.borrow());
}
result
}
fn connect(&self, sep: &str) -> String {
@ -134,6 +100,96 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
}
}
macro_rules! spezialize_for_lengths {
($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {
let mut target = $target;
let iter = $iter;
let sep_bytes = $separator;
match $separator.len() {
$(
// loops with hardcoded sizes run much faster
// specialize the cases with small separator lengths
$num => {
for s in iter {
copy_slice_and_advance!(target, sep_bytes);
copy_slice_and_advance!(target, s.borrow().as_ref());
}
},
)*
_ => {
// arbitrary non-zero size fallback
for s in iter {
copy_slice_and_advance!(target, sep_bytes);
copy_slice_and_advance!(target, s.borrow().as_ref());
}
}
}
};
}
macro_rules! copy_slice_and_advance {
($target:expr, $bytes:expr) => {
let len = $bytes.len();
let (head, tail) = {$target}.split_at_mut(len);
head.copy_from_slice($bytes);
$target = tail;
}
}
// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
// only user of this function. It is left in place for the time when that is fixed.
//
// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
// [T] and str both impl AsRef<[T]> for some T
// => s.borrow().as_ref() and we always have slices
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
T: Copy,
B: AsRef<[T]> + ?Sized,
S: Borrow<B>,
{
let sep_len = sep.len();
let mut iter = slice.iter();
// the first slice is the only one without a separator preceding it
let first = match iter.next() {
Some(first) => first,
None => return vec![],
};
// compute the exact total length of the joined Vec
// if the `len` calculation overflows, we'll panic
// we would have run out of memory anyway and the rest of the function requires
// the entire Vec pre-allocated for safety
let len = sep_len.checked_mul(iter.len()).and_then(|n| {
slice.iter()
.map(|s| s.borrow().as_ref().len())
.try_fold(n, usize::checked_add)
}).expect("attempt to join into collection with len > usize::MAX");
// crucial for safety
let mut result = Vec::with_capacity(len);
assert!(result.capacity() >= len);
result.extend_from_slice(first.borrow().as_ref());
unsafe {
{
let pos = result.len();
let target = result.get_unchecked_mut(pos..len);
// copy separator and slices over without bounds checks
// generate loops with hardcoded offsets for small separators
// massive improvements possible (~ x2)
spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4);
}
result.set_len(len);
}
result
}
#[stable(feature = "rust1", since = "1.0.0")]
impl Borrow<str> for String {
#[inline]

View File

@ -609,6 +609,15 @@ fn test_join() {
assert_eq!(v.join(&0), [1, 0, 2, 0, 3]);
}
#[test]
fn test_join_nocopy() {
let v: [String; 0] = [];
assert_eq!(v.join(","), "");
assert_eq!(["a".to_string(), "ab".into()].join(","), "a,ab");
assert_eq!(["a".to_string(), "ab".into(), "abc".into()].join(","), "a,ab,abc");
assert_eq!(["a".to_string(), "ab".into(), "".into()].join(","), "a,ab,");
}
#[test]
fn test_insert() {
let mut a = vec![1, 2, 4];

View File

@ -162,6 +162,19 @@ fn test_join_for_different_lengths() {
test_join!("-a-bc", ["", "a", "bc"], "-");
}
// join has fast paths for small separators up to 4 bytes
// this tests the slow paths.
#[test]
fn test_join_for_different_lengths_with_long_separator() {
assert_eq!("".len(), 15);
let empty: &[&str] = &[];
test_join!("", empty, "");
test_join!("a", ["a"], "");
test_join!("ab", ["a", "b"], "");
test_join!("abc", ["", "a", "bc"], "");
}
#[test]
fn test_unsafe_slice() {
assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});