rollup merge of #20079: SimonSapin/string_push_ascii_fast_path

`String::push(&mut self, ch: char)` currently has a single code path that calls `Char::encode_utf8`. This adds a fast path for ASCII `char`s, which are represented as a single byte in UTF-8.

Benchmarks of stage1 libcollections at the intermediate commit show that the fast path very significantly improves the performance of repeatedly pushing an ASCII `char`, but does not significantly affect the performance for a non-ASCII `char` (where the fast path is not taken).

```
bench_push_char_one_byte                  59552 ns/iter (+/- 2132) = 167 MB/s
bench_push_char_one_byte_with_fast_path    6563 ns/iter (+/- 658) = 1523 MB/s
bench_push_char_two_bytes                 71520 ns/iter (+/- 3541) = 279 MB/s
bench_push_char_two_bytes_with_slow_path  71452 ns/iter (+/- 4202) = 279 MB/s
bench_push_str_one_byte                   38910 ns/iter (+/- 2477) = 257 MB/s
```

A benchmark of pushing a one-byte-long `&str` is added for comparison, but its performance [has varied a lot lately](https://github.com/rust-lang/rust/pull/19640#issuecomment-67741561). (When the input is fixed, `s.push_str("x")` could be used just as well as `s.push('x')`.)
This commit is contained in:
Alex Crichton 2014-12-21 00:04:26 -08:00
commit 3e5257f68f

View File

@ -513,6 +513,11 @@ impl String {
#[inline]
#[stable = "function just renamed from push_char"]
pub fn push(&mut self, ch: char) {
if (ch as u32) < 0x80 {
self.vec.push(ch as u8);
return;
}
let cur_len = self.len();
// This may use up to 4 bytes.
self.vec.reserve(4);
@ -1401,6 +1406,41 @@ mod tests {
});
}
// Number of push operations each `bench_push_*` benchmark performs per
// iteration; the benchmarks also derive `b.bytes` from this value.
const REPETITIONS: u64 = 10_000;
/// Benchmarks appending the one-byte string slice `"a"` to a freshly
/// created `String`, `REPETITIONS` times per iteration.
#[bench]
fn bench_push_str_one_byte(b: &mut Bencher) {
    // Each push adds one byte, so an iteration writes REPETITIONS bytes.
    b.bytes = REPETITIONS;
    b.iter(|| {
        let mut buf = String::new();
        for _ in range(0, REPETITIONS) {
            buf.push_str("a");
        }
    });
}
/// Benchmarks pushing the ASCII `char` `'a'` (one byte in UTF-8) onto a
/// freshly created `String`, `REPETITIONS` times per iteration.
#[bench]
fn bench_push_char_one_byte(b: &mut Bencher) {
    // Each push adds one byte, so an iteration writes REPETITIONS bytes.
    b.bytes = REPETITIONS;
    b.iter(|| {
        let mut buf = String::new();
        for _ in range(0, REPETITIONS) {
            buf.push('a');
        }
    });
}
/// Benchmarks pushing the non-ASCII `char` `'â'` (two bytes in UTF-8) onto a
/// freshly created `String`, `REPETITIONS` times per iteration.
#[bench]
fn bench_push_char_two_bytes(b: &mut Bencher) {
    // Each push adds two bytes, hence the doubled byte count per iteration.
    b.bytes = REPETITIONS * 2;
    b.iter(|| {
        let mut buf = String::new();
        for _ in range(0, REPETITIONS) {
            buf.push('â');
        }
    });
}
#[bench]
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \