ToBase64 and ToHex perf improvements

The overhead of str::push_char is high enough to cripple the performance
of these two functions. I've switched them to build the output in a
~[u8] and convert it to a string at the end. Since we know exactly which
bytes go into the vector (all of them ASCII), we can use the unsafe
str::raw::from_bytes_owned to skip the is_utf8 check.

I could have optimized it further with vec::raw::get, but that only added
~10MB/s, so I didn't think it was worth it. ToHex is still ~30% slower
than FromHex, which is puzzling.

Before:

```
test base64::test::from_base64 ... bench: 1000 ns/iter (+/- 349) = 204 MB/s
test base64::test::to_base64 ... bench: 2390 ns/iter (+/- 1130) = 63 MB/s
...
test hex::tests::bench_from_hex ... bench: 884 ns/iter (+/- 220) = 341 MB/s
test hex::tests::bench_to_hex ... bench: 2453 ns/iter (+/- 919) = 61 MB/s
```

After:

```
test base64::test::from_base64 ... bench: 1271 ns/iter (+/- 600) = 160 MB/s
test base64::test::to_base64 ... bench: 759 ns/iter (+/- 286) = 198 MB/s
...
test hex::tests::bench_from_hex ... bench: 875 ns/iter (+/- 377) = 345 MB/s
test hex::tests::bench_to_hex ... bench: 593 ns/iter (+/- 240) = 254 MB/s
```
This commit is contained in:
Steven Fackler 2013-08-04 23:51:26 -04:00
parent 463e2416e9
commit ff5fdffc13
2 changed files with 37 additions and 36 deletions

View File

@ -9,6 +9,7 @@
// except according to those terms.
//! Base64 binary-to-text encoding
use std::str;
/// Available encoding character sets
pub enum CharacterSet {
@ -40,21 +41,13 @@ pub static URL_SAFE: Config =
pub static MIME: Config =
Config {char_set: Standard, pad: true, line_length: Some(76)};
static STANDARD_CHARS: [char, ..64] = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
];
static STANDARD_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"abcdefghijklmnopqrstuvwxyz",
"0123456789+/");
static URLSAFE_CHARS: [char, ..64] = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
];
static URLSAFE_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"abcdefghijklmnopqrstuvwxyz",
"0123456789-_");
/// A trait for converting a value to base64 encoding.
pub trait ToBase64 {
@ -80,12 +73,12 @@ impl<'self> ToBase64 for &'self [u8] {
* ~~~
*/
fn to_base64(&self, config: Config) -> ~str {
let chars = match config.char_set {
let bytes = match config.char_set {
Standard => STANDARD_CHARS,
UrlSafe => URLSAFE_CHARS
};
let mut s = ~"";
let mut v: ~[u8] = ~[];
let mut i = 0;
let mut cur_length = 0;
let len = self.len();
@ -93,7 +86,8 @@ impl<'self> ToBase64 for &'self [u8] {
match config.line_length {
Some(line_length) =>
if cur_length >= line_length {
s.push_str("\r\n");
v.push('\r' as u8);
v.push('\n' as u8);
cur_length = 0;
},
None => ()
@ -104,10 +98,10 @@ impl<'self> ToBase64 for &'self [u8] {
(self[i + 2] as u32);
// This 24-bit number gets separated into four 6-bit numbers.
s.push_char(chars[(n >> 18) & 63]);
s.push_char(chars[(n >> 12) & 63]);
s.push_char(chars[(n >> 6 ) & 63]);
s.push_char(chars[n & 63]);
v.push(bytes[(n >> 18) & 63]);
v.push(bytes[(n >> 12) & 63]);
v.push(bytes[(n >> 6 ) & 63]);
v.push(bytes[n & 63]);
cur_length += 4;
i += 3;
@ -117,7 +111,8 @@ impl<'self> ToBase64 for &'self [u8] {
match config.line_length {
Some(line_length) =>
if cur_length >= line_length {
s.push_str("\r\n");
v.push('\r' as u8);
v.push('\n' as u8);
},
None => ()
}
@ -129,25 +124,29 @@ impl<'self> ToBase64 for &'self [u8] {
0 => (),
1 => {
let n = (self[i] as u32) << 16;
s.push_char(chars[(n >> 18) & 63]);
s.push_char(chars[(n >> 12) & 63]);
v.push(bytes[(n >> 18) & 63]);
v.push(bytes[(n >> 12) & 63]);
if config.pad {
s.push_str("==");
v.push('=' as u8);
v.push('=' as u8);
}
}
2 => {
let n = (self[i] as u32) << 16 |
(self[i + 1u] as u32) << 8;
s.push_char(chars[(n >> 18) & 63]);
s.push_char(chars[(n >> 12) & 63]);
s.push_char(chars[(n >> 6 ) & 63]);
v.push(bytes[(n >> 18) & 63]);
v.push(bytes[(n >> 12) & 63]);
v.push(bytes[(n >> 6 ) & 63]);
if config.pad {
s.push_char('=');
v.push('=' as u8);
}
}
_ => fail!("Algebra is broken, please alert the math police")
}
s
unsafe {
str::raw::from_bytes_owned(v)
}
}
}

View File

@ -19,8 +19,7 @@ pub trait ToHex {
fn to_hex(&self) -> ~str;
}
static CHARS: [char, ..16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f'];
static CHARS: &'static[u8] = bytes!("0123456789abcdef");
impl<'self> ToHex for &'self [u8] {
/**
@ -39,13 +38,16 @@ impl<'self> ToHex for &'self [u8] {
* ~~~
*/
fn to_hex(&self) -> ~str {
let mut s = str::with_capacity(self.len() * 2);
// +1 for NULL terminator
let mut v = vec::with_capacity(self.len() * 2 + 1);
for &byte in self.iter() {
s.push_char(CHARS[byte >> 4]);
s.push_char(CHARS[byte & 0xf]);
v.push(CHARS[byte >> 4]);
v.push(CHARS[byte & 0xf]);
}
s
unsafe {
str::raw::from_bytes_owned(v)
}
}
}