ToBase64 and ToHex perf improvements
The overhead of str::push_char is high enough to cripple the performance of these two functions. I've switched them to build the output in a ~[u8] and convert it to a string at the end. Since we know exactly which bytes go into the vector (all of them ASCII), we can use the unsafe conversion to skip the is_utf8 check. I could have optimized it further with vec::raw::get, but that only added ~10MB/s, so I didn't think it was worth it. ToHex is still ~30% slower than FromHex, which is puzzling. Before: ``` test base64::test::from_base64 ... bench: 1000 ns/iter (+/- 349) = 204 MB/s test base64::test::to_base64 ... bench: 2390 ns/iter (+/- 1130) = 63 MB/s ... test hex::tests::bench_from_hex ... bench: 884 ns/iter (+/- 220) = 341 MB/s test hex::tests::bench_to_hex ... bench: 2453 ns/iter (+/- 919) = 61 MB/s ``` After: ``` test base64::test::from_base64 ... bench: 1271 ns/iter (+/- 600) = 160 MB/s test base64::test::to_base64 ... bench: 759 ns/iter (+/- 286) = 198 MB/s ... test hex::tests::bench_from_hex ... bench: 875 ns/iter (+/- 377) = 345 MB/s test hex::tests::bench_to_hex ... bench: 593 ns/iter (+/- 240) = 254 MB/s ```
This commit is contained in:
parent
463e2416e9
commit
ff5fdffc13
@ -9,6 +9,7 @@
|
||||
// except according to those terms.
|
||||
|
||||
//! Base64 binary-to-text encoding
|
||||
use std::str;
|
||||
|
||||
/// Available encoding character sets
|
||||
pub enum CharacterSet {
|
||||
@ -40,21 +41,13 @@ pub static URL_SAFE: Config =
|
||||
pub static MIME: Config =
|
||||
Config {char_set: Standard, pad: true, line_length: Some(76)};
|
||||
|
||||
static STANDARD_CHARS: [char, ..64] = [
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
|
||||
];
|
||||
static STANDARD_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
"abcdefghijklmnopqrstuvwxyz",
|
||||
"0123456789+/");
|
||||
|
||||
static URLSAFE_CHARS: [char, ..64] = [
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
|
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
|
||||
];
|
||||
static URLSAFE_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
"abcdefghijklmnopqrstuvwxyz",
|
||||
"0123456789-_");
|
||||
|
||||
/// A trait for converting a value to base64 encoding.
|
||||
pub trait ToBase64 {
|
||||
@ -80,12 +73,12 @@ impl<'self> ToBase64 for &'self [u8] {
|
||||
* ~~~
|
||||
*/
|
||||
fn to_base64(&self, config: Config) -> ~str {
|
||||
let chars = match config.char_set {
|
||||
let bytes = match config.char_set {
|
||||
Standard => STANDARD_CHARS,
|
||||
UrlSafe => URLSAFE_CHARS
|
||||
};
|
||||
|
||||
let mut s = ~"";
|
||||
let mut v: ~[u8] = ~[];
|
||||
let mut i = 0;
|
||||
let mut cur_length = 0;
|
||||
let len = self.len();
|
||||
@ -93,7 +86,8 @@ impl<'self> ToBase64 for &'self [u8] {
|
||||
match config.line_length {
|
||||
Some(line_length) =>
|
||||
if cur_length >= line_length {
|
||||
s.push_str("\r\n");
|
||||
v.push('\r' as u8);
|
||||
v.push('\n' as u8);
|
||||
cur_length = 0;
|
||||
},
|
||||
None => ()
|
||||
@ -104,10 +98,10 @@ impl<'self> ToBase64 for &'self [u8] {
|
||||
(self[i + 2] as u32);
|
||||
|
||||
// This 24-bit number gets separated into four 6-bit numbers.
|
||||
s.push_char(chars[(n >> 18) & 63]);
|
||||
s.push_char(chars[(n >> 12) & 63]);
|
||||
s.push_char(chars[(n >> 6 ) & 63]);
|
||||
s.push_char(chars[n & 63]);
|
||||
v.push(bytes[(n >> 18) & 63]);
|
||||
v.push(bytes[(n >> 12) & 63]);
|
||||
v.push(bytes[(n >> 6 ) & 63]);
|
||||
v.push(bytes[n & 63]);
|
||||
|
||||
cur_length += 4;
|
||||
i += 3;
|
||||
@ -117,7 +111,8 @@ impl<'self> ToBase64 for &'self [u8] {
|
||||
match config.line_length {
|
||||
Some(line_length) =>
|
||||
if cur_length >= line_length {
|
||||
s.push_str("\r\n");
|
||||
v.push('\r' as u8);
|
||||
v.push('\n' as u8);
|
||||
},
|
||||
None => ()
|
||||
}
|
||||
@ -129,25 +124,29 @@ impl<'self> ToBase64 for &'self [u8] {
|
||||
0 => (),
|
||||
1 => {
|
||||
let n = (self[i] as u32) << 16;
|
||||
s.push_char(chars[(n >> 18) & 63]);
|
||||
s.push_char(chars[(n >> 12) & 63]);
|
||||
v.push(bytes[(n >> 18) & 63]);
|
||||
v.push(bytes[(n >> 12) & 63]);
|
||||
if config.pad {
|
||||
s.push_str("==");
|
||||
v.push('=' as u8);
|
||||
v.push('=' as u8);
|
||||
}
|
||||
}
|
||||
2 => {
|
||||
let n = (self[i] as u32) << 16 |
|
||||
(self[i + 1u] as u32) << 8;
|
||||
s.push_char(chars[(n >> 18) & 63]);
|
||||
s.push_char(chars[(n >> 12) & 63]);
|
||||
s.push_char(chars[(n >> 6 ) & 63]);
|
||||
v.push(bytes[(n >> 18) & 63]);
|
||||
v.push(bytes[(n >> 12) & 63]);
|
||||
v.push(bytes[(n >> 6 ) & 63]);
|
||||
if config.pad {
|
||||
s.push_char('=');
|
||||
v.push('=' as u8);
|
||||
}
|
||||
}
|
||||
_ => fail!("Algebra is broken, please alert the math police")
|
||||
}
|
||||
s
|
||||
|
||||
unsafe {
|
||||
str::raw::from_bytes_owned(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -19,8 +19,7 @@ pub trait ToHex {
|
||||
fn to_hex(&self) -> ~str;
|
||||
}
|
||||
|
||||
static CHARS: [char, ..16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'a', 'b', 'c', 'd', 'e', 'f'];
|
||||
static CHARS: &'static[u8] = bytes!("0123456789abcdef");
|
||||
|
||||
impl<'self> ToHex for &'self [u8] {
|
||||
/**
|
||||
@ -39,13 +38,16 @@ impl<'self> ToHex for &'self [u8] {
|
||||
* ~~~
|
||||
*/
|
||||
fn to_hex(&self) -> ~str {
|
||||
let mut s = str::with_capacity(self.len() * 2);
|
||||
// +1 for NULL terminator
|
||||
let mut v = vec::with_capacity(self.len() * 2 + 1);
|
||||
for &byte in self.iter() {
|
||||
s.push_char(CHARS[byte >> 4]);
|
||||
s.push_char(CHARS[byte & 0xf]);
|
||||
v.push(CHARS[byte >> 4]);
|
||||
v.push(CHARS[byte & 0xf]);
|
||||
}
|
||||
|
||||
s
|
||||
unsafe {
|
||||
str::raw::from_bytes_owned(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user