Added functionality to Base64 package

The Base64 package previously had extremely basic functionality. It only
suported the standard encoding character set, didn't support line breaks
and always padded output. This commit makes it significantly more
powerful.

The FromBase64 impl now supports all of the standard variants of Base64.
It ignores newlines,interprets '-' and '_' as well as '+' and '/' and
doesn't require padding. It isn't incredibly pedantic and will
successfully parse strings that are not strictly valid, but I don't
think the extra complexity required to make it accept _only_ valid
strings is worth it.

The ToBase64 trait has been modified such that to_base64 now takes a
base64::Config struct which contains the output format configuration.
This currently includes the selection of character set (standard or
url safe), whether or not to pad and an optional line break width. The
package comes with three static Config structs for the RFC 4648
standard, RFC 4648 url safe and RFC 2045 MIME formats.

The other option for configuring ToBase64 output would be to have one
method with the configuration flags passed and other traits with default
impls for the common cases, but I think that's a little messier.
This commit is contained in:
Steven Fackler 2013-06-30 18:08:22 -04:00
parent 3c0a1621b5
commit 5a8a30f45b

View File

@ -10,17 +10,30 @@
//! Base64 binary-to-text encoding
use std::vec;
/// A trait for converting a value to base64 encoding.
pub trait ToBase64 {
/// Converts the value of `self` to a base64 value, returning the owned
/// string
fn to_base64(&self) -> ~str;
/// Contains configuration parameters for to_base64
pub struct Config {
/// True to use the url-safe encoding format ('-' and '_'), false to use
/// the standard encoding format ('+' and '/')
pub url_safe: bool,
/// True to pad output with '=' characters
pub pad: bool,
/// Some(len) to wrap lines at len, None to disable line wrapping
pub line_length: Option<uint>
}
static CHARS: [char, ..64] = [
/// Configuration for RFC 4648 standard base64 encoding
pub static standard: Config =
Config {url_safe: false, pad: true, line_length: None};
/// Configuration for RFC 4648 base64url encoding
pub static url_safe: Config =
Config {url_safe: true, pad: false, line_length: None};
/// Configuration for RFC 2045 MIME base64 encoding
pub static mime: Config =
Config {url_safe: false, pad: true, line_length: Some(76)};
static STANDARD_CHARS: [char, ..64] = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
@ -28,6 +41,21 @@ static CHARS: [char, ..64] = [
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
];
static URLSAFE_CHARS: [char, ..64] = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
];
/// A trait for converting a value to base64 encoding.
pub trait ToBase64 {
/// Converts the value of `self` to a base64 value following the specified
/// format configuration, returning the owned string.
fn to_base64(&self, config: Config) -> ~str;
}
impl<'self> ToBase64 for &'self [u8] {
/**
* Turn a vector of `u8` bytes into a base64 string.
@ -35,56 +63,81 @@ impl<'self> ToBase64 for &'self [u8] {
* # Example
*
* ~~~ {.rust}
* extern mod extra;
* use extra::base64::ToBase64;
* use std::base64::{ToBase64, standard};
*
* fn main () {
* let str = [52,32].to_base64();
* let str = [52,32].to_base64(standard);
* println(fmt!("%s", str));
* }
* ~~~
*/
fn to_base64(&self) -> ~str {
fn to_base64(&self, config: Config) -> ~str {
let chars = match config.url_safe {
true => URLSAFE_CHARS,
false => STANDARD_CHARS
};
let mut s = ~"";
let mut i = 0;
let mut cur_length = 0;
let len = self.len();
s.reserve(((len + 3u) / 4u) * 3u);
while i < len - (len % 3) {
match config.line_length {
Some(line_length) =>
if cur_length >= line_length {
s.push_str("\r\n");
cur_length = 0;
},
None => ()
}
let mut i = 0u;
while i < len - (len % 3u) {
let n = (self[i] as uint) << 16u |
(self[i + 1u] as uint) << 8u |
(self[i + 2u] as uint);
let n = (self[i] as u32) << 16 |
(self[i + 1] as u32) << 8 |
(self[i + 2] as u32);
// This 24-bit number gets separated into four 6-bit numbers.
s.push_char(CHARS[(n >> 18u) & 63u]);
s.push_char(CHARS[(n >> 12u) & 63u]);
s.push_char(CHARS[(n >> 6u) & 63u]);
s.push_char(CHARS[n & 63u]);
s.push_char(chars[(n >> 18) & 63]);
s.push_char(chars[(n >> 12) & 63]);
s.push_char(chars[(n >> 6 ) & 63]);
s.push_char(chars[n & 63]);
i += 3u;
cur_length += 4;
i += 3;
}
if len % 3 != 0 {
match config.line_length {
Some(line_length) =>
if cur_length >= line_length {
s.push_str("\r\n");
},
None => ()
}
}
// Heh, would be cool if we knew this was exhaustive
// (the dream of bounded integer types)
match len % 3 {
0 => (),
1 => {
let n = (self[i] as uint) << 16u;
s.push_char(CHARS[(n >> 18u) & 63u]);
s.push_char(CHARS[(n >> 12u) & 63u]);
s.push_char('=');
s.push_char('=');
}
2 => {
let n = (self[i] as uint) << 16u |
(self[i + 1u] as uint) << 8u;
s.push_char(CHARS[(n >> 18u) & 63u]);
s.push_char(CHARS[(n >> 12u) & 63u]);
s.push_char(CHARS[(n >> 6u) & 63u]);
s.push_char('=');
}
_ => fail!("Algebra is broken, please alert the math police")
0 => (),
1 => {
let n = (self[i] as u32) << 16;
s.push_char(chars[(n >> 18) & 63]);
s.push_char(chars[(n >> 12) & 63]);
if config.pad {
s.push_str("==");
}
}
2 => {
let n = (self[i] as u32) << 16 |
(self[i + 1u] as u32) << 8;
s.push_char(chars[(n >> 18) & 63]);
s.push_char(chars[(n >> 12) & 63]);
s.push_char(chars[(n >> 6 ) & 63]);
if config.pad {
s.push_char('=');
}
}
_ => fail!("Algebra is broken, please alert the math police")
}
s
}
@ -98,23 +151,24 @@ impl<'self> ToBase64 for &'self str {
* # Example
*
* ~~~ {.rust}
* extern mod extra;
* use extra::base64::ToBase64;
* use std::base64::{ToBase64, standard};
*
* fn main () {
* let str = "Hello, World".to_base64();
* let str = "Hello, World".to_base64(standard);
* println(fmt!("%s",str));
* }
* ~~~
*
*/
fn to_base64(&self) -> ~str {
self.as_bytes().to_base64()
fn to_base64(&self, config: Config) -> ~str {
self.as_bytes().to_base64(config)
}
}
#[allow(missing_doc)]
/// A trait for converting from base64 encoded values.
pub trait FromBase64 {
/// Converts the value of `self`, interpreted as base64 encoded data, into
/// an owned vector of bytes, returning the vector.
fn from_base64(&self) -> ~[u8];
}
@ -126,12 +180,10 @@ impl<'self> FromBase64 for &'self [u8] {
* # Example
*
* ~~~ {.rust}
* extern mod extra;
* use extra::base64::ToBase64;
* use extra::base64::FromBase64;
* use std::base64::{ToBase64, FromBase64, standard};
*
* fn main () {
* let str = [52,32].to_base64();
* let str = [52,32].to_base64(standard);
* println(fmt!("%s", str));
* let bytes = str.from_base64();
* println(fmt!("%?",bytes));
@ -139,56 +191,52 @@ impl<'self> FromBase64 for &'self [u8] {
* ~~~
*/
fn from_base64(&self) -> ~[u8] {
if self.len() % 4u != 0u { fail!("invalid base64 length"); }
let mut r = ~[];
let mut buf: u32 = 0;
let mut modulus = 0;
let len = self.len();
let mut padding = 0u;
let mut it = self.iter();
for it.advance |&byte| {
let ch = byte as char;
let val = byte as u32;
if len != 0u {
if self[len - 1u] == '=' as u8 { padding += 1u; }
if self[len - 2u] == '=' as u8 { padding += 1u; }
match ch {
'A'..'Z' => buf |= val - 0x41,
'a'..'z' => buf |= val - 0x47,
'0'..'9' => buf |= val + 0x04,
'+'|'-' => buf |= 0x3E,
'/'|'_' => buf |= 0x3F,
'\r'|'\n' => loop,
'=' => break,
_ => fail!("Invalid Base64 character")
}
buf <<= 6;
modulus += 1;
if modulus == 4 {
modulus = 0;
r.push((buf >> 22) as u8);
r.push((buf >> 14) as u8);
r.push((buf >> 6 ) as u8);
}
}
let mut r = vec::with_capacity((len / 4u) * 3u - padding);
let mut i = 0u;
while i < len {
let mut n = 0u;
for 4u.times {
let ch = self[i] as char;
n <<= 6u;
match ch {
'A'..'Z' => n |= (ch as uint) - 0x41,
'a'..'z' => n |= (ch as uint) - 0x47,
'0'..'9' => n |= (ch as uint) + 0x04,
'+' => n |= 0x3E,
'/' => n |= 0x3F,
'=' => {
match len - i {
1u => {
r.push(((n >> 16u) & 0xFFu) as u8);
r.push(((n >> 8u ) & 0xFFu) as u8);
return copy r;
}
2u => {
r.push(((n >> 10u) & 0xFFu) as u8);
return copy r;
}
_ => fail!("invalid base64 padding")
}
}
_ => fail!("invalid base64 character")
}
i += 1u;
};
r.push(((n >> 16u) & 0xFFu) as u8);
r.push(((n >> 8u ) & 0xFFu) as u8);
r.push(((n ) & 0xFFu) as u8);
if !it.all(|&byte| {byte as char == '='}) {
fail!("Invalid Base64 character");
}
match modulus {
2 => {
r.push((buf >> 10) as u8);
}
3 => {
r.push((buf >> 16) as u8);
r.push((buf >> 8 ) as u8);
}
0 => (),
_ => fail!("Invalid Base64 length")
}
r
}
}
@ -199,20 +247,19 @@ impl<'self> FromBase64 for &'self str {
* to the byte values it encodes.
*
* You can use the `from_bytes` function in `std::str`
* to turn a `[u8]` into a string with characters corresponding to those values.
* to turn a `[u8]` into a string with characters corresponding to those
* values.
*
* # Example
*
* This converts a string literal to base64 and back.
*
* ~~~ {.rust}
* extern mod extra;
* use extra::base64::ToBase64;
* use extra::base64::FromBase64;
* use std::base64::{ToBase64, FromBase64, standard};
* use std::str;
*
* fn main () {
* let hello_str = "Hello, World".to_base64();
* let hello_str = "Hello, World".to_base64(standard);
* println(fmt!("%s",hello_str));
* let bytes = hello_str.from_base64();
* println(fmt!("%?",bytes));
@ -226,27 +273,69 @@ impl<'self> FromBase64 for &'self str {
}
}
#[cfg(test)]
mod tests {
#[test]
fn test_to_base64() {
assert_eq!("".to_base64(), ~"");
assert_eq!("f".to_base64(), ~"Zg==");
assert_eq!("fo".to_base64(), ~"Zm8=");
assert_eq!("foo".to_base64(), ~"Zm9v");
assert_eq!("foob".to_base64(), ~"Zm9vYg==");
assert_eq!("fooba".to_base64(), ~"Zm9vYmE=");
assert_eq!("foobar".to_base64(), ~"Zm9vYmFy");
}
#[test]
fn test_to_base64_basic() {
assert_eq!("".to_base64(standard), ~"");
assert_eq!("f".to_base64(standard), ~"Zg==");
assert_eq!("fo".to_base64(standard), ~"Zm8=");
assert_eq!("foo".to_base64(standard), ~"Zm9v");
assert_eq!("foob".to_base64(standard), ~"Zm9vYg==");
assert_eq!("fooba".to_base64(standard), ~"Zm9vYmE=");
assert_eq!("foobar".to_base64(standard), ~"Zm9vYmFy");
}
#[test]
fn test_from_base64() {
assert_eq!("".from_base64(), "".as_bytes().to_owned());
assert_eq!("Zg==".from_base64(), "f".as_bytes().to_owned());
assert_eq!("Zm8=".from_base64(), "fo".as_bytes().to_owned());
assert_eq!("Zm9v".from_base64(), "foo".as_bytes().to_owned());
assert_eq!("Zm9vYg==".from_base64(), "foob".as_bytes().to_owned());
assert_eq!("Zm9vYmE=".from_base64(), "fooba".as_bytes().to_owned());
assert_eq!("Zm9vYmFy".from_base64(), "foobar".as_bytes().to_owned());
#[test]
fn test_to_base64_line_break() {
assert!(![0u8, 1000].to_base64(Config {line_length: None, ..standard})
.contains("\r\n"));
assert_eq!("foobar".to_base64(Config {line_length: Some(4), ..standard}),
~"Zm9v\r\nYmFy");
}
#[test]
fn test_to_base64_padding() {
assert_eq!("f".to_base64(Config {pad: false, ..standard}), ~"Zg");
assert_eq!("fo".to_base64(Config {pad: false, ..standard}), ~"Zm8");
}
#[test]
fn test_to_base64_url_safe() {
assert_eq!([251, 255].to_base64(url_safe), ~"-_8");
assert_eq!([251, 255].to_base64(standard), ~"+/8=");
}
#[test]
fn test_from_base64_basic() {
assert_eq!("".from_base64(), "".as_bytes().to_owned());
assert_eq!("Zg==".from_base64(), "f".as_bytes().to_owned());
assert_eq!("Zm8=".from_base64(), "fo".as_bytes().to_owned());
assert_eq!("Zm9v".from_base64(), "foo".as_bytes().to_owned());
assert_eq!("Zm9vYg==".from_base64(), "foob".as_bytes().to_owned());
assert_eq!("Zm9vYmE=".from_base64(), "fooba".as_bytes().to_owned());
assert_eq!("Zm9vYmFy".from_base64(), "foobar".as_bytes().to_owned());
}
#[test]
fn test_from_base64_newlines() {
assert_eq!("Zm9v\r\nYmFy".from_base64(), "foobar".as_bytes().to_owned());
}
#[test]
fn test_from_base64_urlsafe() {
assert_eq!("-_8".from_base64(), "+/8=".from_base64());
}
#[test]
fn test_base64_random() {
use std::rand::random;
use std::vec;
for 1000.times {
let v: ~[u8] = do vec::build |push| {
for 100.times {
push(random());
}
};
assert_eq!(v.to_base64(standard).from_base64(), v);
}
}