Add std::istr. Issue #855

2011-08-22 18:06:44 -07:00 · 2011-08-22 18:06:44 -07:00 · 663d07d319
commit 663d07d319
parent 55c54f0db5
5 changed files with 712 additions and 0 deletions
--- a/src/lib/istr.rs
+++ b/src/lib/istr.rs
@ -0,0 +1,428 @@
+export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
+index, rindex, find, starts_with, ends_with, substr, slice, split,
+concat, connect, to_upper, replace, char_slice, trim_left, trim_right, trim,
+unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars, to_chars,
+char_len, char_at, bytes, is_ascii, shift_byte, pop_byte;
+
+// Returns true when `a` and `b` compare equal under the built-in `==`.
+fn eq(a: &istr, b: &istr) -> bool {
+    ret a == b;
+}
+
+// Returns true when `a` orders at or before `b` under the built-in `<=`.
+fn lteq(a: &istr, b: &istr) -> bool {
+    ret a <= b;
+}
+
+// Hashes the bytes of `s` with the djb scheme: start from 5381 and, for
+// every byte, multiply the running value by 33 and add the byte.
+fn hash(s: &istr) -> uint {
+    // FIXME: replace with murmur.
+
+    let acc: uint = 5381u;
+    for b: u8 in s { acc = acc * 33u + (b as uint); }
+    ret acc;
+}
+
+// UTF-8 tags and ranges
+//
+// The tag_* values are the marker bits OR-ed into an encoded byte, and each
+// max_*_b value is the exclusive upper bound on the code points that
+// push_utf8_bytes encodes in that many bytes.
+const tag_cont_u8: u8 = 128u8;           // 10xxxxxx continuation marker (u8)
+const tag_cont: uint = 128u;             // same marker, as a uint
+const max_one_b: uint = 128u;            // 2^7
+const tag_two_b: uint = 192u;            // 110xxxxx lead byte
+const max_two_b: uint = 2048u;           // 2^11
+const tag_three_b: uint = 224u;          // 1110xxxx lead byte
+const max_three_b: uint = 65536u;        // 2^16
+const tag_four_b: uint = 240u;           // 11110xxx lead byte
+const max_four_b: uint = 2097152u;       // 2^21
+const tag_five_b: uint = 248u;           // 111110xx lead byte
+const max_five_b: uint = 67108864u;      // 2^26
+const tag_six_b: uint = 252u;            // 1111110x lead byte
+
+// Reports whether `v` has the byte structure of UTF-8: each lead byte must
+// announce a width that fits in the remaining input and must be followed by
+// width - 1 continuation bytes (top two bits `10`). Only this structure is
+// checked; the decoded values themselves are not inspected.
+fn is_utf8(v: &[u8]) -> bool {
+    let pos = 0u;
+    let n = vec::len::<u8>(v);
+    while pos < n {
+        let width = utf8_char_width(v[pos]);
+        // Width 0 marks a byte that cannot start a character.
+        if width == 0u { ret false; }
+        let stop = pos + width;
+        if stop > n { ret false; }
+        pos += 1u;
+        while pos < stop {
+            if v[pos] & 192u8 != tag_cont_u8 { ret false; }
+            pos += 1u;
+        }
+    }
+    ret true;
+}
+
+// Returns true when no byte of `s` has its high bit (128) set.
+fn is_ascii(s: &istr) -> bool {
+    for b: u8 in s {
+        if b & 128u8 != 0u8 { ret false; }
+    }
+    ret true;
+}
+
+/// Returns true if the string has length 0
+pred is_empty(s: &istr) -> bool {
+    // Bails out on the first byte seen, so the length is never computed.
+    // NOTE(review): presumably written as a loop rather than via byte_len
+    // because of restrictions on what a `pred` may call -- confirm.
+    for c: u8 in s { ret false; } ret true;
+}
+
+/// Returns true if the string has length greater than 0
+///
+/// Defined as the negation of `is_empty`; `replace` uses it as the
+/// constraint on its `from` argument.
+pred is_not_empty(s: &istr) -> bool {
+    !is_empty(s)
+}
+
+// Returns true when every character of `s` is whitespace according to
+// char::is_whitespace. The empty string counts as whitespace.
+//
+// The string is walked by byte offset using char_range_at, which decodes
+// one character and reports where the next one starts. Counting characters
+// and passing that count to char_at (which takes a byte offset) would land
+// in the middle of a multi-byte character as soon as one is followed by
+// more text.
+fn is_whitespace(s: &istr) -> bool {
+    let i = 0u;
+    let len = byte_len(s);
+    while i < len {
+        let cur = char_range_at(s, i);
+        if !char::is_whitespace(cur.ch) { ret false; }
+        i = cur.next;
+    }
+    ret true;
+}
+
+// Returns the length of `s` in bytes, not counting the trailing null.
+fn byte_len(s: &istr) -> uint {
+    // View the string as a byte vector so its length can be read.
+    let v: [u8] = unsafe::reinterpret_cast(s);
+    let vlen = vec::len(v);
+    // NOTE(review): presumably `v` shares storage with `s`, and leaking it
+    // keeps that storage from being released when `v` goes out of scope
+    // -- confirm against the unsafe module.
+    unsafe::leak(v);
+    // There should always be a null terminator
+    assert vlen > 0u;
+    ret vlen - 1u;
+}
+
+// Returns the bytes of `s` as a fresh vector, without the last element of
+// the underlying storage (the null terminator, per byte_len).
+fn bytes(s: &istr) -> [u8] {
+    let v = unsafe::reinterpret_cast(s);
+    // Take everything except the final element.
+    let vcopy = vec::slice(v, 0u, vec::len(v) - 1u);
+    // NOTE(review): presumably `v` aliases the storage of `s` and must not
+    // be released here -- confirm.
+    unsafe::leak(v);
+    ret vcopy;
+}
+
+// Builds a string from raw bytes by appending a null terminator and
+// reinterpreting the resulting vector as an istr. The bytes are not checked
+// for UTF-8 validity, hence the name.
+fn unsafe_from_bytes(v: &[mutable? u8]) -> istr {
+    let vcopy: [u8] = v + [0u8];
+    let scopy: istr = unsafe::reinterpret_cast(vcopy);
+    // `scopy` now refers to the same buffer as `vcopy`; leak the vector so
+    // the buffer has a single owner, matching what `slice` does after the
+    // same cast.
+    unsafe::leak(vcopy);
+    ret scopy;
+}
+
+// Builds a one-byte string from `u` without validating it as UTF-8.
+fn unsafe_from_byte(u: u8) -> istr {
+    ret unsafe_from_bytes([u]);
+}
+
+// Appends the UTF-8 encoding of `ch` to `s`.
+//
+// The code point is compared against the max_*_b bounds to pick an encoded
+// length of one to six bytes. The lead byte carries the matching tag_*_b
+// marker plus the topmost payload bits; every following byte carries
+// tag_cont plus the next six payload bits (mask 63).
+fn push_utf8_bytes(s: &mutable istr, ch: char) {
+    let code = ch as uint;
+    let bytes = if code < max_one_b {
+        // One byte: the code point is stored unchanged.
+        [code as u8]
+    } else if code < max_two_b {
+        // Two bytes: 5 payload bits in the lead byte, 6 in the next.
+        [(code >> 6u & 31u | tag_two_b) as u8,
+         (code & 63u | tag_cont) as u8]
+    } else if code < max_three_b {
+        // Three bytes: 4 + 6 + 6 payload bits.
+        [(code >> 12u & 15u | tag_three_b) as u8,
+         (code >> 6u & 63u | tag_cont) as u8,
+         (code & 63u | tag_cont) as u8]
+    } else if code < max_four_b {
+        // Four bytes: 3 + 6 + 6 + 6 payload bits.
+        [(code >> 18u & 7u | tag_four_b) as u8,
+         (code >> 12u & 63u | tag_cont) as u8,
+         (code >> 6u & 63u | tag_cont) as u8,
+         (code & 63u | tag_cont) as u8]
+    } else if code < max_five_b {
+        // Five bytes: 2 + 4 * 6 payload bits.
+        [(code >> 24u & 3u | tag_five_b) as u8,
+         (code >> 18u & 63u | tag_cont) as u8,
+         (code >> 12u & 63u | tag_cont) as u8,
+         (code >> 6u & 63u | tag_cont) as u8,
+         (code & 63u | tag_cont) as u8]
+    } else {
+        // Six bytes: 1 + 5 * 6 payload bits.
+        [(code >> 30u & 1u | tag_six_b) as u8,
+         (code >> 24u & 63u | tag_cont) as u8,
+         (code >> 18u & 63u | tag_cont) as u8,
+         (code >> 12u & 63u | tag_cont) as u8,
+         (code >> 6u & 63u | tag_cont) as u8,
+         (code & 63u | tag_cont) as u8]
+    };
+    push_bytes(s, bytes);
+}
+
+// Returns a new string holding just the UTF-8 encoding of `ch`.
+fn from_char(ch: char) -> istr {
+    ret from_chars([ch]);
+}
+
+// Returns a new string made of the UTF-8 encodings of `chs`, in order.
+fn from_chars(chs: &[char]) -> istr {
+    let out = ~"";
+    for c: char in chs {
+        push_utf8_bytes(out, c);
+    }
+    ret out;
+}
+
+// Given the first byte of an encoded character, returns how many bytes the
+// whole character occupies (1 to 6), or 0 when `b` falls in 128..191 and
+// therefore cannot start a character.
+fn utf8_char_width(b: u8) -> uint {
+    let lead: uint = b as uint;
+    ret if lead < 128u {
+            1u
+        } else if lead < 192u {
+            0u // Not a valid start byte
+        } else if lead < 224u {
+            2u
+        } else if lead < 240u {
+            3u
+        } else if lead < 248u {
+            4u
+        } else if lead < 252u { 5u } else { 6u };
+}
+
+// Decodes the character whose encoding starts at byte offset `i` of `s`.
+// Returns the character together with `next`, the byte offset just past
+// it. Fails an assertion when `i` does not point at a start byte or when an
+// expected continuation byte is missing its `10` marker.
+fn char_range_at(s: &istr, i: uint) -> {ch: char, next: uint} {
+    let b0 = s[i];
+    let w = utf8_char_width(b0);
+    assert (w != 0u);
+    // Width 1: the byte is the character.
+    if w == 1u { ret {ch: b0 as char, next: i + 1u}; }
+    let val = 0u;
+    let end = i + w;
+    i += 1u;
+    // Fold in the low six bits of each continuation byte.
+    while i < end {
+        let byte = s[i];
+        assert (byte & 192u8 == tag_cont_u8);
+        val <<= 6u;
+        val += byte & 63u8 as uint;
+        i += 1u;
+    }
+    // Clunky way to get the right bits from the first byte. Uses two shifts,
+    // the first to clip off the marker bits at the left of the byte, and then
+    // a second (as uint) to get it to the right position.
+    // NOTE(review): this reads as
+    //   ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u)
+    // assuming `as` binds looser than the arithmetic and shift operators in
+    // this dialect -- confirm against the parser's precedence table.
+    val += (b0 << (w + 1u as u8) as uint) << (w - 1u) * 6u - w - 1u;
+    ret {ch: val as char, next: i};
+}
+
+// Returns the character whose encoding starts at byte offset `i` of `s`.
+fn char_at(s: &istr, i: uint) -> char {
+    let decoded = char_range_at(s, i);
+    ret decoded.ch;
+}
+
+// Counts the characters in `s` by hopping from start byte to start byte.
+// Fails an assertion on a byte that cannot start a character, or when the
+// last character's announced width overruns the end of the string.
+fn char_len(s: &istr) -> uint {
+    let nbytes = byte_len(s);
+    let pos = 0u;
+    let count = 0u;
+    while pos < nbytes {
+        let width = utf8_char_width(s[pos]);
+        assert (width > 0u);
+        pos += width;
+        count += 1u;
+    }
+    assert (pos == nbytes);
+    ret count;
+}
+
+// Decodes `s` into a vector of its characters, in order.
+fn to_chars(s: &istr) -> [char] {
+    let out: [char] = [];
+    let nbytes = byte_len(s);
+    let pos = 0u;
+    while pos < nbytes {
+        let step = char_range_at(s, pos);
+        out += [step.ch];
+        pos = step.next;
+    }
+    ret out;
+}
+
+// Appends the UTF-8 encoding of `ch` to the end of `s`.
+fn push_char(s: &mutable istr, ch: char) {
+    push_utf8_bytes(s, ch);
+}
+
+// Removes the last character of `s` and returns it. Fails an assertion when
+// `s` is empty or consists only of continuation bytes.
+fn pop_char(s: &mutable istr) -> char {
+    // Walk back over trailing continuation bytes; afterwards `cut - 1` is
+    // the offset of the last character's first byte.
+    let cut = byte_len(s);
+    while cut > 0u && s[cut - 1u] & 192u8 == tag_cont_u8 { cut -= 1u; }
+    assert (cut > 0u);
+    let lead = cut - 1u;
+    let ch = char_at(s, lead);
+    s = slice(s, 0u, lead);
+    ret ch;
+}
+
+// Removes the first character of `s` and returns it.
+fn shift_char(s: &mutable istr) -> char {
+    let head = char_range_at(s, 0u);
+    let rest_len = byte_len(s) - head.next;
+    s = substr(s, head.next, rest_len);
+    ret head.ch;
+}
+
+// Inserts the UTF-8 encoding of `ch` at the front of `s`.
+fn unshift_char(s: &mutable istr, ch: char) {
+    let prefix = from_char(ch);
+    s = prefix + s;
+}
+
+// Returns the byte offset of the first occurrence of byte `c` in `s`, or -1
+// when `c` does not occur.
+fn index(s: &istr, c: u8) -> int {
+    let pos: int = 0;
+    for b: u8 in s {
+        if b == c { ret pos; }
+        pos += 1;
+    }
+    ret -1;
+}
+
+// Returns the byte offset of the last occurrence of byte `c` in `s`, or -1
+// when `c` does not occur.
+//
+// The scan starts at the last content byte, byte_len(s) - 1. Starting at
+// byte_len(s) would read the slot past the content (the null terminator)
+// and report a match there when `c` is 0.
+fn rindex(s: &istr, c: u8) -> int {
+    let n: int = (byte_len(s) as int) - 1;
+    while n >= 0 { if s[n] == c { ret n; } n -= 1; }
+    ret -1;
+}
+
+// Returns the byte offset of the first occurrence of `needle` inside
+// `haystack`, or -1 when there is none. An empty needle is found at 0.
+fn find(haystack: &istr, needle: &istr) -> int {
+    let hay_n: int = byte_len(haystack) as int;
+    let needle_n: int = byte_len(needle) as int;
+    if needle_n == 0 { ret 0; }
+    // True when every byte of `pat` matches `hay` starting at `start`.
+    fn matches_from(hay: &istr, pat: &istr, start: int) -> bool {
+        let at: int = start;
+        for b: u8 in pat {
+            if hay[at] != b { ret false; }
+            at += 1;
+        }
+        ret true;
+    }
+    // Last offset at which the needle still fits; negative when the needle
+    // is longer than the haystack, in which case the loop never runs.
+    let last_start: int = hay_n - needle_n;
+    let pos: int = 0;
+    while pos <= last_start {
+        if matches_from(haystack, needle, pos) { ret pos; }
+        pos += 1;
+    }
+    ret -1;
+}
+
+// Returns true when `haystack` begins with the bytes of `needle`. An empty
+// needle always matches; a needle longer than the haystack never does.
+fn starts_with(haystack: &istr, needle: &istr) -> bool {
+    let hay_n: uint = byte_len(haystack);
+    let needle_n: uint = byte_len(needle);
+    ret if needle_n == 0u {
+            true
+        } else if needle_n > hay_n {
+            false
+        } else {
+            eq(slice(haystack, 0u, needle_n), needle)
+        };
+}
+
+// Returns true when `haystack` ends with the bytes of `needle`. An empty
+// needle always matches; a needle longer than the haystack never does.
+fn ends_with(haystack: &istr, needle: &istr) -> bool {
+    let hay_n: uint = byte_len(haystack);
+    let needle_n: uint = byte_len(needle);
+    if needle_n == 0u { ret true; }
+    if needle_n > hay_n { ret false; }
+    let tail = slice(haystack, hay_n - needle_n, hay_n);
+    ret eq(tail, needle);
+}
+
+// Returns the `len` bytes of `s` starting at byte offset `begin`, as a new
+// string. Bounds are enforced by `slice`.
+fn substr(s: &istr, begin: uint, len: uint) -> istr {
+    let end = begin + len;
+    ret slice(s, begin, end);
+}
+
+// Returns the bytes of `s` in the half-open range [begin, end) as a new
+// string. Offsets are in bytes; fails an assertion unless
+// begin <= end <= byte_len(s).
+fn slice(s: &istr, begin: uint, end: uint) -> istr {
+    // FIXME: Typestate precondition
+    assert (begin <= end);
+    assert (end <= byte_len(s));
+
+    // View the string as a byte vector and copy out the requested range.
+    let v: [u8] = unsafe::reinterpret_cast(s);
+    let v2 = vec::slice(v, begin, end);
+    // NOTE(review): presumably `v` aliases the storage of `s` and is leaked
+    // so that storage is not released here -- confirm.
+    unsafe::leak(v);
+    // Re-add the null terminator before turning the copy into a string.
+    v2 += [0u8];
+    let s2: istr = unsafe::reinterpret_cast(v2);
+    // NOTE(review): presumably `s2` takes over the buffer of `v2`, so the
+    // vector is leaked rather than released -- confirm.
+    unsafe::leak(v2);
+    ret s2;
+}
+
+// Same result as `slice`, but `begin <= end` is declared as the constraint
+// uint::le(begin, end) on the signature; only the upper bound is asserted
+// in the body before delegating to `slice`.
+fn safe_slice(s: &istr, begin: uint, end: uint)
+    : uint::le(begin, end) -> istr {
+    // would need some magic to make this a precondition
+    assert (end <= byte_len(s));
+    ret slice(s, begin, end);
+}
+
+// Removes the first byte of `s` and returns it. Fails an assertion when `s`
+// is empty.
+fn shift_byte(s: &mutable istr) -> u8 {
+    let n = byte_len(s);
+    assert (n > 0u);
+    let first = s[0];
+    s = slice(s, 1u, n);
+    ret first;
+}
+
+// Removes the last byte of `s` and returns it. Fails an assertion when `s`
+// is empty.
+fn pop_byte(s: &mutable istr) -> u8 {
+    let n = byte_len(s);
+    assert (n > 0u);
+    let last_at = n - 1u;
+    let last = s[last_at];
+    s = slice(s, 0u, last_at);
+    ret last;
+}
+
+// Appends the single byte `b` to `s`; the byte is not validated as UTF-8.
+fn push_byte(s: &mutable istr, b: u8) {
+    let one = unsafe_from_bytes([b]);
+    s += one;
+}
+
+// Appends each byte of `bytes` to `s`, in order, without validation.
+fn push_bytes(s: &mutable istr, bytes: &[u8]) {
+    for b: u8 in bytes { s += unsafe_from_byte(b); }
+}
+
+// Splits `s` at every occurrence of byte `sep` and returns the pieces
+// without the separators. Leading, consecutive and trailing separators all
+// produce empty pieces; an empty input produces no pieces.
+fn split(s: &istr, sep: u8) -> [istr] {
+    let pieces: [istr] = [];
+    let cur: istr = ~"";
+    // Remembers whether the last byte seen was a separator, so that a
+    // trailing separator still yields a final empty piece.
+    let trailing_sep: bool = false;
+    for b: u8 in s {
+        trailing_sep = b == sep;
+        if trailing_sep {
+            pieces += [cur];
+            cur = ~"";
+        } else { push_byte(cur, b); }
+    }
+    if trailing_sep || byte_len(cur) != 0u { pieces += [cur]; }
+    ret pieces;
+}
+
+// Joins the strings of `v` end to end, with nothing in between.
+fn concat(v: &[istr]) -> istr {
+    let joined: istr = ~"";
+    for part: istr in v {
+        joined += part;
+    }
+    ret joined;
+}
+
+// Joins the strings of `v`, placing `sep` between neighbouring elements
+// (never before the first or after the last).
+fn connect(v: &[istr], sep: &istr) -> istr {
+    let joined: istr = ~"";
+    let need_sep: bool = false;
+    for part: istr in v {
+        if need_sep { joined += sep; }
+        joined += part;
+        need_sep = true;
+    }
+    ret joined;
+}
+
+// FIXME: This only handles ASCII
+// Returns a copy of `s` in which the bytes 'a'..'z' are replaced by their
+// upper-case counterparts (byte value minus 32). All other bytes, including
+// those of multi-byte characters, are copied unchanged.
+fn to_upper(s: &istr) -> istr {
+    let lower_first = 'a' as u8;
+    let lower_last = 'z' as u8;
+    let case_gap = 32u8;
+    let out = ~"";
+    for b: u8 in s {
+        let mapped = if lower_first <= b && b <= lower_last {
+            b - case_gap
+        } else { b };
+        push_byte(out, mapped);
+    }
+    ret out;
+}
+
+// FIXME: This is super-inefficient
+//
+// Returns a copy of `s` with every occurrence of `from` replaced by `to`,
+// scanning left to right. `from` must satisfy is_not_empty. Works
+// recursively: at each step it either consumes one match of `from` or one
+// byte of `s`, and recurses on the remainder.
+fn replace(s: &istr, from: &istr, to: &istr) : is_not_empty(from) -> istr {
+    // FIXME (694): Shouldn't have to check this
+    check (is_not_empty(from));
+    if byte_len(s) == 0u {
+        ret ~"";
+    } else if starts_with(s, from) {
+        // Match at the front: emit `to`, skip past the match.
+        ret to + replace(slice(s, byte_len(from), byte_len(s)), from, to);
+    } else {
+        // No match here: keep the first byte and move on by one.
+        ret unsafe_from_byte(s[0]) +
+                replace(slice(s, 1u, byte_len(s)), from, to);
+    }
+}
+
+// FIXME: Also not efficient
+// Returns the characters of `s` in the half-open range [begin, end), where
+// both bounds count characters rather than bytes. Decodes the whole string
+// and re-encodes the selected part.
+fn char_slice(s: &istr, begin: uint, end: uint) -> istr {
+    let all = to_chars(s);
+    let picked = vec::slice(all, begin, end);
+    ret from_chars(picked);
+}
+
+// Returns a copy of `s` with leading whitespace characters (as judged by
+// char::is_whitespace) removed.
+fn trim_left(s: &istr) -> istr {
+    // Number of whitespace characters at the front of `cs`.
+    fn leading_ws(cs: &[char]) -> uint {
+        let n = vec::len(cs);
+        let i = 0u;
+        while i < n && char::is_whitespace(cs[i]) { i += 1u; }
+        ret i;
+    }
+    let cs = to_chars(s);
+    let skip = leading_ws(cs);
+    let kept = vec::slice(cs, skip, vec::len(cs));
+    ret from_chars(kept);
+}
+
+// Returns a copy of `s` with trailing whitespace characters (as judged by
+// char::is_whitespace) removed.
+fn trim_right(s: &istr) -> istr {
+    // Number of characters of `cs` left once trailing whitespace is cut.
+    fn kept_len(cs: &[char]) -> uint {
+        let i = vec::len(cs);
+        while i > 0u && char::is_whitespace(cs[i - 1u]) { i -= 1u; }
+        ret i;
+    }
+    let cs = to_chars(s);
+    let keep = kept_len(cs);
+    let kept = vec::slice(cs, 0u, keep);
+    ret from_chars(kept);
+}
+
+// Returns a copy of `s` with whitespace removed from both ends: the right
+// side is trimmed first, then the left.
+fn trim(s: &istr) -> istr {
+    let right_trimmed = trim_right(s);
+    ret trim_left(right_trimmed);
+}
--- a/src/lib/std.rc
+++ b/src/lib/std.rc
@ -16,6 +16,7 @@ mod u8;
 mod u64;
 mod vec;
 mod str;
+mod istr;

 // General io and system-services modules.

--- a/src/test/run-pass/utf8_chars-istr.rs
+++ b/src/test/run-pass/utf8_chars-istr.rs
@ -0,0 +1,31 @@
+use std;
+import std::istr;
+import std::vec;
+
+// Round-trips characters of every encoded width from 1 to 4 bytes through
+// the istr character API, then exercises the push/pop/shift/unshift
+// operations on a string containing multi-byte characters.
+fn main() {
+    // Chars of 1, 2, 3, and 4 bytes
+    let chs: [char] = ['e', 'é', '€', 0x10000 as char];
+    let s: istr = istr::from_chars(chs);
+
+    // 1 + 2 + 3 + 4 encoded bytes, but only four characters.
+    assert (istr::byte_len(s) == 10u);
+    assert (istr::char_len(s) == 4u);
+    assert (vec::len::<char>(istr::to_chars(s)) == 4u);
+    assert (istr::eq(istr::from_chars(istr::to_chars(s)), s));
+    // char_at takes a byte offset; 'é' starts at 1 because 'e' is one byte.
+    assert (istr::char_at(s, 0u) == 'e');
+    assert (istr::char_at(s, 1u) == 'é');
+
+    // A lone continuation byte, a truncated two-byte sequence, and a lead
+    // byte followed by a non-continuation byte must all be rejected.
+    assert (istr::is_utf8(istr::bytes(s)));
+    assert (!istr::is_utf8([0x80_u8]));
+    assert (!istr::is_utf8([0xc0_u8]));
+    assert (!istr::is_utf8([0xc0_u8, 0x10_u8]));
+
+    // Use the string as a stack (pop/push at the end), then as a queue
+    // (shift/unshift at the front).
+    let stack = ~"a×c€";
+    assert (istr::pop_char(stack) == '€');
+    assert (istr::pop_char(stack) == 'c');
+    istr::push_char(stack, 'u');
+    assert (istr::eq(stack, ~"a×u"));
+    assert (istr::shift_char(stack) == 'a');
+    assert (istr::shift_char(stack) == '×');
+    istr::unshift_char(stack, 'ß');
+    assert (istr::eq(stack, ~"ßu"));
+}
--- a/src/test/stdtest/istr.rs
+++ b/src/test/stdtest/istr.rs
@ -0,0 +1,251 @@
+import std::istr;
+
+// eq holds for two empty strings and for identical contents, and fails for
+// differing contents.
+#[test]
+fn test_eq() {
+    assert istr::eq(~"", ~"");
+    assert istr::eq(~"foo", ~"foo");
+    assert !istr::eq(~"foo", ~"bar");
+}
+
+// lteq holds for equal strings and when the left side orders first, and
+// fails when the left side orders after the right. The negative case calls
+// lteq itself so the test covers the function returning false.
+#[test]
+fn test_lteq() {
+    assert istr::lteq(~"", ~"");
+    assert istr::lteq(~"", ~"foo");
+    assert istr::lteq(~"foo", ~"foo");
+    assert !istr::lteq(~"foo", ~"bar");
+}
+
+// byte_len counts encoded bytes, not characters: the single-character
+// literals below are expected to occupy 1, 2, 2, 3 and 4 bytes.
+#[test]
+fn test_bytes_len() {
+    assert (istr::byte_len(~"") == 0u);
+    assert (istr::byte_len(~"hello world") == 11u);
+    assert (istr::byte_len(~"\x63") == 1u);
+    assert (istr::byte_len(~"\xa2") == 2u);
+    assert (istr::byte_len(~"\u03c0") == 2u);
+    assert (istr::byte_len(~"\u2620") == 3u);
+    assert (istr::byte_len(~"\U0001d11e") == 4u);
+}
+
+// index reports the first occurrence of a byte and rindex the last (note
+// the two 'l's in "hello"); both report -1 for a byte that is absent.
+#[test]
+fn test_index_and_rindex() {
+    assert (istr::index(~"hello", 'e' as u8) == 1);
+    assert (istr::index(~"hello", 'o' as u8) == 4);
+    assert (istr::index(~"hello", 'z' as u8) == -1);
+    assert (istr::rindex(~"hello", 'l' as u8) == 3);
+    assert (istr::rindex(~"hello", 'h' as u8) == 0);
+    assert (istr::rindex(~"hello", 'z' as u8) == -1);
+}
+
+// Checks individual pieces produced by split, including the empty pieces
+// that come from leading, consecutive and trailing separators.
+#[test]
+fn test_split() {
+    // Splits `s` on `c` and asserts that piece number `i` equals `k`,
+    // logging the intermediate values along the way.
+    fn t(s: &istr, c: char, i: int, k: &istr) {
+        log ~"splitting: " + s;
+        log i;
+        let v = istr::split(s, c as u8);
+        log ~"split to: ";
+        for z: istr in v { log z; }
+        log ~"comparing: " + v[i] + ~" vs. " + k;
+        assert (istr::eq(v[i], k));
+    }
+    t(~"abc.hello.there", '.', 0, ~"abc");
+    t(~"abc.hello.there", '.', 1, ~"hello");
+    t(~"abc.hello.there", '.', 2, ~"there");
+    t(~".hello.there", '.', 0, ~"");
+    t(~".hello.there", '.', 1, ~"hello");
+    t(~"...hello.there.", '.', 3, ~"hello");
+    t(~"...hello.there.", '.', 5, ~"");
+}
+
+// find reports the byte offset of the first match, 0 for an empty needle,
+// and -1 when the needle is absent or longer than the haystack.
+#[test]
+fn test_find() {
+    // Asserts that searching `haystack` for `needle` yields offset `i`.
+    fn t(haystack: &istr, needle: &istr, i: int) {
+        let j: int = istr::find(haystack, needle);
+        log ~"searched for " + needle;
+        log j;
+        assert (i == j);
+    }
+    t(~"this is a simple", ~"is a", 5);
+    t(~"this is a simple", ~"is z", -1);
+    t(~"this is a simple", ~"", 0);
+    t(~"this is a simple", ~"simple", 10);
+    t(~"this", ~"simple", -1);
+}
+
+// substr(a, start, len) must return exactly the expected fragment.
+#[test]
+fn test_substr() {
+    // Asserts that the byte_len(b) bytes of `a` beginning at `start` are
+    // equal to `b`.
+    fn t(a: &istr, b: &istr, start: int) {
+        assert (istr::eq(istr::substr(a, start as uint,
+                                      istr::byte_len(b)), b));
+    }
+    t(~"hello", ~"llo", 2);
+    t(~"hello", ~"el", 1);
+    t(~"substr should not be a challenge", ~"not", 14);
+}
+
+// concat joins elements with nothing in between; covers several elements,
+// an empty vector and a single element.
+#[test]
+fn test_concat() {
+    fn t(v: &[istr], s: &istr) { assert (istr::eq(istr::concat(v), s)); }
+    t([~"you", ~"know", ~"I'm", ~"no", ~"good"], ~"youknowI'mnogood");
+    let v: [istr] = [];
+    t(v, ~"");
+    t([~"hi"], ~"hi");
+}
+
+// connect puts the separator only between elements; covers several
+// elements, an empty vector and a single element.
+#[test]
+fn test_connect() {
+    fn t(v: &[istr], sep: &istr, s: &istr) {
+        assert (istr::eq(istr::connect(v, sep), s));
+    }
+    t([~"you", ~"know", ~"I'm", ~"no", ~"good"], ~" ",
+      ~"you know I'm no good");
+    let v: [istr] = [];
+    t(v, ~" ", ~"");
+    t([~"hi"], ~" ", ~"hi");
+}
+
+// ASCII lower-case letters are upper-cased; upper-case letters, punctuation
+// and the embedded non-ASCII characters must come through unchanged.
+#[test]
+fn test_to_upper() {
+    // to_upper doesn't understand unicode yet,
+    // but we need to at least preserve it
+
+    let unicode = ~"\u65e5\u672c";
+    let input = ~"abcDEF" + unicode + ~"xyz:.;";
+    let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
+    let actual = istr::to_upper(input);
+    assert (istr::eq(expected, actual));
+}
+
+// slice on small inputs (prefix, suffix, empty range), then on a large
+// input: the first 500000 bytes of a 1000000-byte string of 'a's.
+#[test]
+fn test_slice() {
+    assert (istr::eq(~"ab", istr::slice(~"abc", 0u, 2u)));
+    assert (istr::eq(~"bc", istr::slice(~"abc", 1u, 3u)));
+    assert (istr::eq(~"", istr::slice(~"abc", 1u, 1u)));
+    // 100000 appends of ten 'a's.
+    fn a_million_letter_a() -> istr {
+        let i = 0;
+        let rs = ~"";
+        while i < 100000 { rs += ~"aaaaaaaaaa"; i += 1; }
+        ret rs;
+    }
+    // 100000 appends of five 'a's.
+    fn half_a_million_letter_a() -> istr {
+        let i = 0;
+        let rs = ~"";
+        while i < 100000 { rs += ~"aaaaa"; i += 1; }
+        ret rs;
+    }
+    assert (istr::eq(half_a_million_letter_a(),
+                    istr::slice(a_million_letter_a(), 0u, 500000u)));
+}
+
+// An empty needle always matches; a needle longer than the haystack never
+// does.
+#[test]
+fn test_starts_with() {
+    assert (istr::starts_with(~"", ~""));
+    assert (istr::starts_with(~"abc", ~""));
+    assert (istr::starts_with(~"abc", ~"a"));
+    assert (!istr::starts_with(~"a", ~"abc"));
+    assert (!istr::starts_with(~"", ~"abc"));
+}
+
+// An empty needle always matches; a needle longer than the haystack never
+// does.
+#[test]
+fn test_ends_with() {
+    assert (istr::ends_with(~"", ~""));
+    assert (istr::ends_with(~"abc", ~""));
+    assert (istr::ends_with(~"abc", ~"c"));
+    assert (!istr::ends_with(~"a", ~"abc"));
+    assert (!istr::ends_with(~"", ~"abc"));
+}
+
+// is_empty holds for the empty string and not for a one-byte string.
+#[test]
+fn test_is_empty() {
+    assert (istr::is_empty(~""));
+    assert (!istr::is_empty(~"a"));
+}
+
+// is_not_empty holds for a one-byte string and not for the empty string.
+#[test]
+fn test_is_not_empty() {
+    assert (istr::is_not_empty(~"a"));
+    assert (!istr::is_not_empty(~""));
+}
+
+// replace on empty input, single and repeated matches, and replacement by
+// the empty string. Each pattern is first established as non-empty with
+// `check`, since replace declares is_not_empty(from) on its signature.
+#[test]
+fn test_replace() {
+    let a = ~"a";
+    check (istr::is_not_empty(a));
+    assert (istr::replace(~"", a, ~"b") == ~"");
+    assert (istr::replace(~"a", a, ~"b") == ~"b");
+    assert (istr::replace(~"ab", a, ~"b") == ~"bb");
+    let test = ~"test";
+    check (istr::is_not_empty(test));
+    assert (istr::replace(~" test test ", test, ~"toast")
+            == ~" toast toast ");
+    assert (istr::replace(~" test test ", test, ~"") == ~"   ");
+}
+
+// char_slice bounds count characters: the last case takes the first of two
+// non-ASCII characters using the range 0..1.
+#[test]
+fn test_char_slice() {
+    assert (istr::eq(~"ab", istr::char_slice(~"abc", 0u, 2u)));
+    assert (istr::eq(~"bc", istr::char_slice(~"abc", 1u, 3u)));
+    assert (istr::eq(~"", istr::char_slice(~"abc", 1u, 1u)));
+    assert (istr::eq(~"\u65e5", istr::char_slice(~"\u65e5\u672c", 0u, 1u)));
+}
+
+// trim_left strips leading whitespace (including \u3000) and leaves
+// trailing whitespace alone.
+#[test]
+fn trim_left() {
+    assert (istr::trim_left(~"") == ~"");
+    assert (istr::trim_left(~"a") == ~"a");
+    assert (istr::trim_left(~"    ") == ~"");
+    assert (istr::trim_left(~"     blah") == ~"blah");
+    assert (istr::trim_left(~"   \u3000  wut") == ~"wut");
+    assert (istr::trim_left(~"hey ") == ~"hey ");
+}
+
+// trim_right strips trailing whitespace (including \u3000) and leaves
+// leading whitespace alone.
+#[test]
+fn trim_right() {
+    assert (istr::trim_right(~"") == ~"");
+    assert (istr::trim_right(~"a") == ~"a");
+    assert (istr::trim_right(~"    ") == ~"");
+    assert (istr::trim_right(~"blah     ") == ~"blah");
+    assert (istr::trim_right(~"wut   \u3000  ") == ~"wut");
+    assert (istr::trim_right(~" hey") == ~" hey");
+}
+
+// trim strips whitespace from both ends and keeps interior whitespace.
+#[test]
+fn trim() {
+    assert (istr::trim(~"") == ~"");
+    assert (istr::trim(~"a") == ~"a");
+    assert (istr::trim(~"    ") == ~"");
+    assert (istr::trim(~"    blah     ") == ~"blah");
+    assert (istr::trim(~"\nwut   \u3000  ") == ~"wut");
+    assert (istr::trim(~" hey dude ") == ~"hey dude");
+}
+
+// is_whitespace holds for the empty string and for strings made only of
+// whitespace characters, and fails once any other character is present.
+#[test]
+fn is_whitespace() {
+    assert (istr::is_whitespace(~""));
+    assert (istr::is_whitespace(~" "));
+    assert (istr::is_whitespace(~"\u2009")); // Thin space
+    assert (istr::is_whitespace(~"  \n\t   "));
+    assert (!istr::is_whitespace(~"   _   "));
+}
+
+// is_ascii holds for the empty string and for "a", and fails for a string
+// containing \u2009.
+#[test]
+fn is_ascii() {
+    assert istr::is_ascii(~"");
+    assert istr::is_ascii(~"a");
+    assert !istr::is_ascii(~"\u2009");
+}
+
+// shift_byte removes the first byte in place and returns it (65 is 'A').
+#[test]
+fn shift_byte() {
+    let s = ~"ABC";
+    let b = istr::shift_byte(s);
+    assert s == ~"BC";
+    assert b == 65u8;
+}
+
+// pop_byte removes the last byte in place and returns it (67 is 'C').
+#[test]
+fn pop_byte() {
+    let s = ~"ABC";
+    let b = istr::pop_byte(s);
+    assert s == ~"AB";
+    assert b == 67u8;
+}
--- a/src/test/stdtest/stdtest.rc
+++ b/src/test/stdtest/stdtest.rc
@ -25,6 +25,7 @@ mod sha1;
 mod sort;
 mod str_buf;
 mod str;
+mod istr;
 mod task;
 mod test;
 mod uint;