Add std::istr. Issue #855
This commit is contained in:
parent
55c54f0db5
commit
663d07d319
428
src/lib/istr.rs
Normal file
428
src/lib/istr.rs
Normal file
@ -0,0 +1,428 @@
|
||||
export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
|
||||
index, rindex, find, starts_with, ends_with, substr, slice, split,
|
||||
concat, connect, to_upper, replace, char_slice, trim_left, trim_right, trim,
|
||||
unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars, to_chars,
|
||||
char_len, char_at, bytes, is_ascii, shift_byte, pop_byte;
|
||||
|
||||
fn eq(a: &istr, b: &istr) -> bool { a == b }
|
||||
|
||||
fn lteq(a: &istr, b: &istr) -> bool { a <= b }
|
||||
|
||||
fn hash(s: &istr) -> uint {
|
||||
// djb hash.
|
||||
// FIXME: replace with murmur.
|
||||
|
||||
let u: uint = 5381u;
|
||||
for c: u8 in s { u *= 33u; u += c as uint; }
|
||||
ret u;
|
||||
}
|
||||
|
||||
// UTF-8 tags and ranges
|
||||
const tag_cont_u8: u8 = 128u8;
|
||||
const tag_cont: uint = 128u;
|
||||
const max_one_b: uint = 128u;
|
||||
const tag_two_b: uint = 192u;
|
||||
const max_two_b: uint = 2048u;
|
||||
const tag_three_b: uint = 224u;
|
||||
const max_three_b: uint = 65536u;
|
||||
const tag_four_b: uint = 240u;
|
||||
const max_four_b: uint = 2097152u;
|
||||
const tag_five_b: uint = 248u;
|
||||
const max_five_b: uint = 67108864u;
|
||||
const tag_six_b: uint = 252u;
|
||||
|
||||
fn is_utf8(v: &[u8]) -> bool {
|
||||
let i = 0u;
|
||||
let total = vec::len::<u8>(v);
|
||||
while i < total {
|
||||
let chsize = utf8_char_width(v[i]);
|
||||
if chsize == 0u { ret false; }
|
||||
if i + chsize > total { ret false; }
|
||||
i += 1u;
|
||||
while chsize > 1u {
|
||||
if v[i] & 192u8 != tag_cont_u8 { ret false; }
|
||||
i += 1u;
|
||||
chsize -= 1u;
|
||||
}
|
||||
}
|
||||
ret true;
|
||||
}
|
||||
|
||||
fn is_ascii(s: &istr) -> bool {
|
||||
let i: uint = byte_len(s);
|
||||
while i > 0u { i -= 1u; if s[i] & 128u8 != 0u8 { ret false; } }
|
||||
ret true;
|
||||
}
|
||||
|
||||
/// Returns true if the string has length 0
|
||||
pred is_empty(s: &istr) -> bool {
|
||||
for c: u8 in s { ret false; } ret true;
|
||||
}
|
||||
|
||||
/// Returns true if the string has length greater than 0
|
||||
pred is_not_empty(s: &istr) -> bool {
|
||||
!is_empty(s)
|
||||
}
|
||||
|
||||
fn is_whitespace(s: &istr) -> bool {
|
||||
let i = 0u;
|
||||
let len = char_len(s);
|
||||
while i < len {
|
||||
if !char::is_whitespace(char_at(s, i)) { ret false; }
|
||||
i += 1u
|
||||
}
|
||||
ret true;
|
||||
}
|
||||
|
||||
fn byte_len(s: &istr) -> uint {
|
||||
let v: [u8] = unsafe::reinterpret_cast(s);
|
||||
let vlen = vec::len(v);
|
||||
unsafe::leak(v);
|
||||
// There should always be a null terminator
|
||||
assert vlen > 0u;
|
||||
ret vlen - 1u;
|
||||
}
|
||||
|
||||
fn bytes(s: &istr) -> [u8] {
|
||||
let v = unsafe::reinterpret_cast(s);
|
||||
let vcopy = vec::slice(v, 0u, vec::len(v) - 1u);
|
||||
unsafe::leak(v);
|
||||
ret vcopy;
|
||||
}
|
||||
|
||||
fn unsafe_from_bytes(v: &[mutable? u8]) -> istr {
|
||||
let vcopy: [u8] = v + [0u8];
|
||||
let scopy: istr = unsafe::reinterpret_cast(vcopy);
|
||||
ret scopy;
|
||||
}
|
||||
|
||||
fn unsafe_from_byte(u: u8) -> istr {
|
||||
unsafe_from_bytes([u])
|
||||
}
|
||||
|
||||
fn push_utf8_bytes(s: &mutable istr, ch: char) {
|
||||
let code = ch as uint;
|
||||
let bytes = if code < max_one_b {
|
||||
[code as u8]
|
||||
} else if code < max_two_b {
|
||||
[(code >> 6u & 31u | tag_two_b) as u8,
|
||||
(code & 63u | tag_cont) as u8]
|
||||
} else if code < max_three_b {
|
||||
[(code >> 12u & 15u | tag_three_b) as u8,
|
||||
(code >> 6u & 63u | tag_cont) as u8,
|
||||
(code & 63u | tag_cont) as u8]
|
||||
} else if code < max_four_b {
|
||||
[(code >> 18u & 7u | tag_four_b) as u8,
|
||||
(code >> 12u & 63u | tag_cont) as u8,
|
||||
(code >> 6u & 63u | tag_cont) as u8,
|
||||
(code & 63u | tag_cont) as u8]
|
||||
} else if code < max_five_b {
|
||||
[(code >> 24u & 3u | tag_five_b) as u8,
|
||||
(code >> 18u & 63u | tag_cont) as u8,
|
||||
(code >> 12u & 63u | tag_cont) as u8,
|
||||
(code >> 6u & 63u | tag_cont) as u8,
|
||||
(code & 63u | tag_cont) as u8]
|
||||
} else {
|
||||
[(code >> 30u & 1u | tag_six_b) as u8,
|
||||
(code >> 24u & 63u | tag_cont) as u8,
|
||||
(code >> 18u & 63u | tag_cont) as u8,
|
||||
(code >> 12u & 63u | tag_cont) as u8,
|
||||
(code >> 6u & 63u | tag_cont) as u8,
|
||||
(code & 63u | tag_cont) as u8]
|
||||
};
|
||||
push_bytes(s, bytes);
|
||||
}
|
||||
|
||||
fn from_char(ch: char) -> istr {
|
||||
let buf = ~"";
|
||||
push_utf8_bytes(buf, ch);
|
||||
ret buf;
|
||||
}
|
||||
|
||||
fn from_chars(chs: &[char]) -> istr {
|
||||
let buf = ~"";
|
||||
for ch: char in chs { push_utf8_bytes(buf, ch); }
|
||||
ret buf;
|
||||
}
|
||||
|
||||
fn utf8_char_width(b: u8) -> uint {
|
||||
let byte: uint = b as uint;
|
||||
if byte < 128u { ret 1u; }
|
||||
if byte < 192u {
|
||||
ret 0u; // Not a valid start byte
|
||||
|
||||
}
|
||||
if byte < 224u { ret 2u; }
|
||||
if byte < 240u { ret 3u; }
|
||||
if byte < 248u { ret 4u; }
|
||||
if byte < 252u { ret 5u; }
|
||||
ret 6u;
|
||||
}
|
||||
|
||||
// Decodes the character that starts at byte offset `i` of `s`.
//
// Returns the character together with `next`, the byte offset just past it.
// Fails (assert) when the byte at `i` is not a valid start byte or when one
// of the following bytes lacks the continuation tag.
fn char_range_at(s: &istr, i: uint) -> {ch: char, next: uint} {
    let lead = s[i];
    let width = utf8_char_width(lead);
    assert (width != 0u);
    // Single-byte characters need no further decoding.
    if width == 1u { ret {ch: lead as char, next: i + 1u}; }

    let acc = 0u;
    let stop = i + width;
    i += 1u;
    // Fold the low six bits of each continuation byte into the accumulator.
    while i < stop {
        let cont = s[i];
        assert (cont & 192u8 == tag_cont_u8);
        acc <<= 6u;
        acc += cont & 63u8 as uint;
        i += 1u;
    }
    // Clunky way to get the right bits from the first byte. Uses two shifts,
    // the first to clip off the marker bits at the left of the byte, and then
    // a second (as uint) to get it to the right position.
    acc += (lead << (width + 1u as u8) as uint) << (width - 1u) * 6u - width - 1u;
    ret {ch: acc as char, next: i};
}
|
||||
|
||||
fn char_at(s: &istr, i: uint) -> char { ret char_range_at(s, i).ch; }
|
||||
|
||||
fn char_len(s: &istr) -> uint {
|
||||
let i = 0u;
|
||||
let len = 0u;
|
||||
let total = byte_len(s);
|
||||
while i < total {
|
||||
let chsize = utf8_char_width(s[i]);
|
||||
assert (chsize > 0u);
|
||||
len += 1u;
|
||||
i += chsize;
|
||||
}
|
||||
assert (i == total);
|
||||
ret len;
|
||||
}
|
||||
|
||||
fn to_chars(s: &istr) -> [char] {
|
||||
let buf: [char] = [];
|
||||
let i = 0u;
|
||||
let len = byte_len(s);
|
||||
while i < len {
|
||||
let cur = char_range_at(s, i);
|
||||
buf += [cur.ch];
|
||||
i = cur.next;
|
||||
}
|
||||
ret buf;
|
||||
}
|
||||
|
||||
// Appends the character `ch` to the end of `s`.
fn push_char(s: &mutable istr, ch: char) {
    let encoded = from_char(ch);
    s += encoded;
}
|
||||
|
||||
fn pop_char(s: &mutable istr) -> char {
|
||||
let end = byte_len(s);
|
||||
while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
|
||||
assert (end > 0u);
|
||||
let ch = char_at(s, end - 1u);
|
||||
s = substr(s, 0u, end - 1u);
|
||||
ret ch;
|
||||
}
|
||||
|
||||
fn shift_char(s: &mutable istr) -> char {
|
||||
let r = char_range_at(s, 0u);
|
||||
s = substr(s, r.next, byte_len(s) - r.next);
|
||||
ret r.ch;
|
||||
}
|
||||
|
||||
// Inserts the character `ch` at the front of `s`.
fn unshift_char(s: &mutable istr, ch: char) {
    let head = from_char(ch);
    s = head + s;
}
|
||||
|
||||
fn index(s: &istr, c: u8) -> int {
|
||||
let i: int = 0;
|
||||
for k: u8 in s { if k == c { ret i; } i += 1; }
|
||||
ret -1;
|
||||
}
|
||||
|
||||
fn rindex(s: &istr, c: u8) -> int {
|
||||
let n: int = byte_len(s) as int;
|
||||
while n >= 0 { if s[n] == c { ret n; } n -= 1; }
|
||||
ret n;
|
||||
}
|
||||
|
||||
fn find(haystack: &istr, needle: &istr) -> int {
|
||||
let haystack_len: int = byte_len(haystack) as int;
|
||||
let needle_len: int = byte_len(needle) as int;
|
||||
if needle_len == 0 { ret 0; }
|
||||
fn match_at(haystack: &istr, needle: &istr, i: int) -> bool {
|
||||
let j: int = i;
|
||||
for c: u8 in needle { if haystack[j] != c { ret false; } j += 1; }
|
||||
ret true;
|
||||
}
|
||||
let i: int = 0;
|
||||
while i <= haystack_len - needle_len {
|
||||
if match_at(haystack, needle, i) { ret i; }
|
||||
i += 1;
|
||||
}
|
||||
ret -1;
|
||||
}
|
||||
|
||||
fn starts_with(haystack: &istr, needle: &istr) -> bool {
|
||||
let haystack_len: uint = byte_len(haystack);
|
||||
let needle_len: uint = byte_len(needle);
|
||||
if needle_len == 0u { ret true; }
|
||||
if needle_len > haystack_len { ret false; }
|
||||
ret eq(substr(haystack, 0u, needle_len), needle);
|
||||
}
|
||||
|
||||
fn ends_with(haystack: &istr, needle: &istr) -> bool {
|
||||
let haystack_len: uint = byte_len(haystack);
|
||||
let needle_len: uint = byte_len(needle);
|
||||
ret if needle_len == 0u {
|
||||
true
|
||||
} else if needle_len > haystack_len {
|
||||
false
|
||||
} else {
|
||||
eq(substr(haystack, haystack_len - needle_len, needle_len),
|
||||
needle)
|
||||
};
|
||||
}
|
||||
|
||||
fn substr(s: &istr, begin: uint, len: uint) -> istr {
|
||||
ret slice(s, begin, begin + len);
|
||||
}
|
||||
|
||||
fn slice(s: &istr, begin: uint, end: uint) -> istr {
|
||||
// FIXME: Typestate precondition
|
||||
assert (begin <= end);
|
||||
assert (end <= byte_len(s));
|
||||
|
||||
let v: [u8] = unsafe::reinterpret_cast(s);
|
||||
let v2 = vec::slice(v, begin, end);
|
||||
unsafe::leak(v);
|
||||
v2 += [0u8];
|
||||
let s2: istr = unsafe::reinterpret_cast(v2);
|
||||
unsafe::leak(v2);
|
||||
ret s2;
|
||||
}
|
||||
|
||||
fn safe_slice(s: &istr, begin: uint, end: uint)
|
||||
: uint::le(begin, end) -> istr {
|
||||
// would need some magic to make this a precondition
|
||||
assert (end <= byte_len(s));
|
||||
ret slice(s, begin, end);
|
||||
}
|
||||
|
||||
fn shift_byte(s: &mutable istr) -> u8 {
|
||||
let len = byte_len(s);
|
||||
assert (len > 0u);
|
||||
let b = s[0];
|
||||
s = substr(s, 1u, len - 1u);
|
||||
ret b;
|
||||
}
|
||||
|
||||
fn pop_byte(s: &mutable istr) -> u8 {
|
||||
let len = byte_len(s);
|
||||
assert (len > 0u);
|
||||
let b = s[len - 1u];
|
||||
s = substr(s, 0u, len - 1u);
|
||||
ret b;
|
||||
}
|
||||
|
||||
fn push_byte(s: &mutable istr, b: u8) {
|
||||
s += unsafe_from_byte(b);
|
||||
}
|
||||
|
||||
fn push_bytes(s: &mutable istr, bytes: &[u8]) {
|
||||
for byte in bytes {
|
||||
push_byte(s, byte);
|
||||
}
|
||||
}
|
||||
|
||||
fn split(s: &istr, sep: u8) -> [istr] {
|
||||
let v: [istr] = [];
|
||||
let accum: istr = ~"";
|
||||
let ends_with_sep: bool = false;
|
||||
for c: u8 in s {
|
||||
if c == sep {
|
||||
v += [accum];
|
||||
accum = ~"";
|
||||
ends_with_sep = true;
|
||||
} else { accum += unsafe_from_byte(c); ends_with_sep = false; }
|
||||
}
|
||||
if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
|
||||
ret v;
|
||||
}
|
||||
|
||||
fn concat(v: &[istr]) -> istr {
|
||||
let s: istr = ~"";
|
||||
for ss: istr in v { s += ss; }
|
||||
ret s;
|
||||
}
|
||||
|
||||
fn connect(v: &[istr], sep: &istr) -> istr {
|
||||
let s: istr = ~"";
|
||||
let first: bool = true;
|
||||
for ss: istr in v {
|
||||
if first { first = false; } else { s += sep; }
|
||||
s += ss;
|
||||
}
|
||||
ret s;
|
||||
}
|
||||
|
||||
// FIXME: This only handles ASCII
|
||||
fn to_upper(s: &istr) -> istr {
|
||||
let outstr = ~"";
|
||||
let ascii_a = 'a' as u8;
|
||||
let ascii_z = 'z' as u8;
|
||||
let diff = 32u8;
|
||||
for byte: u8 in s {
|
||||
let next;
|
||||
if ascii_a <= byte && byte <= ascii_z {
|
||||
next = byte - diff;
|
||||
} else { next = byte; }
|
||||
push_byte(outstr, next);
|
||||
}
|
||||
ret outstr;
|
||||
}
|
||||
|
||||
// FIXME: This is super-inefficient
|
||||
fn replace(s: &istr, from: &istr, to: &istr) : is_not_empty(from) -> istr {
|
||||
// FIXME (694): Shouldn't have to check this
|
||||
check (is_not_empty(from));
|
||||
if byte_len(s) == 0u {
|
||||
ret ~"";
|
||||
} else if starts_with(s, from) {
|
||||
ret to + replace(slice(s, byte_len(from), byte_len(s)), from, to);
|
||||
} else {
|
||||
ret unsafe_from_byte(s[0]) +
|
||||
replace(slice(s, 1u, byte_len(s)), from, to);
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Also not efficient
|
||||
fn char_slice(s: &istr, begin: uint, end: uint) -> istr {
|
||||
from_chars(vec::slice(to_chars(s), begin, end))
|
||||
}
|
||||
|
||||
fn trim_left(s: &istr) -> istr {
|
||||
fn count_whities(s: &[char]) -> uint {
|
||||
let i = 0u;
|
||||
while i < vec::len(s) {
|
||||
if !char::is_whitespace(s[i]) { break; }
|
||||
i += 1u;
|
||||
}
|
||||
ret i;
|
||||
}
|
||||
let chars = to_chars(s);
|
||||
let whities = count_whities(chars);
|
||||
ret from_chars(vec::slice(chars, whities, vec::len(chars)));
|
||||
}
|
||||
|
||||
fn trim_right(s: &istr) -> istr {
|
||||
fn count_whities(s: &[char]) -> uint {
|
||||
let i = vec::len(s);
|
||||
while 0u < i {
|
||||
if !char::is_whitespace(s[i - 1u]) { break; }
|
||||
i -= 1u;
|
||||
}
|
||||
ret i;
|
||||
}
|
||||
let chars = to_chars(s);
|
||||
let whities = count_whities(chars);
|
||||
ret from_chars(vec::slice(chars, 0u, whities));
|
||||
}
|
||||
|
||||
fn trim(s: &istr) -> istr {
|
||||
trim_left(trim_right(s))
|
||||
}
|
@ -16,6 +16,7 @@ mod u8;
|
||||
mod u64;
|
||||
mod vec;
|
||||
mod str;
|
||||
mod istr;
|
||||
|
||||
// General io and system-services modules.
|
||||
|
||||
|
31
src/test/run-pass/utf8_chars-istr.rs
Normal file
31
src/test/run-pass/utf8_chars-istr.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use std;
|
||||
import std::istr;
|
||||
import std::vec;
|
||||
|
||||
fn main() {
|
||||
// Chars of 1, 2, 3, and 4 bytes
|
||||
let chs: [char] = ['e', 'é', '€', 0x10000 as char];
|
||||
let s: istr = istr::from_chars(chs);
|
||||
|
||||
assert (istr::byte_len(s) == 10u);
|
||||
assert (istr::char_len(s) == 4u);
|
||||
assert (vec::len::<char>(istr::to_chars(s)) == 4u);
|
||||
assert (istr::eq(istr::from_chars(istr::to_chars(s)), s));
|
||||
assert (istr::char_at(s, 0u) == 'e');
|
||||
assert (istr::char_at(s, 1u) == 'é');
|
||||
|
||||
assert (istr::is_utf8(istr::bytes(s)));
|
||||
assert (!istr::is_utf8([0x80_u8]));
|
||||
assert (!istr::is_utf8([0xc0_u8]));
|
||||
assert (!istr::is_utf8([0xc0_u8, 0x10_u8]));
|
||||
|
||||
let stack = ~"a×c€";
|
||||
assert (istr::pop_char(stack) == '€');
|
||||
assert (istr::pop_char(stack) == 'c');
|
||||
istr::push_char(stack, 'u');
|
||||
assert (istr::eq(stack, ~"a×u"));
|
||||
assert (istr::shift_char(stack) == 'a');
|
||||
assert (istr::shift_char(stack) == '×');
|
||||
istr::unshift_char(stack, 'ß');
|
||||
assert (istr::eq(stack, ~"ßu"));
|
||||
}
|
251
src/test/stdtest/istr.rs
Normal file
251
src/test/stdtest/istr.rs
Normal file
@ -0,0 +1,251 @@
|
||||
import std::istr;
|
||||
|
||||
#[test]
|
||||
fn test_eq() {
|
||||
assert istr::eq(~"", ~"");
|
||||
assert istr::eq(~"foo", ~"foo");
|
||||
assert !istr::eq(~"foo", ~"bar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lteq() {
|
||||
assert istr::lteq(~"", ~"");
|
||||
assert istr::lteq(~"", ~"foo");
|
||||
assert istr::lteq(~"foo", ~"foo");
|
||||
assert !istr::eq(~"foo", ~"bar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytes_len() {
|
||||
assert (istr::byte_len(~"") == 0u);
|
||||
assert (istr::byte_len(~"hello world") == 11u);
|
||||
assert (istr::byte_len(~"\x63") == 1u);
|
||||
assert (istr::byte_len(~"\xa2") == 2u);
|
||||
assert (istr::byte_len(~"\u03c0") == 2u);
|
||||
assert (istr::byte_len(~"\u2620") == 3u);
|
||||
assert (istr::byte_len(~"\U0001d11e") == 4u);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_and_rindex() {
|
||||
assert (istr::index(~"hello", 'e' as u8) == 1);
|
||||
assert (istr::index(~"hello", 'o' as u8) == 4);
|
||||
assert (istr::index(~"hello", 'z' as u8) == -1);
|
||||
assert (istr::rindex(~"hello", 'l' as u8) == 3);
|
||||
assert (istr::rindex(~"hello", 'h' as u8) == 0);
|
||||
assert (istr::rindex(~"hello", 'z' as u8) == -1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split() {
|
||||
fn t(s: &istr, c: char, i: int, k: &istr) {
|
||||
log ~"splitting: " + s;
|
||||
log i;
|
||||
let v = istr::split(s, c as u8);
|
||||
log ~"split to: ";
|
||||
for z: istr in v { log z; }
|
||||
log ~"comparing: " + v[i] + ~" vs. " + k;
|
||||
assert (istr::eq(v[i], k));
|
||||
}
|
||||
t(~"abc.hello.there", '.', 0, ~"abc");
|
||||
t(~"abc.hello.there", '.', 1, ~"hello");
|
||||
t(~"abc.hello.there", '.', 2, ~"there");
|
||||
t(~".hello.there", '.', 0, ~"");
|
||||
t(~".hello.there", '.', 1, ~"hello");
|
||||
t(~"...hello.there.", '.', 3, ~"hello");
|
||||
t(~"...hello.there.", '.', 5, ~"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find() {
|
||||
fn t(haystack: &istr, needle: &istr, i: int) {
|
||||
let j: int = istr::find(haystack, needle);
|
||||
log ~"searched for " + needle;
|
||||
log j;
|
||||
assert (i == j);
|
||||
}
|
||||
t(~"this is a simple", ~"is a", 5);
|
||||
t(~"this is a simple", ~"is z", -1);
|
||||
t(~"this is a simple", ~"", 0);
|
||||
t(~"this is a simple", ~"simple", 10);
|
||||
t(~"this", ~"simple", -1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_substr() {
|
||||
fn t(a: &istr, b: &istr, start: int) {
|
||||
assert (istr::eq(istr::substr(a, start as uint,
|
||||
istr::byte_len(b)), b));
|
||||
}
|
||||
t(~"hello", ~"llo", 2);
|
||||
t(~"hello", ~"el", 1);
|
||||
t(~"substr should not be a challenge", ~"not", 14);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concat() {
|
||||
fn t(v: &[istr], s: &istr) { assert (istr::eq(istr::concat(v), s)); }
|
||||
t([~"you", ~"know", ~"I'm", ~"no", ~"good"], ~"youknowI'mnogood");
|
||||
let v: [istr] = [];
|
||||
t(v, ~"");
|
||||
t([~"hi"], ~"hi");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_connect() {
|
||||
fn t(v: &[istr], sep: &istr, s: &istr) {
|
||||
assert (istr::eq(istr::connect(v, sep), s));
|
||||
}
|
||||
t([~"you", ~"know", ~"I'm", ~"no", ~"good"], ~" ",
|
||||
~"you know I'm no good");
|
||||
let v: [istr] = [];
|
||||
t(v, ~" ", ~"");
|
||||
t([~"hi"], ~" ", ~"hi");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_upper() {
|
||||
// to_upper doesn't understand unicode yet,
|
||||
// but we need to at least preserve it
|
||||
|
||||
let unicode = ~"\u65e5\u672c";
|
||||
let input = ~"abcDEF" + unicode + ~"xyz:.;";
|
||||
let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
|
||||
let actual = istr::to_upper(input);
|
||||
assert (istr::eq(expected, actual));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_slice() {
|
||||
assert (istr::eq(~"ab", istr::slice(~"abc", 0u, 2u)));
|
||||
assert (istr::eq(~"bc", istr::slice(~"abc", 1u, 3u)));
|
||||
assert (istr::eq(~"", istr::slice(~"abc", 1u, 1u)));
|
||||
fn a_million_letter_a() -> istr {
|
||||
let i = 0;
|
||||
let rs = ~"";
|
||||
while i < 100000 { rs += ~"aaaaaaaaaa"; i += 1; }
|
||||
ret rs;
|
||||
}
|
||||
fn half_a_million_letter_a() -> istr {
|
||||
let i = 0;
|
||||
let rs = ~"";
|
||||
while i < 100000 { rs += ~"aaaaa"; i += 1; }
|
||||
ret rs;
|
||||
}
|
||||
assert (istr::eq(half_a_million_letter_a(),
|
||||
istr::slice(a_million_letter_a(), 0u, 500000u)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_starts_with() {
|
||||
assert (istr::starts_with(~"", ~""));
|
||||
assert (istr::starts_with(~"abc", ~""));
|
||||
assert (istr::starts_with(~"abc", ~"a"));
|
||||
assert (!istr::starts_with(~"a", ~"abc"));
|
||||
assert (!istr::starts_with(~"", ~"abc"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ends_with() {
|
||||
assert (istr::ends_with(~"", ~""));
|
||||
assert (istr::ends_with(~"abc", ~""));
|
||||
assert (istr::ends_with(~"abc", ~"c"));
|
||||
assert (!istr::ends_with(~"a", ~"abc"));
|
||||
assert (!istr::ends_with(~"", ~"abc"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_empty() {
|
||||
assert (istr::is_empty(~""));
|
||||
assert (!istr::is_empty(~"a"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_not_empty() {
|
||||
assert (istr::is_not_empty(~"a"));
|
||||
assert (!istr::is_not_empty(~""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace() {
|
||||
let a = ~"a";
|
||||
check (istr::is_not_empty(a));
|
||||
assert (istr::replace(~"", a, ~"b") == ~"");
|
||||
assert (istr::replace(~"a", a, ~"b") == ~"b");
|
||||
assert (istr::replace(~"ab", a, ~"b") == ~"bb");
|
||||
let test = ~"test";
|
||||
check (istr::is_not_empty(test));
|
||||
assert (istr::replace(~" test test ", test, ~"toast")
|
||||
== ~" toast toast ");
|
||||
assert (istr::replace(~" test test ", test, ~"") == ~" ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_char_slice() {
|
||||
assert (istr::eq(~"ab", istr::char_slice(~"abc", 0u, 2u)));
|
||||
assert (istr::eq(~"bc", istr::char_slice(~"abc", 1u, 3u)));
|
||||
assert (istr::eq(~"", istr::char_slice(~"abc", 1u, 1u)));
|
||||
assert (istr::eq(~"\u65e5", istr::char_slice(~"\u65e5\u672c", 0u, 1u)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim_left() {
|
||||
assert (istr::trim_left(~"") == ~"");
|
||||
assert (istr::trim_left(~"a") == ~"a");
|
||||
assert (istr::trim_left(~" ") == ~"");
|
||||
assert (istr::trim_left(~" blah") == ~"blah");
|
||||
assert (istr::trim_left(~" \u3000 wut") == ~"wut");
|
||||
assert (istr::trim_left(~"hey ") == ~"hey ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim_right() {
|
||||
assert (istr::trim_right(~"") == ~"");
|
||||
assert (istr::trim_right(~"a") == ~"a");
|
||||
assert (istr::trim_right(~" ") == ~"");
|
||||
assert (istr::trim_right(~"blah ") == ~"blah");
|
||||
assert (istr::trim_right(~"wut \u3000 ") == ~"wut");
|
||||
assert (istr::trim_right(~" hey") == ~" hey");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim() {
|
||||
assert (istr::trim(~"") == ~"");
|
||||
assert (istr::trim(~"a") == ~"a");
|
||||
assert (istr::trim(~" ") == ~"");
|
||||
assert (istr::trim(~" blah ") == ~"blah");
|
||||
assert (istr::trim(~"\nwut \u3000 ") == ~"wut");
|
||||
assert (istr::trim(~" hey dude ") == ~"hey dude");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_whitespace() {
|
||||
assert (istr::is_whitespace(~""));
|
||||
assert (istr::is_whitespace(~" "));
|
||||
assert (istr::is_whitespace(~"\u2009")); // Thin space
|
||||
assert (istr::is_whitespace(~" \n\t "));
|
||||
assert (!istr::is_whitespace(~" _ "));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_ascii() {
|
||||
assert istr::is_ascii(~"");
|
||||
assert istr::is_ascii(~"a");
|
||||
assert !istr::is_ascii(~"\u2009");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shift_byte() {
|
||||
let s = ~"ABC";
|
||||
let b = istr::shift_byte(s);
|
||||
assert s == ~"BC";
|
||||
assert b == 65u8;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pop_byte() {
|
||||
let s = ~"ABC";
|
||||
let b = istr::pop_byte(s);
|
||||
assert s == ~"AB";
|
||||
assert b == 67u8;
|
||||
}
|
@ -25,6 +25,7 @@ mod sha1;
|
||||
mod sort;
|
||||
mod str_buf;
|
||||
mod str;
|
||||
mod istr;
|
||||
mod task;
|
||||
mod test;
|
||||
mod uint;
|
||||
|
Loading…
Reference in New Issue
Block a user