Merge pull request #1812 from killerswan/indexing2

(core::str) Fixing index and rindex
This commit is contained in:
Brian Anderson 2012-02-11 17:42:45 -08:00
commit 737db5b49a
7 changed files with 150 additions and 86 deletions

View File

@ -651,25 +651,27 @@ fn cmd_install(c: cargo) unsafe {
if str::starts_with(target, "uuid:") {
let uuid = rest(target, 5u);
let idx = str::index(uuid, '/' as u8);
if idx != -1 {
let source = str::unsafe::slice_bytes(uuid, 0u, idx as uint);
uuid = str::unsafe::slice_bytes(uuid, idx as uint + 1u,
str::byte_len(uuid));
install_uuid_specific(c, wd, source, uuid);
} else {
install_uuid(c, wd, uuid);
alt str::index(uuid, '/') {
option::some(idx) {
let source = str::slice(uuid, 0u, idx);
uuid = str::slice(uuid, idx + 1u, str::char_len(uuid));
install_uuid_specific(c, wd, source, uuid);
}
option::none {
install_uuid(c, wd, uuid);
}
}
} else {
let name = target;
let idx = str::index(name, '/' as u8);
if idx != -1 {
let source = str::unsafe::slice_bytes(name, 0u, idx as uint);
name = str::unsafe::slice_bytes(name, idx as uint + 1u,
str::byte_len(name));
install_named_specific(c, wd, source, name);
} else {
install_named(c, wd, name);
alt str::index(name, '/') {
option::some(idx) {
let source = str::slice(name, 0u, idx);
name = str::slice(name, idx + 1u, str::char_len(name));
install_named_specific(c, wd, source, name);
}
option::none {
install_named(c, wd, name);
}
}
}
}

View File

@ -109,14 +109,16 @@ mod write {
// Decides what to call an intermediate file, given the name of the output
// and the extension to use.
fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe {
let dot_pos = str::index(output_path, '.' as u8);
let stem;
if dot_pos < 0 {
stem = output_path;
} else { stem = str::unsafe::slice_bytes(output_path, 0u,
dot_pos as uint); }
let stem = alt str::index(output_path, '.') {
option::some(dot_pos) {
str::slice(output_path, 0u, dot_pos)
}
option::none { output_path }
};
ret stem + "." + extension;
}
fn run_passes(sess: session, llmod: ModuleRef, output: str) {
let opts = sess.opts;
if opts.time_llvm_passes { llvm::LLVMRustEnableTimePasses(); }

View File

@ -119,16 +119,13 @@ fn get_line(fm: filemap, line: int) -> str unsafe {
let end: uint;
if line as uint < vec::len(fm.lines) - 1u {
end = fm.lines[line + 1].byte - fm.start_pos.byte;
ret str::unsafe::slice_bytes(*fm.src, begin, end);
} else {
// If we're not done parsing the file, we're at the limit of what's
// parsed. If we just slice the rest of the string, we'll print out
// the remainder of the file, which is undesirable.
end = str::byte_len(*fm.src);
let rest = str::unsafe::slice_bytes(*fm.src, begin, end);
let newline = str::index(rest, '\n' as u8);
if newline != -1 { end = begin + (newline as uint); }
ret str::splitn_char(*fm.src, '\n', 1u)[0];
}
ret str::unsafe::slice_bytes(*fm.src, begin, end);
}
fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)

View File

@ -283,10 +283,9 @@ fn check_variants_T<T: copy>(
}
}
fn last_part(filename: str) -> str unsafe {
let ix = str::rindex(filename, 47u8 /* '/' */);
assert ix >= 0;
str::unsafe::slice_bytes(filename, ix as uint + 1u, str::byte_len(filename) - 3u)
// Function: last_part
//
// Returns the portion of `filename` that follows its final '/', with the
// trailing three chars removed.
// NOTE(review): the three dropped chars are presumably a file extension such
// as ".rs" -- confirm against the callers.
fn last_part(filename: str) -> str {
    // Char index of the final '/'. option::get is expected to fail when
    // rindex finds no slash at all.
    let slash = option::get(str::rindex(filename, '/'));

    // Slice bounds are char positions: just past the slash, up to three
    // chars before the end.
    let begin = slash + 1u;
    let end = str::char_len(filename) - 3u;
    str::slice(filename, begin, end)
}
enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), }

View File

@ -253,15 +253,12 @@ Function: pop_char
Remove the final character from a string and return it.
Failure:
If the string does not contain any characters.
*/
fn pop_char(&s: str) -> char unsafe {
let end = byte_len(s);
while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
assert (end > 0u);
let ch = char_at(s, end - 1u);
s = unsafe::slice_bytes(s, 0u, end - 1u);
let {ch:ch, prev:end} = char_range_at_reverse(s, end);
s = unsafe::slice_bytes(s, 0u, end);
ret ch;
}
@ -868,32 +865,50 @@ fn lines_iter(ss: str, ff: fn(&&str)) {
Section: Searching
*/
/*
Function: index
// Function: index
//
// Returns the index of the first matching char
// (as option some/none)
fn index(ss: str, cc: char) -> option<uint> {
let bii = 0u;
let cii = 0u;
let len = byte_len(ss);
while bii < len {
let {ch, next} = char_range_at(ss, bii);
Returns the index of the first matching byte. Returns -1 if
no match is found.
// found here?
if ch == cc {
ret option::some(cii);
}
FIXME: UTF-8
*/
fn index(s: str, c: u8) -> int {
let i: int = 0;
for k: u8 in s { if k == c { ret i; } i += 1; }
ret -1;
cii += 1u;
bii = next;
}
// wasn't found
ret option::none;
}
/*
Function: rindex
// Function: rindex
//
// Returns the index of the last matching char
// (as option some/none)
fn rindex(ss: str, cc: char) -> option<uint> {
let bii = byte_len(ss);
let cii = char_len(ss);
while bii > 0u {
let {ch, prev} = char_range_at_reverse(ss, bii);
cii -= 1u;
bii = prev;
Returns the index of the last matching byte. Returns -1
if no match is found.
// found here?
if ch == cc {
ret option::some(cii);
}
}
FIXME: UTF-8
*/
fn rindex(s: str, c: u8) -> int {
let n: int = byte_len(s) as int;
while n >= 0 { if s[n] == c { ret n; } n -= 1; }
ret n;
// wasn't found
ret option::none;
}
/*
@ -1233,6 +1248,25 @@ Pluck a character out of a string
*/
fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; }
// Function: char_range_at_reverse
//
// Given a str and a byte position just past a char, return that char
// together with the byte position at which it starts. Feeding the returned
// `prev` back in as `start` iterates over a unicode string in reverse.
//
// Failure:
//
// If there is no char before `start` (for example when `start` is 0u).
fn char_range_at_reverse(ss: str, start: uint) -> {ch: char, prev: uint} {
    let prev = start;

    // Step back while the preceding byte is a continuation byte
    // (top two bits == 10, compared against tag_cont_u8)
    while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 {
        prev -= 1u;
    }

    // There must still be a byte to step onto. Fail here explicitly instead
    // of letting the unsigned subtraction below wrap around.
    assert (prev > 0u);

    // Now refer to the initial byte of the previous char
    prev -= 1u;

    ret {ch: char_at(ss, prev), prev: prev};
}
/*
Function: substr_all
@ -1442,13 +1476,42 @@ mod tests {
}
#[test]
fn test_index_and_rindex() {
assert (index("hello", 'e' as u8) == 1);
assert (index("hello", 'o' as u8) == 4);
assert (index("hello", 'z' as u8) == -1);
assert (rindex("hello", 'l' as u8) == 3);
assert (rindex("hello", 'h' as u8) == 0);
assert (rindex("hello", 'z' as u8) == -1);
fn test_index() {
    // A match at the first, an interior and the last char position,
    // followed by a char that does not occur at all.
    let hay = "hello";
    assert (index(hay, 'h') == option::some(0u));
    assert (index(hay, 'e') == option::some(1u));
    assert (index(hay, 'o') == option::some(4u));
    assert (index(hay, 'z') == option::none);
}
#[test]
fn test_rindex() {
    // 'l' occurs twice in "hello"; rindex must report the later one (3u).
    let hay = "hello";
    assert (rindex(hay, 'l') == option::some(3u));
    assert (rindex(hay, 'o') == option::some(4u));
    assert (rindex(hay, 'h') == option::some(0u));
    assert (rindex(hay, 'z') == option::none);
}
#[test]
fn test_pop_char() {
let data = "ประเทศไทย中华";
let cc = pop_char(data);
assert "ประเทศไทย中" == data;
assert '华' == cc;
}
#[test]
fn test_pop_char_2() {
    // Popping the only char of a one-char string must leave it empty.
    // The input has to contain '华' for the assertions below to hold;
    // popping from "" is the failing case, which has its own test.
    let data2 = "华";
    let cc2 = pop_char(data2);
    assert "" == data2;
    assert '华' == cc2;
}
#[test]
#[should_fail]
fn test_pop_char_fail() {
    // An empty string has no char to pop, so the call is expected to fail.
    let empty = "";
    let _popped = pop_char(empty);
}
#[test]

View File

@ -32,6 +32,22 @@ A path or fragment of a filesystem path
*/
type path = str;
// Function: splitDirnameBasename
//
// Splits a path at its last separator into {dirname, basename}.
// os_fs::path_sep is tried first and os_fs::alt_path_sep second. When
// neither occurs, the dirname is "." and the basename is the whole path.
fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} {
    // Char index of the separator at which the path is split
    let sep_idx;
    alt str::rindex(pp, os_fs::path_sep) {
      option::some(found) { sep_idx = found; }
      option::none {
        alt str::rindex(pp, os_fs::alt_path_sep) {
          option::some(found) { sep_idx = found; }
          // no separator of either kind: nothing to split
          option::none { ret {dirname: ".", basename: pp}; }
        }
      }
    }

    // The separator itself belongs to neither half
    let total = str::char_len(pp);
    let dir = str::slice(pp, 0u, sep_idx);
    let base = str::slice(pp, sep_idx + 1u, total);
    ret {dirname: dir, basename: base};
}
/*
Function: dirname
@ -43,13 +59,8 @@ The dirname of "/usr/share" will be "/usr", but the dirname of
If the path is not prefixed with a directory, then "." is returned.
*/
fn dirname(p: path) -> path unsafe {
let i: int = str::rindex(p, os_fs::path_sep as u8);
if i == -1 {
i = str::rindex(p, os_fs::alt_path_sep as u8);
if i == -1 { ret "."; }
}
ret str::unsafe::slice_bytes(p, 0u, i as uint);
fn dirname(pp: path) -> path {
ret splitDirnameBasename(pp).dirname;
}
/*
@ -63,18 +74,10 @@ path separators in the path then the returned path is identical to
the provided path. If an empty path is provided or the path ends
with a path separator then an empty path is returned.
*/
fn basename(p: path) -> path unsafe {
let i: int = str::rindex(p, os_fs::path_sep as u8);
if i == -1 {
i = str::rindex(p, os_fs::alt_path_sep as u8);
if i == -1 { ret p; }
}
let len = str::byte_len(p);
if (i + 1) as uint >= len { ret p; }
ret str::unsafe::slice_bytes(p, (i + 1) as uint, len);
fn basename(pp: path) -> path {
ret splitDirnameBasename(pp).basename;
}
// FIXME: Need some typestate to avoid bounds check when len(pre) == 0
/*
Function: connect

View File

@ -230,16 +230,14 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe {
let i_arg = option::none::<str>;
if cur[1] == '-' as u8 {
let tail = str::unsafe::slice_bytes(cur, 2u, curlen);
let eq = str::index(tail, '=' as u8);
if eq == -1 {
let tail_eq = str::splitn_char(tail, '=', 1u);
if vec::len(tail_eq) <= 1u {
names = [long(tail)];
} else {
names =
[long(str::unsafe::slice_bytes(tail,0u,eq as uint))];
[long(tail_eq[0])];
i_arg =
option::some::<str>(str::unsafe::slice_bytes(tail,
(eq as uint) + 1u,
curlen - 2u));
option::some::<str>(tail_eq[1]);
}
} else {
let j = 1u;