Added str::char_offset_iter() and str::rev_char_offset_iter()

Renamed bytes_iter to byte_iter to match other iterators.
Refactored str Iterators to use DoubleEnded Iterators and typedefs instead of wrapper structs.
Reordered the Iterator section.
Whitespace fixup.
Moved clunky `each_split_within` function to the one place in the tree where it's actually needed.
Replaced all block doccomments in str with line doccomments.
2013-07-27 23:38:38 +02:00 · 2013-07-27 23:38:38 +02:00 · e33fca9ffe
commit e33fca9ffe
parent d75ab4a5d7
7 changed files with 592 additions and 597 deletions
--- a/src/libextra/getopts.rs
+++ b/src/libextra/getopts.rs
@ -476,7 +476,6 @@ pub mod groups {
    use getopts::{HasArg, Long, Maybe, Multi, No, Occur, Opt, Optional, Req};
    use getopts::{Short, Yes};

-    use std::str;
    use std::vec;

    /** one group of options, e.g., both -h and --help, along with
@ -667,7 +666,7 @@ pub mod groups {

            // FIXME: #5516
            let mut desc_rows = ~[];
-            for str::each_split_within(desc_normalized_whitespace, 54) |substr| {
+            for each_split_within(desc_normalized_whitespace, 54) |substr| {
                desc_rows.push(substr.to_owned());
            }

@ -683,6 +682,103 @@ pub mod groups {
               rows.collect::<~[~str]>().connect("\n") +
               "\n\n";
    }
+
+    /** Splits a string into substrings with possibly internal whitespace,
+     *  each of them at most `lim` bytes long. The substrings have leading and trailing
+     *  whitespace removed, and are only cut at whitespace boundaries. Each substring is
+     *  passed to `it`; no further substrings are emitted once `it` returns false. Returns
+     *  false if `it` returned false, true otherwise.
+     *
+     *  Note: Function was moved here from `std::str` because this module is the only place that
+     *  uses it, and because it was too specific for a general string function.
+     *
+     *  # Failure: fails during iteration if the string contains a non-whitespace
+     *  sequence longer than the limit.
+     */
+    priv fn each_split_within<'a>(ss: &'a str,
+                                lim: uint,
+                                it: &fn(&'a str) -> bool) -> bool {
+        // Just for fun, let's write this as a state machine:
+
+        enum SplitWithinState {
+            A,  // leading whitespace, initial state
+            B,  // words
+            C,  // internal and trailing whitespace
+        }
+        enum Whitespace {
+            Ws, // current char is whitespace
+            Cr  // current char is not whitespace
+        }
+        enum LengthLimit {
+            UnderLim, // current char makes current substring still fit in limit
+            OverLim   // current char makes current substring no longer fit in limit
+        }
+
+        let mut slice_start = 0; // start of the substring currently being built
+        let mut last_start = 0; // start of the most recently seen word
+        let mut last_end = 0; // position just past the last completed word
+        let mut state = A;
+        let mut fake_i = ss.len(); // index used for the synthetic trailing whitespace below
+        let mut lim = lim; // mutable copy so the limit can be clamped
+
+        let mut cont = true; // set to the callback's result each time a substring is emitted
+        let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) }; // emit substring
+
+        // if the limit is larger than the string, lower it to save cycles
+        if (lim >= fake_i) {
+            lim = fake_i;
+        }
+
+        let machine: &fn((uint, char)) -> bool = |(i, c)| { // one step; returns `cont`
+            let whitespace = if ::std::char::is_whitespace(c) { Ws }       else { Cr };
+            let limit      = if (i - slice_start + 1) <= lim  { UnderLim } else { OverLim };
+            // Transition table: (current state, char class, length class) => next state
+            state = match (state, whitespace, limit) {
+                (A, Ws, _)        => { A } // still skipping leading whitespace
+                (A, Cr, _)        => { slice_start = i; last_start = i; B } // first word begins
+
+                (B, Cr, UnderLim) => { B } // word continues
+                (B, Cr, OverLim)  if (i - last_start + 1) > lim
+                                => fail!("word starting with %? longer than limit!",
+                                        ss.slice(last_start, i + 1)),
+                (B, Cr, OverLim)  => { slice(); slice_start = last_start; B } // emit; keep word
+                (B, Ws, UnderLim) => { last_end = i; C } // word ended, substring may grow
+                (B, Ws, OverLim)  => { last_end = i; slice(); A } // word ended; emit
+
+                (C, Cr, UnderLim) => { last_start = i; B } // next word begins
+                (C, Cr, OverLim)  => { slice(); slice_start = i; last_start = i; last_end = i; B }
+                (C, Ws, OverLim)  => { slice(); A }
+                (C, Ws, UnderLim) => { C }
+            };
+
+            cont
+        };
+        // NOTE(review): indices here count chars, yet feed `slice`; verify for non-ASCII input.
+        ss.iter().enumerate().advance(|x| machine(x));
+
+        // Let the automaton 'run out' by supplying trailing whitespace
+        while cont && match state { B | C => true, A => false } {
+            machine((fake_i, ' '));
+            fake_i += 1;
+        }
+        return cont;
+    }
+    // Checks `each_split_within` output; NOTE(review): `zip` in `t` may hide a length mismatch.
+    #[test]
+    priv fn test_split_within() {
+        fn t(s: &str, i: uint, u: &[~str]) { // split `s` with limit `i`, compare with `u`
+            let mut v = ~[]; // substrings actually produced
+            for each_split_within(s, i) |s| { v.push(s.to_owned()) }
+            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
+        }
+        t("", 0, []); // empty input, zero limit
+        t("", 15, []); // empty input, non-zero limit
+        t("hello", 15, [~"hello"]); // single word shorter than the limit
+        t("\nMary had a little lamb\nLittle lamb\n", 15,
+            [~"Mary had a", ~"little lamb", ~"Little lamb"]);
+        t("\nMary had a little lamb\nLittle lamb\n", ::std::uint::max_value,
+            [~"Mary had a little lamb\nLittle lamb"]);
+    }
 } // end groups module

 #[cfg(test)]
--- a/src/libextra/time.rs
+++ b/src/libextra/time.rs
@ -260,7 +260,7 @@ impl Tm {
 priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
    fn match_str(s: &str, pos: uint, needle: &str) -> bool {
        let mut i = pos;
-        for needle.bytes_iter().advance |ch| {
+        for needle.byte_iter().advance |ch| {
            if s[i] != ch {
                return false;
            }
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
--- a/src/libstd/str/ascii.rs
+++ b/src/libstd/str/ascii.rs
@ -115,7 +115,7 @@ impl<'self> AsciiCast<&'self[Ascii]> for &'self str {

    #[inline]
    fn is_ascii(&self) -> bool {
-        self.bytes_iter().all(|b| b.is_ascii())
+        self.byte_iter().all(|b| b.is_ascii())
    }
 }

--- a/src/libsyntax/ext/bytes.rs
+++ b/src/libsyntax/ext/bytes.rs
@ -27,7 +27,7 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: span, tts: &[ast::token_tree]) -> bas
            ast::expr_lit(lit) => match lit.node {
                // string literal, push each byte to vector expression
                ast::lit_str(s) => {
-                    for s.bytes_iter().advance |byte| {
+                    for s.byte_iter().advance |byte| {
                        bytes.push(cx.expr_u8(sp, byte));
                    }
                }
--- a/src/test/run-pass/linear-for-loop.rs
+++ b/src/test/run-pass/linear-for-loop.rs
@ -16,7 +16,7 @@ pub fn main() {
    assert_eq!(y, 6);
    let s = ~"hello there";
    let mut i: int = 0;
-    for s.bytes_iter().advance |c| {
+    for s.byte_iter().advance |c| {
        if i == 0 { assert!((c == 'h' as u8)); }
        if i == 1 { assert!((c == 'e' as u8)); }
        if i == 2 { assert!((c == 'l' as u8)); }
--- a/src/test/run-pass/utf8.rs
+++ b/src/test/run-pass/utf8.rs
@ -41,7 +41,7 @@ pub fn main() {

    fn check_str_eq(a: ~str, b: ~str) {
        let mut i: int = 0;
-        for a.bytes_iter().advance |ab| {
+        for a.byte_iter().advance |ab| {
            info!(i);
            info!(ab);
            let bb: u8 = b[i];