Add hacks to extract and compile tutorial code

Not included in the build by default, since it's fragile and kludgy. Do something like this to run it: `cd doc/tutorial; RUSTC=../../build/stage2/bin/rustc bash test.sh`. Closes #1143
2011-11-22 16:12:23 +01:00 · 2011-11-22 16:12:23 +01:00 · fedb775fbb
commit fedb775fbb
parent 1b8b0b8584
14 changed files with 119 additions and 24 deletions
--- a/doc/tutorial/args.md
+++ b/doc/tutorial/args.md
@ -22,6 +22,8 @@ other tasks, and that most data is immutable.
 Take the following program:
    # fn get_really_big_record() -> int { 1 }
    # fn myfunc(a: int) {}
    let x = get_really_big_record();
    myfunc(x);
@ -32,6 +34,9 @@ existing value as the argument, without copying.
 There are more involved cases. The call could look like this:
    # fn myfunc(a: int, b: block()) {}
    # fn get_another_record() -> int { 1 }
    # let x = 1;
    myfunc(x, {|| x = get_another_record(); });
 Now, if `myfunc` first calls its second argument and then accesses its
--- a/doc/tutorial/build.js
+++ b/doc/tutorial/build.js
@ -4,7 +4,15 @@ require("./lib/codemirror-rust");
 md.Markdown.dialects.Maruku.block.code = function code(block, next) {
  if (block.match(/^    /)) {
-    var text = block.replace(/(^|\n)    /g, "$1"), accum = [], curstr = "", curstyle = null;
+    var text = String(block);
    while (next.length && next[0].match(/^    /)) text += "\n" + String(next.shift());
    var leaveAlone, accum = [], curstr = "", curstyle = null;
    text = text.split("\n").map(function(line) {
      line = line.slice(4);
      if (line == "## notrust") leaveAlone = true;
      return line;
    }).filter(function(x) { return !/^##? /.test(x); }).join("\n");
    if (leaveAlone) return [["pre", {}, text]];
    function add(str, style) {
      if (style != curstyle) {
        if (curstyle) accum.push(["span", {"class": "cm-" + curstyle}, curstr]);
--- a/doc/tutorial/control.md
+++ b/doc/tutorial/control.md
@ -38,6 +38,7 @@ Rust's `alt` construct is a generalized, cleaned-up version of C's
 each labelled with a pattern, and it will execute the arm that matches
 the value.
    # let my_number = 1;
    alt my_number {
      0       { std::io::println("zero"); }
      1 | 2   { std::io::println("one or two"); }
@ -89,6 +90,7 @@ To a limited extent, it is possible to use destructuring patterns when
 declaring a variable with `let`. For example, you can say this to
 extract the fields from a tuple:
    # fn get_tuple_of_two_ints() -> (int, int) { (1, 1) }
    let (a, b) = get_tuple_of_two_ints();
 This will introduce two new variables, `a` and `b`, bound to the
@ -118,6 +120,8 @@ it finds one that can be divided by five.
 There's also `while`'s ugly cousin, `do`/`while`, which does not check
 its condition on the first iteration, using traditional syntax:
    # fn eat_cake() {}
    # fn any_cake_left() -> bool { false }
    do {
        eat_cake();
    } while any_cake_left();
--- a/doc/tutorial/data.md
+++ b/doc/tutorial/data.md
@ -56,6 +56,7 @@ Records can be destructured on in `alt` patterns. The basic syntax is
 omitted as a shorthand for simply binding the variable with the same
 name as the field.
    # let mypoint = {x: 0f, y: 0f};
    alt mypoint {
        {x: 0f, y: y_name} { /* Provide sub-patterns for fields */ }
        {x, y}             { /* Simply bind the fields */ }
@ -71,6 +72,7 @@ the fields of a record, a record pattern may end with `, _` (as in
 Tags [FIXME terminology] are datatypes that have several different
 representations. For example, the type shown earlier:
    # type point = {x: float, y: float};
    tag shape {
        circle(point, float);
        rectangle(point, point);
@ -96,7 +98,7 @@ equivalent to an `enum` in C:
        east;
        south;
        west;
-    };
+    }
 This will define `north`, `east`, `south`, and `west` as constants,
 all of which have type `direction`.
@ -116,6 +118,7 @@ That is a shorthand for this:
 Tag types like this can have their content extracted with the
 dereference (`*`) unary operator:
    # tag gizmo_id = int;
    let my_gizmo_id = gizmo_id(10);
    let id_int: int = *my_gizmo_id;
@ -125,6 +128,8 @@ For tag types with multiple variants, destructuring is the only way to
 get at their contents. All variant constructors can be used as
 patterns, as in this definition of `area`:
    # type point = {x: float, y: float};
    # tag shape { circle(point, float); rectangle(point, point); }
    fn area(sh: shape) -> float {
        alt sh {
            circle(_, size) { std::math::pi * size * size }
@ -136,6 +141,8 @@ For variants without arguments, you have to write `variantname.` (with
 a dot at the end) to match them in a pattern. This is to prevent
 ambiguity between matching a variant name and binding a new variable.
    # type point = {x: float, y: float};
    # tag direction { north; east; south; west; }
    fn point_from_direction(dir: direction) -> point {
        alt dir {
            north. { {x:  0f, y:  1f} }
@ -295,6 +302,7 @@ strings. They are always immutable.
 Resources are data types that have a destructor associated with them.
    # fn close_file_desc(x: int) {}
    resource file_desc(fd: int) {
        close_file_desc(fd);
    }
--- a/doc/tutorial/extract.js
+++ b/doc/tutorial/extract.js
@ -0,0 +1,42 @@
 var fs = require("fs"), md = require("./lib/markdown");
 // Runs markdown.js over the tutorial, to find the code blocks in it.
 // Uses the #-markers in those code blocks, along with some vague
 // heuristics, to turn them into compilable files. Outputs these files
 // to fragments/.
 //
 // '## ignore' means don't test this block
 // '## notrust' means the block isn't rust code
 //     (used by build.js to not highlight it)
 // '# code' means insert the given code to complete the fragment
 //     (build.js strips out such lines)
 // Name of the tutorial file being processed and the number of fragments
 // written for it so far; both are reset by handle().
 var curFile, curFrag;

 // Replacement for the Maruku dialect's code-block rule. Rather than
 // producing markup, it turns each indented code block into a standalone
 // Rust source file under fragments/.
 //
 // block: the block under inspection; next: the queue of following blocks.
 // Always returns undefined.
 md.Markdown.dialects.Maruku.block.code = function code(block, next) {
   // Only blocks indented by four spaces are code.
   if (!block.match(/^    /)) return;

   // Pull in the directly following indented blocks as well, so that the
   // whole run becomes a single fragment.
   var pieces = [String(block)];
   while (next.length && next[0].match(/^    /)) pieces.push(String(next.shift()));

   var skip = false;
   var rawLines = pieces.join("\n").split("\n");
   var cleaned = [];
   for (var i = 0; i < rawLines.length; i++) {
     // Drop the four-character indent.
     var current = rawLines[i].slice(4);
     if (current == "## ignore" || current == "## notrust") {
       skip = true;
       current = "";
     }
     // '# code' lines carry hidden helper code: keep it, minus the marker.
     cleaned.push(/^# /.test(current) ? current.slice(2) : current);
   }
   if (skip) return;

   var source = cleaned.join("\n");
   if (!/\bfn main\b/.test(source)) {
     // Fragments with lines starting with one of these keywords get an
     // empty main appended; anything else is wrapped inside a main function.
     var hasItems = /(^|\n) *(native|use|mod|import|export)\b/.test(source);
     source = hasItems ? source + "\nfn main() {}\n"
                       : "fn main() {\n" + source + "\n}\n";
   }
   if (!/\buse std\b/.test(source)) source = "use std;\n" + source;

   curFrag += 1;
   fs.writeFileSync("fragments/" + curFile + "_" + curFrag + ".rs", source);
 };
 // Identity function; used as a filter predicate to drop empty lines.
 function id(x) { return x; }

 // Extracts the fragments of one tutorial file. `file` is the file's name
 // without the ".md" extension; it also becomes the prefix of the
 // fragment file names.
 function handle(file) {
   curFile = file;
   curFrag = 0;
   var markdownSource = fs.readFileSync(file + ".md", "utf8");
   md.parse(markdownSource, "Maruku");
 }

 // The "order" file lists one tutorial file name per line.
 fs.readFileSync("order", "utf8")
   .split("\n")
   .filter(id)
   .forEach(handle);
--- a/doc/tutorial/ffi.md
+++ b/doc/tutorial/ffi.md
@ -67,6 +67,7 @@ most notably the Windows API, use other calling conventions, so Rust
 provides a way to hint to the compiler which is expected by using
 the `"abi"` attribute:
    #[cfg(target_os = "win32")]
    #[abi = "stdcall"]
    native mod kernel32 {
        fn SetEnvironmentVariableA(n: *u8, v: *u8) -> int;
@ -81,7 +82,9 @@ or `"stdcall"`. Other conventions may be defined in the future.
 The native `SHA1` function is declared to take three arguments, and
 return a pointer.
    # native mod crypto {
    fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8;
    # }
 When declaring the argument types to a foreign function, the Rust
 compiler has no way to check whether your declaration is correct, so
@ -106,6 +109,9 @@ null pointers.
 The `sha1` function is the most obscure part of the program.
    # import std::{str, vec};
    # mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } }
    # fn as_hex(data: [u8]) -> str { "hi" }
    fn sha1(data: str) -> str unsafe {
        let bytes = str::bytes(data);
        let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes),
@ -141,10 +147,15 @@ Rust's safety mechanisms.
 Let's look at our `sha1` function again.
    # import std::{str, vec};
    # mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } }
    # fn as_hex(data: [u8]) -> str { "hi" }
    # fn x(data: str) -> str unsafe {
    let bytes = str::bytes(data);
    let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes),
                            vec::len(bytes), std::ptr::null());
    ret as_hex(vec::unsafe::from_buf(hash, 20u));
    # }
 The `str::bytes` function is perfectly safe, it converts a string to
 an `[u8]`. This byte array is then fed to `vec::unsafe::to_ptr`, which
--- a/doc/tutorial/func.md
+++ b/doc/tutorial/func.md
@ -19,6 +19,10 @@ This helps the compiler avoid spurious error messages. For example,
 the following code would be a type error if `dead_end` would be
 expected to return.
    # fn can_go_left() -> bool { true }
    # fn can_go_right() -> bool { true }
    # tag dir { left; right; }
    # fn dead_end() -> ! { fail; }
    let dir = if can_go_left() { left }
              else if can_go_right() { right }
              else { dead_end(); };
@ -96,12 +100,14 @@ of integers backwards:
 To run such an iteration, you could do this:
    # fn for_rev(v: [int], act: block(int)) {}
    for_rev([1, 2, 3], {|n| log n; });
 But Rust allows a more pleasant syntax for this situation, with the
 loop block moved out of the parenthesis and the final semicolon
 omitted:
    # fn for_rev(v: [int], act: block(int)) {}
    for_rev([1, 2, 3]) {|n|
        log n;
    }
--- a/doc/tutorial/generic.md
+++ b/doc/tutorial/generic.md
@ -55,16 +55,17 @@ dereferences become impossible.
 Rust's type inferrer works very well with generics, but there are
 programs that just can't be typed.
-    let n = none;
+    let n = std::option::none;
    # n = std::option::some(1);
 If you never do anything else with `n`, the compiler will not be able
 to assign a type to it. (The same goes for `[]`, in fact.) If you
 really want to have such a statement, you'll have to write it like
 this:
-    let n2: option::t<int> = none;
+    let n2: std::option::t<int> = std::option::none;
    // or
-    let n = none::<int>;
+    let n = std::option::none::<int>;
 Note that, in a value expression, `<` already has a meaning as a
 comparison operator, so you'll have to write `::<T>` to explicitly
@ -120,6 +121,7 @@ take sendable types.
 If you try this program:
    # fn map(f: block(int) -> int, v: [int]) {}
    fn plus1(x: int) -> int { x + 1 }
    map(plus1, [1, 2, 3]);
@ -131,6 +133,7 @@ way to pass integers, which is by value. To get around this issue, you
 have to explicitly mark the arguments to a function that you want to
 pass to a generic higher-order function as being passed by pointer:
    # fn map<T, U>(f: block(T) -> U, v: [T]) {}
    fn plus1(&&x: int) -> int { x + 1 }
    map(plus1, [1, 2, 3]);
--- a/doc/tutorial/mod.md
+++ b/doc/tutorial/mod.md
@ -35,6 +35,7 @@ It is also possible to include multiple files in a crate. For this
 purpose, you create a `.rc` crate file, which references any number of
 `.rs` code files. A crate file could look like this:
    ## ignore
    #[link(name = "farm", vers = "2.5", author = "mjh")];
    mod cow;
    mod chicken;
@ -52,6 +53,7 @@ in a moment.
 To have a nested directory structure for your source files, you can
 nest mods in your `.rc` file:
    ## ignore
    mod poultry {
        mod chicken;
        mod turkey;
@ -79,6 +81,7 @@ OS X.
 It is possible to provide more specific information when using an
 external crate.
    ## ignore
    use myfarm (name = "farm", vers = "2.7");
 When a comma-separated list of name/value pairs is given after `use`,
@ -90,6 +93,7 @@ local name `myfarm`.
 Our example crate declared this set of `link` attributes:
    ## ignore
    #[link(name = "farm", vers = "2.5", author = "mjh")];
 The version does not match the one provided in the `use` directive, so
@ -105,12 +109,14 @@ these two files:
    #[link(name = "mylib", vers = "1.0")];
    fn world() -> str { "world" }
    ## ignore
    // main.rs
    use mylib;
    fn main() { log_err "hello " + mylib::world(); }
 Now compile and run like this (adjust to your platform if necessary):
    ## notrust
    > rustc --lib mylib.rs
    > rustc main.rs -L .
    > ./main
@ -147,8 +153,8 @@ restricted with `export` directives at the top of the module or file.
    mod enc {
        export encrypt, decrypt;
        const super_secret_number: int = 10;
-        fn encrypt(n: int) { n + super_secret_number }
+        fn encrypt(n: int) -> int { n + super_secret_number }
-        fn decrypt(n: int) { n - super_secret_number }
+        fn decrypt(n: int) -> int { n - super_secret_number }
    }
 This defines a rock-solid encryption algorithm. Code outside of the
--- a/doc/tutorial/setup.md
+++ b/doc/tutorial/setup.md
@ -21,6 +21,7 @@ If you modify the program to make it invalid (for example, remove the
 `use std` line), and then compile it, you'll see an error message like
 this:
    ## notrust
    hello.rs:2:4: 2:20 error: unresolved modulename: std
    hello.rs:2     std::io::println("hello world!");
                   ^~~~~~~~~~~~~~~~
--- a/doc/tutorial/syntax.md
+++ b/doc/tutorial/syntax.md
@ -21,6 +21,7 @@ statements and expressions is C-like. Function calls are written
 precedence that they have in C, comments look the same, and constructs
 like `if` and `while` are available:
    # fn call_a_function(_a: int) {}
    fn main() {
        if 1 < 2 {
            while false { call_a_function(10 * 4); }
@ -39,10 +40,13 @@ of languages. A lot of thing that are statements in C are expressions
 in Rust. This allows for useless things like this (which passes
 nil—the void type—to a function):
    # fn a_function(_a: ()) {}
    a_function(while false {});
 But also useful things like this:
    # fn the_stars_align() -> bool { false }
    # fn something_else() -> bool { true }
    let x = if the_stars_align() { 4 }
            else if something_else() { 3 }
            else { 0 };
@ -125,6 +129,7 @@ annotation:
    // The type of this vector will be inferred based on its use.
    let x = [];
    # x = [3];
    // Explicitly say this is a vector of integers.
    let y: [int] = [];
@ -272,6 +277,7 @@ The comparison operators are the traditional `==`, `!=`, `<`, `>`,
 Rust has a ternary conditional operator `?:`, as in:
    let badness = 12;
    let message = badness < 10 ? "error" : "FATAL ERROR";
 For type casting, Rust uses the binary `as` operator, which has a
@ -311,19 +317,14 @@ followed by a comma-separated list of nested attributes, as in the
 `cfg` example above, or in this [crate](mod.html) metadata
 declaration:
    ## ignore
    #[link(name = "std",
           vers = "0.1",
           url = "http://rust-lang.org/src/std")];
 An attribute without a semicolon following it applies to the
 definition that follows it. When terminated with a semicolon, it
-applies to the current context. The above example could also be
+applies to the module or crate.
 written like this:
    fn register_win_service() {
        #[cfg(target_os = "win32")];
        /* ... */
    }
 ## Syntax extensions
--- a/doc/tutorial/test.md
+++ b/doc/tutorial/test.md
@ -21,6 +21,7 @@ When you compile the program normally, the `test_twice` function will
 not be used. To actually run the tests, compile with the `--test`
 flag:
    ## notrust
    > rustc --lib twice.rs
    > ./twice
    running 1 tests
@ -30,6 +31,7 @@ flag:
 Or, if we change the file to fail, for example by replacing `x + x`
 with `x + 1`:
    ## notrust
    running 1 tests
    test test_twice ... FAILED
    failures:
--- a/doc/tutorial/test.sh
+++ b/doc/tutorial/test.sh
@ -0,0 +1,8 @@
 #!/bin/bash
 # Extracts the tutorial's code fragments into fragments/ and compiles each
 # one with $RUSTC, printing the path of every fragment that fails to compile.
 # Usage (from doc/tutorial): RUSTC=../../build/stage2/bin/rustc bash test.sh

 # Without a compiler every fragment would be reported as failing, so stop early.
 : "${RUSTC:?set RUSTC to the rustc binary to test with}"

 rm -f fragments/*.rs
 mkdir -p fragments

 # If extraction fails there are no fresh fragments worth compiling.
 node extract.js || exit 1

 # Glob directly instead of parsing `ls` output, so unusual file names
 # reach the compiler intact.
 for F in fragments/*.rs; do
   # An unmatched glob stays literal; skip it instead of compiling nothing.
   [ -e "$F" ] || continue
   # $RUSTC is deliberately unquoted so that it may carry extra flags.
   $RUSTC "$F" > /dev/null || echo "$F"
 done
--- a/src/comp/syntax/parse/parser.rs
+++ b/src/comp/syntax/parse/parser.rs
@ -2485,22 +2485,12 @@ fn parse_crate_directive(p: parser, first_outer_attr: [ast::attribute]) ->
              _ { none }
            };
        alt p.peek() {
          // mod x = "foo.rs";
          token::SEMI. {
            let hi = p.get_hi_pos();
            p.bump();
            ret spanned(lo, hi, ast::cdir_src_mod(id, file_opt, outer_attrs));
          }
          // mod x = "foo_dir" { ...directives... }
          token::LBRACE. {
            p.bump();