From fedb775fbb127a5099f46078c85046440479b0ef Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 22 Nov 2011 16:12:23 +0100 Subject: [PATCH] Add hacks to extract and compile tutorial code Not included in the build by default, since it's fragile and kludgy. Do something like this to run it: cd doc/tutorial RUSTC=../../build/stage2/bin/rustc bash test.sh Closes #1143 --- doc/tutorial/args.md | 5 ++++ doc/tutorial/build.js | 10 +++++++- doc/tutorial/control.md | 4 ++++ doc/tutorial/data.md | 10 +++++++- doc/tutorial/extract.js | 42 +++++++++++++++++++++++++++++++++ doc/tutorial/ffi.md | 11 +++++++++ doc/tutorial/func.md | 6 +++++ doc/tutorial/generic.md | 9 ++++--- doc/tutorial/mod.md | 10 ++++++-- doc/tutorial/setup.md | 1 + doc/tutorial/syntax.md | 15 ++++++------ doc/tutorial/test.md | 2 ++ doc/tutorial/test.sh | 8 +++++++ src/comp/syntax/parse/parser.rs | 10 -------- 14 files changed, 119 insertions(+), 24 deletions(-) create mode 100644 doc/tutorial/extract.js create mode 100644 doc/tutorial/test.sh diff --git a/doc/tutorial/args.md b/doc/tutorial/args.md index 7c084687bb2..1797f5a23f5 100644 --- a/doc/tutorial/args.md +++ b/doc/tutorial/args.md @@ -22,6 +22,8 @@ other tasks, and that most data is immutable. Take the following program: + # fn get_really_big_record() -> int { 1 } + # fn myfunc(a: int) {} let x = get_really_big_record(); myfunc(x); @@ -32,6 +34,9 @@ existing value as the argument, without copying. There are more involved cases. 
The call could look like this: + # fn myfunc(a: int, b: block()) {} + # fn get_another_record() -> int { 1 } + # let x = 1; myfunc(x, {|| x = get_another_record(); }); Now, if `myfunc` first calls its second argument and then accesses its diff --git a/doc/tutorial/build.js b/doc/tutorial/build.js index edefdfe3201..fb5f297c81e 100644 --- a/doc/tutorial/build.js +++ b/doc/tutorial/build.js @@ -4,7 +4,15 @@ require("./lib/codemirror-rust"); md.Markdown.dialects.Maruku.block.code = function code(block, next) { if (block.match(/^ /)) { - var text = block.replace(/(^|\n) /g, "$1"), accum = [], curstr = "", curstyle = null; + var text = String(block); + while (next.length && next[0].match(/^ /)) text += "\n" + String(next.shift()); + var leaveAlone, accum = [], curstr = "", curstyle = null; + text = text.split("\n").map(function(line) { + line = line.slice(4); + if (line == "## notrust") leaveAlone = true; + return line; + }).filter(function(x) { return !/^##? /.test(x); }).join("\n"); + if (leaveAlone) return [["pre", {}, text]]; function add(str, style) { if (style != curstyle) { if (curstyle) accum.push(["span", {"class": "cm-" + curstyle}, curstr]); diff --git a/doc/tutorial/control.md b/doc/tutorial/control.md index 198d035accb..966aeb2df52 100644 --- a/doc/tutorial/control.md +++ b/doc/tutorial/control.md @@ -38,6 +38,7 @@ Rust's `alt` construct is a generalized, cleaned-up version of C's each labelled with a pattern, and it will execute the arm that matches the value. + # let my_number = 1; alt my_number { 0 { std::io::println("zero"); } 1 | 2 { std::io::println("one or two"); } @@ -89,6 +90,7 @@ To a limited extent, it is possible to use destructuring patterns when declaring a variable with `let`. 
For example, you can say this to extract the fields from a tuple: + # fn get_tuple_of_two_ints() -> (int, int) { (1, 1) } let (a, b) = get_tuple_of_two_ints(); This will introduce two new variables, `a` and `b`, bound to the @@ -118,6 +120,8 @@ it finds one that can be divided by five. There's also `while`'s ugly cousin, `do`/`while`, which does not check its condition on the first iteration, using traditional syntax: + # fn eat_cake() {} + # fn any_cake_left() -> bool { false } do { eat_cake(); } while any_cake_left(); diff --git a/doc/tutorial/data.md b/doc/tutorial/data.md index 95ccb0ecec6..661fb2f0c6d 100644 --- a/doc/tutorial/data.md +++ b/doc/tutorial/data.md @@ -56,6 +56,7 @@ Records can be destructured in `alt` patterns. The basic syntax is omitted as a shorthand for simply binding the variable with the same name as the field. + # let mypoint = {x: 0f, y: 0f}; alt mypoint { {x: 0f, y: y_name} { /* Provide sub-patterns for fields */ } {x, y} { /* Simply bind the fields */ } @@ -71,6 +72,7 @@ the fields of a record, a record pattern may end with `, _` (as in Tags [FIXME terminology] are datatypes that have several different representations. For example, the type shown earlier: + # type point = {x: float, y: float}; tag shape { circle(point, float); rectangle(point, point); @@ -96,7 +98,7 @@ equivalent to an `enum` in C: east; south; west; - }; + } This will define `north`, `east`, `south`, and `west` as constants, all of which have type `direction`. @@ -116,6 +118,7 @@ That is a shorthand for this: Tag types like this can have their content extracted with the dereference (`*`) unary operator: + # tag gizmo_id = int; let my_gizmo_id = gizmo_id(10); let id_int: int = *my_gizmo_id; @@ -125,6 +128,8 @@ For tag types with multiple variants, destructuring is the only way to get at their contents.
All variant constructors can be used as patterns, as in this definition of `area`: + # type point = {x: float, y: float}; + # tag shape { circle(point, float); rectangle(point, point); } fn area(sh: shape) -> float { alt sh { circle(_, size) { std::math::pi * size * size } @@ -136,6 +141,8 @@ For variants without arguments, you have to write `variantname.` (with a dot at the end) to match them in a pattern. This is to prevent ambiguity between matching a variant name and binding a new variable. + # type point = {x: float, y: float}; + # tag direction { north; east; south; west; } fn point_from_direction(dir: direction) -> point { alt dir { north. { {x: 0f, y: 1f} } @@ -295,6 +302,7 @@ strings. They are always immutable. Resources are data types that have a destructor associated with them. + # fn close_file_desc(x: int) {} resource file_desc(fd: int) { close_file_desc(fd); } diff --git a/doc/tutorial/extract.js b/doc/tutorial/extract.js new file mode 100644 index 00000000000..e8461967545 --- /dev/null +++ b/doc/tutorial/extract.js @@ -0,0 +1,42 @@ +var fs = require("fs"), md = require("./lib/markdown"); + +// Runs markdown.js over the tutorial, to find the code blocks in it. +// Uses the #-markers in those code blocks, along with some vague +// heuristics, to turn them into compilable files. Outputs these files +// to fragments/.
+// +// '## ignore' means don't test this block +// '## notrust' means the block isn't rust code +// (used by build.js to not highlight it) +// '# code' means insert the given code to complete the fragment +// (build.js strips out such lines) + +var curFile, curFrag; +md.Markdown.dialects.Maruku.block.code = function code(block, next) { + if (block.match(/^ /)) { + var ignore, text = String(block); + while (next.length && next[0].match(/^ /)) text += "\n" + String(next.shift()); + text = text.split("\n").map(function(line) { + line = line.slice(4); + if (line == "## ignore" || line == "## notrust") { ignore = true; line = ""; } + if (/^# /.test(line)) line = line.slice(2); + return line; + }).join("\n"); + if (ignore) return; + if (!/\bfn main\b/.test(text)) { + if (/(^|\n) *(native|use|mod|import|export)\b/.test(text)) + text += "\nfn main() {}\n"; + else text = "fn main() {\n" + text + "\n}\n"; + } + if (!/\buse std\b/.test(text)) text = "use std;\n" + text; + fs.writeFileSync("fragments/" + curFile + "_" + (++curFrag) + ".rs", text); + } +}; + +fs.readFileSync("order", "utf8").split("\n").filter(id).forEach(handle); + +function id(x) { return x; } +function handle(file) { + curFile = file; curFrag = 0; + md.parse(fs.readFileSync(file + ".md", "utf8"), "Maruku"); +} diff --git a/doc/tutorial/ffi.md b/doc/tutorial/ffi.md index cca3669f10d..7b90f64a6ba 100644 --- a/doc/tutorial/ffi.md +++ b/doc/tutorial/ffi.md @@ -67,6 +67,7 @@ most notably the Windows API, use other calling conventions, so Rust provides a way to hint to the compiler which is expected by using the `"abi"` attribute: + #[cfg(target_os = "win32")] #[abi = "stdcall"] native mod kernel32 { fn SetEnvironmentVariableA(n: *u8, v: *u8) -> int; @@ -81,7 +82,9 @@ or `"stdcall"`. Other conventions may be defined in the future. The native `SHA1` function is declared to take three arguments, and return a pointer.
+ # native mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; + # } When declaring the argument types to a foreign function, the Rust compiler has no way to check whether your declaration is correct, so @@ -106,6 +109,9 @@ null pointers. The `sha1` function is the most obscure part of the program. + # import std::{str, vec}; + # mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } + # fn as_hex(data: [u8]) -> str { "hi" } fn sha1(data: str) -> str unsafe { let bytes = str::bytes(data); let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), @@ -141,10 +147,15 @@ Rust's safety mechanisms. Let's look at our `sha1` function again. + # import std::{str, vec}; + # mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } + # fn as_hex(data: [u8]) -> str { "hi" } + # fn x(data: str) -> str unsafe { let bytes = str::bytes(data); let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), vec::len(bytes), std::ptr::null()); ret as_hex(vec::unsafe::from_buf(hash, 20u)); + # } The `str::bytes` function is perfectly safe, it converts a string to an `[u8]`. This byte array is then fed to `vec::unsafe::to_ptr`, which diff --git a/doc/tutorial/func.md b/doc/tutorial/func.md index c174b904fb9..15811da04b3 100644 --- a/doc/tutorial/func.md +++ b/doc/tutorial/func.md @@ -19,6 +19,10 @@ This helps the compiler avoid spurious error messages. For example, the following code would be a type error if `dead_end` would be expected to return. + # fn can_go_left() -> bool { true } + # fn can_go_right() -> bool { true } + # tag dir { left; right; } + # fn dead_end() -> ! 
{ fail; } let dir = if can_go_left() { left } else if can_go_right() { right } else { dead_end(); }; @@ -96,12 +100,14 @@ of integers backwards: To run such an iteration, you could do this: + # fn for_rev(v: [int], act: block(int)) {} for_rev([1, 2, 3], {|n| log n; }); But Rust allows a more pleasant syntax for this situation, with the loop block moved out of the parenthesis and the final semicolon omitted: + # fn for_rev(v: [int], act: block(int)) {} for_rev([1, 2, 3]) {|n| log n; } diff --git a/doc/tutorial/generic.md b/doc/tutorial/generic.md index 43459665be5..02d37fd0e9b 100644 --- a/doc/tutorial/generic.md +++ b/doc/tutorial/generic.md @@ -55,16 +55,17 @@ dereferences become impossible. Rust's type inferrer works very well with generics, but there are programs that just can't be typed. - let n = none; + let n = std::option::none; + # n = std::option::some(1); If you never do anything else with `n`, the compiler will not be able to assign a type to it. (The same goes for `[]`, in fact.) If you really want to have such a statement, you'll have to write it like this: - let n2: option::t<int> = none; + let n2: std::option::t<int> = std::option::none; // or - let n = none::<int>; + let n = std::option::none::<int>; Note that, in a value expression, `<` already has a meaning as a comparison operator, so you'll have to write `::<T>` to explicitly @@ -120,6 +121,7 @@ take sendable types. If you try this program: + # fn map(f: block(int) -> int, v: [int]) {} fn plus1(x: int) -> int { x + 1 } map(plus1, [1, 2, 3]); @@ -131,6 +133,7 @@ way to pass integers, which is by value.
To get around this issue, you have to explicitly mark the arguments to a function that you want to pass to a generic higher-order function as being passed by pointer: + # fn map<T, U>(f: block(T) -> U, v: [T]) {} fn plus1(&&x: int) -> int { x + 1 } map(plus1, [1, 2, 3]); diff --git a/doc/tutorial/mod.md b/doc/tutorial/mod.md index 24e498bcdb3..86151a9b9d5 100644 --- a/doc/tutorial/mod.md +++ b/doc/tutorial/mod.md @@ -35,6 +35,7 @@ It is also possible to include multiple files in a crate. For this purpose, you create a `.rc` crate file, which references any number of `.rs` code files. A crate file could look like this: + ## ignore #[link(name = "farm", vers = "2.5", author = "mjh")]; mod cow; mod chicken; @@ -52,6 +53,7 @@ in a moment. To have a nested directory structure for your source files, you can nest mods in your `.rc` file: + ## ignore mod poultry { mod chicken; mod turkey; @@ -79,6 +81,7 @@ OS X. It is possible to provide more specific information when using an external crate. + ## ignore use myfarm (name = "farm", vers = "2.7"); When a comma-separated list of name/value pairs is given after `use`, @@ -90,6 +93,7 @@ local name `myfarm`. Our example crate declared this set of `link` attributes: + ## ignore #[link(name = "farm", vers = "2.5", author = "mjh")]; The version does not match the one provided in the `use` directive, so @@ -105,12 +109,14 @@ these two files: #[link(name = "mylib", vers = "1.0")]; fn world() -> str { "world" } + ## ignore // main.rs use mylib; fn main() { log_err "hello " + mylib::world(); } Now compile and run like this (adjust to your platform if necessary): + ## notrust > rustc --lib mylib.rs > rustc main.rs -L . > ./main @@ -147,8 +153,8 @@ restricted with `export` directives at the top of the module or file.
mod enc { export encrypt, decrypt; const super_secret_number: int = 10; - fn encrypt(n: int) { n + super_secret_number } - fn decrypt(n: int) { n - super_secret_number } + fn encrypt(n: int) -> int { n + super_secret_number } + fn decrypt(n: int) -> int { n - super_secret_number } } This defines a rock-solid encryption algorithm. Code outside of the diff --git a/doc/tutorial/setup.md b/doc/tutorial/setup.md index 44d99891f76..1ec77b5630e 100644 --- a/doc/tutorial/setup.md +++ b/doc/tutorial/setup.md @@ -21,6 +21,7 @@ If you modify the program to make it invalid (for example, remove the `use std` line), and then compile it, you'll see an error message like this: + ## notrust hello.rs:2:4: 2:20 error: unresolved modulename: std hello.rs:2 std::io::println("hello world!"); ^~~~~~~~~~~~~~~~ diff --git a/doc/tutorial/syntax.md b/doc/tutorial/syntax.md index 819df605d3a..d7be8df79dc 100644 --- a/doc/tutorial/syntax.md +++ b/doc/tutorial/syntax.md @@ -21,6 +21,7 @@ statements and expressions is C-like. Function calls are written precedence that they have in C, comments look the same, and constructs like `if` and `while` are available: + # fn call_a_function(_a: int) {} fn main() { if 1 < 2 { while false { call_a_function(10 * 4); } @@ -39,10 +40,13 @@ of languages. A lot of things that are statements in C are expressions in Rust. This allows for useless things like this (which passes nil—the void type—to a function): + # fn a_function(_a: ()) {} a_function(while false {}); But also useful things like this: + # fn the_stars_align() -> bool { false } + # fn something_else() -> bool { true } let x = if the_stars_align() { 4 } else if something_else() { 3 } else { 0 }; @@ -125,6 +129,7 @@ annotation: // The type of this vector will be inferred based on its use. let x = []; + # x = [3]; // Explicitly say this is a vector of integers.
let y: [int] = []; @@ -272,6 +277,7 @@ The comparison operators are the traditional `==`, `!=`, `<`, `>`, Rust has a ternary conditional operator `?:`, as in: + let badness = 12; let message = badness < 10 ? "error" : "FATAL ERROR"; For type casting, Rust uses the binary `as` operator, which has a @@ -311,19 +317,14 @@ followed by a comma-separated list of nested attributes, as in the `cfg` example above, or in this [crate](mod.html) metadata declaration: + ## ignore #[link(name = "std", vers = "0.1", url = "http://rust-lang.org/src/std")]; An attribute without a semicolon following it applies to the definition that follows it. When terminated with a semicolon, it -applies to the current context. The above example could also be -written like this: - - fn register_win_service() { - #[cfg(target_os = "win32")]; - /* ... */ - } +applies to the module or crate. ## Syntax extensions diff --git a/doc/tutorial/test.md b/doc/tutorial/test.md index b7185c51085..b60250217e8 100644 --- a/doc/tutorial/test.md +++ b/doc/tutorial/test.md @@ -21,6 +21,7 @@ When you compile the program normally, the `test_twice` function will not be used. To actually run the tests, compile with the `--test` flag: + ## notrust > rustc --lib twice.rs > ./twice running 1 tests @@ -30,6 +31,7 @@ flag: Or, if we change the file to fail, for example by replacing `x + x` with `x + 1`: + ## notrust running 1 tests test test_twice ... FAILED failures: diff --git a/doc/tutorial/test.sh b/doc/tutorial/test.sh new file mode 100644 index 00000000000..9cdb1aa9f8e --- /dev/null +++ b/doc/tutorial/test.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm -f fragments/*.rs +mkdir -p fragments +node extract.js +for F in `ls fragments/*.rs`; do + $RUSTC $F > /dev/null + if [[ $? 
!= 0 ]] ; then echo $F; fi +done diff --git a/src/comp/syntax/parse/parser.rs b/src/comp/syntax/parse/parser.rs index 8d7a3bcc414..e33572d4575 100644 --- a/src/comp/syntax/parse/parser.rs +++ b/src/comp/syntax/parse/parser.rs @@ -2485,22 +2485,12 @@ fn parse_crate_directive(p: parser, first_outer_attr: [ast::attribute]) -> _ { none } }; alt p.peek() { - - - - - // mod x = "foo.rs"; token::SEMI. { let hi = p.get_hi_pos(); p.bump(); ret spanned(lo, hi, ast::cdir_src_mod(id, file_opt, outer_attrs)); } - - - - - // mod x = "foo_dir" { ...directives... } token::LBRACE. { p.bump();