From 0f224f977d9edeb7f8ca56c052c1202fab384552 Mon Sep 17 00:00:00 2001
From: Graydon Hoare
Date: Fri, 20 Aug 2010 11:41:34 -0700
Subject: [PATCH] Expand rustc lexer to do almost-nearly-nontrivial stuff.

---
Review notes (below the fold: not part of the commit message).

Formatting: this copy of the patch had lost its line structure.  The
records below were re-split using the hunk headers and the diffstat,
which they match exactly.  Indentation and the position of blank context
lines could not be recovered and are reconstructed by convention, so
apply with `git apply --ignore-whitespace` (TODO: confirm against the
tree).  The hunks themselves are unchanged.

Issues in the committed code, left as committed:

 * lexer.rs, next_token: `ret token.IDENT(accum);` is inside the body of
   `while (is_alpha(c))`, so the function returns on the first iteration.
   An identifier is produced one letter at a time, and the character
   fetched by `rdr.getc()` just before the return appears to be
   discarded.
 * lexer.rs, next_token: the decimal loop never reads another character
   and returns `token.LIT_INT(0)` on its first iteration, so the value is
   always 0 and `accum` is unused.  The `c == '0'` branch is empty, so a
   leading '0' falls through to "lexer stopping at" and an EOF token.
 * rustc.rs, main: `ret` in the `token.EOF()` case returns from `main`,
   and `while (true)` has no other exit, so the `i += 1;` that follows is
   never reached once a file has been lexed.
 * common.rs, ty_mach_to_str: there is no case for `ty_f32()` or
   `ty_f16()`, both of which the `ty_mach` tag declares.
 * token.rs, to_str: `OP(_)`, `BRACEQUOTE(_)` and `EOF()` yield "" and
   `OPEQ(_)` yields "=" whatever the operator; `LIT_CHAR` narrows the
   char with `c as u8` (already marked FIXME).

 src/comp/driver/rustc.rs |  12 +++-
 src/comp/fe/lexer.rs     |  30 +++++++-
 src/comp/fe/token.rs     | 150 ++++++++++++++++++++++++++++++++++++++-
 src/comp/util/common.rs  |  13 ++++
 4 files changed, 201 insertions(+), 4 deletions(-)

diff --git a/src/comp/driver/rustc.rs b/src/comp/driver/rustc.rs
index 67aa5308393..42bd91c59bf 100644
--- a/src/comp/driver/rustc.rs
+++ b/src/comp/driver/rustc.rs
@@ -17,7 +17,7 @@ fn write_module() {
   llvm.LLVMDisposeModule(llmod);
 }
 
-fn main(vec[str] args) -> () {
+fn main(vec[str] args) {
 
   log "This is the rust 'self-hosted' compiler.";
   log "The one written in rust.";
@@ -30,6 +30,16 @@ fn main(vec[str] args) -> () {
       auto p = parser.new_parser(filename);
       log "opened file: " + filename;
       auto tok = p.peek();
+      while (true) {
+        alt (tok) {
+          case (token.EOF()) { ret; }
+          case (_) {
+            log token.to_str(tok);
+            p.bump();
+            tok = p.peek();
+          }
+        }
+      }
     }
     i += 1;
   }
diff --git a/src/comp/fe/lexer.rs b/src/comp/fe/lexer.rs
index 80b4b6766d1..57a60fe3f54 100644
--- a/src/comp/fe/lexer.rs
+++ b/src/comp/fe/lexer.rs
@@ -38,8 +38,36 @@ fn next_token(stdio_reader rdr) -> token.token {
     if (c == eof) { ret token.EOF(); }
 
     if (is_alpha(c)) {
-        accum += (c as u8);
+        while (is_alpha(c)) {
+            accum += (c as u8);
+            c = rdr.getc() as char;
+            ret token.IDENT(accum);
+        }
     }
+
+    if (is_dec_digit(c)) {
+        if (c == '0') {
+        } else {
+            while (is_dec_digit(c)) {
+                accum += (c as u8);
+                ret token.LIT_INT(0);
+            }
+        }
+    }
+
+    // One-byte structural symbols.
+    if (c == ';') { ret token.SEMI(); }
+    if (c == '.') { ret token.DOT(); }
+    if (c == '(') { ret token.LPAREN(); }
+    if (c == ')') { ret token.RPAREN(); }
+    if (c == '{') { ret token.LBRACE(); }
+    if (c == '}') { ret token.RBRACE(); }
+    if (c == '[') { ret token.LBRACKET(); }
+    if (c == ']') { ret token.RBRACKET(); }
+    if (c == '@') { ret token.AT(); }
+    if (c == '#') { ret token.POUND(); }
+
+    log "lexer stopping at ";
     log c;
     ret token.EOF();
 }
diff --git a/src/comp/fe/token.rs b/src/comp/fe/token.rs
index 76142910221..8896b6f7767 100644
--- a/src/comp/fe/token.rs
+++ b/src/comp/fe/token.rs
@@ -1,4 +1,7 @@
 import util.common.ty_mach;
+import util.common.ty_mach_to_str;
+import std._int;
+import std._uint;
 
 type op = tag
     (PLUS(),
@@ -107,10 +110,10 @@ type token = tag
 
     /* Literals */
     LIT_INT(int),
-    LIT_UINT(int),
+    LIT_UINT(uint),
     LIT_MACH_INT(ty_mach, int),
     LIT_STR(str),
-    LIT_CHAR(int),
+    LIT_CHAR(char),
     LIT_BOOL(bool),
 
     /* Name components */
@@ -149,6 +152,149 @@ type token = tag
 
     BRACEQUOTE(str),
     EOF());
+fn to_str(token t) -> str {
+    alt (t) {
+        case (OP(_)) { ret ""; }
+        case (OPEQ(_)) { ret "="; }
+        case (AS()) { ret "as"; }
+        case (WITH()) { ret "with"; }
+
+        /* Structural symbols */
+        case (AT()) { ret "@"; }
+        case (DOT()) { ret "."; }
+        case (COMMA()) { ret ","; }
+        case (SEMI()) { ret ";"; }
+        case (COLON()) { ret ":"; }
+        case (RARROW()) { ret "->"; }
+        case (SEND()) { ret "<|"; }
+        case (LARROW()) { ret "<-"; }
+        case (LPAREN()) { ret "("; }
+        case (RPAREN()) { ret ")"; }
+        case (LBRACKET()) { ret "["; }
+        case (RBRACKET()) { ret "]"; }
+        case (LBRACE()) { ret "{"; }
+        case (RBRACE()) { ret "}"; }
+
+        /* Module and crate keywords */
+        case (MOD()) { ret "mod"; }
+        case (USE()) { ret "use"; }
+        case (AUTH()) { ret "auth"; }
+        case (META()) { ret "meta"; }
+
+        /* Metaprogramming keywords */
+        case (SYNTAX()) { ret "syntax"; }
+        case (POUND()) { ret "#"; }
+
+        /* Statement keywords */
+        case (IF()) { ret "if"; }
+        case (ELSE()) { ret "else"; }
+        case (DO()) { ret "do"; }
+        case (WHILE()) { ret "while"; }
+        case (ALT()) { ret "alt"; }
+        case (CASE()) { ret "case"; }
+
+        case (FAIL()) { ret "fail"; }
+        case (DROP()) { ret "drop"; }
+
+        case (IN()) { ret "in"; }
+        case (FOR()) { ret "for"; }
+        case (EACH()) { ret "each"; }
+        case (PUT()) { ret "put"; }
+        case (RET()) { ret "ret"; }
+        case (BE()) { ret "be"; }
+
+        /* Type and type-state keywords */
+        case (TYPE()) { ret "type"; }
+        case (CHECK()) { ret "check"; }
+        case (CLAIM()) { ret "claim"; }
+        case (PROVE()) { ret "prove"; }
+
+        /* Effect keywords */
+        case (IO()) { ret "io"; }
+        case (STATE()) { ret "state"; }
+        case (UNSAFE()) { ret "unsafe"; }
+
+        /* Type qualifiers */
+        case (NATIVE()) { ret "native"; }
+        case (AUTO()) { ret "auto"; }
+        case (MUTABLE()) { ret "mutable"; }
+
+        /* Name management */
+        case (IMPORT()) { ret "import"; }
+        case (EXPORT()) { ret "export"; }
+
+        /* Value / stmt declarators */
+        case (LET()) { ret "let"; }
+
+        /* Magic runtime services */
+        case (LOG()) { ret "log"; }
+        case (SPAWN()) { ret "spawn"; }
+        case (BIND()) { ret "bind"; }
+        case (THREAD()) { ret "thread"; }
+        case (YIELD()) { ret "yield"; }
+        case (JOIN()) { ret "join"; }
+
+        /* Literals */
+        case (LIT_INT(i)) { ret _int.to_str(i, 10u); }
+        case (LIT_UINT(u)) { ret _uint.to_str(u, 10u); }
+        case (LIT_MACH_INT(tm, i)) {
+            ret _int.to_str(i, 10u)
+                + "_" + ty_mach_to_str(tm);
+        }
+
+        case (LIT_STR(s)) {
+            // FIXME: escape.
+            ret "\"" + s + "\"";
+        }
+        case (LIT_CHAR(c)) {
+            // FIXME: escape and encode.
+            auto tmp = "";
+            tmp += (c as u8);
+            ret tmp;
+        }
+
+        case (LIT_BOOL(b)) {
+            if (b) { ret "true"; } else { ret "false"; }
+        }
+
+        /* Name components */
+        case (IDENT(s)) { ret s; }
+        case (IDX(i)) { ret "_" + _int.to_str(i, 10u); }
+        case (UNDERSCORE()) { ret "_"; }
+
+        /* Reserved type names */
+        case (BOOL()) { ret "bool"; }
+        case (INT()) { ret "int"; }
+        case (UINT()) { ret "uint"; }
+        case (FLOAT()) { ret "float"; }
+        case (CHAR()) { ret "char"; }
+        case (STR()) { ret "str"; }
+        case (MACH(tm)) { ret ty_mach_to_str(tm); }
+
+        /* Algebraic type constructors */
+        case (REC()) { ret "rec"; }
+        case (TUP()) { ret "tup"; }
+        case (TAG()) { ret "tag"; }
+        case (VEC()) { ret "vec"; }
+        case (ANY()) { ret "any"; }
+
+        /* Callable type constructors */
+        case (FN()) { ret "fn"; }
+        case (ITER()) { ret "iter"; }
+
+        /* Object type */
+        case (OBJ()) { ret "obj"; }
+
+        /* Comm and task types */
+        case (CHAN()) { ret "chan"; }
+        case (PORT()) { ret "port"; }
+        case (TASK()) { ret "task"; }
+
+        case (BRACEQUOTE(_)) { ret ""; }
+        case (EOF()) { ret ""; }
+    }
+}
+
 
 
 // Local Variables:
diff --git a/src/comp/util/common.rs b/src/comp/util/common.rs
index b36f244db2d..3089c2c12da 100644
--- a/src/comp/util/common.rs
+++ b/src/comp/util/common.rs
@@ -3,6 +3,19 @@ type ty_mach = tag( ty_i8(), ty_i16(), ty_i32(), ty_i64(),
                     ty_u8(), ty_u16(), ty_u32(), ty_u64(),
                     ty_f32(), ty_f16() );
 
+fn ty_mach_to_str(ty_mach tm) -> str {
+    alt (tm) {
+        case (ty_u8()) { ret "u8"; }
+        case (ty_i8()) { ret "i8"; }
+        case (ty_u16()) { ret "u16"; }
+        case (ty_i16()) { ret "i16"; }
+        case (ty_u32()) { ret "u32"; }
+        case (ty_i32()) { ret "i32"; }
+        case (ty_u64()) { ret "u64"; }
+        case (ty_i64()) { ret "i64"; }
+    }
+}
+
 //
 // Local Variables:
 // mode: rust