Expand rustc lexer to do almost-nearly-nontrivial stuff.
This commit is contained in:
parent
9fc4fc6692
commit
0f224f977d
@ -17,7 +17,7 @@ fn write_module() {
|
||||
llvm.LLVMDisposeModule(llmod);
|
||||
}
|
||||
|
||||
fn main(vec[str] args) -> () {
|
||||
fn main(vec[str] args) {
|
||||
|
||||
log "This is the rust 'self-hosted' compiler.";
|
||||
log "The one written in rust.";
|
||||
@ -30,6 +30,16 @@ fn main(vec[str] args) -> () {
|
||||
auto p = parser.new_parser(filename);
|
||||
log "opened file: " + filename;
|
||||
auto tok = p.peek();
|
||||
while (true) {
|
||||
alt (tok) {
|
||||
case (token.EOF()) { ret; }
|
||||
case (_) {
|
||||
log token.to_str(tok);
|
||||
p.bump();
|
||||
tok = p.peek();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
@ -38,8 +38,36 @@ fn next_token(stdio_reader rdr) -> token.token {
|
||||
|
||||
if (c == eof) { ret token.EOF(); }
|
||||
if (is_alpha(c)) {
|
||||
accum += (c as u8);
|
||||
while (is_alpha(c)) {
|
||||
accum += (c as u8);
|
||||
c = rdr.getc() as char;
|
||||
ret token.IDENT(accum);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_dec_digit(c)) {
|
||||
if (c == '0') {
|
||||
} else {
|
||||
while (is_dec_digit(c)) {
|
||||
accum += (c as u8);
|
||||
ret token.LIT_INT(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// One-byte structural symbols.
|
||||
if (c == ';') { ret token.SEMI(); }
|
||||
if (c == '.') { ret token.DOT(); }
|
||||
if (c == '(') { ret token.LPAREN(); }
|
||||
if (c == ')') { ret token.RPAREN(); }
|
||||
if (c == '{') { ret token.LBRACE(); }
|
||||
if (c == '}') { ret token.RBRACE(); }
|
||||
if (c == '[') { ret token.LBRACKET(); }
|
||||
if (c == ']') { ret token.RBRACKET(); }
|
||||
if (c == '@') { ret token.AT(); }
|
||||
if (c == '#') { ret token.POUND(); }
|
||||
|
||||
log "lexer stopping at ";
|
||||
log c;
|
||||
ret token.EOF();
|
||||
}
|
||||
|
@ -1,4 +1,7 @@
|
||||
import util.common.ty_mach;
|
||||
import util.common.ty_mach_to_str;
|
||||
import std._int;
|
||||
import std._uint;
|
||||
|
||||
type op = tag
|
||||
(PLUS(),
|
||||
@ -107,10 +110,10 @@ type token = tag
|
||||
|
||||
/* Literals */
|
||||
LIT_INT(int),
|
||||
LIT_UINT(int),
|
||||
LIT_UINT(uint),
|
||||
LIT_MACH_INT(ty_mach, int),
|
||||
LIT_STR(str),
|
||||
LIT_CHAR(int),
|
||||
LIT_CHAR(char),
|
||||
LIT_BOOL(bool),
|
||||
|
||||
/* Name components */
|
||||
@ -149,6 +152,149 @@ type token = tag
|
||||
BRACEQUOTE(str),
|
||||
EOF());
|
||||
|
||||
fn to_str(token t) -> str {
|
||||
alt (t) {
|
||||
case (OP(_)) { ret "<op>"; }
|
||||
case (OPEQ(_)) { ret "<op>="; }
|
||||
case (AS()) { ret "as"; }
|
||||
case (WITH()) { ret "with"; }
|
||||
|
||||
/* Structural symbols */
|
||||
case (AT()) { ret "@"; }
|
||||
case (DOT()) { ret "."; }
|
||||
case (COMMA()) { ret ","; }
|
||||
case (SEMI()) { ret ";"; }
|
||||
case (COLON()) { ret ":"; }
|
||||
case (RARROW()) { ret "->"; }
|
||||
case (SEND()) { ret "<|"; }
|
||||
case (LARROW()) { ret "<-"; }
|
||||
case (LPAREN()) { ret "("; }
|
||||
case (RPAREN()) { ret ")"; }
|
||||
case (LBRACKET()) { ret "["; }
|
||||
case (RBRACKET()) { ret "]"; }
|
||||
case (LBRACE()) { ret "{"; }
|
||||
case (RBRACE()) { ret "}"; }
|
||||
|
||||
/* Module and crate keywords */
|
||||
case (MOD()) { ret "mod"; }
|
||||
case (USE()) { ret "use"; }
|
||||
case (AUTH()) { ret "auth"; }
|
||||
case (META()) { ret "meta"; }
|
||||
|
||||
/* Metaprogramming keywords */
|
||||
case (SYNTAX()) { ret "syntax"; }
|
||||
case (POUND()) { ret "#"; }
|
||||
|
||||
/* Statement keywords */
|
||||
case (IF()) { ret "if"; }
|
||||
case (ELSE()) { ret "else"; }
|
||||
case (DO()) { ret "do"; }
|
||||
case (WHILE()) { ret "while"; }
|
||||
case (ALT()) { ret "alt"; }
|
||||
case (CASE()) { ret "case"; }
|
||||
|
||||
case (FAIL()) { ret "fail"; }
|
||||
case (DROP()) { ret "drop"; }
|
||||
|
||||
case (IN()) { ret "in"; }
|
||||
case (FOR()) { ret "for"; }
|
||||
case (EACH()) { ret "each"; }
|
||||
case (PUT()) { ret "put"; }
|
||||
case (RET()) { ret "ret"; }
|
||||
case (BE()) { ret "be"; }
|
||||
|
||||
/* Type and type-state keywords */
|
||||
case (TYPE()) { ret "type"; }
|
||||
case (CHECK()) { ret "check"; }
|
||||
case (CLAIM()) { ret "claim"; }
|
||||
case (PROVE()) { ret "prove"; }
|
||||
|
||||
/* Effect keywords */
|
||||
case (IO()) { ret "io"; }
|
||||
case (STATE()) { ret "state"; }
|
||||
case (UNSAFE()) { ret "unsafe"; }
|
||||
|
||||
/* Type qualifiers */
|
||||
case (NATIVE()) { ret "native"; }
|
||||
case (AUTO()) { ret "auto"; }
|
||||
case (MUTABLE()) { ret "mutable"; }
|
||||
|
||||
/* Name management */
|
||||
case (IMPORT()) { ret "import"; }
|
||||
case (EXPORT()) { ret "export"; }
|
||||
|
||||
/* Value / stmt declarators */
|
||||
case (LET()) { ret "let"; }
|
||||
|
||||
/* Magic runtime services */
|
||||
case (LOG()) { ret "log"; }
|
||||
case (SPAWN()) { ret "spawn"; }
|
||||
case (BIND()) { ret "bind"; }
|
||||
case (THREAD()) { ret "thread"; }
|
||||
case (YIELD()) { ret "yield"; }
|
||||
case (JOIN()) { ret "join"; }
|
||||
|
||||
/* Literals */
|
||||
case (LIT_INT(i)) { ret _int.to_str(i, 10u); }
|
||||
case (LIT_UINT(u)) { ret _uint.to_str(u, 10u); }
|
||||
case (LIT_MACH_INT(tm, i)) {
|
||||
ret _int.to_str(i, 10u)
|
||||
+ "_" + ty_mach_to_str(tm);
|
||||
}
|
||||
|
||||
case (LIT_STR(s)) {
|
||||
// FIXME: escape.
|
||||
ret "\"" + s + "\"";
|
||||
}
|
||||
case (LIT_CHAR(c)) {
|
||||
// FIXME: escape and encode.
|
||||
auto tmp = "";
|
||||
tmp += (c as u8);
|
||||
ret tmp;
|
||||
}
|
||||
|
||||
case (LIT_BOOL(b)) {
|
||||
if (b) { ret "true"; } else { ret "false"; }
|
||||
}
|
||||
|
||||
/* Name components */
|
||||
case (IDENT(s)) { ret s; }
|
||||
case (IDX(i)) { ret "_" + _int.to_str(i, 10u); }
|
||||
case (UNDERSCORE()) { ret "_"; }
|
||||
|
||||
/* Reserved type names */
|
||||
case (BOOL()) { ret "bool"; }
|
||||
case (INT()) { ret "int"; }
|
||||
case (UINT()) { ret "uint"; }
|
||||
case (FLOAT()) { ret "float"; }
|
||||
case (CHAR()) { ret "char"; }
|
||||
case (STR()) { ret "str"; }
|
||||
case (MACH(tm)) { ret ty_mach_to_str(tm); }
|
||||
|
||||
/* Algebraic type constructors */
|
||||
case (REC()) { ret "rec"; }
|
||||
case (TUP()) { ret "tup"; }
|
||||
case (TAG()) { ret "tag"; }
|
||||
case (VEC()) { ret "vec"; }
|
||||
case (ANY()) { ret "any"; }
|
||||
|
||||
/* Callable type constructors */
|
||||
case (FN()) { ret "fn"; }
|
||||
case (ITER()) { ret "iter"; }
|
||||
|
||||
/* Object type */
|
||||
case (OBJ()) { ret "obj"; }
|
||||
|
||||
/* Comm and task types */
|
||||
case (CHAN()) { ret "chan"; }
|
||||
case (PORT()) { ret "port"; }
|
||||
case (TASK()) { ret "task"; }
|
||||
|
||||
case (BRACEQUOTE(_)) { ret "<bracequote>"; }
|
||||
case (EOF()) { ret "<eof>"; }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Local Variables:
|
||||
|
@ -3,6 +3,19 @@ type ty_mach = tag( ty_i8(), ty_i16(), ty_i32(), ty_i64(),
|
||||
ty_u8(), ty_u16(), ty_u32(), ty_u64(),
|
||||
ty_f32(), ty_f16() );
|
||||
|
||||
fn ty_mach_to_str(ty_mach tm) -> str {
|
||||
alt (tm) {
|
||||
case (ty_u8()) { ret "u8"; }
|
||||
case (ty_i8()) { ret "i8"; }
|
||||
case (ty_u16()) { ret "u16"; }
|
||||
case (ty_i16()) { ret "i16"; }
|
||||
case (ty_u32()) { ret "u32"; }
|
||||
case (ty_i32()) { ret "i32"; }
|
||||
case (ty_u64()) { ret "u64"; }
|
||||
case (ty_i64()) { ret "i64"; }
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Local Variables:
|
||||
// mode: rust
|
||||
|
Loading…
Reference in New Issue
Block a user