Add block-comment support, various additional operators. Rustc can lex itself now.

This commit is contained in:
Graydon Hoare 2010-08-31 13:29:00 -07:00
parent 47e86a05ae
commit cfca901cfe
2 changed files with 165 additions and 61 deletions

View File

@ -238,16 +238,48 @@ fn consume_any_whitespace(reader rdr) {
fn consume_any_line_comment(reader rdr) {
if (rdr.curr() == '/') {
if (rdr.next() == '/') {
while (rdr.curr() != '\n') {
rdr.bump();
alt (rdr.next()) {
case ('/') {
while (rdr.curr() != '\n') {
rdr.bump();
}
// Restart whitespace munch.
be consume_any_whitespace(rdr);
}
case ('*') {
rdr.bump();
rdr.bump();
be consume_block_comment(rdr);
}
case (_) {
ret;
}
// Restart whitespace munch.
be consume_any_whitespace(rdr);
}
}
}
// Skips the rest of a block comment, honouring nesting: each "/*" raises the
// nesting depth and each "*/" lowers it; scanning stops once the depth
// reaches zero. The caller in consume_any_line_comment bumps past the opening
// "/*" before handing over, which is why the depth starts at 1.
// NOTE(review): nothing in this loop checks for end of input, so an
// unterminated comment presumably never leaves the loop -- confirm how the
// reader behaves at EOF.
fn consume_block_comment(reader rdr) {
let int level = 1;
while (level > 0) {
if (rdr.curr() == '/' && rdr.next() == '*') {
// Nested opener: step over both characters and go one level deeper.
rdr.bump();
rdr.bump();
level += 1;
} else {
if (rdr.curr() == '*' && rdr.next() == '/') {
// Closer: step over both characters and come back up one level.
rdr.bump();
rdr.bump();
level -= 1;
} else {
// Ordinary comment text: discard one character.
rdr.bump();
}
}
}
// The comment is finished; hand control back to the whitespace skipper so
// that any whitespace or further comments that follow are consumed as well.
be consume_any_whitespace(rdr);
}
fn next_token(reader rdr) -> token.token {
auto accum_str = "";
auto accum_int = 0;
@ -310,18 +342,19 @@ fn next_token(reader rdr) -> token.token {
}
fn op_or_opeq(reader rdr, token.op op) -> token.token {
fn binop(reader rdr, token.binop op) -> token.token {
rdr.bump();
if (rdr.next() == '=') {
rdr.bump();
ret token.OPEQ(op);
ret token.BINOPEQ(op);
} else {
ret token.OP(op);
ret token.BINOP(op);
}
}
alt (c) {
// One-byte tokens.
case (':') { rdr.bump(); ret token.COLON(); }
case (';') { rdr.bump(); ret token.SEMI(); }
case (',') { rdr.bump(); ret token.COMMA(); }
case ('.') { rdr.bump(); ret token.DOT(); }
@ -334,16 +367,74 @@ fn next_token(reader rdr) -> token.token {
case ('@') { rdr.bump(); ret token.AT(); }
case ('#') { rdr.bump(); ret token.POUND(); }
case ('_') { rdr.bump(); ret token.UNDERSCORE(); }
case ('~') { rdr.bump(); ret token.TILDE(); }
// Multi-byte tokens.
case ('=') {
if (rdr.next() == '=') {
rdr.bump();
if (rdr.curr() == '=') {
rdr.bump();
rdr.bump();
ret token.OP(token.EQEQ());
ret token.EQEQ();
} else {
ret token.EQ();
}
}
case ('!') {
rdr.bump();
if (rdr.curr() == '=') {
rdr.bump();
ret token.OP(token.EQ());
ret token.NE();
} else {
ret token.NOT();
}
}
case ('<') {
rdr.bump();
alt (rdr.curr()) {
case ('=') {
rdr.bump();
ret token.LE();
}
case ('<') {
ret binop(rdr, token.LSL());
}
case ('-') {
rdr.bump();
ret token.LARROW();
}
case ('|') {
rdr.bump();
ret token.SEND();
}
case (_) {
ret token.LT();
}
}
}
case ('>') {
rdr.bump();
alt (rdr.curr()) {
case ('=') {
rdr.bump();
ret token.GE();
}
case ('>') {
if (rdr.next() == '>') {
rdr.bump();
ret binop(rdr, token.ASR());
} else {
ret binop(rdr, token.LSR());
}
}
case (_) {
ret token.GT();
}
}
}
@ -426,7 +517,7 @@ fn next_token(reader rdr) -> token.token {
rdr.bump();
ret token.RARROW();
} else {
ret op_or_opeq(rdr, token.MINUS());
ret binop(rdr, token.MINUS());
}
}
@ -434,34 +525,40 @@ fn next_token(reader rdr) -> token.token {
if (rdr.next() == '&') {
rdr.bump();
rdr.bump();
ret token.OP(token.ANDAND());
ret token.ANDAND();
} else {
ret op_or_opeq(rdr, token.AND());
ret binop(rdr, token.AND());
}
}
case ('|') {
if (rdr.next() == '|') {
rdr.bump();
rdr.bump();
ret token.OROR();
} else {
ret binop(rdr, token.OR());
}
}
case ('+') {
ret op_or_opeq(rdr, token.PLUS());
ret binop(rdr, token.PLUS());
}
case ('*') {
ret op_or_opeq(rdr, token.STAR());
ret binop(rdr, token.STAR());
}
case ('/') {
ret op_or_opeq(rdr, token.STAR());
}
case ('!') {
ret op_or_opeq(rdr, token.NOT());
ret binop(rdr, token.STAR());
}
case ('^') {
ret op_or_opeq(rdr, token.CARET());
ret binop(rdr, token.CARET());
}
case ('%') {
ret op_or_opeq(rdr, token.PERCENT());
ret binop(rdr, token.PERCENT());
}
}

View File

@ -3,12 +3,21 @@ import util.common.ty_mach_to_str;
import std._int;
import std._uint;
type op = tag
// Binary operator kinds carried as the payload of the BINOP and BINOPEQ
// tokens; to_str renders a BINOPEQ as the operator's text followed by "=".
type binop = tag
(PLUS(),
MINUS(),
STAR(),
SLASH(),
PERCENT(),
CARET(),
AND(),
OR(),
LSL(),
LSR(),
ASR());
type token = tag
(/* Expression-operator symbols. */
EQ(),
LT(),
LE(),
@ -16,20 +25,14 @@ type op = tag
NE(),
GE(),
GT(),
ANDAND(),
OROR(),
NOT(),
TILDE(),
CARET(),
AND(),
ANDAND(),
OR(),
OROR(),
LSL(),
LSR(),
ASR());
type token = tag
(OP(op),
OPEQ(op),
BINOP(binop),
BINOPEQ(binop),
AS(),
WITH(),
@ -152,40 +155,44 @@ type token = tag
BRACEQUOTE(str),
EOF());
fn op_to_str(op o) -> str {
fn binop_to_str(binop o) -> str {
alt (o) {
case (PLUS()) { ret "+"; }
case (MINUS()) { ret "-"; }
case (STAR()) { ret "*"; }
case (SLASH()) { ret "/"; }
case (PERCENT()) { ret "%"; }
case (EQ()) { ret "="; }
case (LT()) { ret "<"; }
case (LE()) { ret "<="; }
case (EQEQ()) { ret "=="; }
case (NE()) { ret "!="; }
case (GE()) { ret ">="; }
case (GT()) { ret ">"; }
case (NOT()) { ret "!"; }
case (TILDE()) { ret "~"; }
case (CARET()) { ret "^"; }
case (AND()) { ret "&"; }
case (ANDAND()) { ret "&&"; }
case (OR()) { ret "|"; }
case (OROR()) { ret "||"; }
case (LSL()) { ret "<<"; }
case (LSR()) { ret ">>"; }
case (ASR()) { ret ">>>"; }
case (PLUS()) { ret "+"; }
case (MINUS()) { ret "-"; }
case (STAR()) { ret "*"; }
case (SLASH()) { ret "/"; }
case (PERCENT()) { ret "%"; }
case (CARET()) { ret "^"; }
case (AND()) { ret "&"; }
case (OR()) { ret "|"; }
case (LSL()) { ret "<<"; }
case (LSR()) { ret ">>"; }
case (ASR()) { ret ">>>"; }
}
}
fn to_str(token t) -> str {
alt (t) {
case (OP(op)) { ret op_to_str(op); }
case (OPEQ(op)) { ret op_to_str(op) + "="; }
case (EQ()) { ret "="; }
case (LT()) { ret "<"; }
case (LE()) { ret "<="; }
case (EQEQ()) { ret "=="; }
case (NE()) { ret "!="; }
case (GE()) { ret ">="; }
case (GT()) { ret ">"; }
case (NOT()) { ret "!"; }
case (TILDE()) { ret "~"; }
case (OROR()) { ret "||"; }
case (ANDAND()) { ret "&&"; }
case (BINOP(op)) { ret binop_to_str(op); }
case (BINOPEQ(op)) { ret binop_to_str(op) + "="; }
case (AS()) { ret "as"; }
case (WITH()) { ret "with"; }
/* Structural symbols */
case (AT()) { ret "@"; }
case (DOT()) { ret "."; }