Make the lexer slightly less stateful

This commit is contained in:
Marijn Haverbeke 2011-07-25 16:23:32 +02:00
parent e949aab10a
commit dca67f95cb
2 changed files with 25 additions and 29 deletions

View File

@ -19,9 +19,7 @@ type reader =
fn next() -> char ;
fn init() ;
fn bump() ;
fn mark() ;
fn get_mark_chpos() -> uint ;
fn get_mark_str() -> str ;
fn get_str_from(uint) -> str ;
fn get_interner() -> @interner::interner[str] ;
fn get_chpos() -> uint ;
fn get_byte_pos() -> uint ;
@ -38,21 +36,16 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
mutable uint col,
mutable uint pos,
mutable char ch,
mutable uint mark_pos,
mutable uint mark_chpos,
mutable uint chpos,
mutable str[] strs,
codemap::filemap fm,
@interner::interner[str] itr) {
fn is_eof() -> bool { ret ch == -1 as char; }
fn mark() { mark_pos = pos; mark_chpos = chpos; }
fn get_mark_str() -> str {
fn get_str_from(uint start) -> str {
// I'm pretty skeptical about this subtraction. What if there's a
// multi-byte character before the start position?
ret str::slice(src, mark_pos - 1u,
pos - 1u);
ret str::slice(src, start - 1u, pos - 1u);
}
fn get_mark_chpos() -> uint { ret mark_chpos; }
fn get_chpos() -> uint { ret chpos; }
fn get_byte_pos() -> uint { ret pos; }
fn curr() -> char { ret ch; }
@ -90,9 +83,8 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
}
let str[] strs = ~[];
auto rd =
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u,
filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap,
itr);
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char,
filemap.start_pos.ch, strs, filemap, itr);
rd.init();
ret rd;
}
@ -346,11 +338,17 @@ fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
ret accum_int as char;
}
fn next_token(&reader rdr) -> token::token {
auto accum_str = "";
fn next_token(&reader rdr) -> tup(token::token, uint, uint) {
consume_whitespace_and_comments(rdr);
if (rdr.is_eof()) { ret token::EOF; }
rdr.mark();
auto start_chpos = rdr.get_chpos();
auto start_bpos = rdr.get_byte_pos();
auto tok = if rdr.is_eof() { token::EOF }
else { next_token_inner(rdr) };
ret tup(tok, start_chpos, start_bpos);
}
fn next_token_inner(&reader rdr) -> token::token {
auto accum_str = "";
auto c = rdr.curr();
if (is_alpha(c) || c == '_') {
while (is_alnum(c) || c == '_') {
@ -762,11 +760,10 @@ fn gather_comments_and_literals(&codemap::codemap cm, str path)
break;
}
auto tok = next_token(rdr);
if (is_lit(tok)) {
literals += ~[rec(lit=rdr.get_mark_str(),
pos=rdr.get_mark_chpos())];
if (is_lit(tok._0)) {
literals += ~[rec(lit=rdr.get_str_from(tok._2), pos=tok._1)];
}
log "tok: " + token::to_str(rdr, tok);
log "tok: " + token::to_str(rdr, tok._0);
first_read = false;
}
ret rec(cmnts=comments, lits=literals);

View File

@ -90,8 +90,9 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
// + ":" + common::istr(lo.line as int);
last_lo = lo;
tok = lexer::next_token(rdr);
lo = rdr.get_mark_chpos();
auto next = lexer::next_token(rdr);
tok = next._0;
lo = next._1;
hi = rdr.get_chpos();
}
fn fatal(str m) -> ! {
@ -122,11 +123,9 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
fn get_sess() -> parse_sess { ret sess; }
}
// Make sure npos points at first actual token:
lexer::consume_whitespace_and_comments(rdr);
auto npos = rdr.get_chpos();
ret stdio_parser(sess, cfg, ftype, lexer::next_token(rdr),
npos, npos, npos, UNRESTRICTED, rdr,
auto tok0 = lexer::next_token(rdr);
ret stdio_parser(sess, cfg, ftype, tok0._0,
tok0._1, tok0._1, tok0._1, UNRESTRICTED, rdr,
prec_table(), bad_expr_word_table());
}
@ -693,13 +692,13 @@ fn parse_path(&parser p) -> ast::path {
case (token::IDENT(?i, _)) {
hi = p.get_hi_pos();
ids += ~[p.get_str(i)];
hi = p.get_hi_pos();
p.bump();
if (p.peek() == token::MOD_SEP) { p.bump(); } else { break; }
}
case (_) { break; }
}
}
hi = p.get_hi_pos();
ret spanned(lo, hi, rec(global=global, idents=ids, types=~[]));
}