Make the lexer slightly less stateful
This commit is contained in:
parent
e949aab10a
commit
dca67f95cb
@ -19,9 +19,7 @@ type reader =
|
||||
fn next() -> char ;
|
||||
fn init() ;
|
||||
fn bump() ;
|
||||
fn mark() ;
|
||||
fn get_mark_chpos() -> uint ;
|
||||
fn get_mark_str() -> str ;
|
||||
fn get_str_from(uint) -> str ;
|
||||
fn get_interner() -> @interner::interner[str] ;
|
||||
fn get_chpos() -> uint ;
|
||||
fn get_byte_pos() -> uint ;
|
||||
@ -38,21 +36,16 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
|
||||
mutable uint col,
|
||||
mutable uint pos,
|
||||
mutable char ch,
|
||||
mutable uint mark_pos,
|
||||
mutable uint mark_chpos,
|
||||
mutable uint chpos,
|
||||
mutable str[] strs,
|
||||
codemap::filemap fm,
|
||||
@interner::interner[str] itr) {
|
||||
fn is_eof() -> bool { ret ch == -1 as char; }
|
||||
fn mark() { mark_pos = pos; mark_chpos = chpos; }
|
||||
fn get_mark_str() -> str {
|
||||
fn get_str_from(uint start) -> str {
|
||||
// I'm pretty skeptical about this subtraction. What if there's a
|
||||
// multi-byte character before the mark?
|
||||
ret str::slice(src, mark_pos - 1u,
|
||||
pos - 1u);
|
||||
ret str::slice(src, start - 1u, pos - 1u);
|
||||
}
|
||||
fn get_mark_chpos() -> uint { ret mark_chpos; }
|
||||
fn get_chpos() -> uint { ret chpos; }
|
||||
fn get_byte_pos() -> uint { ret pos; }
|
||||
fn curr() -> char { ret ch; }
|
||||
@ -90,9 +83,8 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
|
||||
}
|
||||
let str[] strs = ~[];
|
||||
auto rd =
|
||||
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u,
|
||||
filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap,
|
||||
itr);
|
||||
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char,
|
||||
filemap.start_pos.ch, strs, filemap, itr);
|
||||
rd.init();
|
||||
ret rd;
|
||||
}
|
||||
@ -346,11 +338,17 @@ fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
|
||||
ret accum_int as char;
|
||||
}
|
||||
|
||||
fn next_token(&reader rdr) -> token::token {
|
||||
auto accum_str = "";
|
||||
fn next_token(&reader rdr) -> tup(token::token, uint, uint) {
|
||||
consume_whitespace_and_comments(rdr);
|
||||
if (rdr.is_eof()) { ret token::EOF; }
|
||||
rdr.mark();
|
||||
auto start_chpos = rdr.get_chpos();
|
||||
auto start_bpos = rdr.get_byte_pos();
|
||||
auto tok = if rdr.is_eof() { token::EOF }
|
||||
else { next_token_inner(rdr) };
|
||||
ret tup(tok, start_chpos, start_bpos);
|
||||
}
|
||||
|
||||
fn next_token_inner(&reader rdr) -> token::token {
|
||||
auto accum_str = "";
|
||||
auto c = rdr.curr();
|
||||
if (is_alpha(c) || c == '_') {
|
||||
while (is_alnum(c) || c == '_') {
|
||||
@ -762,11 +760,10 @@ fn gather_comments_and_literals(&codemap::codemap cm, str path)
|
||||
break;
|
||||
}
|
||||
auto tok = next_token(rdr);
|
||||
if (is_lit(tok)) {
|
||||
literals += ~[rec(lit=rdr.get_mark_str(),
|
||||
pos=rdr.get_mark_chpos())];
|
||||
if (is_lit(tok._0)) {
|
||||
literals += ~[rec(lit=rdr.get_str_from(tok._2), pos=tok._1)];
|
||||
}
|
||||
log "tok: " + token::to_str(rdr, tok);
|
||||
log "tok: " + token::to_str(rdr, tok._0);
|
||||
first_read = false;
|
||||
}
|
||||
ret rec(cmnts=comments, lits=literals);
|
||||
|
@ -90,8 +90,9 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
|
||||
// + ":" + common::istr(lo.line as int);
|
||||
|
||||
last_lo = lo;
|
||||
tok = lexer::next_token(rdr);
|
||||
lo = rdr.get_mark_chpos();
|
||||
auto next = lexer::next_token(rdr);
|
||||
tok = next._0;
|
||||
lo = next._1;
|
||||
hi = rdr.get_chpos();
|
||||
}
|
||||
fn fatal(str m) -> ! {
|
||||
@ -122,11 +123,9 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
|
||||
fn get_sess() -> parse_sess { ret sess; }
|
||||
}
|
||||
|
||||
// Make sure npos points at first actual token:
|
||||
lexer::consume_whitespace_and_comments(rdr);
|
||||
auto npos = rdr.get_chpos();
|
||||
ret stdio_parser(sess, cfg, ftype, lexer::next_token(rdr),
|
||||
npos, npos, npos, UNRESTRICTED, rdr,
|
||||
auto tok0 = lexer::next_token(rdr);
|
||||
ret stdio_parser(sess, cfg, ftype, tok0._0,
|
||||
tok0._1, tok0._1, tok0._1, UNRESTRICTED, rdr,
|
||||
prec_table(), bad_expr_word_table());
|
||||
}
|
||||
|
||||
@ -693,13 +692,13 @@ fn parse_path(&parser p) -> ast::path {
|
||||
case (token::IDENT(?i, _)) {
|
||||
hi = p.get_hi_pos();
|
||||
ids += ~[p.get_str(i)];
|
||||
hi = p.get_hi_pos();
|
||||
p.bump();
|
||||
if (p.peek() == token::MOD_SEP) { p.bump(); } else { break; }
|
||||
}
|
||||
case (_) { break; }
|
||||
}
|
||||
}
|
||||
hi = p.get_hi_pos();
|
||||
ret spanned(lo, hi, rec(global=global, idents=ids, types=~[]));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user