syntax: methodify the lexer

2014-05-21 16:57:31 -07:00 · 2014-05-21 16:57:31 -07:00 · 46d1af28b5
parent 5343eb7e0c
commit 46d1af28b5
8 changed files with 1195 additions and 1187 deletions
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@ -34,7 +34,7 @@ pub fn highlight(src: &str, class: Option<&str>) -> String {

    let mut out = io::MemWriter::new();
    doit(&sess,
-         lexer::new_string_reader(&sess.span_diagnostic, fm),
+         lexer::StringReader::new(&sess.span_diagnostic, fm),
         class,
         &mut out).unwrap();
    str::from_utf8_lossy(out.unwrap().as_slice()).to_string()
--- a/src/libsyntax/attr.rs
+++ b/src/libsyntax/attr.rs
@ -15,7 +15,7 @@ use ast::{AttrId, Attribute, Attribute_, MetaItem, MetaWord, MetaNameValue, Meta
 use codemap::{Span, Spanned, spanned, dummy_spanned};
 use codemap::BytePos;
 use diagnostic::SpanHandler;
-use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
+use parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration};
 use parse::token::InternedString;
 use parse::token;
 use crateid::CrateId;
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@ -11,8 +11,8 @@
 use ast;
 use codemap::{BytePos, CharPos, CodeMap, Pos};
 use diagnostic;
-use parse::lexer::{is_whitespace, with_str_from, Reader};
-use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
+use parse::lexer::{is_whitespace, Reader};
+use parse::lexer::{StringReader, TokenAndSpan};
 use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
 use parse::lexer;
 use parse::token;
@ -141,31 +141,6 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
    fail!("not a doc-comment: {}", comment);
 }

-fn read_to_eol(rdr: &mut StringReader) -> String {
-    let mut val = String::new();
-    while !rdr.curr_is('\n') && !is_eof(rdr) {
-        val.push_char(rdr.curr.unwrap());
-        bump(rdr);
-    }
-    if rdr.curr_is('\n') { bump(rdr); }
-    return val
-}
-
-fn read_one_line_comment(rdr: &mut StringReader) -> String {
-    let val = read_to_eol(rdr);
-    assert!((val.as_slice()[0] == '/' as u8 &&
-                val.as_slice()[1] == '/' as u8) ||
-                (val.as_slice()[0] == '#' as u8 &&
-                 val.as_slice()[1] == '!' as u8));
-    return val;
-}
-
-fn consume_non_eol_whitespace(rdr: &mut StringReader) {
-    while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
-        bump(rdr);
-    }
-}
-
 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
    debug!(">>> blank-line comment");
    comments.push(Comment {
@ -177,11 +152,11 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {

 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
                                           comments: &mut Vec<Comment>) {
-    while is_whitespace(rdr.curr) && !is_eof(rdr) {
+    while is_whitespace(rdr.curr) && !rdr.is_eof() {
        if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
            push_blank_line_comment(rdr, &mut *comments);
        }
-        bump(rdr);
+        rdr.bump();
    }
 }

@ -193,7 +168,7 @@ fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
    debug!("<<< shebang comment");
    comments.push(Comment {
        style: if code_to_the_left { Trailing } else { Isolated },
-        lines: vec!(read_one_line_comment(rdr)),
+        lines: vec!(rdr.read_one_line_comment()),
        pos: p
    });
 }
@ -203,15 +178,15 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
    debug!(">>> line comments");
    let p = rdr.last_pos;
    let mut lines: Vec<String> = Vec::new();
-    while rdr.curr_is('/') && nextch_is(rdr, '/') {
-        let line = read_one_line_comment(rdr);
+    while rdr.curr_is('/') && rdr.nextch_is('/') {
+        let line = rdr.read_one_line_comment();
        debug!("{}", line);
        // Doc comments are not put in comments.
        if is_doc_comment(line.as_slice()) {
            break;
        }
        lines.push(line);
-        consume_non_eol_whitespace(rdr);
+        rdr.consume_non_eol_whitespace();
    }
    debug!("<<< line comments");
    if !lines.is_empty() {
@ -265,21 +240,21 @@ fn read_block_comment(rdr: &mut StringReader,
    let p = rdr.last_pos;
    let mut lines: Vec<String> = Vec::new();
    let col = rdr.col;
-    bump(rdr);
-    bump(rdr);
+    rdr.bump();
+    rdr.bump();

    let mut curr_line = String::from_str("/*");

    // doc-comments are not really comments, they are attributes
-    if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
-        while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
+    if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
+        while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
            curr_line.push_char(rdr.curr.unwrap());
-            bump(rdr);
+            rdr.bump();
        }
-        if !is_eof(rdr) {
+        if !rdr.is_eof() {
            curr_line.push_str("*/");
-            bump(rdr);
-            bump(rdr);
+            rdr.bump();
+            rdr.bump();
        }
        if !is_block_non_doc_comment(curr_line.as_slice()) {
            return
@ -290,7 +265,7 @@ fn read_block_comment(rdr: &mut StringReader,
        let mut level: int = 1;
        while level > 0 {
            debug!("=== block comment level {}", level);
-            if is_eof(rdr) {
+            if rdr.is_eof() {
                rdr.fatal("unterminated block comment");
            }
            if rdr.curr_is('\n') {
@ -298,21 +273,21 @@ fn read_block_comment(rdr: &mut StringReader,
                                                     curr_line,
                                                     col);
                curr_line = String::new();
-                bump(rdr);
+                rdr.bump();
            } else {
                curr_line.push_char(rdr.curr.unwrap());
-                if rdr.curr_is('/') && nextch_is(rdr, '*') {
-                    bump(rdr);
-                    bump(rdr);
+                if rdr.curr_is('/') && rdr.nextch_is('*') {
+                    rdr.bump();
+                    rdr.bump();
                    curr_line.push_char('*');
                    level += 1;
                } else {
-                    if rdr.curr_is('*') && nextch_is(rdr, '/') {
-                        bump(rdr);
-                        bump(rdr);
+                    if rdr.curr_is('*') && rdr.nextch_is('/') {
+                        rdr.bump();
+                        rdr.bump();
                        curr_line.push_char('/');
                        level -= 1;
-                    } else { bump(rdr); }
+                    } else { rdr.bump(); }
                }
            }
        }
@ -324,31 +299,24 @@ fn read_block_comment(rdr: &mut StringReader,
    }

    let mut style = if code_to_the_left { Trailing } else { Isolated };
-    consume_non_eol_whitespace(rdr);
-    if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
+    rdr.consume_non_eol_whitespace();
+    if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
        style = Mixed;
    }
    debug!("<<< block comment");
    comments.push(Comment {style: style, lines: lines, pos: p});
 }

-fn peeking_at_comment(rdr: &StringReader) -> bool {
-    return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
-         (rdr.curr_is('/') && nextch_is(rdr, '*')) ||
-         // consider shebangs comments, but not inner attributes
-         (rdr.curr_is('#') && nextch_is(rdr, '!') &&
-          !lexer::nextnextch_is(rdr, '['));
-}

 fn consume_comment(rdr: &mut StringReader,
                   code_to_the_left: bool,
                   comments: &mut Vec<Comment> ) {
    debug!(">>> consume comment");
-    if rdr.curr_is('/') && nextch_is(rdr, '/') {
+    if rdr.curr_is('/') && rdr.nextch_is('/') {
        read_line_comments(rdr, code_to_the_left, comments);
-    } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
+    } else if rdr.curr_is('/') && rdr.nextch_is('*') {
        read_block_comment(rdr, code_to_the_left, comments);
-    } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
+    } else if rdr.curr_is('#') && rdr.nextch_is('!') {
        read_shebang_comment(rdr, code_to_the_left, comments);
    } else { fail!(); }
    debug!("<<< consume comment");
@ -362,8 +330,7 @@ pub struct Literal {

 // it appears this function is called only from pprust... that's
 // probably not a good thing.
-pub fn gather_comments_and_literals(span_diagnostic:
-                                        &diagnostic::SpanHandler,
+pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
                                    path: String,
                                    srdr: &mut io::Reader)
                                 -> (Vec<Comment>, Vec<Literal>) {
@ -371,20 +338,20 @@ pub fn gather_comments_and_literals(span_diagnostic:
    let src = str::from_utf8(src.as_slice()).unwrap().to_string();
    let cm = CodeMap::new();
    let filemap = cm.new_filemap(path, src);
-    let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
+    let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);

    let mut comments: Vec<Comment> = Vec::new();
    let mut literals: Vec<Literal> = Vec::new();
    let mut first_read: bool = true;
-    while !is_eof(&rdr) {
+    while !rdr.is_eof() {
        loop {
            let mut code_to_the_left = !first_read;
-            consume_non_eol_whitespace(&mut rdr);
+            rdr.consume_non_eol_whitespace();
            if rdr.curr_is('\n') {
                code_to_the_left = false;
                consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
            }
-            while peeking_at_comment(&rdr) {
+            while rdr.peeking_at_comment() {
                consume_comment(&mut rdr, code_to_the_left, &mut comments);
                consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
            }
@ -397,7 +364,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
        //discard, and look ahead; we're working with internal state
        let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
        if token::is_lit(&tok) {
-            with_str_from(&rdr, bstart, |s| {
+            rdr.with_str_from(bstart, |s| {
                debug!("tok lit: {}", s);
                literals.push(Literal {lit: s.to_string(), pos: sp.lo});
            })
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@ -25,7 +25,6 @@ use std::str;
 pub mod lexer;
 pub mod parser;
 pub mod token;
-pub mod comments;
 pub mod attr;

 pub mod common;
@ -255,7 +254,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
    // it appears to me that the cfg doesn't matter here... indeed,
    // parsing tt's probably shouldn't require a parser at all.
    let cfg = Vec::new();
-    let srdr = lexer::new_string_reader(&sess.span_diagnostic, filemap);
+    let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap);
    let mut p1 = Parser::new(sess, cfg, box srdr);
    p1.parse_all_token_trees()
 }
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@ -166,7 +166,7 @@ pub fn to_str(t: &Token) -> String {
      ANDAND => "&&".to_string(),
      BINOP(op) => binop_to_str(op).to_string(),
      BINOPEQ(op) => {
-          let mut s = binop_to_str(op).to_strbuf();
+          let mut s = binop_to_str(op).to_string();
          s.push_str("=");
          s
      }
--- a/src/libsyntax/print/pprust.rs
+++ b/src/libsyntax/print/pprust.rs
@ -20,7 +20,8 @@ use codemap;
 use diagnostic;
 use parse::classify::expr_is_simple_block;
 use parse::token::IdentInterner;
-use parse::{comments, token};
+use parse::token;
+use parse::lexer::comments;
 use parse;
 use print::pp::{break_offset, word, space, zerobreak, hardbreak};
 use print::pp::{Breaks, Consistent, Inconsistent, eof};