Lexer: Avoid unnecessary allocations

Björn Steinbrink 2013-06-08 02:26:52 +02:00
parent 96798f5e05
commit de1df3608b
2 changed files with 57 additions and 42 deletions
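
The change replaces `get_str_from`, which copied a slice of the source into a freshly allocated `~str` on every call, with `with_str_from`, which lends a borrowed `&str` to a closure; callers now allocate only when they actually keep the text. A minimal sketch of the pattern in modern Rust (the `Reader` type and field names are illustrative, not the compiler's own):

    struct Reader {
        src: String,
        last_pos: usize, // byte offset of the current read position
    }

    // Before: every caller pays for a heap copy, needed or not.
    fn get_str_from(rdr: &Reader, start: usize) -> String {
        rdr.src[start..rdr.last_pos].to_owned()
    }

    // After: the caller borrows the slice and decides whether to copy it.
    fn with_str_from<T>(rdr: &Reader, start: usize, f: impl FnOnce(&str) -> T) -> T {
        f(&rdr.src[start..rdr.last_pos])
    }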

src/libsyntax/parse/comments.rs

@@ -13,7 +13,7 @@ use core::prelude::*;
 use ast;
 use codemap::{BytePos, CharPos, CodeMap, Pos};
 use diagnostic;
-use parse::lexer::{is_whitespace, get_str_from, reader};
+use parse::lexer::{is_whitespace, with_str_from, reader};
 use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
 use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
 use parse::lexer;
@@ -352,9 +352,10 @@ pub fn gather_comments_and_literals(span_diagnostic:
         //discard, and look ahead; we're working with internal state
         let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
         if token::is_lit(&tok) {
-            let s = get_str_from(rdr, bstart);
-            debug!("tok lit: %s", s);
-            literals.push(lit {lit: s, pos: sp.lo});
+            do with_str_from(rdr, bstart) |s| {
+                debug!("tok lit: %s", s);
+                literals.push(lit {lit: s.to_owned(), pos: sp.lo});
+            }
         } else {
             debug!("tok: %s", token::to_str(get_ident_interner(), &tok));
         }
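
Note that this call site still allocates: the `lit` node stores an owned string, so the closure ends in `s.to_owned()`. The change here is about fitting the new borrowing API; the actual savings come from the lexer call sites below, which only inspect the borrowed slice and never copy it.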

src/libsyntax/parse/lexer.rs

@@ -165,9 +165,10 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
     (pos - rdr.filemap.start_pos)
 }
 
-pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
-    return str::slice(*rdr.src, start.to_uint(),
-                      byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
+pub fn with_str_from<T>(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T {
+    f(rdr.src.slice(
+            byte_offset(rdr, start).to_uint(),
+            byte_offset(rdr, rdr.last_pos).to_uint()))
 }
 
 // EFFECT: advance the StringReader by one character. If a newline is
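
The type parameter `T` lets each call site return whatever it computes from the borrowed text, so borrow-only checks and deliberate copies share one accessor. Hypothetical call sites, continuing the modern-Rust sketch above:

    fn demo(rdr: &Reader, start: usize) {
        // Borrow only: test the slice, allocate nothing.
        let is_underscore = with_str_from(rdr, start, |s| s == "_");

        // Copy only when the text must outlive the reader.
        let owned: String = with_str_from(rdr, start, |s| s.to_owned());

        let _ = (is_underscore, owned);
    }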
@@ -259,18 +260,24 @@ fn consume_any_line_comment(rdr: @mut StringReader)
             bump(rdr);
             // line comments starting with "///" or "//!" are doc-comments
             if rdr.curr == '/' || rdr.curr == '!' {
-                let start_bpos = rdr.pos - BytePos(2u);
-                let mut acc = ~"//";
+                let start_bpos = rdr.pos - BytePos(3u);
                 while rdr.curr != '\n' && !is_eof(rdr) {
-                    str::push_char(&mut acc, rdr.curr);
                     bump(rdr);
                 }
-                // but comments with only more "/"s are not
-                if !is_line_non_doc_comment(acc) {
-                    return Some(TokenAndSpan{
-                        tok: token::DOC_COMMENT(str_to_ident(acc)),
-                        sp: codemap::mk_sp(start_bpos, rdr.pos)
-                    });
+                let ret = do with_str_from(rdr, start_bpos) |string| {
+                    // but comments with only more "/"s are not
+                    if !is_line_non_doc_comment(string) {
+                        Some(TokenAndSpan{
+                            tok: token::DOC_COMMENT(str_to_ident(string)),
+                            sp: codemap::mk_sp(start_bpos, rdr.pos)
+                        })
+                    } else {
+                        None
+                    }
+                };
+
+                if ret.is_some() {
+                    return ret;
                 }
             } else {
                 while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
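
Two details of this hunk are easy to miss. First, `start_bpos` moves from `pos - 2` to `pos - 3`: the old code rebuilt the comment text by hand starting from a literal `~"//"`, while the new code slices it straight out of the source, so the recorded start must land on the first `/`; at this point the reader has consumed `//`, `curr` holds the third character, and (as the surrounding arithmetic suggests) `pos` sits one byte past `curr`. Second, the early `return` cannot simply move inside the `do with_str_from(...)` closure, since a `return` there could not exit `consume_any_line_comment`; the closure instead yields an `Option` that the caller converts back into an early return. In miniature, reusing the sketch types above with a hypothetical doc-comment test:

    fn scan_line_comment(rdr: &Reader, start: usize) -> Option<String> {
        with_str_from(rdr, start, |s| {
            // "///..." or "//!..." is a doc-comment, but "////..." is not.
            let is_doc = !s[2..].chars().all(|c| c == '/');
            if is_doc { Some(s.to_owned()) } else { None }
        })
        // The caller checks is_some() and early-returns, as in the hunk above.
    }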
@@ -306,25 +313,26 @@ pub fn is_block_non_doc_comment(s: &str) -> bool {
 fn consume_block_comment(rdr: @mut StringReader)
                       -> Option<TokenAndSpan> {
     // block comments starting with "/**" or "/*!" are doc-comments
-    if rdr.curr == '*' || rdr.curr == '!' {
-        let start_bpos = rdr.pos - BytePos(2u);
-        let mut acc = ~"/*";
+    let res = if rdr.curr == '*' || rdr.curr == '!' {
+        let start_bpos = rdr.pos - BytePos(3u);
         while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
-            str::push_char(&mut acc, rdr.curr);
             bump(rdr);
         }
         if is_eof(rdr) {
             rdr.fatal(~"unterminated block doc-comment");
         } else {
-            acc += "*/";
             bump(rdr);
             bump(rdr);
-            // but comments with only "*"s between two "/"s are not
-            if !is_block_non_doc_comment(acc) {
-                return Some(TokenAndSpan{
-                    tok: token::DOC_COMMENT(str_to_ident(acc)),
-                    sp: codemap::mk_sp(start_bpos, rdr.pos)
-                });
-            }
+            do with_str_from(rdr, start_bpos) |string| {
+                // but comments with only "*"s between two "/"s are not
+                if !is_block_non_doc_comment(string) {
+                    Some(TokenAndSpan{
+                        tok: token::DOC_COMMENT(str_to_ident(string)),
+                        sp: codemap::mk_sp(start_bpos, rdr.pos)
+                    })
+                } else {
+                    None
+                }
+            }
         }
     } else {
@@ -338,10 +346,11 @@ fn consume_block_comment(rdr: @mut StringReader)
                 bump(rdr);
             }
         }
-    }
+        None
+    };
 
     // restart whitespace munch.
-    return consume_whitespace_and_comments(rdr);
+    if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
 }
 
 fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
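
The same Option-out-of-the-closure shape ends the function: the doc-comment branch now produces `res`, and the trailing whitespace re-scan runs only when it is `None`, preserving the old early-return behavior. The final line is exactly `Option::or_else` in today's Rust; schematically:

    fn finish<T>(res: Option<T>, rescan: impl FnOnce() -> Option<T>) -> Option<T> {
        // `if res.is_some() { res } else { rescan() }`, written idiomatically.
        res.or_else(rescan)
    }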
@@ -540,17 +549,21 @@ fn ident_continue(c: char) -> bool {
 fn next_token_inner(rdr: @mut StringReader) -> token::Token {
     let mut c = rdr.curr;
     if ident_start(c) {
-        let start = byte_offset(rdr, rdr.last_pos);
+        let start = rdr.last_pos;
         while ident_continue(rdr.curr) {
             bump(rdr);
         }
 
-        let string = get_str_from(rdr, start);
-        if "_" == string { return token::UNDERSCORE; }
-        let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':';
+        return do with_str_from(rdr, start) |string| {
+            if string == "_" {
+                token::UNDERSCORE
+            } else {
+                let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':';
 
-        // FIXME: perform NFKC normalization here. (Issue #2253)
-        return token::IDENT(str_to_ident(string), is_mod_name);
+                // FIXME: perform NFKC normalization here. (Issue #2253)
+                token::IDENT(str_to_ident(string), is_mod_name)
+            }
+        }
     }
 
     if is_dec_digit(c) {
         return scan_number(c, rdr);
@@ -648,19 +661,19 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
       '\'' => {
        // Either a character constant 'a' OR a lifetime name 'abc
        bump(rdr);
+        let start = rdr.last_pos;
        let mut c2 = rdr.curr;
        bump(rdr);
 
        // If the character is an ident start not followed by another single
        // quote, then this is a lifetime name:
        if ident_start(c2) && rdr.curr != '\'' {
-            let mut lifetime_name = ~"";
-            lifetime_name.push_char(c2);
            while ident_continue(rdr.curr) {
-                lifetime_name.push_char(rdr.curr);
                bump(rdr);
            }
-            return token::LIFETIME(str_to_ident(lifetime_name));
+            return do with_str_from(rdr, start) |lifetime_name| {
+                token::LIFETIME(str_to_ident(lifetime_name))
+            }
        }
 
        // Otherwise it is a character constant:
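
The lifetime arm shows the mechanical recipe used throughout: drop the accumulator, record `rdr.last_pos` before scanning, and slice once at the end. A standalone sketch of scan-then-slice over a plain string (a hypothetical helper, not the lexer's API):

    // Scan an identifier starting at `start` and return it as a borrowed slice.
    fn slice_ident(src: &str, start: usize) -> &str {
        let end = src[start..]
            .char_indices()
            .find(|&(_, c)| !(c.is_alphanumeric() || c == '_'))
            .map_or(src.len(), |(i, _)| start + i);
        &src[start..end]
    }

    // slice_ident("'abc + 1", 1) == "abc": no per-character pushes, no copy.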
@@ -691,12 +704,13 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
       }
       '"' => {
        let mut accum_str = ~"";
-        let n = byte_offset(rdr, rdr.last_pos);
+        let n = rdr.last_pos;
        bump(rdr);
        while rdr.curr != '"' {
            if is_eof(rdr) {
-                rdr.fatal(fmt!("unterminated double quote string: %s",
-                               get_str_from(rdr, n)));
+                do with_str_from(rdr, n) |s| {
+                    rdr.fatal(fmt!("unterminated double quote string: %s", s));
+                }
            }
 
            let ch = rdr.curr;