Lexer: Avoid unnecessary allocations
This commit is contained in:
parent
96798f5e05
commit
de1df3608b
|
@ -13,7 +13,7 @@ use core::prelude::*;
|
|||
use ast;
|
||||
use codemap::{BytePos, CharPos, CodeMap, Pos};
|
||||
use diagnostic;
|
||||
use parse::lexer::{is_whitespace, get_str_from, reader};
|
||||
use parse::lexer::{is_whitespace, with_str_from, reader};
|
||||
use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
|
||||
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
|
||||
use parse::lexer;
|
||||
|
@ -352,9 +352,10 @@ pub fn gather_comments_and_literals(span_diagnostic:
|
|||
//discard, and look ahead; we're working with internal state
|
||||
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
|
||||
if token::is_lit(&tok) {
|
||||
let s = get_str_from(rdr, bstart);
|
||||
debug!("tok lit: %s", s);
|
||||
literals.push(lit {lit: s, pos: sp.lo});
|
||||
do with_str_from(rdr, bstart) |s| {
|
||||
debug!("tok lit: %s", s);
|
||||
literals.push(lit {lit: s.to_owned(), pos: sp.lo});
|
||||
}
|
||||
} else {
|
||||
debug!("tok: %s", token::to_str(get_ident_interner(), &tok));
|
||||
}
|
||||
|
|
|
@ -165,9 +165,10 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
|
|||
(pos - rdr.filemap.start_pos)
|
||||
}
|
||||
|
||||
pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
|
||||
return str::slice(*rdr.src, start.to_uint(),
|
||||
byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
|
||||
/// Calls `f` with the slice of the reader's source text that runs from
/// `start` up to `rdr.last_pos`, and returns whatever `f` returns.
///
/// Both `start` and `rdr.last_pos` are passed through `byte_offset` before
/// being used as slice bounds, so `start` must be given in the same
/// coordinate space as `rdr.last_pos` (e.g. a saved `rdr.last_pos`), not as
/// an offset that has already been converted.
///
/// The text is handed to `f` as a borrowed `&str`; this function makes no
/// owned copy, so a caller that needs to keep the text past the call must
/// copy it inside `f`.
pub fn with_str_from<T>(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T {
|
||||
f(rdr.src.slice(
|
||||
byte_offset(rdr, start).to_uint(),
|
||||
byte_offset(rdr, rdr.last_pos).to_uint()))
|
||||
}
|
||||
|
||||
// EFFECT: advance the StringReader by one character. If a newline is
|
||||
|
@ -259,18 +260,24 @@ fn consume_any_line_comment(rdr: @mut StringReader)
|
|||
bump(rdr);
|
||||
// line comments starting with "///" or "//!" are doc-comments
|
||||
if rdr.curr == '/' || rdr.curr == '!' {
|
||||
let start_bpos = rdr.pos - BytePos(2u);
|
||||
let mut acc = ~"//";
|
||||
let start_bpos = rdr.pos - BytePos(3u);
|
||||
while rdr.curr != '\n' && !is_eof(rdr) {
|
||||
str::push_char(&mut acc, rdr.curr);
|
||||
bump(rdr);
|
||||
}
|
||||
// but comments with only more "/"s are not
|
||||
if !is_line_non_doc_comment(acc) {
|
||||
return Some(TokenAndSpan{
|
||||
tok: token::DOC_COMMENT(str_to_ident(acc)),
|
||||
sp: codemap::mk_sp(start_bpos, rdr.pos)
|
||||
});
|
||||
let ret = do with_str_from(rdr, start_bpos) |string| {
|
||||
// but comments with only more "/"s are not
|
||||
if !is_line_non_doc_comment(string) {
|
||||
Some(TokenAndSpan{
|
||||
tok: token::DOC_COMMENT(str_to_ident(string)),
|
||||
sp: codemap::mk_sp(start_bpos, rdr.pos)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
if ret.is_some() {
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
|
||||
|
@ -306,25 +313,26 @@ pub fn is_block_non_doc_comment(s: &str) -> bool {
|
|||
fn consume_block_comment(rdr: @mut StringReader)
|
||||
-> Option<TokenAndSpan> {
|
||||
// block comments starting with "/**" or "/*!" are doc-comments
|
||||
if rdr.curr == '*' || rdr.curr == '!' {
|
||||
let start_bpos = rdr.pos - BytePos(2u);
|
||||
let mut acc = ~"/*";
|
||||
let res = if rdr.curr == '*' || rdr.curr == '!' {
|
||||
let start_bpos = rdr.pos - BytePos(3u);
|
||||
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
|
||||
str::push_char(&mut acc, rdr.curr);
|
||||
bump(rdr);
|
||||
}
|
||||
if is_eof(rdr) {
|
||||
rdr.fatal(~"unterminated block doc-comment");
|
||||
} else {
|
||||
acc += "*/";
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
// but comments with only "*"s between two "/"s are not
|
||||
if !is_block_non_doc_comment(acc) {
|
||||
return Some(TokenAndSpan{
|
||||
tok: token::DOC_COMMENT(str_to_ident(acc)),
|
||||
sp: codemap::mk_sp(start_bpos, rdr.pos)
|
||||
});
|
||||
do with_str_from(rdr, start_bpos) |string| {
|
||||
// but comments with only "*"s between two "/"s are not
|
||||
if !is_block_non_doc_comment(string) {
|
||||
Some(TokenAndSpan{
|
||||
tok: token::DOC_COMMENT(str_to_ident(string)),
|
||||
sp: codemap::mk_sp(start_bpos, rdr.pos)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -338,10 +346,11 @@ fn consume_block_comment(rdr: @mut StringReader)
|
|||
bump(rdr);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
};
|
||||
// restart whitespace munch.
|
||||
|
||||
return consume_whitespace_and_comments(rdr);
|
||||
if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
|
||||
}
|
||||
|
||||
fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
|
||||
|
@ -540,17 +549,21 @@ fn ident_continue(c: char) -> bool {
|
|||
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
|
||||
let mut c = rdr.curr;
|
||||
if ident_start(c) {
|
||||
let start = byte_offset(rdr, rdr.last_pos);
|
||||
let start = rdr.last_pos;
|
||||
while ident_continue(rdr.curr) {
|
||||
bump(rdr);
|
||||
}
|
||||
let string = get_str_from(rdr, start);
|
||||
|
||||
if "_" == string { return token::UNDERSCORE; }
|
||||
let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':';
|
||||
return do with_str_from(rdr, start) |string| {
|
||||
if string == "_" {
|
||||
token::UNDERSCORE
|
||||
} else {
|
||||
let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':';
|
||||
|
||||
// FIXME: perform NFKC normalization here. (Issue #2253)
|
||||
return token::IDENT(str_to_ident(string), is_mod_name);
|
||||
// FIXME: perform NFKC normalization here. (Issue #2253)
|
||||
token::IDENT(str_to_ident(string), is_mod_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
if is_dec_digit(c) {
|
||||
return scan_number(c, rdr);
|
||||
|
@ -648,19 +661,19 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
|
|||
'\'' => {
|
||||
// Either a character constant 'a' OR a lifetime name 'abc
|
||||
bump(rdr);
|
||||
let start = rdr.last_pos;
|
||||
let mut c2 = rdr.curr;
|
||||
bump(rdr);
|
||||
|
||||
// If the character is an ident start not followed by another single
|
||||
// quote, then this is a lifetime name:
|
||||
if ident_start(c2) && rdr.curr != '\'' {
|
||||
let mut lifetime_name = ~"";
|
||||
lifetime_name.push_char(c2);
|
||||
while ident_continue(rdr.curr) {
|
||||
lifetime_name.push_char(rdr.curr);
|
||||
bump(rdr);
|
||||
}
|
||||
return token::LIFETIME(str_to_ident(lifetime_name));
|
||||
return do with_str_from(rdr, start) |lifetime_name| {
|
||||
token::LIFETIME(str_to_ident(lifetime_name))
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise it is a character constant:
|
||||
|
@ -691,12 +704,13 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
|
|||
}
|
||||
'"' => {
|
||||
let mut accum_str = ~"";
|
||||
let n = byte_offset(rdr, rdr.last_pos);
|
||||
let n = rdr.last_pos;
|
||||
bump(rdr);
|
||||
while rdr.curr != '"' {
|
||||
if is_eof(rdr) {
|
||||
rdr.fatal(fmt!("unterminated double quote string: %s",
|
||||
get_str_from(rdr, n)));
|
||||
do with_str_from(rdr, n) |s| {
|
||||
rdr.fatal(fmt!("unterminated double quote string: %s", s));
|
||||
}
|
||||
}
|
||||
|
||||
let ch = rdr.curr;
|
||||
|
|
Loading…
Reference in New Issue