Auto merge of #50997 - michaelwoerister:pre-analyze-filemaps, r=Mark-Simulacrum
Make FileMap::{lines, multibyte_chars, non_narrow_chars} non-mutable. This PR removes most of the interior mutability from `FileMap`, which should be beneficial, especially in a multithreaded setting. This is achieved by initializing the state in question when the filemap is constructed instead of during lexing. Hopefully this doesn't degrade performance. cc @wesleywiser
This commit is contained in:
commit
9f79d2f86a
@ -2779,6 +2779,7 @@ name = "syntax_pos"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"arena 0.0.0",
|
||||
"cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc_data_structures 0.0.0",
|
||||
"scoped-tls 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serialize 0.0.0",
|
||||
|
@ -456,27 +456,21 @@ impl<'a> HashStable<StableHashingContext<'a>> for FileMap {
|
||||
src_hash.hash_stable(hcx, hasher);
|
||||
|
||||
// We only hash the relative position within this filemap
|
||||
lines.with_lock(|lines| {
|
||||
lines.len().hash_stable(hcx, hasher);
|
||||
for &line in lines.iter() {
|
||||
stable_byte_pos(line, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
});
|
||||
lines.len().hash_stable(hcx, hasher);
|
||||
for &line in lines.iter() {
|
||||
stable_byte_pos(line, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
|
||||
// We only hash the relative position within this filemap
|
||||
multibyte_chars.with_lock(|multibyte_chars| {
|
||||
multibyte_chars.len().hash_stable(hcx, hasher);
|
||||
for &char_pos in multibyte_chars.iter() {
|
||||
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
});
|
||||
multibyte_chars.len().hash_stable(hcx, hasher);
|
||||
for &char_pos in multibyte_chars.iter() {
|
||||
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
|
||||
non_narrow_chars.with_lock(|non_narrow_chars| {
|
||||
non_narrow_chars.len().hash_stable(hcx, hasher);
|
||||
for &char_pos in non_narrow_chars.iter() {
|
||||
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
});
|
||||
non_narrow_chars.len().hash_stable(hcx, hasher);
|
||||
for &char_pos in non_narrow_chars.iter() {
|
||||
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -623,7 +623,7 @@ impl<'a, 'tcx, 'x> SpecializedDecoder<Span> for CacheDecoder<'a, 'tcx, 'x> {
|
||||
let len = BytePos::decode(self)?;
|
||||
|
||||
let file_lo = self.file_index_to_file(file_lo_index);
|
||||
let lo = file_lo.lines.borrow()[line_lo - 1] + col_lo;
|
||||
let lo = file_lo.lines[line_lo - 1] + col_lo;
|
||||
let hi = lo + len;
|
||||
|
||||
let expn_info_tag = u8::decode(self)?;
|
||||
|
@ -1138,9 +1138,9 @@ impl<'a, 'tcx> CrateMetadata {
|
||||
src_hash,
|
||||
start_pos,
|
||||
end_pos,
|
||||
lines,
|
||||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
mut lines,
|
||||
mut multibyte_chars,
|
||||
mut non_narrow_chars,
|
||||
name_hash,
|
||||
.. } = filemap_to_import;
|
||||
|
||||
@ -1151,15 +1151,12 @@ impl<'a, 'tcx> CrateMetadata {
|
||||
// `CodeMap::new_imported_filemap()` will then translate those
|
||||
// coordinates to their new global frame of reference when the
|
||||
// offset of the FileMap is known.
|
||||
let mut lines = lines.into_inner();
|
||||
for pos in &mut lines {
|
||||
*pos = *pos - start_pos;
|
||||
}
|
||||
let mut multibyte_chars = multibyte_chars.into_inner();
|
||||
for mbc in &mut multibyte_chars {
|
||||
mbc.pos = mbc.pos - start_pos;
|
||||
}
|
||||
let mut non_narrow_chars = non_narrow_chars.into_inner();
|
||||
for swc in &mut non_narrow_chars {
|
||||
*swc = *swc - start_pos;
|
||||
}
|
||||
|
@ -211,8 +211,7 @@ impl CodeMap {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new filemap without setting its line information. If you don't
|
||||
/// intend to set the line information yourself, you should use new_filemap_and_lines.
|
||||
/// Creates a new filemap.
|
||||
/// This does not ensure that only one FileMap exists per file name.
|
||||
pub fn new_filemap(&self, filename: FileName, src: String) -> Lrc<FileMap> {
|
||||
let start_pos = self.next_start_pos();
|
||||
@ -247,22 +246,6 @@ impl CodeMap {
|
||||
filemap
|
||||
}
|
||||
|
||||
/// Creates a new filemap and sets its line information.
|
||||
/// This does not ensure that only one FileMap exists per file name.
|
||||
pub fn new_filemap_and_lines(&self, filename: &Path, src: &str) -> Lrc<FileMap> {
|
||||
let fm = self.new_filemap(filename.to_owned().into(), src.to_owned());
|
||||
let mut byte_pos: u32 = fm.start_pos.0;
|
||||
for line in src.lines() {
|
||||
// register the start of this line
|
||||
fm.next_line(BytePos(byte_pos));
|
||||
|
||||
// update byte_pos to include this line and the \n at the end
|
||||
byte_pos += line.len() as u32 + 1;
|
||||
}
|
||||
fm
|
||||
}
|
||||
|
||||
|
||||
/// Allocates a new FileMap representing a source file from an external
|
||||
/// crate. The source code of such an "imported filemap" is not available,
|
||||
/// but we still know enough to generate accurate debuginfo location
|
||||
@ -305,9 +288,9 @@ impl CodeMap {
|
||||
external_src: Lock::new(ExternalSource::AbsentOk),
|
||||
start_pos,
|
||||
end_pos,
|
||||
lines: Lock::new(file_local_lines),
|
||||
multibyte_chars: Lock::new(file_local_multibyte_chars),
|
||||
non_narrow_chars: Lock::new(file_local_non_narrow_chars),
|
||||
lines: file_local_lines,
|
||||
multibyte_chars: file_local_multibyte_chars,
|
||||
non_narrow_chars: file_local_non_narrow_chars,
|
||||
name_hash,
|
||||
});
|
||||
|
||||
@ -345,21 +328,22 @@ impl CodeMap {
|
||||
match self.lookup_line(pos) {
|
||||
Ok(FileMapAndLine { fm: f, line: a }) => {
|
||||
let line = a + 1; // Line numbers start at 1
|
||||
let linebpos = (*f.lines.borrow())[a];
|
||||
let linebpos = f.lines[a];
|
||||
let linechpos = self.bytepos_to_file_charpos(linebpos);
|
||||
let col = chpos - linechpos;
|
||||
|
||||
let col_display = {
|
||||
let non_narrow_chars = f.non_narrow_chars.borrow();
|
||||
let start_width_idx = non_narrow_chars
|
||||
let start_width_idx = f
|
||||
.non_narrow_chars
|
||||
.binary_search_by_key(&linebpos, |x| x.pos())
|
||||
.unwrap_or_else(|x| x);
|
||||
let end_width_idx = non_narrow_chars
|
||||
let end_width_idx = f
|
||||
.non_narrow_chars
|
||||
.binary_search_by_key(&pos, |x| x.pos())
|
||||
.unwrap_or_else(|x| x);
|
||||
let special_chars = end_width_idx - start_width_idx;
|
||||
let non_narrow: usize =
|
||||
non_narrow_chars[start_width_idx..end_width_idx]
|
||||
let non_narrow: usize = f
|
||||
.non_narrow_chars[start_width_idx..end_width_idx]
|
||||
.into_iter()
|
||||
.map(|x| x.width())
|
||||
.sum();
|
||||
@ -380,12 +364,12 @@ impl CodeMap {
|
||||
}
|
||||
Err(f) => {
|
||||
let col_display = {
|
||||
let non_narrow_chars = f.non_narrow_chars.borrow();
|
||||
let end_width_idx = non_narrow_chars
|
||||
let end_width_idx = f
|
||||
.non_narrow_chars
|
||||
.binary_search_by_key(&pos, |x| x.pos())
|
||||
.unwrap_or_else(|x| x);
|
||||
let non_narrow: usize =
|
||||
non_narrow_chars[0..end_width_idx]
|
||||
let non_narrow: usize = f
|
||||
.non_narrow_chars[0..end_width_idx]
|
||||
.into_iter()
|
||||
.map(|x| x.width())
|
||||
.sum();
|
||||
@ -830,22 +814,22 @@ impl CodeMap {
|
||||
// The number of extra bytes due to multibyte chars in the FileMap
|
||||
let mut total_extra_bytes = 0;
|
||||
|
||||
for mbc in map.multibyte_chars.borrow().iter() {
|
||||
for mbc in map.multibyte_chars.iter() {
|
||||
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
|
||||
if mbc.pos < bpos {
|
||||
// every character is at least one byte, so we only
|
||||
// count the actual extra bytes.
|
||||
total_extra_bytes += mbc.bytes - 1;
|
||||
total_extra_bytes += mbc.bytes as u32 - 1;
|
||||
// We should never see a byte position in the middle of a
|
||||
// character
|
||||
assert!(bpos.to_usize() >= mbc.pos.to_usize() + mbc.bytes);
|
||||
assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert!(map.start_pos.to_usize() + total_extra_bytes <= bpos.to_usize());
|
||||
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes)
|
||||
assert!(map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
|
||||
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize)
|
||||
}
|
||||
|
||||
// Return the index of the filemap (in self.files) which contains pos.
|
||||
@ -1028,51 +1012,16 @@ impl FilePathMapping {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::borrow::Cow;
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
|
||||
#[test]
|
||||
fn t1 () {
|
||||
let cm = CodeMap::new(FilePathMapping::empty());
|
||||
let fm = cm.new_filemap(PathBuf::from("blork.rs").into(),
|
||||
"first line.\nsecond line".to_string());
|
||||
fm.next_line(BytePos(0));
|
||||
// Test we can get lines with partial line info.
|
||||
assert_eq!(fm.get_line(0), Some(Cow::from("first line.")));
|
||||
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
|
||||
fm.next_line(BytePos(10));
|
||||
assert_eq!(fm.get_line(1), Some(Cow::from(".")));
|
||||
fm.next_line(BytePos(12));
|
||||
assert_eq!(fm.get_line(2), Some(Cow::from("second line")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn t2 () {
|
||||
let cm = CodeMap::new(FilePathMapping::empty());
|
||||
let fm = cm.new_filemap(PathBuf::from("blork.rs").into(),
|
||||
"first line.\nsecond line".to_string());
|
||||
// TESTING *REALLY* BROKEN BEHAVIOR:
|
||||
fm.next_line(BytePos(0));
|
||||
fm.next_line(BytePos(10));
|
||||
fm.next_line(BytePos(2));
|
||||
}
|
||||
|
||||
fn init_code_map() -> CodeMap {
|
||||
let cm = CodeMap::new(FilePathMapping::empty());
|
||||
let fm1 = cm.new_filemap(PathBuf::from("blork.rs").into(),
|
||||
"first line.\nsecond line".to_string());
|
||||
let fm2 = cm.new_filemap(PathBuf::from("empty.rs").into(),
|
||||
"".to_string());
|
||||
let fm3 = cm.new_filemap(PathBuf::from("blork2.rs").into(),
|
||||
"first line.\nsecond line".to_string());
|
||||
|
||||
fm1.next_line(BytePos(0));
|
||||
fm1.next_line(BytePos(12));
|
||||
fm2.next_line(fm2.start_pos);
|
||||
fm3.next_line(fm3.start_pos);
|
||||
fm3.next_line(fm3.start_pos + BytePos(12));
|
||||
|
||||
cm.new_filemap(PathBuf::from("blork.rs").into(),
|
||||
"first line.\nsecond line".to_string());
|
||||
cm.new_filemap(PathBuf::from("empty.rs").into(),
|
||||
"".to_string());
|
||||
cm.new_filemap(PathBuf::from("blork2.rs").into(),
|
||||
"first line.\nsecond line".to_string());
|
||||
cm
|
||||
}
|
||||
|
||||
@ -1125,26 +1074,10 @@ mod tests {
|
||||
fn init_code_map_mbc() -> CodeMap {
|
||||
let cm = CodeMap::new(FilePathMapping::empty());
|
||||
// € is a three byte utf8 char.
|
||||
let fm1 =
|
||||
cm.new_filemap(PathBuf::from("blork.rs").into(),
|
||||
"fir€st €€€€ line.\nsecond line".to_string());
|
||||
let fm2 = cm.new_filemap(PathBuf::from("blork2.rs").into(),
|
||||
"first line€€.\n€ second line".to_string());
|
||||
|
||||
fm1.next_line(BytePos(0));
|
||||
fm1.next_line(BytePos(28));
|
||||
fm2.next_line(fm2.start_pos);
|
||||
fm2.next_line(fm2.start_pos + BytePos(20));
|
||||
|
||||
fm1.record_multibyte_char(BytePos(3), 3);
|
||||
fm1.record_multibyte_char(BytePos(9), 3);
|
||||
fm1.record_multibyte_char(BytePos(12), 3);
|
||||
fm1.record_multibyte_char(BytePos(15), 3);
|
||||
fm1.record_multibyte_char(BytePos(18), 3);
|
||||
fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
|
||||
fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
|
||||
fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);
|
||||
|
||||
cm.new_filemap(PathBuf::from("blork.rs").into(),
|
||||
"fir€st €€€€ line.\nsecond line".to_string());
|
||||
cm.new_filemap(PathBuf::from("blork2.rs").into(),
|
||||
"first line€€.\n€ second line".to_string());
|
||||
cm
|
||||
}
|
||||
|
||||
@ -1196,7 +1129,7 @@ mod tests {
|
||||
let cm = CodeMap::new(FilePathMapping::empty());
|
||||
let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
|
||||
let selection = " \n ~~\n~~~\n~~~~~ \n \n";
|
||||
cm.new_filemap_and_lines(Path::new("blork.rs"), inputtext);
|
||||
cm.new_filemap(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
|
||||
let span = span_from_selection(inputtext, selection);
|
||||
|
||||
// check that we are extracting the text we thought we were extracting
|
||||
@ -1239,7 +1172,7 @@ mod tests {
|
||||
let inputtext = "bbbb BB\ncc CCC\n";
|
||||
let selection1 = " ~~\n \n";
|
||||
let selection2 = " \n ~~~\n";
|
||||
cm.new_filemap_and_lines(Path::new("blork.rs"), inputtext);
|
||||
cm.new_filemap(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
|
||||
let span1 = span_from_selection(inputtext, selection1);
|
||||
let span2 = span_from_selection(inputtext, selection2);
|
||||
|
||||
|
@ -1495,9 +1495,11 @@ impl<'a, 'b> Folder for InvocationCollector<'a, 'b> {
|
||||
|
||||
match String::from_utf8(buf) {
|
||||
Ok(src) => {
|
||||
let src_interned = Symbol::intern(&src);
|
||||
|
||||
// Add this input file to the code map to make it available as
|
||||
// dependency information
|
||||
self.cx.codemap().new_filemap_and_lines(&filename, &src);
|
||||
self.cx.codemap().new_filemap(filename.into(), src);
|
||||
|
||||
let include_info = vec![
|
||||
dummy_spanned(ast::NestedMetaItemKind::MetaItem(
|
||||
@ -1505,7 +1507,7 @@ impl<'a, 'b> Folder for InvocationCollector<'a, 'b> {
|
||||
dummy_spanned(file)))),
|
||||
dummy_spanned(ast::NestedMetaItemKind::MetaItem(
|
||||
attr::mk_name_value_item_str(Ident::from_str("contents"),
|
||||
dummy_spanned(Symbol::intern(&src))))),
|
||||
dummy_spanned(src_interned)))),
|
||||
];
|
||||
|
||||
let include_ident = Ident::from_str("include");
|
||||
|
@ -150,11 +150,13 @@ pub fn expand_include_str(cx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::TokenT
|
||||
};
|
||||
match String::from_utf8(bytes) {
|
||||
Ok(src) => {
|
||||
let interned_src = Symbol::intern(&src);
|
||||
|
||||
// Add this input file to the code map to make it available as
|
||||
// dependency information
|
||||
cx.codemap().new_filemap_and_lines(&file, &src);
|
||||
cx.codemap().new_filemap(file.into(), src);
|
||||
|
||||
base::MacEager::expr(cx.expr_str(sp, Symbol::intern(&src)))
|
||||
base::MacEager::expr(cx.expr_str(sp, interned_src))
|
||||
}
|
||||
Err(_) => {
|
||||
cx.span_err(sp,
|
||||
@ -182,7 +184,7 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::Toke
|
||||
Ok(..) => {
|
||||
// Add this input file to the code map to make it available as
|
||||
// dependency information, but don't enter its contents
|
||||
cx.codemap().new_filemap_and_lines(&file, "");
|
||||
cx.codemap().new_filemap(file.into(), "".to_string());
|
||||
|
||||
base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
|
||||
}
|
||||
|
@ -240,9 +240,11 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
|
||||
// Count the number of chars since the start of the line by rescanning.
|
||||
let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos());
|
||||
let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos(rdr.pos));
|
||||
let end_src_index = rdr.src_index(rdr.pos);
|
||||
assert!(src_index <= end_src_index);
|
||||
assert!(src_index <= end_src_index,
|
||||
"src_index={}, end_src_index={}, line_begin_pos={}",
|
||||
src_index, end_src_index, rdr.filemap.line_begin_pos(rdr.pos).to_u32());
|
||||
let mut n = 0;
|
||||
while src_index < end_src_index {
|
||||
let c = char_at(&rdr.src, src_index);
|
||||
|
@ -51,11 +51,7 @@ pub struct StringReader<'a> {
|
||||
pub ch: Option<char>,
|
||||
pub filemap: Lrc<syntax_pos::FileMap>,
|
||||
/// Stop reading src at this index.
|
||||
end_src_index: usize,
|
||||
/// Whether to record new-lines and multibyte chars in filemap.
|
||||
/// This is only necessary the first time a filemap is lexed.
|
||||
/// If part of a filemap is being re-lexed, this should be set to false.
|
||||
save_new_lines_and_multibyte: bool,
|
||||
pub end_src_index: usize,
|
||||
// cached:
|
||||
peek_tok: token::Token,
|
||||
peek_span: Span,
|
||||
@ -188,7 +184,6 @@ impl<'a> StringReader<'a> {
|
||||
ch: Some('\n'),
|
||||
filemap,
|
||||
end_src_index: src.len(),
|
||||
save_new_lines_and_multibyte: true,
|
||||
// dummy values; not read
|
||||
peek_tok: token::Eof,
|
||||
peek_span: syntax_pos::DUMMY_SP,
|
||||
@ -225,7 +220,6 @@ impl<'a> StringReader<'a> {
|
||||
let mut sr = StringReader::new_raw_internal(sess, begin.fm, None);
|
||||
|
||||
// Seek the lexer to the right byte range.
|
||||
sr.save_new_lines_and_multibyte = false;
|
||||
sr.next_pos = span.lo();
|
||||
sr.end_src_index = sr.src_index(span.hi());
|
||||
|
||||
@ -458,18 +452,6 @@ impl<'a> StringReader<'a> {
|
||||
let next_ch = char_at(&self.src, next_src_index);
|
||||
let next_ch_len = next_ch.len_utf8();
|
||||
|
||||
if self.ch.unwrap() == '\n' {
|
||||
if self.save_new_lines_and_multibyte {
|
||||
self.filemap.next_line(self.next_pos);
|
||||
}
|
||||
}
|
||||
if next_ch_len > 1 {
|
||||
if self.save_new_lines_and_multibyte {
|
||||
self.filemap.record_multibyte_char(self.next_pos, next_ch_len);
|
||||
}
|
||||
}
|
||||
self.filemap.record_width(self.next_pos, next_ch);
|
||||
|
||||
self.ch = Some(next_ch);
|
||||
self.pos = self.next_pos;
|
||||
self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
|
||||
|
@ -51,7 +51,7 @@ fn test_harness(file_text: &str, span_labels: Vec<SpanLabel>, expected_output: &
|
||||
let output = Arc::new(Mutex::new(Vec::new()));
|
||||
|
||||
let code_map = Lrc::new(CodeMap::new(FilePathMapping::empty()));
|
||||
code_map.new_filemap_and_lines(Path::new("test.rs"), &file_text);
|
||||
code_map.new_filemap(Path::new("test.rs").to_owned().into(), file_text.to_owned());
|
||||
|
||||
let primary_span = make_span(&file_text, &span_labels[0].start, &span_labels[0].end);
|
||||
let mut msp = MultiSpan::from_span(primary_span);
|
||||
|
@ -14,3 +14,4 @@ rustc_data_structures = { path = "../librustc_data_structures" }
|
||||
arena = { path = "../libarena" }
|
||||
scoped-tls = { version = "0.1.1", features = ["nightly"] }
|
||||
unicode-width = "0.1.4"
|
||||
cfg-if = "0.1.2"
|
||||
|
436
src/libsyntax_pos/analyze_filemap.rs
Normal file
436
src/libsyntax_pos/analyze_filemap.rs
Normal file
@ -0,0 +1,436 @@
|
||||
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use unicode_width::UnicodeWidthChar;
|
||||
use super::*;
|
||||
|
||||
/// Find all newlines, multi-byte characters, and non-narrow characters in a
|
||||
/// FileMap.
|
||||
///
|
||||
/// This function will use an SSE2 enhanced implementation if hardware support
|
||||
/// is detected at runtime.
|
||||
pub fn analyze_filemap(
|
||||
src: &str,
|
||||
filemap_start_pos: BytePos)
|
||||
-> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>)
|
||||
{
|
||||
let mut lines = vec![filemap_start_pos];
|
||||
let mut multi_byte_chars = vec![];
|
||||
let mut non_narrow_chars = vec![];
|
||||
|
||||
// Calls the right implementation, depending on hardware support available.
|
||||
analyze_filemap_dispatch(src,
|
||||
filemap_start_pos,
|
||||
&mut lines,
|
||||
&mut multi_byte_chars,
|
||||
&mut non_narrow_chars);
|
||||
|
||||
// The code above optimistically registers a new line *after* each \n
|
||||
// it encounters. If that point is already outside the filemap, remove
|
||||
// it again.
|
||||
if let Some(&last_line_start) = lines.last() {
|
||||
let file_map_end = filemap_start_pos + BytePos::from_usize(src.len());
|
||||
assert!(file_map_end >= last_line_start);
|
||||
if last_line_start == file_map_end {
|
||||
lines.pop();
|
||||
}
|
||||
}
|
||||
|
||||
(lines, multi_byte_chars, non_narrow_chars)
|
||||
}
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
|
||||
not(stage0)))] {
|
||||
fn analyze_filemap_dispatch(src: &str,
|
||||
filemap_start_pos: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>) {
|
||||
if is_x86_feature_detected!("sse2") {
|
||||
unsafe {
|
||||
analyze_filemap_sse2(src,
|
||||
filemap_start_pos,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
}
|
||||
} else {
|
||||
analyze_filemap_generic(src,
|
||||
src.len(),
|
||||
filemap_start_pos,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/// Check 16 byte chunks of text at a time. If the chunk contains
|
||||
/// something other than printable ASCII characters and newlines, the
|
||||
/// function falls back to the generic implementation. Otherwise it uses
|
||||
/// SSE2 intrinsics to quickly find all newlines.
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn analyze_filemap_sse2(src: &str,
|
||||
output_offset: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>) {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
const CHUNK_SIZE: usize = 16;
|
||||
|
||||
let src_bytes = src.as_bytes();
|
||||
|
||||
let chunk_count = src.len() / CHUNK_SIZE;
|
||||
|
||||
// This variable keeps track of where we should start decoding a
|
||||
// chunk. If a multi-byte character spans across chunk boundaries,
|
||||
// we need to skip that part in the next chunk because we already
|
||||
// handled it.
|
||||
let mut intra_chunk_offset = 0;
|
||||
|
||||
for chunk_index in 0 .. chunk_count {
|
||||
let ptr = src_bytes.as_ptr() as *const __m128i;
|
||||
// We don't know if the pointer is aligned to 16 bytes, so we
|
||||
// use `loadu`, which supports unaligned loading.
|
||||
let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize));
|
||||
|
||||
// For character in the chunk, see if its byte value is < 0, which
|
||||
// indicates that it's part of a UTF-8 char.
|
||||
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
|
||||
// Create a bit mask from the comparison results.
|
||||
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
|
||||
|
||||
// If the bit mask is all zero, we only have ASCII chars here:
|
||||
if multibyte_mask == 0 {
|
||||
assert!(intra_chunk_offset == 0);
|
||||
|
||||
// Check if there are any control characters in the chunk. All
|
||||
// control characters that we can encounter at this point have a
|
||||
// byte value less than 32 or ...
|
||||
let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32));
|
||||
let control_char_mask0 = _mm_movemask_epi8(control_char_test0);
|
||||
|
||||
// ... it's the ASCII 'DEL' character with a value of 127.
|
||||
let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127));
|
||||
let control_char_mask1 = _mm_movemask_epi8(control_char_test1);
|
||||
|
||||
let control_char_mask = control_char_mask0 | control_char_mask1;
|
||||
|
||||
if control_char_mask != 0 {
|
||||
// Check for newlines in the chunk
|
||||
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
|
||||
let newlines_mask = _mm_movemask_epi8(newlines_test);
|
||||
|
||||
if control_char_mask == newlines_mask {
|
||||
// All control characters are newlines, record them
|
||||
let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
|
||||
let output_offset = output_offset +
|
||||
BytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
|
||||
|
||||
loop {
|
||||
let index = newlines_mask.trailing_zeros();
|
||||
|
||||
if index >= CHUNK_SIZE as u32 {
|
||||
// We have arrived at the end of the chunk.
|
||||
break
|
||||
}
|
||||
|
||||
lines.push(BytePos(index) + output_offset);
|
||||
|
||||
// Clear the bit, so we can find the next one.
|
||||
newlines_mask &= (!1) << index;
|
||||
}
|
||||
|
||||
// We are done for this chunk. All control characters were
|
||||
// newlines and we took care of those.
|
||||
continue
|
||||
} else {
|
||||
// Some of the control characters are not newlines,
|
||||
// fall through to the slow path below.
|
||||
}
|
||||
} else {
|
||||
// No control characters, nothing to record for this chunk
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// The slow path.
|
||||
// There are control chars in here, fallback to generic decoding.
|
||||
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
|
||||
intra_chunk_offset = analyze_filemap_generic(
|
||||
&src[scan_start .. ],
|
||||
CHUNK_SIZE - intra_chunk_offset,
|
||||
BytePos::from_usize(scan_start) + output_offset,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars
|
||||
);
|
||||
}
|
||||
|
||||
// There might still be a tail left to analyze
|
||||
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
|
||||
if tail_start < src.len() {
|
||||
analyze_filemap_generic(&src[tail_start as usize ..],
|
||||
src.len() - tail_start,
|
||||
output_offset + BytePos::from_usize(tail_start),
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// The target (or compiler version) does not support SSE2 ...
|
||||
fn analyze_filemap_dispatch(src: &str,
|
||||
filemap_start_pos: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>) {
|
||||
analyze_filemap_generic(src,
|
||||
src.len(),
|
||||
filemap_start_pos,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `scan_len` determines the number of bytes in `src` to scan. Note that the
|
||||
// function can read past `scan_len` if a multi-byte character start within the
|
||||
// range but extends past it. The overflow is returned by the function.
|
||||
fn analyze_filemap_generic(src: &str,
|
||||
scan_len: usize,
|
||||
output_offset: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>)
|
||||
-> usize
|
||||
{
|
||||
assert!(src.len() >= scan_len);
|
||||
let mut i = 0;
|
||||
let src_bytes = src.as_bytes();
|
||||
|
||||
while i < scan_len {
|
||||
let byte = unsafe {
|
||||
// We verified that i < scan_len <= src.len()
|
||||
*src_bytes.get_unchecked(i as usize)
|
||||
};
|
||||
|
||||
// How much to advance in order to get to the next UTF-8 char in the
|
||||
// string.
|
||||
let mut char_len = 1;
|
||||
|
||||
if byte < 32 {
|
||||
// This is an ASCII control character, it could be one of the cases
|
||||
// that are interesting to us.
|
||||
|
||||
let pos = BytePos::from_usize(i) + output_offset;
|
||||
|
||||
match byte {
|
||||
b'\n' => {
|
||||
lines.push(pos + BytePos(1));
|
||||
}
|
||||
b'\t' => {
|
||||
non_narrow_chars.push(NonNarrowChar::Tab(pos));
|
||||
}
|
||||
_ => {
|
||||
non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
|
||||
}
|
||||
}
|
||||
} else if byte >= 127 {
|
||||
// The slow path:
|
||||
// This is either ASCII control character "DEL" or the beginning of
|
||||
// a multibyte char. Just decode to `char`.
|
||||
let c = (&src[i..]).chars().next().unwrap();
|
||||
char_len = c.len_utf8();
|
||||
|
||||
let pos = BytePos::from_usize(i) + output_offset;
|
||||
|
||||
if char_len > 1 {
|
||||
assert!(char_len >=2 && char_len <= 4);
|
||||
let mbc = MultiByteChar {
|
||||
pos,
|
||||
bytes: char_len as u8,
|
||||
};
|
||||
multi_byte_chars.push(mbc);
|
||||
}
|
||||
|
||||
// Assume control characters are zero width.
|
||||
// FIXME: How can we decide between `width` and `width_cjk`?
|
||||
let char_width = UnicodeWidthChar::width(c).unwrap_or(0);
|
||||
|
||||
if char_width != 1 {
|
||||
non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
|
||||
}
|
||||
}
|
||||
|
||||
i += char_len;
|
||||
}
|
||||
|
||||
i - scan_len
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Generates one `#[test]` function that runs `analyze_filemap` on `text`
// (with the filemap starting at `filemap_start_pos`) and compares the three
// returned tables against the expected values. Expected values are given as
// plain integers / tuples and are converted to the proper types here:
//   - lines:            `pos`
//   - multi_byte_chars: `(pos, bytes)`
//   - non_narrow_chars: `(pos, width)`
macro_rules! test {
    (case: $test_name:ident,
     text: $text:expr,
     filemap_start_pos: $filemap_start_pos:expr,
     lines: $lines:expr,
     multi_byte_chars: $multi_byte_chars:expr,
     non_narrow_chars: $non_narrow_chars:expr,) => (

    #[test]
    fn $test_name() {
        let (actual_lines, actual_mbcs, actual_nncs) =
            analyze_filemap($text, BytePos($filemap_start_pos));

        // Line start positions.
        let expected_lines: Vec<BytePos> =
            $lines.into_iter().map(BytePos).collect();
        assert_eq!(actual_lines, expected_lines);

        // Multi-byte characters.
        let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars
            .into_iter()
            .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes })
            .collect();
        assert_eq!(actual_mbcs, expected_mbcs);

        // Non-narrow characters.
        let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
            .into_iter()
            .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width))
            .collect();
        assert_eq!(actual_nncs, expected_nncs);
    })
}
|
||||
|
||||
test!(
|
||||
case: empty_text,
|
||||
text: "",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newlines_short,
|
||||
text: "a\nc",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0, 2],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newlines_long,
|
||||
text: "012345678\nabcdef012345678\na",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0, 10, 26],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newline_and_multi_byte_char_in_same_chunk,
|
||||
text: "01234β789\nbcdef0123456789abcdef",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0, 11],
|
||||
multi_byte_chars: vec![(5, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newline_and_control_char_in_same_chunk,
|
||||
text: "01234\u{07}6789\nbcdef0123456789abcdef",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0, 11],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![(5, 0)],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_short,
|
||||
text: "aβc",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(1, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_long,
|
||||
text: "0123456789abcΔf012345β",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(13, 2), (22, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_across_chunk_boundary,
|
||||
text: "0123456789abcdeΔ123456789abcdef01234",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(15, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_across_chunk_boundary_tail,
|
||||
text: "0123456789abcdeΔ....",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(15, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: non_narrow_short,
|
||||
text: "0\t2",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![(1, 4)],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: non_narrow_long,
|
||||
text: "01\t3456789abcdef01234567\u{07}9",
|
||||
filemap_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![(2, 4), (24, 0)],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: output_offset_all,
|
||||
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
|
||||
filemap_start_pos: 1000,
|
||||
lines: vec![0 + 1000, 7 + 1000, 27 + 1000],
|
||||
multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)],
|
||||
non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)],
|
||||
);
|
@ -24,6 +24,7 @@
|
||||
#![feature(optin_builtin_traits)]
|
||||
#![allow(unused_attributes)]
|
||||
#![feature(specialization)]
|
||||
#![feature(stdsimd)]
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cell::Cell;
|
||||
@ -47,6 +48,9 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
|
||||
extern crate serialize;
|
||||
extern crate serialize as rustc_serialize; // used by deriving
|
||||
|
||||
#[macro_use]
|
||||
extern crate cfg_if;
|
||||
|
||||
extern crate unicode_width;
|
||||
|
||||
pub mod edition;
|
||||
@ -58,6 +62,8 @@ pub use span_encoding::{Span, DUMMY_SP};
|
||||
|
||||
pub mod symbol;
|
||||
|
||||
mod analyze_filemap;
|
||||
|
||||
pub struct Globals {
|
||||
symbol_interner: Lock<symbol::Interner>,
|
||||
span_interner: Lock<span_encoding::SpanInterner>,
|
||||
@ -652,16 +658,16 @@ impl From<Vec<Span>> for MultiSpan {
|
||||
pub const NO_EXPANSION: SyntaxContext = SyntaxContext::empty();
|
||||
|
||||
/// Identifies an offset of a multi-byte character in a FileMap
|
||||
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
|
||||
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)]
|
||||
pub struct MultiByteChar {
|
||||
/// The absolute offset of the character in the CodeMap
|
||||
pub pos: BytePos,
|
||||
/// The number of bytes, >=2
|
||||
pub bytes: usize,
|
||||
pub bytes: u8,
|
||||
}
|
||||
|
||||
/// Identifies an offset of a non-narrow character in a FileMap
|
||||
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
|
||||
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)]
|
||||
pub enum NonNarrowChar {
|
||||
/// Represents a zero-width character
|
||||
ZeroWidth(BytePos),
|
||||
@ -779,11 +785,11 @@ pub struct FileMap {
|
||||
/// The end position of this source in the CodeMap
|
||||
pub end_pos: BytePos,
|
||||
/// Locations of lines beginnings in the source code
|
||||
pub lines: Lock<Vec<BytePos>>,
|
||||
pub lines: Vec<BytePos>,
|
||||
/// Locations of multi-byte characters in the source code
|
||||
pub multibyte_chars: Lock<Vec<MultiByteChar>>,
|
||||
pub multibyte_chars: Vec<MultiByteChar>,
|
||||
/// Width of characters that are not narrow in the source code
|
||||
pub non_narrow_chars: Lock<Vec<NonNarrowChar>>,
|
||||
pub non_narrow_chars: Vec<NonNarrowChar>,
|
||||
/// A hash of the filename, used for speeding up the incr. comp. hashing.
|
||||
pub name_hash: u128,
|
||||
}
|
||||
@ -797,7 +803,7 @@ impl Encodable for FileMap {
|
||||
s.emit_struct_field("start_pos", 4, |s| self.start_pos.encode(s))?;
|
||||
s.emit_struct_field("end_pos", 5, |s| self.end_pos.encode(s))?;
|
||||
s.emit_struct_field("lines", 6, |s| {
|
||||
let lines = self.lines.borrow();
|
||||
let lines = &self.lines[..];
|
||||
// store the length
|
||||
s.emit_u32(lines.len() as u32)?;
|
||||
|
||||
@ -843,10 +849,10 @@ impl Encodable for FileMap {
|
||||
Ok(())
|
||||
})?;
|
||||
s.emit_struct_field("multibyte_chars", 7, |s| {
|
||||
(*self.multibyte_chars.borrow()).encode(s)
|
||||
self.multibyte_chars.encode(s)
|
||||
})?;
|
||||
s.emit_struct_field("non_narrow_chars", 8, |s| {
|
||||
(*self.non_narrow_chars.borrow()).encode(s)
|
||||
self.non_narrow_chars.encode(s)
|
||||
})?;
|
||||
s.emit_struct_field("name_hash", 9, |s| {
|
||||
self.name_hash.encode(s)
|
||||
@ -914,9 +920,9 @@ impl Decodable for FileMap {
|
||||
src: None,
|
||||
src_hash,
|
||||
external_src: Lock::new(ExternalSource::AbsentOk),
|
||||
lines: Lock::new(lines),
|
||||
multibyte_chars: Lock::new(multibyte_chars),
|
||||
non_narrow_chars: Lock::new(non_narrow_chars),
|
||||
lines,
|
||||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
name_hash,
|
||||
})
|
||||
})
|
||||
@ -949,6 +955,9 @@ impl FileMap {
|
||||
};
|
||||
let end_pos = start_pos.to_usize() + src.len();
|
||||
|
||||
let (lines, multibyte_chars, non_narrow_chars) =
|
||||
analyze_filemap::analyze_filemap(&src[..], start_pos);
|
||||
|
||||
FileMap {
|
||||
name,
|
||||
name_was_remapped,
|
||||
@ -959,37 +968,17 @@ impl FileMap {
|
||||
external_src: Lock::new(ExternalSource::Unneeded),
|
||||
start_pos,
|
||||
end_pos: Pos::from_usize(end_pos),
|
||||
lines: Lock::new(Vec::new()),
|
||||
multibyte_chars: Lock::new(Vec::new()),
|
||||
non_narrow_chars: Lock::new(Vec::new()),
|
||||
lines,
|
||||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
name_hash,
|
||||
}
|
||||
}
|
||||
|
||||
/// EFFECT: register a start-of-line offset in the
|
||||
/// table of line-beginnings.
|
||||
/// UNCHECKED INVARIANT: these offsets must be added in the right
|
||||
/// order and must be in the right places; there is shared knowledge
|
||||
/// about what ends a line between this file and parse.rs
|
||||
/// WARNING: pos param here is the offset relative to start of CodeMap,
|
||||
/// and CodeMap will append a newline when adding a filemap without a newline at the end,
|
||||
/// so the safe way to call this is with value calculated as
|
||||
/// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
|
||||
pub fn next_line(&self, pos: BytePos) {
|
||||
// the new charpos must be > the last one (or it's the first one).
|
||||
let mut lines = self.lines.borrow_mut();
|
||||
let line_len = lines.len();
|
||||
assert!(line_len == 0 || ((*lines)[line_len - 1] < pos));
|
||||
lines.push(pos);
|
||||
}
|
||||
|
||||
/// Return the BytePos of the beginning of the current line.
|
||||
pub fn line_begin_pos(&self) -> BytePos {
|
||||
let lines = self.lines.borrow();
|
||||
match lines.last() {
|
||||
Some(&line_pos) => line_pos,
|
||||
None => self.start_pos,
|
||||
}
|
||||
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
|
||||
let line_index = self.lookup_line(pos).unwrap();
|
||||
self.lines[line_index]
|
||||
}
|
||||
|
||||
/// Add externally loaded source.
|
||||
@ -1040,8 +1029,7 @@ impl FileMap {
|
||||
}
|
||||
|
||||
let begin = {
|
||||
let lines = self.lines.borrow();
|
||||
let line = if let Some(line) = lines.get(line_number) {
|
||||
let line = if let Some(line) = self.lines.get(line_number) {
|
||||
line
|
||||
} else {
|
||||
return None;
|
||||
@ -1059,35 +1047,6 @@ impl FileMap {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn record_multibyte_char(&self, pos: BytePos, bytes: usize) {
|
||||
assert!(bytes >=2 && bytes <= 4);
|
||||
let mbc = MultiByteChar {
|
||||
pos,
|
||||
bytes,
|
||||
};
|
||||
self.multibyte_chars.borrow_mut().push(mbc);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn record_width(&self, pos: BytePos, ch: char) {
|
||||
let width = match ch {
|
||||
'\t' =>
|
||||
// Tabs will consume 4 columns.
|
||||
4,
|
||||
'\n' =>
|
||||
// Make newlines take one column so that displayed spans can point them.
|
||||
1,
|
||||
ch =>
|
||||
// Assume control characters are zero width.
|
||||
// FIXME: How can we decide between `width` and `width_cjk`?
|
||||
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0),
|
||||
};
|
||||
// Only record non-narrow characters.
|
||||
if width != 1 {
|
||||
self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_real_file(&self) -> bool {
|
||||
self.name.is_real()
|
||||
}
|
||||
@ -1100,7 +1059,7 @@ impl FileMap {
|
||||
self.end_pos.0 - self.start_pos.0
|
||||
}
|
||||
pub fn count_lines(&self) -> usize {
|
||||
self.lines.borrow().len()
|
||||
self.lines.len()
|
||||
}
|
||||
|
||||
/// Find the line containing the given position. The return value is the
|
||||
@ -1108,13 +1067,12 @@ impl FileMap {
|
||||
/// number. If the filemap is empty or the position is located before the
|
||||
/// first line, None is returned.
|
||||
pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
|
||||
let lines = self.lines.borrow();
|
||||
if lines.len() == 0 {
|
||||
if self.lines.len() == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let line_index = lookup_line(&lines[..], pos);
|
||||
assert!(line_index < lines.len() as isize);
|
||||
let line_index = lookup_line(&self.lines[..], pos);
|
||||
assert!(line_index < self.lines.len() as isize);
|
||||
if line_index >= 0 {
|
||||
Some(line_index as usize)
|
||||
} else {
|
||||
@ -1127,12 +1085,11 @@ impl FileMap {
|
||||
return (self.start_pos, self.end_pos);
|
||||
}
|
||||
|
||||
let lines = self.lines.borrow();
|
||||
assert!(line_index < lines.len());
|
||||
if line_index == (lines.len() - 1) {
|
||||
(lines[line_index], self.end_pos)
|
||||
assert!(line_index < self.lines.len());
|
||||
if line_index == (self.lines.len() - 1) {
|
||||
(self.lines[line_index], self.end_pos)
|
||||
} else {
|
||||
(lines[line_index], lines[line_index + 1])
|
||||
(self.lines[line_index], self.lines[line_index + 1])
|
||||
}
|
||||
}
|
||||
|
||||
@ -1156,6 +1113,8 @@ fn remove_bom(src: &mut String) {
|
||||
pub trait Pos {
|
||||
fn from_usize(n: usize) -> Self;
|
||||
fn to_usize(&self) -> usize;
|
||||
fn from_u32(n: u32) -> Self;
|
||||
fn to_u32(&self) -> u32;
|
||||
}
|
||||
|
||||
/// A byte offset. Keep this small (currently 32-bits), as AST contains
|
||||
@ -1177,7 +1136,13 @@ impl Pos for BytePos {
|
||||
fn from_usize(n: usize) -> BytePos { BytePos(n as u32) }
|
||||
|
||||
#[inline(always)]
|
||||
fn to_usize(&self) -> usize { let BytePos(n) = *self; n as usize }
|
||||
fn to_usize(&self) -> usize { self.0 as usize }
|
||||
|
||||
#[inline(always)]
|
||||
fn from_u32(n: u32) -> BytePos { BytePos(n) }
|
||||
|
||||
#[inline(always)]
|
||||
fn to_u32(&self) -> u32 { self.0 }
|
||||
}
|
||||
|
||||
impl Add for BytePos {
|
||||
@ -1215,7 +1180,13 @@ impl Pos for CharPos {
|
||||
fn from_usize(n: usize) -> CharPos { CharPos(n) }
|
||||
|
||||
#[inline(always)]
|
||||
fn to_usize(&self) -> usize { let CharPos(n) = *self; n }
|
||||
fn to_usize(&self) -> usize { self.0 }
|
||||
|
||||
#[inline(always)]
|
||||
fn from_u32(n: u32) -> CharPos { CharPos(n as usize) }
|
||||
|
||||
#[inline(always)]
|
||||
fn to_u32(&self) -> u32 { self.0 as u32}
|
||||
}
|
||||
|
||||
impl Add for CharPos {
|
||||
|
Loading…
Reference in New Issue
Block a user