From 2f8865556bd6cae123b3db4ceaa0c7977dacea8d Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Wed, 11 Feb 2015 18:29:49 +0100 Subject: [PATCH] Encode codemap and span information in crate metadata. This allows to create proper debuginfo line information for items inlined from other crates (e.g. instantiations of generics). Only the codemap's 'metadata' is stored in a crate's metadata. That is, just filename, line-beginnings, etc. but not the actual source code itself. We are thus missing the opportunity of making Rust the first "open-source-only" programming language out there. Pity. --- src/librustc/metadata/common.rs | 3 + src/librustc/metadata/creader.rs | 136 +++++++++- src/librustc/metadata/cstore.rs | 16 +- src/librustc/metadata/decoder.rs | 17 +- src/librustc/metadata/encoder.rs | 30 +++ src/librustc/middle/astencode.rs | 53 +++- src/librustc_driver/pretty.rs | 6 +- src/libsyntax/codemap.rs | 317 +++++++++++++++++++++--- src/libsyntax/parse/lexer/mod.rs | 33 ++- src/libsyntax/parse/mod.rs | 154 ++++-------- src/test/auxiliary/cross_crate_spans.rs | 26 ++ src/test/debuginfo/cross-crate-spans.rs | 74 ++++++ 12 files changed, 693 insertions(+), 172 deletions(-) create mode 100644 src/test/auxiliary/cross_crate_spans.rs create mode 100644 src/test/debuginfo/cross-crate-spans.rs diff --git a/src/librustc/metadata/common.rs b/src/librustc/metadata/common.rs index a58ef53de9a..b3c1d6cd15f 100644 --- a/src/librustc/metadata/common.rs +++ b/src/librustc/metadata/common.rs @@ -252,3 +252,6 @@ pub const tag_macro_def: uint = 0x9e; pub const tag_macro_def_body: uint = 0x9f; pub const tag_paren_sugar: uint = 0xa0; + +pub const tag_codemap: uint = 0xa1; +pub const tag_codemap_filemap: uint = 0xa2; diff --git a/src/librustc/metadata/creader.rs b/src/librustc/metadata/creader.rs index 46155925b3c..bc3071d8db2 100644 --- a/src/librustc/metadata/creader.rs +++ b/src/librustc/metadata/creader.rs @@ -26,7 +26,7 @@ use syntax::ast; use syntax::abi; use syntax::attr; use syntax::attr::AttrMetaMethods; -use syntax::codemap::{Span, mk_sp}; +use syntax::codemap::{self, Span, mk_sp, Pos}; use syntax::parse; use syntax::parse::token::InternedString; use syntax::parse::token; @@ -373,15 +373,17 @@ impl<'a> CrateReader<'a> { // Maintain a reference to the top most crate. let root = if root.is_some() { root } else { &crate_paths }; - let cnum_map = self.resolve_crate_deps(root, lib.metadata.as_slice(), span); + let loader::Library { dylib, rlib, metadata } = lib; - let loader::Library{ dylib, rlib, metadata } = lib; + let cnum_map = self.resolve_crate_deps(root, metadata.as_slice(), span); + let codemap_import_info = import_codemap(self.sess.codemap(), &metadata); let cmeta = Rc::new( cstore::crate_metadata { name: name.to_string(), data: metadata, cnum_map: cnum_map, cnum: cnum, + codemap_import_info: codemap_import_info, span: span, }); @@ -586,3 +588,131 @@ impl<'a> CrateReader<'a> { } } } + +/// Imports the codemap from an external crate into the codemap of the crate +/// currently being compiled (the "local crate"). +/// +/// The import algorithm works analogous to how AST items are inlined from an +/// external crate's metadata: +/// For every FileMap in the external codemap an 'inline' copy is created in the +/// local codemap. The correspondence relation between external and local +/// FileMaps is recorded in the `ImportedFileMap` objects returned from this +/// function. When an item from an external crate is later inlined into this +/// crate, this correspondence information is used to translate the span +/// information of the inlined item so that it refers the correct positions in +/// the local codemap (see `astencode::DecodeContext::tr_span()`). +/// +/// The import algorithm in the function below will reuse FileMaps already +/// existing in the local codemap. For example, even if the FileMap of some +/// source file of libstd gets imported many times, there will only ever be +/// one FileMap object for the corresponding file in the local codemap. +/// +/// Note that imported FileMaps do not actually contain the source code of the +/// file they represent, just information about length, line breaks, and +/// multibyte characters. This information is enough to generate valid debuginfo +/// for items inlined from other crates. +fn import_codemap(local_codemap: &codemap::CodeMap, + metadata: &MetadataBlob) + -> Vec { + let external_codemap = decoder::get_imported_filemaps(metadata.as_slice()); + + let imported_filemaps = external_codemap.into_iter().map(|filemap_to_import| { + // Try to find an existing FileMap that can be reused for the filemap to + // be imported. A FileMap is reusable if it is exactly the same, just + // positioned at a different offset within the codemap. + let reusable_filemap = { + local_codemap.files + .borrow() + .iter() + .find(|fm| are_equal_modulo_startpos(&fm, &filemap_to_import)) + .map(|rc| rc.clone()) + }; + + match reusable_filemap { + Some(fm) => { + cstore::ImportedFileMap { + original_start_pos: filemap_to_import.start_pos, + original_end_pos: filemap_to_import.end_pos, + translated_filemap: fm + } + } + None => { + // We can't reuse an existing FileMap, so allocate a new one + // containing the information we need. + let codemap::FileMap { + name, + start_pos, + end_pos, + lines, + multibyte_chars, + .. + } = filemap_to_import; + + let source_length = (end_pos - start_pos).to_usize(); + + // Translate line-start positions and multibyte character + // position into frame of reference local to file. + // `CodeMap::new_imported_filemap()` will then translate those + // coordinates to their new global frame of reference when the + // offset of the FileMap is known. + let lines = lines.into_inner().map_in_place(|pos| pos - start_pos); + let multibyte_chars = multibyte_chars + .into_inner() + .map_in_place(|mbc| + codemap::MultiByteChar { + pos: mbc.pos + start_pos, + bytes: mbc.bytes + }); + + let local_version = local_codemap.new_imported_filemap(name, + source_length, + lines, + multibyte_chars); + cstore::ImportedFileMap { + original_start_pos: start_pos, + original_end_pos: end_pos, + translated_filemap: local_version + } + } + } + }).collect(); + + return imported_filemaps; + + fn are_equal_modulo_startpos(fm1: &codemap::FileMap, + fm2: &codemap::FileMap) + -> bool { + if fm1.name != fm2.name { + return false; + } + + let lines1 = fm1.lines.borrow(); + let lines2 = fm2.lines.borrow(); + + if lines1.len() != lines2.len() { + return false; + } + + for (&line1, &line2) in lines1.iter().zip(lines2.iter()) { + if (line1 - fm1.start_pos) != (line2 - fm2.start_pos) { + return false; + } + } + + let multibytes1 = fm1.multibyte_chars.borrow(); + let multibytes2 = fm2.multibyte_chars.borrow(); + + if multibytes1.len() != multibytes2.len() { + return false; + } + + for (mb1, mb2) in multibytes1.iter().zip(multibytes2.iter()) { + if (mb1.bytes != mb2.bytes) || + ((mb1.pos - fm1.start_pos) != (mb2.pos - fm2.start_pos)) { + return false; + } + } + + true + } +} diff --git a/src/librustc/metadata/cstore.rs b/src/librustc/metadata/cstore.rs index a3f7d57da67..62098733023 100644 --- a/src/librustc/metadata/cstore.rs +++ b/src/librustc/metadata/cstore.rs @@ -27,7 +27,7 @@ use std::cell::RefCell; use std::rc::Rc; use flate::Bytes; use syntax::ast; -use syntax::codemap::Span; +use syntax::codemap; use syntax::parse::token::IdentInterner; // A map from external crate numbers (as decoded from some crate file) to @@ -41,12 +41,24 @@ pub enum MetadataBlob { MetadataArchive(loader::ArchiveMetadata), } +/// Holds information about a codemap::FileMap imported from another crate. +/// See creader::import_codemap() for more information. +pub struct ImportedFileMap { + /// This FileMap's byte-offset within the codemap of its original crate + pub original_start_pos: codemap::BytePos, + /// The end of this FileMap within the codemap of its original crate + pub original_end_pos: codemap::BytePos, + /// The imported FileMap's representation within the local codemap + pub translated_filemap: Rc +} + pub struct crate_metadata { pub name: String, pub data: MetadataBlob, pub cnum_map: cnum_map, pub cnum: ast::CrateNum, - pub span: Span, + pub codemap_import_info: Vec, + pub span: codemap::Span, } #[derive(Copy, Debug, PartialEq, Clone)] diff --git a/src/librustc/metadata/decoder.rs b/src/librustc/metadata/decoder.rs index e32fcaec047..994cb3f0f25 100644 --- a/src/librustc/metadata/decoder.rs +++ b/src/librustc/metadata/decoder.rs @@ -1561,7 +1561,6 @@ pub fn is_associated_type(cdata: Cmd, id: ast::NodeId) -> bool { } } - pub fn is_default_trait<'tcx>(cdata: Cmd, id: ast::NodeId) -> bool { let item_doc = lookup_item(id, cdata.data()); match item_family(item_doc) { @@ -1569,3 +1568,19 @@ pub fn is_default_trait<'tcx>(cdata: Cmd, id: ast::NodeId) -> bool { _ => false } } + +pub fn get_imported_filemaps(metadata: &[u8]) -> Vec { + let crate_doc = rbml::Doc::new(metadata); + let cm_doc = reader::get_doc(crate_doc, tag_codemap); + + let mut filemaps = vec![]; + + reader::tagged_docs(cm_doc, tag_codemap_filemap, |filemap_doc| { + let mut decoder = reader::Decoder::new(filemap_doc); + let filemap: codemap::FileMap = Decodable::decode(&mut decoder).unwrap(); + filemaps.push(filemap); + true + }); + + return filemaps; +} diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index 8152a2bf16d..b9e0e452c83 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -1751,6 +1751,28 @@ fn encode_plugin_registrar_fn(ecx: &EncodeContext, rbml_w: &mut Encoder) { } } +fn encode_codemap(ecx: &EncodeContext, rbml_w: &mut Encoder) { + rbml_w.start_tag(tag_codemap); + let codemap = ecx.tcx.sess.codemap(); + + for filemap in &codemap.files.borrow()[..] { + + if filemap.lines.borrow().len() == 0 || filemap.is_imported() { + // No need to export empty filemaps, as they can't contain spans + // that need translation. + // Also no need to re-export imported filemaps, as any downstream + // crate will import them from their original source. + continue; + } + + rbml_w.start_tag(tag_codemap_filemap); + filemap.encode(rbml_w); + rbml_w.end_tag(); + } + + rbml_w.end_tag(); +} + /// Serialize the text of the exported macros fn encode_macro_defs(rbml_w: &mut Encoder, krate: &ast::Crate) { @@ -1968,6 +1990,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter, lang_item_bytes: u64, native_lib_bytes: u64, plugin_registrar_fn_bytes: u64, + codemap_bytes: u64, macro_defs_bytes: u64, impl_bytes: u64, misc_bytes: u64, @@ -1982,6 +2005,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter, lang_item_bytes: 0, native_lib_bytes: 0, plugin_registrar_fn_bytes: 0, + codemap_bytes: 0, macro_defs_bytes: 0, impl_bytes: 0, misc_bytes: 0, @@ -2047,6 +2071,11 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter, encode_plugin_registrar_fn(&ecx, &mut rbml_w); stats.plugin_registrar_fn_bytes = rbml_w.writer.tell().unwrap() - i; + // Encode codemap + i = rbml_w.writer.tell().unwrap(); + encode_codemap(&ecx, &mut rbml_w); + stats.codemap_bytes = rbml_w.writer.tell().unwrap() - i; + // Encode macro definitions i = rbml_w.writer.tell().unwrap(); encode_macro_defs(&mut rbml_w, krate); @@ -2091,6 +2120,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter, println!(" lang item bytes: {}", stats.lang_item_bytes); println!(" native bytes: {}", stats.native_lib_bytes); println!("plugin registrar bytes: {}", stats.plugin_registrar_fn_bytes); + println!(" codemap bytes: {}", stats.codemap_bytes); println!(" macro def bytes: {}", stats.macro_defs_bytes); println!(" impl bytes: {}", stats.impl_bytes); println!(" misc bytes: {}", stats.misc_bytes); diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs index 33c0fb8b031..9d712c7c0fc 100644 --- a/src/librustc/middle/astencode.rs +++ b/src/librustc/middle/astencode.rs @@ -42,6 +42,7 @@ use syntax; use std::old_io::Seek; use std::num::FromPrimitive; use std::rc::Rc; +use std::cell::Cell; use rbml::reader; use rbml::writer::Encoder; @@ -58,7 +59,9 @@ struct DecodeContext<'a, 'b, 'tcx: 'a> { tcx: &'a ty::ctxt<'tcx>, cdata: &'b cstore::crate_metadata, from_id_range: ast_util::IdRange, - to_id_range: ast_util::IdRange + to_id_range: ast_util::IdRange, + // Cache the last used filemap for translating spans as an optimization. + last_filemap_index: Cell, } trait tr { @@ -120,6 +123,8 @@ impl<'a, 'b, 'c, 'tcx> ast_map::FoldOps for &'a DecodeContext<'b, 'c, 'tcx> { } } +/// Decodes an item from its AST in the cdata's metadata and adds it to the +/// ast-map. pub fn decode_inlined_item<'tcx>(cdata: &cstore::crate_metadata, tcx: &ty::ctxt<'tcx>, path: Vec, @@ -143,7 +148,8 @@ pub fn decode_inlined_item<'tcx>(cdata: &cstore::crate_metadata, cdata: cdata, tcx: tcx, from_id_range: from_id_range, - to_id_range: to_id_range + to_id_range: to_id_range, + last_filemap_index: Cell::new(0) }; let raw_ii = decode_ast(ast_doc); let ii = ast_map::map_decoded_item(&dcx.tcx.map, path, raw_ii, dcx); @@ -234,8 +240,47 @@ impl<'a, 'b, 'tcx> DecodeContext<'a, 'b, 'tcx> { assert_eq!(did.krate, ast::LOCAL_CRATE); ast::DefId { krate: ast::LOCAL_CRATE, node: self.tr_id(did.node) } } - pub fn tr_span(&self, _span: Span) -> Span { - codemap::DUMMY_SP // FIXME (#1972): handle span properly + + /// Translates a `Span` from an extern crate to the corresponding `Span` + /// within the local crate's codemap. `creader::import_codemap()` will + /// already have allocated any additionally needed FileMaps in the local + /// codemap as a side-effect of creating the crate_metadata's + /// `codemap_import_info`. + pub fn tr_span(&self, span: Span) -> Span { + let imported_filemaps = &self.cdata.codemap_import_info[..]; + + let filemap_index = { + // Optimize for the case that most spans within a translated item + // originate from the same filemap. + let last_filemap_index = self.last_filemap_index.get(); + + if span.lo >= imported_filemaps[last_filemap_index].original_start_pos && + span.hi <= imported_filemaps[last_filemap_index].original_end_pos { + last_filemap_index + } else { + let mut a = 0; + let mut b = imported_filemaps.len(); + + while b - a > 1 { + let m = (a + b) / 2; + if imported_filemaps[m].original_start_pos > span.lo { + b = m; + } else { + a = m; + } + } + + self.last_filemap_index.set(a); + a + } + }; + + let lo = (span.lo - imported_filemaps[filemap_index].original_start_pos) + + imported_filemaps[filemap_index].translated_filemap.start_pos; + let hi = (span.hi - imported_filemaps[filemap_index].original_start_pos) + + imported_filemaps[filemap_index].translated_filemap.start_pos; + + codemap::mk_sp(lo, hi) } } diff --git a/src/librustc_driver/pretty.rs b/src/librustc_driver/pretty.rs index 22473099baf..f433e67d878 100644 --- a/src/librustc_driver/pretty.rs +++ b/src/librustc_driver/pretty.rs @@ -542,7 +542,11 @@ pub fn pretty_print_input(sess: Session, let src_name = driver::source_name(input); let src = sess.codemap().get_filemap(&src_name[..]) - .src.as_bytes().to_vec(); + .src + .as_ref() + .unwrap() + .as_bytes() + .to_vec(); let mut rdr = MemReader::new(src); let out = match ofile { diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 099f6462942..162da2ac54c 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -29,6 +29,11 @@ use std::rc::Rc; use libc::c_uint; use serialize::{Encodable, Decodable, Encoder, Decoder}; + +// _____________________________________________________________________________ +// Pos, BytePos, CharPos +// + pub trait Pos { fn from_usize(n: usize) -> Self; fn to_usize(&self) -> usize; @@ -69,6 +74,18 @@ impl Sub for BytePos { } } +impl Encodable for BytePos { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_u32(self.0) + } +} + +impl Decodable for BytePos { + fn decode(d: &mut D) -> Result { + Ok(BytePos(try!{ d.read_u32() })) + } +} + impl Pos for CharPos { fn from_usize(n: usize) -> CharPos { CharPos(n) } fn to_usize(&self) -> usize { let CharPos(n) = *self; n } @@ -90,6 +107,10 @@ impl Sub for CharPos { } } +// _____________________________________________________________________________ +// Span, Spanned +// + /// Spans represent a region of code, used for error reporting. Positions in spans /// are *absolute* positions from the beginning of the codemap, not positions /// relative to FileMaps. Methods on the CodeMap can be used to relate spans back @@ -126,15 +147,20 @@ impl PartialEq for Span { impl Eq for Span {} impl Encodable for Span { - /* Note #1972 -- spans are encoded but not decoded */ fn encode(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_nil() + // Encode spans as a single u64 in order to cut down on tagging overhead + // added by the RBML metadata encoding. The should be solved differently + // altogether some time (FIXME #21482) + s.emit_u64( (self.lo.0 as u64) | ((self.hi.0 as u64) << 32) ) } } impl Decodable for Span { - fn decode(_d: &mut D) -> Result { - Ok(DUMMY_SP) + fn decode(d: &mut D) -> Result { + let lo_hi: u64 = try! { d.read_u64() }; + let lo = BytePos(lo_hi as u32); + let hi = BytePos((lo_hi >> 32) as u32); + Ok(mk_sp(lo, hi)) } } @@ -168,6 +194,10 @@ pub fn original_sp(cm: &CodeMap, sp: Span, enclosing_sp: Span) -> Span { } } +// _____________________________________________________________________________ +// Loc, LocWithOpt, FileMapAndLine, FileMapAndBytePos +// + /// A source code location used for error reporting pub struct Loc { /// Information about the original source @@ -192,6 +222,11 @@ pub struct LocWithOpt { pub struct FileMapAndLine { pub fm: Rc, pub line: usize } pub struct FileMapAndBytePos { pub fm: Rc, pub pos: BytePos } + +// _____________________________________________________________________________ +// MacroFormat, NameAndSpan, ExpnInfo, ExpnId +// + /// The syntax with which a macro was invoked. #[derive(Clone, Copy, Hash, Debug)] pub enum MacroFormat { @@ -254,6 +289,10 @@ impl ExpnId { } } +// _____________________________________________________________________________ +// FileMap, MultiByteChar, FileName, FileLines +// + pub type FileName = String; pub struct FileLines { @@ -262,7 +301,7 @@ pub struct FileLines { } /// Identifies an offset of a multi-byte character in a FileMap -#[derive(Copy)] +#[derive(Copy, RustcEncodable, RustcDecodable, Eq, PartialEq)] pub struct MultiByteChar { /// The absolute offset of the character in the CodeMap pub pos: BytePos, @@ -277,13 +316,134 @@ pub struct FileMap { /// e.g. `` pub name: FileName, /// The complete source code - pub src: String, + pub src: Option>, /// The start position of this source in the CodeMap pub start_pos: BytePos, + /// The end position of this source in the CodeMap + pub end_pos: BytePos, /// Locations of lines beginnings in the source code - pub lines: RefCell >, + pub lines: RefCell>, /// Locations of multi-byte characters in the source code - pub multibyte_chars: RefCell >, + pub multibyte_chars: RefCell>, +} + +impl Encodable for FileMap { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_struct("FileMap", 5, |s| { + try! { s.emit_struct_field("name", 0, |s| self.name.encode(s)) }; + try! { s.emit_struct_field("start_pos", 1, |s| self.start_pos.encode(s)) }; + try! { s.emit_struct_field("end_pos", 2, |s| self.end_pos.encode(s)) }; + try! { s.emit_struct_field("lines", 3, |s| { + let lines = self.lines.borrow(); + // store the length + try! { s.emit_u32(lines.len() as u32) }; + + if lines.len() > 0 { + // In order to preserve some space, we exploit the fact that + // the lines list is sorted and individual lines are + // probably not that long. Because of that we can store lines + // as a difference list, using as little space as possible + // for the differences. + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines.as_slice() + .windows(2) + .map(|w| w[1] - w[0]) + .map(|bp| bp.to_usize()) + .max() + .unwrap() + }; + + let bytes_per_diff: u8 = match max_line_length { + 0 ... 0xFF => 1, + 0x100 ... 0xFFFF => 2, + _ => 4 + }; + + // Encode the number of bytes used per diff. + try! { bytes_per_diff.encode(s) }; + + // Encode the first element. + try! { lines[0].encode(s) }; + + let diff_iter = (&lines[..]).windows(2) + .map(|w| (w[1] - w[0])); + + match bytes_per_diff { + 1 => for diff in diff_iter { try! { (diff.0 as u8).encode(s) } }, + 2 => for diff in diff_iter { try! { (diff.0 as u16).encode(s) } }, + 4 => for diff in diff_iter { try! { (diff.0 as u32).encode(s) } }, + _ => unreachable!() + } + } + + Ok(()) + }) + }; + s.emit_struct_field("multibyte_chars", 4, |s| { + (*self.multibyte_chars.borrow()).encode(s) + }) + }) + } +} + +impl Decodable for FileMap { + fn decode(d: &mut D) -> Result { + + d.read_struct("FileMap", 5, |d| { + let name: String = try! { + d.read_struct_field("name", 0, |d| Decodable::decode(d)) + }; + let start_pos: BytePos = try! { + d.read_struct_field("start_pos", 1, |d| Decodable::decode(d)) + }; + let end_pos: BytePos = try! { + d.read_struct_field("end_pos", 2, |d| Decodable::decode(d)) + }; + let lines: Vec = try! { + d.read_struct_field("lines", 3, |d| { + let num_lines: u32 = try! { Decodable::decode(d) }; + let mut lines = Vec::with_capacity(num_lines as usize); + + if num_lines > 0 { + // Read the number of bytes used per diff. + let bytes_per_diff: u8 = try! { Decodable::decode(d) }; + + // Read the first element. + let mut line_start: BytePos = try! { Decodable::decode(d) }; + lines.push(line_start); + + for _ in 1..num_lines { + let diff = match bytes_per_diff { + 1 => try! { d.read_u8() } as u32, + 2 => try! { d.read_u16() } as u32, + 4 => try! { d.read_u32() }, + _ => unreachable!() + }; + + line_start = line_start + BytePos(diff); + + lines.push(line_start); + } + } + + Ok(lines) + }) + }; + let multibyte_chars: Vec = try! { + d.read_struct_field("multibyte_chars", 4, |d| Decodable::decode(d)) + }; + Ok(FileMap { + name: name, + start_pos: start_pos, + end_pos: end_pos, + src: None, + lines: RefCell::new(lines), + multibyte_chars: RefCell::new(multibyte_chars) + }) + }) + } } impl FileMap { @@ -307,16 +467,21 @@ impl FileMap { /// get a line from the list of pre-computed line-beginnings /// pub fn get_line(&self, line_number: usize) -> Option { - let lines = self.lines.borrow(); - lines.get(line_number).map(|&line| { - let begin: BytePos = line - self.start_pos; - let begin = begin.to_usize(); - let slice = &self.src[begin..]; - match slice.find('\n') { - Some(e) => &slice[..e], - None => slice - }.to_string() - }) + match self.src { + Some(ref src) => { + let lines = self.lines.borrow(); + lines.get(line_number).map(|&line| { + let begin: BytePos = line - self.start_pos; + let begin = begin.to_usize(); + let slice = &src[begin..]; + match slice.find('\n') { + Some(e) => &slice[..e], + None => slice + }.to_string() + }) + } + None => None + } } pub fn record_multibyte_char(&self, pos: BytePos, bytes: usize) { @@ -332,8 +497,17 @@ impl FileMap { !(self.name.starts_with("<") && self.name.ends_with(">")) } + + pub fn is_imported(&self) -> bool { + self.src.is_none() + } } + +// _____________________________________________________________________________ +// CodeMap +// + pub struct CodeMap { pub files: RefCell>>, expansions: RefCell> @@ -351,7 +525,7 @@ impl CodeMap { let mut files = self.files.borrow_mut(); let start_pos = match files.last() { None => 0, - Some(last) => last.start_pos.to_usize() + last.src.len(), + Some(last) => last.end_pos.to_usize(), }; // Remove utf-8 BOM if any. @@ -372,10 +546,13 @@ impl CodeMap { src.push('\n'); } + let end_pos = start_pos + src.len(); + let filemap = Rc::new(FileMap { name: filename, - src: src.to_string(), + src: Some(Rc::new(src)), start_pos: Pos::from_usize(start_pos), + end_pos: Pos::from_usize(end_pos), lines: RefCell::new(Vec::new()), multibyte_chars: RefCell::new(Vec::new()), }); @@ -385,6 +562,45 @@ impl CodeMap { filemap } + /// Allocates a new FileMap representing a source file from an external + /// crate. The source code of such an "imported filemap" is not available, + /// but we still know enough to generate accurate debuginfo location + /// information for things inlined from other crates. + pub fn new_imported_filemap(&self, + filename: FileName, + source_len: usize, + file_local_lines: Vec, + file_local_multibyte_chars: Vec) + -> Rc { + let mut files = self.files.borrow_mut(); + let start_pos = match files.last() { + None => 0, + Some(last) => last.end_pos.to_usize(), + }; + + let end_pos = Pos::from_usize(start_pos + source_len); + let start_pos = Pos::from_usize(start_pos); + + let lines = file_local_lines.map_in_place(|pos| pos + start_pos); + let multibyte_chars = file_local_multibyte_chars.map_in_place(|mbc| MultiByteChar { + pos: mbc.pos + start_pos, + bytes: mbc.bytes + }); + + let filemap = Rc::new(FileMap { + name: filename, + src: None, + start_pos: start_pos, + end_pos: end_pos, + lines: RefCell::new(lines), + multibyte_chars: RefCell::new(multibyte_chars), + }); + + files.push(filemap.clone()); + + filemap + } + pub fn mk_substr_filename(&self, sp: Span) -> String { let pos = self.lookup_char_pos(sp.lo); (format!("<{}:{}:{}>", @@ -442,30 +658,42 @@ impl CodeMap { return Err(SpanSnippetError::IllFormedSpan(sp)); } - let begin = self.lookup_byte_offset(sp.lo); - let end = self.lookup_byte_offset(sp.hi); + let local_begin = self.lookup_byte_offset(sp.lo); + let local_end = self.lookup_byte_offset(sp.hi); - if begin.fm.start_pos != end.fm.start_pos { + if local_begin.fm.start_pos != local_end.fm.start_pos { return Err(SpanSnippetError::DistinctSources(DistinctSources { - begin: (begin.fm.name.clone(), - begin.fm.start_pos), - end: (end.fm.name.clone(), - end.fm.start_pos) + begin: (local_begin.fm.name.clone(), + local_begin.fm.start_pos), + end: (local_end.fm.name.clone(), + local_end.fm.start_pos) })); } else { - let start = begin.pos.to_usize(); - let limit = end.pos.to_usize(); - if start > limit || limit > begin.fm.src.len() { - return Err(SpanSnippetError::MalformedForCodemap( - MalformedCodemapPositions { - name: begin.fm.name.clone(), - source_len: begin.fm.src.len(), - begin_pos: begin.pos, - end_pos: end.pos, - })); - } + match local_begin.fm.src { + Some(ref src) => { + let start_index = local_begin.pos.to_usize(); + let end_index = local_end.pos.to_usize(); + let source_len = (local_begin.fm.end_pos - + local_begin.fm.start_pos).to_usize(); - return Ok((&begin.fm.src[start..limit]).to_string()) + if start_index > end_index || end_index > source_len { + return Err(SpanSnippetError::MalformedForCodemap( + MalformedCodemapPositions { + name: local_begin.fm.name.clone(), + source_len: source_len, + begin_pos: local_begin.pos, + end_pos: local_end.pos, + })); + } + + return Ok((&src[start_index..end_index]).to_string()) + } + None => { + return Err(SpanSnippetError::SourceNotAvailable { + filename: local_begin.fm.name.clone() + }); + } + } } } @@ -478,6 +706,7 @@ impl CodeMap { panic!("asking for {} which we don't know about", filename); } + /// For a global BytePos compute the local offset within the containing FileMap pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos { let idx = self.lookup_filemap_idx(bpos); let fm = (*self.files.borrow())[idx].clone(); @@ -639,11 +868,16 @@ impl CodeMap { } } +// _____________________________________________________________________________ +// SpanSnippetError, DistinctSources, MalformedCodemapPositions +// + #[derive(Clone, PartialEq, Eq, Debug)] pub enum SpanSnippetError { IllFormedSpan(Span), DistinctSources(DistinctSources), MalformedForCodemap(MalformedCodemapPositions), + SourceNotAvailable { filename: String } } #[derive(Clone, PartialEq, Eq, Debug)] @@ -660,6 +894,11 @@ pub struct MalformedCodemapPositions { end_pos: BytePos } + +// _____________________________________________________________________________ +// Tests +// + #[cfg(test)] mod test { use super::*; diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index bbe1ddfd4cf..8d3e93d35dd 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -76,6 +76,10 @@ pub struct StringReader<'a> { // are revised to go directly to token-trees. /// Is \x00,\x00 is interpreted as encoded ast::Ident? read_embedded_ident: bool, + + // cache a direct reference to the source text, so that we don't have to + // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time. + source_text: Rc } impl<'a> Reader for StringReader<'a> { @@ -141,7 +145,14 @@ pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler, impl<'a> StringReader<'a> { /// For comments.rs, which hackily pokes into pos and curr pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler, - filemap: Rc) -> StringReader<'b> { + filemap: Rc) -> StringReader<'b> { + if filemap.src.is_none() { + span_diagnostic.handler.bug(&format!("Cannot lex filemap without source: {}", + filemap.name)[..]); + } + + let source_text = (*filemap.src.as_ref().unwrap()).clone(); + let mut sr = StringReader { span_diagnostic: span_diagnostic, pos: filemap.start_pos, @@ -153,6 +164,7 @@ impl<'a> StringReader<'a> { peek_tok: token::Eof, peek_span: codemap::DUMMY_SP, read_embedded_ident: false, + source_text: source_text }; sr.bump(); sr @@ -213,7 +225,7 @@ impl<'a> StringReader<'a> { m.push_str(": "); let from = self.byte_offset(from_pos).to_usize(); let to = self.byte_offset(to_pos).to_usize(); - m.push_str(&self.filemap.src[from..to]); + m.push_str(&self.source_text[from..to]); self.fatal_span_(from_pos, to_pos, &m[..]); } @@ -270,9 +282,8 @@ impl<'a> StringReader<'a> { fn with_str_from_to(&self, start: BytePos, end: BytePos, f: F) -> T where F: FnOnce(&str) -> T, { - f(&self.filemap.src[ - self.byte_offset(start).to_usize().. - self.byte_offset(end).to_usize()]) + f(&self.source_text[self.byte_offset(start).to_usize().. + self.byte_offset(end).to_usize()]) } /// Converts CRLF to LF in the given string, raising an error on bare CR. @@ -321,12 +332,10 @@ impl<'a> StringReader<'a> { pub fn bump(&mut self) { self.last_pos = self.pos; let current_byte_offset = self.byte_offset(self.pos).to_usize(); - if current_byte_offset < self.filemap.src.len() { + if current_byte_offset < self.source_text.len() { assert!(self.curr.is_some()); let last_char = self.curr.unwrap(); - let next = self.filemap - .src - .char_range_at(current_byte_offset); + let next = self.source_text.char_range_at(current_byte_offset); let byte_offset_diff = next.next - current_byte_offset; self.pos = self.pos + Pos::from_usize(byte_offset_diff); self.curr = Some(next.ch); @@ -346,8 +355,8 @@ impl<'a> StringReader<'a> { pub fn nextch(&self) -> Option { let offset = self.byte_offset(self.pos).to_usize(); - if offset < self.filemap.src.len() { - Some(self.filemap.src.char_at(offset)) + if offset < self.source_text.len() { + Some(self.source_text.char_at(offset)) } else { None } @@ -359,7 +368,7 @@ impl<'a> StringReader<'a> { pub fn nextnextch(&self) -> Option { let offset = self.byte_offset(self.pos).to_usize(); - let s = &*self.filemap.src; + let s = &self.source_text[..]; if offset >= s.len() { return None } let str::CharRange { next, .. } = s.char_range_at(offset); if next < s.len() { diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 4d099529cb4..66589d5e3d1 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -751,6 +751,7 @@ pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> #[cfg(test)] mod test { use super::*; + use std::rc::Rc; use serialize::json; use codemap::{Span, BytePos, Pos, Spanned, NO_EXPANSION}; use owned_slice::OwnedSlice; @@ -855,117 +856,50 @@ mod test { } #[test] - fn string_to_tts_1 () { + fn string_to_tts_1() { let tts = string_to_tts("fn a (b : i32) { b; }".to_string()); - assert_eq!(json::encode(&tts).unwrap(), - "[\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"fn\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"a\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtDelimited\",\ - \"fields\":[\ - null,\ - {\ - \"delim\":\"Paren\",\ - \"open_span\":null,\ - \"tts\":[\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"b\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - \"Colon\"\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"i32\",\ - \"Plain\"\ - ]\ - }\ - ]\ - }\ - ],\ - \"close_span\":null\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtDelimited\",\ - \"fields\":[\ - null,\ - {\ - \"delim\":\"Brace\",\ - \"open_span\":null,\ - \"tts\":[\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"b\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - \"Semi\"\ - ]\ - }\ - ],\ - \"close_span\":null\ - }\ - ]\ - }\ -]" - ); + + let expected = vec![ + ast::TtToken(sp(0, 2), + token::Ident(str_to_ident("fn"), + token::IdentStyle::Plain)), + ast::TtToken(sp(3, 4), + token::Ident(str_to_ident("a"), + token::IdentStyle::Plain)), + ast::TtDelimited( + sp(5, 14), + Rc::new(ast::Delimited { + delim: token::DelimToken::Paren, + open_span: sp(5, 6), + tts: vec![ + ast::TtToken(sp(6, 7), + token::Ident(str_to_ident("b"), + token::IdentStyle::Plain)), + ast::TtToken(sp(8, 9), + token::Colon), + ast::TtToken(sp(10, 13), + token::Ident(str_to_ident("i32"), + token::IdentStyle::Plain)), + ], + close_span: sp(13, 14), + })), + ast::TtDelimited( + sp(15, 21), + Rc::new(ast::Delimited { + delim: token::DelimToken::Brace, + open_span: sp(15, 16), + tts: vec![ + ast::TtToken(sp(17, 18), + token::Ident(str_to_ident("b"), + token::IdentStyle::Plain)), + ast::TtToken(sp(18, 19), + token::Semi) + ], + close_span: sp(20, 21), + })) + ]; + + assert_eq!(tts, expected); } #[test] fn ret_expr() { diff --git a/src/test/auxiliary/cross_crate_spans.rs b/src/test/auxiliary/cross_crate_spans.rs new file mode 100644 index 00000000000..22c206836ee --- /dev/null +++ b/src/test/auxiliary/cross_crate_spans.rs @@ -0,0 +1,26 @@ +// Copyright 2013-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![crate_type = "rlib"] +#![omit_gdb_pretty_printer_section] + +// no-prefer-dynamic +// compile-flags:-g + +pub fn generic_function(val: T) -> (T, T) { + let result = (val.clone(), val.clone()); + let a_variable: u32 = 123456789; + let another_variable: f64 = 123456789.5; + zzz(); + result +} + +#[inline(never)] +fn zzz() {()} \ No newline at end of file diff --git a/src/test/debuginfo/cross-crate-spans.rs b/src/test/debuginfo/cross-crate-spans.rs new file mode 100644 index 00000000000..3aef9438a33 --- /dev/null +++ b/src/test/debuginfo/cross-crate-spans.rs @@ -0,0 +1,74 @@ +// Copyright 2013-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![omit_gdb_pretty_printer_section] + +// ignore-android: FIXME(#10381) +// min-lldb-version: 310 + +// aux-build:cross_crate_spans.rs +extern crate cross_crate_spans; + +// compile-flags:-g + + +// === GDB TESTS =================================================================================== + +// gdb-command:break cross_crate_spans.rs:21 +// gdb-command:run + +// gdb-command:print result +// gdb-check:$1 = {17, 17} +// gdb-command:print a_variable +// gdb-check:$2 = 123456789 +// gdb-command:print another_variable +// gdb-check:$3 = 123456789.5 +// gdb-command:continue + +// gdb-command:print result +// gdb-check:$4 = {1212, 1212} +// gdb-command:print a_variable +// gdb-check:$5 = 123456789 +// gdb-command:print another_variable +// gdb-check:$6 = 123456789.5 +// gdb-command:continue + + + +// === LLDB TESTS ================================================================================== + +// lldb-command:b cross_crate_spans.rs:21 +// lldb-command:run + +// lldb-command:print result +// lldb-check:[...]$0 = (17, 17) +// lldb-command:print a_variable +// lldb-check:[...]$1 = 123456789 +// lldb-command:print another_variable +// lldb-check:[...]$2 = 123456789.5 +// lldb-command:continue + +// lldb-command:print result +// lldb-check:[...]$3 = (1212, 1212) +// lldb-command:print a_variable +// lldb-check:[...]$4 = 123456789 +// lldb-command:print another_variable +// lldb-check:[...]$5 = 123456789.5 +// lldb-command:continue + + +// This test makes sure that we can break in functions inlined from other crates. + +fn main() { + + let _ = cross_crate_spans::generic_function(17u32); + let _ = cross_crate_spans::generic_function(1212i16); + +}