rust/src/librustc_metadata/cstore.rs

304 lines
11 KiB
Rust
Raw Normal View History

// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![allow(non_camel_case_types)]
2011-07-08 03:39:44 +02:00
// The crate store - a central repo for information collected about external
// crates and libraries
pub use self::MetadataBlob::*;
use creader;
use decoder;
use index;
use loader;
use rustc::back::svh::Svh;
use rustc::middle::cstore::{ExternCrate};
use rustc::util::nodemap::{FnvHashMap, NodeMap, NodeSet};
use std::cell::{RefCell, Ref, Cell};
2014-03-27 18:28:38 +01:00
use std::rc::Rc;
use std::path::PathBuf;
use flate::Bytes;
2013-03-26 21:38:07 +01:00
use syntax::ast;
use syntax::attr;
use syntax::codemap;
use syntax::parse::token::IdentInterner;
pub use middle::cstore::{NativeLibraryKind, LinkagePreference};
pub use middle::cstore::{NativeStatic, NativeFramework, NativeUnknown};
pub use middle::cstore::{CrateSource, LinkMeta};
// Translation table from external crate numbers (as decoded from some crate
// file) to local crate numbers (as generated during this session). Each
// external crate may refer to types in other external crates, and every
// crate has its own crate numbering, so the numbers found in a crate file
// have to be mapped before they can be used locally.
pub type cnum_map = FnvHashMap<ast::CrateNum, ast::CrateNum>;
pub enum MetadataBlob {
MetadataVec(Bytes),
rustc: Optimize reading metadata by 4x We were previously reading metadata via `ar p`, but as learned from rustdoc awhile back, spawning a process to do something is pretty slow. Turns out LLVM has an Archive class to read archives, but it cannot write archives. This commits adds bindings to the read-only version of the LLVM archive class (with a new type that only has a read() method), and then it uses this class when reading the metadata out of rlibs. When you put this in tandem of not compressing the metadata, reading the metadata is 4x faster than it used to be The timings I got for reading metadata from the respective libraries was: libstd-04ff901e-0.9-pre.dylib => 100ms libstd-04ff901e-0.9-pre.rlib => 23ms librustuv-7945354c-0.9-pre.dylib => 4ms librustuv-7945354c-0.9-pre.rlib => 1ms librustc-5b94a16f-0.9-pre.dylib => 87ms librustc-5b94a16f-0.9-pre.rlib => 35ms libextra-a6ebb16f-0.9-pre.dylib => 63ms libextra-a6ebb16f-0.9-pre.rlib => 15ms libsyntax-2e4c0458-0.9-pre.dylib => 86ms libsyntax-2e4c0458-0.9-pre.rlib => 22ms In order to always take advantage of these faster metadata read-times, I sort the files in filesearch based on whether they have an rlib extension or not (prefer all rlib files first). Overall, this halved the compile time for a `fn main() {}` crate from 0.185s to 0.095s on my system (when preferring dynamic linking). Reading metadata is still the slowest pass of the compiler at 0.035s, but it's getting pretty close to linking at 0.021s! The next best optimization is to just not copy the metadata from LLVM because that's the most expensive part of reading metadata right now.
2013-12-17 05:58:21 +01:00
MetadataArchive(loader::ArchiveMetadata),
}
/// Holds information about a `codemap::FileMap` imported from another crate.
/// See `creader::import_codemap()` for more information on how these records
/// are produced.
pub struct ImportedFileMap {
/// This FileMap's starting byte-offset within the codemap of its original
/// crate
pub original_start_pos: codemap::BytePos,
/// The end position of this FileMap within the codemap of its original
/// crate
pub original_end_pos: codemap::BytePos,
/// The imported FileMap's representation within the local codemap
pub translated_filemap: Rc<codemap::FileMap>
}
/// Everything the crate store keeps about a single loaded external crate.
pub struct crate_metadata {
/// Name recorded for this crate.
/// NOTE(review): the `name()` accessor decodes the name from `data` rather
/// than reading this field -- confirm the two are always in agreement.
pub name: String,
/// Information about the extern crate that caused this crate to
/// be loaded. If this is `None`, then the crate was injected
/// (e.g., by the allocator)
pub extern_crate: Cell<Option<ExternCrate>>,
/// Raw metadata for the crate; `data()` exposes it as a byte slice.
pub data: MetadataBlob,
/// Translation from the crate numbers used inside this crate's metadata
/// to crate numbers of the current session (see the `cnum_map` type).
pub cnum_map: RefCell<cnum_map>,
/// Crate number of this crate.
/// NOTE(review): presumably the local (session) number -- confirm.
pub cnum: ast::CrateNum,
/// Cache of imported file maps; filled in by `imported_filemaps()` the
/// first time it is called while this vector is still empty.
pub codemap_import_info: RefCell<Vec<ImportedFileMap>>,
/// NOTE(review): presumably whether the crate takes part in staged API
/// (stability) checking -- not established by this file, confirm.
pub staged_api: bool,
/// Lookup index into the metadata (semantics defined in the `index`
/// module, not visible here).
pub index: index::Index,
/// Second, dense lookup index into the metadata (semantics defined in the
/// `index` module, not visible here).
pub xref_index: index::DenseIndex,
/// Flag if this crate is required by an rlib version of this crate, or in
/// other words whether it was explicitly linked to. An example of a crate
/// where this is false is when an allocator crate is injected into the
/// dependency list, and therefore isn't actually needed to link an rlib.
pub explicitly_linked: Cell<bool>,
}
/// The crate store: per-session bookkeeping for external crates and the
/// libraries / linker arguments they pull in. All mutable state lives in
/// `RefCell`s so the store can be updated through a shared reference.
pub struct CStore {
/// Metadata of every registered crate, keyed by crate number
/// (written by `set_crate_data`, read by `get_crate_data`).
metas: RefCell<FnvHashMap<ast::CrateNum, Rc<crate_metadata>>>,
/// Map from NodeId's of local extern crate statements to crate numbers
extern_mod_crate_map: RefCell<NodeMap<ast::CrateNum>>,
/// Sources of the crates in use; `add_used_crate_source` keeps this free
/// of duplicates.
used_crate_sources: RefCell<Vec<CrateSource>>,
/// Native libraries recorded via `add_used_library`, as (name, kind).
used_libraries: RefCell<Vec<(String, NativeLibraryKind)>>,
/// Individual linker arguments recorded via `add_used_link_args`.
used_link_args: RefCell<Vec<String>>,
/// Ids recorded via `add_statically_included_foreign_item`.
statically_included_foreign_items: RefCell<NodeSet>,
/// Shared identifier interner supplied to `CStore::new`.
pub intr: Rc<IdentInterner>,
}
2013-12-25 21:08:04 +01:00
impl CStore {
2014-03-27 18:28:38 +01:00
pub fn new(intr: Rc<IdentInterner>) -> CStore {
2013-12-25 21:08:04 +01:00
CStore {
metas: RefCell::new(FnvHashMap()),
extern_mod_crate_map: RefCell::new(FnvHashMap()),
used_crate_sources: RefCell::new(Vec::new()),
used_libraries: RefCell::new(Vec::new()),
used_link_args: RefCell::new(Vec::new()),
intr: intr,
statically_included_foreign_items: RefCell::new(NodeSet()),
2013-12-25 21:08:04 +01:00
}
}
pub fn next_crate_num(&self) -> ast::CrateNum {
self.metas.borrow().len() as ast::CrateNum + 1
}
2014-04-17 14:06:25 +02:00
pub fn get_crate_data(&self, cnum: ast::CrateNum) -> Rc<crate_metadata> {
self.metas.borrow().get(&cnum).unwrap().clone()
2013-12-25 21:08:04 +01:00
}
pub fn get_crate_hash(&self, cnum: ast::CrateNum) -> Svh {
2013-12-25 21:08:04 +01:00
let cdata = self.get_crate_data(cnum);
decoder::get_crate_hash(cdata.data())
}
2014-04-17 14:06:25 +02:00
pub fn set_crate_data(&self, cnum: ast::CrateNum, data: Rc<crate_metadata>) {
2014-03-21 03:49:20 +01:00
self.metas.borrow_mut().insert(cnum, data);
2013-12-25 21:08:04 +01:00
}
2014-12-09 02:26:43 +01:00
pub fn iter_crate_data<I>(&self, mut i: I) where
I: FnMut(ast::CrateNum, &Rc<crate_metadata>),
2014-12-09 02:26:43 +01:00
{
for (&k, v) in self.metas.borrow().iter() {
i(k, v);
2013-12-25 21:08:04 +01:00
}
}
/// Like `iter_crate_data`, but passes source paths (if available) as well.
2014-12-09 02:26:43 +01:00
pub fn iter_crate_data_origins<I>(&self, mut i: I) where
I: FnMut(ast::CrateNum, &crate_metadata, Option<CrateSource>),
{
for (&k, v) in self.metas.borrow().iter() {
2015-11-25 16:02:59 +01:00
let origin = self.opt_used_crate_source(k);
origin.as_ref().map(|cs| { assert!(k == cs.cnum); });
2016-02-09 21:37:21 +01:00
i(k, &v, origin);
}
}
2013-12-21 05:00:58 +01:00
pub fn add_used_crate_source(&self, src: CrateSource) {
let mut used_crate_sources = self.used_crate_sources.borrow_mut();
2014-03-21 03:49:20 +01:00
if !used_crate_sources.contains(&src) {
used_crate_sources.push(src);
2013-12-25 21:08:04 +01:00
}
}
2015-11-25 16:02:59 +01:00
pub fn opt_used_crate_source(&self, cnum: ast::CrateNum)
-> Option<CrateSource> {
2014-03-21 03:49:20 +01:00
self.used_crate_sources.borrow_mut()
.iter().find(|source| source.cnum == cnum).cloned()
2013-12-25 19:10:33 +01:00
}
pub fn reset(&self) {
self.metas.borrow_mut().clear();
self.extern_mod_crate_map.borrow_mut().clear();
self.used_crate_sources.borrow_mut().clear();
self.used_libraries.borrow_mut().clear();
self.used_link_args.borrow_mut().clear();
self.statically_included_foreign_items.borrow_mut().clear();
2013-12-25 19:10:33 +01:00
}
// This method is used when generating the command line to pass through to
// system linker. The linker expects undefined symbols on the left of the
// command line to be defined in libraries on the right, not the other way
// around. For more info, see some comments in the add_used_library function
// below.
//
// In order to get this left-to-right dependency ordering, we perform a
// topological sort of all crates putting the leaves at the right-most
// positions.
2015-11-21 00:08:09 +01:00
pub fn do_get_used_crates(&self, prefer: LinkagePreference)
-> Vec<(ast::CrateNum, Option<PathBuf>)> {
let mut ordering = Vec::new();
fn visit(cstore: &CStore, cnum: ast::CrateNum,
ordering: &mut Vec<ast::CrateNum>) {
if ordering.contains(&cnum) { return }
let meta = cstore.get_crate_data(cnum);
for (_, &dep) in meta.cnum_map.borrow().iter() {
visit(cstore, dep, ordering);
}
ordering.push(cnum);
2015-10-21 18:20:46 +02:00
}
for (&num, _) in self.metas.borrow().iter() {
visit(self, num, &mut ordering);
}
info!("topological ordering: {:?}", ordering);
ordering.reverse();
2014-03-21 03:49:20 +01:00
let mut libs = self.used_crate_sources.borrow()
.iter()
2013-12-25 21:08:04 +01:00
.map(|src| (src.cnum, match prefer {
LinkagePreference::RequireDynamic => src.dylib.clone().map(|p| p.0),
LinkagePreference::RequireStatic => src.rlib.clone().map(|p| p.0),
2013-12-25 21:08:04 +01:00
}))
rustc: Fix a leak in dependency= paths With the addition of separate search paths to the compiler, it was intended that applications such as Cargo could require a `--extern` flag per `extern crate` directive in the source. The system can currently be subverted, however, due to the `existing_match()` logic in the crate loader. When loading crates we first attempt to match an `extern crate` directive against all previously loaded crates to avoid reading metadata twice. This "hit the cache if possible" step was erroneously leaking crates across the search path boundaries, however. For example: extern crate b; extern crate a; If `b` depends on `a`, then it will load crate `a` when the `extern crate b` directive is being processed. When the compiler reaches `extern crate a` it will use the previously loaded version no matter what. If the compiler was not invoked with `-L crate=path/to/a`, it will still succeed. This behavior is allowing `extern crate` declarations in Cargo without a corresponding declaration in the manifest of a dependency, which is considered a bug. This commit fixes this problem by keeping track of the origin search path for a crate. Crates loaded from the dependency search path are not candidates for crates which are loaded from the crate search path. As a result of this fix, this is a likely a breaking change for a number of Cargo packages. If the compiler starts informing that a crate can no longer be found, it likely means that the dependency was forgotten in your Cargo.toml. [breaking-change]
2015-01-06 17:46:07 +01:00
.collect::<Vec<_>>();
libs.sort_by(|&(a, _), &(b, _)| {
let a = ordering.iter().position(|x| *x == a);
let b = ordering.iter().position(|x| *x == b);
a.cmp(&b)
});
libs
2013-12-25 21:08:04 +01:00
}
pub fn add_used_library(&self, lib: String, kind: NativeLibraryKind) {
2013-12-25 21:08:04 +01:00
assert!(!lib.is_empty());
2014-03-21 03:49:20 +01:00
self.used_libraries.borrow_mut().push((lib, kind));
2013-12-25 21:08:04 +01:00
}
pub fn get_used_libraries<'a>(&'a self)
-> &'a RefCell<Vec<(String,
NativeLibraryKind)>> {
&self.used_libraries
2013-12-25 21:08:04 +01:00
}
2013-12-21 05:00:58 +01:00
pub fn add_used_link_args(&self, args: &str) {
for s in args.split(' ').filter(|s| !s.is_empty()) {
self.used_link_args.borrow_mut().push(s.to_string());
2013-12-25 21:08:04 +01:00
}
}
pub fn get_used_link_args<'a>(&'a self) -> &'a RefCell<Vec<String> > {
&self.used_link_args
2013-12-25 21:08:04 +01:00
}
2013-12-21 05:00:58 +01:00
pub fn add_extern_mod_stmt_cnum(&self,
2013-12-25 21:08:04 +01:00
emod_id: ast::NodeId,
cnum: ast::CrateNum) {
2014-03-21 03:49:20 +01:00
self.extern_mod_crate_map.borrow_mut().insert(emod_id, cnum);
}
pub fn add_statically_included_foreign_item(&self, id: ast::NodeId) {
self.statically_included_foreign_items.borrow_mut().insert(id);
}
pub fn do_is_statically_included_foreign_item(&self, id: ast::NodeId) -> bool {
self.statically_included_foreign_items.borrow().contains(&id)
}
2015-11-25 16:02:59 +01:00
pub fn do_extern_mod_stmt_cnum(&self, emod_id: ast::NodeId) -> Option<ast::CrateNum>
{
self.extern_mod_crate_map.borrow().get(&emod_id).cloned()
}
2013-07-02 21:47:32 +02:00
}
impl crate_metadata {
    /// The crate's metadata bytes, as exposed by `MetadataBlob::as_slice`.
    pub fn data<'a>(&'a self) -> &'a [u8] {
        self.data.as_slice()
    }

    /// Crate name, decoded from the metadata.
    pub fn name(&self) -> &str {
        let bytes = self.data();
        decoder::get_crate_name(bytes)
    }

    /// Crate hash, decoded from the metadata.
    pub fn hash(&self) -> Svh {
        let bytes = self.data();
        decoder::get_crate_hash(bytes)
    }

    /// Crate disambiguator, decoded from the metadata.
    pub fn disambiguator(&self) -> &str {
        let bytes = self.data();
        decoder::get_crate_disambiguator(bytes)
    }

    /// Returns the file maps imported from this crate.
    ///
    /// While the cached list is empty, the file maps are imported through
    /// `creader::import_codemap` and stored before being returned.
    pub fn imported_filemaps<'a>(&'a self, codemap: &codemap::CodeMap)
                                 -> Ref<'a, Vec<ImportedFileMap>> {
        // The probing borrow ends with this statement, so the cell is free
        // to be borrowed mutably below.
        let cache_is_empty = self.codemap_import_info.borrow().is_empty();
        if cache_is_empty {
            let imported = creader::import_codemap(codemap, &self.data);
            // There is no way to downgrade a RefMut to a Ref, hence the
            // separate mutable borrow followed by a fresh shared one.
            *self.codemap_import_info.borrow_mut() = imported;
        }
        self.codemap_import_info.borrow()
    }

    /// Whether the crate attributes contain `allocator`.
    pub fn is_allocator(&self) -> bool {
        self.has_crate_attr("allocator")
    }

    /// Whether the crate attributes contain `needs_allocator`.
    pub fn needs_allocator(&self) -> bool {
        self.has_crate_attr("needs_allocator")
    }

    /// Decodes the crate attributes and checks them for `attr_name`.
    fn has_crate_attr(&self, attr_name: &str) -> bool {
        let attrs = decoder::get_crate_attributes(self.data());
        attr::contains_name(&attrs, attr_name)
    }
}
impl MetadataBlob {
pub fn as_slice<'a>(&'a self) -> &'a [u8] {
let slice = match *self {
MetadataVec(ref vec) => &vec[..],
rustc: Optimize reading metadata by 4x We were previously reading metadata via `ar p`, but as learned from rustdoc awhile back, spawning a process to do something is pretty slow. Turns out LLVM has an Archive class to read archives, but it cannot write archives. This commits adds bindings to the read-only version of the LLVM archive class (with a new type that only has a read() method), and then it uses this class when reading the metadata out of rlibs. When you put this in tandem of not compressing the metadata, reading the metadata is 4x faster than it used to be The timings I got for reading metadata from the respective libraries was: libstd-04ff901e-0.9-pre.dylib => 100ms libstd-04ff901e-0.9-pre.rlib => 23ms librustuv-7945354c-0.9-pre.dylib => 4ms librustuv-7945354c-0.9-pre.rlib => 1ms librustc-5b94a16f-0.9-pre.dylib => 87ms librustc-5b94a16f-0.9-pre.rlib => 35ms libextra-a6ebb16f-0.9-pre.dylib => 63ms libextra-a6ebb16f-0.9-pre.rlib => 15ms libsyntax-2e4c0458-0.9-pre.dylib => 86ms libsyntax-2e4c0458-0.9-pre.rlib => 22ms In order to always take advantage of these faster metadata read-times, I sort the files in filesearch based on whether they have an rlib extension or not (prefer all rlib files first). Overall, this halved the compile time for a `fn main() {}` crate from 0.185s to 0.095s on my system (when preferring dynamic linking). Reading metadata is still the slowest pass of the compiler at 0.035s, but it's getting pretty close to linking at 0.021s! The next best optimization is to just not copy the metadata from LLVM because that's the most expensive part of reading metadata right now.
2013-12-17 05:58:21 +01:00
MetadataArchive(ref ar) => ar.as_slice(),
};
if slice.len() < 4 {
&[] // corrupt metadata
} else {
let len = (((slice[0] as u32) << 24) |
((slice[1] as u32) << 16) |
((slice[2] as u32) << 8) |
((slice[3] as u32) << 0)) as usize;
if len + 4 <= slice.len() {
2015-01-18 01:15:52 +01:00
&slice[4.. len + 4]
} else {
&[] // corrupt or old metadata
}
}
}
}