rustc: Optimize reading metadata by 4x

We were previously reading metadata via `ar p`, but as learned from rustdoc
a while back, spawning a process to do something is pretty slow. Turns out LLVM
has an Archive class to read archives, but it cannot write archives.

This commit adds bindings to the read-only version of the LLVM archive class
(with a new type that only has a read() method), and then it uses this class
when reading the metadata out of rlibs. When you put this in tandem with not
compressing the metadata, reading the metadata is 4x faster than it used to be.
The timings I got for reading metadata from the respective libraries were:

    libstd-04ff901e-0.9-pre.dylib    => 100ms
    libstd-04ff901e-0.9-pre.rlib     => 23ms
    librustuv-7945354c-0.9-pre.dylib => 4ms
    librustuv-7945354c-0.9-pre.rlib  => 1ms
    librustc-5b94a16f-0.9-pre.dylib  => 87ms
    librustc-5b94a16f-0.9-pre.rlib   => 35ms
    libextra-a6ebb16f-0.9-pre.dylib  => 63ms
    libextra-a6ebb16f-0.9-pre.rlib   => 15ms
    libsyntax-2e4c0458-0.9-pre.dylib => 86ms
    libsyntax-2e4c0458-0.9-pre.rlib  => 22ms

In order to always take advantage of these faster metadata read-times, I sort
the files in filesearch based on whether they have an rlib extension or not
(prefer all rlib files first).

Overall, this halved the compile time for a `fn main() {}` crate from 0.185s to
0.095s on my system (when preferring dynamic linking). Reading metadata is still
the slowest pass of the compiler at 0.035s, but it's getting pretty close to
linking at 0.021s! The next best optimization is to just not copy the metadata
from LLVM because that's the most expensive part of reading metadata right now.
This commit is contained in:
Alex Crichton 2013-12-16 20:58:21 -08:00
parent 5c24bfa8c3
commit 64faafba19
8 changed files with 186 additions and 17 deletions

View File

@ -12,21 +12,29 @@
use driver::session::Session;
use metadata::filesearch;
use lib::llvm::{ArchiveRef, llvm};
use std::cast;
use std::io::fs;
use std::libc;
use std::os;
use std::run::{ProcessOptions, Process, ProcessOutput};
use std::str;
use std::unstable::raw;
use extra::tempfile::TempDir;
use syntax::abi;
pub static METADATA_FILENAME: &'static str = "metadata";
pub static METADATA_FILENAME: &'static str = "rust.metadata.bin";
/// Handle to an archive file located at `dst`, carrying the compiler session
/// it was opened with.
/// NOTE(review): presumably this is the `ar`-backed archive (see `run_ar`);
/// its methods are not visible in this hunk -- confirm.
pub struct Archive {
priv sess: Session,
priv dst: Path,
}
/// Read-only handle to a static archive, backed by LLVM's archive reader
/// rather than by the external `ar` tool.
pub struct ArchiveRO {
// Raw LLVM archive handle obtained from `LLVMRustOpenArchive`; it is passed
// to `LLVMRustDestroyArchive` when this value is dropped.
priv ptr: ArchiveRef,
}
fn run_ar(sess: Session, args: &str, cwd: Option<&Path>,
paths: &[&Path]) -> ProcessOutput {
let ar = sess.opts.ar.clone().unwrap_or_else(|| ~"ar");
@ -193,3 +201,50 @@ impl Archive {
perhaps an -L flag is missing?", name));
}
}
impl ArchiveRO {
    /// Opens the static archive at `dst` for reading only.
    ///
    /// The file is opened through LLVM's `Archive` class instead of spawning
    /// `ar`, which makes it cheaper than `Archive::open`. This type exposes
    /// no mutating operations.
    ///
    /// Returns `None` when LLVM hands back a null archive handle.
    pub fn open(dst: &Path) -> Option<ArchiveRO> {
        unsafe {
            let handle = dst.with_c_str(|c_path| {
                llvm::LLVMRustOpenArchive(c_path)
            });
            if handle.is_null() {
                return None;
            }
            Some(ArchiveRO { ptr: handle })
        }
    }

    /// Looks up the archive member named `file` and returns its contents.
    ///
    /// The returned slice is built directly from the pointer and length that
    /// LLVM reports (nothing is copied here), and its lifetime is tied to
    /// `self`. Returns `None` when LLVM reports no such member.
    pub fn read<'a>(&'a self, file: &str) -> Option<&'a [u8]> {
        unsafe {
            // Filled in by LLVM with the member's length in bytes.
            let mut nbytes = 0 as libc::size_t;
            let contents = file.with_c_str(|c_name| {
                llvm::LLVMRustArchiveReadSection(self.ptr, c_name, &mut nbytes)
            });
            if contents.is_null() {
                return None;
            }
            // Reinterpret the (pointer, length) pair as a borrowed byte slice.
            let raw_slice = raw::Slice {
                data: contents,
                len: nbytes as uint,
            };
            Some(cast::transmute(raw_slice))
        }
    }
}
impl Drop for ArchiveRO {
    /// Hands the archive handle back to LLVM so the archive is destroyed
    /// together with this value.
    fn drop(&mut self) {
        unsafe { llvm::LLVMRustDestroyArchive(self.ptr); }
    }
}

View File

@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use back::archive::Archive;
use back::archive::ArchiveRO;
use back::link;
use driver::session;
use lib::llvm::{ModuleRef, TargetMachineRef, llvm, True, False};
@ -43,10 +43,11 @@ pub fn run(sess: session::Session, llmod: ModuleRef,
}
};
let archive = Archive::open(sess, path);
let archive = ArchiveRO::open(&path).expect("wanted an rlib");
debug!("reading {}", name);
let bc = time(sess.time_passes(), format!("read {}.bc", name), (), |_|
archive.read(format!("{}.bc", name)));
let bc = bc.expect("missing bytecode in archive!");
let ptr = bc.as_ptr();
debug!("linking {}", name);
time(sess.time_passes(), format!("ll link {}", name), (), |()| unsafe {

View File

@ -1100,7 +1100,6 @@ pub fn early_error(emitter: @diagnostic::Emitter, msg: &str) -> ! {
pub fn list_metadata(sess: Session, path: &Path, out: @mut io::Writer) {
metadata::loader::list_file_metadata(
sess,
token::get_ident_interner(),
session::sess_os_to_meta_os(sess.targ_cfg.os), path, out);
}

View File

@ -257,6 +257,8 @@ pub enum Pass_opaque {}
pub type PassRef = *Pass_opaque;
pub enum TargetMachine_opaque {}
pub type TargetMachineRef = *TargetMachine_opaque;
pub enum Archive_opaque {}
pub type ArchiveRef = *Archive_opaque;
pub mod debuginfo {
use super::{ValueRef};
@ -300,7 +302,7 @@ pub mod llvm {
use super::{Bool, BuilderRef, ContextRef, MemoryBufferRef, ModuleRef};
use super::{ObjectFileRef, Opcode, PassManagerRef, PassManagerBuilderRef};
use super::{SectionIteratorRef, TargetDataRef, TypeKind, TypeRef, UseRef};
use super::{ValueRef, TargetMachineRef, FileType};
use super::{ValueRef, TargetMachineRef, FileType, ArchiveRef};
use super::{CodeGenModel, RelocMode, CodeGenOptLevel};
use super::debuginfo::*;
use std::libc::{c_char, c_int, c_longlong, c_ushort, c_uint, c_ulonglong,
@ -1748,6 +1750,11 @@ pub mod llvm {
syms: **c_char,
len: size_t);
pub fn LLVMRustMarkAllFunctionsNounwind(M: ModuleRef);
pub fn LLVMRustOpenArchive(path: *c_char) -> ArchiveRef;
pub fn LLVMRustArchiveReadSection(AR: ArchiveRef, name: *c_char,
out_len: *mut size_t) -> *c_char;
pub fn LLVMRustDestroyArchive(AR: ArchiveRef);
}
}

View File

@ -15,6 +15,7 @@
use metadata::cstore;
use metadata::decoder;
use metadata::loader;
use std::hashmap::HashMap;
use extra;
@ -29,6 +30,7 @@ pub type cnum_map = @mut HashMap<ast::CrateNum, ast::CrateNum>;
// Storage for a crate's metadata bytes; `as_slice` gives a uniform byte view
// over either variant.
pub enum MetadataBlob {
// Metadata held in an owned byte vector.
MetadataVec(~[u8]),
// Metadata that lives inside an opened archive (see
// `loader::ArchiveMetadata`).
MetadataArchive(loader::ArchiveMetadata),
}
pub struct crate_metadata {
@ -216,6 +218,7 @@ impl MetadataBlob {
// Borrows the metadata bytes for the lifetime of `self`, whichever variant
// is holding them.
pub fn as_slice<'a>(&'a self) -> &'a [u8] {
match *self {
MetadataVec(ref vec) => vec.as_slice(),
MetadataArchive(ref ar) => ar.as_slice(),
}
}
}

View File

@ -123,7 +123,16 @@ pub fn search(filesearch: @FileSearch, pick: pick) {
match io::result(|| fs::readdir(lib_search_path)) {
Ok(files) => {
let mut rslt = FileDoesntMatch;
for path in files.iter() {
let is_rlib = |p: & &Path| {
p.extension_str() == Some("rlib")
};
// Reading metadata out of rlibs is faster, and if we find both
// an rlib and a dylib we only read one of the files of
// metadata, so in the name of speed, bring all rlib files to
// the front of the search list.
let files1 = files.iter().filter(|p| is_rlib(p));
let files2 = files.iter().filter(|p| !is_rlib(p));
for path in files1.chain(files2) {
debug!("testing {}", path.display());
let maybe_picked = pick(path);
match maybe_picked {

View File

@ -10,10 +10,10 @@
//! Finds crate binaries and loads their metadata
use back::archive::{Archive, METADATA_FILENAME};
use back::archive::{ArchiveRO, METADATA_FILENAME};
use driver::session::Session;
use lib::llvm::{False, llvm, ObjectFile, mk_section_iter};
use metadata::cstore::{MetadataBlob, MetadataVec};
use metadata::cstore::{MetadataBlob, MetadataVec, MetadataArchive};
use metadata::decoder;
use metadata::encoder;
use metadata::filesearch::{FileMatches, FileDoesntMatch};
@ -61,6 +61,12 @@ pub struct Library {
metadata: MetadataBlob,
}
// Metadata read out of an archive, bundled with the archive it points into.
pub struct ArchiveMetadata {
// The opened archive; `data` was read from it in `ArchiveMetadata::new`.
priv archive: ArchiveRO,
// See comments in ArchiveMetadata::new for why this is static
priv data: &'static [u8],
}
impl Context {
pub fn load_library_crate(&self) -> Library {
match self.find_library_crate() {
@ -102,7 +108,7 @@ impl Context {
if candidate && existing {
FileMatches
} else if candidate {
match get_metadata_section(self.sess, self.os, path) {
match get_metadata_section(self.os, path) {
Some(cvec) =>
if crate_matches(cvec.as_slice(), self.name,
self.version, self.hash) {
@ -248,11 +254,60 @@ fn crate_matches(crate_data: &[u8],
}
}
fn get_metadata_section(sess: Session, os: Os,
filename: &Path) -> Option<MetadataBlob> {
impl ArchiveMetadata {
// Reads the `METADATA_FILENAME` member out of `ar` and, on success, takes
// ownership of the archive alongside a cached slice of that member.
// Returns `None` (after a debug log) when the archive has no such member.
fn new(ar: ArchiveRO) -> Option<ArchiveMetadata> {
let data: &'static [u8] = {
let data = match ar.read(METADATA_FILENAME) {
Some(data) => data,
None => {
debug!("didn't find '{}' in the archive", METADATA_FILENAME);
return None;
}
};
// This data is actually a pointer inside of the archive itself, but
// we essentially want to cache it because the lookup inside the
// archive is a fairly expensive operation (and it's queried for
// *very* frequently). For this reason, we transmute it to the
// static lifetime to put into the struct. Note that the buffer is
// never actually handed out with a static lifetime, but rather the
// buffer is loaned with the lifetime of this containing object.
// Hence, we're guaranteed that the buffer will never be used after
// this object is dead, so this is a safe operation to transmute and
// store the data as a static buffer.
unsafe { cast::transmute(data) }
};
// `ar` is moved into the struct here, next to the slice that was read
// from it above.
Some(ArchiveMetadata {
archive: ar,
data: data,
})
}
// Lends out the cached metadata bytes with the lifetime of `self` rather
// than the `'static` lifetime they are stored with.
pub fn as_slice<'a>(&'a self) -> &'a [u8] { self.data }
}
// Timing shim around `get_metadata_section_imp`: performs the read, logs how
// many milliseconds it took, and passes the result through untouched.
fn get_metadata_section(os: Os, filename: &Path) -> Option<MetadataBlob> {
    use extra::time;
    let began_ns = time::precise_time_ns();
    let blob = get_metadata_section_imp(os, filename);
    // Nanoseconds -> milliseconds for the log line.
    info!("reading {} => {}ms", filename.filename_display(),
          (time::precise_time_ns() - began_ns) / 1000000);
    blob
}
fn get_metadata_section_imp(os: Os, filename: &Path) -> Option<MetadataBlob> {
if filename.filename_str().unwrap().ends_with(".rlib") {
let archive = Archive::open(sess, filename.clone());
return Some(MetadataVec(archive.read(METADATA_FILENAME)));
// Use ArchiveRO for speed here, it's backed by LLVM and uses mmap
// internally to read the file. We also avoid even using a memcpy by
// just keeping the archive along while the metadata is in use.
let archive = match ArchiveRO::open(filename) {
Some(ar) => ar,
None => {
debug!("llvm didn't like `{}`", filename.display());
return None;
}
};
return ArchiveMetadata::new(archive).map(|ar| MetadataArchive(ar));
}
unsafe {
let mb = filename.with_c_str(|buf| {
@ -322,13 +377,13 @@ pub fn read_meta_section_name(os: Os) -> &'static str {
}
// A diagnostic function for dumping crate metadata to an output stream
pub fn list_file_metadata(sess: Session,
intr: @ident_interner,
pub fn list_file_metadata(intr: @ident_interner,
os: Os,
path: &Path,
out: @mut io::Writer) {
match get_metadata_section(sess, os, path) {
option::Some(bytes) => decoder::list_crate_metadata(intr, bytes.as_slice(),
match get_metadata_section(os, path) {
option::Some(bytes) => decoder::list_crate_metadata(intr,
bytes.as_slice(),
out),
option::None => {
write!(out, "could not find metadata in {}.\n", path.display())

View File

@ -9,6 +9,7 @@
// except according to those terms.
#include "rustllvm.h"
#include "llvm/Object/Archive.h"
//===----------------------------------------------------------------------===
//
@ -19,6 +20,7 @@
using namespace llvm;
using namespace llvm::sys;
using namespace llvm::object;
const char *LLVMRustError;
@ -558,3 +560,41 @@ LLVMRustLinkInExternalBitcode(LLVMModuleRef dst, char *bc, size_t len) {
}
return true;
}
extern "C" void*
LLVMRustOpenArchive(char *path) {
OwningPtr<MemoryBuffer> buf;
error_code err = MemoryBuffer::getFile(path, buf);
if (err) {
LLVMRustError = err.message().c_str();
return NULL;
}
Archive *ret = new Archive(buf.take(), err);
if (err) {
LLVMRustError = err.message().c_str();
return NULL;
}
return ret;
}
// Finds the archive member whose name (with surrounding spaces trimmed)
// equals `name`.
//
// On a match, stores the member's size in `*size` and returns a pointer to the
// start of the buffer LLVM reports for it. Returns NULL, leaving `*size`
// untouched, when no member matches. Members whose name cannot be read are
// skipped.
extern "C" const char*
LLVMRustArchiveReadSection(Archive *ar, char *name, size_t *size) {
    Archive::child_iterator member = ar->begin_children();
    Archive::child_iterator last = ar->end_children();
    while (member != last) {
        StringRef member_name;
        error_code name_err = member->getName(member_name);
        if (!name_err && member_name.trim(" ") == name) {
            StringRef contents = member->getBuffer();
            *size = contents.size();
            return contents.data();
        }
        ++member;
    }
    return NULL;
}
// Destroys an archive handle; intended for pointers obtained from
// LLVMRustOpenArchive, which allocates them with `new`.
extern "C" void
LLVMRustDestroyArchive(Archive *ar) {
delete ar;
}