async-llvm(25): Restore -Ztime-passes output for trans and LLVM.

This commit is contained in:
Michael Woerister 2017-07-31 14:51:47 +02:00
parent bd36df84a5
commit a9a0ea921b
3 changed files with 79 additions and 12 deletions

View File

@ -57,6 +57,32 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
let rv = f();
let dur = start.elapsed();
print_time_passes_entry_internal(what, dur);
TIME_DEPTH.with(|slot| slot.set(old));
rv
}
pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
if !do_it {
return
}
let old = TIME_DEPTH.with(|slot| {
let r = slot.get();
slot.set(r + 1);
r
});
print_time_passes_entry_internal(what, dur);
TIME_DEPTH.with(|slot| slot.set(old));
}
fn print_time_passes_entry_internal(what: &str, dur: Duration) {
let indentation = TIME_DEPTH.with(|slot| slot.get());
let mem_string = match get_resident() {
Some(n) => {
let mb = n as f64 / 1_000_000.0;
@ -65,14 +91,10 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
None => "".to_owned(),
};
println!("{}time: {}{}\t{}",
repeat(" ").take(old).collect::<String>(),
repeat(" ").take(indentation).collect::<String>(),
duration_to_secs_str(dur),
mem_string,
what);
TIME_DEPTH.with(|slot| slot.set(old));
rv
}
// Hack up our own formatting for the duration to make it easier for scripts

View File

@ -23,7 +23,7 @@ use llvm::{ModuleRef, TargetMachineRef, PassManagerRef, DiagnosticInfoRef};
use llvm::SMDiagnosticRef;
use {CrateTranslation, ModuleSource, ModuleTranslation, CompiledModule, ModuleKind};
use rustc::hir::def_id::CrateNum;
use rustc::util::common::{time, time_depth, set_time_depth, path2cstr};
use rustc::util::common::{time, time_depth, set_time_depth, path2cstr, print_time_passes_entry};
use rustc::util::fs::{link_or_copy, rename_or_copy_remove};
use errors::{self, Handler, Level, DiagnosticBuilder, FatalError};
use errors::emitter::{Emitter};
@ -44,6 +44,7 @@ use std::str;
use std::sync::Arc;
use std::sync::mpsc::{channel, Sender, Receiver};
use std::slice;
use std::time::Instant;
use std::thread;
use libc::{c_uint, c_void, c_char, size_t};
@ -498,9 +499,9 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext,
diag_handler.abort_if_errors();
// Finally, run the actual optimization passes
time(config.time_passes, &format!("llvm function passes [{}]", cgcx.worker), ||
time(config.time_passes, &format!("llvm function passes [{}]", module_name.unwrap()), ||
llvm::LLVMRustRunFunctionPassManager(fpm, llmod));
time(config.time_passes, &format!("llvm module passes [{}]", cgcx.worker), ||
time(config.time_passes, &format!("llvm module passes [{}]", module_name.unwrap()), ||
llvm::LLVMRunPassManager(mpm, llmod));
// Deallocate managers that we're now done with
@ -563,7 +564,7 @@ unsafe fn optimize_and_codegen(cgcx: &CodegenContext,
llvm::LLVMWriteBitcodeToFile(llmod, bc_out_c.as_ptr());
}
time(config.time_passes, &format!("codegen passes [{}]", cgcx.worker),
time(config.time_passes, &format!("codegen passes [{}]", module_name.unwrap()),
|| -> Result<(), FatalError> {
if config.emit_ir {
let out = output_names.temp_path(OutputType::LlvmAssembly, module_name);
@ -756,6 +757,11 @@ pub fn start_async_translation(sess: &Session,
metadata_config.set_flags(sess, no_builtins);
allocator_config.set_flags(sess, no_builtins);
// Exclude metadata and allocator modules from time_passes output, since
// they throw off the "LLVM passes" measurement.
metadata_config.time_passes = false;
allocator_config.time_passes = false;
let client = sess.jobserver_from_env.clone().unwrap_or_else(|| {
// Pick a "reasonable maximum" if we don't otherwise have a jobserver in
// our environment, capping out at 32 so we don't take everything down
@ -1266,6 +1272,9 @@ fn start_executing_work(sess: &Session,
// manner we can ensure that the maximum number of parallel workers is
// capped at any one point in time.
return thread::spawn(move || {
// We pretend to be within the top-level LLVM time-passes task here:
set_time_depth(1);
let max_workers = ::num_cpus::get();
let mut worker_id_counter = 0;
let mut free_worker_ids = Vec::new();
@ -1298,6 +1307,8 @@ fn start_executing_work(sess: &Session,
let mut main_thread_worker_state = MainThreadWorkerState::Idle;
let mut running = 0;
let mut llvm_start_time = None;
// Run the message loop while there's still anything that needs message
// processing:
while !translation_done ||
@ -1323,6 +1334,7 @@ fn start_executing_work(sess: &Session,
worker: get_worker_id(&mut free_worker_ids),
.. cgcx.clone()
};
maybe_start_llvm_timer(&item, &mut llvm_start_time);
main_thread_worker_state = MainThreadWorkerState::LLVMing;
spawn_work(cgcx, item);
}
@ -1338,7 +1350,7 @@ fn start_executing_work(sess: &Session,
worker: get_worker_id(&mut free_worker_ids),
.. cgcx.clone()
};
maybe_start_llvm_timer(&item, &mut llvm_start_time);
main_thread_worker_state = MainThreadWorkerState::LLVMing;
spawn_work(cgcx, item);
}
@ -1358,6 +1370,8 @@ fn start_executing_work(sess: &Session,
while work_items.len() > 0 && running < tokens.len() {
let (item, _) = work_items.pop().unwrap();
maybe_start_llvm_timer(&item, &mut llvm_start_time);
let cgcx = CodegenContext {
worker: get_worker_id(&mut free_worker_ids),
.. cgcx.clone()
@ -1465,6 +1479,16 @@ fn start_executing_work(sess: &Session,
}
}
if let Some(llvm_start_time) = llvm_start_time {
let total_llvm_time = Instant::now().duration_since(llvm_start_time);
// This is the top-level timing for all of LLVM, set the time-depth
// to zero.
set_time_depth(0);
print_time_passes_entry(cgcx.time_passes,
"LLVM passes",
total_llvm_time);
}
CompiledModules {
modules: compiled_modules,
metadata_module: compiled_metadata_module.unwrap(),
@ -1480,6 +1504,17 @@ fn start_executing_work(sess: &Session,
// Tune me, plz.
items_in_queue >= max_workers.saturating_sub(workers_running / 2)
}
fn maybe_start_llvm_timer(work_item: &WorkItem,
llvm_start_time: &mut Option<Instant>) {
// We keep track of the -Ztime-passes output manually,
// since the closure-based interface does not fit well here.
if work_item.config.time_passes {
if llvm_start_time.is_none() {
*llvm_start_time = Some(Instant::now());
}
}
}
}
pub const TRANS_WORKER_ID: usize = ::std::usize::MAX;

View File

@ -43,7 +43,7 @@ use rustc::ty::{self, Ty, TyCtxt};
use rustc::dep_graph::AssertDepGraphSafe;
use rustc::middle::cstore::LinkMeta;
use rustc::hir::map as hir_map;
use rustc::util::common::time;
use rustc::util::common::{time, print_time_passes_entry};
use rustc::session::config::{self, NoDebugInfo, OutputFilenames, OutputType};
use rustc::session::Session;
use rustc_incremental::{self, IncrementalHashesMap};
@ -80,7 +80,7 @@ use libc::c_uint;
use std::ffi::{CStr, CString};
use std::str;
use std::sync::Arc;
use std::time::Instant;
use std::time::{Instant, Duration};
use std::i32;
use syntax_pos::Span;
use syntax::attr;
@ -1093,6 +1093,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
codegen_units
};
let mut total_trans_time = Duration::new(0, 0);
for (cgu_index, cgu) in codegen_units.into_iter().enumerate() {
ongoing_translation.wait_for_signal_to_translate_item();
ongoing_translation.check_for_errors(tcx.sess);
@ -1128,6 +1130,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let cost = time_to_translate.as_secs() * 1_000_000_000 +
time_to_translate.subsec_nanos() as u64;
total_trans_time += time_to_translate;
let is_last_cgu = (cgu_index + 1) == codegen_unit_count;
ongoing_translation.submit_translated_module_to_llvm(tcx.sess,
@ -1137,6 +1141,12 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
ongoing_translation.check_for_errors(tcx.sess);
}
// Since the main thread is sometimes blocked during trans, we keep track
// -Ztime-passes output manually.
print_time_passes_entry(tcx.sess.time_passes(),
"translate to LLVM IR",
total_trans_time);
if let Some(module_dispositions) = module_dispositions {
assert_module_sources::assert_module_sources(tcx, &module_dispositions);
}