Rollup merge of #37336 - michaelwoerister:debuginfo-type-ids, r=eddyb

debuginfo: Use TypeIdHasher for generating global debuginfo type IDs.

The only requirement for debuginfo type IDs is that they are globally unique. The `TypeIdHasher` (which is used for `std::intrinsics::type_id()`) provides that, so we can get rid of some redundancy by re-using it for debuginfo. Values produced by the `TypeIdHasher` are also more stable than those produced by the current `UniqueTypeId` generation algorithm — the current IDs incorporate `NodeId`s, which is not good for incremental compilation.

@alexcrichton @eddyb : Could you take a look at the endianness adaptations that I made to the `TypeIdHasher`?

Also, are we sure that a 64 bit hash is wide enough for something that is supposed to be globally unique? For debuginfo I'm using 160 bits to make sure that we don't run into conflicts there.
This commit is contained in:
Jonathan Turner 2016-10-24 15:41:29 -07:00 committed by GitHub
commit 691ab948ce
2 changed files with 81 additions and 228 deletions

View File

@ -392,27 +392,30 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
} }
} }
// When hashing a type this ends up affecting properties like symbol names. We /// When hashing a type this ends up affecting properties like symbol names. We
// want these symbol names to be calculated independent of other factors like /// want these symbol names to be calculated independent of other factors like
// what architecture you're compiling *from*. /// what architecture you're compiling *from*.
// ///
// The hashing just uses the standard `Hash` trait, but the implementations of /// The hashing just uses the standard `Hash` trait, but the implementations of
// `Hash` for the `usize` and `isize` types are *not* architecture independent /// `Hash` for the `usize` and `isize` types are *not* architecture independent
// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and /// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and
// `isize` completely when hashing. To ensure that these don't leak in we use a /// `isize` completely when hashing. To ensure that these don't leak in we use a
// custom hasher implementation here which inflates the size of these to a `u64` /// custom hasher implementation here which inflates the size of these to a `u64`
// and `i64`. /// and `i64`.
struct WidenUsizeHasher<H> { ///
/// The same goes for endianess: We always convert multi-byte integers to little
/// endian before hashing.
pub struct ArchIndependentHasher<H> {
inner: H, inner: H,
} }
impl<H> WidenUsizeHasher<H> { impl<H> ArchIndependentHasher<H> {
fn new(inner: H) -> WidenUsizeHasher<H> { pub fn new(inner: H) -> ArchIndependentHasher<H> {
WidenUsizeHasher { inner: inner } ArchIndependentHasher { inner: inner }
} }
} }
impl<H: Hasher> Hasher for WidenUsizeHasher<H> { impl<H: Hasher> Hasher for ArchIndependentHasher<H> {
fn write(&mut self, bytes: &[u8]) { fn write(&mut self, bytes: &[u8]) {
self.inner.write(bytes) self.inner.write(bytes)
} }
@ -425,44 +428,44 @@ impl<H: Hasher> Hasher for WidenUsizeHasher<H> {
self.inner.write_u8(i) self.inner.write_u8(i)
} }
fn write_u16(&mut self, i: u16) { fn write_u16(&mut self, i: u16) {
self.inner.write_u16(i) self.inner.write_u16(i.to_le())
} }
fn write_u32(&mut self, i: u32) { fn write_u32(&mut self, i: u32) {
self.inner.write_u32(i) self.inner.write_u32(i.to_le())
} }
fn write_u64(&mut self, i: u64) { fn write_u64(&mut self, i: u64) {
self.inner.write_u64(i) self.inner.write_u64(i.to_le())
} }
fn write_usize(&mut self, i: usize) { fn write_usize(&mut self, i: usize) {
self.inner.write_u64(i as u64) self.inner.write_u64((i as u64).to_le())
} }
fn write_i8(&mut self, i: i8) { fn write_i8(&mut self, i: i8) {
self.inner.write_i8(i) self.inner.write_i8(i)
} }
fn write_i16(&mut self, i: i16) { fn write_i16(&mut self, i: i16) {
self.inner.write_i16(i) self.inner.write_i16(i.to_le())
} }
fn write_i32(&mut self, i: i32) { fn write_i32(&mut self, i: i32) {
self.inner.write_i32(i) self.inner.write_i32(i.to_le())
} }
fn write_i64(&mut self, i: i64) { fn write_i64(&mut self, i: i64) {
self.inner.write_i64(i) self.inner.write_i64(i.to_le())
} }
fn write_isize(&mut self, i: isize) { fn write_isize(&mut self, i: isize) {
self.inner.write_i64(i as i64) self.inner.write_i64((i as i64).to_le())
} }
} }
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> { pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> {
tcx: TyCtxt<'a, 'gcx, 'tcx>, tcx: TyCtxt<'a, 'gcx, 'tcx>,
state: WidenUsizeHasher<H>, state: ArchIndependentHasher<H>,
} }
impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> { impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self { pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self {
TypeIdHasher { TypeIdHasher {
tcx: tcx, tcx: tcx,
state: WidenUsizeHasher::new(state), state: ArchIndependentHasher::new(state),
} }
} }
@ -493,6 +496,10 @@ impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn def_path(&mut self, def_path: &ast_map::DefPath) { pub fn def_path(&mut self, def_path: &ast_map::DefPath) {
def_path.deterministic_hash_to(self.tcx, &mut self.state); def_path.deterministic_hash_to(self.tcx, &mut self.state);
} }
pub fn into_inner(self) -> H {
self.state.inner
}
} }
impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> { impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> {

View File

@ -16,7 +16,7 @@ use self::EnumDiscriminantInfo::*;
use super::utils::{debug_context, DIB, span_start, bytes_to_bits, size_and_align_of, use super::utils::{debug_context, DIB, span_start, bytes_to_bits, size_and_align_of,
get_namespace_and_span_for_item, create_DIArray, is_node_local_to_unit}; get_namespace_and_span_for_item, create_DIArray, is_node_local_to_unit};
use super::namespace::mangled_name_of_item; use super::namespace::mangled_name_of_item;
use super::type_names::{compute_debuginfo_type_name, push_debuginfo_type_name}; use super::type_names::compute_debuginfo_type_name;
use super::{CrateDebugContext}; use super::{CrateDebugContext};
use context::SharedCrateContext; use context::SharedCrateContext;
use session::Session; use session::Session;
@ -26,8 +26,11 @@ use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor, DICompositeType, DI
use rustc::hir::def::CtorKind; use rustc::hir::def::CtorKind;
use rustc::hir::def_id::DefId; use rustc::hir::def_id::DefId;
use rustc::ty::fold::TypeVisitor;
use rustc::ty::subst::Substs; use rustc::ty::subst::Substs;
use rustc::ty::util::TypeIdHasher;
use rustc::hir; use rustc::hir;
use rustc_data_structures::blake2b;
use {type_of, machine, monomorphize}; use {type_of, machine, monomorphize};
use common::CrateContext; use common::CrateContext;
use type_::Type; use type_::Type;
@ -38,6 +41,7 @@ use util::common::path2cstr;
use libc::{c_uint, c_longlong}; use libc::{c_uint, c_longlong};
use std::ffi::CString; use std::ffi::CString;
use std::fmt::Write;
use std::path::Path; use std::path::Path;
use std::ptr; use std::ptr;
use std::rc::Rc; use std::rc::Rc;
@ -46,6 +50,7 @@ use syntax::ast;
use syntax::parse::token; use syntax::parse::token;
use syntax_pos::{self, Span}; use syntax_pos::{self, Span};
// From DWARF 5. // From DWARF 5.
// See http://www.dwarfstd.org/ShowIssue.php?issue=140129.1 // See http://www.dwarfstd.org/ShowIssue.php?issue=140129.1
const DW_LANG_RUST: c_uint = 0x1c; const DW_LANG_RUST: c_uint = 0x1c;
@ -138,219 +143,58 @@ impl<'tcx> TypeMap<'tcx> {
// ID will be generated and stored for later lookup. // ID will be generated and stored for later lookup.
fn get_unique_type_id_of_type<'a>(&mut self, cx: &CrateContext<'a, 'tcx>, fn get_unique_type_id_of_type<'a>(&mut self, cx: &CrateContext<'a, 'tcx>,
type_: Ty<'tcx>) -> UniqueTypeId { type_: Ty<'tcx>) -> UniqueTypeId {
// Let's see if we already have something in the cache
// basic type -> {:name of the type:}
// tuple -> {tuple_(:param-uid:)*}
// struct -> {struct_:svh: / :node-id:_<(:param-uid:),*> }
// enum -> {enum_:svh: / :node-id:_<(:param-uid:),*> }
// enum variant -> {variant_:variant-name:_:enum-uid:}
// reference (&) -> {& :pointee-uid:}
// mut reference (&mut) -> {&mut :pointee-uid:}
// ptr (*) -> {* :pointee-uid:}
// mut ptr (*mut) -> {*mut :pointee-uid:}
// unique ptr (box) -> {box :pointee-uid:}
// @-ptr (@) -> {@ :pointee-uid:}
// sized vec ([T; x]) -> {[:size:] :element-uid:}
// unsized vec ([T]) -> {[] :element-uid:}
// trait (T) -> {trait_:svh: / :node-id:_<(:param-uid:),*> }
// closure -> {<unsafe_> <once_> :store-sigil: |(:param-uid:),* <,_...>| -> \
// :return-type-uid: : (:bounds:)*}
// function -> {<unsafe_> <abi_> fn( (:param-uid:)* <,_...> ) -> \
// :return-type-uid:}
match self.type_to_unique_id.get(&type_).cloned() { match self.type_to_unique_id.get(&type_).cloned() {
Some(unique_type_id) => return unique_type_id, Some(unique_type_id) => return unique_type_id,
None => { /* generate one */} None => { /* generate one */}
}; };
let mut unique_type_id = String::with_capacity(256); let mut type_id_hasher = TypeIdHasher::new(cx.tcx(),
unique_type_id.push('{'); DebugInfoTypeIdHasher::new());
type_id_hasher.visit_ty(type_);
let hash = type_id_hasher.into_inner().into_hash();
match type_.sty { let mut unique_type_id = String::with_capacity(TYPE_ID_HASH_LENGTH * 2);
ty::TyNever |
ty::TyBool |
ty::TyChar |
ty::TyStr |
ty::TyInt(_) |
ty::TyUint(_) |
ty::TyFloat(_) => {
push_debuginfo_type_name(cx, type_, false, &mut unique_type_id);
},
ty::TyAdt(def, substs) => {
unique_type_id.push_str(&(String::from(def.descr()) + " "));
from_def_id_and_substs(self, cx, def.did, substs, &mut unique_type_id);
}
ty::TyTuple(component_types) if component_types.is_empty() => {
push_debuginfo_type_name(cx, type_, false, &mut unique_type_id);
},
ty::TyTuple(component_types) => {
unique_type_id.push_str("tuple ");
for &component_type in component_types {
let component_type_id =
self.get_unique_type_id_of_type(cx, component_type);
let component_type_id =
self.get_unique_type_id_as_string(component_type_id);
unique_type_id.push_str(&component_type_id[..]);
}
},
ty::TyBox(inner_type) => {
unique_type_id.push_str("box ");
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyRawPtr(ty::TypeAndMut { ty: inner_type, mutbl } ) => {
unique_type_id.push('*');
if mutbl == hir::MutMutable {
unique_type_id.push_str("mut");
}
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type); for byte in hash.into_iter() {
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id); write!(&mut unique_type_id, "{:x}", byte).unwrap();
unique_type_id.push_str(&inner_type_id[..]); }
},
ty::TyRef(_, ty::TypeAndMut { ty: inner_type, mutbl }) => {
unique_type_id.push('&');
if mutbl == hir::MutMutable {
unique_type_id.push_str("mut");
}
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyArray(inner_type, len) => {
unique_type_id.push_str(&format!("[{}]", len));
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TySlice(inner_type) => {
unique_type_id.push_str("[]");
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyTrait(ref trait_data) => {
unique_type_id.push_str("trait ");
let principal = cx.tcx().erase_late_bound_regions_and_normalize(
&trait_data.principal);
from_def_id_and_substs(self,
cx,
principal.def_id,
principal.substs,
&mut unique_type_id);
},
ty::TyFnDef(.., &ty::BareFnTy{ unsafety, abi, ref sig } ) |
ty::TyFnPtr(&ty::BareFnTy{ unsafety, abi, ref sig } ) => {
if unsafety == hir::Unsafety::Unsafe {
unique_type_id.push_str("unsafe ");
}
unique_type_id.push_str(abi.name());
unique_type_id.push_str(" fn(");
let sig = cx.tcx().erase_late_bound_regions_and_normalize(sig);
for &parameter_type in &sig.inputs {
let parameter_type_id =
self.get_unique_type_id_of_type(cx, parameter_type);
let parameter_type_id =
self.get_unique_type_id_as_string(parameter_type_id);
unique_type_id.push_str(&parameter_type_id[..]);
unique_type_id.push(',');
}
if sig.variadic {
unique_type_id.push_str("...");
}
unique_type_id.push_str(")->");
let return_type_id = self.get_unique_type_id_of_type(cx, sig.output);
let return_type_id = self.get_unique_type_id_as_string(return_type_id);
unique_type_id.push_str(&return_type_id[..]);
},
ty::TyClosure(_, substs) if substs.upvar_tys.is_empty() => {
push_debuginfo_type_name(cx, type_, false, &mut unique_type_id);
},
ty::TyClosure(_, substs) => {
unique_type_id.push_str("closure ");
for upvar_type in substs.upvar_tys {
let upvar_type_id =
self.get_unique_type_id_of_type(cx, upvar_type);
let upvar_type_id =
self.get_unique_type_id_as_string(upvar_type_id);
unique_type_id.push_str(&upvar_type_id[..]);
}
},
_ => {
bug!("get_unique_type_id_of_type() - unexpected type: {:?}",
type_)
}
};
unique_type_id.push('}');
// Trim to size before storing permanently
unique_type_id.shrink_to_fit();
let key = self.unique_id_interner.intern(&unique_type_id); let key = self.unique_id_interner.intern(&unique_type_id);
self.type_to_unique_id.insert(type_, UniqueTypeId(key)); self.type_to_unique_id.insert(type_, UniqueTypeId(key));
return UniqueTypeId(key); return UniqueTypeId(key);
fn from_def_id_and_substs<'a, 'tcx>(type_map: &mut TypeMap<'tcx>, // The hasher we are using to generate the UniqueTypeId. We want
cx: &CrateContext<'a, 'tcx>, // something that provides more than the 64 bits of the DefaultHasher.
def_id: DefId, const TYPE_ID_HASH_LENGTH: usize = 20;
substs: &Substs<'tcx>,
output: &mut String) { struct DebugInfoTypeIdHasher {
// First, find out the 'real' def_id of the type. Items inlined from state: blake2b::Blake2bCtx
// other crates have to be mapped back to their source. }
let def_id = if let Some(node_id) = cx.tcx().map.as_local_node_id(def_id) {
if cx.tcx().map.is_inlined_node_id(node_id) { impl ::std::hash::Hasher for DebugInfoTypeIdHasher {
// The given def_id identifies the inlined copy of a fn finish(&self) -> u64 {
// type definition, let's take the source of the copy. unimplemented!()
cx.defid_for_inlined_node(node_id).unwrap() }
} else {
def_id #[inline]
fn write(&mut self, bytes: &[u8]) {
blake2b::blake2b_update(&mut self.state, bytes);
}
}
impl DebugInfoTypeIdHasher {
fn new() -> DebugInfoTypeIdHasher {
DebugInfoTypeIdHasher {
state: blake2b::blake2b_new(TYPE_ID_HASH_LENGTH, &[])
} }
} else { }
def_id
};
// Get the crate name/disambiguator as first part of the identifier. fn into_hash(self) -> [u8; TYPE_ID_HASH_LENGTH] {
let crate_name = if def_id.is_local() { let mut hash = [0u8; TYPE_ID_HASH_LENGTH];
cx.tcx().crate_name.clone() blake2b::blake2b_final(self.state, &mut hash);
} else { hash
cx.sess().cstore.original_crate_name(def_id.krate)
};
let crate_disambiguator = cx.tcx().crate_disambiguator(def_id.krate);
output.push_str(&crate_name[..]);
output.push_str("/");
output.push_str(&crate_disambiguator[..]);
output.push_str("/");
// Add the def-index as the second part
output.push_str(&format!("{:x}", def_id.index.as_usize()));
if substs.types().next().is_some() {
output.push('<');
for type_parameter in substs.types() {
let param_type_id =
type_map.get_unique_type_id_of_type(cx, type_parameter);
let param_type_id =
type_map.get_unique_type_id_as_string(param_type_id);
output.push_str(&param_type_id[..]);
output.push(',');
}
output.push('>');
} }
} }
} }
@ -1927,15 +1771,17 @@ pub fn create_global_var_metadata(cx: &CrateContext,
return; return;
} }
let tcx = cx.tcx();
// Don't create debuginfo for globals inlined from other crates. The other // Don't create debuginfo for globals inlined from other crates. The other
// crate should already contain debuginfo for it. More importantly, the // crate should already contain debuginfo for it. More importantly, the
// global might not even exist in un-inlined form anywhere which would lead // global might not even exist in un-inlined form anywhere which would lead
// to a linker errors. // to a linker errors.
if cx.tcx().map.is_inlined_node_id(node_id) { if tcx.map.is_inlined_node_id(node_id) {
return; return;
} }
let node_def_id = cx.tcx().map.local_def_id(node_id); let node_def_id = tcx.map.local_def_id(node_id);
let (var_scope, span) = get_namespace_and_span_for_item(cx, node_def_id); let (var_scope, span) = get_namespace_and_span_for_item(cx, node_def_id);
let (file_metadata, line_number) = if span != syntax_pos::DUMMY_SP { let (file_metadata, line_number) = if span != syntax_pos::DUMMY_SP {
@ -1946,9 +1792,9 @@ pub fn create_global_var_metadata(cx: &CrateContext,
}; };
let is_local_to_unit = is_node_local_to_unit(cx, node_id); let is_local_to_unit = is_node_local_to_unit(cx, node_id);
let variable_type = cx.tcx().node_id_to_type(node_id); let variable_type = tcx.erase_regions(&tcx.node_id_to_type(node_id));
let type_metadata = type_metadata(cx, variable_type, span); let type_metadata = type_metadata(cx, variable_type, span);
let var_name = cx.tcx().item_name(node_def_id).to_string(); let var_name = tcx.item_name(node_def_id).to_string();
let linkage_name = mangled_name_of_item(cx, node_def_id, ""); let linkage_name = mangled_name_of_item(cx, node_def_id, "");
let var_name = CString::new(var_name).unwrap(); let var_name = CString::new(var_name).unwrap();