Let a portion of DefPathHash uniquely identify the DefPath's crate.

This allows mapping directly from a DefPathHash to the crate it
originates from, without constructing side tables to do that mapping.

It also allows checking for DefPathHash collisions reliably and cheaply.
This commit is contained in:
Michael Woerister 2021-01-27 14:28:07 +01:00 committed by Michael Woerister
parent a3ed564c13
commit 22d489be76
12 changed files with 181 additions and 27 deletions

View File

@ -41,7 +41,6 @@ pub mod util {
pub mod ast;
pub mod attr;
pub mod crate_disambiguator;
pub mod entry;
pub mod expand;
pub mod mut_visit;

View File

@ -7,11 +7,17 @@ use std::hash::{Hash, Hasher};
use std::mem::{self, MaybeUninit};
/// A fingerprint made up of two `u64` halves.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)]
#[repr(C)]
pub struct Fingerprint(u64, u64);
impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);
/// Builds a `Fingerprint` whose two halves are `_0` and `_1`, in that order.
#[inline]
pub fn new(_0: u64, _1: u64) -> Fingerprint {
Fingerprint(_0, _1)
}
#[inline]
pub fn from_smaller_hash(hash: u64) -> Fingerprint {
Fingerprint(hash, hash)
@ -19,7 +25,12 @@ impl Fingerprint {
/// Condenses this fingerprint into a single `u64`.
#[inline]
pub fn to_smaller_hash(&self) -> u64 {
self.0
// Even though both halves of the fingerprint are expected to be good
// quality hash values, let's still combine the two values because the
// Fingerprints in DefPathHash have the StableCrateId portion which is
// the same for all DefPathHashes from the same crate. Combining the
// two halves makes sure we get a good quality hash in such cases too.
self.0.wrapping_mul(3).wrapping_add(self.1)
}
#[inline]
@ -93,7 +104,7 @@ impl FingerprintHasher for crate::unhash::Unhasher {
#[inline]
fn write_fingerprint(&mut self, fingerprint: &Fingerprint) {
// `Unhasher` only wants a single `u64`
self.write_u64(fingerprint.0);
self.write_u64(fingerprint.0.wrapping_add(fingerprint.1));
}
}

View File

@ -5,13 +5,16 @@
//! expressions) that are mostly just leftovers.
pub use crate::def_id::DefPathHash;
use crate::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
use crate::def_id::{
CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, CRATE_DEF_INDEX, LOCAL_CRATE,
};
use crate::hir;
use rustc_ast::crate_disambiguator::CrateDisambiguator;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::StableHasher;
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::vec::IndexVec;
use rustc_span::crate_disambiguator::CrateDisambiguator;
use rustc_span::hygiene::ExpnId;
use rustc_span::symbol::{kw, sym, Symbol};
@ -27,6 +30,7 @@ use tracing::debug;
pub struct DefPathTable {
/// The `DefKey` stored for each `DefIndex`.
index_to_key: IndexVec<DefIndex, DefKey>,
/// The `DefPathHash` stored for each `DefIndex`; expected to stay the same
/// length as `index_to_key`.
def_path_hashes: IndexVec<DefIndex, DefPathHash>,
/// Reverse mapping from a `DefPathHash` to its `DefIndex`. Inserting into
/// this map is also how colliding `DefPathHash`es are detected.
def_path_hash_to_index: UnhashMap<DefPathHash, DefIndex>,
}
impl DefPathTable {
@ -39,6 +43,25 @@ impl DefPathTable {
};
self.def_path_hashes.push(def_path_hash);
debug_assert!(self.def_path_hashes.len() == self.index_to_key.len());
// Check for hash collisions of DefPathHashes. These should be
// exceedingly rare.
if let Some(existing) = self.def_path_hash_to_index.insert(def_path_hash, index) {
let def_path1 = DefPath::make(LOCAL_CRATE, existing, |idx| self.def_key(idx));
let def_path2 = DefPath::make(LOCAL_CRATE, index, |idx| self.def_key(idx));
// Continuing with colliding DefPathHashes can lead to correctness
// issues. We must abort compilation.
panic!("Found DefPathHash collsion between {:?} and {:?}", def_path1, def_path2);
}
// Assert that all DefPathHashes correctly contain the local crate's
// StableCrateId
#[cfg(debug_assertions)]
if let Some(root) = self.def_path_hashes.get(CRATE_DEF_INDEX) {
assert!(def_path_hash.stable_crate_id() == root.stable_crate_id());
}
index
}
@ -108,13 +131,10 @@ pub struct DefKey {
}
impl DefKey {
fn compute_stable_hash(&self, parent_hash: DefPathHash) -> DefPathHash {
fn compute_stable_hash(&self, parent: DefPathHash) -> DefPathHash {
let mut hasher = StableHasher::new();
// We hash a `0u8` here to disambiguate between regular `DefPath` hashes,
// and the special "root_parent" below.
0u8.hash(&mut hasher);
parent_hash.hash(&mut hasher);
parent.hash(&mut hasher);
let DisambiguatedDefPathData { ref data, disambiguator } = self.disambiguated_data;
@ -127,19 +147,13 @@ impl DefKey {
disambiguator.hash(&mut hasher);
DefPathHash(hasher.finish())
}
let local_hash: u64 = hasher.finish();
fn root_parent_stable_hash(
crate_name: &str,
crate_disambiguator: CrateDisambiguator,
) -> DefPathHash {
let mut hasher = StableHasher::new();
// Disambiguate this from a regular `DefPath` hash; see `compute_stable_hash()` above.
1u8.hash(&mut hasher);
crate_name.hash(&mut hasher);
crate_disambiguator.hash(&mut hasher);
DefPathHash(hasher.finish())
// Construct the new DefPathHash, making sure that the `crate_id`
// portion of the hash is properly copied from the parent. This way the
// `crate_id` part will be recursively propagated from the root to all
// DefPathHashes in this DefPathTable.
DefPathHash::new(parent.stable_crate_id(), local_hash)
}
}
@ -295,6 +309,12 @@ impl Definitions {
self.table.def_path_hash(id.local_def_index)
}
/// Maps a `DefPathHash` back to the `LocalDefId` it was allocated for.
///
/// The lookup indexes the table's `def_path_hash_to_index` map directly.
/// NOTE(review): presumably this panics when `def_path_hash` was never
/// allocated in this table — confirm against `UnhashMap`'s `Index` impl.
#[inline]
pub fn def_path_hash_to_def_id(&self, def_path_hash: DefPathHash) -> LocalDefId {
    LocalDefId { local_def_index: self.table.def_path_hash_to_index[&def_path_hash] }
}
/// Returns the path from the crate root to `index`. The root
/// nodes are not included in the path (i.e., this will be an
/// empty vector for the crate root). For an inlined item, this
@ -332,7 +352,8 @@ impl Definitions {
},
};
let parent_hash = DefKey::root_parent_stable_hash(crate_name, crate_disambiguator);
let stable_crate_id = StableCrateId::new(crate_name, crate_disambiguator);
let parent_hash = DefPathHash::new(stable_crate_id, 0);
let def_path_hash = key.compute_stable_hash(parent_hash);
// Create the root definition.

View File

@ -6,11 +6,11 @@ use crate::rmeta::{CrateDep, CrateMetadata, CrateNumMap, CrateRoot, MetadataBlob
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_ast::{self as ast, *};
use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::svh::Svh;
use rustc_data_structures::sync::Lrc;
use rustc_expand::base::SyntaxExtension;
use rustc_hir::def_id::{CrateNum, LocalDefId, LOCAL_CRATE};
use rustc_hir::def_id::{CrateNum, LocalDefId, StableCrateId, LOCAL_CRATE};
use rustc_hir::definitions::Definitions;
use rustc_index::vec::IndexVec;
use rustc_middle::middle::cstore::{CrateDepKind, CrateSource, ExternCrate};
@ -40,6 +40,10 @@ pub struct CStore {
allocator_kind: Option<AllocatorKind>,
/// This crate has a `#[global_allocator]` item.
has_global_allocator: bool,
/// This map is used to verify we get no hash conflicts between
/// `StableCrateId` values.
stable_crate_ids: FxHashMap<StableCrateId, CrateNum>,
}
pub struct CrateLoader<'a> {
@ -192,6 +196,11 @@ impl<'a> CrateLoader<'a> {
metadata_loader: &'a MetadataLoaderDyn,
local_crate_name: &str,
) -> Self {
let local_crate_stable_id =
StableCrateId::new(local_crate_name, sess.local_crate_disambiguator());
let mut stable_crate_ids = FxHashMap::default();
stable_crate_ids.insert(local_crate_stable_id, LOCAL_CRATE);
CrateLoader {
sess,
metadata_loader,
@ -205,6 +214,7 @@ impl<'a> CrateLoader<'a> {
injected_panic_runtime: None,
allocator_kind: None,
has_global_allocator: false,
stable_crate_ids,
},
used_extern_options: Default::default(),
}
@ -311,6 +321,20 @@ impl<'a> CrateLoader<'a> {
res
}
/// Records `root`'s `StableCrateId` as belonging to `cnum` and reports an
/// error if another crate was already registered under the same id.
///
/// On a collision the returned `CrateError::StableCrateIdCollision` carries
/// the name of the incoming crate followed by the name of the crate that
/// previously owned the id.
///
/// NOTE(review): the map is seeded with `LOCAL_CRATE` when the loader is
/// constructed; confirm that `get_crate_data` copes with `LOCAL_CRATE` in
/// case a dependency collides with the crate being compiled.
fn verify_no_stable_crate_id_hash_conflicts(
    &mut self,
    root: &CrateRoot<'_>,
    cnum: CrateNum,
) -> Result<(), CrateError> {
    match self.cstore.stable_crate_ids.insert(root.stable_crate_id(), cnum) {
        // First crate seen with this id: nothing to report.
        None => Ok(()),
        Some(previous_cnum) => {
            let incoming_name = root.name();
            let previous_name = self.cstore.get_crate_data(previous_cnum).name();
            Err(CrateError::StableCrateIdCollision(incoming_name, previous_name))
        }
    }
}
fn register_crate(
&mut self,
host_lib: Option<Library>,
@ -332,6 +356,8 @@ impl<'a> CrateLoader<'a> {
// Claim this crate number and cache it
let cnum = self.cstore.alloc_new_crate_num();
self.verify_no_stable_crate_id_hash_conflicts(&crate_root, cnum)?;
info!(
"register crate `{}` (cnum = {}. private_dep = {})",
crate_root.name(),

View File

@ -888,6 +888,7 @@ crate enum CrateError {
MultipleMatchingCrates(Symbol, FxHashMap<Svh, Library>),
SymbolConflictsCurrent(Symbol),
SymbolConflictsOthers(Symbol),
StableCrateIdCollision(Symbol, Symbol),
DlOpen(String),
DlSym(String),
LocatorCombined(CombinedLocatorError),
@ -970,6 +971,13 @@ impl CrateError {
`-C metadata`. This will result in symbol conflicts between the two.",
root_name,
),
CrateError::StableCrateIdCollision(crate_name0, crate_name1) => {
let msg = format!(
"found crates (`{}` and `{}`) with colliding StableCrateId values.",
crate_name0, crate_name1
);
sess.struct_span_err(span, &msg)
}
CrateError::DlOpen(s) | CrateError::DlSym(s) => sess.struct_span_err(span, &s),
CrateError::LocatorCombined(locator) => {
let crate_name = locator.crate_name;

View File

@ -635,6 +635,10 @@ impl CrateRoot<'_> {
self.hash
}
/// Returns the `StableCrateId` stored in this crate root.
crate fn stable_crate_id(&self) -> StableCrateId {
self.stable_crate_id
}
crate fn triple(&self) -> &TargetTriple {
&self.triple
}

View File

@ -651,6 +651,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
disambiguator: tcx.sess.local_crate_disambiguator(),
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
panic_strategy: tcx.sess.panic_strategy(),
edition: tcx.sess.edition(),
has_global_allocator: tcx.has_global_allocator(LOCAL_CRATE),

View File

@ -7,7 +7,7 @@ use rustc_data_structures::svh::Svh;
use rustc_data_structures::sync::MetadataRef;
use rustc_hir as hir;
use rustc_hir::def::{CtorKind, DefKind};
use rustc_hir::def_id::{DefId, DefIndex, DefPathHash};
use rustc_hir::def_id::{DefId, DefIndex, DefPathHash, StableCrateId};
use rustc_hir::definitions::DefKey;
use rustc_hir::lang_items;
use rustc_index::{bit_set::FiniteBitSet, vec::IndexVec};
@ -203,6 +203,7 @@ crate struct CrateRoot<'tcx> {
extra_filename: String,
hash: Svh,
disambiguator: CrateDisambiguator,
stable_crate_id: StableCrateId,
panic_strategy: PanicStrategy,
edition: Edition,
has_global_allocator: bool,

View File

@ -8,7 +8,6 @@ use crate::parse::ParseSess;
use crate::search_paths::{PathKind, SearchPath};
pub use rustc_ast::attr::MarkedAttrs;
pub use rustc_ast::crate_disambiguator::CrateDisambiguator;
pub use rustc_ast::Attribute;
use rustc_data_structures::flock;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@ -23,6 +22,7 @@ use rustc_errors::json::JsonEmitter;
use rustc_errors::registry::Registry;
use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, DiagnosticId, ErrorReported};
use rustc_lint_defs::FutureBreakage;
pub use rustc_span::crate_disambiguator::CrateDisambiguator;
use rustc_span::edition::Edition;
use rustc_span::source_map::{FileLoader, MultiSpan, RealFileLoader, SourceMap, Span};
use rustc_span::{sym, SourceFileHashAlgorithm, Symbol};

View File

@ -1,3 +1,4 @@
use crate::crate_disambiguator::CrateDisambiguator;
use crate::HashStableContext;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
@ -105,10 +106,66 @@ impl ::std::fmt::Debug for CrateNum {
}
}
/// A `DefPathHash` is a fixed-size representation of a `DefPath` that is
/// stable across crate and compilation session boundaries. It consists of two
/// separate 64-bit hashes. The first uniquely identifies the crate this
/// `DefPathHash` originates from (see [StableCrateId]), and the second
/// uniquely identifies the corresponding `DefPath` within that crate. Together
/// they form a unique identifier within an entire crate graph.
///
/// There is a very small chance of hash collisions, which would mean that two
/// different `DefPath`s map to the same `DefPathHash`. Proceeding with
/// compilation after such a hash collision would very probably lead to an ICE,
/// and in the worst case lead to a silent mis-compilation. The compiler
/// therefore actively and exhaustively checks for such hash collisions and
/// aborts compilation if it finds one.
///
/// `DefPathHash` uses 64-bit hashes for both the crate-id part and the
/// crate-internal part, even though it is likely that there are many more
/// `LocalDefId`s in a single crate than there are individual crates in a crate
/// graph. Since we use the same number of bits in both cases, the collision
/// probability for the crate-local part will be quite a bit higher (though
/// still very small).
///
/// This imbalance is not by accident: A hash collision in the
/// crate-local part of a `DefPathHash` will be detected and reported while
/// compiling the crate in question. Such a collision does not depend on
/// outside factors and can be easily fixed by the crate maintainer (e.g. by
/// renaming the item in question or by bumping the crate version in a harmless
/// way).
///
/// A collision between crate-id hashes on the other hand is harder to fix
/// because it depends on the set of crates in the entire crate graph of a
/// compilation session. Again, using the same crate with a different version
/// number would fix the issue with a high probability -- but that might be
/// easier said than done if the crates in question are dependencies of
/// third-party crates.
///
/// That being said, given a high quality hash function, the collision
/// probabilities in question are very small. For example, for a big crate like
/// `rustc_middle` (with ~50000 `LocalDefId`s as of the time of writing) there
/// is a probability of roughly 1 in 14,750,000,000 of a crate-internal
/// collision occurring. For a big crate graph with 1000 crates in it, there is
/// a probability of 1 in 36,890,000,000,000 of a `StableCrateId` collision.
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct DefPathHash(pub Fingerprint);
impl DefPathHash {
    /// Returns the [StableCrateId] identifying the crate this [DefPathHash]
    /// originates from.
    ///
    /// The id is read back from the first component of the wrapped
    /// fingerprint's value, which is the slot [DefPathHash::new] fills with
    /// the crate id.
    #[inline]
    pub fn stable_crate_id(&self) -> StableCrateId {
        StableCrateId(self.0.as_value().0)
    }

    /// Builds a new [DefPathHash] with the given [StableCrateId] and
    /// `local_hash`, where `local_hash` must be unique within its crate.
    ///
    /// The crate id becomes the first half of the fingerprint and
    /// `local_hash` the second.
    // `#[inline]` matches `stable_crate_id` above: this is a tiny wrapper that
    // is invoked from other crates for every definition being hashed.
    #[inline]
    pub fn new(stable_crate_id: StableCrateId, local_hash: u64) -> DefPathHash {
        DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash))
    }
}
impl Borrow<Fingerprint> for DefPathHash {
#[inline]
fn borrow(&self) -> &Fingerprint {
@ -116,6 +173,30 @@ impl Borrow<Fingerprint> for DefPathHash {
}
}
/// A [StableCrateId] is a 64-bit hash of `(crate-name, crate-disambiguator)`. It
/// is to [CrateNum] what [DefPathHash] is to [DefId]. It is stable across
/// compilation sessions.
///
/// Since the ID is a hash value there is a (very small) chance that two crates
/// end up with the same [StableCrateId]. The compiler will check for such
/// collisions when loading crates and abort compilation in order to avoid
/// further trouble.
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Encodable, Decodable)]
pub struct StableCrateId(u64);
impl StableCrateId {
    /// Derives the stable ID of a crate by feeding its name and then its
    /// disambiguator, in that order, into a fresh `StableHasher` and wrapping
    /// the hasher's final value.
    pub fn new(crate_name: &str, crate_disambiguator: CrateDisambiguator) -> StableCrateId {
        use std::hash::Hash;

        let mut state = StableHasher::new();
        // The order matters: name first, disambiguator second.
        Hash::hash(crate_name, &mut state);
        Hash::hash(&crate_disambiguator, &mut state);
        StableCrateId(state.finish())
    }
}
rustc_index::newtype_index! {
/// A DefIndex is an index into the hir-map for a crate, identifying a
/// particular definition. It should really be considered an interned

View File

@ -47,6 +47,8 @@ pub mod lev_distance;
mod span_encoding;
pub use span_encoding::{Span, DUMMY_SP};
pub mod crate_disambiguator;
pub mod symbol;
pub use symbol::{sym, Symbol};