Remove old symbol naming code.

This commit is contained in:
Michael Woerister 2016-03-01 08:18:21 -05:00 committed by Niko Matsakis
parent 2eebb7b605
commit 82b5f1d869
2 changed files with 88 additions and 148 deletions

View File

@ -22,9 +22,7 @@ use session::search_paths::PathKind;
use session::Session;
use middle::cstore::{self, CrateStore, LinkMeta};
use middle::cstore::{LinkagePreference, NativeLibraryKind};
use middle::def_id::DefId;
use middle::dependency_format::Linkage;
use middle::ty::TyCtxt;
use trans::CrateTranslation;
use util::common::time;
use util::fs::fix_windows_verbatim_for_gcc;
@ -43,7 +41,6 @@ use std::str;
use flate;
use syntax::ast;
use syntax::codemap::Span;
use syntax::parse::token::InternedString;
use syntax::attr::AttrMetaMethods;
use rustc_front::hir;
@ -78,58 +75,6 @@ pub const RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET: usize =
RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8;
/*
* Name mangling and its relationship to metadata. This is complex. Read
* carefully.
*
* The semantic model of Rust linkage is, broadly, that "there's no global
* namespace" between crates. Our aim is to preserve the illusion of this
* model despite the fact that it's not *quite* possible to implement on
* modern linkers. We initially didn't use system linkers at all, but have
* been convinced of their utility.
*
* There are a few issues to handle:
*
* - Linkers operate on a flat namespace, so we have to flatten names.
* We do this using the C++ namespace-mangling technique. Foo::bar
* symbols and such.
*
* - Symbols with the same name but different types need to get different
* linkage-names. We do this by hashing a string-encoding of the type with
* a cryptographic hash function (CHF: we use SHA256) and truncating the
* digest to a fixed size (currently 16 hex digits) to "prevent
* collisions". This is not airtight, but 16 hex digits with uniform
* probability means you're going to need 2**32 same-name symbols in the
* same process before you're even hitting birthday-paradox collision
* probability.
*
* - Symbols in different crates but with same names "within" the crate need
* to get different linkage-names.
*
* - The hash shown in the filename needs to be predictable and stable for
* build tooling integration. It also needs to be using a hash function
* which is easy to use from Python, make, etc.
*
* So here is what we do:
*
* - Consider the package id; every crate has one (specified with crate_id
* attribute). If a package id isn't provided explicitly, we infer a
* versionless one from the output name. The version will end up being 0.0
* in this case. CNAME and CVERS are taken from this package id. For
* example, github.com/mozilla/CNAME#CVERS.
*
* - Define CMH as SHA256(crateid).
*
* - Define CMH8 as the first 8 characters of CMH.
*
* - Compile our crate to lib CNAME-CMH8-CVERS.so
*
* - Define STH(sym) as SHA256(CMH, type_str(sym))
*
* - Suffix a mangled sym with ::STH@CVERS, so that it is unique in the
* name, non-name metadata, and type sense, and versioned in the way
* system linkers understand.
*/
pub fn find_crate_name(sess: Option<&Session>,
attrs: &[ast::Attribute],
input: &Input) -> String {
@ -191,95 +136,6 @@ pub fn build_link_meta(sess: &Session,
return r;
}
/// Renders a `DefId` as the string `<krate>:<index>`.
///
/// The type context argument is accepted but not consulted.
pub fn def_to_string(_tcx: &TyCtxt, did: DefId) -> String {
    let krate = did.krate;
    let index = did.index.as_usize();
    format!("{}:{}", krate, index)
}
/// Name sanitization. LLVM will happily accept identifiers with weird names,
/// but gas doesn't! gas accepts the following characters in symbols:
/// a-z, A-Z, 0-9, ., _, $
///
/// Every character of `s` is mapped as follows:
///
/// * `@ * & < > ( ) ,` become the `$`-delimited mnemonics `$SP$`, `$BP$`,
///   `$RF$`, `$LT$`, `$GT$`, `$LP$`, `$RP$` and `$C$`;
/// * `-` and `:` both become `.`;
/// * characters gas accepts are copied through unchanged;
/// * anything else becomes `$u<hex>$`, where `<hex>` is the code point as
///   printed by `char::escape_unicode`.
///
/// If the non-empty result does not begin with `_` or an XID_Start
/// character, a single `_` is prepended.
pub fn sanitize(s: &str) -> String {
    let mut sanitized = String::new();

    for ch in s.chars() {
        match ch {
            // Already legal for gas: copy verbatim.
            'a' ... 'z' | 'A' ... 'Z' | '0' ... '9' | '_' | '.' | '$' => sanitized.push(ch),

            // '.' doesn't occur in types and functions, so it doubles as the
            // replacement for ':' and '-'.
            '-' | ':' => sanitized.push('.'),

            // Punctuation with a dedicated `$..$` escape.
            '@' => sanitized.push_str("$SP$"),
            '*' => sanitized.push_str("$BP$"),
            '&' => sanitized.push_str("$RF$"),
            '<' => sanitized.push_str("$LT$"),
            '>' => sanitized.push_str("$GT$"),
            '(' => sanitized.push_str("$LP$"),
            ')' => sanitized.push_str("$RP$"),
            ',' => sanitized.push_str("$C$"),

            // Everything else: `\u{XXXX}` is rewritten to `$uXXXX$` by
            // replacing the backslash with '$', dropping the opening brace
            // and turning the closing brace into '$'.
            other => {
                sanitized.push('$');
                let escaped = other.escape_unicode().skip(1).filter(|&e| e != '{');
                for e in escaped {
                    sanitized.push(if e == '}' { '$' } else { e });
                }
            }
        }
    }

    // Underscore-qualify anything that didn't start as an ident.
    let needs_underscore = match sanitized.as_bytes().first() {
        Some(&first) => first != b'_' && !(first as char).is_xid_start(),
        None => false,
    };

    if needs_underscore {
        format!("_{}", sanitized)
    } else {
        sanitized
    }
}
/// Builds a linker symbol from `path` and an optional trailing `hash`.
///
/// The result has the shape `_ZN<len><name>...E`: every path component, and
/// then the hash when one is given, is passed through `sanitize` and emitted
/// as its length followed by the sanitized text.
pub fn mangle<PI: Iterator<Item=InternedString>>(path: PI, hash: Option<&str>) -> String {
    // This follows the C++ namespace-mangling style; see
    // http://en.wikipedia.org/wiki/Name_mangling for more info.
    //
    // On OSX arbitrary symbols are actually possible in function names (at
    // least when given to LLVM), but not when using unix's linker. Perhaps
    // one day, when we just use a linker from LLVM, this mangling won't be
    // needed. Its drawback is that it seriously limits the available
    // characters: things like &T can't appear in symbol names, even though
    // one would theoretically want them for impls of traits on that type.
    //
    // To work on all platforms and still get *some* reasonable output, C++
    // name-mangling is used.

    // Appends one <len, name> pair for the sanitized form of `component`.
    fn append_component(out: &mut String, component: &str) {
        let clean = sanitize(component);
        out.push_str(&clean.len().to_string());
        out.push_str(&clean);
    }

    // _Z == Begin name-sequence, N == nested
    let mut symbol = String::from("_ZN");

    for segment in path {
        append_component(&mut symbol, &segment);
    }

    match hash {
        Some(h) => append_component(&mut symbol, h),
        None => {}
    }

    // End name-sequence.
    symbol.push('E');
    symbol
}
pub fn get_linker(sess: &Session) -> (String, Command) {
if let Some(ref linker) = sess.opts.cg.linker {
(linker.clone(), Command::new(linker))

View File

@ -107,9 +107,8 @@ use rustc::front::map::definitions::DefPath;
use std::fmt::Write;
use syntax::ast;
use syntax::parse::token;
use syntax::parse::token::{self, InternedString};
use serialize::hex::ToHex;
use super::link;
pub fn def_id_to_string<'tcx>(tcx: &ty::TyCtxt<'tcx>, def_id: DefId) -> String {
@ -207,7 +206,7 @@ fn exported_name_with_opt_suffix<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
path.push(token::intern_and_get_ident(suffix));
}
link::mangle(path.into_iter(), Some(&hash[..]))
mangle(path.into_iter(), Some(&hash[..]))
}
pub fn exported_name<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
@ -232,5 +231,90 @@ pub fn internal_name_from_type_and_suffix<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>
let path = [token::intern(&t.to_string()).as_str(),
gensym_name(suffix).as_str()];
let hash = get_symbol_hash(ccx, &Vec::new(), cstore::LOCAL_CRATE, &[t]);
link::mangle(path.iter().cloned(), Some(&hash[..]))
mangle(path.iter().cloned(), Some(&hash[..]))
}
/// Name sanitization. LLVM will happily accept identifiers with weird names,
/// but gas doesn't! gas accepts the following characters in symbols:
/// a-z, A-Z, 0-9, ., _, $
///
/// Every character of `s` is mapped as follows:
///
/// * `@ * & < > ( ) ,` become the `$`-delimited mnemonics `$SP$`, `$BP$`,
///   `$RF$`, `$LT$`, `$GT$`, `$LP$`, `$RP$` and `$C$`;
/// * `-` and `:` both become `.`;
/// * characters gas accepts are copied through unchanged;
/// * anything else becomes `$u<hex>$`, where `<hex>` is the code point as
///   printed by `char::escape_unicode`.
///
/// If the non-empty result does not begin with `_` or an XID_Start
/// character, a single `_` is prepended.
pub fn sanitize(s: &str) -> String {
    let mut sanitized = String::new();

    for ch in s.chars() {
        match ch {
            // Already legal for gas: copy verbatim.
            'a' ... 'z' | 'A' ... 'Z' | '0' ... '9' | '_' | '.' | '$' => sanitized.push(ch),

            // '.' doesn't occur in types and functions, so it doubles as the
            // replacement for ':' and '-'.
            '-' | ':' => sanitized.push('.'),

            // Punctuation with a dedicated `$..$` escape.
            '@' => sanitized.push_str("$SP$"),
            '*' => sanitized.push_str("$BP$"),
            '&' => sanitized.push_str("$RF$"),
            '<' => sanitized.push_str("$LT$"),
            '>' => sanitized.push_str("$GT$"),
            '(' => sanitized.push_str("$LP$"),
            ')' => sanitized.push_str("$RP$"),
            ',' => sanitized.push_str("$C$"),

            // Everything else: `\u{XXXX}` is rewritten to `$uXXXX$` by
            // replacing the backslash with '$', dropping the opening brace
            // and turning the closing brace into '$'.
            other => {
                sanitized.push('$');
                let escaped = other.escape_unicode().skip(1).filter(|&e| e != '{');
                for e in escaped {
                    sanitized.push(if e == '}' { '$' } else { e });
                }
            }
        }
    }

    // Underscore-qualify anything that didn't start as an ident.
    let needs_underscore = match sanitized.as_bytes().first() {
        Some(&first) => first != b'_' && !(first as char).is_xid_start(),
        None => false,
    };

    if needs_underscore {
        format!("_{}", sanitized)
    } else {
        sanitized
    }
}
/// Builds a linker symbol from `path` and an optional trailing `hash`.
///
/// The result has the shape `_ZN<len><name>...E`: every path component, and
/// then the hash when one is given, is passed through `sanitize` and emitted
/// as its length followed by the sanitized text.
pub fn mangle<PI: Iterator<Item=InternedString>>(path: PI, hash: Option<&str>) -> String {
    // This follows the C++ namespace-mangling style; see
    // http://en.wikipedia.org/wiki/Name_mangling for more info.
    //
    // On OSX arbitrary symbols are actually possible in function names (at
    // least when given to LLVM), but not when using unix's linker. Perhaps
    // one day, when we just use a linker from LLVM, this mangling won't be
    // needed. Its drawback is that it seriously limits the available
    // characters: things like &T can't appear in symbol names, even though
    // one would theoretically want them for impls of traits on that type.
    //
    // To work on all platforms and still get *some* reasonable output, C++
    // name-mangling is used.

    // Appends one <len, name> pair for the sanitized form of `component`.
    fn append_component(out: &mut String, component: &str) {
        let clean = sanitize(component);
        out.push_str(&clean.len().to_string());
        out.push_str(&clean);
    }

    // _Z == Begin name-sequence, N == nested
    let mut symbol = String::from("_ZN");

    for segment in path {
        append_component(&mut symbol, &segment);
    }

    match hash {
        Some(h) => append_component(&mut symbol, h),
        None => {}
    }

    // End name-sequence.
    symbol.push('E');
    symbol
}