auto merge of #10670 : eddyb/rust/node-u32, r=alexcrichton

### Rationale
There is no reason to support more than 2³² nodes or names at this moment, as compiling something that big (even without considering the quadratic space usage of some analysis passes) would take at least **64GB** of memory.
Meanwhile, some people can't (or can only barely) compile rustc, because doing so already requires almost **1.5GB**.

### Potential problems
Can someone confirm that this doesn't affect metadata (de)serialization? I can't tell myself, as I know nothing about it.

### Results
Some structures have a size reduction of 25% to 50%: [before](https://gist.github.com/luqmana/3a82a51fa9c86d9191fa) - [after](https://gist.github.com/eddyb/5a75f8973d3d8018afd3).
Sadly, there isn't a massive change in the memory used for compiling stage2 librustc (it doesn't go over **1.4GB** as [before](http://huonw.github.io/isrustfastyet/mem/), but I can barely see the difference).
However, my own testcase (previously peaking at **1.6GB** in typeck) shows a reduction of **200**-**400MB**.
This commit is contained in:
bors 2013-11-26 22:07:44 -08:00
commit faf4c939fb
10 changed files with 74 additions and 78 deletions

View File

@ -28,7 +28,6 @@ use syntax::abi;
use syntax::parse::token;
use syntax;
use std::int;
use std::hashmap::{HashMap,HashSet};
#[deriving(Clone)]
@ -209,7 +208,7 @@ pub struct Session_ {
building_library: @mut bool,
working_dir: Path,
lints: @mut HashMap<ast::NodeId, ~[(lint::lint, codemap::Span, ~str)]>,
node_id: @mut uint,
node_id: @mut ast::NodeId,
}
pub type Session = @Session_;
@ -274,13 +273,15 @@ impl Session_ {
pub fn next_node_id(&self) -> ast::NodeId {
self.reserve_node_ids(1)
}
pub fn reserve_node_ids(&self, count: uint) -> ast::NodeId {
pub fn reserve_node_ids(&self, count: ast::NodeId) -> ast::NodeId {
let v = *self.node_id;
*self.node_id += count;
if v > (int::max_value as uint) {
self.bug("Input too large, ran out of node ids!");
match v.checked_add(&count) {
Some(next) => { *self.node_id = next; }
None => self.bug("Input too large, ran out of node ids!")
}
v as int
v
}
pub fn diagnostic(&self) -> @mut diagnostic::span_handler {
self.span_diagnostic

View File

@ -67,7 +67,7 @@ impl visit::Visitor<()> for ReadCrateVisitor {
#[deriving(Clone)]
struct cache_entry {
cnum: int,
cnum: ast::CrateNum,
span: Span,
hash: @str,
metas: @~[@ast::MetaItem]
@ -242,7 +242,7 @@ fn metas_with_ident(ident: @str, metas: ~[@ast::MetaItem])
}
fn existing_match(e: &Env, metas: &[@ast::MetaItem], hash: &str)
-> Option<int> {
-> Option<ast::CrateNum> {
for c in e.crate_cache.iter() {
if loader::metadata_matches(*c.metas, metas)
&& (hash.is_empty() || c.hash.as_slice() == hash) {

View File

@ -76,10 +76,10 @@ fn lookup_hash(d: ebml::Doc, eq_fn: |&[u8]| -> bool, hash: u64) ->
pub type GetCrateDataCb<'self> = 'self |ast::CrateNum| -> Cmd;
pub fn maybe_find_item(item_id: int, items: ebml::Doc) -> Option<ebml::Doc> {
fn eq_item(bytes: &[u8], item_id: int) -> bool {
pub fn maybe_find_item(item_id: ast::NodeId, items: ebml::Doc) -> Option<ebml::Doc> {
fn eq_item(bytes: &[u8], item_id: ast::NodeId) -> bool {
return u64_from_be_bytes(
bytes.slice(0u, 4u), 0u, 4u) as int
bytes.slice(0u, 4u), 0u, 4u) as ast::NodeId
== item_id;
}
lookup_hash(items,
@ -87,7 +87,7 @@ pub fn maybe_find_item(item_id: int, items: ebml::Doc) -> Option<ebml::Doc> {
(item_id as i64).hash())
}
fn find_item(item_id: int, items: ebml::Doc) -> ebml::Doc {
fn find_item(item_id: ast::NodeId, items: ebml::Doc) -> ebml::Doc {
match maybe_find_item(item_id, items) {
None => fail!("lookup_item: id not found: {}", item_id),
Some(d) => d
@ -96,7 +96,7 @@ fn find_item(item_id: int, items: ebml::Doc) -> ebml::Doc {
// Looks up an item in the given metadata and returns an ebml doc pointing
// to the item data.
pub fn lookup_item(item_id: int, data: @~[u8]) -> ebml::Doc {
pub fn lookup_item(item_id: ast::NodeId, data: @~[u8]) -> ebml::Doc {
let items = reader::get_doc(reader::Doc(data), tag_items);
find_item(item_id, items)
}
@ -343,7 +343,7 @@ fn item_name(intr: @ident_interner, item: ebml::Doc) -> ast::Ident {
let string = name.as_str_slice();
match intr.find_equiv(&string) {
None => token::str_to_ident(string),
Some(val) => ast::Ident::new(val),
Some(val) => ast::Ident::new(val as ast::Name),
}
}

View File

@ -58,7 +58,7 @@ type conv_did<'self> =
pub struct PState<'self> {
data: &'self [u8],
crate: int,
crate: ast::CrateNum,
pos: uint,
tcx: ty::ctxt
}
@ -101,7 +101,7 @@ fn parse_ident_(st: &mut PState, is_last: |char| -> bool) -> ast::Ident {
return st.tcx.sess.ident_of(rslt);
}
pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: int,
pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: ast::CrateNum,
pos: uint, tcx: ty::ctxt) -> PState<'a> {
PState {
data: data,
@ -111,19 +111,19 @@ pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: int,
}
}
pub fn parse_ty_data(data: &[u8], crate_num: int, pos: uint, tcx: ty::ctxt,
pub fn parse_ty_data(data: &[u8], crate_num: ast::CrateNum, pos: uint, tcx: ty::ctxt,
conv: conv_did) -> ty::t {
let mut st = parse_state_from_data(data, crate_num, pos, tcx);
parse_ty(&mut st, conv)
}
pub fn parse_bare_fn_ty_data(data: &[u8], crate_num: int, pos: uint, tcx: ty::ctxt,
pub fn parse_bare_fn_ty_data(data: &[u8], crate_num: ast::CrateNum, pos: uint, tcx: ty::ctxt,
conv: conv_did) -> ty::BareFnTy {
let mut st = parse_state_from_data(data, crate_num, pos, tcx);
parse_bare_fn_ty(&mut st, conv)
}
pub fn parse_trait_ref_data(data: &[u8], crate_num: int, pos: uint, tcx: ty::ctxt,
pub fn parse_trait_ref_data(data: &[u8], crate_num: ast::CrateNum, pos: uint, tcx: ty::ctxt,
conv: conv_did) -> ty::TraitRef {
let mut st = parse_state_from_data(data, crate_num, pos, tcx);
parse_trait_ref(&mut st, conv)
@ -251,7 +251,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
match next(st) {
'b' => {
assert_eq!(next(st), '[');
let id = parse_uint(st) as int;
let id = parse_uint(st) as ast::NodeId;
assert_eq!(next(st), '|');
let br = parse_bound_region(st, |x,y| conv(x,y));
assert_eq!(next(st), ']');
@ -259,7 +259,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
}
'B' => {
assert_eq!(next(st), '[');
let node_id = parse_uint(st) as int;
let node_id = parse_uint(st) as ast::NodeId;
assert_eq!(next(st), '|');
let index = parse_uint(st);
assert_eq!(next(st), '|');
@ -268,7 +268,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
}
'f' => {
assert_eq!(next(st), '[');
let id = parse_uint(st) as int;
let id = parse_uint(st) as ast::NodeId;
assert_eq!(next(st), '|');
let br = parse_bound_region(st, |x,y| conv(x,y));
assert_eq!(next(st), ']');
@ -276,7 +276,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
bound_region: br})
}
's' => {
let id = parse_uint(st) as int;
let id = parse_uint(st) as ast::NodeId;
assert_eq!(next(st), '|');
ty::ReScope(id)
}
@ -539,7 +539,7 @@ fn parse_bare_fn_ty(st: &mut PState, conv: conv_did) -> ty::BareFnTy {
fn parse_sig(st: &mut PState, conv: conv_did) -> ty::FnSig {
assert_eq!(next(st), '[');
let id = parse_uint(st) as int;
let id = parse_uint(st) as ast::NodeId;
assert_eq!(next(st), '|');
let mut inputs = ~[];
while peek(st) != ']' {
@ -572,12 +572,12 @@ pub fn parse_def_id(buf: &[u8]) -> ast::DefId {
let def_part = buf.slice(colon_idx + 1u, len);
let crate_num = match uint::parse_bytes(crate_part, 10u) {
Some(cn) => cn as int,
Some(cn) => cn as ast::CrateNum,
None => fail!("internal error: parse_def_id: crate number expected, but found {:?}",
crate_part)
};
let def_num = match uint::parse_bytes(def_part, 10u) {
Some(dn) => dn as int,
Some(dn) => dn as ast::NodeId,
None => fail!("internal error: parse_def_id: id expected, but found {:?}",
def_part)
};
@ -585,7 +585,7 @@ pub fn parse_def_id(buf: &[u8]) -> ast::DefId {
}
pub fn parse_type_param_def_data(data: &[u8], start: uint,
crate_num: int, tcx: ty::ctxt,
crate_num: ast::CrateNum, tcx: ty::ctxt,
conv: conv_did) -> ty::TypeParameterDef
{
let mut st = parse_state_from_data(data, crate_num, start, tcx);

View File

@ -161,8 +161,7 @@ fn reserve_id_range(sess: Session,
// Handle the case of an empty range:
if from_id_range.empty() { return from_id_range; }
let cnt = from_id_range.max - from_id_range.min;
assert!(cnt >= 0);
let to_id_min = sess.reserve_node_ids(cnt as uint);
let to_id_min = sess.reserve_node_ids(cnt);
let to_id_max = to_id_min + cnt;
ast_util::id_range { min: to_id_min, max: to_id_max }
}
@ -1204,7 +1203,7 @@ fn decode_side_tables(xcx: @ExtendedDecodeContext,
let tbl_doc = ast_doc.get(c::tag_table as uint);
reader::docs(tbl_doc, |tag, entry_doc| {
let id0 = entry_doc.get(c::tag_table_id as uint).as_int();
let id = xcx.tr_id(id0);
let id = xcx.tr_id(id0 as ast::NodeId);
debug!(">> Side table document with tag 0x{:x} \
found for id {} (orig {})",

View File

@ -169,7 +169,7 @@ pub struct field_ty {
// the types of AST nodes.
#[deriving(Eq,IterBytes)]
pub struct creader_cache_key {
cnum: int,
cnum: CrateNum,
pos: uint,
len: uint
}

View File

@ -69,7 +69,7 @@ impl Eq for Ident {
// this uint is a reference to a table stored in thread-local
// storage.
pub type SyntaxContext = uint;
pub type SyntaxContext = u32;
// the SCTable contains a table of SyntaxContext_'s. It
// represents a flattened tree structure, to avoid having
@ -87,8 +87,8 @@ pub struct SCTable {
}
// NB: these must be placed in any SCTable...
pub static EMPTY_CTXT : uint = 0;
pub static ILLEGAL_CTXT : uint = 1;
pub static EMPTY_CTXT : SyntaxContext = 0;
pub static ILLEGAL_CTXT : SyntaxContext = 1;
#[deriving(Eq, Encodable, Decodable,IterBytes)]
pub enum SyntaxContext_ {
@ -109,10 +109,10 @@ pub enum SyntaxContext_ {
/// A name is a part of an identifier, representing a string or gensym. It's
/// the result of interning.
pub type Name = uint;
pub type Name = u32;
/// A mark represents a unique id associated with a macro expansion
pub type Mrk = uint;
pub type Mrk = u32;
impl<S:Encoder> Encodable<S> for Ident {
fn encode(&self, s: &mut S) {
@ -163,9 +163,9 @@ pub struct PathSegment {
types: OptVec<Ty>,
}
pub type CrateNum = int;
pub type CrateNum = u32;
pub type NodeId = int;
pub type NodeId = u32;
#[deriving(Clone, TotalEq, TotalOrd, Eq, Encodable, Decodable, IterBytes, ToStr)]
pub struct DefId {

View File

@ -18,7 +18,7 @@ use visit::Visitor;
use visit;
use std::hashmap::HashMap;
use std::int;
use std::u32;
use std::local_data;
use std::num;
use std::option;
@ -382,8 +382,8 @@ pub struct id_range {
impl id_range {
pub fn max() -> id_range {
id_range {
min: int::max_value,
max: int::min_value,
min: u32::max_value,
max: u32::min_value,
}
}
@ -803,9 +803,9 @@ pub fn display_sctable(table : &SCTable) {
/// Add a value to the end of a vec, return its index
fn idx_push<T>(vec: &mut ~[T], val: T) -> uint {
fn idx_push<T>(vec: &mut ~[T], val: T) -> u32 {
vec.push(val);
vec.len() - 1
(vec.len() - 1) as u32
}
/// Resolve a syntax object to a name, per MTWT.
@ -917,7 +917,7 @@ pub fn mtwt_outer_mark(ctxt: SyntaxContext) -> Mrk {
/// Push a name... unless it matches the one on top, in which
/// case pop and discard (so two of the same marks cancel)
pub fn xorPush(marks: &mut ~[uint], mark: uint) {
pub fn xorPush(marks: &mut ~[Mrk], mark: Mrk) {
if ((marks.len() > 0) && (getLast(marks) == mark)) {
marks.pop();
} else {
@ -927,7 +927,7 @@ pub fn xorPush(marks: &mut ~[uint], mark: uint) {
// get the last element of a mutable array.
// FIXME #4903: , must be a separate procedure for now.
pub fn getLast(arr: &~[Mrk]) -> uint {
pub fn getLast(arr: &~[Mrk]) -> Mrk {
*arr.last()
}
@ -1000,14 +1000,8 @@ mod test {
assert_eq!(s.clone(),~[14]);
}
// convert a list of uints to an @[ident]
// (ignores the interner completely)
fn uints_to_idents (uints: &~[uint]) -> @~[Ident] {
@uints.map(|u| Ident {name:*u, ctxt: EMPTY_CTXT})
}
fn id (u : uint, s: SyntaxContext) -> Ident {
Ident{name:u, ctxt: s}
fn id(n: Name, s: SyntaxContext) -> Ident {
Ident {name: n, ctxt: s}
}
// because of the SCTable, I now need a tidy way of

View File

@ -502,12 +502,12 @@ fn mk_fresh_ident_interner() -> @ident_interner {
@interner::StrInterner::prefill(init_vec)
}
static SELF_KEYWORD_NAME: uint = 8;
static STATIC_KEYWORD_NAME: uint = 27;
static STRICT_KEYWORD_START: uint = 32;
static STRICT_KEYWORD_FINAL: uint = 65;
static RESERVED_KEYWORD_START: uint = 66;
static RESERVED_KEYWORD_FINAL: uint = 72;
static SELF_KEYWORD_NAME: Name = 8;
static STATIC_KEYWORD_NAME: Name = 27;
static STRICT_KEYWORD_START: Name = 32;
static STRICT_KEYWORD_FINAL: Name = 65;
static RESERVED_KEYWORD_START: Name = 66;
static RESERVED_KEYWORD_FINAL: Name = 72;
// if an interner exists in TLS, return it. Otherwise, prepare a
// fresh one.

View File

@ -12,15 +12,17 @@
// allows bidirectional lookup; i.e. given a value, one can easily find the
// type, and vice versa.
use ast::Name;
use std::cmp::Equiv;
use std::hashmap::HashMap;
pub struct Interner<T> {
priv map: @mut HashMap<T, uint>,
priv map: @mut HashMap<T, Name>,
priv vect: @mut ~[T],
}
// when traits can extend traits, we should extend index<uint,T> to get []
// when traits can extend traits, we should extend index<Name,T> to get []
impl<T:Eq + IterBytes + Hash + Freeze + Clone + 'static> Interner<T> {
pub fn new() -> Interner<T> {
Interner {
@ -37,37 +39,37 @@ impl<T:Eq + IterBytes + Hash + Freeze + Clone + 'static> Interner<T> {
rv
}
pub fn intern(&self, val: T) -> uint {
pub fn intern(&self, val: T) -> Name {
match self.map.find(&val) {
Some(&idx) => return idx,
None => (),
}
let vect = &mut *self.vect;
let new_idx = vect.len();
let new_idx = vect.len() as Name;
self.map.insert(val.clone(), new_idx);
vect.push(val);
new_idx
}
pub fn gensym(&self, val: T) -> uint {
pub fn gensym(&self, val: T) -> Name {
let new_idx = {
let vect = &*self.vect;
vect.len()
vect.len() as Name
};
// leave out of .map to avoid colliding
self.vect.push(val);
new_idx
}
pub fn get(&self, idx: uint) -> T {
pub fn get(&self, idx: Name) -> T {
self.vect[idx].clone()
}
pub fn len(&self) -> uint { let vect = &*self.vect; vect.len() }
pub fn find_equiv<Q:Hash + IterBytes + Equiv<T>>(&self, val: &Q)
-> Option<uint> {
-> Option<Name> {
match self.map.find_equiv(val) {
Some(v) => Some(*v),
None => None,
@ -78,11 +80,11 @@ impl<T:Eq + IterBytes + Hash + Freeze + Clone + 'static> Interner<T> {
// A StrInterner differs from Interner<String> in that it accepts
// borrowed pointers rather than @ ones, resulting in less allocation.
pub struct StrInterner {
priv map: @mut HashMap<@str, uint>,
priv map: @mut HashMap<@str, Name>,
priv vect: @mut ~[@str],
}
// when traits can extend traits, we should extend index<uint,T> to get []
// when traits can extend traits, we should extend index<Name,T> to get []
impl StrInterner {
pub fn new() -> StrInterner {
StrInterner {
@ -97,21 +99,21 @@ impl StrInterner {
rv
}
pub fn intern(&self, val: &str) -> uint {
pub fn intern(&self, val: &str) -> Name {
match self.map.find_equiv(&val) {
Some(&idx) => return idx,
None => (),
}
let new_idx = self.len();
let new_idx = self.len() as Name;
let val = val.to_managed();
self.map.insert(val, new_idx);
self.vect.push(val);
new_idx
}
pub fn gensym(&self, val: &str) -> uint {
let new_idx = self.len();
pub fn gensym(&self, val: &str) -> Name {
let new_idx = self.len() as Name;
// leave out of .map to avoid colliding
self.vect.push(val.to_managed());
new_idx
@ -127,19 +129,19 @@ impl StrInterner {
// create a gensym with the same name as an existing
// entry.
pub fn gensym_copy(&self, idx : uint) -> uint {
let new_idx = self.len();
pub fn gensym_copy(&self, idx : Name) -> Name {
let new_idx = self.len() as Name;
// leave out of map to avoid colliding
self.vect.push(self.vect[idx]);
new_idx
}
pub fn get(&self, idx: uint) -> @str { self.vect[idx] }
pub fn get(&self, idx: Name) -> @str { self.vect[idx] }
pub fn len(&self) -> uint { let vect = &*self.vect; vect.len() }
pub fn find_equiv<Q:Hash + IterBytes + Equiv<@str>>(&self, val: &Q)
-> Option<uint> {
-> Option<Name> {
match self.map.find_equiv(val) {
Some(v) => Some(*v),
None => None,