Intern metadata while writing, shrinking stage1 from 12 MB to 5.7 MB.

This commit is contained in:
Graydon Hoare 2011-04-29 15:26:28 +00:00
parent 6daf440037
commit a2f68b2d58
4 changed files with 225 additions and 95 deletions

View File

@ -43,34 +43,29 @@ tag resolve_result {
// Compact string representation for ty.t values. API ty_str & parse_from_str. // Compact string representation for ty.t values. API ty_str & parse_from_str.
// (The second has to be authed pure.) Extra parameters are for converting // (The second has to be authed pure.) Extra parameters are for converting
// to/from def_ids in the string rep. Whatever format you choose should not // to/from def_ids in the data buffer. Whatever format you choose should not
// contain pipe characters. // contain pipe characters.
// Callback to translate defs to strs or back. // Callback to translate defs to strs or back.
type str_def = fn(str) -> ast.def_id; type str_def = fn(str) -> ast.def_id;
type pstate = rec(str rep, mutable uint pos, uint len, ty.ctxt tcx); type pstate = rec(vec[u8] data, int crate,
mutable uint pos, uint len, ty.ctxt tcx);
fn peek(@pstate st) -> u8 { fn peek(@pstate st) -> u8 {
if (st.pos < st.len) {ret st.rep.(st.pos) as u8;} ret st.data.(st.pos);
else {ret ' ' as u8;}
} }
fn next(@pstate st) -> u8 { fn next(@pstate st) -> u8 {
if (st.pos >= st.len) {fail;} auto ch = st.data.(st.pos);
auto ch = st.rep.(st.pos);
st.pos = st.pos + 1u; st.pos = st.pos + 1u;
ret ch as u8; ret ch;
} }
fn parse_ty_str(str rep, str_def sd, ty.ctxt tcx) -> ty.t { fn parse_ty_data(vec[u8] data, int crate_num, uint pos, uint len,
auto len = _str.byte_len(rep); str_def sd, ty.ctxt tcx) -> ty.t {
auto st = @rec(rep=rep, mutable pos=0u, len=len, tcx=tcx); auto st = @rec(data=data, crate=crate_num,
mutable pos=pos, len=len, tcx=tcx);
auto result = parse_ty(st, sd); auto result = parse_ty(st, sd);
if (st.pos != len) {
log_err "parse_ty_str: incomplete parse, stopped at byte "
+ _uint.to_str(st.pos, 10u) + " of "
+ _uint.to_str(len, 10u) + " in str '" + rep + "'";
}
ret result; ret result;
} }
@ -178,6 +173,26 @@ fn parse_ty(@pstate st, str_def sd) -> ty.t {
case ('X') { ret ty.mk_var(st.tcx, parse_int(st)); } case ('X') { ret ty.mk_var(st.tcx, parse_int(st)); }
case ('E') { ret ty.mk_native(st.tcx); } case ('E') { ret ty.mk_native(st.tcx); }
case ('Y') { ret ty.mk_type(st.tcx); } case ('Y') { ret ty.mk_type(st.tcx); }
case ('#') {
auto pos = parse_hex(st);
check (next(st) as char == ':');
auto len = parse_hex(st);
check (next(st) as char == '#');
alt (st.tcx.rcache.find(tup(st.crate,pos,len))) {
case (some[ty.t](?tt)) { ret tt; }
case (none[ty.t]) {
auto ps = @rec(pos=pos, len=len with *st);
auto tt = parse_ty(ps, sd);
st.tcx.rcache.insert(tup(st.crate,pos,len), tt);
ret tt;
}
}
}
case (?c) {
log_err "unexpected char in type string: ";
log_err c;
fail;
}
} }
} }
@ -212,6 +227,23 @@ fn parse_int(@pstate st) -> int {
ret n; ret n;
} }
// Parses an unsigned hexadecimal number from the parser state, starting at
// st.pos. Only the digits '0'-'9' and lowercase 'a'-'f' are accepted; the
// first byte outside those ranges ends the number and is left unconsumed.
// Returns the accumulated value, which is 0u when no hex digit is present.
// No overflow check is made on the accumulator.
fn parse_hex(@pstate st) -> uint {
auto n = 0u;
while (true) {
// Look at the current byte; st.pos is only advanced below, once the byte
// is known to be a hex digit.
auto cur = peek(st) as char;
// Stop at the first byte that is neither a decimal digit nor 'a'-'f'.
if ((cur < '0' || cur > '9') &&
(cur < 'a' || cur > 'f')) {break;}
// Consume the digit and shift the accumulator up by one hex place.
st.pos = st.pos + 1u;
n *= 16u;
if ('0' <= cur && cur <= '9') {
n += (cur as uint) - ('0' as uint);
} else {
// 'a'-'f' map to the values 10-15.
n += (10u + (cur as uint) - ('a' as uint));
}
}
ret n;
}
fn parse_ty_fn(@pstate st, str_def sd) -> tup(vec[ty.arg], ty.t) { fn parse_ty_fn(@pstate st, str_def sd) -> tup(vec[ty.arg], ty.t) {
check(next(st) as char == '['); check(next(st) as char == '[');
let vec[ty.arg] inputs = vec(); let vec[ty.arg] inputs = vec();
@ -343,7 +375,8 @@ fn item_type(&ebml.doc item, int this_cnum, ty.ctxt tcx) -> ty.t {
auto tp = ebml.get_doc(item, metadata.tag_items_data_item_type); auto tp = ebml.get_doc(item, metadata.tag_items_data_item_type);
auto s = _str.unsafe_from_bytes(ebml.doc_data(tp)); auto s = _str.unsafe_from_bytes(ebml.doc_data(tp));
ret parse_ty_str(s, bind parse_external_def_id(this_cnum, _), tcx); ret parse_ty_data(item.data, this_cnum, tp.start, tp.end - tp.start,
bind parse_external_def_id(this_cnum, _), tcx);
} }
fn item_ty_param_count(&ebml.doc item, int this_cnum) -> uint { fn item_ty_param_count(&ebml.doc item, int this_cnum) -> uint {

View File

@ -1,9 +1,12 @@
import std._str; import std._str;
import std._uint; import std._uint;
import std._vec; import std._vec;
import std.map.hashmap;
import std.ebml; import std.ebml;
import std.io; import std.io;
import std.option; import std.option;
import std.option.some;
import std.option.none;
import front.ast; import front.ast;
import middle.fold; import middle.fold;
@ -48,119 +51,185 @@ const uint tag_index_table = 0x15u;
// Extra parameters are for converting to/from def_ids in the string rep. // Extra parameters are for converting to/from def_ids in the string rep.
// Whatever format you choose should not contain pipe characters. // Whatever format you choose should not contain pipe characters.
type ty_abbrev = rec(uint pos, uint len, str s);
mod Encode { mod Encode {
type ctxt = rec( type ctxt = rec(
fn(ast.def_id) -> str ds, // Callback to translate defs to strs. fn(ast.def_id) -> str ds, // Def -> str Callback.
ty.ctxt tcx // The type context. ty.ctxt tcx, // The type context.
bool use_abbrevs,
hashmap[ty.t, ty_abbrev] abbrevs // Type abbrevs.
); );
fn ty_str(@ctxt cx, ty.t t) -> str { fn ty_str(@ctxt cx, ty.t t) -> str {
ret sty_str(cx, ty.struct(cx.tcx, t)); check (! cx.use_abbrevs);
auto sw = io.string_writer();
enc_ty(sw.get_writer(), cx, t);
ret sw.get_str();
} }
fn mt_str(@ctxt cx, &ty.mt mt) -> str { fn enc_ty(io.writer w, @ctxt cx, ty.t t) {
auto mut_str; if (cx.use_abbrevs) {
alt (mt.mut) { alt (cx.abbrevs.find(t)) {
case (ast.imm) { mut_str = ""; } case (some[ty_abbrev](?a)) {
case (ast.mut) { mut_str = "m"; } w.write_str(a.s);
case (ast.maybe_mut) { mut_str = "?"; } ret;
}
case (none[ty_abbrev]) {
auto pos = w.get_buf_writer().tell();
auto ss = enc_sty(w, cx, ty.struct(cx.tcx, t));
auto end = w.get_buf_writer().tell();
auto len = end-pos;
// Counts how many hexadecimal digits are needed to represent `u` by
// shifting it right four bits at a time until it reaches zero.
// Note: for u == 0u the loop body never runs, so the result is 0u.
// NOTE(review): a zero value presumably still prints as one digit, so this
// would under-count by one in that case -- confirm whether that matters.
fn estimate_sz(uint u) -> uint {
auto n = u;
auto len = 0u;
while (n != 0u) {
len += 1u;
// Drop the lowest hex digit (4 bits).
n = n >> 4u;
}
ret len;
}
auto abbrev_len =
3u + estimate_sz(pos) + estimate_sz(len);
if (abbrev_len < len) {
// I.e. it's actually an abbreviation.
auto s = ("#"
+ _uint.to_str(pos, 16u) + ":"
+ _uint.to_str(len, 16u) + "#");
auto a = rec(pos=pos, len=len, s=s);
cx.abbrevs.insert(t, a);
}
ret;
}
}
} }
ret mut_str + ty_str(cx, mt.ty); enc_sty(w, cx, ty.struct(cx.tcx, t));
} }
fn sty_str(@ctxt cx, ty.sty st) -> str { fn enc_mt(io.writer w, @ctxt cx, &ty.mt mt) {
alt (mt.mut) {
case (ast.imm) { }
case (ast.mut) { w.write_char('m'); }
case (ast.maybe_mut) { w.write_char('?'); }
}
enc_ty(w, cx, mt.ty);
}
fn enc_sty(io.writer w, @ctxt cx, ty.sty st) {
alt (st) { alt (st) {
case (ty.ty_nil) {ret "n";} case (ty.ty_nil) { w.write_char('n'); }
case (ty.ty_bool) {ret "b";} case (ty.ty_bool) { w.write_char('b'); }
case (ty.ty_int) {ret "i";} case (ty.ty_int) { w.write_char('i'); }
case (ty.ty_uint) {ret "u";} case (ty.ty_uint) { w.write_char('u'); }
case (ty.ty_float) {ret "l";} case (ty.ty_float) { w.write_char('l'); }
case (ty.ty_machine(?mach)) { case (ty.ty_machine(?mach)) {
alt (mach) { alt (mach) {
case (common.ty_u8) {ret "Mb";} case (common.ty_u8) { w.write_str("Mb"); }
case (common.ty_u16) {ret "Mw";} case (common.ty_u16) { w.write_str("Mw"); }
case (common.ty_u32) {ret "Ml";} case (common.ty_u32) { w.write_str("Ml"); }
case (common.ty_u64) {ret "Md";} case (common.ty_u64) { w.write_str("Md"); }
case (common.ty_i8) {ret "MB";} case (common.ty_i8) { w.write_str("MB"); }
case (common.ty_i16) {ret "MW";} case (common.ty_i16) { w.write_str("MW"); }
case (common.ty_i32) {ret "ML";} case (common.ty_i32) { w.write_str("ML"); }
case (common.ty_i64) {ret "MD";} case (common.ty_i64) { w.write_str("MD"); }
case (common.ty_f32) {ret "Mf";} case (common.ty_f32) { w.write_str("Mf"); }
case (common.ty_f64) {ret "MF";} case (common.ty_f64) { w.write_str("MF"); }
} }
} }
case (ty.ty_char) {ret "c";} case (ty.ty_char) {w.write_char('c');}
case (ty.ty_str) {ret "s";} case (ty.ty_str) {w.write_char('s');}
case (ty.ty_tag(?def,?tys)) { // TODO restore def_id case (ty.ty_tag(?def,?tys)) { // TODO restore def_id
auto acc = "t[" + cx.ds(def) + "|"; w.write_str("t[");
for (ty.t t in tys) {acc += ty_str(cx, t);} w.write_str(cx.ds(def));
ret acc + "]"; w.write_char('|');
for (ty.t t in tys) {
enc_ty(w, cx, t);
}
w.write_char(']');
} }
case (ty.ty_box(?mt)) {ret "@" + mt_str(cx, mt);} case (ty.ty_box(?mt)) {w.write_char('@'); enc_mt(w, cx, mt); }
case (ty.ty_vec(?mt)) {ret "V" + mt_str(cx, mt);} case (ty.ty_vec(?mt)) {w.write_char('V'); enc_mt(w, cx, mt); }
case (ty.ty_port(?t)) {ret "P" + ty_str(cx, t);} case (ty.ty_port(?t)) {w.write_char('P'); enc_ty(w, cx, t); }
case (ty.ty_chan(?t)) {ret "C" + ty_str(cx, t);} case (ty.ty_chan(?t)) {w.write_char('C'); enc_ty(w, cx, t); }
case (ty.ty_tup(?mts)) { case (ty.ty_tup(?mts)) {
auto acc = "T["; w.write_str("T[");
for (ty.mt mt in mts) {acc += mt_str(cx, mt);} for (ty.mt mt in mts) {
ret acc + "]"; enc_mt(w, cx, mt);
}
w.write_char(']');
} }
case (ty.ty_rec(?fields)) { case (ty.ty_rec(?fields)) {
auto acc = "R["; w.write_str("R[");
for (ty.field field in fields) { for (ty.field field in fields) {
acc += field.ident + "="; w.write_str(field.ident);
acc += mt_str(cx, field.mt); w.write_char('=');
enc_mt(w, cx, field.mt);
} }
ret acc + "]"; w.write_char(']');
} }
case (ty.ty_fn(?proto,?args,?out)) { case (ty.ty_fn(?proto,?args,?out)) {
ret proto_str(proto) + ty_fn_str(cx, args, out); enc_proto(w, proto);
enc_ty_fn(w, cx, args, out);
} }
case (ty.ty_native_fn(?abi,?args,?out)) { case (ty.ty_native_fn(?abi,?args,?out)) {
auto abistr; w.write_char('N');
alt (abi) { alt (abi) {
case (ast.native_abi_rust) {abistr = "r";} case (ast.native_abi_rust) { w.write_char('r'); }
case (ast.native_abi_cdecl) {abistr = "c";} case (ast.native_abi_cdecl) { w.write_char('c'); }
case (ast.native_abi_llvm) {abistr = "l";} case (ast.native_abi_llvm) { w.write_char('l'); }
} }
ret "N" + abistr + ty_fn_str(cx, args, out); enc_ty_fn(w, cx, args, out);
} }
case (ty.ty_obj(?methods)) { case (ty.ty_obj(?methods)) {
auto acc = "O["; w.write_str("O[");
for (ty.method m in methods) { for (ty.method m in methods) {
acc += proto_str(m.proto); enc_proto(w, m.proto);
acc += m.ident; w.write_str(m.ident);
acc += ty_fn_str(cx, m.inputs, m.output); enc_ty_fn(w, cx, m.inputs, m.output);
} }
ret acc + "]"; w.write_char(']');
} }
case (ty.ty_var(?id)) {ret "X" + common.istr(id);} case (ty.ty_var(?id)) {
case (ty.ty_native) {ret "E";} w.write_char('X');
case (ty.ty_param(?id)) {ret "p" + common.uistr(id);} w.write_str(common.istr(id));
case (ty.ty_type) {ret "Y";} }
case (ty.ty_native) {w.write_char('E');}
case (ty.ty_param(?id)) {
w.write_char('p');
w.write_str(common.uistr(id));
}
case (ty.ty_type) {w.write_char('Y');}
// These two don't appear in crate metadata, but are here because // These two don't appear in crate metadata, but are here because
// `hash_ty()` uses this function. // `hash_ty()` uses this function.
case (ty.ty_bound_param(?id)) {ret "o" + common.uistr(id);} case (ty.ty_bound_param(?id)) {
case (ty.ty_local(?def)) {ret "L" + cx.ds(def);} w.write_char('o');
w.write_str(common.uistr(id));
}
case (ty.ty_local(?def)) {
w.write_char('L');
w.write_str(cx.ds(def));
}
} }
} }
fn proto_str(ast.proto proto) -> str { fn enc_proto(io.writer w, ast.proto proto) {
alt (proto) { alt (proto) {
case (ast.proto_iter) {ret "W";} case (ast.proto_iter) { w.write_char('W'); }
case (ast.proto_fn) {ret "F";} case (ast.proto_fn) { w.write_char('F'); }
} }
} }
fn ty_fn_str(@ctxt cx, vec[ty.arg] args, ty.t out) -> str { fn enc_ty_fn(io.writer w, @ctxt cx, vec[ty.arg] args, ty.t out) {
auto acc = "["; w.write_char('[');
for (ty.arg arg in args) { for (ty.arg arg in args) {
if (arg.mode == ast.alias) {acc += "&";} if (arg.mode == ast.alias) { w.write_char('&'); }
acc += ty_str(cx, arg.ty); enc_ty(w, cx, arg.ty);
} }
ret acc + "]" + ty_str(cx, out); w.write_char(']');
enc_ty(w, cx, out);
} }
} }
@ -336,9 +405,9 @@ fn encode_type(@trans.crate_ctxt cx, &ebml.writer ebml_w, ty.t typ) {
ebml.start_tag(ebml_w, tag_items_data_item_type); ebml.start_tag(ebml_w, tag_items_data_item_type);
auto f = def_to_str; auto f = def_to_str;
auto ty_str_ctxt = @rec(ds=f, tcx=cx.tcx); auto ty_str_ctxt = @rec(ds=f, tcx=cx.tcx,
ebml_w.writer.write(_str.bytes(Encode.ty_str(ty_str_ctxt, typ))); use_abbrevs=true, abbrevs=cx.type_abbrevs);
Encode.enc_ty(io.new_writer_(ebml_w.writer), ty_str_ctxt, typ);
ebml.end_tag(ebml_w); ebml.end_tag(ebml_w);
} }
@ -565,7 +634,6 @@ fn encode_index[T](&ebml.writer ebml_w, vec[vec[tup(T, uint)]] buckets,
ebml.end_tag(ebml_w); ebml.end_tag(ebml_w);
} }
fn write_str(io.writer writer, &str s) { fn write_str(io.writer writer, &str s) {
writer.write_str(s); writer.write_str(s);
} }

View File

@ -114,6 +114,7 @@ state type crate_ctxt = rec(session.session sess,
namegen names, namegen names,
std.sha1.sha1 sha, std.sha1.sha1 sha,
hashmap[ty.t, str] type_sha1s, hashmap[ty.t, str] type_sha1s,
hashmap[ty.t, metadata.ty_abbrev] type_abbrevs,
ty.ctxt tcx); ty.ctxt tcx);
type local_ctxt = rec(vec[str] path, type local_ctxt = rec(vec[str] path,
@ -189,7 +190,10 @@ fn mangle_name_by_type(@crate_ctxt ccx, vec[str] path, ty.t t) -> str {
case (none[str]) { case (none[str]) {
ccx.sha.reset(); ccx.sha.reset();
auto f = metadata.def_to_str; auto f = metadata.def_to_str;
auto cx = @rec(ds=f, tcx=ccx.tcx); // NB: do *not* use abbrevs here as we want the symbol names
// to be independent of one another in the crate.
auto cx = @rec(ds=f, tcx=ccx.tcx,
use_abbrevs=false, abbrevs=ccx.type_abbrevs);
ccx.sha.input_str(metadata.Encode.ty_str(cx, t)); ccx.sha.input_str(metadata.Encode.ty_str(cx, t));
hash = _str.substr(ccx.sha.result_str(), 0u, 16u); hash = _str.substr(ccx.sha.result_str(), 0u, 16u);
ccx.type_sha1s.insert(t, hash); ccx.type_sha1s.insert(t, hash);
@ -791,7 +795,9 @@ fn type_of_inner(@crate_ctxt cx, ty.t t) -> TypeRef {
} }
check (llty as int != 0); check (llty as int != 0);
llvm.LLVMAddTypeName(cx.llmod, _str.buf(ty.ty_to_abbrev_str(cx.tcx, t)), llvm.LLVMAddTypeName(cx.llmod,
_str.buf(ty.ty_to_short_str(cx.tcx,
cx.type_abbrevs, t)),
llty); llty);
cx.lltypes.insert(t, llty); cx.lltypes.insert(t, llty);
ret llty; ret llty;
@ -1673,8 +1679,7 @@ fn declare_tydesc(@local_ctxt cx, ty.t t) -> @tydesc_info {
auto glue_fn_ty = T_ptr(T_glue_fn(ccx.tn)); auto glue_fn_ty = T_ptr(T_glue_fn(ccx.tn));
auto name = sanitize(ccx.names.next("tydesc_" + auto name = mangle_name_by_seq(ccx, cx.path, "tydesc");
ty.ty_to_abbrev_str(cx.ccx.tcx, t)));
auto gvar = llvm.LLVMAddGlobal(ccx.llmod, T_tydesc(ccx.tn), auto gvar = llvm.LLVMAddGlobal(ccx.llmod, T_tydesc(ccx.tn),
_str.buf(name)); _str.buf(name));
auto tydesc = C_struct(vec(C_null(T_ptr(T_ptr(T_tydesc(ccx.tn)))), auto tydesc = C_struct(vec(C_null(T_ptr(T_ptr(T_tydesc(ccx.tn)))),
@ -7672,6 +7677,7 @@ fn trans_crate(session.session sess, @ast.crate crate, ty.ctxt tcx,
auto tydescs = map.mk_hashmap[ty.t,@tydesc_info](hasher, eqer); auto tydescs = map.mk_hashmap[ty.t,@tydesc_info](hasher, eqer);
auto lltypes = map.mk_hashmap[ty.t,TypeRef](hasher, eqer); auto lltypes = map.mk_hashmap[ty.t,TypeRef](hasher, eqer);
auto sha1s = map.mk_hashmap[ty.t,str](hasher, eqer); auto sha1s = map.mk_hashmap[ty.t,str](hasher, eqer);
auto abbrevs = map.mk_hashmap[ty.t,metadata.ty_abbrev](hasher, eqer);
auto ccx = @rec(sess = sess, auto ccx = @rec(sess = sess,
llmod = llmod, llmod = llmod,
@ -7698,6 +7704,7 @@ fn trans_crate(session.session sess, @ast.crate crate, ty.ctxt tcx,
names = namegen(0), names = namegen(0),
sha = std.sha1.mk_sha1(), sha = std.sha1.mk_sha1(),
type_sha1s = sha1s, type_sha1s = sha1s,
type_abbrevs = abbrevs,
tcx = tcx); tcx = tcx);
auto cx = new_local_ctxt(ccx); auto cx = new_local_ctxt(ccx);

View File

@ -46,7 +46,10 @@ type mt = rec(t ty, ast.mutability mut);
// Contains information needed to resolve types and (in the future) look up // Contains information needed to resolve types and (in the future) look up
// the types of AST nodes. // the types of AST nodes.
type ctxt = rec(@type_store ts, session.session sess); type creader_cache = hashmap[tup(int,uint,uint),ty.t];
type ctxt = rec(@type_store ts,
session.session sess,
creader_cache rcache);
type ty_ctxt = ctxt; // Needed for disambiguation from Unify.ctxt. type ty_ctxt = ctxt; // Needed for disambiguation from Unify.ctxt.
// Convert from method type to function type. Pretty easy; we just drop // Convert from method type to function type. Pretty easy; we just drop
@ -200,8 +203,26 @@ fn mk_type_store() -> @type_store {
others=map.mk_hashmap[t,t](hasher, eqer)); others=map.mk_hashmap[t,t](hasher, eqer));
} }
fn mk_ctxt(session.session s) -> ctxt { ret rec(ts=mk_type_store(), sess=s); } fn mk_rcache() -> creader_cache {
// Hashes a cache key by summing its three components; the first (an int)
// is cast to uint before the addition.
fn hash_cache_entry(&tup(int,uint,uint) k) -> uint {
ret (k._0 as uint) + k._1 + k._2;
}
// Key equality for the cache: two keys are equal exactly when all three
// tuple components are equal.
fn eq_cache_entries(&tup(int,uint,uint) a,
&tup(int,uint,uint) b) -> bool {
ret a._0 == b._0 &&
a._1 == b._1 &&
a._2 == b._2;
}
auto h = hash_cache_entry;
auto e = eq_cache_entries;
ret map.mk_hashmap[tup(int,uint,uint),t](h, e);
}
// Builds a type context for session `s`: the type store comes from
// mk_type_store() and the reader cache (rcache) from mk_rcache().
fn mk_ctxt(session.session s) -> ctxt {
ret rec(ts = mk_type_store(),
sess = s,
rcache = mk_rcache());
}
// Type constructors // Type constructors
fn mk_ty_full(&sty st, option.t[str] cname) -> t { fn mk_ty_full(&sty st, option.t[str] cname) -> t {
@ -627,9 +648,10 @@ fn ty_to_str(ctxt cx, &t typ) -> str {
ret s; ret s;
} }
fn ty_to_abbrev_str(ctxt cx, t typ) -> str { fn ty_to_short_str(ctxt cx, hashmap[ty.t, metadata.ty_abbrev] abbrevs,
t typ) -> str {
auto f = def_to_str; auto f = def_to_str;
auto ecx = @rec(ds=f, tcx=cx); auto ecx = @rec(ds=f, tcx=cx, use_abbrevs=false, abbrevs=abbrevs);
auto s = metadata.Encode.ty_str(ecx, typ); auto s = metadata.Encode.ty_str(ecx, typ);
if (_str.byte_len(s) >= 64u) { s = _str.substr(s, 0u, 64u); } if (_str.byte_len(s) >= 64u) { s = _str.substr(s, 0u, 64u); }
ret s; ret s;