From ca3234036708599aa3dca645a58b819e0f6116e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Thu, 10 May 2018 16:27:46 +0200 Subject: [PATCH] Allocate Symbol strings from an arena --- src/Cargo.lock | 1 + src/libsyntax_pos/Cargo.toml | 1 + src/libsyntax_pos/lib.rs | 1 + src/libsyntax_pos/symbol.rs | 40 ++++++++++++++++++++++++++++-------- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/Cargo.lock b/src/Cargo.lock index e2a9a6efbda..9f415944420 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -2590,6 +2590,7 @@ dependencies = [ name = "syntax_pos" version = "0.0.0" dependencies = [ + "arena 0.0.0", "rustc_data_structures 0.0.0", "scoped-tls 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "serialize 0.0.0", diff --git a/src/libsyntax_pos/Cargo.toml b/src/libsyntax_pos/Cargo.toml index b9637b1855e..a9147b394f7 100644 --- a/src/libsyntax_pos/Cargo.toml +++ b/src/libsyntax_pos/Cargo.toml @@ -11,5 +11,6 @@ crate-type = ["dylib"] [dependencies] serialize = { path = "../libserialize" } rustc_data_structures = { path = "../librustc_data_structures" } +arena = { path = "../libarena" } scoped-tls = { version = "0.1.1", features = ["nightly"] } unicode-width = "0.1.4" diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 8b4a3ea26a1..d30d3d78ca5 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -35,6 +35,7 @@ use std::path::PathBuf; use rustc_data_structures::stable_hasher::StableHasher; use rustc_data_structures::sync::{Lrc, Lock}; +extern crate arena; extern crate rustc_data_structures; #[macro_use] diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index 4a8b1e8b1c1..b84ff5697a4 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -16,8 +16,10 @@ use hygiene::SyntaxContext; use {Span, DUMMY_SP, GLOBALS}; use rustc_data_structures::fx::FxHashMap; +use arena::DroplessArena; use serialize::{Decodable, Decoder, Encodable, 
Encoder}; use std::fmt; +use std::str; use std::cmp::{PartialEq, Ordering, PartialOrd, Ord}; use std::hash::{Hash, Hasher}; @@ -198,22 +200,35 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol { } } -#[derive(Default)] +// The &'static strs in this type actually point into the arena pub struct Interner { - names: FxHashMap<Box<str>, Symbol>, - strings: Vec<Box<str>>, + arena: DroplessArena, + names: FxHashMap<&'static str, Symbol>, + strings: Vec<&'static str>, gensyms: Vec<Symbol>, } impl Interner { pub fn new() -> Self { - Interner::default() + Interner { + arena: DroplessArena::new(), + names: Default::default(), + strings: Default::default(), + gensyms: Default::default(), + } } fn prefill(init: &[&str]) -> Self { let mut this = Interner::new(); for &string in init { - this.intern(string); + if string == "" { + // We can't allocate empty strings in the arena, so handle this here + let name = Symbol(this.strings.len() as u32); + this.names.insert("", name); + this.strings.push(""); + } else { + this.intern(string); + } } this } @@ -224,8 +239,17 @@ impl Interner { } let name = Symbol(self.strings.len() as u32); - let string = string.to_string().into_boxed_str(); - self.strings.push(string.clone()); + + // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8 + let string: &str = unsafe { + str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes())) + }; + // It is safe to extend the arena allocation to 'static because we only access + // these while the arena is still alive + let string: &'static str = unsafe { + &*(string as *const str) + }; + self.strings.push(string); self.names.insert(string, name); name } @@ -254,7 +278,7 @@ impl Interner { pub fn get(&self, symbol: Symbol) -> &str { match self.strings.get(symbol.0 as usize) { - Some(ref string) => string, + Some(string) => string, None => self.get(self.gensyms[(!0 - symbol.0) as usize]), } }