Auto merge of #59887 - eddyb:safer-metadata, r=Zoxc

rustc_metadata: more safely read/write the index positions.

This is a small part of a larger refactor, that I want to benchmark independently.
The final code would be even cleaner than this, so this is sort of an "worst case" test.
This commit is contained in:
bors 2019-04-27 08:13:23 +00:00
commit d4a32d504a

View File

@ -2,10 +2,71 @@ use crate::schema::*;
use rustc::hir::def_id::{DefId, DefIndex, DefIndexAddressSpace};
use rustc_serialize::opaque::Encoder;
use std::slice;
use std::u32;
use log::debug;
/// Helper trait, for encoding to, and decoding from, a fixed number of bytes.
pub trait FixedSizeEncoding {
const BYTE_LEN: usize;
// FIXME(eddyb) convert to and from `[u8; Self::BYTE_LEN]` instead,
// once that starts being allowed by the compiler (i.e. lazy normalization).
fn from_bytes(b: &[u8]) -> Self;
fn write_to_bytes(self, b: &mut [u8]);
// FIXME(eddyb) make these generic functions, or at least defaults here.
// (same problem as above, needs `[u8; Self::BYTE_LEN]`)
// For now, a macro (`fixed_size_encoding_byte_len_and_defaults`) is used.
fn read_from_bytes_at(b: &[u8], i: usize) -> Self;
fn write_to_bytes_at(self, b: &mut [u8], i: usize);
}
// HACK(eddyb) this shouldn't be needed (see comments on the methods above).
macro_rules! fixed_size_encoding_byte_len_and_defaults {
($byte_len:expr) => {
const BYTE_LEN: usize = $byte_len;
fn read_from_bytes_at(b: &[u8], i: usize) -> Self {
const BYTE_LEN: usize = $byte_len;
// HACK(eddyb) ideally this would be done with fully safe code,
// but slicing `[u8]` with `i * N..` is optimized worse, due to the
// possibility of `i * N` overflowing, than indexing `[[u8; N]]`.
let b = unsafe {
std::slice::from_raw_parts(
b.as_ptr() as *const [u8; BYTE_LEN],
b.len() / BYTE_LEN,
)
};
Self::from_bytes(&b[i])
}
fn write_to_bytes_at(self, b: &mut [u8], i: usize) {
const BYTE_LEN: usize = $byte_len;
// HACK(eddyb) ideally this would be done with fully safe code,
// see similar comment in `read_from_bytes_at` for why it can't yet.
let b = unsafe {
std::slice::from_raw_parts_mut(
b.as_mut_ptr() as *mut [u8; BYTE_LEN],
b.len() / BYTE_LEN,
)
};
self.write_to_bytes(&mut b[i]);
}
}
}
impl FixedSizeEncoding for u32 {
fixed_size_encoding_byte_len_and_defaults!(4);
fn from_bytes(b: &[u8]) -> Self {
let mut bytes = [0; Self::BYTE_LEN];
bytes.copy_from_slice(&b[..Self::BYTE_LEN]);
Self::from_le_bytes(bytes)
}
fn write_to_bytes(self, b: &mut [u8]) {
b[..Self::BYTE_LEN].copy_from_slice(&self.to_le_bytes());
}
}
/// While we are generating the metadata, we also track the position
/// of each DefIndex. It is not required that all definitions appear
/// in the metadata, nor that they are serialized in order, and
@ -14,14 +75,14 @@ use log::debug;
/// appropriate spot by calling `record_position`. We should never
/// visit the same index twice.
pub struct Index {
positions: [Vec<u32>; 2]
positions: [Vec<u8>; 2]
}
impl Index {
pub fn new((max_index_lo, max_index_hi): (usize, usize)) -> Index {
Index {
positions: [vec![u32::MAX; max_index_lo],
vec![u32::MAX; max_index_hi]],
positions: [vec![0xff; max_index_lo * 4],
vec![0xff; max_index_hi * 4]],
}
}
@ -36,26 +97,27 @@ impl Index {
let space_index = item.address_space().index();
let array_index = item.as_array_index();
assert!(self.positions[space_index][array_index] == u32::MAX,
let positions = &mut self.positions[space_index];
assert!(u32::read_from_bytes_at(positions, array_index) == u32::MAX,
"recorded position for item {:?} twice, first at {:?} and now at {:?}",
item,
self.positions[space_index][array_index],
u32::read_from_bytes_at(positions, array_index),
position);
self.positions[space_index][array_index] = position.to_le();
position.write_to_bytes_at(positions, array_index)
}
pub fn write_index(&self, buf: &mut Encoder) -> LazySeq<Index> {
let pos = buf.position();
// First we write the length of the lower range ...
buf.emit_raw_bytes(words_to_bytes(&[(self.positions[0].len() as u32).to_le()]));
buf.emit_raw_bytes(&(self.positions[0].len() as u32 / 4).to_le_bytes());
// ... then the values in the lower range ...
buf.emit_raw_bytes(words_to_bytes(&self.positions[0][..]));
buf.emit_raw_bytes(&self.positions[0]);
// ... then the values in the higher range.
buf.emit_raw_bytes(words_to_bytes(&self.positions[1][..]));
buf.emit_raw_bytes(&self.positions[1]);
LazySeq::with_position_and_length(pos as usize,
self.positions[0].len() + self.positions[1].len() + 1)
(self.positions[0].len() + self.positions[1].len()) / 4 + 1)
}
}
@ -64,24 +126,21 @@ impl<'tcx> LazySeq<Index> {
/// DefIndex (if any).
#[inline(never)]
pub fn lookup(&self, bytes: &[u8], def_index: DefIndex) -> Option<Lazy<Entry<'tcx>>> {
let words = &bytes_to_words(&bytes[self.position..])[..self.len];
debug!("Index::lookup: index={:?} words.len={:?}",
let bytes = &bytes[self.position..];
debug!("Index::lookup: index={:?} len={:?}",
def_index,
words.len());
self.len);
let positions = match def_index.address_space() {
DefIndexAddressSpace::Low => &words[1..],
let i = def_index.as_array_index() + match def_index.address_space() {
DefIndexAddressSpace::Low => 0,
DefIndexAddressSpace::High => {
// This is a DefIndex in the higher range, so find out where
// that starts:
let lo_count = u32::from_le(words[0].get()) as usize;
&words[lo_count + 1 .. ]
u32::read_from_bytes_at(bytes, 0) as usize
}
};
let array_index = def_index.as_array_index();
let position = u32::from_le(positions[array_index].get());
let position = u32::read_from_bytes_at(bytes, 1 + i);
if position == u32::MAX {
debug!("Index::lookup: position=u32::MAX");
None
@ -91,27 +150,3 @@ impl<'tcx> LazySeq<Index> {
}
}
}
#[repr(packed)]
#[derive(Copy)]
struct Unaligned<T>(T);
// The derived Clone impl is unsafe for this packed struct since it needs to pass a reference to
// the field to `T::clone`, but this reference may not be properly aligned.
impl<T: Copy> Clone for Unaligned<T> {
fn clone(&self) -> Self {
*self
}
}
impl<T> Unaligned<T> {
fn get(self) -> T { self.0 }
}
fn bytes_to_words(b: &[u8]) -> &[Unaligned<u32>] {
unsafe { slice::from_raw_parts(b.as_ptr() as *const Unaligned<u32>, b.len() / 4) }
}
fn words_to_bytes(w: &[u32]) -> &[u8] {
unsafe { slice::from_raw_parts(w.as_ptr() as *const u8, w.len() * 4) }
}