Auto merge of #80115 - tgnottingham:specialize_opaque_u8_sequences, r=oli-obk

rustc_serialize: specialize opaque encoding and decoding of some u8 sequences

This specializes encoding and decoding of some contiguous u8 sequences to use a more efficient implementation. The default implementations process each u8 individually, but that isn't necessary for the opaque encoder and decoder. The opaque encoding for u8s is a no-op, so we can just copy entire sequences as-is, rather than process them byte by byte.

This also changes some encode and decode implementations for contiguous sequences to forward to the slice and vector implementations, so that they can take advantage of the new specialization when applicable.
This commit is contained in:
bors 2021-01-02 09:52:26 +00:00
commit 929f66af9b
7 changed files with 84 additions and 47 deletions

View File

@ -4,7 +4,7 @@ use rustc_serialize::{
Decodable, Encodable,
};
use std::hash::{Hash, Hasher};
use std::mem;
use std::mem::{self, MaybeUninit};
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)]
pub struct Fingerprint(u64, u64);
@ -61,7 +61,7 @@ impl Fingerprint {
}
pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result<Fingerprint, String> {
let mut bytes = [0; 16];
let mut bytes: [MaybeUninit<u8>; 16] = MaybeUninit::uninit_array();
decoder.read_raw_bytes(&mut bytes)?;

View File

@ -203,7 +203,7 @@ fn encodable_body(
#field_name,
#field_idx,
|__encoder|
::rustc_serialize::Encodable::encode(#bind_ident, __encoder),
::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder),
) {
::std::result::Result::Ok(()) => (),
::std::result::Result::Err(__err)
@ -237,7 +237,7 @@ fn encodable_body(
__encoder,
#field_idx,
|__encoder|
::rustc_serialize::Encodable::encode(#bind_ident, __encoder),
::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder),
) {
::std::result::Result::Ok(()) => (),
::std::result::Result::Err(__err)

View File

@ -807,6 +807,15 @@ impl<'a, 'tcx> TyDecoder<'tcx> for CacheDecoder<'a, 'tcx> {
crate::implement_ty_decoder!(CacheDecoder<'a, 'tcx>);
// This ensures that the `Decodable<opaque::Decoder>::decode` specialization for `Vec<u8>` is used
// when a `CacheDecoder` is passed to `Decodable::decode`. Unfortunately, we have to manually opt
// into specializations this way, given how `CacheDecoder` and the decoding traits currently work.
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Vec<u8> {
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> {
Decodable::decode(&mut d.opaque)
}
}
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for SyntaxContext {
fn decode(decoder: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> {
let syntax_contexts = decoder.syntax_contexts;
@ -1149,6 +1158,16 @@ where
}
}
// This ensures that the `Encodable<opaque::Encoder>::encode` specialization for byte slices
// is used when a `CacheEncoder` having an `opaque::Encoder` is passed to `Encodable::encode`.
// Unfortunately, we have to manually opt into specializations this way, given how `CacheEncoder`
// and the encoding traits currently work.
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx, opaque::Encoder>> for [u8] {
fn encode(&self, e: &mut CacheEncoder<'a, 'tcx, opaque::Encoder>) -> opaque::EncodeResult {
self.encode(e.encoder)
}
}
// An integer that will always encode to 8 bytes.
struct IntEncodedWithFixedSize(u64);

View File

@ -11,12 +11,8 @@ use smallvec::{Array, SmallVec};
impl<S: Encoder, A: Array<Item: Encodable<S>>> Encodable<S> for SmallVec<A> {
fn encode(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_seq(self.len(), |s| {
for (i, e) in self.iter().enumerate() {
s.emit_seq_elt(i, |s| e.encode(s))?;
}
Ok(())
})
let slice: &[A::Item] = self;
slice.encode(s)
}
}
@ -292,46 +288,28 @@ where
impl<E: Encoder, T: Encodable<E>> Encodable<E> for Rc<[T]> {
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_seq(self.len(), |s| {
for (index, e) in self.iter().enumerate() {
s.emit_seq_elt(index, |s| e.encode(s))?;
}
Ok(())
})
let slice: &[T] = self;
slice.encode(s)
}
}
impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<[T]> {
fn decode(d: &mut D) -> Result<Rc<[T]>, D::Error> {
d.read_seq(|d, len| {
let mut vec = Vec::with_capacity(len);
for index in 0..len {
vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?);
}
Ok(vec.into())
})
let vec: Vec<T> = Decodable::decode(d)?;
Ok(vec.into())
}
}
impl<E: Encoder, T: Encodable<E>> Encodable<E> for Arc<[T]> {
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_seq(self.len(), |s| {
for (index, e) in self.iter().enumerate() {
s.emit_seq_elt(index, |s| e.encode(s))?;
}
Ok(())
})
let slice: &[T] = self;
slice.encode(s)
}
}
impl<D: Decoder, T: Decodable<D>> Decodable<D> for Arc<[T]> {
fn decode(d: &mut D) -> Result<Arc<[T]>, D::Error> {
d.read_seq(|d, len| {
let mut vec = Vec::with_capacity(len);
for index in 0..len {
vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?);
}
Ok(vec.into())
})
let vec: Vec<T> = Decodable::decode(d)?;
Ok(vec.into())
}
}

View File

@ -14,6 +14,8 @@ Core encoding and decoding interfaces.
#![feature(nll)]
#![feature(associated_type_bounds)]
#![cfg_attr(bootstrap, feature(min_const_generics))]
#![feature(min_specialization)]
#![feature(vec_spare_capacity)]
#![cfg_attr(test, feature(test))]
#![allow(rustc::internal)]

View File

@ -1,6 +1,8 @@
use crate::leb128::{self, read_signed_leb128, write_signed_leb128};
use crate::serialize;
use std::borrow::Cow;
use std::mem::MaybeUninit;
use std::ptr;
// -----------------------------------------------------------------------------
// Encoder
@ -179,11 +181,19 @@ impl<'a> Decoder<'a> {
}
#[inline]
pub fn read_raw_bytes(&mut self, s: &mut [u8]) -> Result<(), String> {
pub fn read_raw_bytes(&mut self, s: &mut [MaybeUninit<u8>]) -> Result<(), String> {
let start = self.position;
let end = start + s.len();
assert!(end <= self.data.len());
s.copy_from_slice(&self.data[start..end]);
// SAFETY: Both `src` and `dst` point to at least `s.len()` elements:
// `src` points to at least `s.len()` elements by above assert, and
// `dst` points to `s.len()` elements by derivation from `s`.
unsafe {
let src = self.data.as_ptr().add(start);
let dst = s.as_mut_ptr() as *mut u8;
ptr::copy_nonoverlapping(src, dst, s.len());
}
self.position = end;
@ -316,3 +326,36 @@ impl<'a> serialize::Decoder for Decoder<'a> {
err.to_string()
}
}
// Specializations for contiguous byte sequences follow. The default implementations for slices
// encode and decode each element individually. This isn't necessary for `u8` slices when using
// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding.
// Therefore, we can use more efficient implementations that process the entire sequence at once.
// Specialize encoding byte slices. This specialization also applies to encoding `Vec<u8>`s, etc.,
// since the default implementations call `encode` on their slices internally.
impl serialize::Encodable<Encoder> for [u8] {
fn encode(&self, e: &mut Encoder) -> EncodeResult {
serialize::Encoder::emit_usize(e, self.len())?;
e.emit_raw_bytes(self);
Ok(())
}
}
// Specialize decoding `Vec<u8>`. This specialization also applies to decoding `Box<[u8]>`s, etc.,
// since the default implementations call `decode` to produce a `Vec<u8>` internally.
impl<'a> serialize::Decodable<Decoder<'a>> for Vec<u8> {
fn decode(d: &mut Decoder<'a>) -> Result<Self, String> {
let len = serialize::Decoder::read_usize(d)?;
let mut v = Vec::with_capacity(len);
let buf = &mut v.spare_capacity_mut()[..len];
d.read_raw_bytes(buf)?;
unsafe {
v.set_len(len);
}
Ok(v)
}
}

View File

@ -527,7 +527,7 @@ impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<T> {
}
impl<S: Encoder, T: Encodable<S>> Encodable<S> for [T] {
fn encode(&self, s: &mut S) -> Result<(), S::Error> {
default fn encode(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_seq(self.len(), |s| {
for (i, e) in self.iter().enumerate() {
s.emit_seq_elt(i, |s| e.encode(s))?
@ -545,7 +545,7 @@ impl<S: Encoder, T: Encodable<S>> Encodable<S> for Vec<T> {
}
impl<D: Decoder, T: Decodable<D>> Decodable<D> for Vec<T> {
fn decode(d: &mut D) -> Result<Vec<T>, D::Error> {
default fn decode(d: &mut D) -> Result<Vec<T>, D::Error> {
d.read_seq(|d, len| {
let mut v = Vec::with_capacity(len);
for i in 0..len {
@ -591,13 +591,8 @@ where
[T]: ToOwned<Owned = Vec<T>>,
{
fn decode(d: &mut D) -> Result<Cow<'static, [T]>, D::Error> {
d.read_seq(|d, len| {
let mut v = Vec::with_capacity(len);
for i in 0..len {
v.push(d.read_seq_elt(i, |d| Decodable::decode(d))?);
}
Ok(Cow::Owned(v))
})
let v: Vec<T> = Decodable::decode(d)?;
Ok(Cow::Owned(v))
}
}