Auto merge of #50319 - nagisa:align_to, r=alexcrichton
Implement `[T]::align_to`

Note that this PR deviates slightly from what was accepted in the RFC by making `align_offset` return an offset in elements, rather than bytes. This is necessary to sanely support `[T]::align_to` and also simply makes more sense™. The caveat is that trying to align a pointer to a ZST is now equivalent to an `is_aligned` check, rather than anything else (as no number of ZST elements will align a misaligned ZST pointer).

It also implements `align_to` slightly differently than proposed in the RFC, to properly handle cases where the sizes of `T` and `U` aren't co-prime. Furthermore, a promise is made that the slice containing `U`s will be as large as possible (contrary to the RFC) – otherwise the function is quite useless.

The implementation uses quite a few underhanded tricks and takes heavy advantage of the fact that alignment is a power of two to optimise the machine code down to as few known-expensive instructions as possible. Currently, calling `ptr.align_offset` with an `align` that is unknown at compile time results in code with just a single "expensive" modulo operation; the rest is "cheap" arithmetic and bitwise ops.

cc https://github.com/rust-lang/rust/issues/44488 @oli-obk

As mentioned in the commit message for align_offset, many thanks go to Chris McDonald.
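For orientation, here is a minimal, hedged sketch of how the two new APIs are intended to be used together (illustrative only; it assumes a nightly toolchain with the unstable `align_offset` and `slice_align_to` feature gates added by this PR):

```rust
#![feature(align_offset, slice_align_to)]

fn main() {
    let bytes = [5u8, 6, 7, 8, 9];

    // `align_offset` now counts elements, not bytes; for `u8` the two coincide.
    let ptr = &bytes[1] as *const u8;
    let offset = ptr.align_offset(std::mem::align_of::<u16>());
    assert!(offset == 0 || offset == 1);

    // `align_to` splits into a prefix, a maximal aligned middle slice, and a suffix.
    let (prefix, shorts, suffix) = unsafe { bytes.align_to::<u16>() };
    assert_eq!(prefix.len() + 2 * shorts.len() + suffix.len(), bytes.len());
}
```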
commit 37a409177c
@@ -1364,38 +1364,8 @@ extern "rust-intrinsic" {
    /// source as well as std's catch implementation.
    pub fn try(f: fn(*mut u8), data: *mut u8, local_ptr: *mut u8) -> i32;

    /// Computes the byte offset that needs to be applied to `ptr` in order to
    /// make it aligned to `align`.
    /// If it is not possible to align `ptr`, the implementation returns
    /// `usize::max_value()`.
    ///
    /// There are no guarantees whatsoever that offsetting the pointer will not
    /// overflow or go beyond the allocation that `ptr` points into.
    /// It is up to the caller to ensure that the returned offset is correct
    /// in all terms other than alignment.
    ///
    /// # Examples
    ///
    /// Accessing adjacent `u8` as `u16`
    ///
    /// ```
    /// # #![feature(core_intrinsics)]
    /// # fn foo(n: usize) {
    /// # use std::intrinsics::align_offset;
    /// # use std::mem::align_of;
    /// # unsafe {
    /// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
    /// let ptr = &x[n] as *const u8;
    /// let offset = align_offset(ptr as *const (), align_of::<u16>());
    /// if offset < x.len() - n - 1 {
    ///     let u16_ptr = ptr.offset(offset as isize) as *const u16;
    ///     assert_ne!(*u16_ptr, 500);
    /// } else {
    ///     // while the pointer can be aligned via `offset`, it would point
    ///     // outside the allocation
    /// }
    /// # } }
    /// ```
    #[cfg(stage0)]
    /// docs my friends, its friday!
    pub fn align_offset(ptr: *const (), align: usize) -> usize;

    /// Emits a `!nontemporal` store according to LLVM (see their docs).
@@ -1203,15 +1203,22 @@ impl<T: ?Sized> *const T {
        copy_nonoverlapping(self, dest, count)
    }

    /// Computes the byte offset that needs to be applied in order to
    /// make the pointer aligned to `align`.
    /// Computes the offset that needs to be applied to the pointer in order to make it aligned to
    /// `align`.
    ///
    /// If it is not possible to align the pointer, the implementation returns
    /// `usize::max_value()`.
    ///
    /// There are no guarantees whatsoever that offsetting the pointer will not
    /// overflow or go beyond the allocation that the pointer points into.
    /// It is up to the caller to ensure that the returned offset is correct
    /// in all terms other than alignment.
    /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
    /// used with the `offset` or `offset_to` methods.
    ///
    /// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
    /// beyond the allocation that the pointer points into. It is up to the caller to ensure that
    /// the returned offset is correct in all terms other than alignment.
    ///
    /// # Panics
    ///
    /// The function panics if `align` is not a power-of-two.
    ///
    /// # Examples
    ///
@@ -1235,13 +1242,30 @@ impl<T: ?Sized> *const T {
    /// # } }
    /// ```
    #[unstable(feature = "align_offset", issue = "44488")]
    pub fn align_offset(self, align: usize) -> usize {
    #[cfg(not(stage0))]
    pub fn align_offset(self, align: usize) -> usize where T: Sized {
        if !align.is_power_of_two() {
            panic!("align_offset: align is not a power-of-two");
        }
        unsafe {
            intrinsics::align_offset(self as *const _, align)
            align_offset(self, align)
        }
    }

    /// definitely docs.
    #[unstable(feature = "align_offset", issue = "44488")]
    #[cfg(stage0)]
    pub fn align_offset(self, align: usize) -> usize where T: Sized {
        if !align.is_power_of_two() {
            panic!("align_offset: align is not a power-of-two");
        }
        unsafe {
            intrinsics::align_offset(self as *const (), align)
        }
    }
}

#[lang = "mut_ptr"]
impl<T: ?Sized> *mut T {
    /// Returns `true` if the pointer is null.
@@ -1574,44 +1598,6 @@ impl<T: ?Sized> *mut T {
        (self as *const T).wrapping_offset_from(origin)
    }

    /// Computes the byte offset that needs to be applied in order to
    /// make the pointer aligned to `align`.
    /// If it is not possible to align the pointer, the implementation returns
    /// `usize::max_value()`.
    ///
    /// There are no guarantees whatsoever that offsetting the pointer will not
    /// overflow or go beyond the allocation that the pointer points into.
    /// It is up to the caller to ensure that the returned offset is correct
    /// in all terms other than alignment.
    ///
    /// # Examples
    ///
    /// Accessing adjacent `u8` as `u16`
    ///
    /// ```
    /// # #![feature(align_offset)]
    /// # fn foo(n: usize) {
    /// # use std::mem::align_of;
    /// # unsafe {
    /// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
    /// let ptr = &x[n] as *const u8;
    /// let offset = ptr.align_offset(align_of::<u16>());
    /// if offset < x.len() - n - 1 {
    ///     let u16_ptr = ptr.offset(offset as isize) as *const u16;
    ///     assert_ne!(*u16_ptr, 500);
    /// } else {
    ///     // while the pointer can be aligned via `offset`, it would point
    ///     // outside the allocation
    /// }
    /// # } }
    /// ```
    #[unstable(feature = "align_offset", issue = "44488")]
    pub fn align_offset(self, align: usize) -> usize {
        unsafe {
            intrinsics::align_offset(self as *const _, align)
        }
    }

    /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
@@ -2281,8 +2267,200 @@ impl<T: ?Sized> *mut T {
    {
        swap(self, with)
    }

    /// Computes the offset that needs to be applied to the pointer in order to make it aligned to
    /// `align`.
    ///
    /// If it is not possible to align the pointer, the implementation returns
    /// `usize::max_value()`.
    ///
    /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
    /// used with the `offset` or `offset_to` methods.
    ///
    /// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
    /// beyond the allocation that the pointer points into. It is up to the caller to ensure that
    /// the returned offset is correct in all terms other than alignment.
    ///
    /// # Panics
    ///
    /// The function panics if `align` is not a power-of-two.
    ///
    /// # Examples
    ///
    /// Accessing adjacent `u8` as `u16`
    ///
    /// ```
    /// # #![feature(align_offset)]
    /// # fn foo(n: usize) {
    /// # use std::mem::align_of;
    /// # unsafe {
    /// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
    /// let ptr = &x[n] as *const u8;
    /// let offset = ptr.align_offset(align_of::<u16>());
    /// if offset < x.len() - n - 1 {
    ///     let u16_ptr = ptr.offset(offset as isize) as *const u16;
    ///     assert_ne!(*u16_ptr, 500);
    /// } else {
    ///     // while the pointer can be aligned via `offset`, it would point
    ///     // outside the allocation
    /// }
    /// # } }
    /// ```
    #[unstable(feature = "align_offset", issue = "44488")]
    #[cfg(not(stage0))]
    pub fn align_offset(self, align: usize) -> usize where T: Sized {
        if !align.is_power_of_two() {
            panic!("align_offset: align is not a power-of-two");
        }
        unsafe {
            align_offset(self, align)
        }
    }

    /// definitely docs.
    #[unstable(feature = "align_offset", issue = "44488")]
    #[cfg(stage0)]
    pub fn align_offset(self, align: usize) -> usize where T: Sized {
        if !align.is_power_of_two() {
            panic!("align_offset: align is not a power-of-two");
        }
        unsafe {
            intrinsics::align_offset(self as *const (), align)
        }
    }
}

/// Align pointer `p`.
///
/// Calculate offset (in terms of elements of `stride` stride) that has to be applied
/// to pointer `p` so that pointer `p` would get aligned to `a`.
///
/// Note: This implementation has been carefully tailored to not panic. It is UB for this to panic.
/// The only real change that can be made here is change of `INV_TABLE_MOD_16` and associated
/// constants.
///
/// If we ever decide to make it possible to call the intrinsic with `a` that is not a
/// power-of-two, it will probably be more prudent to just change to a naive implementation rather
/// than trying to adapt this to accommodate that change.
///
/// Any questions go to @nagisa.
#[lang="align_offset"]
#[cfg(not(stage0))]
pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
    /// Calculate multiplicative modular inverse of `x` modulo `m`.
    ///
    /// This implementation is tailored for align_offset and has following preconditions:
    ///
    /// * `m` is a power-of-two;
    /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
    ///
    /// Implementation of this function shall not panic. Ever.
    #[inline]
    fn mod_inv(x: usize, m: usize) -> usize {
        /// Multiplicative modular inverse table modulo 2⁴ = 16.
        ///
        /// Note, that this table does not contain values where inverse does not exist (i.e. for
        /// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
        const INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
        /// Modulo for which the `INV_TABLE_MOD_16` is intended.
        const INV_TABLE_MOD: usize = 16;
        /// INV_TABLE_MOD²
        const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD;

        let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1];
        if m <= INV_TABLE_MOD {
            return table_inverse & (m - 1);
        } else {
            // We iterate "up" using the following formula:
            //
            // $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$
            //
            // until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`.
            let mut inverse = table_inverse;
            let mut going_mod = INV_TABLE_MOD_SQUARED;
            loop {
                // y = y * (2 - xy) mod n
                //
                // Note, that we use wrapping operations here intentionally – the original formula
                // uses e.g. subtraction `mod n`. It is entirely fine to do them `mod
                // usize::max_value()` instead, because we take the result `mod n` at the end
                // anyway.
                inverse = inverse.wrapping_mul(
                    2usize.wrapping_sub(x.wrapping_mul(inverse))
                ) & (going_mod - 1);
                if going_mod > m {
                    return inverse & (m - 1);
                }
                going_mod = going_mod.wrapping_mul(going_mod);
            }
        }
    }

    let stride = ::mem::size_of::<T>();
    let a_minus_one = a.wrapping_sub(1);
    let pmoda = p as usize & a_minus_one;

    if pmoda == 0 {
        // Already aligned. Yay!
        return 0;
    }

    if stride <= 1 {
        return if stride == 0 {
            // If the pointer is not aligned, and the element is zero-sized, then no amount of
            // elements will ever align the pointer.
            !0
        } else {
            a.wrapping_sub(pmoda)
        };
    }

    let smoda = stride & a_minus_one;
    // a is power-of-two so cannot be 0. stride = 0 is handled above.
    let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
    let gcd = 1usize << gcdpow;

    if gcd == 1 {
        // This branch solves for the variable $o$ in following linear congruence equation:
        //
        // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
        // ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
        //
        // where
        //
        // * a, s are co-prime
        //
        // This gives us the formula below:
        //
        // o = (a - (p mod a)) * (s⁻¹ mod a) * s
        //
        // The first term is “the relative alignment of p to a”, the second term is “how does
        // incrementing p by one s change the relative alignment of p”, the third term is
        // translating change in units of s to a byte count.
        //
        // Furthermore, the result produced by this solution is not “minimal”, so it is necessary
        // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
        // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
        //
        // (Author note: we decided later on to express the offset in "elements" rather than bytes,
        // which drops the multiplication by `s` on both sides of the modulo.)
        return intrinsics::unchecked_rem(a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)), a);
    }

    if p as usize & (gcd - 1) == 0 {
        // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
        // formula is used.
        let j = a.wrapping_sub(pmoda) >> gcdpow;
        let k = smoda >> gcdpow;
        return intrinsics::unchecked_rem(j.wrapping_mul(mod_inv(k, a)), a >> gcdpow);
    }

    // Cannot be aligned at all.
    return usize::max_value();
}


// Equality for pointers
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: ?Sized> PartialEq for *const T {
@@ -1696,6 +1696,173 @@ impl<T> [T] {
                self.as_mut_ptr(), other.as_mut_ptr(), self.len());
        }
    }

    /// Function to calculate lengths of the middle and trailing slice for `align_to{,_mut}`.
    fn align_to_offsets<U>(&self) -> (usize, usize) {
        // What we gonna do about `rest` is figure out what multiple of `U`s we can put in a
        // lowest number of `T`s. And how many `T`s we need for each such "multiple".
        //
        // Consider for example T=u8 U=u16. Then we can put 1 U in 2 Ts. Simple. Now, consider
        // for example a case where size_of::<T> = 16, size_of::<U> = 24. We can put 2 Us in
        // place of every 3 Ts in the `rest` slice. A bit more complicated.
        //
        // Formula to calculate this is:
        //
        // Us = lcm(size_of::<T>, size_of::<U>) / size_of::<U>
        // Ts = lcm(size_of::<T>, size_of::<U>) / size_of::<T>
        //
        // Expanded and simplified:
        //
        // Us = size_of::<T> / gcd(size_of::<T>, size_of::<U>)
        // Ts = size_of::<U> / gcd(size_of::<T>, size_of::<U>)
        //
        // Luckily since all this is constant-evaluated... performance here matters not!
        #[inline]
        fn gcd(a: usize, b: usize) -> usize {
            // iterative stein’s algorithm
            // We should still make this `const fn` (and revert to recursive algorithm if we do)
            // because relying on llvm to consteval all this is… well, it makes me
            let (ctz_a, mut ctz_b) = unsafe {
                if a == 0 { return b; }
                if b == 0 { return a; }
                (::intrinsics::cttz_nonzero(a), ::intrinsics::cttz_nonzero(b))
            };
            let k = ctz_a.min(ctz_b);
            let mut a = a >> ctz_a;
            let mut b = b;
            loop {
                // remove all factors of 2 from b
                b >>= ctz_b;
                if a > b {
                    ::mem::swap(&mut a, &mut b);
                }
                b = b - a;
                unsafe {
                    if b == 0 {
                        break;
                    }
                    ctz_b = ::intrinsics::cttz_nonzero(b);
                }
            }
            return a << k;
        }
        let gcd: usize = gcd(::mem::size_of::<T>(), ::mem::size_of::<U>());
        let ts: usize = ::mem::size_of::<U>() / gcd;
        let us: usize = ::mem::size_of::<T>() / gcd;

        // Armed with this knowledge, we can find how many `U`s we can fit!
        let us_len = self.len() / ts * us;
        // And how many `T`s will be in the trailing slice!
        let ts_len = self.len() % ts;
        return (us_len, ts_len);
    }

    /// Transmute the slice to a slice of another type, ensuring alignment of the types is
    /// maintained.
    ///
    /// This method splits the slice into three distinct slices: prefix, correctly aligned middle
    /// slice of a new type, and the suffix slice. The middle slice will have the greatest length
    /// possible for a given type and input slice.
    ///
    /// This method has no purpose when either input element `T` or output element `U` are
    /// zero-sized and will return the original slice without splitting anything.
    ///
    /// # Unsafety
    ///
    /// This method is essentially a `transmute` with respect to the elements in the returned
    /// middle slice, so all the usual caveats pertaining to `transmute::<T, U>` also apply here.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # #![feature(slice_align_to)]
    /// unsafe {
    ///     let bytes: [u8; 7] = [1, 2, 3, 4, 5, 6, 7];
    ///     let (prefix, shorts, suffix) = bytes.align_to::<u16>();
    ///     // less_efficient_algorithm_for_bytes(prefix);
    ///     // more_efficient_algorithm_for_aligned_shorts(shorts);
    ///     // less_efficient_algorithm_for_bytes(suffix);
    /// }
    /// ```
    #[unstable(feature = "slice_align_to", issue = "44488")]
    #[cfg(not(stage0))]
    pub unsafe fn align_to<U>(&self) -> (&[T], &[U], &[T]) {
        // Note that most of this function will be constant-evaluated,
        if ::mem::size_of::<U>() == 0 || ::mem::size_of::<T>() == 0 {
            // handle ZSTs specially, which is – don't handle them at all.
            return (self, &[], &[]);
        }

        // First, find at what point do we split between the first and 2nd slice. Easy with
        // ptr.align_offset.
        let ptr = self.as_ptr();
        let offset = ::ptr::align_offset(ptr, ::mem::align_of::<U>());
        if offset > self.len() {
            return (self, &[], &[]);
        } else {
            let (left, rest) = self.split_at(offset);
            let (us_len, ts_len) = rest.align_to_offsets::<U>();
            return (left,
                    from_raw_parts(rest.as_ptr() as *const U, us_len),
                    from_raw_parts(rest.as_ptr().offset((rest.len() - ts_len) as isize), ts_len))
        }
    }

    /// Transmute the slice to a slice of another type, ensuring alignment of the types is
    /// maintained.
    ///
    /// This method splits the slice into three distinct slices: prefix, correctly aligned middle
    /// slice of a new type, and the suffix slice. The middle slice will have the greatest length
    /// possible for a given type and input slice.
    ///
    /// This method has no purpose when either input element `T` or output element `U` are
    /// zero-sized and will return the original slice without splitting anything.
    ///
    /// # Unsafety
    ///
    /// This method is essentially a `transmute` with respect to the elements in the returned
    /// middle slice, so all the usual caveats pertaining to `transmute::<T, U>` also apply here.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # #![feature(slice_align_to)]
    /// unsafe {
    ///     let mut bytes: [u8; 7] = [1, 2, 3, 4, 5, 6, 7];
    ///     let (prefix, shorts, suffix) = bytes.align_to_mut::<u16>();
    ///     // less_efficient_algorithm_for_bytes(prefix);
    ///     // more_efficient_algorithm_for_aligned_shorts(shorts);
    ///     // less_efficient_algorithm_for_bytes(suffix);
    /// }
    /// ```
    #[unstable(feature = "slice_align_to", issue = "44488")]
    #[cfg(not(stage0))]
    pub unsafe fn align_to_mut<U>(&mut self) -> (&mut [T], &mut [U], &mut [T]) {
        // Note that most of this function will be constant-evaluated,
        if ::mem::size_of::<U>() == 0 || ::mem::size_of::<T>() == 0 {
            // handle ZSTs specially, which is – don't handle them at all.
            return (self, &mut [], &mut []);
        }

        // First, find at what point do we split between the first and 2nd slice. Easy with
        // ptr.align_offset.
        let ptr = self.as_ptr();
        let offset = ::ptr::align_offset(ptr, ::mem::align_of::<U>());
        if offset > self.len() {
            return (self, &mut [], &mut []);
        } else {
            let (left, rest) = self.split_at_mut(offset);
            let (us_len, ts_len) = rest.align_to_offsets::<U>();
            let mut_ptr = rest.as_mut_ptr();
            return (left,
                    from_raw_parts_mut(mut_ptr as *mut U, us_len),
                    from_raw_parts_mut(mut_ptr.offset((rest.len() - ts_len) as isize), ts_len))
        }
    }
}

#[lang = "slice_u8"]
@@ -41,6 +41,8 @@
#![feature(try_from)]
#![feature(try_trait)]
#![feature(exact_chunks)]
#![feature(slice_align_to)]
#![feature(align_offset)]
#![feature(reverse_bits)]
#![feature(iterator_find_map)]
#![feature(slice_internals)]
@@ -296,3 +296,92 @@ fn write_unaligned_drop() {
    }
    DROPS.with(|d| assert_eq!(*d.borrow(), [0]));
}

#[test]
fn align_offset_zst() {
    // For pointers of stride = 0, the pointer is already aligned or it cannot be aligned at
    // all, because no amount of elements will align the pointer.
    let mut p = 1;
    while p < 1024 {
        assert_eq!((p as *const ()).align_offset(p), 0);
        if p != 1 {
            assert_eq!(((p + 1) as *const ()).align_offset(p), !0);
        }
        p = (p + 1).next_power_of_two();
    }
}

#[test]
fn align_offset_stride1() {
    // For pointers of stride = 1, the pointer can always be aligned. The offset is equal to
    // number of bytes.
    let mut align = 1;
    while align < 1024 {
        for ptr in 1..2*align {
            let expected = ptr % align;
            let offset = if expected == 0 { 0 } else { align - expected };
            assert_eq!((ptr as *const u8).align_offset(align), offset,
                       "ptr = {}, align = {}, size = 1", ptr, align);
        }
        align = (align + 1).next_power_of_two();
    }
}

#[test]
fn align_offset_weird_strides() {
    #[repr(packed)]
    struct A3(u16, u8);
    struct A4(u32);
    #[repr(packed)]
    struct A5(u32, u8);
    #[repr(packed)]
    struct A6(u32, u16);
    #[repr(packed)]
    struct A7(u32, u16, u8);
    #[repr(packed)]
    struct A8(u32, u32);
    #[repr(packed)]
    struct A9(u32, u32, u8);
    #[repr(packed)]
    struct A10(u32, u32, u16);

    unsafe fn test_weird_stride<T>(ptr: *const T, align: usize) -> bool {
        let numptr = ptr as usize;
        let mut expected = usize::max_value();
        // Naive but definitely correct way to find the *first* aligned element of stride::<T>.
        for el in 0..align {
            if (numptr + el * ::std::mem::size_of::<T>()) % align == 0 {
                expected = el;
                break;
            }
        }
        let got = ptr.align_offset(align);
        if got != expected {
            eprintln!("aligning {:p} (with stride of {}) to {}, expected {}, got {}", ptr,
                      ::std::mem::size_of::<T>(), align, expected, got);
            return true;
        }
        return false;
    }

    // For pointers of stride != 1, we verify the algorithm against the naivest possible
    // implementation
    let mut align = 1;
    let mut x = false;
    while align < 1024 {
        for ptr in 1usize..4*align {
            unsafe {
                x |= test_weird_stride::<A3>(ptr as *const A3, align);
                x |= test_weird_stride::<A4>(ptr as *const A4, align);
                x |= test_weird_stride::<A5>(ptr as *const A5, align);
                x |= test_weird_stride::<A6>(ptr as *const A6, align);
                x |= test_weird_stride::<A7>(ptr as *const A7, align);
                x |= test_weird_stride::<A8>(ptr as *const A8, align);
                x |= test_weird_stride::<A9>(ptr as *const A9, align);
                x |= test_weird_stride::<A10>(ptr as *const A10, align);
            }
        }
        align = (align + 1).next_power_of_two();
    }
    assert!(!x);
}
@@ -812,3 +812,37 @@ pub mod memchr {
        }
    }
}

#[test]
fn test_align_to_simple() {
    let bytes = [1u8, 2, 3, 4, 5, 6, 7];
    let (prefix, aligned, suffix) = unsafe { bytes.align_to::<u16>() };
    assert_eq!(aligned.len(), 3);
    assert!(prefix == [1] || suffix == [7]);
    let expect1 = [1 << 8 | 2, 3 << 8 | 4, 5 << 8 | 6];
    let expect2 = [1 | 2 << 8, 3 | 4 << 8, 5 | 6 << 8];
    let expect3 = [2 << 8 | 3, 4 << 8 | 5, 6 << 8 | 7];
    let expect4 = [2 | 3 << 8, 4 | 5 << 8, 6 | 7 << 8];
    assert!(aligned == expect1 || aligned == expect2 || aligned == expect3 || aligned == expect4,
            "aligned={:?} expected={:?} || {:?} || {:?} || {:?}",
            aligned, expect1, expect2, expect3, expect4);
}

#[test]
fn test_align_to_zst() {
    let bytes = [1, 2, 3, 4, 5, 6, 7];
    let (prefix, aligned, suffix) = unsafe { bytes.align_to::<()>() };
    assert_eq!(aligned.len(), 0);
    assert!(prefix == [1, 2, 3, 4, 5, 6, 7] || suffix == [1, 2, 3, 4, 5, 6, 7]);
}

#[test]
fn test_align_to_non_trivial() {
    #[repr(align(8))] struct U64(u64, u64);
    #[repr(align(8))] struct U64U64U32(u64, u64, u32);
    let data = [U64(1, 2), U64(3, 4), U64(5, 6), U64(7, 8), U64(9, 10), U64(11, 12), U64(13, 14),
                U64(15, 16)];
    let (prefix, aligned, suffix) = unsafe { data.align_to::<U64U64U32>() };
    assert_eq!(aligned.len(), 4);
    assert_eq!(prefix.len() + suffix.len(), 2);
}
@@ -348,6 +348,9 @@ language_item_table! {
    I128ShroFnLangItem, "i128_shro", i128_shro_fn;
    U128ShroFnLangItem, "u128_shro", u128_shro_fn;

    // Align offset for stride != 1, must not panic.
    AlignOffsetLangItem, "align_offset", align_offset_fn;

    TerminationTraitLangItem, "termination", termination;
}
@@ -389,18 +389,6 @@ pub fn codegen_intrinsic_call<'a, 'tcx>(bx: &Builder<'a, 'tcx>,
            args[0].deref(bx.cx).codegen_get_discr(bx, ret_ty)
        }

        "align_offset" => {
            // `ptr as usize`
            let ptr_val = bx.ptrtoint(args[0].immediate(), bx.cx.isize_ty);
            // `ptr_val % align`
            let align = args[1].immediate();
            let offset = bx.urem(ptr_val, align);
            let zero = C_null(bx.cx.isize_ty);
            // `offset == 0`
            let is_zero = bx.icmp(llvm::IntPredicate::IntEQ, offset, zero);
            // `if offset == 0 { 0 } else { align - offset }`
            bx.select(is_zero, zero, bx.sub(align, offset))
        }
        name if name.starts_with("simd_") => {
            match generic_simd_intrinsic(bx, name,
                                         callee_ty,
@@ -314,11 +314,6 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
            (0, vec![tcx.mk_fn_ptr(fn_ty), mut_u8, mut_u8], tcx.types.i32)
        }

        "align_offset" => {
            let ptr_ty = tcx.mk_imm_ptr(tcx.mk_nil());
            (0, vec![ptr_ty, tcx.types.usize], tcx.types.usize)
        },

        "nontemporal_store" => {
            (1, vec![ tcx.mk_mut_ptr(param(0)), param(0) ], tcx.mk_nil())
        }
@@ -1,16 +0,0 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![feature(align_offset)]

fn main() {
    let x = 1 as *const u8;
    assert_eq!(x.align_offset(8), 7);
}
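As a standalone illustration of the contract the new intrinsic is tested against (a hedged sketch added for readability, not part of the diff): the offset is counted in elements of the pointee type, and `usize::max_value()` signals that alignment is impossible. This mirrors the naive reference loop used by `align_offset_weird_strides` above.

```rust
// Naive reference for the `align_offset` contract (illustrative only):
// the smallest number of elements `o` such that `p + o * stride` is `a`-aligned,
// or usize::max_value() if no such `o` exists. `a` must be a power of two.
fn naive_align_offset(p: usize, stride: usize, a: usize) -> usize {
    assert!(a.is_power_of_two());
    if stride == 0 {
        // A zero-sized stride can never fix misalignment.
        return if p % a == 0 { 0 } else { usize::max_value() };
    }
    for o in 0..a {
        if (p + o * stride) % a == 0 {
            return o;
        }
    }
    usize::max_value()
}

fn main() {
    // A pointer value of 2 with stride 4 can never become 8-aligned.
    assert_eq!(naive_align_offset(2, 4, 8), usize::max_value());
    // A pointer value of 2 with stride 3 becomes 8-aligned after 2 elements (2 + 2*3 = 8).
    assert_eq!(naive_align_offset(2, 3, 8), 2);
}
```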