Implement a faster sort algorithm
This is a complete rewrite of the standard sort algorithm. The new algorithm is a simplified variant of TimSort. In summary, the changes are:

* Improved performance, especially on partially sorted inputs.
* Performs fewer comparisons on both random and partially sorted inputs.
* Decreased the size of temporary memory: the new sort allocates 4x less.
parent ff261d3a6b
commit c8d73ea68a
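For context, a small usage sketch (not part of the commit) exercising the stable sort API that the new algorithm sits behind; the input values are made up for illustration:

```rust
// Illustrative only: the public API is unchanged by this commit; this just exercises
// the stable sort that the new algorithm backs. The values are made up.
fn main() {
    // A partially sorted input: the new algorithm picks up the pre-existing runs.
    let mut v = vec![1, 2, 3, 7, 8, 9, 4, 5, 6];
    v.sort();
    assert_eq!(v, [1, 2, 3, 4, 5, 6, 7, 8, 9]);

    // Stability: elements with equal keys keep their original relative order.
    let mut pairs = vec![(1, 'b'), (0, 'a'), (1, 'a'), (0, 'b')];
    pairs.sort_by_key(|&(k, _)| k);
    assert_eq!(pairs, [(0, 'a'), (0, 'b'), (1, 'b'), (1, 'a')]);
}
```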
@@ -47,14 +47,14 @@
#![feature(placement_in)]
#![feature(placement_new_protocol)]
#![feature(shared)]
#![feature(slice_get_slice)]
#![feature(slice_patterns)]
#![feature(specialization)]
#![feature(staged_api)]
#![feature(step_by)]
#![feature(trusted_len)]
#![feature(unicode)]
#![feature(unique)]
#![feature(slice_get_slice)]
#![feature(untagged_unions)]
#![cfg_attr(test, feature(rand, test))]

#![no_std]

@@ -98,8 +98,7 @@
#![cfg_attr(test, allow(unused_imports, dead_code))]

use alloc::boxed::Box;
use core::cmp::Ordering::{self, Greater, Less};
use core::cmp;
use core::cmp::Ordering::{self, Greater};
use core::mem::size_of;
use core::mem;
use core::ptr;
@@ -1042,8 +1041,8 @@ impl<T> [T] {

/// This is equivalent to `self.sort_by(|a, b| a.cmp(b))`.
///
/// This sort is stable and `O(n log n)` worst-case but allocates
/// approximately `2 * n` where `n` is the length of `self`.
/// This sort is stable and `O(n log n)` worst-case, but allocates
/// temporary storage half the size of `self`.
///
/// # Examples
///
@@ -1064,8 +1063,8 @@ impl<T> [T] {
/// Sorts the slice, in place, using `f` to extract a key by which to
/// order the sort by.
///
/// This sort is stable and `O(n log n)` worst-case but allocates
/// approximately `2 * n`, where `n` is the length of `self`.
/// This sort is stable and `O(n log n)` worst-case, but allocates
/// temporary storage half the size of `self`.
///
/// # Examples
///
@@ -1086,8 +1085,8 @@ impl<T> [T] {
/// Sorts the slice, in place, using `compare` to compare
/// elements.
///
/// This sort is stable and `O(n log n)` worst-case but allocates
/// approximately `2 * n`, where `n` is the length of `self`.
/// This sort is stable and `O(n log n)` worst-case, but allocates
/// temporary storage half the size of `self`.
///
/// # Examples
///
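As an aside, a minimal illustration of the `sort_by` behaviour the doc text above describes; the concrete values here are my own, not taken from the commit:

```rust
// Illustrative only: `sort_by` with an explicit comparator, matching the doc text above.
fn main() {
    let mut v = [5, 4, 1, 3, 2];
    v.sort_by(|a, b| a.cmp(b)); // ascending
    assert_eq!(v, [1, 2, 3, 4, 5]);

    v.sort_by(|a, b| b.cmp(a)); // descending, still stable and O(n log n)
    assert_eq!(v, [5, 4, 3, 2, 1]);
}
```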
@@ -1305,213 +1304,332 @@ impl<T: Clone> ToOwned for [T] {
|
||||
// Sorting
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
fn insertion_sort<T, F>(v: &mut [T], mut compare: F)
|
||||
/// Inserts `v[0]` into the pre-sorted sequence `v[1..]` so that the whole `v[..]` becomes sorted.
|
||||
///
|
||||
/// This is the integral subroutine of insertion sort.
|
||||
fn insert_head<T, F>(v: &mut [T], compare: &mut F)
|
||||
where F: FnMut(&T, &T) -> Ordering
|
||||
{
|
||||
let len = v.len() as isize;
|
||||
let buf_v = v.as_mut_ptr();
|
||||
|
||||
// 1 <= i < len;
|
||||
for i in 1..len {
|
||||
// j satisfies: 0 <= j <= i;
|
||||
let mut j = i;
|
||||
if v.len() >= 2 && compare(&v[0], &v[1]) == Greater {
|
||||
unsafe {
|
||||
// `i` is in bounds.
|
||||
let read_ptr = buf_v.offset(i) as *const T;
|
||||
// There are three ways to implement insertion here:
|
||||
//
|
||||
// 1. Swap adjacent elements until the first one gets to its final destination.
|
||||
// However, this way we copy data around more than is necessary. If elements are big
|
||||
// structures (costly to copy), this method will be slow.
|
||||
//
|
||||
// 2. Iterate until the right place for the first element is found. Then shift the
|
||||
// elements succeeding it to make room for it and finally place it into the
|
||||
// remaining hole. This is a good method.
|
||||
//
|
||||
// 3. Copy the first element into a temporary variable. Iterate until the right place
|
||||
// for it is found. As we go along, copy every traversed element into the slot
|
||||
// preceding it. Finally, copy data from the temporary variable into the remaining
|
||||
// hole. This method is very good. Benchmarks demonstrated slightly better
|
||||
// performance than with the 2nd method.
|
||||
//
|
||||
// All methods were benchmarked, and the 3rd showed best results. So we chose that one.
|
||||
let mut tmp = NoDrop { value: ptr::read(&v[0]) };
|
||||
|
||||
// find where to insert, we need to do strict <,
|
||||
// rather than <=, to maintain stability.
|
||||
// Intermediate state of the insertion process is always tracked by `hole`, which
|
||||
// serves two purposes:
|
||||
// 1. Protects integrity of `v` from panics in `compare`.
|
||||
// 2. Fills the remaining hole in `v` in the end.
|
||||
//
|
||||
// Panic safety:
|
||||
//
|
||||
// If `compare` panics at any point during the process, `hole` will get dropped and
|
||||
// fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
|
||||
// initially held exactly once.
|
||||
let mut hole = InsertionHole {
|
||||
src: &mut tmp.value,
|
||||
dest: &mut v[1],
|
||||
};
|
||||
ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
|
||||
|
||||
// 0 <= j - 1 < len, so .offset(j - 1) is in bounds.
|
||||
while j > 0 && compare(&*read_ptr, &*buf_v.offset(j - 1)) == Less {
|
||||
j -= 1;
|
||||
for i in 2..v.len() {
|
||||
if compare(&tmp.value, &v[i]) != Greater {
|
||||
break;
|
||||
}
|
||||
ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
|
||||
hole.dest = &mut v[i];
|
||||
}
|
||||
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
|
||||
}
|
||||
}
|
||||
|
||||
// shift everything to the right, to make space to
|
||||
// insert this value.
|
||||
// Holds a value, but never drops it.
|
||||
#[allow(unions_with_drop_fields)]
|
||||
union NoDrop<T> {
|
||||
value: T
|
||||
}
|
||||
|
||||
// j + 1 could be `len` (for the last `i`), but in
|
||||
// that case, `i == j` so we don't copy. The
|
||||
// `.offset(j)` is always in bounds.
|
||||
// When dropped, copies from `src` into `dest`.
|
||||
struct InsertionHole<T> {
|
||||
src: *mut T,
|
||||
dest: *mut T,
|
||||
}
|
||||
|
||||
if i != j {
|
||||
let tmp = ptr::read(read_ptr);
|
||||
ptr::copy(&*buf_v.offset(j), buf_v.offset(j + 1), (i - j) as usize);
|
||||
ptr::copy_nonoverlapping(&tmp, buf_v.offset(j), 1);
|
||||
mem::forget(tmp);
|
||||
}
|
||||
impl<T> Drop for InsertionHole<T> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { ptr::copy_nonoverlapping(self.src, self.dest, 1); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_sort<T, F>(v: &mut [T], mut compare: F)
|
||||
/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
|
||||
/// stores the result into `v[..]`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
|
||||
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
|
||||
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, compare: &mut F)
|
||||
where F: FnMut(&T, &T) -> Ordering
|
||||
{
|
||||
// warning: this wildly uses unsafe.
|
||||
const BASE_INSERTION: usize = 32;
|
||||
const LARGE_INSERTION: usize = 16;
|
||||
|
||||
// FIXME #12092: smaller insertion runs seems to make sorting
|
||||
// vectors of large elements a little faster on some platforms,
|
||||
// but hasn't been tested/tuned extensively
|
||||
let insertion = if size_of::<T>() <= 16 {
|
||||
BASE_INSERTION
|
||||
} else {
|
||||
LARGE_INSERTION
|
||||
};
|
||||
|
||||
let len = v.len();
|
||||
let v = v.as_mut_ptr();
|
||||
let v_mid = v.offset(mid as isize);
|
||||
let v_end = v.offset(len as isize);
|
||||
|
||||
// short vectors get sorted in-place via insertion sort to avoid allocations
|
||||
if len <= insertion {
|
||||
insertion_sort(v, compare);
|
||||
return;
|
||||
}
|
||||
// The merge process first copies the shorter run into `buf`. Then it traces the newly copied
|
||||
// run and the longer run forwards (or backwards), comparing their next unconsumed elements and
|
||||
// copying the lesser (or greater) one into `v`.
|
||||
//
|
||||
// As soon as the shorter run is fully consumed, the process is done. If the longer run gets
|
||||
// consumed first, then we must copy whatever is left of the shorter run into the remaining
|
||||
// hole in `v`.
|
||||
//
|
||||
// Intermediate state of the process is always tracked by `hole`, which serves two purposes:
|
||||
// 1. Protects integrity of `v` from panics in `compare`.
|
||||
// 2. Fills the remaining hole in `v` if the longer run gets consumed first.
|
||||
//
|
||||
// Panic safety:
|
||||
//
|
||||
// If `compare` panics at any point during the process, `hole` will get dropped and fill the
|
||||
// hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
|
||||
// object it initially held exactly once.
|
||||
let mut hole;
|
||||
|
||||
// allocate some memory to use as scratch memory, we keep the
|
||||
// length 0 so we can keep shallow copies of the contents of `v`
|
||||
// without risking the dtors running on an object twice if
|
||||
// `compare` panics.
|
||||
let mut working_space = Vec::with_capacity(2 * len);
|
||||
// these both are buffers of length `len`.
|
||||
let mut buf_dat = working_space.as_mut_ptr();
|
||||
let mut buf_tmp = unsafe { buf_dat.offset(len as isize) };
|
||||
if mid <= len - mid {
|
||||
// The left run is shorter.
|
||||
ptr::copy_nonoverlapping(v, buf, mid);
|
||||
hole = MergeHole {
|
||||
start: buf,
|
||||
end: buf.offset(mid as isize),
|
||||
dest: v,
|
||||
};
|
||||
|
||||
// length `len`.
|
||||
let buf_v = v.as_ptr();
|
||||
// Initially, these pointers point to the beginnings of their arrays.
|
||||
let left = &mut hole.start;
|
||||
let mut right = v_mid;
|
||||
let out = &mut hole.dest;
|
||||
|
||||
// step 1. sort short runs with insertion sort. This takes the
|
||||
// values from `v` and sorts them into `buf_dat`, leaving that
|
||||
// with sorted runs of length INSERTION.
|
||||
while *left < hole.end && right < v_end {
|
||||
// Consume the lesser side.
|
||||
// If equal, prefer the left run to maintain stability.
|
||||
let to_copy = if compare(&**left, &*right) == Greater {
|
||||
get_and_increment(&mut right)
|
||||
} else {
|
||||
get_and_increment(left)
|
||||
};
|
||||
ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
|
||||
}
|
||||
} else {
|
||||
// The right run is shorter.
|
||||
ptr::copy_nonoverlapping(v_mid, buf, len - mid);
|
||||
hole = MergeHole {
|
||||
start: buf,
|
||||
end: buf.offset((len - mid) as isize),
|
||||
dest: v_mid,
|
||||
};
|
||||
|
||||
// We could hardcode the sorting comparisons here, and we could
|
||||
// manipulate/step the pointers themselves, rather than repeatedly
|
||||
// .offset-ing.
|
||||
for start in (0..len).step_by(insertion) {
|
||||
// start <= i < len;
|
||||
for i in start..cmp::min(start + insertion, len) {
|
||||
// j satisfies: start <= j <= i;
|
||||
let mut j = i as isize;
|
||||
unsafe {
|
||||
// `i` is in bounds.
|
||||
let read_ptr = buf_v.offset(i as isize);
|
||||
// Initially, these pointers point past the ends of their arrays.
|
||||
let left = &mut hole.dest;
|
||||
let right = &mut hole.end;
|
||||
let mut out = v_end;
|
||||
|
||||
// find where to insert, we need to do strict <,
|
||||
// rather than <=, to maintain stability.
|
||||
|
||||
// start <= j - 1 < len, so .offset(j - 1) is in
|
||||
// bounds.
|
||||
while j > start as isize && compare(&*read_ptr, &*buf_dat.offset(j - 1)) == Less {
|
||||
j -= 1;
|
||||
}
|
||||
|
||||
// shift everything to the right, to make space to
|
||||
// insert this value.
|
||||
|
||||
// j + 1 could be `len` (for the last `i`), but in
|
||||
// that case, `i == j` so we don't copy. The
|
||||
// `.offset(j)` is always in bounds.
|
||||
ptr::copy(&*buf_dat.offset(j), buf_dat.offset(j + 1), i - j as usize);
|
||||
ptr::copy_nonoverlapping(read_ptr, buf_dat.offset(j), 1);
|
||||
}
|
||||
while v < *left && buf < *right {
|
||||
// Consume the greater side.
|
||||
// If equal, prefer the right run to maintain stability.
|
||||
let to_copy = if compare(&*left.offset(-1), &*right.offset(-1)) == Greater {
|
||||
decrement_and_get(left)
|
||||
} else {
|
||||
decrement_and_get(right)
|
||||
};
|
||||
ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
|
||||
}
|
||||
}
|
||||
// Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
|
||||
// it will now be copied into the hole in `v`.
|
||||
|
||||
// step 2. merge the sorted runs.
|
||||
let mut width = insertion;
|
||||
while width < len {
|
||||
// merge the sorted runs of length `width` in `buf_dat` two at
|
||||
// a time, placing the result in `buf_tmp`.
|
||||
|
||||
// 0 <= start <= len.
|
||||
for start in (0..len).step_by(2 * width) {
|
||||
// manipulate pointers directly for speed (rather than
|
||||
// using a `for` loop with `range` and `.offset` inside
|
||||
// that loop).
|
||||
unsafe {
|
||||
// the end of the first run & start of the
|
||||
// second. Offset of `len` is defined, since this is
|
||||
// precisely one byte past the end of the object.
|
||||
let right_start = buf_dat.offset(cmp::min(start + width, len) as isize);
|
||||
// end of the second. Similar reasoning to the above re safety.
|
||||
let right_end_idx = cmp::min(start + 2 * width, len);
|
||||
let right_end = buf_dat.offset(right_end_idx as isize);
|
||||
|
||||
// the pointers to the elements under consideration
|
||||
// from the two runs.
|
||||
|
||||
// both of these are in bounds.
|
||||
let mut left = buf_dat.offset(start as isize);
|
||||
let mut right = right_start;
|
||||
|
||||
// where we're putting the results, it is a run of
|
||||
// length `2*width`, so we step it once for each step
|
||||
// of either `left` or `right`. `buf_tmp` has length
|
||||
// `len`, so these are in bounds.
|
||||
let mut out = buf_tmp.offset(start as isize);
|
||||
let out_end = buf_tmp.offset(right_end_idx as isize);
|
||||
|
||||
// If left[last] <= right[0], they are already in order:
|
||||
// fast-forward the left side (the right side is handled
|
||||
// in the loop).
|
||||
// If `right` is not empty then left is not empty, and
|
||||
// the offsets are in bounds.
|
||||
if right != right_end && compare(&*right.offset(-1), &*right) != Greater {
|
||||
let elems = (right_start as usize - left as usize) / mem::size_of::<T>();
|
||||
ptr::copy_nonoverlapping(&*left, out, elems);
|
||||
out = out.offset(elems as isize);
|
||||
left = right_start;
|
||||
}
|
||||
|
||||
while out < out_end {
|
||||
// Either the left or the right run are exhausted,
|
||||
// so just copy the remainder from the other run
|
||||
// and move on; this gives a huge speed-up (order
|
||||
// of 25%) for mostly sorted vectors (the best
|
||||
// case).
|
||||
if left == right_start {
|
||||
// the number remaining in this run.
|
||||
let elems = (right_end as usize - right as usize) / mem::size_of::<T>();
|
||||
ptr::copy_nonoverlapping(&*right, out, elems);
|
||||
break;
|
||||
} else if right == right_end {
|
||||
let elems = (right_start as usize - left as usize) / mem::size_of::<T>();
|
||||
ptr::copy_nonoverlapping(&*left, out, elems);
|
||||
break;
|
||||
}
|
||||
|
||||
// check which side is smaller, and that's the
|
||||
// next element for the new run.
|
||||
|
||||
// `left < right_start` and `right < right_end`,
|
||||
// so these are valid.
|
||||
let to_copy = if compare(&*left, &*right) == Greater {
|
||||
step(&mut right)
|
||||
} else {
|
||||
step(&mut left)
|
||||
};
|
||||
ptr::copy_nonoverlapping(&*to_copy, out, 1);
|
||||
step(&mut out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mem::swap(&mut buf_dat, &mut buf_tmp);
|
||||
|
||||
width *= 2;
|
||||
}
|
||||
|
||||
// write the result to `v` in one go, so that there are never two copies
|
||||
// of the same object in `v`.
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(&*buf_dat, v.as_mut_ptr(), len);
|
||||
}
|
||||
|
||||
// increment the pointer, returning the old pointer.
|
||||
#[inline(always)]
|
||||
unsafe fn step<T>(ptr: &mut *mut T) -> *mut T {
|
||||
unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
|
||||
let old = *ptr;
|
||||
*ptr = ptr.offset(1);
|
||||
old
|
||||
}
|
||||
|
||||
unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
|
||||
*ptr = ptr.offset(-1);
|
||||
*ptr
|
||||
}
|
||||
|
||||
// When dropped, copies the range `start..end` into `dest..`.
|
||||
struct MergeHole<T> {
|
||||
start: *mut T,
|
||||
end: *mut T,
|
||||
dest: *mut T,
|
||||
}
|
||||
|
||||
impl<T> Drop for MergeHole<T> {
|
||||
fn drop(&mut self) {
|
||||
// `T` is not a zero-sized type, so it's okay to divide by its size.
|
||||
let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
|
||||
unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied, for every `i` in `0 .. runs.len() - 2`:
///
/// 1. `runs[i].len > runs[i + 1].len`
/// 2. `runs[i].len > runs[i + 1].len + runs[i + 2].len`
///
/// The invariants ensure that the total running time is `O(n log n)` worst-case.
fn merge_sort<T, F>(v: &mut [T], mut compare: F)
|
||||
where F: FnMut(&T, &T) -> Ordering
|
||||
{
|
||||
// Sorting has no meaningful behavior on zero-sized types.
|
||||
if size_of::<T>() == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// FIXME #12092: These numbers are platform-specific and need more extensive testing/tuning.
|
||||
//
|
||||
// If `v` has length up to `max_insertion`, simply switch to insertion sort because it is going
|
||||
// to perform better than merge sort. For bigger types `T`, the threshold is smaller.
|
||||
//
|
||||
// Short runs are extended using insertion sort to span at least `min_run` elements, in order
|
||||
// to improve performance.
|
||||
let (max_insertion, min_run) = if size_of::<T>() <= 16 {
|
||||
(64, 32)
|
||||
} else {
|
||||
(32, 16)
|
||||
};
|
||||
|
||||
let len = v.len();
|
||||
|
||||
// Short arrays get sorted in-place via insertion sort to avoid allocations.
|
||||
if len <= max_insertion {
|
||||
if len >= 2 {
|
||||
for i in (0..len-1).rev() {
|
||||
insert_head(&mut v[i..], &mut compare);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
|
||||
// shallow copies of the contents of `v` without risking the dtors running on copies if
|
||||
// `compare` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
|
||||
// which will always have length at most `len / 2`.
|
||||
let mut buf = Vec::with_capacity(len / 2);
|
||||
|
||||
// In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
|
||||
// strange decision, but consider the fact that merges more often go in the opposite direction
|
||||
// (forwards). According to benchmarks, merging forwards is slightly faster than merging
|
||||
// backwards. To conclude, identifying runs by traversing backwards improves performance.
|
||||
let mut runs = vec![];
|
||||
let mut end = len;
|
||||
while end > 0 {
|
||||
// Find the next natural run, and reverse it if it's strictly descending.
|
||||
let mut start = end - 1;
|
||||
if start > 0 {
|
||||
start -= 1;
|
||||
if compare(&v[start], &v[start + 1]) == Greater {
|
||||
while start > 0 && compare(&v[start - 1], &v[start]) == Greater {
|
||||
start -= 1;
|
||||
}
|
||||
v[start..end].reverse();
|
||||
} else {
|
||||
while start > 0 && compare(&v[start - 1], &v[start]) != Greater {
|
||||
start -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Insert some more elements into the run if it's too short. Insertion sort is faster than
|
||||
// merge sort on short sequences, so this significantly improves performance.
|
||||
while start > 0 && end - start < min_run {
|
||||
start -= 1;
|
||||
insert_head(&mut v[start..end], &mut compare);
|
||||
}
|
||||
|
||||
// Push this run onto the stack.
|
||||
runs.push(Run {
|
||||
start: start,
|
||||
len: end - start,
|
||||
});
|
||||
end = start;
|
||||
|
||||
// Merge some pairs of adjacent runs to satisfy the invariants.
|
||||
while let Some(r) = collapse(&runs) {
|
||||
let left = runs[r + 1];
|
||||
let right = runs[r];
|
||||
unsafe {
|
||||
merge(&mut v[left.start .. right.start + right.len], left.len, buf.as_mut_ptr(),
|
||||
&mut compare);
|
||||
}
|
||||
runs[r] = Run {
|
||||
start: left.start,
|
||||
len: left.len + right.len,
|
||||
};
|
||||
runs.remove(r + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, exactly one run must remain in the stack.
|
||||
debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
|
||||
|
||||
// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
// algorithm should continue building a new run instead, `None` is returned.
//
// TimSort is infamous for its buggy implementations, as described here:
// http://envisage-project.eu/timsort-specification-and-verification/
//
// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
// Enforcing them on just the top three is not sufficient to ensure that the invariants will
// still hold for *all* runs in the stack.
//
// This function correctly checks invariants for the top four runs. Additionally, if the top
// run starts at index 0, it will always demand a merge operation until the stack is fully
// collapsed, in order to complete the sort.
fn collapse(runs: &[Run]) -> Option<usize> {
|
||||
let n = runs.len();
|
||||
if n >= 2 && (runs[n - 1].start == 0 ||
|
||||
runs[n - 2].len <= runs[n - 1].len ||
|
||||
(n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len) ||
|
||||
(n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len)) {
|
||||
if n >= 3 && runs[n - 3].len < runs[n - 1].len {
|
||||
Some(n - 3)
|
||||
} else {
|
||||
Some(n - 2)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct Run {
|
||||
start: usize,
|
||||
len: usize,
|
||||
}
|
||||
}
|
||||
|
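To make the collapsing rule easier to follow outside the context of the diff, here is a hypothetical standalone sketch of the invariant check described in the comments above; the function name, the `lens` slice, the `top_starts_at_zero` flag, and the example run lengths are all my own, not part of the commit:

```rust
// Hypothetical standalone sketch of the run-collapsing rule documented above. `lens` holds
// the run lengths with the top of the stack last, and `top_starts_at_zero` mirrors the
// `runs[n - 1].start == 0` condition; the real code operates on the `Run` structs in the diff.
fn collapse_sketch(lens: &[usize], top_starts_at_zero: bool) -> Option<usize> {
    let n = lens.len();
    if n >= 2 && (top_starts_at_zero
        || lens[n - 2] <= lens[n - 1]
        || (n >= 3 && lens[n - 3] <= lens[n - 2] + lens[n - 1])
        || (n >= 4 && lens[n - 4] <= lens[n - 3] + lens[n - 2]))
    {
        // Prefer merging the pair whose left run is shorter, as in the code above.
        if n >= 3 && lens[n - 3] < lens[n - 1] {
            Some(n - 3)
        } else {
            Some(n - 2)
        }
    } else {
        None
    }
}

fn main() {
    // 30 <= 10 + 25 violates invariant 2, so the two topmost runs (10 and 25) are merged first.
    assert_eq!(collapse_sketch(&[100, 30, 10, 25], false), Some(2));
    // A stack that already satisfies both invariants needs no merging yet.
    assert_eq!(collapse_sketch(&[100, 40, 20], false), None);
}
```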
@@ -383,7 +383,7 @@ fn test_reverse() {
|
||||
|
||||
#[test]
|
||||
fn test_sort() {
|
||||
for len in 4..25 {
|
||||
for len in (2..25).chain(500..510) {
|
||||
for _ in 0..100 {
|
||||
let mut v: Vec<_> = thread_rng().gen_iter::<i32>().take(len).collect();
|
||||
let mut v1 = v.clone();
|
||||
@@ -410,7 +410,7 @@ fn test_sort() {
|
||||
|
||||
#[test]
|
||||
fn test_sort_stability() {
|
||||
for len in 4..25 {
|
||||
for len in (2..25).chain(500..510) {
|
||||
for _ in 0..10 {
|
||||
let mut counts = [0; 10];
|
||||
|
||||
@@ -441,6 +441,13 @@ fn test_sort_stability() {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sort_zero_sized_type() {
|
||||
// Should not panic.
|
||||
[(); 10].sort();
|
||||
[(); 100].sort();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concat() {
|
||||
let v: [Vec<i32>; 0] = [];
|
||||
@@ -1338,89 +1345,104 @@ mod bench {
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn sort_random_small(b: &mut Bencher) {
|
||||
fn gen_ascending(len: usize) -> Vec<u64> {
|
||||
(0..len as u64).collect()
|
||||
}
|
||||
|
||||
fn gen_descending(len: usize) -> Vec<u64> {
|
||||
(0..len as u64).rev().collect()
|
||||
}
|
||||
|
||||
fn gen_random(len: usize) -> Vec<u64> {
|
||||
let mut rng = thread_rng();
|
||||
b.iter(|| {
|
||||
let mut v: Vec<_> = rng.gen_iter::<u64>().take(5).collect();
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = 5 * mem::size_of::<u64>() as u64;
|
||||
rng.gen_iter::<u64>().take(len).collect()
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn sort_random_medium(b: &mut Bencher) {
|
||||
fn gen_mostly_ascending(len: usize) -> Vec<u64> {
|
||||
let mut rng = thread_rng();
|
||||
b.iter(|| {
|
||||
let mut v: Vec<_> = rng.gen_iter::<u64>().take(100).collect();
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = 100 * mem::size_of::<u64>() as u64;
|
||||
let mut v = gen_ascending(len);
|
||||
for _ in (0usize..).take_while(|x| x * x <= len) {
|
||||
let x = rng.gen::<usize>() % len;
|
||||
let y = rng.gen::<usize>() % len;
|
||||
v.swap(x, y);
|
||||
}
|
||||
v
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn sort_random_large(b: &mut Bencher) {
|
||||
fn gen_mostly_descending(len: usize) -> Vec<u64> {
|
||||
let mut rng = thread_rng();
|
||||
b.iter(|| {
|
||||
let mut v: Vec<_> = rng.gen_iter::<u64>().take(10000).collect();
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = 10000 * mem::size_of::<u64>() as u64;
|
||||
let mut v = gen_descending(len);
|
||||
for _ in (0usize..).take_while(|x| x * x <= len) {
|
||||
let x = rng.gen::<usize>() % len;
|
||||
let y = rng.gen::<usize>() % len;
|
||||
v.swap(x, y);
|
||||
}
|
||||
v
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn sort_sorted(b: &mut Bencher) {
|
||||
let mut v: Vec<_> = (0..10000).collect();
|
||||
b.iter(|| {
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = (v.len() * mem::size_of_val(&v[0])) as u64;
|
||||
}
|
||||
|
||||
type BigSortable = (u64, u64, u64, u64);
|
||||
|
||||
#[bench]
|
||||
fn sort_big_random_small(b: &mut Bencher) {
|
||||
fn gen_big_random(len: usize) -> Vec<[u64; 16]> {
|
||||
let mut rng = thread_rng();
|
||||
b.iter(|| {
|
||||
let mut v = rng.gen_iter::<BigSortable>()
|
||||
.take(5)
|
||||
.collect::<Vec<BigSortable>>();
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = 5 * mem::size_of::<BigSortable>() as u64;
|
||||
rng.gen_iter().map(|x| [x; 16]).take(len).collect()
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn sort_big_random_medium(b: &mut Bencher) {
|
||||
let mut rng = thread_rng();
|
||||
b.iter(|| {
|
||||
let mut v = rng.gen_iter::<BigSortable>()
|
||||
.take(100)
|
||||
.collect::<Vec<BigSortable>>();
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = 100 * mem::size_of::<BigSortable>() as u64;
|
||||
fn gen_big_ascending(len: usize) -> Vec<[u64; 16]> {
|
||||
(0..len as u64).map(|x| [x; 16]).take(len).collect()
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn sort_big_random_large(b: &mut Bencher) {
|
||||
let mut rng = thread_rng();
|
||||
b.iter(|| {
|
||||
let mut v = rng.gen_iter::<BigSortable>()
|
||||
.take(10000)
|
||||
.collect::<Vec<BigSortable>>();
|
||||
v.sort();
|
||||
});
|
||||
b.bytes = 10000 * mem::size_of::<BigSortable>() as u64;
|
||||
fn gen_big_descending(len: usize) -> Vec<[u64; 16]> {
|
||||
(0..len as u64).rev().map(|x| [x; 16]).take(len).collect()
|
||||
}
|
||||
|
||||
macro_rules! sort_bench {
|
||||
($name:ident, $gen:expr, $len:expr) => {
|
||||
#[bench]
|
||||
fn $name(b: &mut Bencher) {
|
||||
b.iter(|| $gen($len).sort());
|
||||
b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort_bench!(sort_small_random, gen_random, 10);
|
||||
sort_bench!(sort_small_ascending, gen_ascending, 10);
|
||||
sort_bench!(sort_small_descending, gen_descending, 10);
|
||||
|
||||
sort_bench!(sort_small_big_random, gen_big_random, 10);
|
||||
sort_bench!(sort_small_big_ascending, gen_big_ascending, 10);
|
||||
sort_bench!(sort_small_big_descending, gen_big_descending, 10);
|
||||
|
||||
sort_bench!(sort_medium_random, gen_random, 100);
|
||||
sort_bench!(sort_medium_ascending, gen_ascending, 100);
|
||||
sort_bench!(sort_medium_descending, gen_descending, 100);
|
||||
|
||||
sort_bench!(sort_large_random, gen_random, 10000);
|
||||
sort_bench!(sort_large_ascending, gen_ascending, 10000);
|
||||
sort_bench!(sort_large_descending, gen_descending, 10000);
|
||||
sort_bench!(sort_large_mostly_ascending, gen_mostly_ascending, 10000);
|
||||
sort_bench!(sort_large_mostly_descending, gen_mostly_descending, 10000);
|
||||
|
||||
sort_bench!(sort_large_big_random, gen_big_random, 10000);
|
||||
sort_bench!(sort_large_big_ascending, gen_big_ascending, 10000);
|
||||
sort_bench!(sort_large_big_descending, gen_big_descending, 10000);
|
||||
|
||||
#[bench]
|
||||
fn sort_big_sorted(b: &mut Bencher) {
|
||||
let mut v: Vec<BigSortable> = (0..10000).map(|i| (i, i, i, i)).collect();
|
||||
fn sort_large_random_expensive(b: &mut Bencher) {
|
||||
let len = 10000;
|
||||
b.iter(|| {
|
||||
v.sort();
|
||||
let mut count = 0;
|
||||
let cmp = move |a: &u64, b: &u64| {
|
||||
count += 1;
|
||||
if count % 1_000_000_000 == 0 {
|
||||
panic!("should not happen");
|
||||
}
|
||||
(*a as f64).cos().partial_cmp(&(*b as f64).cos()).unwrap()
|
||||
};
|
||||
|
||||
let mut v = gen_random(len);
|
||||
v.sort_by(cmp);
|
||||
|
||||
black_box(count);
|
||||
});
|
||||
b.bytes = (v.len() * mem::size_of_val(&v[0])) as u64;
|
||||
b.bytes = len as u64 * mem::size_of::<u64>() as u64;
|
||||
}
|
||||
}
|
||||
|
@@ -17,86 +17,111 @@ use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::__rand::{thread_rng, Rng};
|
||||
use std::thread;
|
||||
|
||||
const REPEATS: usize = 5;
|
||||
const MAX_LEN: usize = 32;
|
||||
static drop_counts: [AtomicUsize; MAX_LEN] =
|
||||
// FIXME #5244: AtomicUsize is not Copy.
|
||||
[
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
];
|
||||
const MAX_LEN: usize = 80;
|
||||
|
||||
static creation_count: AtomicUsize = AtomicUsize::new(0);
|
||||
static DROP_COUNTS: [AtomicUsize; MAX_LEN] = [
|
||||
// FIXME #5244: AtomicUsize is not Copy.
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
|
||||
];
|
||||
|
||||
#[derive(Clone, PartialEq, PartialOrd, Eq, Ord)]
|
||||
struct DropCounter { x: u32, creation_id: usize }
|
||||
struct DropCounter {
|
||||
x: u32,
|
||||
id: usize,
|
||||
}
|
||||
|
||||
impl Drop for DropCounter {
|
||||
fn drop(&mut self) {
|
||||
drop_counts[self.creation_id].fetch_add(1, Ordering::Relaxed);
|
||||
DROP_COUNTS[self.id].fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
// len can't go above 64.
|
||||
for len in 2..MAX_LEN {
|
||||
for _ in 0..REPEATS {
|
||||
// reset the count for these new DropCounters, so their
|
||||
// IDs start from 0.
|
||||
creation_count.store(0, Ordering::Relaxed);
|
||||
fn test(input: &[DropCounter]) {
|
||||
let len = input.len();
|
||||
|
||||
let mut rng = thread_rng();
|
||||
let main = (0..len).map(|_| {
|
||||
DropCounter {
|
||||
x: rng.next_u32(),
|
||||
creation_id: creation_count.fetch_add(1, Ordering::Relaxed),
|
||||
// Work out the total number of comparisons required to sort
|
||||
// this array...
|
||||
let mut count = 0usize;
|
||||
input.to_owned().sort_by(|a, b| { count += 1; a.cmp(b) });
|
||||
|
||||
// ... and then panic on each and every single one.
|
||||
for panic_countdown in 0..count {
|
||||
// Refresh the counters.
|
||||
for i in 0..len {
|
||||
DROP_COUNTS[i].store(0, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
let v = input.to_owned();
|
||||
let _ = thread::spawn(move || {
|
||||
let mut v = v;
|
||||
let mut panic_countdown = panic_countdown;
|
||||
v.sort_by(|a, b| {
|
||||
if panic_countdown == 0 {
|
||||
panic!();
|
||||
}
|
||||
}).collect::<Vec<_>>();
|
||||
panic_countdown -= 1;
|
||||
a.cmp(b)
|
||||
})
|
||||
}).join();
|
||||
|
||||
// work out the total number of comparisons required to sort
|
||||
// this array...
|
||||
let mut count = 0_usize;
|
||||
main.clone().sort_by(|a, b| { count += 1; a.cmp(b) });
|
||||
|
||||
// ... and then panic on each and every single one.
|
||||
for panic_countdown in 0..count {
|
||||
// refresh the counters.
|
||||
for c in &drop_counts {
|
||||
c.store(0, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
let v = main.clone();
|
||||
|
||||
let _ = thread::spawn(move|| {
|
||||
let mut v = v;
|
||||
let mut panic_countdown = panic_countdown;
|
||||
v.sort_by(|a, b| {
|
||||
if panic_countdown == 0 {
|
||||
panic!()
|
||||
}
|
||||
panic_countdown -= 1;
|
||||
a.cmp(b)
|
||||
})
|
||||
}).join();
|
||||
|
||||
// check that the number of things dropped is exactly
|
||||
// what we expect (i.e. the contents of `v`).
|
||||
for (i, c) in drop_counts.iter().enumerate().take(len) {
|
||||
let count = c.load(Ordering::Relaxed);
|
||||
assert!(count == 1,
|
||||
"found drop count == {} for i == {}, len == {}",
|
||||
count, i, len);
|
||||
}
|
||||
}
|
||||
// Check that the number of things dropped is exactly
|
||||
// what we expect (i.e. the contents of `v`).
|
||||
for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
|
||||
let count = c.load(Ordering::Relaxed);
|
||||
assert!(count == 1,
|
||||
"found drop count == {} for i == {}, len == {}",
|
||||
count, i, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
for len in (1..20).chain(70..MAX_LEN) {
|
||||
// Test on a random array.
|
||||
let mut rng = thread_rng();
|
||||
let input = (0..len).map(|id| {
|
||||
DropCounter {
|
||||
x: rng.next_u32(),
|
||||
id: id,
|
||||
}
|
||||
}).collect::<Vec<_>>();
|
||||
test(&input);
|
||||
|
||||
// Test on a sorted array with two elements randomly swapped, creating several natural
|
||||
// runs of random lengths. Such arrays have very high chances of hitting all code paths in
|
||||
// the merge procedure.
|
||||
for _ in 0..5 {
|
||||
let mut input = (0..len).map(|i|
|
||||
DropCounter {
|
||||
x: i as u32,
|
||||
id: i,
|
||||
}
|
||||
).collect::<Vec<_>>();
|
||||
|
||||
let a = rng.gen::<usize>() % len;
|
||||
let b = rng.gen::<usize>() % len;
|
||||
input.swap(a, b);
|
||||
|
||||
test(&input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|