Auto merge of #51601 - Emerentius:step_by_range_diet, r=sfackler

Specialize StepBy<Range(Inclusive)>

Part of #51557, related to #43064, #31155

As discussed in the above issues, `step_by` optimizes very badly on ranges which is related to
1. the special casing of the first `StepBy::next()` call
2. the need to do 2 additions of `n - 1` and `1` inside the range's `next()`

This PR eliminates both by overriding `next()` to always produce the current element and also step ahead by `n` elements in one go. The generated code is much better, even identical in the case of a `Range` with constant `start` and `end` where `start+step` can't overflow. Without constant bounds it's a bit longer than the manual loop. `RangeInclusive` doesn't optimize as nicely but is still much better than the original asm.
Unsigned integers optimize better than signed ones for some reason.

See the following two links for a comparison.

[godbolt: specialization for ..](https://godbolt.org/g/haHLJr)
[godbolt: specialization for ..=](https://godbolt.org/g/ewyMu6)

`RangeFrom`, the only other range with an `Iterator` implementation can't be specialized like this without changing behaviour due to overflow. There is no way to save "finished-ness".

The approach can not be used in general, because it would produce side effects of the underlying iterator too early.

May obsolete #51435, haven't checked.
This commit is contained in:
bors 2018-06-21 08:55:13 +00:00
commit 95979dcfb2
2 changed files with 81 additions and 7 deletions

View File

@ -319,9 +319,10 @@
use cmp;
use fmt;
use iter_private::TrustedRandomAccess;
use ops::Try;
use ops::{self, Try};
use usize;
use intrinsics;
use mem;
#[stable(feature = "rust1", since = "1.0.0")]
pub use self::iterator::Iterator;
@ -672,12 +673,7 @@ impl<I> Iterator for StepBy<I> where I: Iterator {
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.first_take {
self.first_take = false;
self.iter.next()
} else {
self.iter.nth(self.step)
}
<Self as StepBySpecIterator>::spec_next(self)
}
#[inline]
@ -737,6 +733,76 @@ impl<I> Iterator for StepBy<I> where I: Iterator {
}
}
// hidden trait for specializing iterator methods
// could be generalized but is currently only used for StepBy
trait StepBySpecIterator {
type Item;
fn spec_next(&mut self) -> Option<Self::Item>;
}
impl<I> StepBySpecIterator for StepBy<I>
where
I: Iterator,
{
type Item = I::Item;
#[inline]
default fn spec_next(&mut self) -> Option<I::Item> {
if self.first_take {
self.first_take = false;
self.iter.next()
} else {
self.iter.nth(self.step)
}
}
}
impl<T> StepBySpecIterator for StepBy<ops::Range<T>>
where
T: Step,
{
#[inline]
fn spec_next(&mut self) -> Option<Self::Item> {
self.first_take = false;
if !(self.iter.start < self.iter.end) {
return None;
}
// add 1 to self.step to get original step size back
// it was decremented for the general case on construction
if let Some(n) = self.iter.start.add_usize(self.step+1) {
let next = mem::replace(&mut self.iter.start, n);
Some(next)
} else {
let last = self.iter.start.clone();
self.iter.start = self.iter.end.clone();
Some(last)
}
}
}
impl<T> StepBySpecIterator for StepBy<ops::RangeInclusive<T>>
where
T: Step,
{
#[inline]
fn spec_next(&mut self) -> Option<Self::Item> {
self.first_take = false;
if !(self.iter.start <= self.iter.end) {
return None;
}
// add 1 to self.step to get original step size back
// it was decremented for the general case on construction
if let Some(n) = self.iter.start.add_usize(self.step+1) {
let next = mem::replace(&mut self.iter.start, n);
Some(next)
} else {
let last = self.iter.start.replace_one();
self.iter.end.replace_zero();
Some(last)
}
}
}
// StepBy can only make the iterator shorter, so the len will still fit.
#[stable(feature = "iterator_step_by", since = "1.28.0")]
impl<I> ExactSizeIterator for StepBy<I> where I: ExactSizeIterator {}

View File

@ -1618,6 +1618,14 @@ fn test_range_step() {
assert_eq!((isize::MIN..isize::MAX).step_by(1).size_hint(), (usize::MAX, Some(usize::MAX)));
}
#[test]
fn test_range_inclusive_step() {
assert_eq!((0..=50).step_by(10).collect::<Vec<_>>(), [0, 10, 20, 30, 40, 50]);
assert_eq!((0..=5).step_by(1).collect::<Vec<_>>(), [0, 1, 2, 3, 4, 5]);
assert_eq!((200..=255u8).step_by(10).collect::<Vec<_>>(), [200, 210, 220, 230, 240, 250]);
assert_eq!((250..=255u8).step_by(1).collect::<Vec<_>>(), [250, 251, 252, 253, 254, 255]);
}
#[test]
fn test_range_last_max() {
assert_eq!((0..20).last(), Some(19));