Rollup merge of #48012 - scottmcm:faster-rangeinclusive-fold, r=alexcrichton

Override try_[r]fold for RangeInclusive

Because the last item needs special handling, it seems that LLVM has trouble canonicalizing the loops in external iteration.  With the override, it becomes obvious that the start==end case exits the loop (as opposed to the one *after* that exiting the loop in external iteration).

Demo adapted from https://github.com/rust-lang/rust/issues/45222
```rust
#[no_mangle]
pub fn foo3r(n: u64) -> u64 {
    let mut count = 0;
    (0..n).for_each(|_| {
        (0 ..= n).rev().for_each(|j| {
            count += j;
        })
    });
    count
}
```

<details>
 <summary>Current nightly ASM, 100 lines (https://play.rust-lang.org/?gist=f5674c702c6e2045c3aab5d03763e5f6&version=nightly&mode=release)</summary>

```asm
foo3r:
	pushq	%rbx
.Lcfi0:
.Lcfi1:
	testq	%rdi, %rdi
	je	.LBB0_1
	testb	$1, %dil
	jne	.LBB0_4
	xorl	%eax, %eax
	xorl	%r8d, %r8d
	cmpq	$1, %rdi
	jne	.LBB0_11
	jmp	.LBB0_23
.LBB0_1:
	xorl	%eax, %eax
	popq	%rbx
	retq
.LBB0_4:
	xorl	%r8d, %r8d
	movq	$-1, %r9
	xorl	%eax, %eax
	movq	%rdi, %r11
	xorl	%r10d, %r10d
	jmp	.LBB0_5
.LBB0_8:
	addq	%r11, %rax
	movq	%rsi, %r11
	movq	%rdx, %r10
.LBB0_5:
	cmpq	%r11, %r10
	movl	$1, %ecx
	cmovbq	%r9, %rcx
	cmoveq	%r8, %rcx
	testq	%rcx, %rcx
	movl	$0, %esi
	movl	$1, %edx
	je	.LBB0_8
	cmpq	$-1, %rcx
	jne	.LBB0_9
	leaq	-1(%r11), %rsi
	movq	%r10, %rdx
	jmp	.LBB0_8
.LBB0_9:
	movl	$1, %r8d
	cmpq	$1, %rdi
	je	.LBB0_23
.LBB0_11:
	xorl	%r9d, %r9d
	movq	$-1, %r10
.LBB0_12:
	movq	%rdi, %rsi
	xorl	%r11d, %r11d
	jmp	.LBB0_13
.LBB0_16:
	addq	%rsi, %rax
	movq	%rcx, %rsi
	movq	%rbx, %r11
.LBB0_13:
	cmpq	%rsi, %r11
	movl	$1, %edx
	cmovbq	%r10, %rdx
	cmoveq	%r9, %rdx
	testq	%rdx, %rdx
	movl	$0, %ecx
	movl	$1, %ebx
	je	.LBB0_16
	cmpq	$-1, %rdx
	jne	.LBB0_17
	leaq	-1(%rsi), %rcx
	movq	%r11, %rbx
	jmp	.LBB0_16
.LBB0_17:
	movq	%rdi, %rcx
	xorl	%r11d, %r11d
	jmp	.LBB0_18
.LBB0_21:
	addq	%rcx, %rax
	movq	%rsi, %rcx
	movq	%rbx, %r11
.LBB0_18:
	cmpq	%rcx, %r11
	movl	$1, %edx
	cmovbq	%r10, %rdx
	cmoveq	%r9, %rdx
	testq	%rdx, %rdx
	movl	$0, %esi
	movl	$1, %ebx
	je	.LBB0_21
	cmpq	$-1, %rdx
	jne	.LBB0_22
	leaq	-1(%rcx), %rsi
	movq	%r11, %rbx
	jmp	.LBB0_21
.LBB0_22:
	addq	$2, %r8
	cmpq	%rdi, %r8
	jne	.LBB0_12
.LBB0_23:
	popq	%rbx
	retq
.Lfunc_end0:
```
</details><br>

With this PR:
```asm
foo3r:
	test	rcx, rcx
	je	.LBB3_1
	lea	r8, [rcx - 1]
	lea	rdx, [rcx - 2]
	mov	rax, r8
	mul	rdx
	shld	rdx, rax, 63
	imul	r8, r8
	add	r8, rcx
	sub	r8, rdx
	imul	r8, rcx
	mov	rax, r8
	ret
.LBB3_1:
	xor	r8d, r8d
	mov	rax, r8
	ret
```
This commit is contained in:
kennytm 2018-02-07 03:23:25 +08:00 committed by GitHub
commit 4f184eb6a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 1 deletions

View File

@ -10,7 +10,7 @@
use convert::TryFrom;
use mem;
use ops::{self, Add, Sub};
use ops::{self, Add, Sub, Try};
use usize;
use super::{FusedIterator, TrustedLen};
@ -397,6 +397,28 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
fn max(mut self) -> Option<A> {
self.next_back()
}
#[inline]
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
let mut accum = init;
if self.start <= self.end {
loop {
let (x, done) =
if self.start < self.end {
let n = self.start.add_one();
(mem::replace(&mut self.start, n), false)
} else {
self.end.replace_zero();
(self.start.replace_one(), true)
};
accum = f(accum, x)?;
if done { break }
}
}
Try::from_ok(accum)
}
}
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
@ -418,6 +440,28 @@ impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
_ => None,
}
}
#[inline]
fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
let mut accum = init;
if self.start <= self.end {
loop {
let (x, done) =
if self.start < self.end {
let n = self.end.sub_one();
(mem::replace(&mut self.end, n), false)
} else {
self.start.replace_one();
(self.end.replace_zero(), true)
};
accum = f(accum, x)?;
if done { break }
}
}
Try::from_ok(accum)
}
}
#[unstable(feature = "fused", issue = "35602")]

View File

@ -1459,6 +1459,26 @@ fn test_range_inclusive_min() {
assert_eq!(r.min(), None);
}
#[test]
fn test_range_inclusive_folds() {
assert_eq!((1..=10).sum::<i32>(), 55);
assert_eq!((1..=10).rev().sum::<i32>(), 55);
let mut it = 40..=50;
assert_eq!(it.try_fold(0, i8::checked_add), None);
assert_eq!(it, 44..=50);
assert_eq!(it.try_rfold(0, i8::checked_add), None);
assert_eq!(it, 44..=47);
let mut it = 10..=20;
assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(165));
assert_eq!(it, 1..=0);
let mut it = 10..=20;
assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(165));
assert_eq!(it, 1..=0);
}
#[test]
fn test_repeat() {
let mut it = repeat(42);