Rollup merge of #72413 - CAD97:char-range, r=dtolnay

impl Step for char (make Range*<char> iterable)

[[irlo thread]](https://internals.rust-lang.org/t/mini-rfc-make-range-char-work/12392?u=cad97) [[godbolt asm example]](https://rust.godbolt.org/z/fdveKo)

Add an implementation of the `Step` trait for `char`, which has the effect of making `RangeInclusive<char>` (and the other range types) iterable.

I've written the surrogate range bounds as magic numbers here rather than e.g. a `const SURROGATE_RANGE = 0xD800..0xE000`, because these numbers already appear as magic numbers elsewhere and no constants exist for them yet. These files definitely aren't where surrogate range constants should live.

`ExactSizeIterator` is not implemented because `0x10FFFF` does not fit in a `usize` on 16-bit targets (where `usize == u16`). However, given we already provide some `ExactSizeIterator` impls that are not correct on 16-bit targets, we might still want to consider providing it for `Range`[`Inclusive`]`<char>`, as it is definitely _very_ convenient. (At the very least, we want to make sure `.count()` doesn't bother iterating the range.)

The second commit in this PR changes a call to `Step::forward` to use `Step::forward_unchecked` in `RangeInclusive::next`. This is because without this patch, iteration over all codepoints (`'\0'..=char::MAX`) does not successfully optimize out the panicking branch. This was mentioned in the PR that updated `Step` to its current design, but was deemed not yet necessary as it did not impact codegen for integral types.

More of `Range*`'s implementations' calls to `Step` methods will probably want to see if they can use the `_unchecked` version as (if) we open up `Step` to being implemented on more types.

---

cc @rust-lang/libs, this is insta-stable and a fairly significant addition to `Range*`'s capabilities; this is the first instance of a noncontinuous domain being iterable with `Range` (or, well, anything other than primitive integers). I don't think this needs a full RFC, but it should definitely get some decent eyes on it.
This commit is contained in:
Ralf Jung 2020-05-29 21:58:32 +02:00 committed by GitHub
commit b965196ce0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 85 additions and 1 deletions

View File

@ -1,3 +1,4 @@
use crate::char;
use crate::convert::TryFrom;
use crate::mem;
use crate::ops::{self, Add, Sub, Try};
@ -400,6 +401,73 @@ step_integer_impls! {
wider than usize: [u32 i32], [u64 i64], [u128 i128];
}
#[unstable(feature = "step_trait", reason = "recently redesigned", issue = "42168")]
unsafe impl Step for char {
    /// Counts the steps needed to get from `start` to `end`, leaving out the
    /// code points in `0xD800..0xE000`, which are not valid `char`s.
    ///
    /// Returns `None` when `start > end` or when the count does not fit in `usize`.
    #[inline]
    fn steps_between(&start: &char, &end: &char) -> Option<usize> {
        let (lo, hi) = (start as u32, end as u32);
        if lo > hi {
            return None;
        }
        let mut distance = hi - lo;
        // The span covers the entire surrogate gap, so its 0x800 code points
        // must not be counted.
        if lo < 0xD800 && 0xE000 <= hi {
            distance -= 0x800;
        }
        usize::try_from(distance).ok()
    }

    /// Moves `count` steps forward from `start`, jumping over `0xD800..0xE000`.
    ///
    /// Returns `None` if the underlying `u32` step fails or the result would
    /// be greater than `char::MAX`.
    #[inline]
    fn forward_checked(start: char, count: usize) -> Option<char> {
        let origin = start as u32;
        let landed = Step::forward_checked(origin, count)?;
        // Started below the gap and reached or passed its lower bound:
        // skip the 0x800 invalid code points.
        let scalar = if origin < 0xD800 && 0xD800 <= landed {
            Step::forward_checked(landed, 0x800)?
        } else {
            landed
        };
        if scalar > char::MAX as u32 {
            return None;
        }
        // SAFETY: scalar is a valid unicode scalar
        // (below 0x110000 and not in 0xD800..0xE000)
        Some(unsafe { char::from_u32_unchecked(scalar) })
    }

    /// Moves `count` steps backward from `start`, jumping over `0xD800..0xE000`.
    ///
    /// Returns `None` if the underlying `u32` step fails.
    #[inline]
    fn backward_checked(start: char, count: usize) -> Option<char> {
        let origin = start as u32;
        let landed = Step::backward_checked(origin, count)?;
        // Started at or above the gap's upper bound and dropped below it:
        // skip the 0x800 invalid code points.
        let scalar = if origin >= 0xE000 && 0xE000 > landed {
            Step::backward_checked(landed, 0x800)?
        } else {
            landed
        };
        // SAFETY: scalar is a valid unicode scalar
        // (below 0x110000 and not in 0xD800..0xE000)
        Some(unsafe { char::from_u32_unchecked(scalar) })
    }

    /// Unchecked counterpart of `forward_checked`.
    ///
    /// # Safety
    ///
    /// The result is built with `char::from_u32_unchecked` without any range
    /// check, so the caller must guarantee that stepping `count` forward from
    /// `start` lands on a valid `char`.
    #[inline]
    unsafe fn forward_unchecked(start: char, count: usize) -> char {
        let origin = start as u32;
        let landed = Step::forward_unchecked(origin, count);
        let scalar = if origin < 0xD800 && 0xD800 <= landed {
            Step::forward_unchecked(landed, 0x800)
        } else {
            landed
        };
        char::from_u32_unchecked(scalar)
    }

    /// Unchecked counterpart of `backward_checked`.
    ///
    /// # Safety
    ///
    /// The result is built with `char::from_u32_unchecked` without any range
    /// check, so the caller must guarantee that stepping `count` backward from
    /// `start` lands on a valid `char`.
    #[inline]
    unsafe fn backward_unchecked(start: char, count: usize) -> char {
        let origin = start as u32;
        let landed = Step::backward_unchecked(origin, count);
        let scalar = if origin >= 0xE000 && 0xE000 > landed {
            Step::backward_unchecked(landed, 0x800)
        } else {
            landed
        };
        char::from_u32_unchecked(scalar)
    }
}
macro_rules! range_exact_iter_impl {
($($t:ty)*) => ($(
#[stable(feature = "rust1", since = "1.0.0")]
@ -582,7 +650,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
}
let is_iterating = self.start < self.end;
Some(if is_iterating {
let n = Step::forward(self.start.clone(), 1);
// SAFETY: just checked precondition
// We use the unchecked version here, because
// otherwise `for _ in '\0'..=char::MAX`
// does not successfully remove panicking code.
let n = unsafe { Step::forward_unchecked(self.start.clone(), 1) };
mem::replace(&mut self.start, n)
} else {
self.exhausted = true;

View File

@ -1932,6 +1932,18 @@ fn test_range() {
);
}
#[test]
fn test_char_range() {
    use std::char;

    // Reference sequence: every `u32` up to `char::MAX` that converts to a `char`.
    let every_scalar = || (0..=char::MAX as u32).filter_map(char::from_u32);

    // The full inclusive range must match the reference in both directions.
    assert!(('\0'..=char::MAX).eq(every_scalar()));
    assert!(('\0'..=char::MAX).rev().eq(every_scalar().rev()));

    // Ranges whose endpoints sit directly on either side of the surrogate gap.
    let closed = '\u{D7FF}'..='\u{E000}';
    assert_eq!(closed.clone().count(), 2);
    assert_eq!(closed.size_hint(), (2, Some(2)));

    let half_open = '\u{D7FF}'..'\u{E000}';
    assert_eq!(half_open.clone().count(), 1);
    assert_eq!(half_open.size_hint(), (1, Some(1)));
}
#[test]
fn test_range_exhaustion() {
let mut r = 10..10;