add core::char::DecodeUtf8
This commit is contained in:
parent
fe96928d7d
commit
837029fec1
@ -676,3 +676,50 @@ impl Iterator for EncodeUtf16 {
|
||||
self.as_slice().iter().size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// An iterator adapter that decodes a stream of bytes as UTF-8, yielding
|
||||
/// `Ok(char)` for each decoded character and `Err(InvalidSequence)` otherwise.
///
/// The wrapped byte iterator is held in a `Peekable` so that a byte can be
/// inspected before deciding whether to consume it.
|
||||
#[unstable(feature = "decode_utf8", issue = "33906")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DecodeUtf8<I: Iterator<Item = u8>>(::iter::Peekable<I>);
|
||||
|
||||
/// Decodes an `Iterator` of bytes as UTF-8.
|
||||
#[unstable(feature = "decode_utf8", issue = "33906")]
|
||||
#[inline]
|
||||
pub fn decode_utf8<I: IntoIterator<Item = u8>>(i: I) -> DecodeUtf8<I::IntoIter> {
|
||||
DecodeUtf8(i.into_iter().peekable())
|
||||
}
|
||||
|
||||
/// `<DecodeUtf8 as Iterator>::next` returns this for an invalid input sequence.
///
/// The single `()` field is private, so code outside the defining module cannot
/// construct a value of this type directly; it carries no further information
/// about the failure.
|
||||
#[unstable(feature = "decode_utf8", issue = "33906")]
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub struct InvalidSequence(());
|
||||
|
||||
#[unstable(feature = "decode_utf8", issue = "33906")]
|
||||
impl<I: Iterator<Item = u8>> Iterator for DecodeUtf8<I> {
|
||||
type Item = Result<char, InvalidSequence>;
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Result<char, InvalidSequence>> {
|
||||
self.0.next().map(|b| {
|
||||
if b & 0x80 == 0 { Ok(b as char) } else {
|
||||
let l = (!b).leading_zeros() as usize; // number of bytes in UTF-8 representation
|
||||
if l < 2 || l > 6 { return Err(InvalidSequence(())) };
|
||||
let mut x = (b as u32) & (0x7F >> l);
|
||||
for _ in 0..l-1 {
|
||||
match self.0.peek() {
|
||||
Some(&b) if b & 0xC0 == 0x80 => {
|
||||
self.0.next();
|
||||
x = (x << 6) | (b as u32) & 0x3F;
|
||||
},
|
||||
_ => return Err(InvalidSequence(())),
|
||||
}
|
||||
}
|
||||
match from_u32(x) {
|
||||
Some(x) if l == x.len_utf8() => Ok(x),
|
||||
_ => Err(InvalidSequence(())),
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -302,3 +302,32 @@ fn eu_iterator_specializations() {
|
||||
check('\u{12340}');
|
||||
check('\u{10FFFF}');
|
||||
}
|
||||
|
||||
/// Checks `decode_utf8` against `str::chars`.
///
/// Each case pairs the expected text with the raw input bytes. Decoding errors
/// are mapped to U+FFFD REPLACEMENT CHARACTER before comparison, so every
/// invalid sequence is expected to show up as exactly one U+FFFD.
#[test]
fn test_decode_utf8() {
    use core::char::*;
    use core::iter::FromIterator;

    // U+FFFD is written as an escape so the expectation cannot be corrupted by
    // a lossy re-encoding of the source file.
    for &(expected, bytes) in [("", &[] as &[u8]),
                               ("A", &[0x41u8] as &[u8]),
                               ("\u{FFFD}", &[0xC1u8, 0x81u8] as &[u8]),
                               ("♥", &[0xE2u8, 0x99u8, 0xA5u8] as &[u8]),
                               ("♥A", &[0xE2u8, 0x99u8, 0xA5u8, 0x41u8] as &[u8]),
                               ("\u{FFFD}", &[0xE2u8, 0x99u8] as &[u8]),
                               ("\u{FFFD}A", &[0xE2u8, 0x99u8, 0x41u8] as &[u8]),
                               ("\u{FFFD}", &[0xC0u8] as &[u8]),
                               ("\u{FFFD}A", &[0xC0u8, 0x41u8] as &[u8]),
                               ("\u{FFFD}", &[0x80u8] as &[u8]),
                               ("\u{FFFD}A", &[0x80u8, 0x41u8] as &[u8]),
                               ("\u{FFFD}", &[0xFEu8] as &[u8]),
                               ("\u{FFFD}A", &[0xFEu8, 0x41u8] as &[u8]),
                               ("\u{FFFD}", &[0xFFu8] as &[u8]),
                               ("\u{FFFD}A", &[0xFFu8, 0x41u8] as &[u8])].iter() {
        // Decode once and reuse the result for both the check and the message.
        let decoded = Vec::from_iter(decode_utf8(bytes.iter().cloned())
                                         .map(|r_b| r_b.unwrap_or('\u{FFFD}')));
        assert!(Iterator::eq(expected.chars(), decoded.iter().cloned()),
                "chars = {}, bytes = {:?}, decoded = {:?}", expected, bytes, decoded);
    }
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
#![feature(core_private_bignum)]
|
||||
#![feature(core_private_diy_float)]
|
||||
#![feature(dec2flt)]
|
||||
#![feature(decode_utf8)]
|
||||
#![feature(fixed_size_array)]
|
||||
#![feature(float_extras)]
|
||||
#![feature(flt2dec)]
|
||||
|
@ -39,6 +39,8 @@ pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked};
|
||||
pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDefault, EscapeUnicode};
|
||||
|
||||
// unstable reexports
|
||||
#[unstable(feature = "decode_utf8", issue = "33906")]
|
||||
pub use core::char::{DecodeUtf8, decode_utf8};
|
||||
#[unstable(feature = "unicode", issue = "27783")]
|
||||
pub use tables::UNICODE_VERSION;
|
||||
|
||||
|
@ -33,6 +33,7 @@
|
||||
#![no_std]
|
||||
|
||||
#![feature(core_char_ext)]
|
||||
#![feature(decode_utf8)]
|
||||
#![feature(lang_items)]
|
||||
#![feature(staged_api)]
|
||||
#![feature(unicode)]
|
||||
|
Loading…
Reference in New Issue
Block a user