diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 4677f0b523f..ad492c81bd3 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -16,6 +16,8 @@ #![stable(feature = "core_char", since = "1.2.0")] use char_private::is_printable; +use convert::TryFrom; +use fmt; use iter::FusedIterator; use mem::transmute; @@ -122,12 +124,7 @@ pub const MAX: char = '\u{10ffff}'; #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn from_u32(i: u32) -> Option { - // catch out-of-bounds and surrogates - if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { - None - } else { - Some(unsafe { from_u32_unchecked(i) }) - } + char::try_from(i).ok() } /// Converts a `u32` to a `char`, ignoring validity. @@ -175,6 +172,66 @@ pub unsafe fn from_u32_unchecked(i: u32) -> char { transmute(i) } +#[stable(feature = "char_convert", since = "1.13.0")] +impl From for u32 { + #[inline] + fn from(c: char) -> Self { + c as u32 + } +} + +/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF. +/// +/// Unicode is designed such that this effectively decodes bytes +/// with the character encoding that IANA calls ISO-8859-1. +/// This encoding is compatible with ASCII. +/// +/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hypen), +/// which leaves some "blanks", byte values that are not assigned to any character. +/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. +/// +/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, +/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks +/// to punctuation and various Latin characters. +/// +/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) +/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases +/// for a superset of Windows-1252 that fills the remaining blanks with corresponding +/// C0 and C1 control codes. +#[stable(feature = "char_convert", since = "1.13.0")] +impl From for char { + #[inline] + fn from(i: u8) -> Self { + i as char + } +} + +#[unstable(feature = "try_from", issue = "33417")] +impl TryFrom for char { + type Err = CharTryFromError; + + #[inline] + fn try_from(i: u32) -> Result { + if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { + Err(CharTryFromError(())) + } else { + Ok(unsafe { from_u32_unchecked(i) }) + } + } +} + +/// The error type returned when a conversion from u32 to char fails. +#[unstable(feature = "try_from", issue = "33417")] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct CharTryFromError(()); + +#[unstable(feature = "try_from", issue = "33417")] +impl fmt::Display for CharTryFromError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + "converted integer out of range for `char`".fmt(f) + } +} + /// Converts a digit in the given radix to a `char`. /// /// A 'radix' here is sometimes also called a 'base'. A radix of two diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 333503d7389..199437a431e 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -9,6 +9,24 @@ // except according to those terms. use std::char; +use std::convert::TryFrom; + +#[test] +fn test_convert() { + assert_eq!(u32::from('a'), 0x61); + assert_eq!(char::from(b'\0'), '\0'); + assert_eq!(char::from(b'a'), 'a'); + assert_eq!(char::from(b'\xFF'), '\u{FF}'); + assert_eq!(char::try_from(0_u32), Ok('\0')); + assert_eq!(char::try_from(0x61_u32), Ok('a')); + assert_eq!(char::try_from(0xD7FF_u32), Ok('\u{D7FF}')); + assert!(char::try_from(0xD800_u32).is_err()); + assert!(char::try_from(0xDFFF_u32).is_err()); + assert_eq!(char::try_from(0xE000_u32), Ok('\u{E000}')); + assert_eq!(char::try_from(0x10FFFF_u32), Ok('\u{10FFFF}')); + assert!(char::try_from(0x110000_u32).is_err()); + assert!(char::try_from(0xFFFF_FFFF_u32).is_err()); +} #[test] fn test_is_lowercase() { diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index c2b7d7045dd..5a0c27d9c60 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -40,6 +40,8 @@ pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked}; pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDebug, EscapeDefault, EscapeUnicode}; // unstable reexports +#[unstable(feature = "try_from", issue = "33417")] +pub use core::char::CharTryFromError; #[unstable(feature = "decode_utf8", issue = "33906")] pub use core::char::{DecodeUtf8, decode_utf8}; #[unstable(feature = "unicode", issue = "27783")] diff --git a/src/librustc_unicode/lib.rs b/src/librustc_unicode/lib.rs index b812c262ac1..65bd717e01a 100644 --- a/src/librustc_unicode/lib.rs +++ b/src/librustc_unicode/lib.rs @@ -38,6 +38,7 @@ #![feature(fused)] #![feature(lang_items)] #![feature(staged_api)] +#![feature(try_from)] #![feature(unicode)] mod tables; diff --git a/src/libstd/error.rs b/src/libstd/error.rs index ab537f39bf9..16290620010 100644 --- a/src/libstd/error.rs +++ b/src/libstd/error.rs @@ -302,6 +302,13 @@ impl<'a, T: ?Sized + Reflect> Error for cell::BorrowMutError<'a, T> { } } +#[unstable(feature = "try_from", issue = "33417")] +impl Error for char::CharTryFromError { + fn description(&self) -> &str { + "converted integer out of range for `char`" + } +} + // copied from any.rs impl Error + 'static { /// Returns true if the boxed type is the same as `T`