diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index bf3b35b4f68..db236795038 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -103,7 +103,6 @@ mod std { mod prelude { // from core. pub use core::borrow::IntoCow; - pub use core::char::CharExt; pub use core::clone::Clone; pub use core::cmp::{PartialEq, Eq, PartialOrd, Ord}; pub use core::cmp::Ordering::{Less, Equal, Greater}; @@ -127,7 +126,7 @@ mod prelude { // from other crates. pub use alloc::boxed::Box; - pub use unicode::char::UnicodeChar; + pub use unicode::char::CharExt; // from collections. pub use slice::SliceConcatExt; diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 51bf206f70e..3fa0b5645c5 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -248,7 +248,6 @@ use str; use string::String; use uint; use unicode; -use unicode::char::UnicodeChar; use vec::Vec; // Reexports diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 20dd70f0faa..67fe599ecd6 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -16,7 +16,7 @@ use self::ExponentFormat::*; use self::SignificantDigits::*; use self::SignFormat::*; -use char::{self, Char}; +use char::{self, CharExt}; use num::{self, Int, Float, ToPrimitive}; use num::FpCategory as Fp; use ops::FnMut; diff --git a/src/libstd/path/windows.rs b/src/libstd/path/windows.rs index aae8d6cadef..cf8bc0e6242 100644 --- a/src/libstd/path/windows.rs +++ b/src/libstd/path/windows.rs @@ -16,6 +16,7 @@ use self::PathPrefix::*; use ascii::AsciiExt; use c_str::{CString, ToCStr}; +use char::CharExt; use clone::Clone; use cmp::{PartialEq, Eq, PartialOrd, Ord, Ordering}; use hash; @@ -28,7 +29,6 @@ use option::Option::{Some, None}; use slice::{SliceExt, SliceConcatExt}; use str::{SplitTerminator, FromStr, StrExt}; use string::{String, ToString}; -use unicode::char::UnicodeChar; use vec::Vec; use super::{contains_nul, BytesContainer, GenericPath, GenericPathUnsafe}; diff --git 
a/src/libstd/prelude/v1.rs b/src/libstd/prelude/v1.rs index eda20fc7d60..f6bdcd53dff 100644 --- a/src/libstd/prelude/v1.rs +++ b/src/libstd/prelude/v1.rs @@ -22,7 +22,7 @@ // Reexported types and traits #[stable] #[doc(no_inline)] pub use boxed::Box; -#[stable] #[doc(no_inline)] pub use char::{CharExt, UnicodeChar}; +#[stable] #[doc(no_inline)] pub use char::CharExt; #[stable] #[doc(no_inline)] pub use clone::Clone; #[stable] #[doc(no_inline)] pub use cmp::{PartialEq, PartialOrd, Eq, Ord}; #[stable] #[doc(no_inline)] pub use iter::CloneIteratorExt; diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs index 170700fb4d5..a3884d0c86e 100644 --- a/src/libunicode/lib.rs +++ b/src/libunicode/lib.rs @@ -44,9 +44,9 @@ mod u_str; // re-export char so that std et al see it correctly /// Character manipulation (`char` type, Unicode Scalar Value) /// -/// This module provides the `Char` and `UnicodeChar` traits, as well as their -/// implementation for the primitive `char` type, in order to allow basic character -/// manipulation. +/// This module provides the `CharExt` trait, as well as its +/// implementation for the primitive `char` type, in order to allow +/// basic character manipulation. /// /// A `char` actually represents a /// *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*, @@ -58,14 +58,14 @@ mod u_str; /// however the converse is not always true due to the above range limits /// and, as such, should be performed via the `from_u32` function.. 
pub mod char { - pub use core::char::{MAX, from_u32, from_digit, CharExt}; + pub use core::char::{MAX, from_u32, from_digit}; pub use normalize::{decompose_canonical, decompose_compatible, compose}; pub use tables::normalization::canonical_combining_class; pub use tables::UNICODE_VERSION; - pub use u_char::UnicodeChar; + pub use u_char::CharExt; } pub mod str { diff --git a/src/libunicode/tables.rs b/src/libunicode/tables.rs index e3550810010..c755ea93184 100644 --- a/src/libunicode/tables.rs +++ b/src/libunicode/tables.rs @@ -13,7 +13,7 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] /// The version of [Unicode](http://www.unicode.org/) -/// that the `UnicodeChar` and `UnicodeStrPrelude` traits are based on. +/// that the unicode parts of `CharExt` and `UnicodeStrPrelude` traits are based on. pub const UNICODE_VERSION: (uint, uint, uint) = (7, 0, 0); fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 9c356801604..c1abfd4e189 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -8,17 +8,99 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Unicode-intensive `char` methods. +//! Unicode-intensive `char` methods along with the `core` methods. //! //! These methods implement functionality for `char` that requires knowledge of //! Unicode definitions, including normalization, categorization, and display information. +use core::char; +use core::char::CharExt as C; use core::option::Option; use tables::{derived_property, property, general_category, conversions, charwidth}; -/// Useful functions for Unicode characters. +/// Functionality for manipulating `char`. #[experimental = "pending prelude organization"] -pub trait UnicodeChar { +pub trait CharExt { + /// Checks if a `char` parses as a numeric digit in the given radix. 
+ /// + /// Compared to `is_numeric()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `c` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Panics + /// + /// Panics if given a radix > 36. + #[unstable = "pending integer conventions"] + fn is_digit(self, radix: uint) -> bool; + + /// Converts a character to the corresponding digit. + /// + /// # Return value + /// + /// If `c` is between '0' and '9', the corresponding value between 0 and + /// 9. If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. Returns + /// `None` if the character does not refer to a digit in the given radix. + /// + /// # Panics + /// + /// Panics if given a radix > 36. + #[unstable = "pending integer conventions"] + fn to_digit(self, radix: uint) -> Option<uint>; + + /// Returns an iterator that yields the hexadecimal Unicode escape + /// of a character, as `char`s. + /// + /// All characters are escaped with Rust syntax of the form `\\u{NNNN}` + /// where `NNNN` is the shortest hexadecimal representation of the code + /// point. + #[stable] + fn escape_unicode(self) -> char::EscapeUnicode; + + /// Returns an iterator that yields the 'default' ASCII and + /// C++11-like literal escape of a character, as `char`s. + /// + /// The default is chosen with a bias toward producing literals that are + /// legal in a variety of languages, including C++11 and similar C-family + /// languages. The exact rules are: + /// + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. + /// * Single-quote, double-quote and backslash chars are backslash- + /// escaped. + /// * Any other chars in the range [0x20,0x7e] are not escaped. + /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. + #[stable] + fn escape_default(self) -> char::EscapeDefault; + + /// Returns the number of bytes this character would need if encoded in + /// UTF-8. 
+ #[stable] + fn len_utf8(self) -> uint; + + /// Returns the number of bytes this character would need if encoded in + /// UTF-16. + #[stable] + fn len_utf16(self) -> uint; + + /// Encodes this character as UTF-8 into the provided byte buffer, + /// and then returns the number of bytes written. + /// + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf8(self, dst: &mut [u8]) -> Option<uint>; + + /// Encodes this character as UTF-16 into the provided `u16` buffer, + /// and then returns the number of `u16`s written. + /// + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf16(self, dst: &mut [u16]) -> Option<uint>; + /// Returns whether the specified character is considered a Unicode /// alphabetic code point. fn is_alphabetic(self) -> bool; @@ -118,7 +200,24 @@ pub trait UnicodeChar { } #[experimental = "pending prelude organization"] -impl UnicodeChar for char { +impl CharExt for char { + #[unstable = "pending integer conventions"] + fn is_digit(self, radix: uint) -> bool { C::is_digit(self, radix) } + #[unstable = "pending integer conventions"] + fn to_digit(self, radix: uint) -> Option<uint> { C::to_digit(self, radix) } + #[stable] + fn escape_unicode(self) -> char::EscapeUnicode { C::escape_unicode(self) } + #[stable] + fn escape_default(self) -> char::EscapeDefault { C::escape_default(self) } + #[stable] + fn len_utf8(self) -> uint { C::len_utf8(self) } + #[stable] + fn len_utf16(self) -> uint { C::len_utf16(self) } + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> { C::encode_utf8(self, dst) } + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> { C::encode_utf16(self, dst) } + fn 
is_alphabetic(self) -> bool { match self { 'a' ... 'z' | 'A' ... 'Z' => true, diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 1b0c4171134..90949437774 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -13,7 +13,7 @@ //! Unicode-intensive string manipulations. //! //! This module provides functionality to `str` that requires the Unicode methods provided by the -//! UnicodeChar trait. +//! unicode parts of the CharExt trait. use self::GraphemeState::*; use core::prelude::*; @@ -26,7 +26,7 @@ use core::num::Int; use core::slice; use core::str::Split; -use u_char::UnicodeChar; +use u_char::CharExt as UCharExt; // conflicts with core::prelude::CharExt use tables::grapheme::GraphemeCat; /// An iterator over the words of a string, separated by a sequence of whitespace @@ -529,7 +529,7 @@ impl Iterator for Utf16Encoder where I: Iterator { let mut buf = [0u16; 2]; self.chars.next().map(|ch| { - let n = ch.encode_utf16(buf.as_mut_slice()).unwrap_or(0); + let n = CharExt::encode_utf16(ch, buf.as_mut_slice()).unwrap_or(0); if n == 2 { self.extra = buf[1]; } buf[0] })