Minor rewrite of char primitive unicode intro.
Opened primarily to address #36998.
This commit is contained in:
parent
c774c95919
commit
1065ad418e
@@ -103,26 +103,31 @@ mod prim_bool { }
|
||||
/// [`String`]: string/struct.String.html
|
||||
///
|
||||
/// As always, remember that a human intuition for 'character' may not map to
|
||||
/// Unicode's definitions. For example, emoji symbols such as '❤️' can be more
|
||||
/// than one Unicode code point; this ❤️ in particular is two:
|
||||
/// Unicode's definitions. For example, despite looking similar, the 'é'
|
||||
/// character is one Unicode code point while 'é' is two Unicode code points:
|
||||
///
|
||||
/// ```
|
||||
/// let s = String::from("❤️");
|
||||
/// let mut chars = "é".chars();
|
||||
/// // U+00e9: 'latin small letter e with acute'
|
||||
/// assert_eq!(Some('\u{00e9}'), chars.next());
|
||||
/// assert_eq!(None, chars.next());
|
||||
///
|
||||
/// // we get two chars out of a single ❤️
|
||||
/// let mut iter = s.chars();
|
||||
/// assert_eq!(Some('\u{2764}'), iter.next());
|
||||
/// assert_eq!(Some('\u{fe0f}'), iter.next());
|
||||
/// assert_eq!(None, iter.next());
|
||||
/// let mut chars = "é".chars();
|
||||
/// // U+0065: 'latin small letter e'
|
||||
/// assert_eq!(Some('\u{0065}'), chars.next());
|
||||
/// // U+0301: 'combining acute accent'
|
||||
/// assert_eq!(Some('\u{0301}'), chars.next());
|
||||
/// assert_eq!(None, chars.next());
|
||||
/// ```
|
||||
///
|
||||
/// This means it won't fit into a `char`. Trying to create a literal with
|
||||
/// `let heart = '❤️';` gives an error:
|
||||
/// This means that the contents of the first string above _will_ fit into a
|
||||
/// `char` while the contents of the second string _will not_. Trying to create
|
||||
/// a `char` literal with the contents of the second string gives an error:
|
||||
///
|
||||
/// ```text
|
||||
/// error: character literal may only contain one codepoint: '❤
|
||||
/// let heart = '❤️';
|
||||
///             ^~
|
||||
/// error: character literal may only contain one codepoint: 'é'
|
||||
/// let c = 'é';
|
||||
///         ^^^^
|
||||
/// ```
|
||||
///
|
||||
/// Another implication of the 4-byte fixed size of a `char` is that
|
||||
|
Loading…
Reference in New Issue
Block a user