From 071b4b6f7b162e92711677846dcfb1736f3b9440 Mon Sep 17 00:00:00 2001
From: Jack O'Connor <oconnor663@gmail.com>
Date: Mon, 15 Feb 2016 20:18:16 -0500
Subject: [PATCH] correct the primitive char doc's use of bytes and code points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the docs suggested that '❤️' doesn't fit in a char because
it's 6 bytes. But that's misleading. 'a̚' also doesn't fit in a char,
even though it's only 3 bytes. The important thing is the number of code
points, not the number of bytes. Clarify the primitive char docs around
this.
---
 src/libstd/primitive_docs.rs | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)
diff --git a/src/libstd/primitive_docs.rs b/src/libstd/primitive_docs.rs
index ad93fe0094a..b840e51873e 100644
--- a/src/libstd/primitive_docs.rs
+++ b/src/libstd/primitive_docs.rs
@@ -50,18 +50,21 @@ mod prim_bool { }
 /// [`String`]: string/struct.String.html
 ///
 /// As always, remember that a human intuition for 'character' may not map to
-/// Unicode's definitions. For example, emoji symbols such as '❤️' are more than
-/// one byte; ❤️ in particular is six:
+/// Unicode's definitions. For example, emoji symbols such as '❤️' can be more
+/// than one Unicode code point; this ❤️ in particular is two:
 ///
 /// ```
 /// let s = String::from("❤️");
 ///
-/// // six bytes times one byte for each element
-/// assert_eq!(6, s.len() * std::mem::size_of::<u8>());
+/// // we get two chars out of a single ❤️
+/// let mut iter = s.chars();
+/// assert_eq!(Some('\u{2764}'), iter.next());
+/// assert_eq!(Some('\u{fe0f}'), iter.next());
+/// assert_eq!(None, iter.next());
 /// ```
 ///
-/// This also means it won't fit into a `char`, and so trying to create a
-/// literal with `let heart = '❤️';` gives an error:
+/// This means it won't fit into a `char`. Trying to create a literal with
+/// `let heart = '❤️';` gives an error:
 ///
 /// ```text
 /// error: character literal may only contain one codepoint: '❤
@@ -69,8 +72,8 @@ mod prim_bool { }
 ///             ^~
 /// ```
 ///
-/// Another implication of this is that if you want to do per-`char`acter
-/// processing, it can end up using a lot more memory:
+/// Another implication of the 4-byte fixed size of a `char`, is that
+/// per-`char`acter processing can end up using a lot more memory:
 ///
 /// ```
 /// let s = String::from("love: ❤️");
@@ -79,19 +82,6 @@ mod prim_bool { }
 /// assert_eq!(12, s.len() * std::mem::size_of::<u8>());
 /// assert_eq!(32, v.len() * std::mem::size_of::<char>());
 /// ```
-///
-/// Or may give you results you may not expect:
-///
-/// ```
-/// let s = String::from("❤️");
-///
-/// let mut iter = s.chars();
-///
-/// // we get two chars out of a single ❤️
-/// assert_eq!(Some('\u{2764}'), iter.next());
-/// assert_eq!(Some('\u{fe0f}'), iter.next());
-/// assert_eq!(None, iter.next());
-/// ```
 mod prim_char { }
 
 #[doc(primitive = "unit")]