From 93d6425b4392dd4a5244887bd21e9b092635d22d Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 4 Feb 2016 00:11:18 -0500 Subject: [PATCH] Clarify scenario where AsciiExt appears to operate on non-ASCII Fixes https://github.com/rust-lang/rust/issues/31203 --- src/libstd/ascii.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index b6123264ea8..38f79079b29 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -18,6 +18,27 @@ use mem; use ops::Range; /// Extension methods for ASCII-subset only operations on string slices. +/// +/// Be aware that operations on seemingly non-ASCII characters can sometimes +/// have unexpected results. Consider this example: +/// +/// ``` +/// use std::ascii::AsciiExt; +/// +/// assert_eq!("café".to_ascii_uppercase(), "CAFÉ"); +/// assert_eq!("café".to_ascii_uppercase(), "CAFé"); +/// ``` +/// +/// In the first example, the lowercase string is represented as `"cafe\u{301}"` +/// (the last character is an acute accent [combining character]). Unlike the +/// other characters in the string, the combining character will not get mapped +/// to an uppercase variant, resulting in `"CAFE\u{301}"`. In the second +/// example, the lowercase string is represented as `"caf\u{e9}"` (the last +/// character is a single Unicode character representing an 'e' with an acute +/// accent). Since the last character is defined outside the scope of ASCII, +/// it will not get mapped to an uppercase variant, resulting in `"CAF\u{e9}"`. +/// +/// [combining character]: https://en.wikipedia.org/wiki/Combining_character #[stable(feature = "rust1", since = "1.0.0")] pub trait AsciiExt { /// Container type for copied ASCII characters.