From 48615a68fb01d09749a5b73816d45e0d0669d1f9 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Wed, 26 Aug 2015 17:30:45 -0700
Subject: [PATCH] std: Account for CRLF in {str, BufRead}::lines

This commit is an implementation of [RFC 1212][rfc] which tweaks the behavior of
the `str::lines` and `BufRead::lines` iterators. Both iterators now account for
`\r\n` sequences in addition to `\n`, allowing for less surprising behavior
across platforms (especially in the `BufRead` case). Splitting *only* on the
`\n` character can still be achieved with `split('\n')` in both cases.

The `str::lines_any` function is also now deprecated as `str::lines` is a
drop-in replacement for it.

[rfc]: https://github.com/rust-lang/rfcs/blob/master/text/1212-line-endings.md

Closes #28032
---
 src/libcollections/str.rs             | 10 ++++++----
 src/libcollectionstest/str.rs         |  4 ++--
 src/libcore/str/mod.rs                | 14 ++++++++++----
 src/librustdoc/passes.rs              |  2 +-
 src/libstd/io/mod.rs                  | 11 +++++++----
 src/libsyntax/parse/lexer/comments.rs |  2 +-
 6 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index 657a3f60448..7d1ed13d764 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -604,14 +604,14 @@ impl str {
         UnicodeStr::split_whitespace(self)
     }
 
-    /// An iterator over the lines of a string, separated by `\n`.
+    /// An iterator over the lines of a string, separated by `\n` or `\r\n`.
     ///
-    /// This does not include the empty string after a trailing `\n`.
+    /// This does not include the empty string after a trailing newline or CRLF.
     ///
     /// # Examples
     ///
     /// ```
-    /// let four_lines = "foo\nbar\n\nbaz";
+    /// let four_lines = "foo\nbar\n\r\nbaz";
     /// let v: Vec<&str> = four_lines.lines().collect();
     ///
     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
@@ -620,7 +620,7 @@ impl str {
     /// Leaving off the trailing character:
     ///
     /// ```
-    /// let four_lines = "foo\nbar\n\nbaz\n";
+    /// let four_lines = "foo\r\nbar\n\nbaz\n";
     /// let v: Vec<&str> = four_lines.lines().collect();
     ///
     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
@@ -654,7 +654,9 @@ impl str {
     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
+    #[deprecated(since = "1.4.0", reason = "use lines() instead now")]
     #[inline]
+    #[allow(deprecated)]
     pub fn lines_any(&self) -> LinesAny {
         core_str::StrExt::lines_any(self)
     }
diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs
index 8c468e91567..7b481f63991 100644
--- a/src/libcollectionstest/str.rs
+++ b/src/libcollectionstest/str.rs
@@ -964,11 +964,11 @@ fn test_split_whitespace() {
 
 #[test]
 fn test_lines() {
-    let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+    let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n";
     let lines: Vec<&str> = data.lines().collect();
     assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
 
-    let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
+    let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
     let lines: Vec<&str> = data.lines().collect();
     assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
 }
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 4612fc89008..4664162358d 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -827,7 +827,7 @@ generate_pattern_iterators! {
 /// Created with the method `.lines()`.
 #[stable(feature = "rust1", since = "1.0.0")]
 #[derive(Clone)]
-pub struct Lines<'a>(SplitTerminator<'a, char>);
+pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for Lines<'a> {
@@ -854,8 +854,10 @@ impl<'a> DoubleEndedIterator for Lines<'a> {
 
 /// Created with the method `.lines_any()`.
 #[stable(feature = "rust1", since = "1.0.0")]
+#[deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
 #[derive(Clone)]
-pub struct LinesAny<'a>(Map<Lines<'a>, LinesAnyMap>);
+#[allow(deprecated)]
+pub struct LinesAny<'a>(Lines<'a>);
 
 /// A nameable, clonable fn type
 #[derive(Clone)]
@@ -887,6 +889,7 @@ impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
+#[allow(deprecated)]
 impl<'a> Iterator for LinesAny<'a> {
     type Item = &'a str;
 
@@ -902,6 +905,7 @@ impl<'a> Iterator for LinesAny<'a> {
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
+#[allow(deprecated)]
 impl<'a> DoubleEndedIterator for LinesAny<'a> {
     #[inline]
     fn next_back(&mut self) -> Option<&'a str> {
@@ -1289,6 +1293,7 @@ pub trait StrExt {
     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
         where P::Searcher: ReverseSearcher<'a>;
     fn lines(&self) -> Lines;
+    #[allow(deprecated)]
     fn lines_any(&self) -> LinesAny;
     fn char_len(&self) -> usize;
     fn slice_chars(&self, begin: usize, end: usize) -> &str;
@@ -1428,12 +1433,13 @@ impl StrExt for str {
     }
     #[inline]
     fn lines(&self) -> Lines {
-        Lines(self.split_terminator('\n'))
+        Lines(self.split_terminator('\n').map(LinesAnyMap))
     }
 
     #[inline]
+    #[allow(deprecated)]
     fn lines_any(&self) -> LinesAny {
-        LinesAny(self.lines().map(LinesAnyMap))
+        LinesAny(self.lines())
     }
 
     #[inline]
diff --git a/src/librustdoc/passes.rs b/src/librustdoc/passes.rs
index c11d9b8e31d..8a57a50bdea 100644
--- a/src/librustdoc/passes.rs
+++ b/src/librustdoc/passes.rs
@@ -308,7 +308,7 @@ pub fn collapse_docs(krate: clean::Crate) -> plugins::PluginResult {
 }
 
 pub fn unindent(s: &str) -> String {
-    let lines = s.lines_any().collect::<Vec<&str> >();
+    let lines = s.lines().collect::<Vec<&str> >();
     let mut saw_first_line = false;
     let mut saw_second_line = false;
     let min_indent = lines.iter().fold(usize::MAX, |min_indent, line| {
diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs
index 72a74c23dc8..54869807cac 100644
--- a/src/libstd/io/mod.rs
+++ b/src/libstd/io/mod.rs
@@ -1439,7 +1439,7 @@ pub trait BufRead: Read {
     ///
     /// The iterator returned from this function will yield instances of
     /// `io::Result<String>`. Each string returned will *not* have a newline
-    /// byte (the 0xA byte) at the end.
+    /// byte (the 0xA byte) or CRLF (0xD, 0xA bytes) at the end.
     ///
     /// # Examples
     ///
@@ -1763,6 +1763,9 @@ impl<B: BufRead> Iterator for Lines<B> {
             Ok(_n) => {
                 if buf.ends_with("\n") {
                     buf.pop();
+                    if buf.ends_with("\r") {
+                        buf.pop();
+                    }
                 }
                 Some(Ok(buf))
             }
@@ -1834,12 +1837,12 @@ mod tests {
 
     #[test]
     fn lines() {
-        let buf = Cursor::new(&b"12"[..]);
+        let buf = Cursor::new(&b"12\r"[..]);
         let mut s = buf.lines();
-        assert_eq!(s.next().unwrap().unwrap(), "12".to_string());
+        assert_eq!(s.next().unwrap().unwrap(), "12\r".to_string());
         assert!(s.next().is_none());
 
-        let buf = Cursor::new(&b"12\n\n"[..]);
+        let buf = Cursor::new(&b"12\r\n\n"[..]);
         let mut s = buf.lines();
         assert_eq!(s.next().unwrap().unwrap(), "12".to_string());
         assert_eq!(s.next().unwrap().unwrap(), "".to_string());
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 467345624c2..9033208fbdb 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -132,7 +132,7 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
 
     if comment.starts_with("/*") {
         let lines = comment[3..comment.len() - 2]
-            .lines_any()
+            .lines()
             .map(|s| s.to_string())
             .collect::<Vec<String> >();