From 48615a68fb01d09749a5b73816d45e0d0669d1f9 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 26 Aug 2015 17:30:45 -0700 Subject: [PATCH] std: Account for CRLF in {str, BufRead}::lines This commit is an implementation of [RFC 1212][rfc] which tweaks the behavior of the `str::lines` and `BufRead::lines` iterators. Both iterators now account for `\r\n` sequences in addition to `\n`, allowing for less surprising behavior across platforms (especially in the `BufRead` case). Splitting *only* on the `\n` character can still be achieved with `split('\n')` in both cases. The `str::lines_any` function is also now deprecated as `str::lines` is a drop-in replacement for it. [rfc]: https://github.com/rust-lang/rfcs/blob/master/text/1212-line-endings.md Closes #28032 --- src/libcollections/str.rs | 10 ++++++---- src/libcollectionstest/str.rs | 4 ++-- src/libcore/str/mod.rs | 14 ++++++++++---- src/librustdoc/passes.rs | 2 +- src/libstd/io/mod.rs | 11 +++++++---- src/libsyntax/parse/lexer/comments.rs | 2 +- 6 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 657a3f60448..7d1ed13d764 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -604,14 +604,14 @@ impl str { UnicodeStr::split_whitespace(self) } - /// An iterator over the lines of a string, separated by `\n`. + /// An iterator over the lines of a string, separated by `\n` or `\r\n`. /// - /// This does not include the empty string after a trailing `\n`. + /// This does not include the empty string after a trailing newline or CRLF. 
/// /// # Examples /// /// ``` - /// let four_lines = "foo\nbar\n\nbaz"; + /// let four_lines = "foo\nbar\n\r\nbaz"; /// let v: Vec<&str> = four_lines.lines().collect(); /// /// assert_eq!(v, ["foo", "bar", "", "baz"]); @@ -620,7 +620,7 @@ impl str { /// Leaving off the trailing character: /// /// ``` - /// let four_lines = "foo\nbar\n\nbaz\n"; + /// let four_lines = "foo\r\nbar\n\nbaz\n"; /// let v: Vec<&str> = four_lines.lines().collect(); /// /// assert_eq!(v, ["foo", "bar", "", "baz"]); @@ -654,7 +654,9 @@ impl str { /// assert_eq!(v, ["foo", "bar", "", "baz"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[deprecated(since = "1.4.0", reason = "use lines() instead now")] #[inline] + #[allow(deprecated)] pub fn lines_any(&self) -> LinesAny { core_str::StrExt::lines_any(self) } diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 8c468e91567..7b481f63991 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -964,11 +964,11 @@ fn test_split_whitespace() { #[test] fn test_lines() { - let data = "\nMäry häd ä little lämb\n\nLittle lämb\n"; + let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n"; let lines: Vec<&str> = data.lines().collect(); assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]); - let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n + let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n let lines: Vec<&str> = data.lines().collect(); assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]); } diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 4612fc89008..4664162358d 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -827,7 +827,7 @@ generate_pattern_iterators! { /// Created with the method `.lines()`. 
#[stable(feature = "rust1", since = "1.0.0")] #[derive(Clone)] -pub struct Lines<'a>(SplitTerminator<'a, char>); +pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>); #[stable(feature = "rust1", since = "1.0.0")] impl<'a> Iterator for Lines<'a> { @@ -854,8 +854,10 @@ impl<'a> DoubleEndedIterator for Lines<'a> { /// Created with the method `.lines_any()`. #[stable(feature = "rust1", since = "1.0.0")] +#[deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")] #[derive(Clone)] -pub struct LinesAny<'a>(Map<Lines<'a>, LinesAnyMap>); +#[allow(deprecated)] +pub struct LinesAny<'a>(Lines<'a>); /// A nameable, clonable fn type #[derive(Clone)] @@ -887,6 +889,7 @@ impl<'a> FnOnce<(&'a str,)> for LinesAnyMap { } #[stable(feature = "rust1", since = "1.0.0")] +#[allow(deprecated)] impl<'a> Iterator for LinesAny<'a> { type Item = &'a str; @@ -902,6 +905,7 @@ impl<'a> Iterator for LinesAny<'a> { } #[stable(feature = "rust1", since = "1.0.0")] +#[allow(deprecated)] impl<'a> DoubleEndedIterator for LinesAny<'a> { #[inline] fn next_back(&mut self) -> Option<&'a str> { @@ -1289,6 +1293,7 @@ pub trait StrExt { fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> where P::Searcher: ReverseSearcher<'a>; fn lines(&self) -> Lines; + #[allow(deprecated)] fn lines_any(&self) -> LinesAny; fn char_len(&self) -> usize; fn slice_chars(&self, begin: usize, end: usize) -> &str; @@ -1428,12 +1433,13 @@ impl StrExt for str { } #[inline] fn lines(&self) -> Lines { - Lines(self.split_terminator('\n')) + Lines(self.split_terminator('\n').map(LinesAnyMap)) } #[inline] + #[allow(deprecated)] fn lines_any(&self) -> LinesAny { - LinesAny(self.lines().map(LinesAnyMap)) + LinesAny(self.lines()) } #[inline] diff --git a/src/librustdoc/passes.rs b/src/librustdoc/passes.rs index c11d9b8e31d..8a57a50bdea 100644 --- a/src/librustdoc/passes.rs +++ b/src/librustdoc/passes.rs @@ -308,7 +308,7 @@ pub fn collapse_docs(krate: clean::Crate) -> plugins::PluginResult { } pub fn unindent(s: &str) -> 
String { - let lines = s.lines_any().collect::<Vec<&str> >(); + let lines = s.lines().collect::<Vec<&str> >(); let mut saw_first_line = false; let mut saw_second_line = false; let min_indent = lines.iter().fold(usize::MAX, |min_indent, line| { diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 72a74c23dc8..54869807cac 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1439,7 +1439,7 @@ pub trait BufRead: Read { /// /// The iterator returned from this function will yield instances of /// `io::Result<String>`. Each string returned will *not* have a newline - /// byte (the 0xA byte) at the end. + /// byte (the 0xA byte) or CRLF (0xD, 0xA bytes) at the end. /// /// # Examples /// @@ -1763,6 +1763,9 @@ impl<B: BufRead> Iterator for Lines<B> { Ok(_n) => { if buf.ends_with("\n") { buf.pop(); + if buf.ends_with("\r") { + buf.pop(); + } } Some(Ok(buf)) } @@ -1834,12 +1837,12 @@ mod tests { #[test] fn lines() { - let buf = Cursor::new(&b"12"[..]); + let buf = Cursor::new(&b"12\r"[..]); let mut s = buf.lines(); - assert_eq!(s.next().unwrap().unwrap(), "12".to_string()); + assert_eq!(s.next().unwrap().unwrap(), "12\r".to_string()); assert!(s.next().is_none()); - let buf = Cursor::new(&b"12\n\n"[..]); + let buf = Cursor::new(&b"12\r\n\n"[..]); let mut s = buf.lines(); assert_eq!(s.next().unwrap().unwrap(), "12".to_string()); assert_eq!(s.next().unwrap().unwrap(), "".to_string()); diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index 467345624c2..9033208fbdb 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -132,7 +132,7 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String { if comment.starts_with("/*") { let lines = comment[3..comment.len() - 2] - .lines_any() + .lines() .map(|s| s.to_string()) .collect::<Vec<String> >();