From c361e13d7155552cb6e45da5016cea36f87ecfec Mon Sep 17 00:00:00 2001 From: kwantam Date: Sat, 18 Apr 2015 13:49:51 -0400 Subject: [PATCH] implement rfc 1054: split_whitespace() fn, deprecate words() For now, words() is left in (but deprecated), and Words is a type alias for struct SplitWhitespace. Also cleaned up references to s.words() throughout codebase. Closes #15628 --- src/libcollections/str.rs | 29 +++++++++++++++---- src/libcollectionstest/lib.rs | 1 - src/libcollectionstest/str.rs | 4 +-- src/libgetopts/lib.rs | 3 +- src/librustc/lib.rs | 1 - src/librustc/session/config.rs | 4 +-- src/librustc_unicode/lib.rs | 2 +- src/librustc_unicode/u_str.rs | 24 +++++++++++---- src/librustdoc/html/markdown.rs | 2 +- src/librustdoc/lib.rs | 7 ++--- .../run-pass/drop-with-type-ascription-1.rs | 4 +-- 11 files changed, 53 insertions(+), 28 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 266cda9a237..b585c2fc6ed 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -76,7 +76,7 @@ pub use core::str::{Matches, RMatches}; pub use core::str::{MatchIndices, RMatchIndices}; pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8_unchecked, ParseBoolError}; -pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices}; +pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices}; pub use core::str::pattern; /* @@ -1737,27 +1737,44 @@ impl str { UnicodeStr::grapheme_indices(&self[..], is_extended) } - /// An iterator over the non-empty words of `self`. - /// - /// A 'word' is a subsequence separated by any sequence of whitespace. - /// Sequences of whitespace - /// are collapsed, so empty "words" are not included. + /// An iterator over the non-empty substrings of `self` which contain no whitespace, + /// and which are separated by any amount of whitespace. /// /// # Examples /// /// ``` /// # #![feature(str_words)] + /// # #![allow(deprecated)] /// let some_words = " Mary had\ta little \n\t lamb"; /// let v: Vec<&str> = some_words.words().collect(); /// /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); /// ``` + #[deprecated(reason = "words() will be removed. Use split_whitespace() instead", + since = "1.1.0")] #[unstable(feature = "str_words", reason = "the precise algorithm to use is unclear")] + #[allow(deprecated)] pub fn words(&self) -> Words { UnicodeStr::words(&self[..]) } + /// An iterator over the non-empty substrings of `self` which contain no whitespace, + /// and which are separated by any amount of whitespace. + /// + /// # Examples + /// + /// ``` + /// let some_words = " Mary had\ta little \n\t lamb"; + /// let v: Vec<&str> = some_words.split_whitespace().collect(); + /// + /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); + /// ``` + #[stable(feature = "split_whitespace", since = "1.1.0")] + pub fn split_whitespace(&self) -> SplitWhitespace { + UnicodeStr::split_whitespace(&self[..]) + } + /// Returns a string's displayed width in columns. /// /// Control characters have zero width. diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs index 549ff17e469..5c109dc8104 100644 --- a/src/libcollectionstest/lib.rs +++ b/src/libcollectionstest/lib.rs @@ -14,7 +14,6 @@ #![feature(hash)] #![feature(rand)] #![feature(rustc_private)] -#![feature(str_words)] #![feature(test)] #![feature(unboxed_closures)] #![feature(unicode)] diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index e23a8a34b09..170f49ab15b 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -939,9 +939,9 @@ fn test_rsplitn() { } #[test] -fn test_words() { +fn test_split_whitespace() { let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n"; - let words: Vec<&str> = data.words().collect(); + let words: Vec<&str> = data.split_whitespace().collect(); assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"]) } diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs index 197199e743f..5782c57834e 100644 --- a/src/libgetopts/lib.rs +++ b/src/libgetopts/lib.rs @@ -91,7 +91,6 @@ #![deny(missing_docs)] #![feature(staged_api)] -#![feature(str_words)] #![feature(str_char)] #![cfg_attr(test, feature(rustc_private))] @@ -771,7 +770,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> String { // Normalize desc to contain words separated by one space character let mut desc_normalized_whitespace = String::new(); - for word in desc.words() { + for word in desc.split_whitespace() { desc_normalized_whitespace.push_str(word); desc_normalized_whitespace.push(' '); } diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs index ab5c4e76966..5b4d6c144cf 100644 --- a/src/librustc/lib.rs +++ b/src/librustc/lib.rs @@ -38,7 +38,6 @@ #![feature(staged_api)] #![feature(std_misc)] #![feature(path_ext)] -#![feature(str_words)] #![feature(str_char)] #![feature(into_cow)] #![feature(slice_patterns)] diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index f7ff8b9e606..f4ea069447f 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -418,7 +418,7 @@ macro_rules! options { -> bool { match v { Some(s) => { - for s in s.words() { + for s in s.split_whitespace() { slot.push(s.to_string()); } true @@ -431,7 +431,7 @@ macro_rules! options { -> bool { match v { Some(s) => { - let v = s.words().map(|s| s.to_string()).collect(); + let v = s.split_whitespace().map(|s| s.to_string()).collect(); *slot = Some(v); true }, diff --git a/src/librustc_unicode/lib.rs b/src/librustc_unicode/lib.rs index 5c4e643c2a1..edfa8db311f 100644 --- a/src/librustc_unicode/lib.rs +++ b/src/librustc_unicode/lib.rs @@ -45,7 +45,7 @@ mod u_str; pub mod char; pub mod str { - pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices}; + pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices}; pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item}; pub use u_str::{utf16_items, Utf16Encoder}; } diff --git a/src/librustc_unicode/u_str.rs b/src/librustc_unicode/u_str.rs index 097776312df..898844e3bf1 100644 --- a/src/librustc_unicode/u_str.rs +++ b/src/librustc_unicode/u_str.rs @@ -25,10 +25,16 @@ use core::str::Split; use tables::grapheme::GraphemeCat; -/// An iterator over the words of a string, separated by a sequence of whitespace +#[deprecated(reason = "struct Words is being replaced by struct SplitWhitespace", + since = "1.1.0")] #[unstable(feature = "str_words", reason = "words() will be replaced by split_whitespace() in 1.1.0")] -pub struct Words<'a> { +pub type Words<'a> = SplitWhitespace<'a>; + +/// An iterator over the non-whitespace substrings of a string, +/// separated by any amount of whitespace. +#[stable(feature = "split_whitespace", since = "1.1.0")] +pub struct SplitWhitespace<'a> { inner: Filter bool>, fn(&&str) -> bool>, } @@ -37,7 +43,9 @@ pub struct Words<'a> { pub trait UnicodeStr { fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>; fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>; + #[allow(deprecated)] fn words<'a>(&'a self) -> Words<'a>; + fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>; fn is_whitespace(&self) -> bool; fn is_alphanumeric(&self) -> bool; fn width(&self, is_cjk: bool) -> usize; @@ -57,15 +65,21 @@ impl UnicodeStr for str { GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) } } + #[allow(deprecated)] #[inline] fn words(&self) -> Words { + self.split_whitespace() + } + + #[inline] + fn split_whitespace(&self) -> SplitWhitespace { fn is_not_empty(s: &&str) -> bool { !s.is_empty() } let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer fn is_whitespace(c: char) -> bool { c.is_whitespace() } let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer - Words { inner: self.split(is_whitespace).filter(is_not_empty) } + SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) } } #[inline] @@ -546,11 +560,11 @@ impl Iterator for Utf16Encoder where I: Iterator { } } -impl<'a> Iterator for Words<'a> { +impl<'a> Iterator for SplitWhitespace<'a> { type Item = &'a str; fn next(&mut self) -> Option<&'a str> { self.inner.next() } } -impl<'a> DoubleEndedIterator for Words<'a> { +impl<'a> DoubleEndedIterator for SplitWhitespace<'a> { fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } } diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 334f05fb36f..17053e4f10a 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -274,7 +274,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result { }; // Transform the contents of the header into a hyphenated string - let id = s.words().map(|s| s.to_ascii_lowercase()) + let id = s.split_whitespace().map(|s| s.to_ascii_lowercase()) .collect::>().connect("-"); // This is a terrible hack working around how hoedown gives us rendered diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 4389b66b52a..9c393d6f897 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -31,7 +31,6 @@ #![feature(std_misc)] #![feature(test)] #![feature(unicode)] -#![feature(str_words)] #![feature(path_ext)] #![feature(path_relative_from)] #![feature(slice_patterns)] @@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize { let test_args = matches.opt_strs("test-args"); let test_args: Vec = test_args.iter() - .flat_map(|s| s.words()) + .flat_map(|s| s.split_whitespace()) .map(|s| s.to_string()) .collect(); @@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche } clean::NameValue(ref x, ref value) if "passes" == *x => { - for pass in value.words() { + for pass in value.split_whitespace() { passes.push(pass.to_string()); } } clean::NameValue(ref x, ref value) if "plugins" == *x => { - for p in value.words() { + for p in value.split_whitespace() { plugins.push(p.to_string()); } } diff --git a/src/test/run-pass/drop-with-type-ascription-1.rs b/src/test/run-pass/drop-with-type-ascription-1.rs index 9dd458344cb..15f229dbca5 100644 --- a/src/test/run-pass/drop-with-type-ascription-1.rs +++ b/src/test/run-pass/drop-with-type-ascription-1.rs @@ -9,11 +9,9 @@ // except according to those terms. -#![feature(str_words)] - fn main() { let foo = "hello".to_string(); - let foo: Vec<&str> = foo.words().collect(); + let foo: Vec<&str> = foo.split_whitespace().collect(); let invalid_string = &foo[0]; assert_eq!(*invalid_string, "hello"); }