Auto merge of #23952 - Kimundi:more_string_pattern, r=alexcrichton

This adds the missing methods and turns `str::pattern` into a user-facing module, as per the RFC.

This also contains some big internal refactorings:
- string iterator pairs are implemented with a central macro to reduce redundancy 
- Moved all tests from `coretest::str` into `collectionstest::str` and left a note to prevent the two sets of tests drifting apart further.

See https://github.com/rust-lang/rust/issues/22477
This commit is contained in:
bors 2015-04-07 00:57:08 +00:00
commit b2e65ee6e4
6 changed files with 1514 additions and 869 deletions

View File

@ -58,6 +58,8 @@ use core::iter::{Iterator, Extend};
use core::option::Option::{self, Some, None};
use core::result::Result;
use core::str as core_str;
use core::str::pattern::Pattern;
use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use unicode::str::{UnicodeStr, Utf16Encoder};
use core::convert::AsRef;
@ -69,14 +71,16 @@ use vec::Vec;
use slice::SliceConcatExt;
pub use core::str::{FromStr, Utf8Error, Str};
pub use core::str::{Lines, LinesAny, MatchIndices, CharRange};
pub use core::str::{Split, SplitTerminator, SplitN};
pub use core::str::{RSplit, RSplitN};
pub use core::str::{Lines, LinesAny, CharRange};
pub use core::str::{Split, RSplit};
pub use core::str::{SplitN, RSplitN};
pub use core::str::{SplitTerminator, RSplitTerminator};
pub use core::str::{Matches, RMatches};
pub use core::str::{MatchIndices, RMatchIndices};
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, ParseBoolError};
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
pub use core::str::Pattern;
pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
pub use core::str::pattern;
/*
Section: Creating a string
@ -429,7 +433,8 @@ impl str {
/// Replaces all occurrences of one string with another.
///
/// `replace` takes two arguments, a sub-`&str` to find in `self`, and a second `&str` to
/// `replace` takes two arguments, a sub-`&str` to find in `self`, and a
/// second `&str` to
/// replace it with. If the original `&str` isn't found, no change occurs.
///
/// # Examples
@ -581,12 +586,24 @@ impl str {
/// An iterator over substrings of `self`, separated by characters
/// matched by a pattern.
///
/// The pattern can be a simple `&str`, or a closure that determines
/// the split.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator will be double ended if the pattern allows a
/// reverse search and forward/reverse search yields the same elements.
/// This is true for, eg, `char` but not
/// for `&str`.
///
/// If the pattern allows a reverse search but its results might differ
/// from a forward search, `rsplit()` can be used.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
@ -594,81 +611,116 @@ impl str {
///
/// let v: Vec<&str> = "".split('X').collect();
/// assert_eq!(v, [""]);
///
/// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
/// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
///
/// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
/// assert_eq!(v, ["lion", "tiger", "leopard"]);
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["abc", "def", "ghi"]);
///
/// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
/// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
/// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
/// assert_eq!(v, ["lion", "tiger", "leopard"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
core_str::StrExt::split(&self[..], pat)
}
/// An iterator over substrings of `self`, separated by characters matched
/// by a pattern, returning most `count` items.
/// An iterator over substrings of `self`, separated by characters
/// matched by a pattern and yielded in reverse order.
///
/// The pattern can be a simple `&str`, or a closure that determines
/// the split.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// The last element returned, if any, will contain the remainder of the
/// string.
/// # Iterator behavior
///
/// The returned iterator requires that the pattern supports a
/// reverse search,
/// and it will be double ended if a forward/reverse search yields
/// the same elements.
///
/// For iterating from the front, `split()` can be used.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
/// assert_eq!(v, ["Mary", "had a little lambda"]);
/// ```rust
/// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
/// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
///
/// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
/// assert_eq!(v, ["lion", "XtigerXleopard"]);
///
/// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
/// assert_eq!(v, ["abcXdef"]);
///
/// let v: Vec<&str> = "".splitn(1, 'X').collect();
/// let v: Vec<&str> = "".rsplit('X').collect();
/// assert_eq!(v, [""]);
///
/// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
/// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
///
/// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
/// assert_eq!(v, ["leopard", "tiger", "lion"]);
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["abc", "def2ghi"]);
/// ```rust
/// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["ghi", "def", "abc"]);
///
/// let v: Vec<&str> = "lionXtigerXleopard".rsplit(char::is_uppercase).collect();
/// assert_eq!(v, ["leopard", "tiger", "lion"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
core_str::StrExt::splitn(&self[..], count, pat)
pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rsplit(&self[..], pat)
}
/// An iterator over substrings of `self`, separated by characters
/// matched by a pattern.
///
/// Equivalent to `split`, except that the trailing substring is skipped if empty.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
/// Additional libraries might provide more complex patterns
/// like regular expressions.
///
/// The pattern can be a simple `&str`, or a closure that determines
/// the split.
/// Equivalent to `split`, except that the trailing substring
/// is skipped if empty.
///
/// This method can be used for string data that is _terminated_,
/// rather than _separated_ by a pattern.
///
/// # Iterator behavior
///
/// The returned iterator will be double ended if the pattern allows a
/// reverse search
/// and forward/reverse search yields the same elements. This is true
/// for, eg, `char` but not for `&str`.
///
/// If the pattern allows a reverse search but its results might differ
/// from a forward search, `rsplit_terminator()` can be used.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
/// assert_eq!(v, ["A", "B"]);
///
/// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
/// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
/// assert_eq!(v, ["A", "", "B", ""]);
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let v: Vec<&str> = "abc1def2ghi3".split_terminator(|c: char| c.is_numeric()).collect();
@ -679,32 +731,98 @@ impl str {
core_str::StrExt::split_terminator(&self[..], pat)
}
/// An iterator over substrings of `self`, separated by a pattern,
/// starting from the end of the string.
/// An iterator over substrings of `self`, separated by characters
/// matched by a pattern and yielded in reverse order.
///
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// Equivalent to `split`, except that the trailing substring is
/// skipped if empty.
///
/// This method can be used for string data that is _terminated_,
/// rather than _separated_ by a pattern.
///
/// # Iterator behavior
///
/// The returned iterator requires that the pattern supports a
/// reverse search, and it will be double ended if a forward/reverse
/// search yields the same elements.
///
/// For iterating from the front, `split_terminator()` can be used.
///
/// # Examples
///
/// Simple patterns:
///
/// ```
/// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
/// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
/// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
/// assert_eq!(v, ["B", "A"]);
///
/// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
/// assert_eq!(v, ["leopard", "tiger", "lion"]);
/// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
/// assert_eq!(v, ["", "B", "", "A"]);
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect();
/// let v: Vec<&str> = "abc1def2ghi3".rsplit_terminator(|c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["ghi", "def", "abc"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rsplit(&self[..], pat)
core_str::StrExt::rsplit_terminator(&self[..], pat)
}
/// An iterator over substrings of `self`, separated by a pattern,
/// restricted to returning
/// at most `count` items.
///
/// The last element returned, if any, will contain the remainder of the
/// string.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator will not be double ended, because it is
/// not efficient to support.
///
/// If the pattern allows a reverse search, `rsplitn()` can be used.
///
/// # Examples
///
/// Simple patterns:
///
/// ```
/// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
/// assert_eq!(v, ["Mary", "had", "a little lambda"]);
///
/// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
/// assert_eq!(v, ["lion", "", "tigerXleopard"]);
///
/// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
/// assert_eq!(v, ["abcXdef"]);
///
/// let v: Vec<&str> = "".splitn(1, 'X').collect();
/// assert_eq!(v, [""]);
/// ```
///
/// More complex patterns with closures:
///
/// ```
/// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["abc", "def2ghi"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
core_str::StrExt::splitn(&self[..], count, pat)
}
/// An iterator over substrings of `self`, separated by a pattern,
@ -714,6 +832,18 @@ impl str {
/// The last element returned, if any, will contain the remainder of the
/// string.
///
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator will not be double ended, because it is not
/// efficient to support.
///
/// `splitn()` can be used for splitting from the front.
///
/// # Examples
///
/// Simple patterns:
@ -722,11 +852,14 @@ impl str {
/// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
/// assert_eq!(v, ["lamb", "little", "Mary had a"]);
///
/// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
/// assert_eq!(v, ["leopard", "tiger", "lionX"]);
///
/// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
/// assert_eq!(v, ["leopard", "lion::tiger"]);
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let v: Vec<&str> = "abc1def2ghi".rsplitn(2, |c: char| c.is_numeric()).collect();
@ -739,34 +872,166 @@ impl str {
core_str::StrExt::rsplitn(&self[..], count, pat)
}
/// An iterator over the start and end indices of the disjoint matches of a `&str` within
/// `self`.
/// An iterator over the matches of a pattern within `self`.
///
/// That is, each returned value `(start, end)` satisfies `self.slice(start, end) == sep`. For
/// matches of `sep` within `self` that overlap, only the indices corresponding to the first
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines whether a character matches.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator will be double ended if the pattern allows
/// a reverse search
/// and forward/reverse search yields the same elements. This is true
/// for, eg, `char` but not
/// for `&str`.
///
/// If the pattern allows a reverse search but its results might differ
/// from a forward search, `rmatches()` can be used.
///
/// # Examples
///
/// ```
/// # #![feature(collections)]
/// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
/// assert_eq!(v, ["abc", "abc", "abc"]);
///
/// let v: Vec<&str> = "1abc2abc3".matches(|c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["1", "2", "3"]);
/// ```
#[unstable(feature = "collections",
reason = "method got recently added")]
pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
core_str::StrExt::matches(&self[..], pat)
}
/// An iterator over the matches of a pattern within `self`, yielded in
/// reverse order.
///
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines whether a character matches.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator requires that the pattern supports a
/// reverse search,
/// and it will be double ended if a forward/reverse search yields
/// the same elements.
///
/// For iterating from the front, `matches()` can be used.
///
/// # Examples
///
/// ```
/// # #![feature(collections)]
/// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
/// assert_eq!(v, ["abc", "abc", "abc"]);
///
/// let v: Vec<&str> = "1abc2abc3".rmatches(|c: char| c.is_numeric()).collect();
/// assert_eq!(v, ["3", "2", "1"]);
/// ```
#[unstable(feature = "collections",
reason = "method got recently added")]
pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rmatches(&self[..], pat)
}
/// An iterator over the start and end indices of the disjoint matches
/// of a pattern within `self`.
///
/// For matches of `pat` within `self` that overlap, only the indices
/// corresponding to the first
/// match are returned.
///
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines whether a character matches.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator will be double ended if the pattern allows a
/// reverse search
/// and forward/reverse search yields the same elements. This is true for,
/// eg, `char` but not
/// for `&str`.
///
/// If the pattern allows a reverse search but its results might differ
/// from a forward search, `rmatch_indices()` can be used.
///
/// # Examples
///
/// ```
/// # #![feature(collections)]
/// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect();
/// assert_eq!(v, [(0,3), (6,9), (12,15)]);
/// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]);
///
/// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect();
/// assert_eq!(v, [(1,4), (4,7)]);
/// assert_eq!(v, [(1, 4), (4, 7)]);
///
/// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect();
/// assert_eq!(v, [(0, 3)]); // only the first `aba`
/// ```
#[unstable(feature = "collections",
reason = "might have its iterator type changed")]
// NB: Right now MatchIndices yields `(usize, usize)`,
// but it would be more consistent and useful to return `(usize, &str)`
// NB: Right now MatchIndices yields `(usize, usize)`, but it would
// be more consistent with `matches` and `char_indices` to return `(usize, &str)`
pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
core_str::StrExt::match_indices(&self[..], pat)
}
/// An iterator over the start and end indices of the disjoint matches of
/// a pattern within
/// `self`, yielded in reverse order.
///
/// For matches of `pat` within `self` that overlap, only the indices
/// corresponding to the last
/// match are returned.
///
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines whether a character matches.
/// Additional libraries might provide more complex patterns like
/// regular expressions.
///
/// # Iterator behavior
///
/// The returned iterator requires that the pattern supports a
/// reverse search,
/// and it will be double ended if a forward/reverse search yields
/// the same elements.
///
/// For iterating from the front, `match_indices()` can be used.
///
/// # Examples
///
/// ```
/// # #![feature(collections)]
/// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
/// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]);
///
/// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect();
/// assert_eq!(v, [(4, 7), (1, 4)]);
///
/// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect();
/// assert_eq!(v, [(2, 5)]); // only the last `aba`
/// ```
#[unstable(feature = "collections",
reason = "might have its iterator type changed")]
// NB: Right now RMatchIndices yields `(usize, usize)`, but it would
// be more consistent with `rmatches` and `char_indices` to return `(usize, &str)`
pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rmatch_indices(&self[..], pat)
}
/// An iterator over the lines of a string, separated by `\n`.
///
/// This does not include the empty string after a trailing `\n`.
@ -793,7 +1058,8 @@ impl str {
core_str::StrExt::lines(&self[..])
}
/// An iterator over the lines of a string, separated by either `\n` or `\r\n`.
/// An iterator over the lines of a string, separated by either
/// `\n` or `\r\n`.
///
/// As with `.lines()`, this does not include an empty trailing line.
///
@ -855,7 +1121,8 @@ impl str {
///
/// # Unsafety
///
/// Caller must check both UTF-8 character boundaries and the boundaries of the entire slice as
/// Caller must check both UTF-8 character boundaries and the boundaries
/// of the entire slice as
/// well.
///
/// # Examples
@ -898,13 +1165,15 @@ impl str {
core_str::StrExt::ends_with(&self[..], pat)
}
/// Returns a string with all pre- and suffixes that match a pattern repeatedly removed.
/// Returns a string with all pre- and suffixes that match a pattern
/// repeatedly removed.
///
/// The pattern can be a simple `&str`, or a closure that determines the split.
/// The pattern can be a simple `char`, or a closure that determines
/// the split.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
@ -913,7 +1182,7 @@ impl str {
/// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
@ -925,13 +1194,15 @@ impl str {
core_str::StrExt::trim_matches(&self[..], pat)
}
/// Returns a string with all prefixes that match a pattern repeatedly removed.
/// Returns a string with all prefixes that match a pattern
/// repeatedly removed.
///
/// The pattern can be a simple `&str`, or a closure that determines the split.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
@ -940,7 +1211,7 @@ impl str {
/// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
@ -950,13 +1221,15 @@ impl str {
core_str::StrExt::trim_left_matches(&self[..], pat)
}
/// Returns a string with all suffixes that match a pattern repeatedly removed.
/// Returns a string with all suffixes that match a pattern
/// repeatedly removed.
///
/// The pattern can be a simple `&str`, or a closure that determines the split.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the split.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
@ -964,7 +1237,7 @@ impl str {
/// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
@ -976,9 +1249,11 @@ impl str {
core_str::StrExt::trim_right_matches(&self[..], pat)
}
/// Check that `index`-th byte lies at the start and/or end of a UTF-8 code point sequence.
/// Check that `index`-th byte lies at the start and/or end of a
/// UTF-8 code point sequence.
///
/// The start and end of the string (when `index == self.len()`) are considered to be
/// The start and end of the string (when `index == self.len()`) are
/// considered to be
/// boundaries.
///
/// # Panics
@ -1021,7 +1296,8 @@ impl str {
///
/// # Examples
///
/// This example manually iterates through the characters of a string; this should normally be
/// This example manually iterates through the characters of a string;
/// this should normally be
/// done by `.chars()` or `.char_indices()`.
///
/// ```
@ -1072,7 +1348,8 @@ impl str {
///
/// # Examples
///
/// This example manually iterates through the characters of a string; this should normally be
/// This example manually iterates through the characters of a string;
/// this should normally be
/// done by `.chars().rev()` or `.char_indices()`.
///
/// ```
@ -1135,7 +1412,8 @@ impl str {
core_str::StrExt::char_at(&self[..], i)
}
/// Given a byte position, return the `char` at that position, counting from the end.
/// Given a byte position, return the `char` at that position, counting
/// from the end.
///
/// # Panics
///
@ -1170,31 +1448,36 @@ impl str {
core_str::StrExt::as_bytes(&self[..])
}
/// Returns the byte index of the first character of `self` that matches the pattern, if it
/// Returns the byte index of the first character of `self` that matches
/// the pattern, if it
/// exists.
///
/// Returns `None` if it doesn't exist.
///
/// The pattern can be a simple `&str`, or a closure that determines the split.
/// The pattern can be a simple `&str`, `char`, or a closure that
/// determines the
/// split.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// let s = "Löwe 老虎 Léopard";
///
/// assert_eq!(s.find('L'), Some(0));
/// assert_eq!(s.find('é'), Some(14));
/// assert_eq!(s.find("Léopard"), Some(13));
///
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let s = "Löwe 老虎 Léopard";
///
/// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
/// assert_eq!(s.find(char::is_lowercase), Some(1));
/// ```
///
/// Not finding the pattern:
@ -1210,16 +1493,18 @@ impl str {
core_str::StrExt::find(&self[..], pat)
}
/// Returns the byte index of the last character of `self` that matches the pattern, if it
/// Returns the byte index of the last character of `self` that
/// matches the pattern, if it
/// exists.
///
/// Returns `None` if it doesn't exist.
///
/// The pattern can be a simple `&str`, or a closure that determines the split.
/// The pattern can be a simple `&str`, `char`,
/// or a closure that determines the split.
///
/// # Examples
///
/// Simple `&str` patterns:
/// Simple patterns:
///
/// ```
/// let s = "Löwe 老虎 Léopard";
@ -1228,12 +1513,13 @@ impl str {
/// assert_eq!(s.rfind('é'), Some(14));
/// ```
///
/// More complex patterns with a lambda:
/// More complex patterns with closures:
///
/// ```
/// let s = "Löwe 老虎 Léopard";
///
/// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
/// assert_eq!(s.rfind(char::is_lowercase), Some(20));
/// ```
///
/// Not finding the pattern:
@ -1253,7 +1539,8 @@ impl str {
/// Retrieves the first character from a `&str` and returns it.
///
/// This does not allocate a new string; instead, it returns a slice that points one character
/// This does not allocate a new string; instead, it returns a slice that
/// points one character
/// beyond the character that was shifted.
///
/// If the slice does not contain any characters, None is returned instead.
@ -1281,7 +1568,8 @@ impl str {
core_str::StrExt::slice_shift_char(&self[..])
}
/// Returns the byte offset of an inner slice relative to an enclosing outer slice.
/// Returns the byte offset of an inner slice relative to an enclosing
/// outer slice.
///
/// # Panics
///
@ -1306,7 +1594,8 @@ impl str {
/// Return an unsafe pointer to the `&str`'s buffer.
///
/// The caller must ensure that the string outlives this pointer, and that it is not
/// The caller must ensure that the string outlives this pointer, and
/// that it is not
/// reallocated (e.g. by pushing to the string).
///
/// # Examples
@ -1382,7 +1671,8 @@ impl str {
///
/// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
///
/// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
/// If `is_extended` is true, the iterator is over the
/// *extended grapheme clusters*;
/// otherwise, the iterator is over the *legacy grapheme clusters*.
/// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
/// recommends extended grapheme cluster boundaries for general processing.
@ -1407,7 +1697,8 @@ impl str {
UnicodeStr::graphemes(&self[..], is_extended)
}
/// Returns an iterator over the grapheme clusters of `self` and their byte offsets. See
/// Returns an iterator over the grapheme clusters of `self` and their
/// byte offsets. See
/// `graphemes()` for more information.
///
/// # Examples
@ -1427,7 +1718,8 @@ impl str {
/// An iterator over the non-empty words of `self`.
///
/// A 'word' is a subsequence separated by any sequence of whitespace. Sequences of whitespace
/// A 'word' is a subsequence separated by any sequence of whitespace.
/// Sequences of whitespace
/// are collapsed, so empty "words" are not included.
///
/// # Examples
@ -1449,11 +1741,15 @@ impl str {
///
/// Control characters have zero width.
///
/// `is_cjk` determines behavior for characters in the Ambiguous category: if `is_cjk` is
/// `true`, these are 2 columns wide; otherwise, they are 1. In CJK locales, `is_cjk` should be
/// `is_cjk` determines behavior for characters in the Ambiguous category:
/// if `is_cjk` is
/// `true`, these are 2 columns wide; otherwise, they are 1.
/// In CJK locales, `is_cjk` should be
/// `true`, else it should be `false`.
/// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) recommends that these
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the locale is unknown.
/// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
/// recommends that these
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the
/// locale is unknown.
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn width(&self, is_cjk: bool) -> usize {

View File

@ -24,7 +24,7 @@ use core::mem;
use core::ops::{self, Deref, Add, Index};
use core::ptr;
use core::slice;
use core::str::Pattern;
use core::str::pattern::Pattern;
use unicode::str as unicode_str;
use unicode::str::Utf16Item;

View File

@ -1,4 +1,4 @@
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@ -1506,6 +1506,403 @@ fn test_str_from_utf8() {
assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
}
// `contains` must accept the needle as a plain `&str`, as a reference to a
// `&str`, and as a reference to an owned `String`.
#[test]
fn test_pattern_deref_forward() {
    let haystack = "aabcdaa";
    let needle = "bcd";
    let owned_needle = needle.to_string();

    assert!(haystack.contains(needle));
    assert!(haystack.contains(&needle));
    assert!(haystack.contains(&owned_needle));
}
// An empty pattern is expected to match at every character boundary of the
// haystack, including both ends of the string.
#[test]
fn test_empty_match_indices() {
// `ä` occupies 2 bytes and `中` occupies 3 bytes in UTF-8, so the character
// boundaries of this string are the byte offsets 0, 1, 3, 6 and 7.
let data = "aä中!";
let vec: Vec<_> = data.match_indices("").collect();
// Each expected pair has equal start and end offsets: one zero-length match
// per boundary.
assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
}
// Parsing a `bool` accepts exactly "true" and "false"; anything else is an
// error, which `.ok()` turns into `None`.
#[test]
fn test_bool_from_str() {
    let cases = [
        ("true", Some(true)),
        ("false", Some(false)),
        ("not even a boolean", None),
    ];
    for &(text, expected) in cases.iter() {
        assert_eq!(text.parse::<bool>().ok(), expected);
    }
}
// Asserts that `s` contains the empty string and every non-empty substring
// `s[start..end]` of itself.
//
// Every byte offset is used as a slice boundary, so slicing panics if `s`
// holds a multi-byte character.
fn check_contains_all_substrings(s: &str) {
    let len = s.len();
    assert!(s.contains(""));
    for start in 0..len {
        for end in (start + 1)..(len + 1) {
            let piece = &s[start..end];
            assert!(s.contains(piece));
        }
    }
}
// Regression test for issue #16589 in substring search.
#[test]
fn strslice_issue_16589() {
    let fruit = "bananas";
    assert!(fruit.contains("nana"));

    // prior to the fix for #16589, x.contains("abcdabcd") returned false
    // test all substrings for good measure
    let repetitive = "012345678901234567890123456789bcdabcdabcd";
    check_contains_all_substrings(repetitive);
}
// Regression test for issue #16878: neither needle occurs in its haystack,
// so `contains` must return false for both pairs.
#[test]
fn strslice_issue_16878() {
    let absent = [
        ("1234567ah012345678901ah", "hah"),
        ("00abc01234567890123456789abc", "bcabc"),
    ];
    for &(haystack, needle) in absent.iter() {
        assert!(!haystack.contains(needle));
    }
}
// Exhaustively checks `contains` against every substring of a longer
// sentence.
#[test]
fn test_strslice_contains() {
    check_contains_all_substrings(
        "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'",
    );
}
// `rsplitn(4, ..)` yields at most four pieces, starting from the end of the
// string. Each result is reversed before comparison so the expected values
// read front to back. Every separator is tried as a `char` and as an
// equivalent closure.
#[test]
fn test_rsplitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
    let on_space = ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"];
    let on_umlaut = ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"];

    // ASCII separator.
    let mut pieces = data.rsplitn(4, ' ').collect::<Vec<&str>>();
    pieces.reverse();
    assert_eq!(pieces, on_space);

    let mut pieces = data.rsplitn(4, |c: char| c == ' ').collect::<Vec<&str>>();
    pieces.reverse();
    assert_eq!(pieces, on_space);

    // Unicode
    let mut pieces = data.rsplitn(4, 'ä').collect::<Vec<&str>>();
    pieces.reverse();
    assert_eq!(pieces, on_umlaut);

    let mut pieces = data.rsplitn(4, |c: char| c == 'ä').collect::<Vec<&str>>();
    pieces.reverse();
    assert_eq!(pieces, on_umlaut);
}
// `split` must produce the same pieces whether it is driven from the front,
// or from the back via `.rev()` and then reversed. Every separator is tried
// as a `char` and as an equivalent closure.
#[test]
fn test_split_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
    let on_space = ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"];
    let on_umlaut = ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"];

    // ASCII separator given as a `char`.
    let forward = data.split(' ').collect::<Vec<&str>>();
    assert_eq!(forward, on_space);
    let mut backward = data.split(' ').rev().collect::<Vec<&str>>();
    backward.reverse();
    assert_eq!(backward, on_space);

    // ASCII separator given as a closure.
    let forward = data.split(|c: char| c == ' ').collect::<Vec<&str>>();
    assert_eq!(forward, on_space);
    let mut backward = data.split(|c: char| c == ' ').rev().collect::<Vec<&str>>();
    backward.reverse();
    assert_eq!(backward, on_space);

    // Unicode
    let forward = data.split('ä').collect::<Vec<&str>>();
    assert_eq!(forward, on_umlaut);
    let mut backward = data.split('ä').rev().collect::<Vec<&str>>();
    backward.reverse();
    assert_eq!(backward, on_umlaut);

    let forward = data.split(|c: char| c == 'ä').collect::<Vec<&str>>();
    assert_eq!(forward, on_umlaut);
    let mut backward = data.split(|c: char| c == 'ä').rev().collect::<Vec<&str>>();
    backward.reverse();
    assert_eq!(backward, on_umlaut);
}
#[test]
fn test_rev_split_char_iterator_no_trailing() {
    // Collects a back-to-front iteration and flips it into reading order.
    fn reversed<'a, I: Iterator<Item = &'a str>>(pieces: I) -> Vec<&'a str> {
        let mut collected: Vec<&'a str> = pieces.collect();
        collected.reverse();
        collected
    }

    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // `split` keeps the empty piece after the final '\n' ...
    let with_trailing = reversed(data.split('\n').rev());
    assert_eq!(with_trailing, ["", "Märy häd ä little lämb", "Little lämb", ""]);

    // ... while `split_terminator` drops it, also when iterated from the back.
    let without_trailing = reversed(data.split_terminator('\n').rev());
    assert_eq!(without_trailing, ["", "Märy häd ä little lämb", "Little lämb"]);
}
#[test]
fn test_utf16_code_units() {
    use unicode::str::Utf16Encoder;

    // 'é' is expected as a single code unit, U+1F4A9 as two.
    let chars = vec!['é', '\u{1F4A9}'];
    let units: Vec<u16> = Utf16Encoder::new(chars.into_iter()).collect();
    assert_eq!(units, [0xE9, 0xD83D, 0xDCA9])
}
#[test]
fn starts_with_in_unicode() {
    // The haystack opens with non-ASCII characters while the needle is
    // plain ASCII; the prefix check must simply report "no".
    let tree_line = "├── Cargo.toml";
    assert!(!tree_line.starts_with("# "));
}
#[test]
fn starts_short_long() {
    // Needles that are shorter, equal to, or longer than the haystack,
    // with and without a multi-byte character involved.
    assert!(!"".starts_with("##"));
    assert!(!"##".starts_with("####"));
    assert!("####".starts_with("##"));
    assert!(!"##ä".starts_with("####"));
    assert!("####ä".starts_with("##"));
    assert!(!"##".starts_with("####ä"));
    assert!("##ä##".starts_with("##ä"));

    // The empty needle is a prefix of every haystack. The multi-byte
    // character is moved through each position; the "#ä" case used to be a
    // second copy of the `""` assertion and was therefore never exercised.
    assert!("".starts_with(""));
    assert!("ä".starts_with(""));
    assert!("#ä".starts_with(""));
    assert!("##ä".starts_with(""));
    assert!("ä###".starts_with(""));
    assert!("#ä##".starts_with(""));
    assert!("##ä#".starts_with(""));
}
#[test]
fn contains_weird_cases() {
    let haystack = "* \t";

    // A character that is present ...
    assert!(haystack.contains(' '));

    // ... and two that are not, one of them outside the ASCII range.
    for &absent in ['?', '\u{1F4A9}'].iter() {
        assert!(!haystack.contains(absent));
    }
}
#[test]
fn trim_ws() {
    // Whitespace on both sides of a single letter.
    let padded = " \t a \t ";
    assert_eq!(padded.trim_left_matches(char::is_whitespace), "a \t ");
    assert_eq!(padded.trim_right_matches(char::is_whitespace), " \t a");
    assert_eq!(padded.trim_matches(char::is_whitespace), "a");

    // Nothing but whitespace: every variant trims down to the empty string.
    let blank = " \t \t ";
    assert_eq!(blank.trim_left_matches(char::is_whitespace), "");
    assert_eq!(blank.trim_right_matches(char::is_whitespace), "");
    assert_eq!(blank.trim_matches(char::is_whitespace), "");
}
mod pattern {
use std::str::pattern::Pattern;
use std::str::pattern::{Searcher, ReverseSearcher};
use std::str::pattern::SearchStep::{self, Match, Reject, Done};
macro_rules! make_test {
($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
mod $name {
use std::str::pattern::SearchStep::{Match, Reject};
use super::{cmp_search_to_vec};
#[test]
fn fwd() {
cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
}
#[test]
fn bwd() {
cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
}
}
}
}
fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
right: Vec<SearchStep>)
where P::Searcher: ReverseSearcher<'a>
{
let mut searcher = pat.into_searcher(haystack);
let mut v = vec![];
loop {
match if !rev {searcher.next()} else {searcher.next_back()} {
Match(a, b) => v.push(Match(a, b)),
Reject(a, b) => v.push(Reject(a, b)),
Done => break,
}
}
if rev {
v.reverse();
}
let mut first_index = 0;
let mut err = None;
for (i, e) in right.iter().enumerate() {
match *e {
Match(a, b) | Reject(a, b)
if a <= b && a == first_index => {
first_index = b;
}
_ => {
err = Some(i);
break;
}
}
}
if let Some(err) = err {
panic!("Input skipped range at {}", err);
}
if first_index != haystack.len() {
panic!("Did not cover whole input");
}
assert_eq!(v, right);
}
make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
Reject(0, 1),
Match (1, 3),
Reject(3, 4),
Match (4, 6),
Reject(6, 7),
]);
make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
Match (0, 0),
Reject(0, 1),
Match (1, 1),
Reject(1, 2),
Match (2, 2),
Reject(2, 3),
Match (3, 3),
Reject(3, 4),
Match (4, 4),
Reject(4, 5),
Match (5, 5),
Reject(5, 6),
Match (6, 6),
Reject(6, 7),
Match (7, 7),
]);
make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
Match (0, 0),
Reject(0, 3),
Match (3, 3),
Reject(3, 6),
Match (6, 6),
Reject(6, 9),
Match (9, 9),
]);
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
Match(0, 0),
]);
make_test!(str_searcher_nonempty_needle_empty_haystack, "", "", [
]);
make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
Reject(0, 1),
Match (1, 2),
Match (2, 3),
Reject(3, 4),
Match (4, 5),
Match (5, 6),
Reject(6, 7),
]);
make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
Reject(0, 1),
Reject(1, 2),
Reject(2, 3),
]);
}
// Generates a `#[test]` function `$test_name` from a table of
// `(arguments) -> [expected items];` rows.
//
// With two functions after `with`, each row is checked twice: the forward
// function must yield the expected items in order, and the backward function
// must yield them in reverse order. With a single function only the forward
// check is performed.
macro_rules! generate_iterator_test {
    // Forward and backward constructor given.
    {
        $test_name:ident {
            $(
                ($($input:expr),*) -> [$($expected:tt)*];
            )*
        }
        with $forward:expr, $backward:expr;
    } => {
        #[test]
        fn $test_name() {
            $(
                {
                    let expected = vec![$($expected)*];

                    let front_to_back: Vec<_> = ($forward)($($input),*).collect();
                    assert_eq!(front_to_back, expected);

                    let mut back_to_front: Vec<_> = ($backward)($($input),*).collect();
                    back_to_front.reverse();
                    assert_eq!(back_to_front, expected);
                }
            )*
        }
    };
    // Only a forward constructor given.
    {
        $test_name:ident {
            $(
                ($($input:expr),*) -> [$($expected:tt)*];
            )*
        }
        with $forward:expr;
    } => {
        #[test]
        fn $test_name() {
            $(
                {
                    let expected = vec![$($expected)*];

                    let front_to_back: Vec<_> = ($forward)($($input),*).collect();
                    assert_eq!(front_to_back, expected);
                }
            )*
        }
    }
}
// Each invocation below expands to one `#[test]` function named after the
// first identifier. Rows have the form `(arguments) -> [expected items];`.
// When two functions follow `with`, the second one is expected to yield the
// same items in reverse order.

// `split` / `rsplit` with a `char` and a `&str` separator.
generate_iterator_test! {
double_ended_split {
("foo.bar.baz", '.') -> ["foo", "bar", "baz"];
("foo::bar::baz", "::") -> ["foo", "bar", "baz"];
}
with str::split, str::rsplit;
}
// `split_terminator` / `rsplit_terminator`: no empty item after the final ';'.
generate_iterator_test! {
double_ended_split_terminator {
("foo;bar;baz;", ';') -> ["foo", "bar", "baz"];
}
with str::split_terminator, str::rsplit_terminator;
}
// `matches` / `rmatches` with a function as the pattern.
generate_iterator_test! {
double_ended_matches {
("a1b2c3", char::is_numeric) -> ["1", "2", "3"];
}
with str::matches, str::rmatches;
}
// `match_indices` / `rmatch_indices`: items are pairs of offsets.
generate_iterator_test! {
double_ended_match_indices {
("a1b2c3", char::is_numeric) -> [(1, 2), (3, 4), (5, 6)];
}
with str::match_indices, str::rmatch_indices;
}
// `splitn` is only checked in the forward direction.
generate_iterator_test! {
not_double_ended_splitn {
("foo::bar::baz", 2, "::") -> ["foo", "bar::baz"];
}
with str::splitn;
}
// `rsplitn` is likewise checked on its own; it yields the rightmost piece first.
generate_iterator_test! {
not_double_ended_rsplitn {
("foo::bar::baz", 2, "::") -> ["baz", "foo::bar"];
}
with str::rsplitn;
}
mod bench {
use test::{Bencher, black_box};
@ -1693,4 +2090,106 @@ malesuada sollicitudin quam eu fermentum.";
assert!(haystack.contains(needle));
})
}
// Generates a single `#[bench]` function `$bench_name`.
//
// The fixture `$fixture` is bound to the caller-chosen identifier `$binding`
// and passed through `black_box` before the measured closure runs `$body`,
// which refers to the fixture through that identifier.
macro_rules! make_test_inner {
    ($binding:ident, $body:expr, $bench_name:ident, $fixture:expr) => {
        #[bench]
        fn $bench_name(bencher: &mut Bencher) {
            let mut $binding = $fixture;
            black_box(&mut $binding);
            bencher.iter(|| $body);
        }
    }
}
// Expands to a module `$name` containing one benchmark per fixture string:
// `short_ascii`, `short_mixed`, `short_pile_of_poo` and `long_lorem_ipsum`.
//
// Every benchmark is produced by `make_test_inner!`, which receives `$s`
// (the identifier the fixture is bound to) and `$code` (the expression to
// measure) unchanged, plus the benchmark name and the fixture itself.
macro_rules! make_test {
($name:ident, $s:ident, $code:expr) => {
mod $name {
// Imported here because the generated functions live in their own module.
use test::Bencher;
use test::black_box;
// Short strings: 65 bytes each
make_test_inner!($s, $code, short_ascii,
"Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!");
make_test_inner!($s, $code, short_mixed,
"ศไทย中华Việt Nam; Mary had a little lamb, Little lam!");
make_test_inner!($s, $code, short_pile_of_poo,
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!");
make_test_inner!($s, $code, long_lorem_ipsum,"\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.
In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.
Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.
Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.
Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum!");
}
}
}
// One benchmark module per string operation. In every invocation the second
// argument `s` names the fixture string inside the measured expression.

// Counting characters.
make_test!(chars_count, s, s.chars().count());
// Looking for '!', as a string and as a character pattern.
make_test!(contains_bang_str, s, s.contains("!"));
make_test!(contains_bang_char, s, s.contains('!'));
// Iterating over all occurrences of / pieces around the string "a".
make_test!(match_indices_a_str, s, s.match_indices("a").count());
make_test!(split_a_str, s, s.split("a").count());
// Trimming with a closure pattern, from both ends or from one end only.
make_test!(trim_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_matches(|c: char| c.is_ascii())
});
make_test!(trim_left_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_left_matches(|c: char| c.is_ascii())
});
make_test!(trim_right_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_right_matches(|c: char| c.is_ascii())
});
// Searching for an ASCII needle ('_') ...
make_test!(find_underscore_char, s, s.find('_'));
make_test!(rfind_underscore_char, s, s.rfind('_'));
make_test!(find_underscore_str, s, s.find("_"));
// ... and for a non-ASCII needle (U+1F4A4).
make_test!(find_zzz_char, s, s.find('\u{1F4A4}'));
make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}'));
make_test!(find_zzz_str, s, s.find("\u{1F4A4}"));
// The split family with a character separator ...
make_test!(split_space_char, s, s.split(' ').count());
make_test!(split_terminator_space_char, s, s.split_terminator(' ').count());
make_test!(splitn_space_char, s, s.splitn(10, ' ').count());
make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());
// ... and with string separators of one and two bytes.
make_test!(split_space_str, s, s.split(" ").count());
make_test!(split_ad_str, s, s.split("ad").count());
}

View File

@ -17,6 +17,8 @@
#![doc(primitive = "str")]
use self::OldSearcher::{TwoWay, TwoWayLong};
use self::pattern::Pattern;
use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use char::CharExt;
use clone::Clone;
@ -34,100 +36,7 @@ use result::Result::{self, Ok, Err};
use slice::{self, SliceExt};
use usize;
pub use self::pattern::Pattern;
pub use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
mod pattern;
// Generates iterator trait impls for a newtype `$ti` that forward every
// call to the wrapped value `self.0`; `$te` is the item type.
//
// Arms:
//   `exact $te : $ti`           - the plain arm below plus `ExactSizeIterator`
//   `$te : $ti`                 - `Iterator` and `DoubleEndedIterator`
//   `pattern $te : $ti`         - as above, generic over `P: Pattern<'a>`;
//                                 `DoubleEndedIterator` only where
//                                 `P::Searcher: DoubleEndedSearcher<'a>`
//   `pattern forward $te : $ti` - `Iterator` only
//   `pattern reverse $te : $ti` - `Iterator` only, where
//                                 `P::Searcher: ReverseSearcher<'a>`
macro_rules! delegate_iter {
(exact $te:ty : $ti:ty) => {
// Re-invoke the macro for the plain arm, then add the length impl.
delegate_iter!{$te : $ti}
impl<'a> ExactSizeIterator for $ti {
#[inline]
fn len(&self) -> usize {
self.0.len()
}
}
};
($te:ty : $ti:ty) => {
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for $ti {
type Item = $te;
#[inline]
fn next(&mut self) -> Option<$te> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for $ti {
#[inline]
fn next_back(&mut self) -> Option<$te> {
self.0.next_back()
}
}
};
(pattern $te:ty : $ti:ty) => {
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for $ti {
type Item = $te;
#[inline]
fn next(&mut self) -> Option<$te> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
// Iterating from the back is only offered for double ended searchers.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> DoubleEndedIterator for $ti
where P::Searcher: DoubleEndedSearcher<'a> {
#[inline]
fn next_back(&mut self) -> Option<$te> {
self.0.next_back()
}
}
};
(pattern forward $te:ty : $ti:ty) => {
// NOTE(review): this forward-only impl is bounded on `DoubleEndedSearcher`;
// confirm whether that bound is intended.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for $ti
where P::Searcher: DoubleEndedSearcher<'a> {
type Item = $te;
#[inline]
fn next(&mut self) -> Option<$te> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
};
(pattern reverse $te:ty : $ti:ty) => {
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for $ti
where P::Searcher: ReverseSearcher<'a>
{
type Item = $te;
#[inline]
fn next(&mut self) -> Option<$te> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
};
}
pub mod pattern;
/// A trait to abstract the idea of creating a new instance of a type from a
/// string.
@ -443,11 +352,9 @@ impl<'a> DoubleEndedIterator for CharIndices<'a> {
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone)]
pub struct Bytes<'a>(Map<slice::Iter<'a, u8>, BytesDeref>);
delegate_iter!{exact u8 : Bytes<'a>}
/// A temporary fn new type that ensures that the `Bytes` iterator
/// is cloneable.
#[derive(Copy, Clone)]
/// A nameable, clonable fn type
#[derive(Clone)]
struct BytesDeref;
impl<'a> Fn<(&'a u8,)> for BytesDeref {
@ -473,58 +380,210 @@ impl<'a> FnOnce<(&'a u8,)> for BytesDeref {
}
}
/// An iterator over the substrings of a string, separated by `sep`.
struct CharSplits<'a, P: Pattern<'a>> {
/// The slice remaining to be iterated
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Bytes<'a> {
type Item = u8;
#[inline]
fn next(&mut self) -> Option<u8> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for Bytes<'a> {
#[inline]
fn next_back(&mut self) -> Option<u8> {
self.0.next_back()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> ExactSizeIterator for Bytes<'a> {
#[inline]
fn len(&self) -> usize {
self.0.len()
}
}
/// Generates a `Clone` impl for a string pattern API wrapper type of the
/// form `X<'a, P>`, available whenever `P::Searcher` is itself `Clone`.
///
/// The caller supplies the body of `clone` as an expression, together with
/// the identifier under which `self` is visible inside that expression.
macro_rules! derive_pattern_clone {
    (clone $wrapper:ident with |$this:ident| $body:expr) => {
        impl<'a, P: Pattern<'a>> Clone for $wrapper<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                // Rebind `self` to the caller's identifier so that the
                // supplied expression can refer to it.
                let $this = self;
                $body
            }
        }
    }
}
/// This macro generates two public iterator structs
/// wrapping a private internal one that makes use of the `Pattern` API.
///
/// For all patterns `P: Pattern<'a>` the following items will be
/// generated (generics omitted):
///
/// struct $forward_iterator($internal_iterator);
/// struct $reverse_iterator($internal_iterator);
///
/// impl Iterator for $forward_iterator
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// impl DoubleEndedIterator for $forward_iterator
/// where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl Iterator for $reverse_iterator
/// where P::Searcher: ReverseSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl DoubleEndedIterator for $reverse_iterator
/// where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// The internal one is defined outside the macro, and has almost the same
/// semantic as a DoubleEndedIterator by delegating to `pattern::Searcher` and
/// `pattern::ReverseSearcher` for both forward and reverse iteration.
///
/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
/// `Pattern` might not return the same elements, so actually implementing
/// `DoubleEndedIterator` for it would be incorrect.
/// (See the docs in `str::pattern` for more details)
///
/// However, the internal struct still represents a single ended iterator from
/// either end, and depending on pattern is also a valid double ended iterator,
/// so the two wrapper structs implement `Iterator`
/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
/// to the complex impls seen above.
///
/// Besides the iterator impls, both wrappers also receive a `Clone` impl
/// that is available whenever `P::Searcher: Clone`.
macro_rules! generate_pattern_iterators {
{
// Forward iterator
forward:
$(#[$forward_iterator_attribute:meta])*
struct $forward_iterator:ident;
// Reverse iterator
reverse:
$(#[$reverse_iterator_attribute:meta])*
struct $reverse_iterator:ident;
// Stability of all generated items
stability:
$(#[$common_stability_attribute:meta])*
// Internal almost-iterator that is being delegated to
internal:
$internal_iterator:ident yielding ($iterty:ty);
// Kind of delegation - either single ended or double ended
delegate $($t:tt)*
} => {
// Forward wrapper: `next` forwards to the internal `next`.
$(#[$forward_iterator_attribute])*
$(#[$common_stability_attribute])*
pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
$(#[$common_stability_attribute])*
impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
type Item = $iterty;
#[inline]
fn next(&mut self) -> Option<$iterty> {
self.0.next()
}
}
$(#[$common_stability_attribute])*
impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
where P::Searcher: Clone
{
fn clone(&self) -> Self {
$forward_iterator(self.0.clone())
}
}
// Reverse wrapper: `next` forwards to the internal `next_back`, which is
// why a `ReverseSearcher` is required.
$(#[$reverse_iterator_attribute])*
$(#[$common_stability_attribute])*
pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
$(#[$common_stability_attribute])*
impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
type Item = $iterty;
#[inline]
fn next(&mut self) -> Option<$iterty> {
self.0.next_back()
}
}
$(#[$common_stability_attribute])*
impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
where P::Searcher: Clone
{
fn clone(&self) -> Self {
$reverse_iterator(self.0.clone())
}
}
// Re-invoke the macro with the tokens that followed `delegate`
// (`double ended;` or `single ended;`) to select one of the arms below.
generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
$forward_iterator,
$reverse_iterator, $iterty);
};
{
double ended; with $(#[$common_stability_attribute:meta])*,
$forward_iterator:ident,
$reverse_iterator:ident, $iterty:ty
} => {
// For double ended searchers each wrapper can also be driven from its
// other end; the directions are mirrored between the two wrappers.
$(#[$common_stability_attribute])*
impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
where P::Searcher: DoubleEndedSearcher<'a>
{
#[inline]
fn next_back(&mut self) -> Option<$iterty> {
self.0.next_back()
}
}
$(#[$common_stability_attribute])*
impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
where P::Searcher: DoubleEndedSearcher<'a>
{
#[inline]
fn next_back(&mut self) -> Option<$iterty> {
self.0.next()
}
}
};
{
// Single ended delegation generates no additional impls.
single ended; with $(#[$common_stability_attribute:meta])*,
$forward_iterator:ident,
$reverse_iterator:ident, $iterty:ty
} => {}
}
// `Clone` for `SplitInternal`: the searcher is cloned explicitly, every
// other field is copied over from the original.
derive_pattern_clone!{
clone SplitInternal
with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
}
/// Shared state behind the split iterators that are generated from it.
struct SplitInternal<'a, P: Pattern<'a>> {
// `start` and `end` are offsets into the haystack; `end - start` is the
// length that is tested when deciding whether a last piece is emitted.
start: usize,
end: usize,
/// The searcher created from the pattern
matcher: P::Searcher,
/// Whether an empty string at the end is allowed
allow_trailing_empty: bool,
// Once set, no further items are produced.
finished: bool,
}
/// An iterator over the substrings of a string, separated by `sep`,
/// splitting at most `count` times.
struct CharSplitsN<'a, P: Pattern<'a>> {
/// The wrapped, unbounded splitter
iter: CharSplits<'a, P>,
/// The number of items remaining
count: usize,
}
/// An iterator over the substrings of a string, separated by a
/// pattern, in reverse order.
struct RCharSplits<'a, P: Pattern<'a>> {
/// The slice remaining to be iterated
// (`start` and `end` together describe that slice as haystack offsets)
start: usize,
end: usize,
/// The searcher that is asked for matches from the back
matcher: P::Searcher,
/// Whether an empty string at the end of iteration is allowed
allow_final_empty: bool,
/// Set once the remainder has been handed out; no items follow afterwards
finished: bool,
}
/// An iterator over the substrings of a string, separated by a
/// pattern, splitting at most `count` times, in reverse order.
struct RCharSplitsN<'a, P: Pattern<'a>> {
/// The wrapped, unbounded reverse splitter
iter: RCharSplits<'a, P>,
/// The number of splits remaining
count: usize,
}
/// An iterator over the lines of a string, separated by `\n`.
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Lines<'a> {
/// The character splitter that produces the individual lines
inner: CharSplits<'a, char>,
}
/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
#[stable(feature = "rust1", since = "1.0.0")]
pub struct LinesAny<'a> {
/// A `Lines` iterator whose items are passed through a `fn(&str) -> &str`
inner: Map<Lines<'a>, fn(&str) -> &str>,
}
impl<'a, P: Pattern<'a>> CharSplits<'a, P> {
impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
#[inline]
fn get_end(&mut self) -> Option<&'a str> {
if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
@ -537,11 +596,6 @@ impl<'a, P: Pattern<'a>> CharSplits<'a, P> {
None
}
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> {
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<&'a str> {
@ -557,13 +611,11 @@ impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> {
None => self.get_end(),
}
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> DoubleEndedIterator for CharSplits<'a, P>
where P::Searcher: DoubleEndedSearcher<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
fn next_back(&mut self) -> Option<&'a str>
where P::Searcher: ReverseSearcher<'a>
{
if self.finished { return None }
if !self.allow_trailing_empty {
@ -589,10 +641,45 @@ where P::Searcher: DoubleEndedSearcher<'a> {
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> {
type Item = &'a str;
// Generates the public `Split` / `RSplit` wrappers around `SplitInternal`.
// `delegate double ended` additionally gives both wrappers a
// `DoubleEndedIterator` impl where `P::Searcher: DoubleEndedSearcher<'a>`.
generate_pattern_iterators! {
forward:
/// Return type of `str::split()`
struct Split;
reverse:
/// Return type of `str::rsplit()`
struct RSplit;
stability:
#[stable(feature = "rust1", since = "1.0.0")]
internal:
SplitInternal yielding (&'a str);
delegate double ended;
}
// Generates the public `SplitTerminator` / `RSplitTerminator` wrappers.
// They share `SplitInternal` with `Split` / `RSplit` and are likewise
// double ended where `P::Searcher: DoubleEndedSearcher<'a>`.
generate_pattern_iterators! {
forward:
/// Return type of `str::split_terminator()`
struct SplitTerminator;
reverse:
/// Return type of `str::rsplit_terminator()`
struct RSplitTerminator;
stability:
#[stable(feature = "rust1", since = "1.0.0")]
internal:
SplitInternal yielding (&'a str);
delegate double ended;
}
// `Clone` for `SplitNInternal`: the wrapped splitter is cloned explicitly,
// the counter is copied over from the original.
derive_pattern_clone!{
clone SplitNInternal
with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
}
/// A `SplitInternal` combined with a counter that limits the number of items.
struct SplitNInternal<'a, P: Pattern<'a>> {
/// The wrapped, unbounded splitter
iter: SplitInternal<'a, P>,
/// The number of splits remaining
count: usize,
}
impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
#[inline]
fn next(&mut self) -> Option<&'a str> {
match self.count {
@ -601,61 +688,193 @@ impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> {
_ => { self.count -= 1; self.iter.next() }
}
}
}
impl<'a, P: Pattern<'a>> RCharSplits<'a, P> {
    /// Hands out whatever is left between `start` and `end` as the last
    /// item and marks the iterator as finished.
    ///
    /// Returns `None` if the iterator is already finished, or if the
    /// remainder is empty and `allow_final_empty` is not set.
    #[inline]
    fn get_remainder(&mut self) -> Option<&'a str> {
        if self.finished {
            return None;
        }
        if !self.allow_final_empty && self.end - self.start == 0 {
            return None;
        }

        self.finished = true;
        let remainder = unsafe {
            self.matcher.haystack().slice_unchecked(self.start, self.end)
        };
        Some(remainder)
    }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for RCharSplits<'a, P>
    where P::Searcher: ReverseSearcher<'a>
{
    type Item = &'a str;

    /// Yields the piece between the last match found from the back and the
    /// current `end`, then moves `end` to the start of that match. When no
    /// match is left, the remainder is handed out via `get_remainder`.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.finished {
            return None;
        }

        let haystack = self.matcher.haystack();
        if let Some((match_start, match_end)) = self.matcher.next_match_back() {
            let piece = unsafe { haystack.slice_unchecked(match_end, self.end) };
            self.end = match_start;
            return Some(piece);
        }
        self.get_remainder()
    }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for RCharSplitsN<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<&'a str> {
fn next_back(&mut self) -> Option<&'a str>
where P::Searcher: ReverseSearcher<'a>
{
match self.count {
0 => None,
1 => { self.count -= 1; self.iter.get_remainder() }
_ => { self.count -= 1; self.iter.next() }
1 => { self.count = 0; self.iter.get_end() }
_ => { self.count -= 1; self.iter.next_back() }
}
}
}
// Generates the public `SplitN` / `RSplitN` wrappers around
// `SplitNInternal`. `delegate single ended` means that no
// `DoubleEndedIterator` impls are generated for them.
generate_pattern_iterators! {
forward:
/// Return type of `str::splitn()`
struct SplitN;
reverse:
/// Return type of `str::rsplitn()`
struct RSplitN;
stability:
#[stable(feature = "rust1", since = "1.0.0")]
internal:
SplitNInternal yielding (&'a str);
delegate single ended;
}
derive_pattern_clone!{
    clone MatchIndicesInternal
    with |s| MatchIndicesInternal(s.0.clone())
}

/// Wraps the searcher of a pattern and reports each match as the pair of
/// offsets returned by the searcher.
struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);

impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
    /// Next match when searching from the front.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize)> {
        let MatchIndicesInternal(ref mut searcher) = *self;
        searcher.next_match()
    }

    /// Next match when searching from the back.
    #[inline]
    fn next_back(&mut self) -> Option<(usize, usize)>
        where P::Searcher: ReverseSearcher<'a>
    {
        let MatchIndicesInternal(ref mut searcher) = *self;
        searcher.next_match_back()
    }
}
// Generates the public `MatchIndices` / `RMatchIndices` wrappers around
// `MatchIndicesInternal`; items are `(usize, usize)` pairs. Both wrappers
// are double ended where `P::Searcher: DoubleEndedSearcher<'a>`.
generate_pattern_iterators! {
forward:
/// Return type of `str::match_indices()`
struct MatchIndices;
reverse:
/// Return type of `str::rmatch_indices()`
struct RMatchIndices;
stability:
#[unstable(feature = "core",
reason = "type may be removed or have its iterator impl changed")]
internal:
MatchIndicesInternal yielding ((usize, usize));
delegate double ended;
}
derive_pattern_clone!{
    clone MatchesInternal
    with |s| MatchesInternal(s.0.clone())
}

/// Wraps the searcher of a pattern and reports each match as the matched
/// slice of the haystack.
struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);

impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
    /// Next matched slice when searching from the front.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        match self.0.next_match() {
            Some((start, end)) => Some(unsafe {
                // Indices are known to be on utf8 boundaries
                self.0.haystack().slice_unchecked(start, end)
            }),
            None => None,
        }
    }

    /// Next matched slice when searching from the back.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str>
        where P::Searcher: ReverseSearcher<'a>
    {
        match self.0.next_match_back() {
            Some((start, end)) => Some(unsafe {
                // Indices are known to be on utf8 boundaries
                self.0.haystack().slice_unchecked(start, end)
            }),
            None => None,
        }
    }
}
// Generates the public `Matches` / `RMatches` wrappers around
// `MatchesInternal`; items are the matched `&'a str` slices. Both wrappers
// are double ended where `P::Searcher: DoubleEndedSearcher<'a>`.
generate_pattern_iterators! {
forward:
/// Return type of `str::matches()`
struct Matches;
reverse:
/// Return type of `str::rmatches()`
struct RMatches;
stability:
#[unstable(feature = "core", reason = "type got recently added")]
internal:
MatchesInternal yielding (&'a str);
delegate double ended;
}
/// Return type of `str::lines()`
///
/// A thin wrapper around a `SplitTerminator` over a `char` pattern; every
/// iterator method is forwarded to it.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone)]
pub struct Lines<'a>(SplitTerminator<'a, char>);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
    type Item = &'a str;

    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        let Lines(ref mut splitter) = *self;
        splitter.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let Lines(ref splitter) = *self;
        splitter.size_hint()
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for Lines<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        let Lines(ref mut splitter) = *self;
        splitter.next_back()
    }
}
/// Return type of `str::lines_any()`
///
/// Wraps a `Lines` iterator whose items are mapped through `LinesAnyMap`.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone)]
pub struct LinesAny<'a>(Map<Lines<'a>, LinesAnyMap>);
/// A nameable, clonable fn type
///
/// Calling it with a line returns that line without its final `\r`, if
/// there is one; any other line is returned unchanged.
#[derive(Clone)]
struct LinesAnyMap;

impl<'a> Fn<(&'a str,)> for LinesAnyMap {
    #[inline]
    extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
        match line.as_bytes().last() {
            // Strip exactly one trailing carriage return.
            Some(&b'\r') => &line[..line.len() - 1],
            _ => line,
        }
    }
}

impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
    // Forwards to the `Fn` impl.
    #[inline]
    extern "rust-call" fn call_mut(&mut self, args: (&'a str,)) -> &'a str {
        Fn::call(&*self, args)
    }
}

impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
    type Output = &'a str;

    // Forwards to the `Fn` impl.
    #[inline]
    extern "rust-call" fn call_once(self, args: (&'a str,)) -> &'a str {
        Fn::call(&self, args)
    }
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for LinesAny<'a> {
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<&'a str> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for LinesAny<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
self.0.next_back()
}
}
/// The internal state of an iterator that searches for matches of a substring
/// within a larger string using two-way search
#[derive(Clone)]
@ -938,22 +1157,6 @@ struct OldMatchIndices<'a, 'b> {
searcher: OldSearcher
}
// FIXME: #21637 Prevents a Clone impl
/// An iterator over the start and end indices of the matches of a
/// substring within a larger string
#[unstable(feature = "core", reason = "type may be removed")]
pub struct MatchIndices<'a, P: Pattern<'a>>(P::Searcher);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for MatchIndices<'a, P> {
    type Item = (usize, usize);

    // Each item is the next match reported by the wrapped searcher.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize)> {
        let MatchIndices(ref mut searcher) = *self;
        searcher.next_match()
    }
}
impl<'a, 'b> OldMatchIndices<'a, 'b> {
#[inline]
#[allow(dead_code)]
@ -1291,31 +1494,6 @@ impl<'a, S: ?Sized> Str for &'a S where S: Str {
fn as_slice(&self) -> &str { Str::as_slice(*self) }
}
// Public newtype wrappers around the internal split iterators. Their
// iterator impls are produced by `delegate_iter!` and forward to the wrapped
// value: the `pattern` form also provides `DoubleEndedIterator` (for double
// ended searchers), the `pattern forward` and `pattern reverse` forms
// provide `Iterator` only.

/// Return type of `str::split`
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Split<'a, P: Pattern<'a>>(CharSplits<'a, P>);
delegate_iter!{pattern &'a str : Split<'a, P>}
/// Return type of `str::split_terminator`
#[stable(feature = "rust1", since = "1.0.0")]
pub struct SplitTerminator<'a, P: Pattern<'a>>(CharSplits<'a, P>);
delegate_iter!{pattern &'a str : SplitTerminator<'a, P>}
/// Return type of `str::splitn`
#[stable(feature = "rust1", since = "1.0.0")]
pub struct SplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>);
delegate_iter!{pattern forward &'a str : SplitN<'a, P>}
/// Return type of `str::rsplit`
#[stable(feature = "rust1", since = "1.0.0")]
pub struct RSplit<'a, P: Pattern<'a>>(RCharSplits<'a, P>);
delegate_iter!{pattern reverse &'a str : RSplit<'a, P>}
/// Return type of `str::rsplitn`
#[stable(feature = "rust1", since = "1.0.0")]
pub struct RSplitN<'a, P: Pattern<'a>>(RCharSplitsN<'a, P>);
delegate_iter!{pattern reverse &'a str : RSplitN<'a, P>}
/// Methods for string slices
#[allow(missing_docs)]
pub trait StrExt {
@ -1328,13 +1506,20 @@ pub trait StrExt {
fn bytes<'a>(&'a self) -> Bytes<'a>;
fn char_indices<'a>(&'a self) -> CharIndices<'a>;
fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
where P::Searcher: ReverseSearcher<'a>;
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
where P::Searcher: ReverseSearcher<'a>;
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
where P::Searcher: ReverseSearcher<'a>;
fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
where P::Searcher: ReverseSearcher<'a>;
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
where P::Searcher: ReverseSearcher<'a>;
fn lines<'a>(&'a self) -> Lines<'a>;
fn lines_any<'a>(&'a self) -> LinesAny<'a>;
fn char_len(&self) -> usize;
@ -1401,7 +1586,7 @@ impl StrExt for str {
#[inline]
fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
Split(CharSplits {
Split(SplitInternal {
start: 0,
end: self.len(),
matcher: pat.into_searcher(self),
@ -1410,32 +1595,18 @@ impl StrExt for str {
})
}
#[inline]
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
SplitN(CharSplitsN {
iter: self.split(pat).0,
count: count,
})
}
#[inline]
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
SplitTerminator(CharSplits {
allow_trailing_empty: false,
..self.split(pat).0
})
}
#[inline]
fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
RSplit(RCharSplits {
start: 0,
end: self.len(),
matcher: pat.into_searcher(self),
allow_final_empty: true,
finished: false,
RSplit(self.split(pat).0)
}
#[inline]
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
SplitN(SplitNInternal {
iter: self.split(pat).0,
count: count,
})
}
@ -1443,31 +1614,55 @@ impl StrExt for str {
fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
RSplitN(RCharSplitsN {
iter: self.rsplit(pat).0,
count: count,
RSplitN(self.splitn(count, pat).0)
}
#[inline]
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
SplitTerminator(SplitInternal {
allow_trailing_empty: false,
..self.split(pat).0
})
}
#[inline]
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
MatchIndices(pat.into_searcher(self))
fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
RSplitTerminator(self.split_terminator(pat).0)
}
#[inline]
fn lines(&self) -> Lines {
Lines { inner: self.split_terminator('\n').0 }
fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
Matches(MatchesInternal(pat.into_searcher(self)))
}
fn lines_any(&self) -> LinesAny {
fn f(line: &str) -> &str {
let l = line.len();
if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
else { line }
}
// Reverse counterpart of `matches`: builds the forward `matches` value for
// `pat`, takes its inner field `.0`, and rewraps it as `RMatches`.
// Only available when the pattern's searcher implements `ReverseSearcher`.
#[inline]
fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
RMatches(self.matches(pat).0)
}
let f: fn(&str) -> &str = f; // coerce to fn pointer
LinesAny { inner: self.lines().map(f) }
// Turns `pat` into a searcher over `self` and wraps it in
// `MatchIndicesInternal`, which is in turn wrapped in the public
// `MatchIndices` newtype.
#[inline]
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
}
// Reverse counterpart of `match_indices`: builds the forward value for `pat`,
// takes its inner field `.0`, and rewraps it as `RMatchIndices`.
// Only available when the pattern's searcher implements `ReverseSearcher`.
#[inline]
fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
where P::Searcher: ReverseSearcher<'a>
{
RMatchIndices(self.match_indices(pat).0)
}
// Builds the `Lines` iterator as a thin wrapper around
// `split_terminator('\n')` applied to `self`.
#[inline]
fn lines(&self) -> Lines {
Lines(self.split_terminator('\n'))
}
// Builds the `LinesAny` iterator by mapping every item of `lines()` through
// `LinesAnyMap`.
// NOTE(review): `LinesAnyMap` is defined outside this view; presumably it
// strips a trailing '\r' from each line — confirm against its definition.
#[inline]
fn lines_any(&self) -> LinesAny {
LinesAny(self.lines().map(LinesAnyMap))
}
#[inline]
@ -1708,35 +1903,3 @@ impl<'a> Default for &'a str {
#[stable(feature = "rust1", since = "1.0.0")]
fn default() -> &'a str { "" }
}
// Forward iteration over `Lines`: both methods simply forward to the wrapped
// `inner` iterator and yield `&'a str` items.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
}
// Backward iteration over `Lines`: forwards to `next_back` of the wrapped
// `inner` iterator.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for Lines<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
}
// Forward iteration over `LinesAny`: both methods simply forward to the
// wrapped `inner` iterator and yield `&'a str` items.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for LinesAny<'a> {
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
}
// Backward iteration over `LinesAny`: forwards to `next_back` of the wrapped
// `inner` iterator.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for LinesAny<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
}

View File

@ -8,6 +8,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! The string Pattern API.
//!
//! For more details, see the traits `Pattern`, `Searcher`,
//! `ReverseSearcher` and `DoubleEndedSearcher`.
use prelude::*;
// Pattern
@ -223,7 +228,9 @@ pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
/// `"[aa]a"` or `"a[aa]"`, depending on which side it is searched from.
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
/////////////////////////////////////////////////////////////////////////////
// Impl for a CharEq wrapper
/////////////////////////////////////////////////////////////////////////////
#[doc(hidden)]
trait CharEq {
@ -261,6 +268,7 @@ impl<'a> CharEq for &'a [char] {
struct CharEqPattern<C: CharEq>(C);
#[derive(Clone)]
struct CharEqSearcher<'a, C: CharEq> {
char_eq: C,
haystack: &'a str,
@ -330,17 +338,27 @@ unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
/////////////////////////////////////////////////////////////////////////////
// Impl for &str
/////////////////////////////////////////////////////////////////////////////
// Todo: Optimize the naive implementation here
/// Associated type for `<&str as Pattern<'a>>::Searcher`.
#[derive(Clone)]
struct StrSearcher<'a, 'b> {
pub struct StrSearcher<'a, 'b> {
haystack: &'a str,
needle: &'b str,
start: usize,
end: usize,
done: bool,
state: State,
}
/// Bookkeeping state a `StrSearcher` carries from one search step to the next.
///
/// * `Done` - the search is finished.
/// * `NotDone` - the search is still running and nothing is queued.
/// * `Reject(a, b)` - a pair of byte offsets stored for a later step.
#[derive(Clone, PartialEq)]
enum State {
    Done,
    NotDone,
    Reject(usize, usize),
}

impl State {
    /// Returns `true` only when the state is `State::Done`.
    #[inline]
    fn done(&self) -> bool {
        match *self {
            State::Done => true,
            _ => false,
        }
    }

    /// Moves the current state out to the caller and leaves `State::NotDone`
    /// behind in its place.
    #[inline]
    fn take(&mut self) -> State {
        let fresh = State::NotDone;
        ::mem::replace(self, fresh)
    }
}
/// Non-allocating substring search.
@ -357,7 +375,7 @@ impl<'a, 'b> Pattern<'a> for &'b str {
needle: self,
start: 0,
end: haystack.len(),
done: false,
state: State::NotDone,
}
}
}
@ -374,8 +392,9 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
|m: &mut StrSearcher| {
// Forward step for empty needle
let current_start = m.start;
if !m.done {
if !m.state.done() {
m.start = m.haystack.char_range_at(current_start).next;
m.state = State::Reject(current_start, m.start);
}
SearchStep::Match(current_start, current_start)
},
@ -404,8 +423,9 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
|m: &mut StrSearcher| {
// Backward step for empty needle
let current_end = m.end;
if !m.done {
if !m.state.done() {
m.end = m.haystack.char_range_at_reverse(current_end).next;
m.state = State::Reject(m.end, current_end);
}
SearchStep::Match(current_end, current_end)
},
@ -435,137 +455,178 @@ fn str_search_step<F, G>(mut m: &mut StrSearcher,
where F: FnOnce(&mut StrSearcher) -> SearchStep,
G: FnOnce(&mut StrSearcher) -> SearchStep
{
if m.done {
if m.state.done() {
SearchStep::Done
} else if m.needle.len() == 0 && m.start <= m.end {
// Case for needle == ""
if m.start == m.end {
m.done = true;
if let State::Reject(a, b) = m.state.take() {
SearchStep::Reject(a, b)
} else {
if m.start == m.end {
m.state = State::Done;
}
empty_needle_step(&mut m)
}
empty_needle_step(&mut m)
} else if m.start + m.needle.len() <= m.end {
// Case for needle != ""
nonempty_needle_step(&mut m)
} else if m.start < m.end {
// Remaining slice shorter than needle, reject it
m.done = true;
m.state = State::Done;
SearchStep::Reject(m.start, m.end)
} else {
m.done = true;
m.state = State::Done;
SearchStep::Done
}
}
macro_rules! char_eq_pattern_impl {
($wrapper:ty, $wrapper_ident:ident) => {
fn into_searcher(self, haystack: &'a str) -> $wrapper {
$wrapper_ident(CharEqPattern(self).into_searcher(haystack))
/////////////////////////////////////////////////////////////////////////////
macro_rules! pattern_methods {
($t:ty, $pmap:expr, $smap:expr) => {
type Searcher = $t;
#[inline]
fn into_searcher(self, haystack: &'a str) -> $t {
($smap)(($pmap)(self).into_searcher(haystack))
}
#[inline]
fn is_contained_in(self, haystack: &'a str) -> bool {
CharEqPattern(self).is_contained_in(haystack)
($pmap)(self).is_contained_in(haystack)
}
#[inline]
fn is_prefix_of(self, haystack: &'a str) -> bool {
CharEqPattern(self).is_prefix_of(haystack)
($pmap)(self).is_prefix_of(haystack)
}
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool
where $wrapper: ReverseSearcher<'a>
where $t: ReverseSearcher<'a>
{
CharEqPattern(self).is_suffix_of(haystack)
($pmap)(self).is_suffix_of(haystack)
}
}
}
// Pattern for char
impl<'a> Pattern<'a> for char {
type Searcher = CharSearcher<'a>;
char_eq_pattern_impl!(CharSearcher<'a>, CharSearcher);
// Expands to the method bodies of a searcher newtype by forwarding every call
// to the wrapped searcher stored in tuple field `.0`.
//
// Invoke as `searcher_methods!(forward);` inside an `impl Searcher` block and
// as `searcher_methods!(reverse);` inside an `impl ReverseSearcher` block.
macro_rules! searcher_methods {
// `forward` arm: `haystack`, `next`, `next_match` and `next_reject`.
(forward) => {
#[inline]
fn haystack(&self) -> &'a str {
self.0.haystack()
}
#[inline]
fn next(&mut self) -> SearchStep {
self.0.next()
}
#[inline]
fn next_match(&mut self) -> Option<(usize, usize)> {
self.0.next_match()
}
#[inline]
fn next_reject(&mut self) -> Option<(usize, usize)> {
self.0.next_reject()
}
};
// `reverse` arm: `next_back`, `next_match_back` and `next_reject_back`.
(reverse) => {
#[inline]
fn next_back(&mut self) -> SearchStep {
self.0.next_back()
}
#[inline]
fn next_match_back(&mut self) -> Option<(usize, usize)> {
self.0.next_match_back()
}
#[inline]
fn next_reject_back(&mut self) -> Option<(usize, usize)> {
self.0.next_reject_back()
}
}
}
pub struct CharSearcher<'a>(CharEqSearcher<'a, char>);
/////////////////////////////////////////////////////////////////////////////
// Impl for char
/////////////////////////////////////////////////////////////////////////////
/// Associated type for `<char as Pattern<'a>>::Searcher`.
#[derive(Clone)]
pub struct CharSearcher<'a>(<CharEqPattern<char> as Pattern<'a>>::Searcher);
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
#[inline]
fn haystack(&self) -> &'a str { self.0.haystack() }
#[inline]
fn next(&mut self) -> SearchStep { self.0.next() }
searcher_methods!(forward);
}
unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
#[inline]
fn next_back(&mut self) -> SearchStep { self.0.next_back() }
searcher_methods!(reverse);
}
impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
// Pattern for &[char]
impl<'a, 'b> Pattern<'a> for &'b [char] {
type Searcher = CharSliceSearcher<'a, 'b>;
char_eq_pattern_impl!(CharSliceSearcher<'a, 'b>, CharSliceSearcher);
/// Searches for chars that are equal to a given char
impl<'a> Pattern<'a> for char {
pattern_methods!(CharSearcher<'a>, CharEqPattern, CharSearcher);
}
pub struct CharSliceSearcher<'a, 'b>(CharEqSearcher<'a, &'b [char]>);
/////////////////////////////////////////////////////////////////////////////
// Impl for &[char]
/////////////////////////////////////////////////////////////////////////////
// Todo: Change / Remove due to ambiguity in meaning.
/// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
#[derive(Clone)]
pub struct CharSliceSearcher<'a, 'b>(<CharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
#[inline]
fn haystack(&self) -> &'a str { self.0.haystack() }
#[inline]
fn next(&mut self) -> SearchStep { self.0.next() }
searcher_methods!(forward);
}
unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
#[inline]
fn next_back(&mut self) -> SearchStep { self.0.next_back() }
searcher_methods!(reverse);
}
impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
// Pattern for predicates
impl<'a, F: FnMut(char) -> bool> Pattern<'a> for F {
type Searcher = CharPredSearcher<'a, F>;
char_eq_pattern_impl!(CharPredSearcher<'a, F>, CharPredSearcher);
/// Searches for chars that are equal to any of the chars in the array
impl<'a, 'b> Pattern<'a> for &'b [char] {
pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher);
}
pub struct CharPredSearcher<'a, F: FnMut(char) -> bool>(CharEqSearcher<'a, F>);
/////////////////////////////////////////////////////////////////////////////
// Impl for F: FnMut(char) -> bool
/////////////////////////////////////////////////////////////////////////////
unsafe impl<'a, F> Searcher<'a> for CharPredSearcher<'a, F>
/// Associated type for `<F as Pattern<'a>>::Searcher`.
#[derive(Clone)]
pub struct CharPredicateSearcher<'a, F>(<CharEqPattern<F> as Pattern<'a>>::Searcher)
where F: FnMut(char) -> bool;
unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
where F: FnMut(char) -> bool
{
#[inline]
fn haystack(&self) -> &'a str { self.0.haystack() }
#[inline]
fn next(&mut self) -> SearchStep { self.0.next() }
searcher_methods!(forward);
}
unsafe impl<'a, F> ReverseSearcher<'a> for CharPredSearcher<'a, F>
unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
where F: FnMut(char) -> bool
{
#[inline]
fn next_back(&mut self) -> SearchStep { self.0.next_back() }
searcher_methods!(reverse);
}
impl<'a, F> DoubleEndedSearcher<'a> for CharPredSearcher<'a, F>
where F: FnMut(char) -> bool
{}
// Pattern for &&str
impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F>
where F: FnMut(char) -> bool {}
/// Searches for chars that match the given predicate
impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher);
}
/////////////////////////////////////////////////////////////////////////////
// Impl for &&str
/////////////////////////////////////////////////////////////////////////////
/// Delegates to the `&str` impl.
impl<'a, 'b> Pattern<'a> for &'b &'b str {
type Searcher = <&'b str as Pattern<'a>>::Searcher;
#[inline]
fn into_searcher(self, haystack: &'a str)
-> <&'b str as Pattern<'a>>::Searcher {
(*self).into_searcher(haystack)
}
#[inline]
fn is_contained_in(self, haystack: &'a str) -> bool {
(*self).is_contained_in(haystack)
}
#[inline]
fn is_prefix_of(self, haystack: &'a str) -> bool {
(*self).is_prefix_of(haystack)
}
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool {
(*self).is_suffix_of(haystack)
}
pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);
}

View File

@ -8,378 +8,4 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
/// `contains` must accept the needle as a `&str`, as a `&&str`, and as a
/// reference to an owned `String`.
#[test]
fn test_pattern_deref_forward() {
    let haystack = "aabcdaa";
    let needle = "bcd";
    let owned_needle = needle.to_string();

    assert!(haystack.contains(needle));
    assert!(haystack.contains(&needle));
    assert!(haystack.contains(&owned_needle));
}
// Searching for the empty string must report one empty match per position in
// the expected list: 0, 1, 3, 6 and 7 for this mixed-width haystack. Each
// match is compared as a pair of equal offsets.
#[test]
fn test_empty_match_indices() {
let data = "aä中!";
let vec: Vec<_> = data.match_indices("").collect();
assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
}
/// `bool` parsing accepts exactly `"true"` and `"false"`; anything else fails.
#[test]
fn test_bool_from_str() {
    let parse = |text: &str| text.parse::<bool>().ok();

    assert_eq!(parse("true"), Some(true));
    assert_eq!(parse("false"), Some(false));
    assert_eq!(parse("not even a boolean"), None);
}
/// Asserts that `s` contains the empty string and every non-empty substring
/// of itself.
///
/// Substrings are enumerated between char boundaries rather than between raw
/// byte offsets, so the helper also works for haystacks with multi-byte
/// characters instead of panicking on an invalid slice index. For ASCII input
/// the substrings checked, and their order, are the same as when slicing at
/// every byte offset.
///
/// # Panics
///
/// Panics (via `assert!`) if any substring is reported as not contained.
fn check_contains_all_substrings(s: &str) {
    assert!(s.contains(""));

    // Every valid slice boundary: the start of each char plus the end of `s`.
    let bounds: Vec<usize> = s.char_indices()
                              .map(|(idx, _)| idx)
                              .chain(Some(s.len()))
                              .collect();

    for (pos, &start) in bounds.iter().enumerate() {
        // Only boundaries strictly after `start` give a non-empty substring.
        for &end in &bounds[pos + 1..] {
            assert!(s.contains(&s[start..end]));
        }
    }
}
/// Regression test for issue #16589 (substring search on repetitive input).
#[test]
fn strslice_issue_16589() {
    let repetitive = "012345678901234567890123456789bcdabcdabcd";

    assert!("bananas".contains("nana"));

    // Before #16589 was fixed, x.contains("abcdabcd") wrongly returned false;
    // check every substring of the haystack for good measure.
    check_contains_all_substrings(repetitive);
}
/// Regression test for issue #16878: these needles do not occur in their
/// haystacks and must not be reported as contained.
#[test]
fn strslice_issue_16878() {
    let cases = [
        ("1234567ah012345678901ah", "hah"),
        ("00abc01234567890123456789abc", "bcabc"),
    ];

    for &(haystack, needle) in cases.iter() {
        assert!(!haystack.contains(needle));
    }
}
/// Every substring of a longer English sentence must be found by `contains`.
#[test]
fn test_strslice_contains() {
    check_contains_all_substrings(
        "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'");
}
/// `rsplitn(4, ..)` with a `char` and with an equivalent closure, for both an
/// ASCII and a non-ASCII separator. Pieces arrive right-to-left, so each
/// result is reversed before being compared with the left-to-right
/// expectation.
#[test]
fn test_rsplitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
    let on_space = ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"];
    let on_umlaut = ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"];

    // ASCII separator
    let mut pieces: Vec<&str> = data.rsplitn(4, ' ').collect();
    pieces.reverse();
    assert_eq!(pieces, on_space);

    let mut pieces: Vec<&str> = data.rsplitn(4, |c: char| c == ' ').collect();
    pieces.reverse();
    assert_eq!(pieces, on_space);

    // Unicode
    let mut pieces: Vec<&str> = data.rsplitn(4, 'ä').collect();
    pieces.reverse();
    assert_eq!(pieces, on_umlaut);

    let mut pieces: Vec<&str> = data.rsplitn(4, |c: char| c == 'ä').collect();
    pieces.reverse();
    assert_eq!(pieces, on_umlaut);
}
/// `split` with a `char` and with an equivalent closure, iterated from the
/// front and (via `rev()`) from the back, for both an ASCII and a non-ASCII
/// separator. Back-to-front results are reversed before comparison.
#[test]
fn test_split_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
    let on_space = ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"];
    let on_umlaut = ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"];

    // ASCII separator, char pattern
    let forward: Vec<&str> = data.split(' ').collect();
    assert_eq!(forward, on_space);
    let mut backward: Vec<&str> = data.split(' ').rev().collect();
    backward.reverse();
    assert_eq!(backward, on_space);

    // ASCII separator, closure pattern
    let forward: Vec<&str> = data.split(|c: char| c == ' ').collect();
    assert_eq!(forward, on_space);
    let mut backward: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
    backward.reverse();
    assert_eq!(backward, on_space);

    // Unicode separator, char pattern
    let forward: Vec<&str> = data.split('ä').collect();
    assert_eq!(forward, on_umlaut);
    let mut backward: Vec<&str> = data.split('ä').rev().collect();
    backward.reverse();
    assert_eq!(backward, on_umlaut);

    // Unicode separator, closure pattern
    let forward: Vec<&str> = data.split(|c: char| c == 'ä').collect();
    assert_eq!(forward, on_umlaut);
    let mut backward: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
    backward.reverse();
    assert_eq!(backward, on_umlaut);
}
/// Iterating from the back, `split` keeps the trailing empty piece after the
/// final separator while `split_terminator` drops it.
#[test]
fn test_rev_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let reversed: Vec<&str> = data.split('\n').rev().collect();
    let in_order: Vec<&str> = reversed.into_iter().rev().collect();
    assert_eq!(in_order, ["", "Märy häd ä little lämb", "Little lämb", ""]);

    let reversed: Vec<&str> = data.split_terminator('\n').rev().collect();
    let in_order: Vec<&str> = reversed.into_iter().rev().collect();
    assert_eq!(in_order, ["", "Märy häd ä little lämb", "Little lämb"]);
}
/// `Utf16Encoder` turns 'é' into one code unit and U+1F4A9 into two.
#[test]
fn test_utf16_code_units() {
    use unicode::str::Utf16Encoder;

    let source = vec!['é', '\u{1F4A9}'];
    let units = Utf16Encoder::new(source.into_iter()).collect::<Vec<u16>>();
    assert_eq!(units, [0xE9, 0xD83D, 0xDCA9])
}
/// A short ASCII prefix must not match the start of a multi-byte haystack.
#[test]
fn starts_with_in_unicode() {
    let haystack = "├── Cargo.toml";
    let has_prefix = haystack.starts_with("# ");
    assert!(!has_prefix);
}
/// `starts_with` across prefixes shorter than, equal to and longer than the
/// haystack, including empty strings and multi-byte characters.
#[test]
fn starts_short_long() {
    // (haystack, prefix, expected result of `haystack.starts_with(prefix)`)
    let cases = [
        ("", "##", false),
        ("##", "####", false),
        ("####", "##", true),
        ("##ä", "####", false),
        ("####ä", "##", true),
        ("##", "####ä", false),
        ("##ä##", "##ä", true),
        ("", "", true),
        ("ä", "", true),
        ("", "", true),
        ("##ä", "", true),
        ("ä###", "", true),
        ("#ä##", "", true),
        ("##ä#", "", true),
    ];

    for &(haystack, prefix, expected) in cases.iter() {
        assert!(haystack.starts_with(prefix) == expected);
    }
}
/// `contains` with `char` needles on a short haystack: one present, two
/// absent (one of the absent ones is outside the BMP).
#[test]
fn contains_weird_cases() {
    let haystack = "* \t";

    assert!(haystack.contains(' '));
    for &absent in ['?', '\u{1F4A9}'].iter() {
        assert!(!haystack.contains(absent));
    }
}
/// Trimming with a whitespace predicate from the left, the right and both
/// sides, on a padded string and on an all-whitespace string.
#[test]
fn trim_ws() {
    fn is_ws(c: char) -> bool { c.is_whitespace() }

    let padded = " \t a \t ";
    assert_eq!(padded.trim_left_matches(is_ws), "a \t ");
    assert_eq!(padded.trim_right_matches(is_ws), " \t a");
    assert_eq!(padded.trim_matches(is_ws), "a");

    let blank = " \t \t ";
    assert_eq!(blank.trim_left_matches(is_ws), "");
    assert_eq!(blank.trim_right_matches(is_ws), "");
    assert_eq!(blank.trim_matches(is_ws), "");
}
/// Step-level tests for the string searchers.
///
/// Each case hands a pattern and a haystack to `cmp_search_to_vec` and checks
/// the exact sequence of `Match` / `Reject` steps the searcher emits, once
/// driven from the front and once from the back.
mod pattern {
    use std::str::Pattern;
    use std::str::{Searcher, ReverseSearcher};
    use std::str::SearchStep::{self, Match, Reject, Done};

    /// Generates a module `$name` holding two tests, `fwd` and `bwd`, that
    /// run pattern `$p` over haystack `$h` forwards and backwards and compare
    /// the emitted steps with the expected list.
    macro_rules! make_test {
        ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
            mod $name {
                use std::str::SearchStep::{Match, Reject};
                use super::{cmp_search_to_vec};
                #[test]
                fn fwd() {
                    cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
                }
                #[test]
                fn bwd() {
                    cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
                }
            }
        }
    }

    /// Drives the searcher built from `pat` over `haystack` until it reports
    /// `Done`, collecting every `Match` / `Reject` step, and asserts that the
    /// collected steps equal `right`.
    ///
    /// With `rev == true` the searcher is driven with `next_back` and the
    /// collected steps are reversed before the comparison, so `right` is
    /// always written in left-to-right order.
    fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
                                             right: Vec<SearchStep>)
        where P::Searcher: ReverseSearcher<'a>
    {
        let mut searcher = pat.into_searcher(haystack);
        let mut v = vec![];
        loop {
            match if !rev {searcher.next()} else {searcher.next_back()} {
                Match(a, b) => v.push(Match(a, b)),
                Reject(a, b) => v.push(Reject(a, b)),
                Done => break,
            }
        }
        if rev {
            v.reverse();
        }
        assert_eq!(v, right);
    }

    make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
        Reject(0, 1),
        Match (1, 3),
        Reject(3, 4),
        Match (4, 6),
        Reject(6, 7),
    ]);
    make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
        Match(0, 0),
        Match(1, 1),
        Match(2, 2),
        Match(3, 3),
        Match(4, 4),
        Match(5, 5),
        Match(6, 6),
        Match(7, 7),
    ]);
    make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
        Reject(0, 3),
        Reject(3, 6),
        Reject(6, 9),
    ]);
    make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
        Match(0, 0),
        Match(3, 3),
        Match(6, 6),
        Match(9, 9),
    ]);
    make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
        Match(0, 0),
    ]);
    // The needle must be non-empty here: an empty needle would repeat the
    // case above, which expects a single `Match(0, 0)` rather than no steps.
    make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
    ]);
    make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
        Reject(0, 1),
        Match (1, 2),
        Match (2, 3),
        Reject(3, 4),
        Match (4, 5),
        Match (5, 6),
        Reject(6, 7),
    ]);
    make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
        Reject(0, 3),
        Reject(3, 6),
        Reject(6, 9),
    ]);
    make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
        Reject(0, 1),
        Reject(1, 2),
        Reject(2, 3),
    ]);
}
// Micro-benchmarks for string operations. Each `make_test!` invocation at the
// bottom of this module produces one sub-module containing four `#[bench]`
// functions that run the same expression over four fixed input strings.
mod bench {
// Defines a single `#[bench]` function `$name`: binds `$str` to the
// identifier `$s`, passes it through `black_box`, then times `$code`
// (an expression that refers to `$s`) with `bencher.iter`.
macro_rules! make_test_inner {
($s:ident, $code:expr, $name:ident, $str:expr) => {
#[bench]
fn $name(bencher: &mut Bencher) {
let mut $s = $str;
black_box(&mut $s);
bencher.iter(|| $code);
}
}
}
// Defines module `$name` with four benchmarks of `$code` - `short_ascii`,
// `short_mixed`, `short_pile_of_poo` and `long_lorem_ipsum` - one per input.
macro_rules! make_test {
($name:ident, $s:ident, $code:expr) => {
mod $name {
use test::Bencher;
use test::black_box;
// Short strings: 65 bytes each
make_test_inner!($s, $code, short_ascii,
"Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!");
make_test_inner!($s, $code, short_mixed,
"ศไทย中华Việt Nam; Mary had a little lamb, Little lam!");
make_test_inner!($s, $code, short_pile_of_poo,
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!");
make_test_inner!($s, $code, long_lorem_ipsum,"\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.
In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.
Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.
Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.
Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum!");
}
}
}
// Counting, searching and matching.
make_test!(chars_count, s, s.chars().count());
make_test!(contains_bang_str, s, s.contains("!"));
make_test!(contains_bang_char, s, s.contains('!'));
make_test!(match_indices_a_str, s, s.match_indices("a").count());
make_test!(split_a_str, s, s.split("a").count());
// Trimming with an ASCII predicate.
make_test!(trim_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_matches(|c: char| c.is_ascii())
});
make_test!(trim_left_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_left_matches(|c: char| c.is_ascii())
});
make_test!(trim_right_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_right_matches(|c: char| c.is_ascii())
});
// `find` / `rfind` with `char` and `&str` needles ('_' and U+1F4A4).
make_test!(find_underscore_char, s, s.find('_'));
make_test!(rfind_underscore_char, s, s.rfind('_'));
make_test!(find_underscore_str, s, s.find("_"));
make_test!(find_zzz_char, s, s.find('\u{1F4A4}'));
make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}'));
make_test!(find_zzz_str, s, s.find("\u{1F4A4}"));
// Splitting with `char` and `&str` separators.
make_test!(split_space_char, s, s.split(' ').count());
make_test!(split_terminator_space_char, s, s.split_terminator(' ').count());
make_test!(splitn_space_char, s, s.splitn(10, ' ').count());
make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());
make_test!(split_space_str, s, s.split(" ").count());
make_test!(split_ad_str, s, s.split("ad").count());
}
// All `str` tests live in libcollectionstest::str