Rollup merge of #69784 - benesch:fast-strip-prefix-suffix, r=kennytm

Optimize strip_prefix and strip_suffix with str patterns

As mentioned in https://github.com/rust-lang/rust/issues/67302#issuecomment-585639226.
I'm not sure whether adding these methods to `Pattern` is desirable—but they have default implementations so the change is backwards compatible. Plus it seems like they're slated for wholesale replacement soon anyway? #56345

----

Constructing a Searcher in strip_prefix and strip_suffix is
unnecessarily slow when the pattern is a fixed-length string. Add
strip_prefix and strip_suffix methods to the Pattern trait, and add
optimized implementations of these methods in the str implementation.
The old implementation is retained as the default for these methods.
This commit is contained in:
Mazdak Farrokhzad 2020-03-31 15:59:40 +02:00 committed by GitHub
commit 9ee373fd94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 107 additions and 30 deletions

View File

@ -1849,6 +1849,21 @@ impl<'a, 'b> Pattern<'a> for &'b String {
fn is_prefix_of(self, haystack: &'a str) -> bool {
self[..].is_prefix_of(haystack)
}
#[inline]
fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
self[..].strip_prefix_of(haystack)
}
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool {
self[..].is_suffix_of(haystack)
}
#[inline]
fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> {
self[..].strip_suffix_of(haystack)
}
}
#[stable(feature = "rust1", since = "1.0.0")]

View File

@ -9,7 +9,7 @@
#![stable(feature = "rust1", since = "1.0.0")]
use self::pattern::Pattern;
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, SearchStep, Searcher};
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
use crate::char;
use crate::fmt::{self, Write};
@ -3986,26 +3986,15 @@ impl str {
/// ```
/// #![feature(str_strip)]
///
/// assert_eq!("foobar".strip_prefix("foo"), Some("bar"));
/// assert_eq!("foobar".strip_prefix("bar"), None);
/// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar"));
/// assert_eq!("foo:bar".strip_prefix("bar"), None);
/// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
/// ```
#[must_use = "this returns the remaining substring as a new slice, \
without modifying the original"]
#[unstable(feature = "str_strip", reason = "newly added", issue = "67302")]
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
let mut matcher = prefix.into_searcher(self);
if let SearchStep::Match(start, len) = matcher.next() {
debug_assert_eq!(
start, 0,
"The first search step from Searcher \
must include the first character"
);
// SAFETY: `Searcher` is known to return valid indices.
unsafe { Some(self.get_unchecked(len..)) }
} else {
None
}
prefix.strip_prefix_of(self)
}
/// Returns a string slice with the suffix removed.
@ -4020,8 +4009,8 @@ impl str {
///
/// ```
/// #![feature(str_strip)]
/// assert_eq!("barfoo".strip_suffix("foo"), Some("bar"));
/// assert_eq!("barfoo".strip_suffix("bar"), None);
/// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar"));
/// assert_eq!("bar:foo".strip_suffix("bar"), None);
/// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
/// ```
#[must_use = "this returns the remaining substring as a new slice, \
@ -4032,19 +4021,7 @@ impl str {
P: Pattern<'a>,
<P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
{
let mut matcher = suffix.into_searcher(self);
if let SearchStep::Match(start, end) = matcher.next_back() {
debug_assert_eq!(
end,
self.len(),
"The first search step from ReverseSearcher \
must include the last character"
);
// SAFETY: `Searcher` is known to return valid indices.
unsafe { Some(self.get_unchecked(..start)) }
} else {
None
}
suffix.strip_suffix_of(self)
}
/// Returns a string slice with all suffixes that match a pattern

View File

@ -47,6 +47,22 @@ pub trait Pattern<'a>: Sized {
matches!(self.into_searcher(haystack).next(), SearchStep::Match(0, _))
}
/// Removes the pattern from the front of haystack, if it matches.
#[inline]
fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
if let SearchStep::Match(start, len) = self.into_searcher(haystack).next() {
debug_assert_eq!(
start, 0,
"The first search step from Searcher \
must include the first character"
);
// SAFETY: `Searcher` is known to return valid indices.
unsafe { Some(haystack.get_unchecked(len..)) }
} else {
None
}
}
/// Checks whether the pattern matches at the back of the haystack
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool
@ -55,6 +71,26 @@ pub trait Pattern<'a>: Sized {
{
matches!(self.into_searcher(haystack).next_back(), SearchStep::Match(_, j) if haystack.len() == j)
}
/// Removes the pattern from the back of haystack, if it matches.
#[inline]
fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str>
where
Self::Searcher: ReverseSearcher<'a>,
{
if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() {
debug_assert_eq!(
end,
haystack.len(),
"The first search step from ReverseSearcher \
must include the last character"
);
// SAFETY: `Searcher` is known to return valid indices.
unsafe { Some(haystack.get_unchecked(..start)) }
} else {
None
}
}
}
// Searcher
@ -448,6 +484,11 @@ impl<'a> Pattern<'a> for char {
self.encode_utf8(&mut [0u8; 4]).is_prefix_of(haystack)
}
#[inline]
fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
self.encode_utf8(&mut [0u8; 4]).strip_prefix_of(haystack)
}
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool
where
@ -455,6 +496,14 @@ impl<'a> Pattern<'a> for char {
{
self.encode_utf8(&mut [0u8; 4]).is_suffix_of(haystack)
}
#[inline]
fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str>
where
Self::Searcher: ReverseSearcher<'a>,
{
self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
}
}
/////////////////////////////////////////////////////////////////////////////
@ -569,6 +618,11 @@ macro_rules! pattern_methods {
($pmap)(self).is_prefix_of(haystack)
}
#[inline]
fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
($pmap)(self).strip_prefix_of(haystack)
}
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool
where
@ -576,6 +630,14 @@ macro_rules! pattern_methods {
{
($pmap)(self).is_suffix_of(haystack)
}
#[inline]
fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str>
where
$t: ReverseSearcher<'a>,
{
($pmap)(self).strip_suffix_of(haystack)
}
};
}
@ -715,11 +777,34 @@ impl<'a, 'b> Pattern<'a> for &'b str {
haystack.as_bytes().starts_with(self.as_bytes())
}
/// Removes the pattern from the front of haystack, if it matches.
#[inline]
fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
if self.is_prefix_of(haystack) {
// SAFETY: prefix was just verified to exist.
unsafe { Some(haystack.get_unchecked(self.as_bytes().len()..)) }
} else {
None
}
}
/// Checks whether the pattern matches at the back of the haystack
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool {
haystack.as_bytes().ends_with(self.as_bytes())
}
/// Removes the pattern from the back of haystack, if it matches.
#[inline]
fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> {
if self.is_suffix_of(haystack) {
let i = haystack.len() - self.as_bytes().len();
// SAFETY: suffix was just verified to exist.
unsafe { Some(haystack.get_unchecked(..i)) }
} else {
None
}
}
}
/////////////////////////////////////////////////////////////////////////////