path2: Implement .as_display_str() and .to_display_str()

These functions are for working with a string representation of the path
even if it's not UTF-8 encoded. They replace invalid UTF-8 sequences
with the replacement char.
This commit is contained in:
Kevin Ballard 2013-09-26 02:10:16 -07:00
parent 1dfe5088d8
commit 33adf6dd6e
3 changed files with 94 additions and 1 deletions

View File

@ -16,7 +16,7 @@ use clone::Clone;
use iter::Iterator;
use option::{Option, None, Some};
use str;
use str::StrSlice;
use str::{OwnedStr, Str, StrSlice};
use vec;
use vec::{CopyableVector, OwnedCopyableVector, OwnedVector};
use vec::{ImmutableEqVector, ImmutableVector};
@ -140,6 +140,51 @@ pub trait GenericPath: Clone + GenericPathUnsafe {
/// Returns the path as a byte vector
fn as_vec<'a>(&'a self) -> &'a [u8];
/// Hands the path to `f` as a string slice and returns whatever `f` returns.
///
/// When `as_str()` yields a string, that slice is passed to `f` directly.
/// Otherwise the path is first converted with `to_display_str()`, which replaces
/// invalid sequences with the unicode replacement char; only this fallback
/// allocates.
#[inline]
fn as_display_str<T>(&self, f: &fn(&str) -> T) -> T {
    match self.as_str() {
        None => {
            // No direct string view available: build the lossy copy and lend it out.
            let lossy = self.to_display_str();
            f(lossy.as_slice())
        }
        Some(direct) => f(direct)
    }
}
/// Returns the path as a newly allocated string
///
/// The bytes from `as_vec()` are scanned front to back. Every run that forms a valid
/// UTF-8 sequence is copied through unchanged; wherever a valid sequence does not
/// start, a single unicode replacement char (U+FFFD) is emitted and exactly one byte
/// is skipped.
///
/// Unlike `as_display_str()`, this always allocates a new ~str.
fn to_display_str(&self) -> ~str {
    // FIXME (#9516): Don't decode utf-8 manually here once we have a good way to do it in str
    // This hand-rolled scan is a functionality stopgap until a proper utf-8 decoder exists;
    // it is not meant to be a good implementation.
    static REPLACEMENT_CHAR: char = '\uFFFD';
    let mut rest = self.as_vec();
    let mut out = str::with_capacity(rest.len());
    while !rest.is_empty() {
        let width = str::utf8_char_width(rest[0]);
        // Short-circuit order matters: the slice is only taken once the width is known
        // to be non-zero and to fit in the remaining bytes.
        let well_formed = width != 0u
            && rest.len() >= width
            && str::is_utf8(rest.slice_to(width));
        if well_formed {
            // The slice was just checked with is_utf8, so viewing it as &str is sound.
            out.push_str(unsafe { ::cast::transmute(rest.slice_to(width)) });
            rest = rest.slice_from(width);
        } else {
            out.push_char(REPLACEMENT_CHAR);
            rest = rest.slice_from(1);
        }
    }
    out
}
/// Returns the directory component of `self`, as a byte vector (with no trailing separator).
/// If `self` has no directory component, returns ['.'].
fn dirname<'a>(&'a self) -> &'a [u8];

View File

@ -597,6 +597,32 @@ mod tests {
})
}
#[test]
fn test_display_str() {
    // One fully valid path, one with a trailing invalid byte, one with an invalid
    // byte in the middle.
    let plain = Path::from_str("foo");
    let trailing = Path::from_vec(b!("foo", 0x80));
    let embedded = Path::from_vec(b!("foo", 0xff, "bar"));

    // Owned conversion.
    assert_eq!(plain.to_display_str(), ~"foo");
    assert_eq!(trailing.to_display_str(), ~"foo\uFFFD");
    assert_eq!(embedded.to_display_str(), ~"foo\uFFFDbar");

    // Closure-based conversion; `called` proves the closure actually ran each time.
    let mut called = false;
    do plain.as_display_str |s| {
        assert_eq!(s, "foo");
        called = true;
    };
    assert!(called);

    called = false;
    do trailing.as_display_str |s| {
        assert_eq!(s, "foo\uFFFD");
        called = true;
    };
    assert!(called);

    called = false;
    do embedded.as_display_str |s| {
        assert_eq!(s, "foo\uFFFDbar");
        called = true;
    };
    assert!(called);
}
#[test]
fn test_components() {
macro_rules! t(

View File

@ -349,6 +349,16 @@ impl GenericPath for Path {
self.repr.as_bytes()
}
/// Passes the stored string representation straight to `f`; nothing is copied.
#[inline]
fn as_display_str<T>(&self, f: &fn(&str) -> T) -> T {
    let shown: &str = self.repr.as_slice();
    f(shown)
}
/// Returns a fresh copy of the stored string representation.
#[inline]
fn to_display_str(&self) -> ~str {
    let copy: ~str = self.repr.clone();
    copy
}
#[inline]
fn dirname<'a>(&'a self) -> &'a [u8] {
self.dirname_str().unwrap().as_bytes()
@ -1339,6 +1349,18 @@ mod tests {
Path::from_vec(b!("hello", 0x80, ".txt"));
}
#[test]
fn test_display_str() {
    let path = Path::from_str("foo");

    // Owned conversion.
    assert_eq!(path.to_display_str(), ~"foo");

    // Closure-based conversion; `called` proves the closure actually ran.
    let mut called = false;
    do path.as_display_str |s| {
        assert_eq!(s, "foo");
        called = true;
    };
    assert!(called);
}
#[test]
fn test_components() {
macro_rules! t(