diff --git a/src/libstd/c_str.rs b/src/libstd/c_str.rs index fe332a60efa..adbd4be316c 100644 --- a/src/libstd/c_str.rs +++ b/src/libstd/c_str.rs @@ -79,6 +79,7 @@ use str; use vec::{ImmutableVector, MutableVector}; use vec; use rt::global_heap::malloc_raw; +use unstable::raw::Slice; /// The representation of a C String. /// @@ -169,6 +170,7 @@ impl CString { } /// Converts the CString into a `&[u8]` without copying. + /// Includes the terminating NUL byte. /// /// # Failure /// @@ -177,7 +179,21 @@ impl CString { pub fn as_bytes<'a>(&'a self) -> &'a [u8] { if self.buf.is_null() { fail!("CString is null!"); } unsafe { - cast::transmute((self.buf, self.len() + 1)) + cast::transmute(Slice { data: self.buf, len: self.len() + 1 }) + } + } + + /// Converts the CString into a `&[u8]` without copying. + /// Does not include the terminating NUL byte. + /// + /// # Failure + /// + /// Fails if the CString is null. + #[inline] + pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] { + if self.buf.is_null() { fail!("CString is null!"); } + unsafe { + cast::transmute(Slice { data: self.buf, len: self.len() }) } } @@ -189,8 +205,7 @@ impl CString { /// Fails if the CString is null. #[inline] pub fn as_str<'a>(&'a self) -> Option<&'a str> { - let buf = self.as_bytes(); - let buf = buf.slice_to(buf.len()-1); // chop off the trailing NUL + let buf = self.as_bytes_no_nul(); str::from_utf8(buf) } @@ -417,7 +432,7 @@ mod tests { let expected = ["zero", "one"]; let mut it = expected.iter(); let result = from_c_multistring(ptr as *libc::c_char, None, |c| { - let cbytes = c.as_bytes().slice_to(c.len()); + let cbytes = c.as_bytes_no_nul(); assert_eq!(cbytes, it.next().unwrap().as_bytes()); }); assert_eq!(result, 2); @@ -552,6 +567,17 @@ mod tests { assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0)); } + #[test] + fn test_as_bytes_no_nul() { + let c_str = "hello".to_c_str(); + assert_eq!(c_str.as_bytes_no_nul(), bytes!("hello")); + let c_str = "".to_c_str(); + let exp: &[u8] = []; + assert_eq!(c_str.as_bytes_no_nul(), exp); + let c_str = bytes!("foo", 0xff).to_c_str(); + assert_eq!(c_str.as_bytes_no_nul(), bytes!("foo", 0xff)); + } + #[test] #[should_fail] fn test_as_bytes_fail() { @@ -559,6 +585,13 @@ mod tests { c_str.as_bytes(); } + #[test] + #[should_fail] + fn test_as_bytes_no_nul_fail() { + let c_str = unsafe { CString::new(ptr::null(), false) }; + c_str.as_bytes_no_nul(); + } + #[test] fn test_as_str() { let c_str = "hello".to_c_str(); diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 78cae296457..20d1ae2f3e2 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -53,6 +53,8 @@ use ptr::RawPtr; #[cfg(unix)] use c_str::ToCStr; +#[cfg(windows)] +use str::OwnedStr; /// Delegates to the libc close() function, returning the same return value. pub fn close(fd: int) -> int { @@ -158,10 +160,23 @@ fn with_env_lock(f: || -> T) -> T { /// Returns a vector of (variable, value) pairs for all the environment /// variables of the current process. +/// +/// Invalid UTF-8 bytes are replaced with \uFFFD. See `str::from_utf8_lossy()` +/// for details. pub fn env() -> ~[(~str,~str)] { + env_as_bytes().move_iter().map(|(k,v)| { + let k = str::from_utf8_lossy(k).into_owned(); + let v = str::from_utf8_lossy(v).into_owned(); + (k,v) + }).collect() +} + +/// Returns a vector of (variable, value) byte-vector pairs for all the +/// environment variables of the current process. +pub fn env_as_bytes() -> ~[(~[u8],~[u8])] { unsafe { #[cfg(windows)] - unsafe fn get_env_pairs() -> ~[~str] { + unsafe fn get_env_pairs() -> ~[~[u8]] { use c_str; use str::StrSlice; @@ -176,13 +191,15 @@ pub fn env() -> ~[(~str,~str)] { } let mut result = ~[]; c_str::from_c_multistring(ch as *c_char, None, |cstr| { - result.push(cstr.as_str().unwrap().to_owned()); + result.push(cstr.as_bytes_no_nul().to_owned()); }); FreeEnvironmentStringsA(ch); result } #[cfg(unix)] - unsafe fn get_env_pairs() -> ~[~str] { + unsafe fn get_env_pairs() -> ~[~[u8]] { + use c_str::CString; + extern { fn rust_env_pairs() -> **c_char; } @@ -193,20 +210,19 @@ pub fn env() -> ~[(~str,~str)] { } let mut result = ~[]; ptr::array_each(environ, |e| { - let env_pair = str::raw::from_c_str(e); - debug!("get_env_pairs: {}", env_pair); + let env_pair = CString::new(e, false).as_bytes_no_nul().to_owned(); result.push(env_pair); }); result } - fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] { + fn env_convert(input: ~[~[u8]]) -> ~[(~[u8], ~[u8])] { let mut pairs = ~[]; for p in input.iter() { - let vs: ~[&str] = p.splitn('=', 1).collect(); - debug!("splitting: len: {}", vs.len()); - assert_eq!(vs.len(), 2); - pairs.push((vs[0].to_owned(), vs[1].to_owned())); + let vs: ~[&[u8]] = p.splitn(1, |b| *b == '=' as u8).collect(); + let key = vs[0].to_owned(); + let val = (if vs.len() < 2 { ~[] } else { vs[1].to_owned() }); + pairs.push((key, val)); } pairs } @@ -220,14 +236,34 @@ pub fn env() -> ~[(~str,~str)] { #[cfg(unix)] /// Fetches the environment variable `n` from the current process, returning /// None if the variable isn't set. +/// +/// Any invalid UTF-8 bytes in the value are replaced by \uFFFD. See +/// `str::from_utf8_lossy()` for details. +/// +/// # Failure +/// +/// Fails if `n` has any interior NULs. pub fn getenv(n: &str) -> Option<~str> { + getenv_as_bytes(n).map(|v| str::from_utf8_lossy(v).into_owned()) +} + +#[cfg(unix)] +/// Fetches the environment variable `n` byte vector from the current process, +/// returning None if the variable isn't set. +/// +/// # Failure +/// +/// Fails if `n` has any interior NULs. +pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> { + use c_str::CString; + unsafe { with_env_lock(|| { let s = n.with_c_str(|buf| libc::getenv(buf)); if s.is_null() { None } else { - Some(str::raw::from_c_str(s)) + Some(CString::new(s, false).as_bytes_no_nul().to_owned()) } }) } @@ -249,10 +285,21 @@ pub fn getenv(n: &str) -> Option<~str> { } } +#[cfg(windows)] +/// Fetches the environment variable `n` byte vector from the current process, +/// returning None if the variable isn't set. +pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> { + getenv(n).map(|s| s.into_bytes()) +} + #[cfg(unix)] /// Sets the environment variable `n` to the value `v` for the currently running /// process +/// +/// # Failure +/// +/// Fails if `n` or `v` have any interior NULs. pub fn setenv(n: &str, v: &str) { unsafe { with_env_lock(|| { @@ -283,6 +330,10 @@ pub fn setenv(n: &str, v: &str) { } /// Remove a variable from the environment entirely +/// +/// # Failure +/// +/// Fails (on unix) if `n` has any interior NULs. pub fn unsetenv(n: &str) { #[cfg(unix)] fn _unsetenv(n: &str) { @@ -722,10 +773,12 @@ pub fn get_exit_status() -> int { } #[cfg(target_os = "macos")] -unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] { +unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] { + use c_str::CString; + let mut args = ~[]; for i in range(0u, argc as uint) { - args.push(str::raw::from_c_str(*argv.offset(i as int))); + args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned()) } args } @@ -736,7 +789,7 @@ unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] { * Returns a list of the command line arguments. */ #[cfg(target_os = "macos")] -fn real_args() -> ~[~str] { +fn real_args_as_bytes() -> ~[~[u8]] { unsafe { let (argc, argv) = (*_NSGetArgc() as int, *_NSGetArgv() as **c_char); @@ -747,7 +800,7 @@ fn real_args() -> ~[~str] { #[cfg(target_os = "linux")] #[cfg(target_os = "android")] #[cfg(target_os = "freebsd")] -fn real_args() -> ~[~str] { +fn real_args_as_bytes() -> ~[~[u8]] { use rt; match rt::args::clone() { @@ -756,6 +809,11 @@ fn real_args() -> ~[~str] { } } +#[cfg(not(windows))] +fn real_args() -> ~[~str] { + real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect() +} + #[cfg(windows)] fn real_args() -> ~[~str] { use vec; @@ -786,6 +844,11 @@ fn real_args() -> ~[~str] { return args; } +#[cfg(windows)] +fn real_args_as_bytes() -> ~[~[u8]] { + real_args().move_iter().map(|s| s.into_bytes()).collect() +} + type LPCWSTR = *u16; #[cfg(windows)] @@ -803,10 +866,19 @@ extern "system" { /// Returns the arguments which this program was started with (normally passed /// via the command line). +/// +/// The arguments are interpreted as utf-8, with invalid bytes replaced with \uFFFD. +/// See `str::from_utf8_lossy` for details. pub fn args() -> ~[~str] { real_args() } +/// Returns the arguments which this program was started with (normally passed +/// via the command line) as byte vectors. +pub fn args_as_bytes() -> ~[~[u8]] { + real_args_as_bytes() +} + #[cfg(target_os = "macos")] extern { // These functions are in crt_externs.h. diff --git a/src/libstd/path/mod.rs b/src/libstd/path/mod.rs index ed0ce201750..13496033fd0 100644 --- a/src/libstd/path/mod.rs +++ b/src/libstd/path/mod.rs @@ -578,8 +578,7 @@ impl BytesContainer for ~[u8] { impl BytesContainer for CString { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { - let s = self.as_bytes(); - s.slice_to(s.len()-1) + self.as_bytes_no_nul() } } diff --git a/src/libstd/rt/args.rs b/src/libstd/rt/args.rs index cef03d66923..c417ea375fd 100644 --- a/src/libstd/rt/args.rs +++ b/src/libstd/rt/args.rs @@ -36,8 +36,8 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) } #[cfg(test)] pub unsafe fn cleanup() { realargs::cleanup() } /// Take the global arguments from global storage. -#[cfg(not(test))] pub fn take() -> Option<~[~str]> { imp::take() } -#[cfg(test)] pub fn take() -> Option<~[~str]> { +#[cfg(not(test))] pub fn take() -> Option<~[~[u8]]> { imp::take() } +#[cfg(test)] pub fn take() -> Option<~[~[u8]]> { match realargs::take() { realstd::option::Some(a) => Some(a), realstd::option::None => None, @@ -47,12 +47,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) } /// Give the global arguments to global storage. /// /// It is an error if the arguments already exist. -#[cfg(not(test))] pub fn put(args: ~[~str]) { imp::put(args) } -#[cfg(test)] pub fn put(args: ~[~str]) { realargs::put(args) } +#[cfg(not(test))] pub fn put(args: ~[~[u8]]) { imp::put(args) } +#[cfg(test)] pub fn put(args: ~[~[u8]]) { realargs::put(args) } /// Make a clone of the global arguments. -#[cfg(not(test))] pub fn clone() -> Option<~[~str]> { imp::clone() } -#[cfg(test)] pub fn clone() -> Option<~[~str]> { +#[cfg(not(test))] pub fn clone() -> Option<~[~[u8]]> { imp::clone() } +#[cfg(test)] pub fn clone() -> Option<~[~[u8]]> { match realargs::clone() { realstd::option::Some(a) => Some(a), realstd::option::None => None, @@ -65,15 +65,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) } mod imp { use cast; use clone::Clone; - #[cfg(not(test))] use libc; use option::{Option, Some, None}; use ptr::RawPtr; use iter::Iterator; - #[cfg(not(test))] use str; use unstable::finally::Finally; use unstable::mutex::{Mutex, MUTEX_INIT}; use mem; - #[cfg(not(test))] use vec; static mut global_args_ptr: uint = 0; static mut lock: Mutex = MUTEX_INIT; @@ -90,15 +87,15 @@ mod imp { lock.destroy(); } - pub fn take() -> Option<~[~str]> { + pub fn take() -> Option<~[~[u8]]> { with_lock(|| unsafe { let ptr = get_global_ptr(); let val = mem::replace(&mut *ptr, None); - val.as_ref().map(|s: &~~[~str]| (**s).clone()) + val.as_ref().map(|s: &~~[~[u8]]| (**s).clone()) }) } - pub fn put(args: ~[~str]) { + pub fn put(args: ~[~[u8]]) { with_lock(|| unsafe { let ptr = get_global_ptr(); rtassert!((*ptr).is_none()); @@ -106,10 +103,10 @@ mod imp { }) } - pub fn clone() -> Option<~[~str]> { + pub fn clone() -> Option<~[~[u8]]> { with_lock(|| unsafe { let ptr = get_global_ptr(); - (*ptr).as_ref().map(|s: &~~[~str]| (**s).clone()) + (*ptr).as_ref().map(|s: &~~[~[u8]]| (**s).clone()) }) } @@ -126,15 +123,20 @@ mod imp { }) } - fn get_global_ptr() -> *mut Option<~~[~str]> { + fn get_global_ptr() -> *mut Option<~~[~[u8]]> { unsafe { cast::transmute(&global_args_ptr) } } // Copied from `os`. #[cfg(not(test))] - unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~str] { + unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~[u8]] { + use c_str::CString; + use {vec, libc}; + use vec::CloneableVector; + vec::from_fn(argc as uint, |i| { - str::raw::from_c_str(*(argv as **libc::c_char).offset(i as int)) + let cs = CString::new(*(argv as **libc::c_char).offset(i as int), false); + cs.as_bytes_no_nul().to_owned() }) } @@ -149,7 +151,7 @@ mod imp { // Preserve the actual global state. let saved_value = take(); - let expected = ~[~"happy", ~"today?"]; + let expected = ~[bytes!("happy").to_owned(), bytes!("today?").to_owned()]; put(expected.clone()); assert!(clone() == Some(expected.clone())); @@ -179,15 +181,15 @@ mod imp { pub fn cleanup() { } - pub fn take() -> Option<~[~str]> { + pub fn take() -> Option<~[~[u8]]> { fail!() } - pub fn put(_args: ~[~str]) { + pub fn put(_args: ~[~[u8]]) { fail!() } - pub fn clone() -> Option<~[~str]> { + pub fn clone() -> Option<~[~[u8]]> { fail!() } }