auto merge of #12283 : kballard/rust/env-args-bytes, r=erickt

Change `os::args()` and `os::env()` to use `str::from_utf8_lossy()`.
Add new functions `os::args_as_bytes()` and `os::env_as_bytes()` to retrieve the args/env as byte vectors instead.

The existing methods were left returning strings because I expect that the common use-case is to want string handling.

Fixes #7188.
This commit is contained in:
bors 2014-02-15 02:36:27 -08:00
commit fba32ea79f
4 changed files with 148 additions and 42 deletions

View File

@ -79,6 +79,7 @@ use str;
use vec::{ImmutableVector, MutableVector};
use vec;
use rt::global_heap::malloc_raw;
use unstable::raw::Slice;
/// The representation of a C String.
///
@ -169,6 +170,7 @@ impl CString {
}
/// Converts the CString into a `&[u8]` without copying.
/// Includes the terminating NUL byte.
///
/// # Failure
///
@ -177,7 +179,21 @@ impl CString {
pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
if self.buf.is_null() { fail!("CString is null!"); }
unsafe {
cast::transmute((self.buf, self.len() + 1))
cast::transmute(Slice { data: self.buf, len: self.len() + 1 })
}
}
/// Borrows the contents of the CString as a `&[u8]` without copying.
/// The terminating NUL byte is not part of the returned slice.
///
/// # Failure
///
/// Fails if the CString is null.
#[inline]
pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
    if self.buf.is_null() {
        fail!("CString is null!");
    }
    // Describe the buffer as a raw slice of exactly `len()` elements and
    // reinterpret that descriptor as a borrowed byte slice.
    let raw = Slice { data: self.buf, len: self.len() };
    unsafe { cast::transmute(raw) }
}
@ -189,8 +205,7 @@ impl CString {
/// Fails if the CString is null.
#[inline]
pub fn as_str<'a>(&'a self) -> Option<&'a str> {
let buf = self.as_bytes();
let buf = buf.slice_to(buf.len()-1); // chop off the trailing NUL
let buf = self.as_bytes_no_nul();
str::from_utf8(buf)
}
@ -417,7 +432,7 @@ mod tests {
let expected = ["zero", "one"];
let mut it = expected.iter();
let result = from_c_multistring(ptr as *libc::c_char, None, |c| {
let cbytes = c.as_bytes().slice_to(c.len());
let cbytes = c.as_bytes_no_nul();
assert_eq!(cbytes, it.next().unwrap().as_bytes());
});
assert_eq!(result, 2);
@ -552,6 +567,17 @@ mod tests {
assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0));
}
#[test]
fn test_as_bytes_no_nul() {
    // Ordinary ASCII contents come back without a trailing NUL.
    let hello = "hello".to_c_str();
    assert_eq!(hello.as_bytes_no_nul(), bytes!("hello"));

    // An empty C string yields an empty slice.
    let empty = "".to_c_str();
    let no_bytes: &[u8] = [];
    assert_eq!(empty.as_bytes_no_nul(), no_bytes);

    // A 0xff byte (never valid UTF-8) is returned unchanged.
    let non_utf8 = bytes!("foo", 0xff).to_c_str();
    assert_eq!(non_utf8.as_bytes_no_nul(), bytes!("foo", 0xff));
}
#[test]
#[should_fail]
fn test_as_bytes_fail() {
@ -559,6 +585,13 @@ mod tests {
c_str.as_bytes();
}
#[test]
#[should_fail]
fn test_as_bytes_no_nul_fail() {
    // A CString wrapping a null pointer is expected to make the call fail.
    let null_c_str = unsafe { CString::new(ptr::null(), false) };
    null_c_str.as_bytes_no_nul();
}
#[test]
fn test_as_str() {
let c_str = "hello".to_c_str();

View File

@ -53,6 +53,8 @@ use ptr::RawPtr;
#[cfg(unix)]
use c_str::ToCStr;
#[cfg(windows)]
use str::OwnedStr;
/// Delegates to the libc close() function, returning the same return value.
pub fn close(fd: int) -> int {
@ -158,10 +160,23 @@ fn with_env_lock<T>(f: || -> T) -> T {
/// Collects every environment variable of the current process into a vector
/// of (variable, value) string pairs.
///
/// The pairs are taken from `env_as_bytes()`. Invalid UTF-8 bytes are
/// replaced with \uFFFD. See `str::from_utf8_lossy()` for details.
pub fn env() -> ~[(~str,~str)] {
    // Lossily decodes one owned byte vector into an owned string.
    fn lossy(bytes: ~[u8]) -> ~str {
        str::from_utf8_lossy(bytes).into_owned()
    }

    env_as_bytes().move_iter().map(|(name, value)| {
        (lossy(name), lossy(value))
    }).collect()
}
/// Returns a vector of (variable, value) byte-vector pairs for all the
/// environment variables of the current process.
pub fn env_as_bytes() -> ~[(~[u8],~[u8])] {
unsafe {
#[cfg(windows)]
unsafe fn get_env_pairs() -> ~[~str] {
unsafe fn get_env_pairs() -> ~[~[u8]] {
use c_str;
use str::StrSlice;
@ -176,13 +191,15 @@ pub fn env() -> ~[(~str,~str)] {
}
let mut result = ~[];
c_str::from_c_multistring(ch as *c_char, None, |cstr| {
result.push(cstr.as_str().unwrap().to_owned());
result.push(cstr.as_bytes_no_nul().to_owned());
});
FreeEnvironmentStringsA(ch);
result
}
#[cfg(unix)]
unsafe fn get_env_pairs() -> ~[~str] {
unsafe fn get_env_pairs() -> ~[~[u8]] {
use c_str::CString;
extern {
fn rust_env_pairs() -> **c_char;
}
@ -193,20 +210,19 @@ pub fn env() -> ~[(~str,~str)] {
}
let mut result = ~[];
ptr::array_each(environ, |e| {
let env_pair = str::raw::from_c_str(e);
debug!("get_env_pairs: {}", env_pair);
let env_pair = CString::new(e, false).as_bytes_no_nul().to_owned();
result.push(env_pair);
});
result
}
fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] {
fn env_convert(input: ~[~[u8]]) -> ~[(~[u8], ~[u8])] {
let mut pairs = ~[];
for p in input.iter() {
let vs: ~[&str] = p.splitn('=', 1).collect();
debug!("splitting: len: {}", vs.len());
assert_eq!(vs.len(), 2);
pairs.push((vs[0].to_owned(), vs[1].to_owned()));
let vs: ~[&[u8]] = p.splitn(1, |b| *b == '=' as u8).collect();
let key = vs[0].to_owned();
let val = (if vs.len() < 2 { ~[] } else { vs[1].to_owned() });
pairs.push((key, val));
}
pairs
}
@ -220,14 +236,34 @@ pub fn env() -> ~[(~str,~str)] {
#[cfg(unix)]
/// Looks up the environment variable `n` in the current process, returning
/// None if the variable isn't set.
///
/// The value is obtained from `getenv_as_bytes()`. Any invalid UTF-8 bytes in
/// it are replaced by \uFFFD. See `str::from_utf8_lossy()` for details.
///
/// # Failure
///
/// Fails if `n` has any interior NULs.
pub fn getenv(n: &str) -> Option<~str> {
    match getenv_as_bytes(n) {
        Some(bytes) => Some(str::from_utf8_lossy(bytes).into_owned()),
        None => None
    }
}
#[cfg(unix)]
/// Fetches the environment variable `n` byte vector from the current process,
/// returning None if the variable isn't set.
///
/// # Failure
///
/// Fails if `n` has any interior NULs.
pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
use c_str::CString;
unsafe {
with_env_lock(|| {
let s = n.with_c_str(|buf| libc::getenv(buf));
if s.is_null() {
None
} else {
Some(str::raw::from_c_str(s))
Some(CString::new(s, false).as_bytes_no_nul().to_owned())
}
})
}
@ -249,10 +285,21 @@ pub fn getenv(n: &str) -> Option<~str> {
}
}
#[cfg(windows)]
/// Looks up the environment variable `n` in the current process and returns
/// its value as a byte vector, or None if the variable isn't set.
///
/// The bytes are those of the string returned by `getenv()`.
pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
    match getenv(n) {
        Some(value) => Some(value.into_bytes()),
        None => None
    }
}
#[cfg(unix)]
/// Sets the environment variable `n` to the value `v` for the currently running
/// process
///
/// # Failure
///
/// Fails if `n` or `v` have any interior NULs.
pub fn setenv(n: &str, v: &str) {
unsafe {
with_env_lock(|| {
@ -283,6 +330,10 @@ pub fn setenv(n: &str, v: &str) {
}
/// Remove a variable from the environment entirely
///
/// # Failure
///
/// Fails (on unix) if `n` has any interior NULs.
pub fn unsetenv(n: &str) {
#[cfg(unix)]
fn _unsetenv(n: &str) {
@ -722,10 +773,12 @@ pub fn get_exit_status() -> int {
}
#[cfg(target_os = "macos")]
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] {
use c_str::CString;
let mut args = ~[];
for i in range(0u, argc as uint) {
args.push(str::raw::from_c_str(*argv.offset(i as int)));
args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned())
}
args
}
@ -736,7 +789,7 @@ unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
* Returns a list of the command line arguments.
*/
#[cfg(target_os = "macos")]
fn real_args() -> ~[~str] {
fn real_args_as_bytes() -> ~[~[u8]] {
unsafe {
let (argc, argv) = (*_NSGetArgc() as int,
*_NSGetArgv() as **c_char);
@ -747,7 +800,7 @@ fn real_args() -> ~[~str] {
#[cfg(target_os = "linux")]
#[cfg(target_os = "android")]
#[cfg(target_os = "freebsd")]
fn real_args() -> ~[~str] {
fn real_args_as_bytes() -> ~[~[u8]] {
use rt;
match rt::args::clone() {
@ -756,6 +809,11 @@ fn real_args() -> ~[~str] {
}
}
#[cfg(not(windows))]
fn real_args() -> ~[~str] {
    // Build the string arguments by lossily decoding each byte-vector
    // argument with `str::from_utf8_lossy`.
    let mut decoded = ~[];
    for raw_arg in real_args_as_bytes().move_iter() {
        decoded.push(str::from_utf8_lossy(raw_arg).into_owned());
    }
    decoded
}
#[cfg(windows)]
fn real_args() -> ~[~str] {
use vec;
@ -786,6 +844,11 @@ fn real_args() -> ~[~str] {
return args;
}
#[cfg(windows)]
fn real_args_as_bytes() -> ~[~[u8]] {
    // Reuse the string arguments and hand back each one's underlying bytes.
    let mut byte_args = ~[];
    for arg in real_args().move_iter() {
        byte_args.push(arg.into_bytes());
    }
    byte_args
}
type LPCWSTR = *u16;
#[cfg(windows)]
@ -803,10 +866,19 @@ extern "system" {
/// Returns the arguments which this program was started with (normally passed
/// via the command line).
///
/// The arguments are interpreted as UTF-8, with invalid bytes replaced with \uFFFD.
/// See `str::from_utf8_lossy` for details.
///
/// This delegates to the platform-specific `real_args()`.
pub fn args() -> ~[~str] {
real_args()
}
/// Returns the arguments which this program was started with (normally passed
/// via the command line) as byte vectors.
///
/// This delegates to the platform-specific `real_args_as_bytes()`. On Windows
/// that function returns the bytes of the strings produced by `real_args()`.
pub fn args_as_bytes() -> ~[~[u8]] {
real_args_as_bytes()
}
#[cfg(target_os = "macos")]
extern {
// These functions are in crt_externs.h.

View File

@ -578,8 +578,7 @@ impl BytesContainer for ~[u8] {
impl BytesContainer for CString {
#[inline]
fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
let s = self.as_bytes();
s.slice_to(s.len()-1)
self.as_bytes_no_nul()
}
}

View File

@ -36,8 +36,8 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
#[cfg(test)] pub unsafe fn cleanup() { realargs::cleanup() }
/// Take the global arguments from global storage.
#[cfg(not(test))] pub fn take() -> Option<~[~str]> { imp::take() }
#[cfg(test)] pub fn take() -> Option<~[~str]> {
#[cfg(not(test))] pub fn take() -> Option<~[~[u8]]> { imp::take() }
#[cfg(test)] pub fn take() -> Option<~[~[u8]]> {
match realargs::take() {
realstd::option::Some(a) => Some(a),
realstd::option::None => None,
@ -47,12 +47,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
/// Give the global arguments to global storage.
///
/// It is an error if the arguments already exist.
#[cfg(not(test))] pub fn put(args: ~[~str]) { imp::put(args) }
#[cfg(test)] pub fn put(args: ~[~str]) { realargs::put(args) }
#[cfg(not(test))] pub fn put(args: ~[~[u8]]) { imp::put(args) }
#[cfg(test)] pub fn put(args: ~[~[u8]]) { realargs::put(args) }
/// Make a clone of the global arguments.
#[cfg(not(test))] pub fn clone() -> Option<~[~str]> { imp::clone() }
#[cfg(test)] pub fn clone() -> Option<~[~str]> {
#[cfg(not(test))] pub fn clone() -> Option<~[~[u8]]> { imp::clone() }
#[cfg(test)] pub fn clone() -> Option<~[~[u8]]> {
match realargs::clone() {
realstd::option::Some(a) => Some(a),
realstd::option::None => None,
@ -65,15 +65,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
mod imp {
use cast;
use clone::Clone;
#[cfg(not(test))] use libc;
use option::{Option, Some, None};
use ptr::RawPtr;
use iter::Iterator;
#[cfg(not(test))] use str;
use unstable::finally::Finally;
use unstable::mutex::{Mutex, MUTEX_INIT};
use mem;
#[cfg(not(test))] use vec;
static mut global_args_ptr: uint = 0;
static mut lock: Mutex = MUTEX_INIT;
@ -90,15 +87,15 @@ mod imp {
lock.destroy();
}
pub fn take() -> Option<~[~str]> {
pub fn take() -> Option<~[~[u8]]> {
with_lock(|| unsafe {
let ptr = get_global_ptr();
let val = mem::replace(&mut *ptr, None);
val.as_ref().map(|s: &~~[~str]| (**s).clone())
val.as_ref().map(|s: &~~[~[u8]]| (**s).clone())
})
}
pub fn put(args: ~[~str]) {
pub fn put(args: ~[~[u8]]) {
with_lock(|| unsafe {
let ptr = get_global_ptr();
rtassert!((*ptr).is_none());
@ -106,10 +103,10 @@ mod imp {
})
}
pub fn clone() -> Option<~[~str]> {
pub fn clone() -> Option<~[~[u8]]> {
with_lock(|| unsafe {
let ptr = get_global_ptr();
(*ptr).as_ref().map(|s: &~~[~str]| (**s).clone())
(*ptr).as_ref().map(|s: &~~[~[u8]]| (**s).clone())
})
}
@ -126,15 +123,20 @@ mod imp {
})
}
fn get_global_ptr() -> *mut Option<~~[~str]> {
fn get_global_ptr() -> *mut Option<~~[~[u8]]> {
unsafe { cast::transmute(&global_args_ptr) }
}
// Copied from `os`.
#[cfg(not(test))]
unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~str] {
unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~[u8]] {
use c_str::CString;
use {vec, libc};
use vec::CloneableVector;
vec::from_fn(argc as uint, |i| {
str::raw::from_c_str(*(argv as **libc::c_char).offset(i as int))
let cs = CString::new(*(argv as **libc::c_char).offset(i as int), false);
cs.as_bytes_no_nul().to_owned()
})
}
@ -149,7 +151,7 @@ mod imp {
// Preserve the actual global state.
let saved_value = take();
let expected = ~[~"happy", ~"today?"];
let expected = ~[bytes!("happy").to_owned(), bytes!("today?").to_owned()];
put(expected.clone());
assert!(clone() == Some(expected.clone()));
@ -179,15 +181,15 @@ mod imp {
pub fn cleanup() {
}
pub fn take() -> Option<~[~str]> {
pub fn take() -> Option<~[~[u8]]> {
fail!()
}
pub fn put(_args: ~[~str]) {
pub fn put(_args: ~[~[u8]]) {
fail!()
}
pub fn clone() -> Option<~[~str]> {
pub fn clone() -> Option<~[~[u8]]> {
fail!()
}
}