auto merge of #12283 : kballard/rust/env-args-bytes, r=erickt
Change `os::args()` and `os::env()` to use `str::from_utf8_lossy()`. Add new functions `os::args_as_bytes()` and `os::env_as_bytes()` to retrieve the args/env as byte vectors instead. The existing methods were left returning strings because I expect that the common use-case is to want string handling. Fixes #7188.
This commit is contained in:
commit
fba32ea79f
@ -79,6 +79,7 @@ use str;
|
||||
use vec::{ImmutableVector, MutableVector};
|
||||
use vec;
|
||||
use rt::global_heap::malloc_raw;
|
||||
use unstable::raw::Slice;
|
||||
|
||||
/// The representation of a C String.
|
||||
///
|
||||
@ -169,6 +170,7 @@ impl CString {
|
||||
}
|
||||
|
||||
/// Converts the CString into a `&[u8]` without copying.
|
||||
/// Includes the terminating NUL byte.
|
||||
///
|
||||
/// # Failure
|
||||
///
|
||||
@ -177,7 +179,21 @@ impl CString {
|
||||
pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
|
||||
if self.buf.is_null() { fail!("CString is null!"); }
|
||||
unsafe {
|
||||
cast::transmute((self.buf, self.len() + 1))
|
||||
cast::transmute(Slice { data: self.buf, len: self.len() + 1 })
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the CString into a `&[u8]` without copying.
|
||||
/// Does not include the terminating NUL byte.
|
||||
///
|
||||
/// # Failure
|
||||
///
|
||||
/// Fails if the CString is null.
|
||||
#[inline]
|
||||
pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
|
||||
// Refuse to build a slice over a null buffer.
if self.buf.is_null() { fail!("CString is null!"); }
|
||||
unsafe {
|
||||
// Reinterpret a raw (data, len) `Slice` value as a borrowed `&[u8]`.
// The length is `self.len()` with no `+ 1`, so the trailing NUL that
// `as_bytes` includes is left out here.
cast::transmute(Slice { data: self.buf, len: self.len() })
|
||||
}
|
||||
}
|
||||
|
||||
@ -189,8 +205,7 @@ impl CString {
|
||||
/// Fails if the CString is null.
|
||||
#[inline]
|
||||
pub fn as_str<'a>(&'a self) -> Option<&'a str> {
|
||||
let buf = self.as_bytes();
|
||||
let buf = buf.slice_to(buf.len()-1); // chop off the trailing NUL
|
||||
let buf = self.as_bytes_no_nul();
|
||||
str::from_utf8(buf)
|
||||
}
|
||||
|
||||
@ -417,7 +432,7 @@ mod tests {
|
||||
let expected = ["zero", "one"];
|
||||
let mut it = expected.iter();
|
||||
let result = from_c_multistring(ptr as *libc::c_char, None, |c| {
|
||||
let cbytes = c.as_bytes().slice_to(c.len());
|
||||
let cbytes = c.as_bytes_no_nul();
|
||||
assert_eq!(cbytes, it.next().unwrap().as_bytes());
|
||||
});
|
||||
assert_eq!(result, 2);
|
||||
@ -552,6 +567,17 @@ mod tests {
|
||||
assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0));
|
||||
}
|
||||
|
||||
// Checks that `as_bytes_no_nul` returns exactly the input bytes, with no
// terminating NUL, for three inputs: a plain string, the empty string, and a
// byte sequence containing 0xff.
#[test]
|
||||
fn test_as_bytes_no_nul() {
|
||||
// Ordinary string input.
let c_str = "hello".to_c_str();
|
||||
assert_eq!(c_str.as_bytes_no_nul(), bytes!("hello"));
|
||||
// Empty input must produce an empty slice.
let c_str = "".to_c_str();
|
||||
let exp: &[u8] = [];
|
||||
assert_eq!(c_str.as_bytes_no_nul(), exp);
|
||||
// Input built from raw bytes, including the byte 0xff.
let c_str = bytes!("foo", 0xff).to_c_str();
|
||||
assert_eq!(c_str.as_bytes_no_nul(), bytes!("foo", 0xff));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_fail]
|
||||
fn test_as_bytes_fail() {
|
||||
@ -559,6 +585,13 @@ mod tests {
|
||||
c_str.as_bytes();
|
||||
}
|
||||
|
||||
// Checks that `as_bytes_no_nul` fails when the CString wraps a null pointer.
#[test]
|
||||
#[should_fail]
|
||||
fn test_as_bytes_no_nul_fail() {
|
||||
// Construct a CString directly over a null pointer.
let c_str = unsafe { CString::new(ptr::null(), false) };
|
||||
// Expected to fail; `#[should_fail]` turns that failure into a pass.
c_str.as_bytes_no_nul();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_as_str() {
|
||||
let c_str = "hello".to_c_str();
|
||||
|
102
src/libstd/os.rs
102
src/libstd/os.rs
@ -53,6 +53,8 @@ use ptr::RawPtr;
|
||||
|
||||
#[cfg(unix)]
|
||||
use c_str::ToCStr;
|
||||
#[cfg(windows)]
|
||||
use str::OwnedStr;
|
||||
|
||||
/// Delegates to the libc close() function, returning the same return value.
|
||||
pub fn close(fd: int) -> int {
|
||||
@ -158,10 +160,23 @@ fn with_env_lock<T>(f: || -> T) -> T {
|
||||
|
||||
/// Returns a vector of (variable, value) pairs for all the environment
|
||||
/// variables of the current process.
|
||||
///
|
||||
/// Invalid UTF-8 bytes are replaced with \uFFFD. See `str::from_utf8_lossy()`
|
||||
/// for details.
|
||||
pub fn env() -> ~[(~str,~str)] {
|
||||
// Start from the byte-vector pairs and convert each pair to owned strings.
env_as_bytes().move_iter().map(|(k,v)| {
|
||||
// Name and value are decoded independently of each other.
let k = str::from_utf8_lossy(k).into_owned();
|
||||
let v = str::from_utf8_lossy(v).into_owned();
|
||||
(k,v)
|
||||
}).collect()
|
||||
}
|
||||
|
||||
/// Returns a vector of (variable, value) byte-vector pairs for all the
|
||||
/// environment variables of the current process.
|
||||
pub fn env_as_bytes() -> ~[(~[u8],~[u8])] {
|
||||
unsafe {
|
||||
#[cfg(windows)]
|
||||
unsafe fn get_env_pairs() -> ~[~str] {
|
||||
unsafe fn get_env_pairs() -> ~[~[u8]] {
|
||||
use c_str;
|
||||
use str::StrSlice;
|
||||
|
||||
@ -176,13 +191,15 @@ pub fn env() -> ~[(~str,~str)] {
|
||||
}
|
||||
let mut result = ~[];
|
||||
c_str::from_c_multistring(ch as *c_char, None, |cstr| {
|
||||
result.push(cstr.as_str().unwrap().to_owned());
|
||||
result.push(cstr.as_bytes_no_nul().to_owned());
|
||||
});
|
||||
FreeEnvironmentStringsA(ch);
|
||||
result
|
||||
}
|
||||
#[cfg(unix)]
|
||||
unsafe fn get_env_pairs() -> ~[~str] {
|
||||
unsafe fn get_env_pairs() -> ~[~[u8]] {
|
||||
use c_str::CString;
|
||||
|
||||
extern {
|
||||
fn rust_env_pairs() -> **c_char;
|
||||
}
|
||||
@ -193,20 +210,19 @@ pub fn env() -> ~[(~str,~str)] {
|
||||
}
|
||||
let mut result = ~[];
|
||||
ptr::array_each(environ, |e| {
|
||||
let env_pair = str::raw::from_c_str(e);
|
||||
debug!("get_env_pairs: {}", env_pair);
|
||||
let env_pair = CString::new(e, false).as_bytes_no_nul().to_owned();
|
||||
result.push(env_pair);
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] {
|
||||
fn env_convert(input: ~[~[u8]]) -> ~[(~[u8], ~[u8])] {
|
||||
let mut pairs = ~[];
|
||||
for p in input.iter() {
|
||||
let vs: ~[&str] = p.splitn('=', 1).collect();
|
||||
debug!("splitting: len: {}", vs.len());
|
||||
assert_eq!(vs.len(), 2);
|
||||
pairs.push((vs[0].to_owned(), vs[1].to_owned()));
|
||||
let vs: ~[&[u8]] = p.splitn(1, |b| *b == '=' as u8).collect();
|
||||
let key = vs[0].to_owned();
|
||||
let val = (if vs.len() < 2 { ~[] } else { vs[1].to_owned() });
|
||||
pairs.push((key, val));
|
||||
}
|
||||
pairs
|
||||
}
|
||||
@ -220,14 +236,34 @@ pub fn env() -> ~[(~str,~str)] {
|
||||
#[cfg(unix)]
|
||||
/// Fetches the environment variable `n` from the current process, returning
|
||||
/// None if the variable isn't set.
|
||||
///
|
||||
/// Any invalid UTF-8 bytes in the value are replaced by \uFFFD. See
|
||||
/// `str::from_utf8_lossy()` for details.
|
||||
///
|
||||
/// # Failure
|
||||
///
|
||||
/// Fails if `n` has any interior NULs.
|
||||
pub fn getenv(n: &str) -> Option<~str> {
|
||||
// Delegate the lookup to the byte-vector variant, then lossily decode the
// value if one was found.
getenv_as_bytes(n).map(|v| str::from_utf8_lossy(v).into_owned())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
/// Fetches the environment variable `n` byte vector from the current process,
|
||||
/// returning None if the variable isn't set.
|
||||
///
|
||||
/// # Failure
|
||||
///
|
||||
/// Fails if `n` has any interior NULs.
|
||||
pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
|
||||
use c_str::CString;
|
||||
|
||||
unsafe {
|
||||
with_env_lock(|| {
|
||||
let s = n.with_c_str(|buf| libc::getenv(buf));
|
||||
if s.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(str::raw::from_c_str(s))
|
||||
Some(CString::new(s, false).as_bytes_no_nul().to_owned())
|
||||
}
|
||||
})
|
||||
}
|
||||
@ -249,10 +285,21 @@ pub fn getenv(n: &str) -> Option<~str> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
/// Fetches the environment variable `n` byte vector from the current process,
|
||||
/// returning None if the variable isn't set.
pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
|
||||
// On Windows this goes through the string-returning `getenv` and converts
// the resulting string into its bytes.
// NOTE(review): the bytes are therefore whatever `getenv` produced as a
// string, not the raw OS value -- confirm against the Windows `getenv`.
getenv(n).map(|s| s.into_bytes())
|
||||
}
|
||||
|
||||
|
||||
#[cfg(unix)]
|
||||
/// Sets the environment variable `n` to the value `v` for the currently running
|
||||
/// process
|
||||
///
|
||||
/// # Failure
|
||||
///
|
||||
/// Fails if `n` or `v` have any interior NULs.
|
||||
pub fn setenv(n: &str, v: &str) {
|
||||
unsafe {
|
||||
with_env_lock(|| {
|
||||
@ -283,6 +330,10 @@ pub fn setenv(n: &str, v: &str) {
|
||||
}
|
||||
|
||||
/// Remove a variable from the environment entirely
|
||||
///
|
||||
/// # Failure
|
||||
///
|
||||
/// Fails (on unix) if `n` has any interior NULs.
|
||||
pub fn unsetenv(n: &str) {
|
||||
#[cfg(unix)]
|
||||
fn _unsetenv(n: &str) {
|
||||
@ -722,10 +773,12 @@ pub fn get_exit_status() -> int {
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
|
||||
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] {
|
||||
use c_str::CString;
|
||||
|
||||
let mut args = ~[];
|
||||
for i in range(0u, argc as uint) {
|
||||
args.push(str::raw::from_c_str(*argv.offset(i as int)));
|
||||
args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned())
|
||||
}
|
||||
args
|
||||
}
|
||||
@ -736,7 +789,7 @@ unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
|
||||
* Returns a list of the command line arguments.
|
||||
*/
|
||||
#[cfg(target_os = "macos")]
|
||||
fn real_args() -> ~[~str] {
|
||||
fn real_args_as_bytes() -> ~[~[u8]] {
|
||||
unsafe {
|
||||
let (argc, argv) = (*_NSGetArgc() as int,
|
||||
*_NSGetArgv() as **c_char);
|
||||
@ -747,7 +800,7 @@ fn real_args() -> ~[~str] {
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_os = "android")]
|
||||
#[cfg(target_os = "freebsd")]
|
||||
fn real_args() -> ~[~str] {
|
||||
fn real_args_as_bytes() -> ~[~[u8]] {
|
||||
use rt;
|
||||
|
||||
match rt::args::clone() {
|
||||
@ -756,6 +809,11 @@ fn real_args() -> ~[~str] {
|
||||
}
|
||||
}
|
||||
|
||||
// Non-Windows implementation of the string argument list: takes the
// byte-vector arguments and converts each one to an owned string with
// `str::from_utf8_lossy`.
#[cfg(not(windows))]
|
||||
fn real_args() -> ~[~str] {
|
||||
real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn real_args() -> ~[~str] {
|
||||
use vec;
|
||||
@ -786,6 +844,11 @@ fn real_args() -> ~[~str] {
|
||||
return args;
|
||||
}
|
||||
|
||||
// Windows implementation of the byte-vector argument list: takes the string
// arguments from `real_args` and converts each string into its bytes.
#[cfg(windows)]
|
||||
fn real_args_as_bytes() -> ~[~[u8]] {
|
||||
real_args().move_iter().map(|s| s.into_bytes()).collect()
|
||||
}
|
||||
|
||||
type LPCWSTR = *u16;
|
||||
|
||||
#[cfg(windows)]
|
||||
@ -803,10 +866,19 @@ extern "system" {
|
||||
|
||||
/// Returns the arguments which this program was started with (normally passed
|
||||
/// via the command line).
|
||||
///
|
||||
/// The arguments are interpreted as UTF-8, with invalid bytes replaced with \uFFFD.
|
||||
/// See `str::from_utf8_lossy` for details.
|
||||
pub fn args() -> ~[~str] {
|
||||
// Thin public wrapper over the platform-specific `real_args`.
real_args()
|
||||
}
|
||||
|
||||
/// Returns the arguments which this program was started with (normally passed
|
||||
/// via the command line) as byte vectors.
|
||||
pub fn args_as_bytes() -> ~[~[u8]] {
|
||||
// Thin public wrapper over the platform-specific `real_args_as_bytes`.
real_args_as_bytes()
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
extern {
|
||||
// These functions are in crt_externs.h.
|
||||
|
@ -578,8 +578,7 @@ impl BytesContainer for ~[u8] {
|
||||
impl BytesContainer for CString {
|
||||
#[inline]
|
||||
fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
|
||||
let s = self.as_bytes();
|
||||
s.slice_to(s.len()-1)
|
||||
self.as_bytes_no_nul()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,8 +36,8 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
|
||||
#[cfg(test)] pub unsafe fn cleanup() { realargs::cleanup() }
|
||||
|
||||
/// Take the global arguments from global storage.
|
||||
#[cfg(not(test))] pub fn take() -> Option<~[~str]> { imp::take() }
|
||||
#[cfg(test)] pub fn take() -> Option<~[~str]> {
|
||||
#[cfg(not(test))] pub fn take() -> Option<~[~[u8]]> { imp::take() }
|
||||
#[cfg(test)] pub fn take() -> Option<~[~[u8]]> {
|
||||
match realargs::take() {
|
||||
realstd::option::Some(a) => Some(a),
|
||||
realstd::option::None => None,
|
||||
@ -47,12 +47,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
|
||||
/// Give the global arguments to global storage.
|
||||
///
|
||||
/// It is an error if the arguments already exist.
|
||||
#[cfg(not(test))] pub fn put(args: ~[~str]) { imp::put(args) }
|
||||
#[cfg(test)] pub fn put(args: ~[~str]) { realargs::put(args) }
|
||||
#[cfg(not(test))] pub fn put(args: ~[~[u8]]) { imp::put(args) }
|
||||
#[cfg(test)] pub fn put(args: ~[~[u8]]) { realargs::put(args) }
|
||||
|
||||
/// Make a clone of the global arguments.
|
||||
#[cfg(not(test))] pub fn clone() -> Option<~[~str]> { imp::clone() }
|
||||
#[cfg(test)] pub fn clone() -> Option<~[~str]> {
|
||||
#[cfg(not(test))] pub fn clone() -> Option<~[~[u8]]> { imp::clone() }
|
||||
#[cfg(test)] pub fn clone() -> Option<~[~[u8]]> {
|
||||
match realargs::clone() {
|
||||
realstd::option::Some(a) => Some(a),
|
||||
realstd::option::None => None,
|
||||
@ -65,15 +65,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
|
||||
mod imp {
|
||||
use cast;
|
||||
use clone::Clone;
|
||||
#[cfg(not(test))] use libc;
|
||||
use option::{Option, Some, None};
|
||||
use ptr::RawPtr;
|
||||
use iter::Iterator;
|
||||
#[cfg(not(test))] use str;
|
||||
use unstable::finally::Finally;
|
||||
use unstable::mutex::{Mutex, MUTEX_INIT};
|
||||
use mem;
|
||||
#[cfg(not(test))] use vec;
|
||||
|
||||
static mut global_args_ptr: uint = 0;
|
||||
static mut lock: Mutex = MUTEX_INIT;
|
||||
@ -90,15 +87,15 @@ mod imp {
|
||||
lock.destroy();
|
||||
}
|
||||
|
||||
pub fn take() -> Option<~[~str]> {
|
||||
pub fn take() -> Option<~[~[u8]]> {
|
||||
with_lock(|| unsafe {
|
||||
let ptr = get_global_ptr();
|
||||
let val = mem::replace(&mut *ptr, None);
|
||||
val.as_ref().map(|s: &~~[~str]| (**s).clone())
|
||||
val.as_ref().map(|s: &~~[~[u8]]| (**s).clone())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn put(args: ~[~str]) {
|
||||
pub fn put(args: ~[~[u8]]) {
|
||||
with_lock(|| unsafe {
|
||||
let ptr = get_global_ptr();
|
||||
rtassert!((*ptr).is_none());
|
||||
@ -106,10 +103,10 @@ mod imp {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn clone() -> Option<~[~str]> {
|
||||
pub fn clone() -> Option<~[~[u8]]> {
|
||||
with_lock(|| unsafe {
|
||||
let ptr = get_global_ptr();
|
||||
(*ptr).as_ref().map(|s: &~~[~str]| (**s).clone())
|
||||
(*ptr).as_ref().map(|s: &~~[~[u8]]| (**s).clone())
|
||||
})
|
||||
}
|
||||
|
||||
@ -126,15 +123,20 @@ mod imp {
|
||||
})
|
||||
}
|
||||
|
||||
fn get_global_ptr() -> *mut Option<~~[~str]> {
|
||||
fn get_global_ptr() -> *mut Option<~~[~[u8]]> {
|
||||
unsafe { cast::transmute(&global_args_ptr) }
|
||||
}
|
||||
|
||||
// Copied from `os`.
|
||||
#[cfg(not(test))]
|
||||
unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~str] {
|
||||
unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~[u8]] {
|
||||
use c_str::CString;
|
||||
use {vec, libc};
|
||||
use vec::CloneableVector;
|
||||
|
||||
vec::from_fn(argc as uint, |i| {
|
||||
str::raw::from_c_str(*(argv as **libc::c_char).offset(i as int))
|
||||
let cs = CString::new(*(argv as **libc::c_char).offset(i as int), false);
|
||||
cs.as_bytes_no_nul().to_owned()
|
||||
})
|
||||
}
|
||||
|
||||
@ -149,7 +151,7 @@ mod imp {
|
||||
// Preserve the actual global state.
|
||||
let saved_value = take();
|
||||
|
||||
let expected = ~[~"happy", ~"today?"];
|
||||
let expected = ~[bytes!("happy").to_owned(), bytes!("today?").to_owned()];
|
||||
|
||||
put(expected.clone());
|
||||
assert!(clone() == Some(expected.clone()));
|
||||
@ -179,15 +181,15 @@ mod imp {
|
||||
pub fn cleanup() {
|
||||
}
|
||||
|
||||
pub fn take() -> Option<~[~str]> {
|
||||
pub fn take() -> Option<~[~[u8]]> {
|
||||
fail!()
|
||||
}
|
||||
|
||||
pub fn put(_args: ~[~str]) {
|
||||
pub fn put(_args: ~[~[u8]]) {
|
||||
fail!()
|
||||
}
|
||||
|
||||
pub fn clone() -> Option<~[~str]> {
|
||||
pub fn clone() -> Option<~[~[u8]]> {
|
||||
fail!()
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user