From c73d5ce8ab0033d88027e6ab3db956731daf544e Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Fri, 14 Feb 2014 14:42:40 -0800 Subject: [PATCH] Use str::from_utf8_lossy() in os::args(), add os::args_as_bytes() os::args() was using str::raw::from_c_str(), which would assert if the C-string wasn't valid UTF-8. Switch to using from_utf8_lossy() instead, and add a separate function os::args_as_bytes() that returns the ~[u8] byte-vectors instead. --- src/libstd/os.rs | 31 ++++++++++++++++++++++++++---- src/libstd/rt/args.rs | 44 ++++++++++++++++++++++--------------------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 78cae296457..69d51025234 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -53,6 +53,8 @@ use ptr::RawPtr; #[cfg(unix)] use c_str::ToCStr; +#[cfg(windows)] +use str::OwnedStr; /// Delegates to the libc close() function, returning the same return value. pub fn close(fd: int) -> int { @@ -722,10 +724,12 @@ pub fn get_exit_status() -> int { } #[cfg(target_os = "macos")] -unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] { +unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] { + use c_str::CString; + let mut args = ~[]; for i in range(0u, argc as uint) { - args.push(str::raw::from_c_str(*argv.offset(i as int))); + args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned()) } args } @@ -736,7 +740,7 @@ unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] { * Returns a list of the command line arguments. */ #[cfg(target_os = "macos")] -fn real_args() -> ~[~str] { +fn real_args_as_bytes() -> ~[~[u8]] { unsafe { let (argc, argv) = (*_NSGetArgc() as int, *_NSGetArgv() as **c_char); @@ -747,7 +751,7 @@ fn real_args() -> ~[~str] { #[cfg(target_os = "linux")] #[cfg(target_os = "android")] #[cfg(target_os = "freebsd")] -fn real_args() -> ~[~str] { +fn real_args_as_bytes() -> ~[~[u8]] { use rt; match rt::args::clone() { @@ -756,6 +760,11 @@ fn real_args() -> ~[~str] { } } +#[cfg(not(windows))] +fn real_args() -> ~[~str] { + real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect() +} + #[cfg(windows)] fn real_args() -> ~[~str] { use vec; @@ -786,6 +795,11 @@ fn real_args() -> ~[~str] { return args; } +#[cfg(windows)] +fn real_args_as_bytes() -> ~[~[u8]] { + real_args().move_iter().map(|s| s.into_bytes()).collect() +} + type LPCWSTR = *u16; #[cfg(windows)] @@ -803,10 +817,19 @@ extern "system" { /// Returns the arguments which this program was started with (normally passed /// via the command line). +/// +/// The arguments are interpreted as utf-8, with invalid bytes replaced with \uFFFD. +/// See `str::from_utf8_lossy` for details. pub fn args() -> ~[~str] { real_args() } +/// Returns the arguments which this program was started with (normally passed +/// via the command line) as byte vectors. +pub fn args_as_bytes() -> ~[~[u8]] { + real_args_as_bytes() +} + #[cfg(target_os = "macos")] extern { // These functions are in crt_externs.h. diff --git a/src/libstd/rt/args.rs b/src/libstd/rt/args.rs index cef03d66923..c417ea375fd 100644 --- a/src/libstd/rt/args.rs +++ b/src/libstd/rt/args.rs @@ -36,8 +36,8 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) } #[cfg(test)] pub unsafe fn cleanup() { realargs::cleanup() } /// Take the global arguments from global storage. -#[cfg(not(test))] pub fn take() -> Option<~[~str]> { imp::take() } -#[cfg(test)] pub fn take() -> Option<~[~str]> { +#[cfg(not(test))] pub fn take() -> Option<~[~[u8]]> { imp::take() } +#[cfg(test)] pub fn take() -> Option<~[~[u8]]> { match realargs::take() { realstd::option::Some(a) => Some(a), realstd::option::None => None, @@ -47,12 +47,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) } /// Give the global arguments to global storage. /// /// It is an error if the arguments already exist. -#[cfg(not(test))] pub fn put(args: ~[~str]) { imp::put(args) } -#[cfg(test)] pub fn put(args: ~[~str]) { realargs::put(args) } +#[cfg(not(test))] pub fn put(args: ~[~[u8]]) { imp::put(args) } +#[cfg(test)] pub fn put(args: ~[~[u8]]) { realargs::put(args) } /// Make a clone of the global arguments. -#[cfg(not(test))] pub fn clone() -> Option<~[~str]> { imp::clone() } -#[cfg(test)] pub fn clone() -> Option<~[~str]> { +#[cfg(not(test))] pub fn clone() -> Option<~[~[u8]]> { imp::clone() } +#[cfg(test)] pub fn clone() -> Option<~[~[u8]]> { match realargs::clone() { realstd::option::Some(a) => Some(a), realstd::option::None => None, @@ -65,15 +65,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) } mod imp { use cast; use clone::Clone; - #[cfg(not(test))] use libc; use option::{Option, Some, None}; use ptr::RawPtr; use iter::Iterator; - #[cfg(not(test))] use str; use unstable::finally::Finally; use unstable::mutex::{Mutex, MUTEX_INIT}; use mem; - #[cfg(not(test))] use vec; static mut global_args_ptr: uint = 0; static mut lock: Mutex = MUTEX_INIT; @@ -90,15 +87,15 @@ mod imp { lock.destroy(); } - pub fn take() -> Option<~[~str]> { + pub fn take() -> Option<~[~[u8]]> { with_lock(|| unsafe { let ptr = get_global_ptr(); let val = mem::replace(&mut *ptr, None); - val.as_ref().map(|s: &~~[~str]| (**s).clone()) + val.as_ref().map(|s: &~~[~[u8]]| (**s).clone()) }) } - pub fn put(args: ~[~str]) { + pub fn put(args: ~[~[u8]]) { with_lock(|| unsafe { let ptr = get_global_ptr(); rtassert!((*ptr).is_none()); @@ -106,10 +103,10 @@ mod imp { }) } - pub fn clone() -> Option<~[~str]> { + pub fn clone() -> Option<~[~[u8]]> { with_lock(|| unsafe { let ptr = get_global_ptr(); - (*ptr).as_ref().map(|s: &~~[~str]| (**s).clone()) + (*ptr).as_ref().map(|s: &~~[~[u8]]| (**s).clone()) }) } @@ -126,15 +123,20 @@ mod imp { }) } - fn get_global_ptr() -> *mut Option<~~[~str]> { + fn get_global_ptr() -> *mut Option<~~[~[u8]]> { unsafe { cast::transmute(&global_args_ptr) } } // Copied from `os`. #[cfg(not(test))] - unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~str] { + unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~[u8]] { + use c_str::CString; + use {vec, libc}; + use vec::CloneableVector; + vec::from_fn(argc as uint, |i| { - str::raw::from_c_str(*(argv as **libc::c_char).offset(i as int)) + let cs = CString::new(*(argv as **libc::c_char).offset(i as int), false); + cs.as_bytes_no_nul().to_owned() }) } @@ -149,7 +151,7 @@ mod imp { // Preserve the actual global state. let saved_value = take(); - let expected = ~[~"happy", ~"today?"]; + let expected = ~[bytes!("happy").to_owned(), bytes!("today?").to_owned()]; put(expected.clone()); assert!(clone() == Some(expected.clone())); @@ -179,15 +181,15 @@ mod imp { pub fn cleanup() { } - pub fn take() -> Option<~[~str]> { + pub fn take() -> Option<~[~[u8]]> { fail!() } - pub fn put(_args: ~[~str]) { + pub fn put(_args: ~[~[u8]]) { fail!() } - pub fn clone() -> Option<~[~str]> { + pub fn clone() -> Option<~[~[u8]]> { fail!() } }