std: Refactor process spawning on Unix

* Build up the argp/envp pointers while the `Command` is being constructed
  rather than only when `spawn` is called. This will allow better sharing of
  code between fork/exec paths.
* Rename `child_after_fork` to `exec` and have it only perform the exec half of
  the spawning. This also means the return type has changed to `io::Error`
  rather than `!` to represent errors that happen.
This commit is contained in:
Alex Crichton 2015-10-31 11:09:43 -07:00
parent 096dbf84c7
commit 6c41984690
4 changed files with 165 additions and 167 deletions

View File

@ -209,7 +209,9 @@ impl Command {
/// Add multiple arguments to pass to the program.
#[stable(feature = "process", since = "1.0.0")]
pub fn args<S: AsRef<OsStr>>(&mut self, args: &[S]) -> &mut Command {
self.inner.args(args.iter().map(AsRef::as_ref));
for arg in args {
self.arg(arg.as_ref());
}
self
}

View File

@ -12,8 +12,8 @@
#![stable(feature = "rust1", since = "1.0.0")]
use os::unix::raw::{uid_t, gid_t};
use os::unix::io::{FromRawFd, RawFd, AsRawFd, IntoRawFd};
use os::unix::raw::{uid_t, gid_t};
use process;
use sys;
use sys_common::{AsInnerMut, AsInner, FromInner, IntoInner};

View File

@ -13,27 +13,46 @@
use prelude::v1::*;
use os::unix::prelude::*;
use collections::HashMap;
use collections::hash_map::{HashMap, Entry};
use env;
use ffi::{OsString, OsStr, CString, CStr};
use fmt;
use io::{self, Error, ErrorKind};
use libc::{self, pid_t, c_void, c_int, gid_t, uid_t};
use libc::{self, pid_t, c_int, gid_t, uid_t, c_char};
use mem;
use ptr;
use sys::fd::FileDesc;
use sys::fs::{File, OpenOptions};
use sys::pipe::AnonPipe;
use sys::{self, cvt, cvt_r};
////////////////////////////////////////////////////////////////////////////////
// Command
////////////////////////////////////////////////////////////////////////////////
#[derive(Clone)]
pub struct Command {
// Currently we try hard to ensure that the call to `.exec()` doesn't
// actually allocate any memory. While many platforms try to ensure that
// memory allocation works after a fork in a multithreaded process, it's
// been observed to be buggy and somewhat unreliable, so we do our best to
// just not do it at all!
//
// Along those lines, the `argv` and `envp` raw pointers here are exactly
// what's gonna get passed to `execvp`. The `argv` array starts with the
// `program` and ends with a NULL, and the `envp` pointer, if present, is
// also null-terminated.
//
// Right now we don't support removing arguments, so there's no much fancy
// support there, but we support adding and removing environment variables,
// so a side table is used to track where in the `envp` array each key is
// located. Whenever we add a key we update it in place if it's already
// present, and whenever we remove a key we update the locations of all
// other keys.
program: CString,
args: Vec<CString>,
env: Option<HashMap<OsString, OsString>>, // Guaranteed to have no NULs.
env: Option<HashMap<OsString, (usize, CString)>>,
argv: Vec<*const c_char>,
envp: Option<Vec<*const c_char>>,
cwd: Option<CString>,
uid: Option<uid_t>,
gid: Option<gid_t>,
@ -44,10 +63,13 @@ pub struct Command {
impl Command {
pub fn new(program: &OsStr) -> Command {
let mut saw_nul = false;
let program = os2c(program, &mut saw_nul);
Command {
program: os2c(program, &mut saw_nul),
argv: vec![program.as_ptr(), 0 as *const _],
program: program,
args: Vec::new(),
env: None,
envp: None,
cwd: None,
uid: None,
gid: None,
@ -57,40 +79,79 @@ impl Command {
}
pub fn arg(&mut self, arg: &OsStr) {
self.args.push(os2c(arg, &mut self.saw_nul));
// Overwrite the trailing NULL pointer in `argv` and then add a new null
// pointer.
let arg = os2c(arg, &mut self.saw_nul);
self.argv[self.args.len() + 1] = arg.as_ptr();
self.argv.push(0 as *const _);
// Also make sure we keep track of the owned value to schedule a
// destructor for this memory.
self.args.push(arg);
}
pub fn args<'a, I: Iterator<Item = &'a OsStr>>(&mut self, args: I) {
let mut saw_nul = self.saw_nul;
self.args.extend(args.map(|arg| os2c(arg, &mut saw_nul)));
self.saw_nul = saw_nul;
}
fn init_env_map(&mut self) {
fn init_env_map(&mut self) -> (&mut HashMap<OsString, (usize, CString)>,
&mut Vec<*const c_char>) {
if self.env.is_none() {
// Will not add NULs to env: preexisting environment will not contain any.
self.env = Some(env::vars_os().collect());
let mut map = HashMap::new();
let mut envp = Vec::new();
for (k, v) in env::vars_os() {
let s = pair_to_key(&k, &v, &mut self.saw_nul);
envp.push(s.as_ptr());
map.insert(k, (envp.len() - 1, s));
}
envp.push(0 as *const _);
self.env = Some(map);
self.envp = Some(envp);
}
(self.env.as_mut().unwrap(), self.envp.as_mut().unwrap())
}
pub fn env(&mut self, key: &OsStr, val: &OsStr) {
let k = OsString::from_vec(os2c(key, &mut self.saw_nul).into_bytes());
let v = OsString::from_vec(os2c(val, &mut self.saw_nul).into_bytes());
let new_key = pair_to_key(key, val, &mut self.saw_nul);
let (map, envp) = self.init_env_map();
// Will not add NULs to env: return without inserting if any were seen.
if self.saw_nul {
return;
// If `key` is already present then we we just update `envp` in place
// (and store the owned value), but if it's not there we override the
// trailing NULL pointer, add a new NULL pointer, and store where we
// were located.
match map.entry(key.to_owned()) {
Entry::Occupied(mut e) => {
let (i, ref mut s) = *e.get_mut();
envp[i] = new_key.as_ptr();
*s = new_key;
}
Entry::Vacant(e) => {
let len = envp.len();
envp[len - 1] = new_key.as_ptr();
envp.push(0 as *const _);
e.insert((len - 1, new_key));
}
}
}
self.init_env_map();
self.env.as_mut()
.unwrap()
.insert(k, v);
}
pub fn env_remove(&mut self, key: &OsStr) {
self.init_env_map();
self.env.as_mut().unwrap().remove(key);
let (map, envp) = self.init_env_map();
// If we actually ended up removing a key, then we need to update the
// position of all keys that come after us in `envp` because they're all
// one element sooner now.
if let Some((i, _)) = map.remove(key) {
envp.remove(i);
for (_, &mut (ref mut j, _)) in map.iter_mut() {
if *j >= i {
*j -= 1;
}
}
}
}
pub fn env_clear(&mut self) {
self.env = Some(HashMap::new())
self.env = Some(HashMap::new());
self.envp = Some(vec![0 as *const _]);
}
pub fn cwd(&mut self, dir: &OsStr) {
self.cwd = Some(os2c(dir, &mut self.saw_nul));
}
@ -112,6 +173,18 @@ fn os2c(s: &OsStr, saw_nul: &mut bool) -> CString {
})
}
fn pair_to_key(key: &OsStr, value: &OsStr, saw_nul: &mut bool) -> CString {
let (key, value) = (key.as_bytes(), value.as_bytes());
let mut v = Vec::with_capacity(key.len() + value.len() + 1);
v.extend(key);
v.push(b'=');
v.extend(value);
CString::new(v).unwrap_or_else(|_e| {
*saw_nul = true;
CString::new("foo=bar").unwrap()
})
}
impl fmt::Debug for Command {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
try!(write!(f, "{:?}", self.program));
@ -218,20 +291,28 @@ impl Process {
return Err(io::Error::new(ErrorKind::InvalidInput, "nul byte found in provided data"));
}
let dirp = cfg.cwd.as_ref().map(|c| c.as_ptr()).unwrap_or(ptr::null());
let (envp, _a, _b) = make_envp(cfg.env.as_ref());
let (argv, _a) = make_argv(&cfg.program, &cfg.args);
let (input, output) = try!(sys::pipe::anon_pipe());
let pid = unsafe {
match libc::fork() {
match try!(cvt(libc::fork())) {
0 => {
drop(input);
Process::child_after_fork(cfg, output, argv, envp, dirp,
in_fd, out_fd, err_fd)
let err = Process::exec(cfg, in_fd, out_fd, err_fd);
let errno = err.raw_os_error().unwrap_or(libc::EINVAL) as u32;
let bytes = [
(errno >> 24) as u8,
(errno >> 16) as u8,
(errno >> 8) as u8,
(errno >> 0) as u8,
CLOEXEC_MSG_FOOTER[0], CLOEXEC_MSG_FOOTER[1],
CLOEXEC_MSG_FOOTER[2], CLOEXEC_MSG_FOOTER[3]
];
// pipe I/O up to PIPE_BUF bytes should be atomic, and then
// we want to be sure we *don't* run at_exit destructors as
// we're being torn down regardless
assert!(output.write(&bytes).is_ok());
libc::_exit(1)
}
n if n < 0 => return Err(Error::last_os_error()),
n => n,
}
};
@ -306,29 +387,15 @@ impl Process {
// allocation). Instead we just close it manually. This will never
// have the drop glue anyway because this code never returns (the
// child will either exec() or invoke libc::exit)
unsafe fn child_after_fork(cfg: &Command,
mut output: AnonPipe,
argv: *const *const libc::c_char,
envp: *const libc::c_void,
dirp: *const libc::c_char,
in_fd: Stdio,
out_fd: Stdio,
err_fd: Stdio) -> ! {
fn fail(output: &mut AnonPipe) -> ! {
let errno = sys::os::errno() as u32;
let bytes = [
(errno >> 24) as u8,
(errno >> 16) as u8,
(errno >> 8) as u8,
(errno >> 0) as u8,
CLOEXEC_MSG_FOOTER[0], CLOEXEC_MSG_FOOTER[1],
CLOEXEC_MSG_FOOTER[2], CLOEXEC_MSG_FOOTER[3]
];
// pipe I/O up to PIPE_BUF bytes should be atomic, and then we want
// to be sure we *don't* run at_exit destructors as we're being torn
// down regardless
assert!(output.write(&bytes).is_ok());
unsafe { libc::_exit(1) }
unsafe fn exec(cfg: &Command,
in_fd: Stdio,
out_fd: Stdio,
err_fd: Stdio) -> io::Error {
macro_rules! try {
($e:expr) => (match $e {
Ok(e) => e,
Err(e) => return e,
})
}
// Make sure that the source descriptors are not an stdio descriptor,
@ -337,30 +404,30 @@ impl Process {
// suppose we want the child's stderr to be the parent's stdout, and
// the child's stdout to be the parent's stderr. No matter which we
// dup first, the second will get overwritten prematurely.
let maybe_migrate = |src: Stdio, output: &mut AnonPipe| {
let maybe_migrate = |src: Stdio| {
match src {
Stdio::Raw(fd @ libc::STDIN_FILENO) |
Stdio::Raw(fd @ libc::STDOUT_FILENO) |
Stdio::Raw(fd @ libc::STDERR_FILENO) => {
let fd = match cvt_r(|| libc::dup(fd)) {
Ok(fd) => fd,
Err(_) => fail(output),
};
let fd = FileDesc::new(fd);
fd.set_cloexec();
Stdio::Raw(fd.into_raw())
},
cvt_r(|| libc::dup(fd)).map(|fd| {
let fd = FileDesc::new(fd);
fd.set_cloexec();
Stdio::Raw(fd.into_raw())
})
}
s @ Stdio::None |
s @ Stdio::Inherit |
s @ Stdio::Raw(_) => s,
s @ Stdio::Raw(_) => Ok(s),
}
};
let in_fd = try!(maybe_migrate(in_fd));
let out_fd = try!(maybe_migrate(out_fd));
let err_fd = try!(maybe_migrate(err_fd));
let setup = |src: Stdio, dst: c_int| {
match src {
Stdio::Inherit => true,
Stdio::Raw(fd) => cvt_r(|| libc::dup2(fd, dst)).is_ok(),
Stdio::Inherit => Ok(()),
Stdio::Raw(fd) => cvt_r(|| libc::dup2(fd, dst)).map(|_| ()),
// If a stdio file descriptor is set to be ignored, we open up
// /dev/null into that file descriptor. Otherwise, the first
@ -373,29 +440,18 @@ impl Process {
opts.write(dst != libc::STDIN_FILENO);
let devnull = CStr::from_ptr(b"/dev/null\0".as_ptr()
as *const _);
if let Ok(f) = File::open_c(devnull, &opts) {
cvt_r(|| libc::dup2(f.fd().raw(), dst)).is_ok()
} else {
false
}
File::open_c(devnull, &opts).and_then(|f| {
cvt_r(|| libc::dup2(f.fd().raw(), dst)).map(|_| ())
})
}
}
};
// Make sure we migrate all source descriptors before
// we start overwriting them
let in_fd = maybe_migrate(in_fd, &mut output);
let out_fd = maybe_migrate(out_fd, &mut output);
let err_fd = maybe_migrate(err_fd, &mut output);
if !setup(in_fd, libc::STDIN_FILENO) { fail(&mut output) }
if !setup(out_fd, libc::STDOUT_FILENO) { fail(&mut output) }
if !setup(err_fd, libc::STDERR_FILENO) { fail(&mut output) }
try!(setup(in_fd, libc::STDIN_FILENO));
try!(setup(out_fd, libc::STDOUT_FILENO));
try!(setup(err_fd, libc::STDERR_FILENO));
if let Some(u) = cfg.gid {
if libc::setgid(u as libc::gid_t) != 0 {
fail(&mut output);
}
try!(cvt(libc::setgid(u as gid_t)));
}
if let Some(u) = cfg.uid {
// When dropping privileges from root, the `setgroups` call
@ -407,9 +463,7 @@ impl Process {
// privilege dropping function.
let _ = libc::setgroups(0, ptr::null());
if libc::setuid(u as libc::uid_t) != 0 {
fail(&mut output);
}
try!(cvt(libc::setuid(u as uid_t)));
}
if cfg.session_leader {
// Don't check the error of setsid because it fails if we're the
@ -417,16 +471,15 @@ impl Process {
// error, but ignore it anyway.
let _ = libc::setsid();
}
if !dirp.is_null() && libc::chdir(dirp) == -1 {
fail(&mut output);
if let Some(ref cwd) = cfg.cwd {
try!(cvt(libc::chdir(cwd.as_ptr())));
}
if !envp.is_null() {
*sys::os::environ() = envp as *const _;
if let Some(ref envp) = cfg.envp {
*sys::os::environ() = envp.as_ptr();
}
#[cfg(not(target_os = "nacl"))]
unsafe fn reset_signal_handling(output: &mut AnonPipe) {
use mem;
// NaCl has no signal support.
if cfg!(not(target_os = "nacl")) {
// Reset signal handling so the child process starts in a
// standardized state. libstd ignores SIGPIPE, and signal-handling
// libraries often set a mask. Child processes inherit ignored
@ -435,23 +488,17 @@ impl Process {
// need to clean things up now to avoid confusing the program
// we're about to run.
let mut set: libc::sigset_t = mem::uninitialized();
if libc::sigemptyset(&mut set) != 0 ||
libc::pthread_sigmask(libc::SIG_SETMASK, &set, ptr::null_mut()) != 0 ||
libc::signal(
libc::SIGPIPE, mem::transmute(libc::SIG_DFL)
) == mem::transmute(libc::SIG_ERR)
{
fail(output);
try!(cvt(libc::sigemptyset(&mut set)));
try!(cvt(libc::pthread_sigmask(libc::SIG_SETMASK, &set,
ptr::null_mut())));
let ret = libc::signal(libc::SIGPIPE, libc::SIG_DFL);
if ret == libc::SIG_ERR {
return io::Error::last_os_error()
}
}
#[cfg(target_os = "nacl")]
unsafe fn reset_signal_handling(_output: &mut AnonPipe) {
// NaCl has no signal support.
}
reset_signal_handling(&mut output);
let _ = libc::execvp(*argv, argv);
fail(&mut output)
libc::execvp(cfg.argv[0], cfg.argv.as_ptr());
io::Error::last_os_error()
}
pub fn id(&self) -> u32 {
@ -477,54 +524,6 @@ impl Process {
}
}
fn make_argv(prog: &CString, args: &[CString])
-> (*const *const libc::c_char, Vec<*const libc::c_char>)
{
let mut ptrs: Vec<*const libc::c_char> = Vec::with_capacity(args.len()+1);
// Convert the CStrings into an array of pointers. Also return the
// vector that owns the raw pointers, to ensure they live long
// enough.
ptrs.push(prog.as_ptr());
ptrs.extend(args.iter().map(|tmp| tmp.as_ptr()));
// Add a terminating null pointer (required by libc).
ptrs.push(ptr::null());
(ptrs.as_ptr(), ptrs)
}
fn make_envp(env: Option<&HashMap<OsString, OsString>>)
-> (*const c_void, Vec<Vec<u8>>, Vec<*const libc::c_char>)
{
// On posixy systems we can pass a char** for envp, which is a
// null-terminated array of "k=v\0" strings. As with make_argv, we
// return two vectors that own the data to ensure that they live
// long enough.
if let Some(env) = env {
let mut tmps = Vec::with_capacity(env.len());
for pair in env {
let mut kv = Vec::new();
kv.extend_from_slice(pair.0.as_bytes());
kv.push('=' as u8);
kv.extend_from_slice(pair.1.as_bytes());
kv.push(0); // terminating null
tmps.push(kv);
}
let mut ptrs: Vec<*const libc::c_char> =
tmps.iter()
.map(|tmp| tmp.as_ptr() as *const libc::c_char)
.collect();
ptrs.push(ptr::null());
(ptrs.as_ptr() as *const _, tmps, ptrs)
} else {
(ptr::null(), Vec::new(), Vec::new())
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -74,9 +74,6 @@ impl Command {
pub fn arg(&mut self, arg: &OsStr) {
self.args.push(arg.to_os_string())
}
pub fn args<'a, I: Iterator<Item = &'a OsStr>>(&mut self, args: I) {
self.args.extend(args.map(OsStr::to_os_string))
}
fn init_env_map(&mut self){
if self.env.is_none() {
self.env = Some(env::vars_os().map(|(key, val)| {