From 028754a2f7f62ed1e4df889fadfa74eb2d007109 Mon Sep 17 00:00:00 2001
From: The8472
Date: Sat, 21 Nov 2020 19:34:45 +0100
Subject: [PATCH] implement better availability probing for copy_file_range

Previously any attempt to copy to an immutable file (EPERM) would disable
copy_file_range support for the whole process.
---
 library/std/src/sys/unix/kernel_copy.rs | 82 +++++++++++++++----------
 1 file changed, 48 insertions(+), 34 deletions(-)

diff --git a/library/std/src/sys/unix/kernel_copy.rs b/library/std/src/sys/unix/kernel_copy.rs
index 5bfac803153..56efc93a1e2 100644
--- a/library/std/src/sys/unix/kernel_copy.rs
+++ b/library/std/src/sys/unix/kernel_copy.rs
@@ -59,7 +59,7 @@ use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use crate::os::unix::net::UnixStream;
 use crate::process::{ChildStderr, ChildStdin, ChildStdout};
 use crate::ptr;
-use crate::sync::atomic::{AtomicBool, Ordering};
+use crate::sync::atomic::{AtomicBool, AtomicU8, Ordering};
 use crate::sys::cvt;
 
 #[cfg(test)]
@@ -491,6 +491,13 @@ impl CopyResult {
     }
 }
 
+/// Invalid file descriptor.
+///
+/// Valid file descriptors are guaranteed to be non-negative numbers (see `open()` manpage)
+/// while negative values are used to indicate errors.
+/// Thus -1 will never overlap with a valid open file.
+const INVALID_FD: RawFd = -1;
+
 /// linux-specific implementation that will attempt to use copy_file_range for copy offloading
 /// as the name says, it only works on regular files
 ///
@@ -500,9 +507,13 @@ impl CopyResult {
 pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult {
     use crate::cmp;
 
+    const NOT_PROBED: u8 = 0;
+    const UNAVAILABLE: u8 = 1;
+    const AVAILABLE: u8 = 2;
+
     // Kernel prior to 4.5 don't have copy_file_range
     // We store the availability in a global to avoid unnecessary syscalls
-    static HAS_COPY_FILE_RANGE: AtomicBool = AtomicBool::new(true);
+    static HAS_COPY_FILE_RANGE: AtomicU8 = AtomicU8::new(NOT_PROBED);
 
     syscall! {
         fn copy_file_range(
@@ -515,39 +526,39 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
         ) -> libc::ssize_t
     }
 
-    let has_copy_file_range = HAS_COPY_FILE_RANGE.load(Ordering::Relaxed);
+    match HAS_COPY_FILE_RANGE.load(Ordering::Relaxed) {
+        NOT_PROBED => {
+            // EPERM can indicate seccomp filters or an immutable file.
+            // To distinguish these cases we probe with invalid file descriptors, which should result in EBADF if the syscall is supported
+            // and in some other error (ENOSYS or EPERM) if it is not available.
+            let result = unsafe {
+                cvt(copy_file_range(INVALID_FD, ptr::null_mut(), INVALID_FD, ptr::null_mut(), 1, 0))
+            };
+
+            if matches!(result.map_err(|e| e.raw_os_error()), Err(Some(libc::EBADF))) {
+                HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed);
+            } else {
+                HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed);
+                return CopyResult::Fallback(0);
+            }
+        }
+        UNAVAILABLE => return CopyResult::Fallback(0),
+        _ => {}
+    };
+
     let mut written = 0u64;
     while written < max_len {
-        let copy_result = if has_copy_file_range {
-            let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
-            // cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
-            // this allows us to copy large chunks without hitting EOVERFLOW,
-            // unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
-            let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x4000_0000usize);
-            let copy_result = unsafe {
-                // We actually don't have to adjust the offsets,
-                // because copy_file_range adjusts the file offset automatically
-                cvt(copy_file_range(
-                    reader,
-                    ptr::null_mut(),
-                    writer,
-                    ptr::null_mut(),
-                    bytes_to_copy,
-                    0,
-                ))
-            };
-            if let Err(ref copy_err) = copy_result {
-                match copy_err.raw_os_error() {
-                    Some(libc::ENOSYS | libc::EPERM | libc::EOPNOTSUPP) => {
-                        HAS_COPY_FILE_RANGE.store(false, Ordering::Relaxed);
-                    }
-                    _ => {}
-                }
-            }
-            copy_result
-        } else {
-            Err(Error::from_raw_os_error(libc::ENOSYS))
+        let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
+        // cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
+        // this allows us to copy large chunks without hitting EOVERFLOW,
+        // unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
+        let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x4000_0000usize);
+        let copy_result = unsafe {
+            // We actually don't have to adjust the offsets,
+            // because copy_file_range adjusts the file offset automatically
+            cvt(copy_file_range(reader, ptr::null_mut(), writer, ptr::null_mut(), bytes_to_copy, 0))
         };
+
         match copy_result {
             Ok(0) if written == 0 => {
                 // fallback to work around several kernel bugs where copy_file_range will fail to
@@ -567,11 +578,14 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
                     libc::ENOSYS | libc::EXDEV | libc::EINVAL | libc::EPERM | libc::EOPNOTSUPP,
                 ) => {
                     // Try fallback io::copy if either:
-                    // - Kernel version is < 4.5 (ENOSYS)
+                    // - Kernel version is < 4.5 (ENOSYS¹)
                     // - Files are mounted on different fs (EXDEV)
                     // - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
-                    // - copy_file_range is disallowed, for example by seccomp (EPERM)
+                    // - the file is immutable or the syscall is blocked by seccomp¹ (EPERM)
                     // - copy_file_range cannot be used with pipes or device nodes (EINVAL)
+                    //
+                    // ¹ these cases should be detected by the initial probe, but we handle them here
+                    //   anyway in case syscall interception changes at runtime
                    assert_eq!(written, 0);
                    CopyResult::Fallback(0)
                }
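
For context, a minimal standalone sketch of the probing idea used above (not part of
the diff): it assumes a Linux target and the external `libc` crate as a dependency.
The name `copy_file_range_is_supported` and the direct `libc::syscall` call are
illustrative stand-ins for the std-internal `syscall!` macro and `cvt` helper that
the patch itself goes through.

    use std::io::Error;
    use std::ptr;

    /// Probe whether copy_file_range(2) is usable at all, without touching real files.
    ///
    /// Calling the syscall with invalid file descriptors makes a supporting kernel fail
    /// with EBADF (it got far enough to validate the descriptors), while a kernel or
    /// seccomp filter that rejects the syscall itself reports ENOSYS or EPERM instead.
    fn copy_file_range_is_supported() -> bool {
        let ret = unsafe {
            libc::syscall(
                libc::SYS_copy_file_range,
                -1 as libc::c_long,              // fd_in: deliberately invalid
                ptr::null_mut::<libc::loff_t>(), // off_in: let the kernel use the file offset
                -1 as libc::c_long,              // fd_out: deliberately invalid
                ptr::null_mut::<libc::loff_t>(), // off_out
                1usize,                          // len: one byte is enough for a probe
                0u32,                            // flags: must be zero
            )
        };
        ret == -1 && Error::last_os_error().raw_os_error() == Some(libc::EBADF)
    }

    fn main() {
        if copy_file_range_is_supported() {
            println!("copy_file_range looks usable; in-kernel copy offload can be attempted");
        } else {
            println!("copy_file_range is unavailable or blocked; fall back to a read/write copy");
        }
    }

As in the patch, the probe only needs to run once per process; callers would typically
cache the answer (e.g. in an AtomicU8 with NOT_PROBED/UNAVAILABLE/AVAILABLE states) so
the extra syscall is not repeated on every copy.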