samples: show race-free pidfd metadata access
This is a sample program showing userspace how to get race-free access to process metadata from a pidfd. It is rather easy to do and userspace can actually simply reuse code that currently parses a process's status file in procfs. The program can easily be extended into a generic helper suitable for inclusion in a libc to make it even easier for userspace to gain metadata access. Since this came up in a discussion because this API is going to be used in various service managers: A lot of programs will have a whitelist seccomp filter that returns <some-errno> for all new syscalls. This means that programs might get confused if CLONE_PIDFD works but the later pidfd_send_signal() syscall doesn't. Hence, here's an ahead-of-time check that pidfd_send_signal() is supported: bool pidfd_send_signal_supported() { int procfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); if (procfd < 0) return false; /* * A process is always allowed to signal itself so * pidfd_send_signal() should never fail this test. If it does * it must mean it is not available, blocked by an LSM, seccomp, * or other. */ return pidfd_send_signal(procfd, 0, NULL, 0) == 0; } Signed-off-by: Christian Brauner <christian@brauner.io> Co-developed-by: Jann Horn <jannh@google.com> Signed-off-by: Jann Horn <jannh@google.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Kees Cook <keescook@chromium.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: David Howells <dhowells@redhat.com> Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> Cc: Andy Lutomirsky <luto@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Aleksa Sarai <cyphar@cyphar.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
2151ad1b06
commit
43c6afee48
|
@ -3,4 +3,4 @@
|
||||||
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
|
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
|
||||||
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
|
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
|
||||||
configfs/ connector/ v4l/ trace_printk/ \
|
configfs/ connector/ v4l/ trace_printk/ \
|
||||||
vfio-mdev/ statx/ qmi/ binderfs/
|
vfio-mdev/ statx/ qmi/ binderfs/ pidfd/
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
hostprogs-y := pidfd-metadata
|
||||||
|
always := $(hostprogs-y)
|
||||||
|
HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
|
||||||
|
all: pidfd-metadata
|
|
@ -0,0 +1,112 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <err.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#ifndef CLONE_PIDFD
|
||||||
|
#define CLONE_PIDFD 0x00001000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Entry point of the cloned child: print the child's own PID on stdout and
 * terminate immediately.
 *
 * @args: unused; only present to match the callback signature clone() takes.
 *
 * Does not return to the caller: the process ends through _exit().
 */
static int do_child(void *args)
{
	(void)args;

	printf("%d\n", getpid());

	/*
	 * _exit() terminates without flushing stdio buffers. When stdout is
	 * fully buffered (a pipe or a regular file) the PID line would be
	 * silently dropped, so push it out explicitly first.
	 */
	fflush(stdout);

	_exit(EXIT_SUCCESS);
}
|
||||||
|
|
||||||
|
static pid_t pidfd_clone(int flags, int *pidfd)
|
||||||
|
{
|
||||||
|
size_t stack_size = 1024;
|
||||||
|
char *stack[1024] = { 0 };
|
||||||
|
|
||||||
|
#ifdef __ia64__
|
||||||
|
return __clone2(do_child, stack, stack_size, flags | SIGCHLD, NULL, pidfd);
|
||||||
|
#else
|
||||||
|
return clone(do_child, stack + stack_size, flags | SIGCHLD, NULL, pidfd);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Raw wrapper around the pidfd_send_signal system call.
 *
 * @pidfd: file descriptor identifying the target process.
 * @sig:   signal number to deliver.
 * @info:  optional siginfo to attach, may be NULL.
 * @flags: flags word handed to the system call unchanged.
 *
 * Returns the result of syscall(), narrowed to int.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	long rc;

	rc = syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);

	return (int)rc;
}
|
||||||
|
|
||||||
|
/*
 * Open /proc/<pid> and confirm, through the pidfd, that the directory still
 * belongs to the process the caller means.
 *
 * @pid:   numeric PID used to build the /proc/<pid> path.
 * @pidfd: pidfd for the same process, used for the liveness check.
 *
 * Returns an O_CLOEXEC directory file descriptor for /proc/<pid> on success,
 * or -1 if the directory could not be opened or the check failed. The caller
 * owns the returned descriptor and must close it.
 */
static int pidfd_metadata_fd(pid_t pid, int pidfd)
{
	int procfd, ret;
	char path[100];

	snprintf(path, sizeof(path), "/proc/%d", pid);
	procfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (procfd < 0) {
		/* warn() appends ": <strerror>\n" itself; no newline here. */
		warn("Failed to open %s", path);
		return -1;
	}

	/*
	 * Verify that the pid has not been recycled and our /proc/<pid> handle
	 * is still valid. Signal 0 delivers nothing; only the error checks of
	 * the call are of interest.
	 */
	ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
	if (ret < 0) {
		switch (errno) {
		case EPERM:
			/* Process exists, just not allowed to signal it. */
			break;
		default:
			warn("Failed to signal process");
			close(procfd);
			procfd = -1;
			break;
		}
	}

	return procfd;
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int pidfd = 0, ret = EXIT_FAILURE;
|
||||||
|
char buf[4096] = { 0 };
|
||||||
|
pid_t pid;
|
||||||
|
int procfd, statusfd;
|
||||||
|
ssize_t bytes;
|
||||||
|
|
||||||
|
pid = pidfd_clone(CLONE_PIDFD, &pidfd);
|
||||||
|
if (pid < 0)
|
||||||
|
exit(ret);
|
||||||
|
|
||||||
|
procfd = pidfd_metadata_fd(pid, pidfd);
|
||||||
|
close(pidfd);
|
||||||
|
if (procfd < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC);
|
||||||
|
close(procfd);
|
||||||
|
if (statusfd < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
bytes = read(statusfd, buf, sizeof(buf));
|
||||||
|
if (bytes > 0)
|
||||||
|
bytes = write(STDOUT_FILENO, buf, bytes);
|
||||||
|
close(statusfd);
|
||||||
|
ret = EXIT_SUCCESS;
|
||||||
|
|
||||||
|
out:
|
||||||
|
(void)wait(NULL);
|
||||||
|
|
||||||
|
exit(ret);
|
||||||
|
}
|
Loading…
Reference in New Issue