diff --git a/fs/namespace.c b/fs/namespace.c index cf2cc234c8b4..8a0e90eb81d3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3368,10 +3368,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) return 0; } +static struct user_namespace *mntns_owner(struct ns_common *ns) +{ /* ->owner hook: hands back the user_ns field of the mount namespace */ + return to_mnt_ns(ns)->user_ns; +} + const struct proc_ns_operations mntns_operations = { .name = "mnt", .type = CLONE_NEWNS, .get = mntns_get, .put = mntns_put, .install = mntns_install, + .owner = mntns_owner, }; diff --git a/fs/nsfs.c b/fs/nsfs.c index 8f20d6016e20..fb7b397a1297 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -5,11 +5,16 @@ #include #include #include +#include /* NOTE(review): the header names after every #include in this patch are missing, presumably lost in extraction - restore before applying */ +#include static struct vfsmount *nsfs_mnt; +static long ns_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg); /* forward declaration: ns_file_operations below refers to ns_ioctl, which is defined further down in this file */ static const struct file_operations ns_file_operations = { .llseek = no_llseek, + .unlocked_ioctl = ns_ioctl, }; static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) @@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode) ns->ops->put(ns); } -void *ns_get_path(struct path *path, struct task_struct *task, - const struct proc_ns_operations *ns_ops) +static void *__ns_get_path(struct path *path, struct ns_common *ns) /* core of the old ns_get_path(): works on an ns the caller has already looked up and reports -EAGAIN instead of looping internally */ { struct vfsmount *mnt = mntget(nsfs_mnt); struct qstr qname = { .name = "", }; struct dentry *dentry; struct inode *inode; - struct ns_common *ns; unsigned long d; -again: - ns = ns_ops->get(task); - if (!ns) { - mntput(mnt); - return ERR_PTR(-ENOENT); - } rcu_read_lock(); d = atomic_long_read(&ns->stashed); if (!d) @@ -68,7 +65,7 @@ again: if (!lockref_get_not_dead(&dentry->d_lockref)) goto slow; rcu_read_unlock(); - ns_ops->put(ns); + ns->ops->put(ns); got_it: path->mnt = mnt; path->dentry = dentry; @@ -77,7 +74,7 @@ slow: rcu_read_unlock(); inode = new_inode_pseudo(mnt->mnt_sb); if (!inode) { - ns_ops->put(ns); + ns->ops->put(ns); mntput(mnt); return ERR_PTR(-ENOMEM); } @@ -95,17 +92,94 @@ slow: return ERR_PTR(-ENOMEM); } 
d_instantiate(dentry, inode); - dentry->d_fsdata = (void *)ns_ops; + dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { d_delete(dentry); /* make sure ->d_prune() does nothing */ dput(dentry); + mntput(mnt); /* drop the mnt reference taken by mntget() at entry before bailing out */ cpu_relax(); - goto again; + return ERR_PTR(-EAGAIN); /* another dentry was stashed first; both callers loop and call again */ } goto got_it; } +void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ /* looks up the task's namespace via ns_ops->get() and repeats the lookup for as long as __ns_get_path() returns -EAGAIN; -ENOENT if get() yields NULL */ + struct ns_common *ns; + void *ret; + +again: + ns = ns_ops->get(task); + if (!ns) + return ERR_PTR(-ENOENT); + + ret = __ns_get_path(path, ns); + if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN) + goto again; + return ret; +} + +static int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)) +{ /* returns a new O_CLOEXEC descriptor, opened O_RDONLY, for the namespace that get_ns(ns) produces, or a negative errno; the reserved fd is released on every failure path */ + struct path path = {}; + struct file *f; + void *err; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + while (1) { + struct ns_common *relative; + + relative = get_ns(ns); + if (IS_ERR(relative)) { + put_unused_fd(fd); + return PTR_ERR(relative); + } + + err = __ns_get_path(&path, relative); + if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN) + continue; /* retry from get_ns() so a fresh result is passed in */ + break; + } + if (IS_ERR(err)) { + put_unused_fd(fd); + return PTR_ERR(err); + } + + f = dentry_open(&path, O_RDONLY, current_cred()); + path_put(&path); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = PTR_ERR(f); + } else + fd_install(fd, f); + + return fd; +} + +static long ns_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ /* NS_GET_USERNS opens the owning user namespace, NS_GET_PARENT the parent namespace (-EINVAL when the type has no get_parent hook); any other command is -ENOTTY; arg is unused */ + struct ns_common *ns = get_proc_ns(file_inode(filp)); + + switch (ioctl) { + case NS_GET_USERNS: + return open_related_ns(ns, ns_get_owner); + case NS_GET_PARENT: + if (!ns->ops->get_parent) + return -EINVAL; + return open_related_ns(ns, ns->ops->get_parent); + default: + return -ENOTTY; + } +} + int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops) { diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 
de0e7719d4c5..12cb8bd81d2d 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -18,6 +18,8 @@ struct proc_ns_operations { struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); + struct user_namespace *(*owner)(struct ns_common *ns); /* user namespace that owns ns; called by ns_get_owner() */ + struct ns_common *(*get_parent)(struct ns_common *ns); /* optional: in this patch only the pid and user namespace types set it */ }; extern const struct proc_ns_operations netns_operations; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 30ffe10cda18..eb209d4523f5 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -106,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool current_in_userns(const struct user_namespace *target_ns); + +struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -139,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } + +static inline struct ns_common *ns_get_owner(struct ns_common *ns) +{ /* stub for the #else branch of this header: always refuses with -EPERM */ + return ERR_PTR(-EPERM); +} #endif #endif /* _LINUX_USER_H */ diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..3af617230d1b --- /dev/null +++ b/include/uapi/linux/nsfs.h @@ -0,0 +1,13 @@ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include /* NOTE(review): header name missing; _IO() is used below, so presumably the ioctl definitions header - confirm */ + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) + +#endif /* __LINUX_NSFS_H */ diff --git a/ipc/namespace.c b/ipc/namespace.c index fab727d9fe09..0abdea496493 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -188,10 
+188,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new) return 0; } +static struct user_namespace *ipcns_owner(struct ns_common *ns) +{ /* ->owner hook: hands back the user_ns field of the ipc namespace */ + return to_ipc_ns(ns)->user_ns; +} + const struct proc_ns_operations ipcns_operations = { .name = "ipc", .type = CLONE_NEWIPC, .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, + .owner = ipcns_owner, }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f1dd4b076210..d6504338e284 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6421,12 +6421,18 @@ static void cgroupns_put(struct ns_common *ns) put_cgroup_ns(to_cg_ns(ns)); } +static struct user_namespace *cgroupns_owner(struct ns_common *ns) +{ /* ->owner hook: hands back the user_ns field of the cgroup namespace */ + return to_cg_ns(ns)->user_ns; +} + const struct proc_ns_operations cgroupns_operations = { .name = "cgroup", .type = CLONE_NEWCGROUP, .get = cgroupns_get, .put = cgroupns_put, .install = cgroupns_install, + .owner = cgroupns_owner, }; static __init int cgroup_namespaces_init(void) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7542b28cc929..df9e8e9e0be7 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -405,12 +405,37 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns) return 0; } +static struct ns_common *pidns_get_parent(struct ns_common *ns) +{ /* ->get_parent hook: returns the parent pid namespace obtained through get_pid_ns(), or ERR_PTR(-EPERM) unless walking ->parent upward from that parent reaches the caller's active pid namespace */ + struct pid_namespace *active = task_active_pid_ns(current); + struct pid_namespace *pid_ns, *p; + + /* See if the parent is in the current namespace */ + pid_ns = p = to_pid_ns(ns)->parent; + for (;;) { + if (!p) + return ERR_PTR(-EPERM); + if (p == active) + break; + p = p->parent; + } + + return &get_pid_ns(pid_ns)->ns; +} + +static struct user_namespace *pidns_owner(struct ns_common *ns) +{ /* ->owner hook: hands back the user_ns field of the pid namespace */ + return to_pid_ns(ns)->user_ns; +} + const struct proc_ns_operations pidns_operations = { .name = "pid", .type = CLONE_NEWPID, .get = pidns_get, .put = pidns_put, .install = pidns_install, + .owner = pidns_owner, + .get_parent = pidns_get_parent, }; static __init int pid_namespaces_init(void) 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index f2c5ba5505f1..86b7854fec8e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1050,12 +1050,43 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return commit_creds(cred);
 }
 
+/*
+ * Return the user namespace that owns @ns, obtained through get_user_ns(),
+ * or ERR_PTR(-EPERM) unless that owner is the caller's user namespace or
+ * one of its descendants (or when the ->owner hook returns NULL).
+ */
+struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	struct user_namespace *my_user_ns = current_user_ns();
+	struct user_namespace *owner, *p;
+
+	/* See if the owner is in the current user namespace */
+	owner = p = ns->ops->owner(ns);
+	for (;;) {
+		if (!p)
+			return ERR_PTR(-EPERM);
+		if (p == my_user_ns)
+			break;
+		p = p->parent;
+	}
+
+	return &get_user_ns(owner)->ns;
+}
+
+/* For a user namespace the ->owner hook reports its parent. */
+static struct user_namespace *userns_owner(struct ns_common *ns)
+{
+	return to_user_ns(ns)->parent;
+}
+
 const struct proc_ns_operations userns_operations = {
 	.name = "user",
 	.type = CLONE_NEWUSER,
 	.get = userns_get,
 	.put = userns_put,
 	.install = userns_install,
+	.owner = userns_owner,
+	.get_parent = ns_get_owner,
 };
 
 static __init int user_namespaces_init(void)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 35587b76faa3..6976cd47dcf6 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -154,10 +154,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
 	return 0;
 }
 
+static struct user_namespace *utsns_owner(struct ns_common *ns)
+{
+	return to_uts_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name = "uts",
 	.type = CLONE_NEWUTS,
 	.get = utsns_get,
 	.put = utsns_put,
 	.install = utsns_install,
+	.owner = utsns_owner,
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 06af5d6a883c..e8be581b47b0 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1016,11 +1016,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return 0;
 }
 
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+	return to_net_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name = "net",
 	.type = CLONE_NEWNET,
 	.get = netns_get,
 	.put = netns_put,
 	.install = netns_install,
+	.owner = netns_owner,
 };
 #endif
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index ff9e5f20a5a7..f770dba2a6f6 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -15,6 +15,7 @@ TARGETS += memory-hotplug
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net
+TARGETS += nsfs
 TARGETS += powerpc
 TARGETS += pstore
 TARGETS += ptrace
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
new file mode 100644
index 000000000000..2306054a901a
--- /dev/null
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -0,0 +1,12 @@
+TEST_PROGS := owner pidns
+
+CFLAGS := -Wall -Werror
+
+all: owner pidns
+owner: owner.c
+pidns: pidns.c
+
+clean:
+	$(RM) owner pidns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c
new file mode 100644
index 000000000000..437205f8b714
--- /dev/null
+++ b/tools/testing/selftests/nsfs/owner.c
@@ -0,0 +1,99 @@
+/*
+ * Selftest for the NS_GET_USERNS nsfs ioctl: checks that it returns the
+ * user namespace owning a namespace, and that it fails with EPERM when
+ * that owner lies outside the caller's user namespace hierarchy.
+ */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define NSIO	0xb7
+#define NS_GET_USERNS   _IO(NSIO, 0x1)
+
+#define pr_err(fmt, ...) \
+		({ \
+			fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+				__func__, __LINE__, ##__VA_ARGS__); \
+			1; \
+		})
+
+int main(int argc, char *argvp[])
+{
+	int pfd[2], ns, uns, init_uns;
+	struct stat st1, st2;
+	char path[128];
+	pid_t pid;
+	char c;
+
+	if (pipe(pfd))
+		return 1;
+
+	pid = fork();
+	if (pid < 0)
+		return pr_err("fork");
+	if (pid == 0) {
+		prctl(PR_SET_PDEATHSIG, SIGKILL);
+		if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
+			return pr_err("unshare");
+		close(pfd[0]);
+		close(pfd[1]);
+		while (1)
+			sleep(1);
+		return 0;
+	}
+	close(pfd[1]);
+	/* EOF here means the child closed its pipe ends, i.e. is past unshare(). */
+	if (read(pfd[0], &c, 1) != 0)
+		return pr_err("Unable to read from pipe");
+	close(pfd[0]);
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+	ns = open(path, O_RDONLY);
+	if (ns < 0)
+		return pr_err("Unable to open %s", path);
+
+	uns = ioctl(ns, NS_GET_USERNS);
+	if (uns < 0)
+		return pr_err("Unable to get an owning user namespace");
+
+	if (fstat(uns, &st1))
+		return pr_err("fstat");
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+	if (stat(path, &st2))
+		return pr_err("stat");
+
+	if (st1.st_ino != st2.st_ino)
+		return pr_err("NS_GET_USERNS returned a wrong namespace");
+
+	init_uns = ioctl(uns, NS_GET_USERNS);
+	if (init_uns < 0)
+		return pr_err("Unable to get an owning user namespace");
+
+	/* The owner of the caller's own user namespace must not be reachable. */
+	if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+
+	if (unshare(CLONE_NEWUSER))
+		return pr_err("unshare");
+
+	/* From a fresh user namespace neither owner is inside our hierarchy. */
+	if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+	if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 000000000000..ae3a0d68e966
--- /dev/null
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,84 @@
+/*
+ * Selftest for the NS_GET_PARENT nsfs ioctl: a child cloned into new pid
+ * and user namespaces must report the caller's namespaces as its parents,
+ * and asking for the parent of those must fail with EPERM.
+ */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define pr_err(fmt, ...) \
+		({ \
+			fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+				__func__, __LINE__, ##__VA_ARGS__); \
+			1; \
+		})
+
+#define NSIO	0xb7
+#define NS_GET_USERNS   _IO(NSIO, 0x1)
+#define NS_GET_PARENT   _IO(NSIO, 0x2)
+
+#define __stack_aligned__	__attribute__((aligned(16)))
+struct cr_clone_arg {
+	char stack[128] __stack_aligned__;
+	char stack_ptr[0];
+};
+
+static int child(void *args)
+{
+	prctl(PR_SET_PDEATHSIG, SIGKILL);
+	while (1)
+		sleep(1);
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	char *ns_strs[] = {"pid", "user"};
+	char path[128];
+	struct cr_clone_arg ca;
+	struct stat st1, st2;
+	int ns, pns, i;
+	pid_t pid;
+
+	pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
+	if (pid < 0)
+		return pr_err("clone");
+
+	for (i = 0; i < 2; i++) {
+		snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
+		ns = open(path, O_RDONLY);
+		if (ns < 0)
+			return pr_err("Unable to open %s", path);
+
+		pns = ioctl(ns, NS_GET_PARENT);
+		if (pns < 0)
+			return pr_err("Unable to get a parent pidns");
+
+		snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
+		if (stat(path, &st2))
+			return pr_err("Unable to stat %s", path);
+		if (fstat(pns, &st1))
+			return pr_err("Unable to stat the parent pidns");
+		if (st1.st_ino != st2.st_ino)
+			return pr_err("NS_GET_PARENT returned a wrong namespace");
+
+		/* The parent of our own namespace is outside what we may see. */
+		if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
+			return pr_err("Don't get EPERM");
+	}
+
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}