From a3a5c966a664dfde0393dd366c2cb916962d164f Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 22 Dec 2017 15:32:35 +0100 Subject: [PATCH 1/7] evm: Don't update hmacs in user ns mounts The kernel should not calculate new hmacs for mounts done by non-root users. Update evm_calc_hmac_or_hash() to refuse to calculate new hmacs for mounts for non-init user namespaces. Cc: linux-integrity@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: James Morris Cc: Mimi Zohar Cc: "Serge E. Hallyn" Signed-off-by: Seth Forshee Signed-off-by: Dongsu Park Signed-off-by: Eric W. Biederman --- security/integrity/evm/evm_crypto.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index a46fba322340..facf9cdd577d 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -200,7 +200,8 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, int size; bool ima_present = false; - if (!(inode->i_opflags & IOP_XATTR)) + if (!(inode->i_opflags & IOP_XATTR) || + inode->i_sb->s_user_ns != &init_user_ns) return -EOPNOTSUPP; desc = init_desc(type); From 593d1ce854dff93b3c9066e897192eb676b09c46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 14 Sep 2017 12:07:32 -0500 Subject: [PATCH 2/7] vfs: Don't allow changing the link count of an inode with an invalid uid or gid Changing the link count of an inode via unlink or link will cause a write back of that inode. If the uids or gids are invalid (aka not known to the kernel) writing the inode back may change the uid or gid in the filesystem. To prevent possible filesystem and to avoid the need for filesystem maintainers to worry about it don't allow operations on inodes with an invalid uid or gid. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/namei.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 186bd2464fd5..942c1f096f6b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -984,13 +984,15 @@ static bool safe_hardlink_source(struct inode *inode) */ static int may_linkat(struct path *link) { - struct inode *inode; + struct inode *inode = link->dentry->d_inode; + + /* Inode writeback is not safe when the uid or gid are invalid. */ + if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) + return -EOVERFLOW; if (!sysctl_protected_hardlinks) return 0; - inode = link->dentry->d_inode; - /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ @@ -2749,6 +2751,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); + + /* Inode writeback is not safe when the uid or gid are invalid. */ + if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) + return -EOVERFLOW; + audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(dir, MAY_WRITE | MAY_EXEC); From 55956b59df336f6738da916dbb520b6e37df9fbd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 May 2018 15:24:18 -0500 Subject: [PATCH 3/7] vfs: Allow userns root to call mknod on owned filesystems. These filesystems already always set SB_I_NODEV so mknod will not be useful for gaining control of any devices no matter their permissions. This will allow overlayfs and applications like to fakeroot to use device nodes to represent things on disk. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 942c1f096f6b..20335896dcce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3679,7 +3679,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if ((S_ISCHR(mode) || S_ISBLK(mode)) && + !ns_capable(dentry->d_sb->s_user_ns, CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) From 0031181c49ca94b14b11f08e447f40c6ebc842a4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Oct 2016 18:36:59 -0500 Subject: [PATCH 4/7] fs: Allow superblock owner to replace invalid owners of inodes Allow users with CAP_SYS_CHOWN over the superblock of a filesystem to chown files when inode owner is invalid. Ordinarily the capable_wrt_inode_uidgid check is sufficient to allow access to files but when the underlying filesystem has uids or gids that don't map to the current user namespace it is not enough, so the chown permission checks need to be extended to allow this case. Calling chown on filesystem nodes whose uid or gid don't map is necessary if those nodes are going to be modified as writing back inodes which contain uids or gids that don't map is likely to cause filesystem corruption of the uid or gid fields. Once chown has been called the existing capable_wrt_inode_uidgid checks are sufficient to allow the owner of a superblock to do anything the global root user can do with an appropriate set of capabilities. An ordinary filesystem mountable by a userns root will limit all uids and gids in s_user_ns or the INVALID_UID and INVALID_GID to flag all others. So having this added permission limited to just INVALID_UID and INVALID_GID is sufficient to handle every case on an ordinary filesystem. Of the virtual filesystems at least proc is known to set s_user_ns to something other than &init_user_ns, while at the same time presenting some files owned by GLOBAL_ROOT_UID. Those files the mounter of proc in a user namespace should not be able to chown to get access to. Limiting the relaxation in permission to just the minimum of allowing changing INVALID_UID and INVALID_GID prevents problems with cases like that. The original version of this patch was written by: Seth Forshee. I have rewritten and rethought this patch enough so it's really not the same thing (certainly it needs a different description), but he deserves credit for getting out there and getting the conversation started, and finding the potential gotcha's and putting up with my semi-paranoid feedback. Inspired-by: Seth Forshee Acked-by: Seth Forshee Signed-off-by: Eric W. Biederman --- fs/attr.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 12ffdb6fb63c..d0b4d34878fb 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -18,6 +18,32 @@ #include #include +static bool chown_ok(const struct inode *inode, kuid_t uid) +{ + if (uid_eq(current_fsuid(), inode->i_uid) && + uid_eq(uid, inode->i_uid)) + return true; + if (capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + return true; + if (uid_eq(inode->i_uid, INVALID_UID) && + ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) + return true; + return false; +} + +static bool chgrp_ok(const struct inode *inode, kgid_t gid) +{ + if (uid_eq(current_fsuid(), inode->i_uid) && + (in_group_p(gid) || gid_eq(gid, inode->i_gid))) + return true; + if (capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + return true; + if (gid_eq(inode->i_gid, INVALID_GID) && + ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) + return true; + return false; +} + /** * setattr_prepare - check if attribute changes to a dentry are allowed * @dentry: dentry to check @@ -52,17 +78,11 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr) goto kill_priv; /* Make sure a caller can chown. */ - if ((ia_valid & ATTR_UID) && - (!uid_eq(current_fsuid(), inode->i_uid) || - !uid_eq(attr->ia_uid, inode->i_uid)) && - !capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid)) return -EPERM; /* Make sure caller can chgrp. */ - if ((ia_valid & ATTR_GID) && - (!uid_eq(current_fsuid(), inode->i_uid) || - (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && - !capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid)) return -EPERM; /* Make sure a caller can chmod. */ From bc6155d1326092f4c29fe05a32b614249620d88e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 18 Sep 2017 17:58:08 -0500 Subject: [PATCH 5/7] fs: Allow superblock owner to access do_remount_sb() Superblock level remounts are currently restricted to global CAP_SYS_ADMIN, as is the path for changing the root mount to read only on umount. Loosen both of these permission checks to also allow CAP_SYS_ADMIN in any namespace which is privileged towards the userns which originally mounted the filesystem. Signed-off-by: Seth Forshee Acked-by: "Eric W. Biederman" Acked-by: Serge Hallyn Acked-by: Christian Brauner Signed-off-by: Eric W. Biederman --- fs/namespace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5f75969adff1..8ddd14806799 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1590,7 +1590,7 @@ static int do_umount(struct mount *mnt, int flags) * Special case for "unmounting" root ... * we just try to remount it readonly. */ - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; down_write(&sb->s_umount); if (!sb_rdonly(sb)) @@ -2333,7 +2333,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, down_write(&sb->s_umount); if (ms_flags & MS_BIND) err = change_mount_flags(path->mnt, ms_flags); - else if (!capable(CAP_SYS_ADMIN)) + else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) err = -EPERM; else err = do_remount_sb(sb, sb_flags, data, 0); From b1d749c5c34112fab5902c43b2a37a0ba1e5f0f1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 21 Apr 2017 19:14:32 -0500 Subject: [PATCH 6/7] capabilities: Allow privileged user in s_user_ns to set security.* xattrs A privileged user in s_user_ns will generally have the ability to manipulate the backing store and insert security.* xattrs into the filesystem directly. Therefore the kernel must be prepared to handle these xattrs from unprivileged mounts, and it makes little sense for commoncap to prevent writing these xattrs to the filesystem. The capability and LSM code have already been updated to appropriately handle xattrs from unprivileged mounts, so it is safe to loosen this restriction on setting xattrs. The exception to this logic is that writing xattrs to a mounted filesystem may also cause the LSM inode_post_setxattr or inode_setsecurity callbacks to be invoked. SELinux will deny the xattr update by virtue of applying mountpoint labeling to unprivileged userns mounts, and Smack will deny the writes for any user without global CAP_MAC_ADMIN, so loosening the capability check in commoncap is safe in this respect as well. Signed-off-by: Seth Forshee Acked-by: Serge Hallyn Acked-by: Christian Brauner Signed-off-by: Eric W. Biederman --- security/commoncap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 1ce701fcb3f3..f4c33abd9959 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -919,6 +919,8 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { + struct user_namespace *user_ns = dentry->d_sb->s_user_ns; + /* Ignore non-security xattrs */ if (strncmp(name, XATTR_SECURITY_PREFIX, sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) @@ -931,7 +933,7 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, if (strcmp(name, XATTR_NAME_CAPS) == 0) return 0; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -949,6 +951,8 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, */ int cap_inode_removexattr(struct dentry *dentry, const char *name) { + struct user_namespace *user_ns = dentry->d_sb->s_user_ns; + /* Ignore non-security xattrs */ if (strncmp(name, XATTR_SECURITY_PREFIX, sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) @@ -964,7 +968,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) return 0; } - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0; } From f3f1a18330ac1b717cd7a32adff38d965f365aa2 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Sun, 15 Feb 2015 14:35:35 -0600 Subject: [PATCH 7/7] fs: Allow CAP_SYS_ADMIN in s_user_ns to freeze and thaw filesystems The user in control of a super block should be allowed to freeze and thaw it. Relax the restrictions on the FIFREEZE and FITHAW ioctls to require CAP_SYS_ADMIN in s_user_ns. Signed-off-by: Seth Forshee Acked-by: Christian Brauner Signed-off-by: Eric W. Biederman --- fs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 4823431d1c9d..b445b13fc59b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -549,7 +549,7 @@ static int ioctl_fsfreeze(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* If filesystem doesn't support freeze feature, return. */ @@ -566,7 +566,7 @@ static int ioctl_fsthaw(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* Thaw */