linux/fs
Alexey Dobriyan 8bb2ee192e proc: fix coredump vs read /proc/*/stat race
do_task_stat() accesses IP and SP of a task without bumping reference
count of a stack (which became an entity with independent lifetime at
some point).

Steps to reproduce:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <sys/time.h>
    #include <sys/resource.h>
    #include <unistd.h>
    #include <sys/wait.h>

    int main(void)
    {
    	setrlimit(RLIMIT_CORE, &(struct rlimit){});

    	while (1) {
    		char buf[64];
    		char buf2[4096];
    		pid_t pid;
    		int fd;

    		pid = fork();
    		if (pid == 0) {
    			*(volatile int *)0 = 0;
    		}

    		snprintf(buf, sizeof(buf), "/proc/%u/stat", pid);
    		fd = open(buf, O_RDONLY);
    		read(fd, buf2, sizeof(buf2));
    		close(fd);

    		waitpid(pid, NULL, 0);
    	}
    	return 0;
    }

    BUG: unable to handle kernel paging request at 0000000000003fd8
    IP: do_task_stat+0x8b4/0xaf0
    PGD 800000003d73e067 P4D 800000003d73e067 PUD 3d558067 PMD 0
    Oops: 0000 [#1] PREEMPT SMP PTI
    CPU: 0 PID: 1417 Comm: a.out Not tainted 4.15.0-rc8-dirty #2
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014
    RIP: 0010:do_task_stat+0x8b4/0xaf0
    Call Trace:
     proc_single_show+0x43/0x70
     seq_read+0xe6/0x3b0
     __vfs_read+0x1e/0x120
     vfs_read+0x84/0x110
     SyS_read+0x3d/0xa0
     entry_SYSCALL_64_fastpath+0x13/0x6c
    RIP: 0033:0x7f4d7928cba0
    RSP: 002b:00007ffddb245158 EFLAGS: 00000246
    Code: 03 b7 a0 01 00 00 4c 8b 4c 24 70 4c 8b 44 24 78 4c 89 74 24 18 e9 91 f9 ff ff f6 45 4d 02 0f 84 fd f7 ff ff 48 8b 45 40 48 89 ef <48> 8b 80 d8 3f 00 00 48 89 44 24 20 e8 9b 97 eb ff 48 89 44 24
    RIP: do_task_stat+0x8b4/0xaf0 RSP: ffffc90000607cc8
    CR2: 0000000000003fd8

John Ogness said: for my tests I added an else case to verify that the
race is hit and correctly mitigated.

Link: http://lkml.kernel.org/r/20180116175054.GA11513@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reported-by: "Kohli, Gaurav" <gkohli@codeaurora.org>
Tested-by: John Ogness <john.ogness@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-01-19 10:09:41 -08:00
..
9p
adfs
affs
afs afs: Fix missing error handling in afs_write_end() 2018-01-02 10:02:19 +00:00
autofs4 autofs: fix careless error in recent commit 2017-12-14 16:00:48 -08:00
befs
bfs
btrfs for-4.15-rc7-tag 2018-01-05 13:02:46 -08:00
cachefiles
ceph
cifs
coda
configfs
cramfs cramfs: fix MTD dependency 2017-12-17 12:20:58 -08:00
crypto
debugfs
devpts
dlm
ecryptfs
efivarfs
efs
exofs
exportfs
ext2
ext4 ext4: fix crash when a directory's i_size is too small 2017-12-11 15:00:57 -05:00
f2fs
fat
freevxfs
fscache
fuse
gfs2
hfs
hfsplus
hostfs
hpfs hpfs: don't bother with the i_version counter or f_version 2017-12-10 12:58:18 -08:00
hugetlbfs
isofs
jbd2
jffs2
jfs
kernfs
lockd
minix
ncpfs
nfs NFS client fixes for Linux 4.15-rc4 2017-12-16 13:12:53 -08:00
nfs_common
nfsd kernel: make groups_sort calling a responsibility group_info allocators 2017-12-14 16:00:49 -08:00
nilfs2
nls
notify
ntfs
ocfs2
omfs
openpromfs
orangefs
overlayfs ovl: fix overlay: warning prefix 2017-12-14 11:14:52 +01:00
proc proc: fix coredump vs read /proc/*/stat race 2018-01-19 10:09:41 -08:00
pstore
qnx4
qnx6
quota
ramfs
reiserfs
romfs
squashfs
sysfs
sysv
tracefs VFS: Don't use save/replace_mount_options if not using generic_show_options 2017-07-06 03:31:46 -04:00
ubifs
udf
ufs
xfs Changes since last update: 2018-01-05 12:59:32 -08:00
aio.c
anon_inodes.c
attr.c
bad_inode.c
binfmt_aout.c
binfmt_elf_fdpic.c
binfmt_elf.c
binfmt_em86.c fs/binfmt_em86.c: fix incompatible pointer type 2016-08-02 19:35:15 -04:00
binfmt_flat.c
binfmt_misc.c
binfmt_script.c
block_dev.c Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block 2017-11-14 15:32:19 -08:00
buffer.c
char_dev.c
compat_binfmt_elf.c
compat_ioctl.c
compat.c
coredump.c
dax.c Revert "mm: replace p??_write with pte_access_permitted in fault + gup paths" 2017-12-15 18:53:22 -08:00
dcache.c
dcookies.c
direct-io.c
drop_caches.c
eventfd.c
eventpoll.c
exec.c exec: Weaken dumpability for secureexec 2018-01-03 10:13:36 -08:00
fcntl.c
fhandle.c
file_table.c
file.c
filesystems.c
fs_pin.c
fs_struct.c
fs-writeback.c
inode.c
internal.h
ioctl.c
iomap.c
Kconfig
Kconfig.binfmt
libfs.c
locks.c
Makefile
mbcache.c
mount.h
mpage.c
namei.c
namespace.c
no-block.c
nsfs.c
open.c
pipe.c
pnode.c
pnode.h
posix_acl.c
proc_namespace.c
read_write.c
readdir.c
select.c
seq_file.c
signalfd.c
splice.c
stack.c
stat.c
statfs.c
super.c sget(): handle failures of register_shrinker() 2017-12-18 15:05:07 -05:00
sync.c
timerfd.c
userfaultfd.c userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails 2018-01-04 16:45:09 -08:00
utimes.c
xattr.c