aio: simplify - and fix - fget/fput for io_submit()

Al Viro root-caused a race where the IOCB_CMD_POLL handling of
fget/fput() could cause us to access the file pointer after it had
already been freed:

 "In more details - normally IOCB_CMD_POLL handling looks so:

   1) io_submit(2) allocates aio_kiocb instance and passes it to
      aio_poll()

   2) aio_poll() resolves the descriptor to struct file by req->file =
      fget(iocb->aio_fildes)

   3) aio_poll() sets ->woken to false and raises ->ki_refcnt of that
      aio_kiocb to 2 (bumps by 1, that is).

   4) aio_poll() calls vfs_poll(). After sanity checks (basically,
      "poll_wait() had been called and only once") it locks the queue.
      That's what the extra reference to iocb had been for - we know we
      can safely access it.

   5) With queue locked, we check if ->woken has already been set to
      true (by aio_poll_wake()) and, if it had been, we unlock the
      queue, drop a reference to aio_kiocb and bugger off - at that
      point it's a responsibility to aio_poll_wake() and the stuff
      called/scheduled by it. That code will drop the reference to file
      in req->file, along with the other reference to our aio_kiocb.

   6) otherwise, we see whether we need to wait. If we do, we unlock the
      queue, drop one reference to aio_kiocb and go away - eventual
      wakeup (or cancel) will deal with the reference to file and with
      the other reference to aio_kiocb

   7) otherwise we remove ourselves from waitqueue (still under the
      queue lock), so that wakeup won't get us. No async activity will
      be happening, so we can safely drop req->file and iocb ourselves.

  If wakeup happens while we are in vfs_poll(), we are fine - aio_kiocb
  won't get freed under us, so we can do all the checks and locking
  safely. And we don't touch ->file if we detect that case.

  However, vfs_poll() most certainly *does* touch the file it had been
  given. So wakeup coming while we are still in ->poll() might end up
  doing fput() on that file. That case is not too rare, and usually we
  are saved by the still present reference from descriptor table - that
  fput() is not the final one.

  But if another thread closes that descriptor right after our fget()
  and wakeup does happen before ->poll() returns, we are in trouble -
  final fput() done while we are in the middle of a method:

Al also wrote a patch to take an extra reference to the file descriptor
to fix this, but I instead suggested we just streamline the whole file
pointer handling by submit_io() so that the generic aio submission code
simply keeps the file pointer around until the aio has completed.

Fixes: bfe4037e72 ("aio: implement IOCB_CMD_POLL")
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Reported-by: syzbot+503d4cc169fcec1cb18c@syzkaller.appspotmail.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2019-03-03 14:23:33 -08:00
parent 00c42373d3
commit 84c4e1f89f
2 changed files with 36 additions and 44 deletions

View File

@ -167,9 +167,13 @@ struct kioctx {
unsigned id; unsigned id;
}; };
/*
* First field must be the file pointer in all the
* iocb unions! See also 'struct kiocb' in <linux/fs.h>
*/
struct fsync_iocb { struct fsync_iocb {
struct work_struct work;
struct file *file; struct file *file;
struct work_struct work;
bool datasync; bool datasync;
}; };
@ -183,8 +187,15 @@ struct poll_iocb {
struct work_struct work; struct work_struct work;
}; };
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
* access the file pointer through any of the sub-structs,
* or directly as just 'ki_filp' in this struct.
*/
struct aio_kiocb { struct aio_kiocb {
union { union {
struct file *ki_filp;
struct kiocb rw; struct kiocb rw;
struct fsync_iocb fsync; struct fsync_iocb fsync;
struct poll_iocb poll; struct poll_iocb poll;
@ -1060,6 +1071,8 @@ static inline void iocb_put(struct aio_kiocb *iocb)
{ {
if (refcount_read(&iocb->ki_refcnt) == 0 || if (refcount_read(&iocb->ki_refcnt) == 0 ||
refcount_dec_and_test(&iocb->ki_refcnt)) { refcount_dec_and_test(&iocb->ki_refcnt)) {
if (iocb->ki_filp)
fput(iocb->ki_filp);
percpu_ref_put(&iocb->ki_ctx->reqs); percpu_ref_put(&iocb->ki_ctx->reqs);
kmem_cache_free(kiocb_cachep, iocb); kmem_cache_free(kiocb_cachep, iocb);
} }
@ -1424,7 +1437,6 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
file_end_write(kiocb->ki_filp); file_end_write(kiocb->ki_filp);
} }
fput(kiocb->ki_filp);
aio_complete(iocb, res, res2); aio_complete(iocb, res, res2);
} }
@ -1432,9 +1444,6 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{ {
int ret; int ret;
req->ki_filp = fget(iocb->aio_fildes);
if (unlikely(!req->ki_filp))
return -EBADF;
req->ki_complete = aio_complete_rw; req->ki_complete = aio_complete_rw;
req->private = NULL; req->private = NULL;
req->ki_pos = iocb->aio_offset; req->ki_pos = iocb->aio_offset;
@ -1451,7 +1460,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio); ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) { if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret); pr_debug("aio ioprio check cap error: %d\n", ret);
goto out_fput; return ret;
} }
req->ki_ioprio = iocb->aio_reqprio; req->ki_ioprio = iocb->aio_reqprio;
@ -1460,14 +1469,10 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret)) if (unlikely(ret))
goto out_fput; return ret;
req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */ req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
return 0; return 0;
out_fput:
fput(req->ki_filp);
return ret;
} }
static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
@ -1521,24 +1526,19 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
if (ret) if (ret)
return ret; return ret;
file = req->ki_filp; file = req->ki_filp;
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ))) if (unlikely(!(file->f_mode & FMODE_READ)))
goto out_fput; return -EBADF;
ret = -EINVAL; ret = -EINVAL;
if (unlikely(!file->f_op->read_iter)) if (unlikely(!file->f_op->read_iter))
goto out_fput; return -EINVAL;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret) if (ret)
goto out_fput; return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) if (!ret)
aio_rw_done(req, call_read_iter(file, req, &iter)); aio_rw_done(req, call_read_iter(file, req, &iter));
kfree(iovec); kfree(iovec);
out_fput:
if (unlikely(ret))
fput(file);
return ret; return ret;
} }
@ -1555,16 +1555,14 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
return ret; return ret;
file = req->ki_filp; file = req->ki_filp;
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE))) if (unlikely(!(file->f_mode & FMODE_WRITE)))
goto out_fput; return -EBADF;
ret = -EINVAL;
if (unlikely(!file->f_op->write_iter)) if (unlikely(!file->f_op->write_iter))
goto out_fput; return -EINVAL;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret) if (ret)
goto out_fput; return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) { if (!ret) {
/* /*
@ -1582,9 +1580,6 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
aio_rw_done(req, call_write_iter(file, req, &iter)); aio_rw_done(req, call_write_iter(file, req, &iter));
} }
kfree(iovec); kfree(iovec);
out_fput:
if (unlikely(ret))
fput(file);
return ret; return ret;
} }
@ -1594,7 +1589,6 @@ static void aio_fsync_work(struct work_struct *work)
int ret; int ret;
ret = vfs_fsync(req->file, req->datasync); ret = vfs_fsync(req->file, req->datasync);
fput(req->file);
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
} }
@ -1605,13 +1599,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
iocb->aio_rw_flags)) iocb->aio_rw_flags))
return -EINVAL; return -EINVAL;
req->file = fget(iocb->aio_fildes); if (unlikely(!req->file->f_op->fsync))
if (unlikely(!req->file))
return -EBADF;
if (unlikely(!req->file->f_op->fsync)) {
fput(req->file);
return -EINVAL; return -EINVAL;
}
req->datasync = datasync; req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work); INIT_WORK(&req->work, aio_fsync_work);
@ -1621,10 +1610,7 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
{ {
struct file *file = iocb->poll.file;
aio_complete(iocb, mangle_poll(mask), 0); aio_complete(iocb, mangle_poll(mask), 0);
fput(file);
} }
static void aio_poll_complete_work(struct work_struct *work) static void aio_poll_complete_work(struct work_struct *work)
@ -1743,9 +1729,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
INIT_WORK(&req->work, aio_poll_complete_work); INIT_WORK(&req->work, aio_poll_complete_work);
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
req->file = fget(iocb->aio_fildes);
if (unlikely(!req->file))
return -EBADF;
req->head = NULL; req->head = NULL;
req->woken = false; req->woken = false;
@ -1788,10 +1771,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
spin_unlock_irq(&ctx->ctx_lock); spin_unlock_irq(&ctx->ctx_lock);
out: out:
if (unlikely(apt.error)) { if (unlikely(apt.error))
fput(req->file);
return apt.error; return apt.error;
}
if (mask) if (mask)
aio_poll_complete(aiocb, mask); aio_poll_complete(aiocb, mask);
@ -1829,6 +1810,11 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
if (unlikely(!req)) if (unlikely(!req))
goto out_put_reqs_available; goto out_put_reqs_available;
req->ki_filp = fget(iocb->aio_fildes);
ret = -EBADF;
if (unlikely(!req->ki_filp))
goto out_put_req;
if (iocb->aio_flags & IOCB_FLAG_RESFD) { if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/* /*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an

View File

@ -304,13 +304,19 @@ enum rw_hint {
struct kiocb { struct kiocb {
struct file *ki_filp; struct file *ki_filp;
/* The 'ki_filp' pointer is shared in a union for aio */
randomized_struct_fields_start
loff_t ki_pos; loff_t ki_pos;
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private; void *private;
int ki_flags; int ki_flags;
u16 ki_hint; u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */ u16 ki_ioprio; /* See linux/ioprio.h */
} __randomize_layout;
randomized_struct_fields_end
};
static inline bool is_sync_kiocb(struct kiocb *kiocb) static inline bool is_sync_kiocb(struct kiocb *kiocb)
{ {