userfaultfd: prevent concurrent API initialization
[ Upstream commit 22e5fe2a2a279d9a6fcbdfb4dffe73821bef1c90 ]
userfaultfd assumes that the enabled features are set once and never
changed after UFFDIO_API ioctl succeeded.
However, currently, UFFDIO_API can be called concurrently from two
different threads, succeed on both threads and leave userfaultfd's
features in non-deterministic state. Theoretically, other uffd operations
(ioctl's and page-faults) can be dispatched while adversely affected by
such changes of features.
Moreover, the writes to ctx->state and ctx->features are not ordered,
which can - theoretically, again - let userfaultfd_ioctl() think that
userfaultfd API completed, while the features are still not initialized.
To avoid races, it is arguably best to get rid of ctx->state. Since there
are only 2 states, record the API initialization in ctx->features as the
uppermost bit and remove ctx->state.
Link: https://lkml.kernel.org/r/20210808020724.1022515-3-namit@vmware.com
Fixes: 9cd75c3cd4
("userfaultfd: non-cooperative: add ability to report non-PF events from uffd descriptor")
Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
7bf2913a5b
commit
d766826eee
@ -32,11 +32,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly = 1;
|
|||||||
|
|
||||||
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
|
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
|
||||||
|
|
||||||
enum userfaultfd_state {
|
|
||||||
UFFD_STATE_WAIT_API,
|
|
||||||
UFFD_STATE_RUNNING,
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Start with fault_pending_wqh and fault_wqh so they're more likely
|
* Start with fault_pending_wqh and fault_wqh so they're more likely
|
||||||
* to be in the same cacheline.
|
* to be in the same cacheline.
|
||||||
@ -68,8 +63,6 @@ struct userfaultfd_ctx {
|
|||||||
unsigned int flags;
|
unsigned int flags;
|
||||||
/* features requested from the userspace */
|
/* features requested from the userspace */
|
||||||
unsigned int features;
|
unsigned int features;
|
||||||
/* state machine */
|
|
||||||
enum userfaultfd_state state;
|
|
||||||
/* released */
|
/* released */
|
||||||
bool released;
|
bool released;
|
||||||
/* memory mappings are changing because of non-cooperative event */
|
/* memory mappings are changing because of non-cooperative event */
|
||||||
@ -103,6 +96,14 @@ struct userfaultfd_wake_range {
|
|||||||
unsigned long len;
|
unsigned long len;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* internal indication that UFFD_API ioctl was successfully executed */
|
||||||
|
#define UFFD_FEATURE_INITIALIZED (1u << 31)
|
||||||
|
|
||||||
|
static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
|
||||||
|
{
|
||||||
|
return ctx->features & UFFD_FEATURE_INITIALIZED;
|
||||||
|
}
|
||||||
|
|
||||||
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
|
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
|
||||||
int wake_flags, void *key)
|
int wake_flags, void *key)
|
||||||
{
|
{
|
||||||
@ -699,7 +700,6 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
|
|||||||
|
|
||||||
refcount_set(&ctx->refcount, 1);
|
refcount_set(&ctx->refcount, 1);
|
||||||
ctx->flags = octx->flags;
|
ctx->flags = octx->flags;
|
||||||
ctx->state = UFFD_STATE_RUNNING;
|
|
||||||
ctx->features = octx->features;
|
ctx->features = octx->features;
|
||||||
ctx->released = false;
|
ctx->released = false;
|
||||||
ctx->mmap_changing = false;
|
ctx->mmap_changing = false;
|
||||||
@ -980,10 +980,9 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
|
|||||||
|
|
||||||
poll_wait(file, &ctx->fd_wqh, wait);
|
poll_wait(file, &ctx->fd_wqh, wait);
|
||||||
|
|
||||||
switch (ctx->state) {
|
if (!userfaultfd_is_initialized(ctx))
|
||||||
case UFFD_STATE_WAIT_API:
|
|
||||||
return EPOLLERR;
|
return EPOLLERR;
|
||||||
case UFFD_STATE_RUNNING:
|
|
||||||
/*
|
/*
|
||||||
* poll() never guarantees that read won't block.
|
* poll() never guarantees that read won't block.
|
||||||
* userfaults can be waken before they're read().
|
* userfaults can be waken before they're read().
|
||||||
@ -1008,10 +1007,6 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
|
|||||||
ret = EPOLLIN;
|
ret = EPOLLIN;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
default:
|
|
||||||
WARN_ON_ONCE(1);
|
|
||||||
return EPOLLERR;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct file_operations userfaultfd_fops;
|
static const struct file_operations userfaultfd_fops;
|
||||||
@ -1205,7 +1200,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
|
|||||||
struct uffd_msg msg;
|
struct uffd_msg msg;
|
||||||
int no_wait = file->f_flags & O_NONBLOCK;
|
int no_wait = file->f_flags & O_NONBLOCK;
|
||||||
|
|
||||||
if (ctx->state == UFFD_STATE_WAIT_API)
|
if (!userfaultfd_is_initialized(ctx))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
@ -1807,9 +1802,10 @@ out:
|
|||||||
static inline unsigned int uffd_ctx_features(__u64 user_features)
|
static inline unsigned int uffd_ctx_features(__u64 user_features)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* For the current set of features the bits just coincide
|
* For the current set of features the bits just coincide. Set
|
||||||
|
* UFFD_FEATURE_INITIALIZED to mark the features as enabled.
|
||||||
*/
|
*/
|
||||||
return (unsigned int)user_features;
|
return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1822,12 +1818,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
|
|||||||
{
|
{
|
||||||
struct uffdio_api uffdio_api;
|
struct uffdio_api uffdio_api;
|
||||||
void __user *buf = (void __user *)arg;
|
void __user *buf = (void __user *)arg;
|
||||||
|
unsigned int ctx_features;
|
||||||
int ret;
|
int ret;
|
||||||
__u64 features;
|
__u64 features;
|
||||||
|
|
||||||
ret = -EINVAL;
|
|
||||||
if (ctx->state != UFFD_STATE_WAIT_API)
|
|
||||||
goto out;
|
|
||||||
ret = -EFAULT;
|
ret = -EFAULT;
|
||||||
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
|
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
|
||||||
goto out;
|
goto out;
|
||||||
@ -1844,9 +1838,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
|
|||||||
ret = -EFAULT;
|
ret = -EFAULT;
|
||||||
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
|
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
|
||||||
goto out;
|
goto out;
|
||||||
ctx->state = UFFD_STATE_RUNNING;
|
|
||||||
/* only enable the requested features for this uffd context */
|
/* only enable the requested features for this uffd context */
|
||||||
ctx->features = uffd_ctx_features(features);
|
ctx_features = uffd_ctx_features(features);
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (cmpxchg(&ctx->features, 0, ctx_features) != 0)
|
||||||
|
goto err_out;
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
@ -1863,7 +1861,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
|
|||||||
int ret = -EINVAL;
|
int ret = -EINVAL;
|
||||||
struct userfaultfd_ctx *ctx = file->private_data;
|
struct userfaultfd_ctx *ctx = file->private_data;
|
||||||
|
|
||||||
if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API)
|
if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
switch(cmd) {
|
switch(cmd) {
|
||||||
@ -1964,7 +1962,6 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
|
|||||||
refcount_set(&ctx->refcount, 1);
|
refcount_set(&ctx->refcount, 1);
|
||||||
ctx->flags = flags;
|
ctx->flags = flags;
|
||||||
ctx->features = 0;
|
ctx->features = 0;
|
||||||
ctx->state = UFFD_STATE_WAIT_API;
|
|
||||||
ctx->released = false;
|
ctx->released = false;
|
||||||
ctx->mmap_changing = false;
|
ctx->mmap_changing = false;
|
||||||
ctx->mm = current->mm;
|
ctx->mm = current->mm;
|
||||||
|
Loading…
Reference in New Issue
Block a user