virtiofsd: introduce inode refcount to prevent use-after-free

If thread A is using an inode it must not be deleted by thread B when
processing a FUSE_FORGET request.

The FUSE protocol itself already has a counter called nlookup that is
used in FUSE_FORGET messages.  We cannot trust this counter since the
untrusted client can manipulate it via FUSE_FORGET messages.

Introduce a new refcount to keep inodes alive for the required lifespan.
lo_inode_put() must be called to release a reference.  FUSE's nlookup
counter holds exactly one reference so that the inode stays alive as
long as the client still wants to remember it.

Note that the lo_inode->is_symlink field is moved to avoid creating a
hole in the struct due to struct field alignment.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2019-07-31 17:10:06 +01:00 committed by Dr. David Alan Gilbert
parent 9257e514d8
commit c241aa9457
1 changed file with 146 additions and 23 deletions

View File

@ -97,7 +97,13 @@ struct lo_key {
struct lo_inode {
int fd;
bool is_symlink;
/*
* Atomic reference count for this object. The nlookup field holds a
* reference and releases it when nlookup reaches 0.
*/
gint refcount;
struct lo_key key;
/*
@ -116,6 +122,8 @@ struct lo_inode {
fuse_ino_t fuse_ino;
pthread_mutex_t plock_mutex;
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
bool is_symlink;
};
struct lo_cred {
@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
return elem - lo_data(req)->ino_map.elems;
}
/*
 * Release one reference on *inodep and clear the caller's pointer.
 *
 * @lo:     per-mount state; not used by this function itself
 * @inodep: address of the caller's inode pointer; *inodep may be NULL, in
 *          which case nothing happens
 *
 * When the reference being dropped is the last one, the inode's fd is closed
 * and the inode memory is freed.
 */
static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
{
    struct lo_inode *victim = *inodep;

    if (victim != NULL) {
        /* Forget the caller's pointer before the object can go away */
        *inodep = NULL;

        if (g_atomic_int_dec_and_test(&victim->refcount)) {
            /* That was the final reference: tear the inode down */
            close(victim->fd);
            free(victim);
        }
    }
}
/* Caller must release refcount using lo_inode_put() */
static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
{
struct lo_data *lo = lo_data(req);
@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
pthread_mutex_lock(&lo->mutex);
elem = lo_map_get(&lo->ino_map, ino);
if (elem) {
g_atomic_int_inc(&elem->inode->refcount);
}
pthread_mutex_unlock(&lo->mutex);
if (!elem) {
@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
return elem->inode;
}
/*
* TODO Remove this helper and force callers to hold an inode refcount until
* they are done with the fd. This will be done in a later patch to make
* review easier.
*/
static int lo_fd(fuse_req_t req, fuse_ino_t ino)
{
struct lo_inode *inode = lo_inode(req, ino);
return inode ? inode->fd : -1;
int fd;
if (!inode) {
return -1;
}
fd = inode->fd;
lo_inode_put(lo_data(req), &inode);
return fd;
}
static void lo_init(void *userdata, struct fuse_conn_info *conn)
@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
fuse_reply_attr(req, &buf, lo->timeout);
}
/*
* Increments parent->nlookup and caller must release refcount using
* lo_inode_put(lo, &parent).
*/
static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
char path[PATH_MAX], struct lo_inode **parent)
{
@ -582,6 +627,7 @@ retry:
p = &lo->root;
pthread_mutex_lock(&lo->mutex);
p->nlookup++;
g_atomic_int_inc(&p->refcount);
pthread_mutex_unlock(&lo->mutex);
} else {
*last = '\0';
@ -625,6 +671,7 @@ retry:
fail_unref:
unref_inode_lolocked(lo, p, 1);
lo_inode_put(lo, &p);
fail:
if (retries) {
retries--;
@ -663,6 +710,7 @@ fallback:
if (res != -1) {
res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
unref_inode_lolocked(lo, parent, 1);
lo_inode_put(lo, &parent);
}
return res;
@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
goto out_err;
}
}
lo_inode_put(lo, &inode);
return lo_getattr(req, ino, fi);
out_err:
saverr = errno;
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
if (p) {
assert(p->nlookup > 0);
p->nlookup++;
g_atomic_int_inc(&p->refcount);
}
pthread_mutex_unlock(&lo->mutex);
@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data)
free(plock);
}
/*
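* Increments nlookup on the looked-up inode on success; that reference is
* released later in unref_inode_lolocked(). The inode refcounts taken while
* looking up are dropped with lo_inode_put() before returning.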
*/
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
struct fuse_entry_param *e)
{
@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
int res;
int saverr;
struct lo_data *lo = lo_data(req);
struct lo_inode *inode, *dir = lo_inode(req, parent);
struct lo_inode *inode = NULL;
struct lo_inode *dir = lo_inode(req, parent);
/*
* name_to_handle_at() and open_by_handle_at() can reach here with fuse
@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
}
inode->is_symlink = S_ISLNK(e->attr.st_mode);
/*
* One for the caller and one for nlookup (released in
* unref_inode_lolocked())
*/
g_atomic_int_set(&inode->refcount, 2);
inode->nlookup = 1;
inode->fd = newfd;
newfd = -1;
@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
pthread_mutex_unlock(&lo->mutex);
}
e->ino = inode->fuse_ino;
lo_inode_put(lo, &inode);
lo_inode_put(lo, &dir);
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
name, (unsigned long long)e->ino);
@ -894,6 +959,8 @@ out_err:
if (newfd != -1) {
close(newfd);
}
lo_inode_put(lo, &inode);
lo_inode_put(lo, &dir);
return saverr;
}
@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
{
int res;
int saverr;
struct lo_data *lo = lo_data(req);
struct lo_inode *dir;
struct fuse_entry_param e;
struct lo_cred old = {};
@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
lo_inode_put(lo, &dir);
return;
out:
lo_inode_put(lo, &dir);
fuse_reply_err(req, saverr);
}
@ -1085,6 +1155,7 @@ fallback:
if (res != -1) {
res = linkat(parent->fd, path, dfd, name, 0);
unref_inode_lolocked(lo, parent, 1);
lo_inode_put(lo, &parent);
}
return res;
@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
{
int res;
struct lo_data *lo = lo_data(req);
struct lo_inode *parent_inode;
struct lo_inode *inode;
struct fuse_entry_param e;
int saverr;
@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
return;
}
parent_inode = lo_inode(req, parent);
inode = lo_inode(req, ino);
if (!inode) {
fuse_reply_err(req, EBADF);
return;
if (!parent_inode || !inode) {
errno = EBADF;
goto out_err;
}
memset(&e, 0, sizeof(struct fuse_entry_param));
e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout;
res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name);
if (res == -1) {
goto out_err;
}
@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
lo_inode_put(lo, &parent_inode);
lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
lo_inode_put(lo, &parent_inode);
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
/* Increments nlookup and caller must release refcount using lo_inode_put() */
static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
const char *name)
{
@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
fuse_reply_err(req, res == -1 ? errno : 0);
unref_inode_lolocked(lo, inode, 1);
lo_inode_put(lo, &inode);
}
static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
unsigned int flags)
{
int res;
struct lo_inode *oldinode;
struct lo_inode *newinode;
struct lo_inode *parent_inode;
struct lo_inode *newparent_inode;
struct lo_inode *oldinode = NULL;
struct lo_inode *newinode = NULL;
struct lo_data *lo = lo_data(req);
if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
parent_inode = lo_inode(req, parent);
newparent_inode = lo_inode(req, newparent);
if (!parent_inode || !newparent_inode) {
fuse_reply_err(req, EBADF);
goto out;
}
oldinode = lookup_name(req, parent, name);
newinode = lookup_name(req, newparent, newname);
@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
#ifndef SYS_renameat2
fuse_reply_err(req, EINVAL);
#else
res = syscall(SYS_renameat2, lo_fd(req, parent), name,
lo_fd(req, newparent), newname, flags);
res = syscall(SYS_renameat2, parent_inode->fd, name,
newparent_inode->fd, newname, flags);
if (res == -1 && errno == ENOSYS) {
fuse_reply_err(req, EINVAL);
} else {
@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out;
}
res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
fuse_reply_err(req, res == -1 ? errno : 0);
out:
unref_inode_lolocked(lo, oldinode, 1);
unref_inode_lolocked(lo, newinode, 1);
lo_inode_put(lo, &oldinode);
lo_inode_put(lo, &newinode);
lo_inode_put(lo, &parent_inode);
lo_inode_put(lo, &newparent_inode);
}
static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
fuse_reply_err(req, res == -1 ? errno : 0);
unref_inode_lolocked(lo, inode, 1);
lo_inode_put(lo, &inode);
}
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
g_hash_table_destroy(inode->posix_locks);
pthread_mutex_destroy(&inode->plock_mutex);
pthread_mutex_unlock(&lo->mutex);
close(inode->fd);
free(inode);
/* Drop our refcount from lo_do_lookup() */
lo_inode_put(lo, &inode);
} else {
pthread_mutex_unlock(&lo->mutex);
}
@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
inode->nlookup = 0;
lo_map_remove(&lo->ino_map, inode->fuse_ino);
close(inode->fd);
lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */
return TRUE;
}
@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
(unsigned long long)nlookup);
unref_inode_lolocked(lo, inode, nlookup);
lo_inode_put(lo, &inode);
}
static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
err = 0;
error:
lo_dirp_put(&d);
lo_inode_put(lo, &dinode);
/*
* If there's an error, we can only signal it if we haven't stored
@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
{
int fd;
struct lo_data *lo = lo_data(req);
struct lo_inode *parent_inode;
struct fuse_entry_param e;
int err;
struct lo_cred old = {};
@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
parent_inode = lo_inode(req, parent);
if (!parent_inode) {
fuse_reply_err(req, EBADF);
return;
}
err = lo_change_cred(req, &old);
if (err) {
goto out;
}
fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
mode);
err = fd == -1 ? errno : 0;
lo_restore_cred(&old);
@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
pthread_mutex_unlock(&lo->mutex);
if (fh == -1) {
close(fd);
fuse_reply_err(req, ENOMEM);
return;
err = ENOMEM;
goto out;
}
fi->fh = fh;
@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
}
out:
lo_inode_put(lo, &parent_inode);
if (err) {
fuse_reply_err(req, err);
} else {
@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
plock =
lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
if (!plock) {
pthread_mutex_unlock(&inode->plock_mutex);
fuse_reply_err(req, ret);
return;
saverr = ret;
goto out;
}
ret = fcntl(plock->fd, F_OFD_GETLK, lock);
if (ret == -1) {
saverr = errno;
}
out:
pthread_mutex_unlock(&inode->plock_mutex);
lo_inode_put(lo, &inode);
if (saverr) {
fuse_reply_err(req, saverr);
@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
if (!plock) {
pthread_mutex_unlock(&inode->plock_mutex);
fuse_reply_err(req, ret);
return;
saverr = ret;
goto out;
}
/* TODO: Is it alright to modify flock? */
@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
if (ret == -1) {
saverr = errno;
}
out:
pthread_mutex_unlock(&inode->plock_mutex);
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
pthread_mutex_unlock(&inode->plock_mutex);
res = close(dup(lo_fi_fd(req, fi)));
lo_inode_put(lo_data(req), &inode);
fuse_reply_err(req, res == -1 ? errno : 0);
}
@ -2115,11 +2227,14 @@ out_free:
if (fd >= 0) {
close(fd);
}
lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
out:
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
goto out_free;
}
@ -2190,11 +2305,14 @@ out_free:
if (fd >= 0) {
close(fd);
}
lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
out:
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
goto out_free;
}
@ -2243,6 +2361,8 @@ out:
if (fd >= 0) {
close(fd);
}
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@ -2289,6 +2409,8 @@ out:
if (fd >= 0) {
close(fd);
}
lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
root->key.ino = stat.st_ino;
root->key.dev = stat.st_dev;
root->nlookup = 2;
g_atomic_int_set(&root->refcount, 2);
}
static guint lo_key_hash(gconstpointer key)