diff --git a/fs/fs_context.c b/fs/fs_context.c index 87c2c9687d90..138b5b4d621d 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -504,7 +504,6 @@ void put_fs_context(struct fs_context *fc) put_net(fc->net_ns); put_user_ns(fc->user_ns); put_cred(fc->cred); - kfree(fc->subtype); put_fc_log(fc); put_filesystem(fc->fs_type); kfree(fc->source); @@ -571,17 +570,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } - if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) && - strcmp(param->key, "subtype") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "VFS: Legacy: Non-string subtype"); - if (fc->subtype) - return invalf(fc, "VFS: Legacy: Multiple subtype"); - fc->subtype = param->string; - param->string = NULL; - return 0; - } - if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); @@ -738,8 +726,6 @@ void vfs_clean_context(struct fs_context *fc) fc->s_fs_info = NULL; fc->sb_flags = 0; security_free_mnt_opts(&fc->security); - kfree(fc->subtype); - fc->subtype = NULL; kfree(fc->source); fc->source = NULL; diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index bab7a0db81dd..00015d851382 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -142,11 +142,10 @@ static int cuse_open(struct inode *inode, struct file *file) static int cuse_release(struct inode *inode, struct file *file) { - struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; - fuse_sync_release(fi, ff, file->f_flags); + fuse_sync_release(NULL, ff, file->f_flags); fuse_conn_put(fc); return 0; @@ -299,6 +298,14 @@ static void cuse_gendev_release(struct device *dev) kfree(dev); } +struct cuse_init_args { + struct fuse_args_pages ap; + struct cuse_init_in in; + struct cuse_init_out out; + struct page *page; + struct fuse_page_desc desc; +}; + /** * cuse_process_init_reply - finish initializing CUSE channel * @@ -306,21 +313,22 @@ static void cuse_gendev_release(struct device *dev) * required data structures for it. Please read the comment at the * top of this file for high level overview. */ -static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +static void cuse_process_init_reply(struct fuse_conn *fc, + struct fuse_args *args, int error) { + struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); + struct fuse_args_pages *ap = &ia->ap; struct cuse_conn *cc = fc_to_cc(fc), *pos; - struct cuse_init_out *arg = req->out.args[0].value; - struct page *page = req->pages[0]; + struct cuse_init_out *arg = &ia->out; + struct page *page = ap->pages[0]; struct cuse_devinfo devinfo = { }; struct device *dev; struct cdev *cdev; dev_t devt; int rc, i; - if (req->out.h.error || - arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { + if (error || arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) goto err; - } fc->minor = arg->minor; fc->max_read = max_t(unsigned, arg->max_read, 4096); @@ -329,7 +337,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) /* parse init reply */ cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; - rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size, + rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size, &devinfo); if (rc) goto err; @@ -396,7 +404,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) dev_set_uevent_suppress(dev, 0); kobject_uevent(&dev->kobj, KOBJ_ADD); out: - kfree(arg); + kfree(ia); __free_page(page); return; @@ -415,55 +423,49 @@ err: static int cuse_send_init(struct cuse_conn *cc) { int rc; - struct fuse_req *req; struct page *page; struct fuse_conn *fc = &cc->fc; - struct cuse_init_in *arg; - void *outarg; + struct cuse_init_args *ia; + struct fuse_args_pages *ap; BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); - req = fuse_get_req_for_background(fc, 1); - if (IS_ERR(req)) { - rc = PTR_ERR(req); - goto err; - } - rc = -ENOMEM; page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) - goto err_put_req; + goto err; - outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL); - if (!outarg) + ia = kzalloc(sizeof(*ia), GFP_KERNEL); + if (!ia) goto err_free_page; - arg = &req->misc.cuse_init_in; - arg->major = FUSE_KERNEL_VERSION; - arg->minor = FUSE_KERNEL_MINOR_VERSION; - arg->flags |= CUSE_UNRESTRICTED_IOCTL; - req->in.h.opcode = CUSE_INIT; - req->in.numargs = 1; - req->in.args[0].size = sizeof(struct cuse_init_in); - req->in.args[0].value = arg; - req->out.numargs = 2; - req->out.args[0].size = sizeof(struct cuse_init_out); - req->out.args[0].value = outarg; - req->out.args[1].size = CUSE_INIT_INFO_MAX; - req->out.argvar = 1; - req->out.argpages = 1; - req->pages[0] = page; - req->page_descs[0].length = req->out.args[1].size; - req->num_pages = 1; - req->end = cuse_process_init_reply; - fuse_request_send_background(fc, req); - - return 0; + ap = &ia->ap; + ia->in.major = FUSE_KERNEL_VERSION; + ia->in.minor = FUSE_KERNEL_MINOR_VERSION; + ia->in.flags |= CUSE_UNRESTRICTED_IOCTL; + ap->args.opcode = CUSE_INIT; + ap->args.in_numargs = 1; + ap->args.in_args[0].size = sizeof(ia->in); + ap->args.in_args[0].value = &ia->in; + ap->args.out_numargs = 2; + ap->args.out_args[0].size = sizeof(ia->out); + ap->args.out_args[0].value = &ia->out; + ap->args.out_args[1].size = CUSE_INIT_INFO_MAX; + ap->args.out_argvar = 1; + ap->args.out_pages = 1; + ap->num_pages = 1; + ap->pages = &ia->page; + ap->descs = &ia->desc; + ia->page = page; + ia->desc.length = ap->args.out_args[1].size; + ap->args.end = cuse_process_init_reply; + rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL); + if (rc) { + kfree(ia); err_free_page: - __free_page(page); -err_put_req: - fuse_put_request(fc, req); + __free_page(page); + } err: return rc; } @@ -504,9 +506,9 @@ static int cuse_channel_open(struct inode *inode, struct file *file) * Limit the cuse channel to requests that can * be represented in file->f_cred->user_ns. */ - fuse_conn_init(&cc->fc, file->f_cred->user_ns); + fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL); - fud = fuse_dev_alloc(&cc->fc); + fud = fuse_dev_alloc_install(&cc->fc); if (!fud) { kfree(cc); return -ENOMEM; @@ -519,6 +521,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) rc = cuse_send_init(cc); if (rc) { fuse_dev_free(fud); + fuse_conn_put(&cc->fc); return rc; } file->private_data = fud; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ea8237513dfa..dadd617d826c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -40,107 +40,30 @@ static struct fuse_dev *fuse_get_dev(struct file *file) return READ_ONCE(file->private_data); } -static void fuse_request_init(struct fuse_req *req, struct page **pages, - struct fuse_page_desc *page_descs, - unsigned npages) +static void fuse_request_init(struct fuse_req *req) { INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); refcount_set(&req->count, 1); - req->pages = pages; - req->page_descs = page_descs; - req->max_pages = npages; __set_bit(FR_PENDING, &req->flags); } -static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags, - struct fuse_page_desc **desc) -{ - struct page **pages; - - pages = kzalloc(npages * (sizeof(struct page *) + - sizeof(struct fuse_page_desc)), flags); - *desc = (void *) pages + npages * sizeof(struct page *); - - return pages; -} - -static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) +static struct fuse_req *fuse_request_alloc(gfp_t flags) { struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); - if (req) { - struct page **pages = NULL; - struct fuse_page_desc *page_descs = NULL; + if (req) + fuse_request_init(req); - WARN_ON(npages > FUSE_MAX_MAX_PAGES); - if (npages > FUSE_REQ_INLINE_PAGES) { - pages = fuse_req_pages_alloc(npages, flags, - &page_descs); - if (!pages) { - kmem_cache_free(fuse_req_cachep, req); - return NULL; - } - } else if (npages) { - pages = req->inline_pages; - page_descs = req->inline_page_descs; - } - - fuse_request_init(req, pages, page_descs, npages); - } return req; } -struct fuse_req *fuse_request_alloc(unsigned npages) +static void fuse_request_free(struct fuse_req *req) { - return __fuse_request_alloc(npages, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(fuse_request_alloc); - -struct fuse_req *fuse_request_alloc_nofs(unsigned npages) -{ - return __fuse_request_alloc(npages, GFP_NOFS); -} - -static void fuse_req_pages_free(struct fuse_req *req) -{ - if (req->pages != req->inline_pages) - kfree(req->pages); -} - -bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req, - gfp_t flags) -{ - struct page **pages; - struct fuse_page_desc *page_descs; - unsigned int npages = min_t(unsigned int, - max_t(unsigned int, req->max_pages * 2, - FUSE_DEFAULT_MAX_PAGES_PER_REQ), - fc->max_pages); - WARN_ON(npages <= req->max_pages); - - pages = fuse_req_pages_alloc(npages, flags, &page_descs); - if (!pages) - return false; - - memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages); - memcpy(page_descs, req->page_descs, - sizeof(struct fuse_page_desc) * req->max_pages); - fuse_req_pages_free(req); - req->pages = pages; - req->page_descs = page_descs; - req->max_pages = npages; - - return true; -} - -void fuse_request_free(struct fuse_req *req) -{ - fuse_req_pages_free(req); kmem_cache_free(fuse_req_cachep, req); } -void __fuse_get_request(struct fuse_req *req) +static void __fuse_get_request(struct fuse_req *req) { refcount_inc(&req->count); } @@ -177,8 +100,9 @@ static void fuse_drop_waiting(struct fuse_conn *fc) } } -static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, - bool for_background) +static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); + +static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background) { struct fuse_req *req; int err; @@ -201,7 +125,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, if (fc->conn_error) goto out; - req = fuse_request_alloc(npages); + req = fuse_request_alloc(GFP_KERNEL); err = -ENOMEM; if (!req) { if (for_background) @@ -229,101 +153,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, return ERR_PTR(err); } -struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) -{ - return __fuse_get_req(fc, npages, false); -} -EXPORT_SYMBOL_GPL(fuse_get_req); - -struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, - unsigned npages) -{ - return __fuse_get_req(fc, npages, true); -} -EXPORT_SYMBOL_GPL(fuse_get_req_for_background); - -/* - * Return request in fuse_file->reserved_req. However that may - * currently be in use. If that is the case, wait for it to become - * available. - */ -static struct fuse_req *get_reserved_req(struct fuse_conn *fc, - struct file *file) -{ - struct fuse_req *req = NULL; - struct fuse_inode *fi = get_fuse_inode(file_inode(file)); - struct fuse_file *ff = file->private_data; - - do { - wait_event(fc->reserved_req_waitq, ff->reserved_req); - spin_lock(&fi->lock); - if (ff->reserved_req) { - req = ff->reserved_req; - ff->reserved_req = NULL; - req->stolen_file = get_file(file); - } - spin_unlock(&fi->lock); - } while (!req); - - return req; -} - -/* - * Put stolen request back into fuse_file->reserved_req - */ -static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) -{ - struct file *file = req->stolen_file; - struct fuse_inode *fi = get_fuse_inode(file_inode(file)); - struct fuse_file *ff = file->private_data; - - WARN_ON(req->max_pages); - spin_lock(&fi->lock); - memset(req, 0, sizeof(*req)); - fuse_request_init(req, NULL, NULL, 0); - BUG_ON(ff->reserved_req); - ff->reserved_req = req; - wake_up_all(&fc->reserved_req_waitq); - spin_unlock(&fi->lock); - fput(file); -} - -/* - * Gets a requests for a file operation, always succeeds - * - * This is used for sending the FLUSH request, which must get to - * userspace, due to POSIX locks which may need to be unlocked. - * - * If allocation fails due to OOM, use the reserved request in - * fuse_file. - * - * This is very unlikely to deadlock accidentally, since the - * filesystem should not have it's own file open. If deadlock is - * intentional, it can still be broken by "aborting" the filesystem. - */ -struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, - struct file *file) -{ - struct fuse_req *req; - - atomic_inc(&fc->num_waiting); - wait_event(fc->blocked_waitq, fc->initialized); - /* Matches smp_wmb() in fuse_set_initialized() */ - smp_rmb(); - req = fuse_request_alloc(0); - if (!req) - req = get_reserved_req(fc, file); - - req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); - req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); - req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); - - __set_bit(FR_WAITING, &req->flags); - __clear_bit(FR_BACKGROUND, &req->flags); - return req; -} - -void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { if (refcount_dec_and_test(&req->count)) { if (test_bit(FR_BACKGROUND, &req->flags)) { @@ -342,15 +172,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) fuse_drop_waiting(fc); } - if (req->stolen_file) - put_reserved_req(fc, req); - else - fuse_request_free(req); + fuse_request_free(req); } } -EXPORT_SYMBOL_GPL(fuse_put_request); -static unsigned len_args(unsigned numargs, struct fuse_arg *args) +unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args) { unsigned nbytes = 0; unsigned i; @@ -360,25 +186,47 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) return nbytes; } +EXPORT_SYMBOL_GPL(fuse_len_args); -static u64 fuse_get_unique(struct fuse_iqueue *fiq) +u64 fuse_get_unique(struct fuse_iqueue *fiq) { fiq->reqctr += FUSE_REQ_ID_STEP; return fiq->reqctr; } +EXPORT_SYMBOL_GPL(fuse_get_unique); static unsigned int fuse_req_hash(u64 unique) { return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); } -static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) +/** + * A new request is available, wake fiq->waitq + */ +static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + wake_up(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); + spin_unlock(&fiq->lock); +} + +const struct fuse_iqueue_ops fuse_dev_fiq_ops = { + .wake_forget_and_unlock = fuse_dev_wake_and_unlock, + .wake_interrupt_and_unlock = fuse_dev_wake_and_unlock, + .wake_pending_and_unlock = fuse_dev_wake_and_unlock, +}; +EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); + +static void queue_request_and_unlock(struct fuse_iqueue *fiq, + struct fuse_req *req) +__releases(fiq->lock) { req->in.h.len = sizeof(struct fuse_in_header) + - len_args(req->in.numargs, (struct fuse_arg *) req->in.args); + fuse_len_args(req->args->in_numargs, + (struct fuse_arg *) req->args->in_args); list_add_tail(&req->list, &fiq->pending); - wake_up_locked(&fiq->waitq); - kill_fasync(&fiq->fasync, SIGIO, POLL_IN); + fiq->ops->wake_pending_and_unlock(fiq); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, @@ -389,16 +237,15 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); if (fiq->connected) { fiq->forget_list_tail->next = forget; fiq->forget_list_tail = forget; - wake_up_locked(&fiq->waitq); - kill_fasync(&fiq->fasync, SIGIO, POLL_IN); + fiq->ops->wake_forget_and_unlock(fiq); } else { kfree(forget); + spin_unlock(&fiq->lock); } - spin_unlock(&fiq->waitq.lock); } static void flush_bg_queue(struct fuse_conn *fc) @@ -412,10 +259,9 @@ static void flush_bg_queue(struct fuse_conn *fc) req = list_first_entry(&fc->bg_queue, struct fuse_req, list); list_del(&req->list); fc->active_background++; - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); req->in.h.unique = fuse_get_unique(fiq); - queue_request(fiq, req); - spin_unlock(&fiq->waitq.lock); + queue_request_and_unlock(fiq, req); } } @@ -427,9 +273,10 @@ static void flush_bg_queue(struct fuse_conn *fc) * the 'end' callback is called if given, else the reference to the * request is released */ -static void request_end(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; + bool async = req->args->end; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; @@ -439,9 +286,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) * smp_mb() from queue_interrupt(). */ if (!list_empty(&req->intr_entry)) { - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); list_del_init(&req->intr_entry); - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); } WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); @@ -475,18 +322,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); } - if (req->end) - req->end(fc, req); + if (async) + req->args->end(fc, req->args, req->out.h.error); put_request: fuse_put_request(fc, req); } +EXPORT_SYMBOL_GPL(fuse_request_end); static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); /* Check for we've sent request to interrupt this req */ if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); return -EINVAL; } @@ -499,13 +347,13 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) smp_mb(); if (test_bit(FR_FINISHED, &req->flags)) { list_del_init(&req->intr_entry); - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); return 0; } - wake_up_locked(&fiq->waitq); - kill_fasync(&fiq->fasync, SIGIO, POLL_IN); + fiq->ops->wake_interrupt_and_unlock(fiq); + } else { + spin_unlock(&fiq->lock); } - spin_unlock(&fiq->waitq.lock); return 0; } @@ -535,16 +383,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) if (!err) return; - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); /* Request is not yet in userspace, bail out */ if (test_bit(FR_PENDING, &req->flags)) { list_del(&req->list); - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); __fuse_put_request(req); req->out.h.error = -EINTR; return; } - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); } /* @@ -559,101 +407,110 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) struct fuse_iqueue *fiq = &fc->iq; BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); if (!fiq->connected) { - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); req->out.h.error = -ENOTCONN; } else { req->in.h.unique = fuse_get_unique(fiq); - queue_request(fiq, req); /* acquire extra reference, since request is still needed - after request_end() */ + after fuse_request_end() */ __fuse_get_request(req); - spin_unlock(&fiq->waitq.lock); + queue_request_and_unlock(fiq, req); request_wait_answer(fc, req); - /* Pairs with smp_wmb() in request_end() */ + /* Pairs with smp_wmb() in fuse_request_end() */ smp_rmb(); } } -void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) -{ - __set_bit(FR_ISREPLY, &req->flags); - if (!test_bit(FR_WAITING, &req->flags)) { - __set_bit(FR_WAITING, &req->flags); - atomic_inc(&fc->num_waiting); - } - __fuse_request_send(fc, req); -} -EXPORT_SYMBOL_GPL(fuse_request_send); - static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) { - if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS) - args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE; + if (fc->minor < 4 && args->opcode == FUSE_STATFS) + args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE; if (fc->minor < 9) { - switch (args->in.h.opcode) { + switch (args->opcode) { case FUSE_LOOKUP: case FUSE_CREATE: case FUSE_MKNOD: case FUSE_MKDIR: case FUSE_SYMLINK: case FUSE_LINK: - args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; break; case FUSE_GETATTR: case FUSE_SETATTR: - args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; break; } } if (fc->minor < 12) { - switch (args->in.h.opcode) { + switch (args->opcode) { case FUSE_CREATE: - args->in.args[0].size = sizeof(struct fuse_open_in); + args->in_args[0].size = sizeof(struct fuse_open_in); break; case FUSE_MKNOD: - args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE; + args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE; break; } } } +static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req) +{ + req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); + req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); + req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); +} + +static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) +{ + req->in.h.opcode = args->opcode; + req->in.h.nodeid = args->nodeid; + req->args = args; +} + ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) { struct fuse_req *req; ssize_t ret; - req = fuse_get_req(fc, 0); - if (IS_ERR(req)) - return PTR_ERR(req); + if (args->force) { + atomic_inc(&fc->num_waiting); + req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL); + + if (!args->nocreds) + fuse_force_creds(fc, req); + + __set_bit(FR_WAITING, &req->flags); + __set_bit(FR_FORCE, &req->flags); + } else { + WARN_ON(args->nocreds); + req = fuse_get_req(fc, false); + if (IS_ERR(req)) + return PTR_ERR(req); + } /* Needs to be done after fuse_get_req() so that fc->minor is valid */ fuse_adjust_compat(fc, args); + fuse_args_to_req(req, args); - req->in.h.opcode = args->in.h.opcode; - req->in.h.nodeid = args->in.h.nodeid; - req->in.numargs = args->in.numargs; - memcpy(req->in.args, args->in.args, - args->in.numargs * sizeof(struct fuse_in_arg)); - req->out.argvar = args->out.argvar; - req->out.numargs = args->out.numargs; - memcpy(req->out.args, args->out.args, - args->out.numargs * sizeof(struct fuse_arg)); - fuse_request_send(fc, req); + if (!args->noreply) + __set_bit(FR_ISREPLY, &req->flags); + __fuse_request_send(fc, req); ret = req->out.h.error; - if (!ret && args->out.argvar) { - BUG_ON(args->out.numargs != 1); - ret = req->out.args[0].size; + if (!ret && args->out_argvar) { + BUG_ON(args->out_numargs == 0); + ret = args->out_args[args->out_numargs - 1].size; } fuse_put_request(fc, req); return ret; } -bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req) +static bool fuse_request_queue_background(struct fuse_conn *fc, + struct fuse_req *req) { bool queued = false; @@ -681,56 +538,63 @@ bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req) return queued; } -void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) +int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, + gfp_t gfp_flags) { - WARN_ON(!req->end); - if (!fuse_request_queue_background(fc, req)) { - req->out.h.error = -ENOTCONN; - req->end(fc, req); - fuse_put_request(fc, req); - } -} -EXPORT_SYMBOL_GPL(fuse_request_send_background); + struct fuse_req *req; -static int fuse_request_send_notify_reply(struct fuse_conn *fc, - struct fuse_req *req, u64 unique) + if (args->force) { + WARN_ON(!args->nocreds); + req = fuse_request_alloc(gfp_flags); + if (!req) + return -ENOMEM; + __set_bit(FR_BACKGROUND, &req->flags); + } else { + WARN_ON(args->nocreds); + req = fuse_get_req(fc, true); + if (IS_ERR(req)) + return PTR_ERR(req); + } + + fuse_args_to_req(req, args); + + if (!fuse_request_queue_background(fc, req)) { + fuse_put_request(fc, req); + return -ENOTCONN; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fuse_simple_background); + +static int fuse_simple_notify_reply(struct fuse_conn *fc, + struct fuse_args *args, u64 unique) { - int err = -ENODEV; + struct fuse_req *req; struct fuse_iqueue *fiq = &fc->iq; + int err = 0; + + req = fuse_get_req(fc, false); + if (IS_ERR(req)) + return PTR_ERR(req); __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; - spin_lock(&fiq->waitq.lock); + + fuse_args_to_req(req, args); + + spin_lock(&fiq->lock); if (fiq->connected) { - queue_request(fiq, req); - err = 0; + queue_request_and_unlock(fiq, req); + } else { + err = -ENODEV; + spin_unlock(&fiq->lock); + fuse_put_request(fc, req); } - spin_unlock(&fiq->waitq.lock); return err; } -void fuse_force_forget(struct file *file, u64 nodeid) -{ - struct inode *inode = file_inode(file); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; - struct fuse_forget_in inarg; - - memset(&inarg, 0, sizeof(inarg)); - inarg.nlookup = 1; - req = fuse_get_req_nofail_nopages(fc, file); - req->in.h.opcode = FUSE_FORGET; - req->in.h.nodeid = nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - __clear_bit(FR_ISREPLY, &req->flags); - __fuse_request_send(fc, req); - /* ignore errors */ - fuse_put_request(fc, req); -} - /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already @@ -1084,14 +948,15 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, { unsigned i; struct fuse_req *req = cs->req; + struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); - for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { + + for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) { int err; - unsigned offset = req->page_descs[i].offset; - unsigned count = min(nbytes, req->page_descs[i].length); + unsigned int offset = ap->descs[i].offset; + unsigned int count = min(nbytes, ap->descs[i].length); - err = fuse_copy_page(cs, &req->pages[i], offset, count, - zeroing); + err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing); if (err) return err; @@ -1149,12 +1014,12 @@ static int request_pending(struct fuse_iqueue *fiq) * Unlike other requests this is assembled on demand, without a need * to allocate a separate fuse_req structure. * - * Called with fiq->waitq.lock held, releases it + * Called with fiq->lock held, releases it */ static int fuse_read_interrupt(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) -__releases(fiq->waitq.lock) +__releases(fiq->lock) { struct fuse_in_header ih; struct fuse_interrupt_in arg; @@ -1169,7 +1034,7 @@ __releases(fiq->waitq.lock) ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT); arg.unique = req->in.h.unique; - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); if (nbytes < reqsize) return -EINVAL; @@ -1181,9 +1046,9 @@ __releases(fiq->waitq.lock) return err ? err : reqsize; } -static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, - unsigned max, - unsigned *countp) +struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, + unsigned int max, + unsigned int *countp) { struct fuse_forget_link *head = fiq->forget_list_head.next; struct fuse_forget_link **newhead = &head; @@ -1202,14 +1067,15 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, return head; } +EXPORT_SYMBOL(fuse_dequeue_forget); static int fuse_read_single_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fiq->waitq.lock) +__releases(fiq->lock) { int err; - struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); + struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL); struct fuse_forget_in arg = { .nlookup = forget->forget_one.nlookup, }; @@ -1220,7 +1086,7 @@ __releases(fiq->waitq.lock) .len = sizeof(ih) + sizeof(arg), }; - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); kfree(forget); if (nbytes < ih.len) return -EINVAL; @@ -1238,7 +1104,7 @@ __releases(fiq->waitq.lock) static int fuse_read_batch_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fiq->waitq.lock) +__releases(fiq->lock) { int err; unsigned max_forgets; @@ -1252,13 +1118,13 @@ __releases(fiq->waitq.lock) }; if (nbytes < ih.len) { - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); return -EINVAL; } max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); - head = dequeue_forget(fiq, max_forgets, &count); - spin_unlock(&fiq->waitq.lock); + head = fuse_dequeue_forget(fiq, max_forgets, &count); + spin_unlock(&fiq->lock); arg.count = count; ih.len += count * sizeof(struct fuse_forget_one); @@ -1288,7 +1154,7 @@ __releases(fiq->waitq.lock) static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fiq->waitq.lock) +__releases(fiq->lock) { if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) return fuse_read_single_forget(fiq, cs, nbytes); @@ -1302,7 +1168,7 @@ __releases(fiq->waitq.lock) * the pending list and copies request data to userspace buffer. If * no reply is needed (FORGET) or request has been aborted or there * was an error during the copying then it's finished by calling - * request_end(). Otherwise add it to the processing list, and set + * fuse_request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, @@ -1313,21 +1179,42 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_iqueue *fiq = &fc->iq; struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; - struct fuse_in *in; + struct fuse_args *args; unsigned reqsize; unsigned int hash; - restart: - spin_lock(&fiq->waitq.lock); - err = -EAGAIN; - if ((file->f_flags & O_NONBLOCK) && fiq->connected && - !request_pending(fiq)) - goto err_unlock; + /* + * Require sane minimum read buffer - that has capacity for fixed part + * of any request header + negotiated max_write room for data. + * + * Historically libfuse reserves 4K for fixed header room, but e.g. + * GlusterFS reserves only 80 bytes + * + * = `sizeof(fuse_in_header) + sizeof(fuse_write_in)` + * + * which is the absolute minimum any sane filesystem should be using + * for header room. + */ + if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, + sizeof(struct fuse_in_header) + + sizeof(struct fuse_write_in) + + fc->max_write)) + return -EINVAL; - err = wait_event_interruptible_exclusive_locked(fiq->waitq, + restart: + for (;;) { + spin_lock(&fiq->lock); + if (!fiq->connected || request_pending(fiq)) + break; + spin_unlock(&fiq->lock); + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + err = wait_event_interruptible_exclusive(fiq->waitq, !fiq->connected || request_pending(fiq)); - if (err) - goto err_unlock; + if (err) + return err; + } if (!fiq->connected) { err = fc->aborted ? -ECONNABORTED : -ENODEV; @@ -1351,28 +1238,28 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, req = list_entry(fiq->pending.next, struct fuse_req, list); clear_bit(FR_PENDING, &req->flags); list_del_init(&req->list); - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); - in = &req->in; - reqsize = in->h.len; + args = req->args; + reqsize = req->in.h.len; /* If request is too large, reply with an error and restart the read */ if (nbytes < reqsize) { req->out.h.error = -EIO; /* SETXATTR is special, since it may contain too large data */ - if (in->h.opcode == FUSE_SETXATTR) + if (args->opcode == FUSE_SETXATTR) req->out.h.error = -E2BIG; - request_end(fc, req); + fuse_request_end(fc, req); goto restart; } spin_lock(&fpq->lock); list_add(&req->list, &fpq->io); spin_unlock(&fpq->lock); cs->req = req; - err = fuse_copy_one(cs, &in->h, sizeof(in->h)); + err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h)); if (!err) - err = fuse_copy_args(cs, in->numargs, in->argpages, - (struct fuse_arg *) in->args, 0); + err = fuse_copy_args(cs, args->in_numargs, args->in_pages, + (struct fuse_arg *) args->in_args, 0); fuse_copy_finish(cs); spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); @@ -1405,11 +1292,11 @@ out_end: if (!test_bit(FR_PRIVATE, &req->flags)) list_del_init(&req->list); spin_unlock(&fpq->lock); - request_end(fc, req); + fuse_request_end(fc, req); return err; err_unlock: - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); return err; } @@ -1728,9 +1615,19 @@ out_finish: return err; } -static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) +struct fuse_retrieve_args { + struct fuse_args_pages ap; + struct fuse_notify_retrieve_in inarg; +}; + +static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args, + int error) { - release_pages(req->pages, req->num_pages); + struct fuse_retrieve_args *ra = + container_of(args, typeof(*ra), ap.args); + + release_pages(ra->ap.pages, ra->ap.num_pages); + kfree(ra); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, @@ -1738,13 +1635,16 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, { int err; struct address_space *mapping = inode->i_mapping; - struct fuse_req *req; pgoff_t index; loff_t file_size; unsigned int num; unsigned int offset; size_t total_len = 0; unsigned int num_pages; + struct fuse_retrieve_args *ra; + size_t args_size = sizeof(*ra); + struct fuse_args_pages *ap; + struct fuse_args *args; offset = outarg->offset & ~PAGE_MASK; file_size = i_size_read(inode); @@ -1758,19 +1658,26 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; num_pages = min(num_pages, fc->max_pages); - req = fuse_get_req(fc, num_pages); - if (IS_ERR(req)) - return PTR_ERR(req); + args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0])); - req->in.h.opcode = FUSE_NOTIFY_REPLY; - req->in.h.nodeid = outarg->nodeid; - req->in.numargs = 2; - req->in.argpages = 1; - req->end = fuse_retrieve_end; + ra = kzalloc(args_size, GFP_KERNEL); + if (!ra) + return -ENOMEM; + + ap = &ra->ap; + ap->pages = (void *) (ra + 1); + ap->descs = (void *) (ap->pages + num_pages); + + args = &ap->args; + args->nodeid = outarg->nodeid; + args->opcode = FUSE_NOTIFY_REPLY; + args->in_numargs = 2; + args->in_pages = true; + args->end = fuse_retrieve_end; index = outarg->offset >> PAGE_SHIFT; - while (num && req->num_pages < num_pages) { + while (num && ap->num_pages < num_pages) { struct page *page; unsigned int this_num; @@ -1779,27 +1686,25 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, break; this_num = min_t(unsigned, num, PAGE_SIZE - offset); - req->pages[req->num_pages] = page; - req->page_descs[req->num_pages].offset = offset; - req->page_descs[req->num_pages].length = this_num; - req->num_pages++; + ap->pages[ap->num_pages] = page; + ap->descs[ap->num_pages].offset = offset; + ap->descs[ap->num_pages].length = this_num; + ap->num_pages++; offset = 0; num -= this_num; total_len += this_num; index++; } - req->misc.retrieve_in.offset = outarg->offset; - req->misc.retrieve_in.size = total_len; - req->in.args[0].size = sizeof(req->misc.retrieve_in); - req->in.args[0].value = &req->misc.retrieve_in; - req->in.args[1].size = total_len; + ra->inarg.offset = outarg->offset; + ra->inarg.size = total_len; + args->in_args[0].size = sizeof(ra->inarg); + args->in_args[0].value = &ra->inarg; + args->in_args[1].size = total_len; - err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) { - fuse_retrieve_end(fc, req); - fuse_put_request(fc, req); - } + err = fuse_simple_notify_reply(fc, args, outarg->notify_unique); + if (err) + fuse_retrieve_end(fc, args, err); return err; } @@ -1885,27 +1790,25 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) return NULL; } -static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, +static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, unsigned nbytes) { unsigned reqsize = sizeof(struct fuse_out_header); - if (out->h.error) - return nbytes != reqsize ? -EINVAL : 0; + reqsize += fuse_len_args(args->out_numargs, args->out_args); - reqsize += len_args(out->numargs, out->args); - - if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) + if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar)) return -EINVAL; else if (reqsize > nbytes) { - struct fuse_arg *lastarg = &out->args[out->numargs-1]; + struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1]; unsigned diffsize = reqsize - nbytes; + if (diffsize > lastarg->size) return -EINVAL; lastarg->size -= diffsize; } - return fuse_copy_args(cs, out->numargs, out->argpages, out->args, - out->page_zeroing); + return fuse_copy_args(cs, args->out_numargs, args->out_pages, + args->out_args, args->page_zeroing); } /* @@ -1913,7 +1816,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, * the write buffer. The request is then searched on the processing * list by the unique ID found in the header. If found, then remove * it from the list and copy the rest of the buffer to the request. - * The request is finished by calling request_end() + * The request is finished by calling fuse_request_end(). */ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_copy_state *cs, size_t nbytes) @@ -1984,10 +1887,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, set_bit(FR_LOCKED, &req->flags); spin_unlock(&fpq->lock); cs->req = req; - if (!req->out.page_replace) + if (!req->args->page_replace) cs->move_pages = 0; - err = copy_out_args(cs, &req->out, nbytes); + if (oh.error) + err = nbytes != sizeof(oh) ? -EINVAL : 0; + else + err = copy_out_args(cs, req->args, nbytes); fuse_copy_finish(cs); spin_lock(&fpq->lock); @@ -2000,7 +1906,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, list_del_init(&req->list); spin_unlock(&fpq->lock); - request_end(fc, req); + fuse_request_end(fc, req); out: return err ? err : nbytes; @@ -2121,12 +2027,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) fiq = &fud->fc->iq; poll_wait(file, &fiq->waitq, wait); - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); if (!fiq->connected) mask = EPOLLERR; else if (request_pending(fiq)) mask |= EPOLLIN | EPOLLRDNORM; - spin_unlock(&fiq->waitq.lock); + spin_unlock(&fiq->lock); return mask; } @@ -2140,7 +2046,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) req->out.h.error = -ECONNABORTED; clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); - request_end(fc, req); + fuse_request_end(fc, req); } } @@ -2221,15 +2127,15 @@ void fuse_abort_conn(struct fuse_conn *fc) flush_bg_queue(fc); spin_unlock(&fc->bg_lock); - spin_lock(&fiq->waitq.lock); + spin_lock(&fiq->lock); fiq->connected = 0; list_for_each_entry(req, &fiq->pending, list) clear_bit(FR_PENDING, &req->flags); list_splice_tail_init(&fiq->pending, &to_end); while (forget_pending(fiq)) - kfree(dequeue_forget(fiq, 1, NULL)); - wake_up_all_locked(&fiq->waitq); - spin_unlock(&fiq->waitq.lock); + kfree(fuse_dequeue_forget(fiq, 1, NULL)); + wake_up_all(&fiq->waitq); + spin_unlock(&fiq->lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); end_polls(fc); wake_up_all(&fc->blocked_waitq); @@ -2296,7 +2202,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) if (new->private_data) return -EINVAL; - fud = fuse_dev_alloc(fc); + fud = fuse_dev_alloc_install(fc); if (!fud) return -ENOMEM; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dd0f64f7bc06..d572c900bb0f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -24,20 +24,54 @@ static void fuse_advise_use_readdirplus(struct inode *dir) set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); } +#if BITS_PER_LONG >= 64 +static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_fsdata = (void *) time; +} + +static inline u64 fuse_dentry_time(const struct dentry *entry) +{ + return (u64)entry->d_fsdata; +} + +#else union fuse_dentry { u64 time; struct rcu_head rcu; }; -static inline void fuse_dentry_settime(struct dentry *entry, u64 time) +static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) { - ((union fuse_dentry *) entry->d_fsdata)->time = time; + ((union fuse_dentry *) dentry->d_fsdata)->time = time; } -static inline u64 fuse_dentry_time(struct dentry *entry) +static inline u64 fuse_dentry_time(const struct dentry *entry) { return ((union fuse_dentry *) entry->d_fsdata)->time; } +#endif + +static void fuse_dentry_settime(struct dentry *dentry, u64 time) +{ + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); + bool delete = !time && fc->delete_stale; + /* + * Mess with DCACHE_OP_DELETE because dput() will be faster without it. + * Don't care about races, either way it's just an optimization + */ + if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || + (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { + spin_lock(&dentry->d_lock); + if (!delete) + dentry->d_flags &= ~DCACHE_OP_DELETE; + else + dentry->d_flags |= DCACHE_OP_DELETE; + spin_unlock(&dentry->d_lock); + } + + __fuse_dentry_settime(dentry, time); +} /* * FUSE caches dentries and attributes with separate timeout. The @@ -139,14 +173,14 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, struct fuse_entry_out *outarg) { memset(outarg, 0, sizeof(struct fuse_entry_out)); - args->in.h.opcode = FUSE_LOOKUP; - args->in.h.nodeid = nodeid; - args->in.numargs = 1; - args->in.args[0].size = name->len + 1; - args->in.args[0].value = name->name; - args->out.numargs = 1; - args->out.args[0].size = sizeof(struct fuse_entry_out); - args->out.args[0].value = outarg; + args->opcode = FUSE_LOOKUP; + args->nodeid = nodeid; + args->in_numargs = 1; + args->in_args[0].size = name->len + 1; + args->in_args[0].value = name->name; + args->out_numargs = 1; + args->out_args[0].size = sizeof(struct fuse_entry_out); + args->out_args[0].value = outarg; } /* @@ -242,9 +276,11 @@ invalid: goto out; } +#if BITS_PER_LONG < 64 static int fuse_dentry_init(struct dentry *dentry) { - dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL); + dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), + GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); return dentry->d_fsdata ? 0 : -ENOMEM; } @@ -254,16 +290,27 @@ static void fuse_dentry_release(struct dentry *dentry) kfree_rcu(fd, rcu); } +#endif + +static int fuse_dentry_delete(const struct dentry *dentry) +{ + return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); +} const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, + .d_delete = fuse_dentry_delete, +#if BITS_PER_LONG < 64 .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, +#endif }; const struct dentry_operations fuse_root_dentry_operations = { +#if BITS_PER_LONG < 64 .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, +#endif }; int fuse_valid_type(int m) @@ -410,18 +457,18 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, inarg.flags = flags; inarg.mode = mode; inarg.umask = current_umask(); - args.in.h.opcode = FUSE_CREATE; - args.in.h.nodeid = get_node_id(dir); - args.in.numargs = 2; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.in.args[1].size = entry->d_name.len + 1; - args.in.args[1].value = entry->d_name.name; - args.out.numargs = 2; - args.out.args[0].size = sizeof(outentry); - args.out.args[0].value = &outentry; - args.out.args[1].size = sizeof(outopen); - args.out.args[1].value = &outopen; + args.opcode = FUSE_CREATE; + args.nodeid = get_node_id(dir); + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; + args.out_numargs = 2; + args.out_args[0].size = sizeof(outentry); + args.out_args[0].value = &outentry; + args.out_args[1].size = sizeof(outopen); + args.out_args[1].value = &outopen; err = fuse_simple_request(fc, &args); if (err) goto out_free_ff; @@ -526,10 +573,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, return -ENOMEM; memset(&outarg, 0, sizeof(outarg)); - args->in.h.nodeid = get_node_id(dir); - args->out.numargs = 1; - args->out.args[0].size = sizeof(outarg); - args->out.args[0].value = &outarg; + args->nodeid = get_node_id(dir); + args->out_numargs = 1; + args->out_args[0].size = sizeof(outarg); + args->out_args[0].value = &outarg; err = fuse_simple_request(fc, args); if (err) goto out_put_forget_req; @@ -582,12 +629,12 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); inarg.umask = current_umask(); - args.in.h.opcode = FUSE_MKNOD; - args.in.numargs = 2; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.in.args[1].size = entry->d_name.len + 1; - args.in.args[1].value = entry->d_name.name; + args.opcode = FUSE_MKNOD; + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; return create_new_entry(fc, &args, dir, entry, mode); } @@ -609,12 +656,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.umask = current_umask(); - args.in.h.opcode = FUSE_MKDIR; - args.in.numargs = 2; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.in.args[1].size = entry->d_name.len + 1; - args.in.args[1].value = entry->d_name.name; + args.opcode = FUSE_MKDIR; + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; return create_new_entry(fc, &args, dir, entry, S_IFDIR); } @@ -625,12 +672,12 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, unsigned len = strlen(link) + 1; FUSE_ARGS(args); - args.in.h.opcode = FUSE_SYMLINK; - args.in.numargs = 2; - args.in.args[0].size = entry->d_name.len + 1; - args.in.args[0].value = entry->d_name.name; - args.in.args[1].size = len; - args.in.args[1].value = link; + args.opcode = FUSE_SYMLINK; + args.in_numargs = 2; + args.in_args[0].size = entry->d_name.len + 1; + args.in_args[0].value = entry->d_name.name; + args.in_args[1].size = len; + args.in_args[1].value = link; return create_new_entry(fc, &args, dir, entry, S_IFLNK); } @@ -648,11 +695,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_conn *fc = get_fuse_conn(dir); FUSE_ARGS(args); - args.in.h.opcode = FUSE_UNLINK; - args.in.h.nodeid = get_node_id(dir); - args.in.numargs = 1; - args.in.args[0].size = entry->d_name.len + 1; - args.in.args[0].value = entry->d_name.name; + args.opcode = FUSE_UNLINK; + args.nodeid = get_node_id(dir); + args.in_numargs = 1; + args.in_args[0].size = entry->d_name.len + 1; + args.in_args[0].value = entry->d_name.name; err = fuse_simple_request(fc, &args); if (!err) { struct inode *inode = d_inode(entry); @@ -684,11 +731,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_conn *fc = get_fuse_conn(dir); FUSE_ARGS(args); - args.in.h.opcode = FUSE_RMDIR; - args.in.h.nodeid = get_node_id(dir); - args.in.numargs = 1; - args.in.args[0].size = entry->d_name.len + 1; - args.in.args[0].value = entry->d_name.name; + args.opcode = FUSE_RMDIR; + args.nodeid = get_node_id(dir); + args.in_numargs = 1; + args.in_args[0].size = entry->d_name.len + 1; + args.in_args[0].value = entry->d_name.name; err = fuse_simple_request(fc, &args); if (!err) { clear_nlink(d_inode(entry)); @@ -711,15 +758,15 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); inarg.flags = flags; - args.in.h.opcode = opcode; - args.in.h.nodeid = get_node_id(olddir); - args.in.numargs = 3; - args.in.args[0].size = argsize; - args.in.args[0].value = &inarg; - args.in.args[1].size = oldent->d_name.len + 1; - args.in.args[1].value = oldent->d_name.name; - args.in.args[2].size = newent->d_name.len + 1; - args.in.args[2].value = newent->d_name.name; + args.opcode = opcode; + args.nodeid = get_node_id(olddir); + args.in_numargs = 3; + args.in_args[0].size = argsize; + args.in_args[0].value = &inarg; + args.in_args[1].size = oldent->d_name.len + 1; + args.in_args[1].value = oldent->d_name.name; + args.in_args[2].size = newent->d_name.len + 1; + args.in_args[2].value = newent->d_name.name; err = fuse_simple_request(fc, &args); if (!err) { /* ctime changes */ @@ -796,12 +843,12 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); - args.in.h.opcode = FUSE_LINK; - args.in.numargs = 2; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.in.args[1].size = newent->d_name.len + 1; - args.in.args[1].value = newent->d_name.name; + args.opcode = FUSE_LINK; + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = newent->d_name.len + 1; + args.in_args[1].value = newent->d_name.name; err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); /* Contrary to "normal" filesystems it can happen that link makes two "logical" inodes point to the same "physical" @@ -884,14 +931,14 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, inarg.getattr_flags |= FUSE_GETATTR_FH; inarg.fh = ff->fh; } - args.in.h.opcode = FUSE_GETATTR; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.opcode = FUSE_GETATTR; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) { if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { @@ -1056,11 +1103,11 @@ static int fuse_access(struct inode *inode, int mask) memset(&inarg, 0, sizeof(inarg)); inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); - args.in.h.opcode = FUSE_ACCESS; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; + args.opcode = FUSE_ACCESS; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_access = 1; @@ -1152,38 +1199,36 @@ static int fuse_permission(struct inode *inode, int mask) static int fuse_readlink_page(struct inode *inode, struct page *page) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; - int err; + struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; + struct fuse_args_pages ap = { + .num_pages = 1, + .pages = &page, + .descs = &desc, + }; + char *link; + ssize_t res; - req = fuse_get_req(fc, 1); - if (IS_ERR(req)) - return PTR_ERR(req); + ap.args.opcode = FUSE_READLINK; + ap.args.nodeid = get_node_id(inode); + ap.args.out_pages = true; + ap.args.out_argvar = true; + ap.args.page_zeroing = true; + ap.args.out_numargs = 1; + ap.args.out_args[0].size = desc.length; + res = fuse_simple_request(fc, &ap.args); - req->out.page_zeroing = 1; - req->out.argpages = 1; - req->num_pages = 1; - req->pages[0] = page; - req->page_descs[0].length = PAGE_SIZE - 1; - req->in.h.opcode = FUSE_READLINK; - req->in.h.nodeid = get_node_id(inode); - req->out.argvar = 1; - req->out.numargs = 1; - req->out.args[0].size = PAGE_SIZE - 1; - fuse_request_send(fc, req); - err = req->out.h.error; - - if (!err) { - char *link = page_address(page); - size_t len = req->out.args[0].size; - - BUG_ON(len >= PAGE_SIZE); - link[len] = '\0'; - } - - fuse_put_request(fc, req); fuse_invalidate_atime(inode); - return err; + if (res < 0) + return res; + + if (WARN_ON(res >= PAGE_SIZE)) + return -EIO; + + link = page_address(page); + link[res] = '\0'; + + return 0; } static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, @@ -1383,14 +1428,14 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, struct fuse_setattr_in *inarg_p, struct fuse_attr_out *outarg_p) { - args->in.h.opcode = FUSE_SETATTR; - args->in.h.nodeid = get_node_id(inode); - args->in.numargs = 1; - args->in.args[0].size = sizeof(*inarg_p); - args->in.args[0].value = inarg_p; - args->out.numargs = 1; - args->out.args[0].size = sizeof(*outarg_p); - args->out.args[0].value = outarg_p; + args->opcode = FUSE_SETATTR; + args->nodeid = get_node_id(inode); + args->in_numargs = 1; + args->in_args[0].size = sizeof(*inarg_p); + args->in_args[0].value = inarg_p; + args->out_numargs = 1; + args->out_args[0].size = sizeof(*outarg_p); + args->out_args[0].value = outarg_p; } /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ae2828beb00..0f0225686aee 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -19,6 +19,18 @@ #include #include +static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, + struct fuse_page_desc **desc) +{ + struct page **pages; + + pages = kzalloc(npages * (sizeof(struct page *) + + sizeof(struct fuse_page_desc)), flags); + *desc = (void *) (pages + npages); + + return pages; +} + static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int opcode, struct fuse_open_out *outargp) { @@ -29,29 +41,36 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; - args.in.h.opcode = opcode; - args.in.h.nodeid = nodeid; - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.out.numargs = 1; - args.out.args[0].size = sizeof(*outargp); - args.out.args[0].value = outargp; + args.opcode = opcode; + args.nodeid = nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(*outargp); + args.out_args[0].value = outargp; return fuse_simple_request(fc, &args); } +struct fuse_release_args { + struct fuse_args args; + struct fuse_release_in inarg; + struct inode *inode; +}; + struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; - ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); + ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT); if (unlikely(!ff)) return NULL; ff->fc = fc; - ff->reserved_req = fuse_request_alloc(0); - if (unlikely(!ff->reserved_req)) { + ff->release_args = kzalloc(sizeof(*ff->release_args), + GFP_KERNEL_ACCOUNT); + if (!ff->release_args) { kfree(ff); return NULL; } @@ -69,7 +88,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) void fuse_file_free(struct fuse_file *ff) { - fuse_request_free(ff->reserved_req); + kfree(ff->release_args); mutex_destroy(&ff->readdir.lock); kfree(ff); } @@ -80,34 +99,31 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) return ff; } -static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args, + int error) { - iput(req->misc.release.inode); + struct fuse_release_args *ra = container_of(args, typeof(*ra), args); + + iput(ra->inode); + kfree(ra); } static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) { if (refcount_dec_and_test(&ff->count)) { - struct fuse_req *req = ff->reserved_req; + struct fuse_args *args = &ff->release_args->args; if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { - /* - * Drop the release request when client does not - * implement 'open' - */ - __clear_bit(FR_BACKGROUND, &req->flags); - iput(req->misc.release.inode); - fuse_put_request(ff->fc, req); + /* Do nothing when client does not implement 'open' */ + fuse_release_end(ff->fc, args, 0); } else if (sync) { - __set_bit(FR_FORCE, &req->flags); - __clear_bit(FR_BACKGROUND, &req->flags); - fuse_request_send(ff->fc, req); - iput(req->misc.release.inode); - fuse_put_request(ff->fc, req); + fuse_simple_request(ff->fc, args); + fuse_release_end(ff->fc, args, 0); } else { - req->end = fuse_release_end; - __set_bit(FR_BACKGROUND, &req->flags); - fuse_request_send_background(ff->fc, req); + args->end = fuse_release_end; + if (fuse_simple_background(ff->fc, args, + GFP_KERNEL | __GFP_NOFAIL)) + fuse_release_end(ff->fc, args, -ENOTCONN); } kfree(ff); } @@ -227,8 +243,7 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, int flags, int opcode) { struct fuse_conn *fc = ff->fc; - struct fuse_req *req = ff->reserved_req; - struct fuse_release_in *inarg = &req->misc.release.in; + struct fuse_release_args *ra = ff->release_args; /* Inode is NULL on error path of fuse_create_open() */ if (likely(fi)) { @@ -243,32 +258,33 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, wake_up_interruptible_all(&ff->poll_wait); - inarg->fh = ff->fh; - inarg->flags = flags; - req->in.h.opcode = opcode; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(struct fuse_release_in); - req->in.args[0].value = inarg; + ra->inarg.fh = ff->fh; + ra->inarg.flags = flags; + ra->args.in_numargs = 1; + ra->args.in_args[0].size = sizeof(struct fuse_release_in); + ra->args.in_args[0].value = &ra->inarg; + ra->args.opcode = opcode; + ra->args.nodeid = ff->nodeid; + ra->args.force = true; + ra->args.nocreds = true; } void fuse_release_common(struct file *file, bool isdir) { struct fuse_inode *fi = get_fuse_inode(file_inode(file)); struct fuse_file *ff = file->private_data; - struct fuse_req *req = ff->reserved_req; + struct fuse_release_args *ra = ff->release_args; int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; fuse_prepare_release(fi, ff, file->f_flags, opcode); if (ff->flock) { - struct fuse_release_in *inarg = &req->misc.release.in; - inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; - inarg->lock_owner = fuse_lock_owner_id(ff->fc, - (fl_owner_t) file); + ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; + ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc, + (fl_owner_t) file); } /* Hold inode until release is finished */ - req->misc.release.inode = igrab(file_inode(file)); + ra->inode = igrab(file_inode(file)); /* * Normally this will send the RELEASE request, however if @@ -279,7 +295,7 @@ void fuse_release_common(struct file *file, bool isdir) * synchronous RELEASE is allowed (and desirable) in this case * because the server can be trusted not to screw up. */ - fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir); + fuse_file_put(ff, ff->fc->destroy, isdir); } static int fuse_open(struct inode *inode, struct file *file) @@ -335,19 +351,27 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) return (u64) v0 + ((u64) v1 << 32); } -static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi, +struct fuse_writepage_args { + struct fuse_io_args ia; + struct list_head writepages_entry; + struct list_head queue_entry; + struct fuse_writepage_args *next; + struct inode *inode; +}; + +static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi, pgoff_t idx_from, pgoff_t idx_to) { - struct fuse_req *req; + struct fuse_writepage_args *wpa; - list_for_each_entry(req, &fi->writepages, writepages_entry) { + list_for_each_entry(wpa, &fi->writepages, writepages_entry) { pgoff_t curr_index; - WARN_ON(get_fuse_inode(req->inode) != fi); - curr_index = req->misc.write.in.offset >> PAGE_SHIFT; - if (idx_from < curr_index + req->num_pages && + WARN_ON(get_fuse_inode(wpa->inode) != fi); + curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT; + if (idx_from < curr_index + wpa->ia.ap.num_pages && curr_index <= idx_to) { - return req; + return wpa; } } return NULL; @@ -383,12 +407,11 @@ static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) * Since fuse doesn't rely on the VM writeback tracking, this has to * use some other means. */ -static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) +static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) { struct fuse_inode *fi = get_fuse_inode(inode); wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); - return 0; } /* @@ -411,8 +434,8 @@ static int fuse_flush(struct file *file, fl_owner_t id) struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; - struct fuse_req *req; struct fuse_flush_in inarg; + FUSE_ARGS(args); int err; if (is_bad_inode(inode)) @@ -433,19 +456,17 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (err) return err; - req = fuse_get_req_nofail_nopages(fc, file); memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.lock_owner = fuse_lock_owner_id(fc, id); - req->in.h.opcode = FUSE_FLUSH; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - __set_bit(FR_FORCE, &req->flags); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); + args.opcode = FUSE_FLUSH; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.force = true; + + err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_flush = 1; err = 0; @@ -465,11 +486,11 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0; - args.in.h.opcode = opcode; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; + args.opcode = opcode; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; return fuse_simple_request(fc, &args); } @@ -523,35 +544,35 @@ out: return err; } -void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, - size_t count, int opcode) +void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, + size_t count, int opcode) { - struct fuse_read_in *inarg = &req->misc.read.in; struct fuse_file *ff = file->private_data; + struct fuse_args *args = &ia->ap.args; - inarg->fh = ff->fh; - inarg->offset = pos; - inarg->size = count; - inarg->flags = file->f_flags; - req->in.h.opcode = opcode; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(struct fuse_read_in); - req->in.args[0].value = inarg; - req->out.argvar = 1; - req->out.numargs = 1; - req->out.args[0].size = count; + ia->read.in.fh = ff->fh; + ia->read.in.offset = pos; + ia->read.in.size = count; + ia->read.in.flags = file->f_flags; + args->opcode = opcode; + args->nodeid = ff->nodeid; + args->in_numargs = 1; + args->in_args[0].size = sizeof(ia->read.in); + args->in_args[0].value = &ia->read.in; + args->out_argvar = true; + args->out_numargs = 1; + args->out_args[0].size = count; } -static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty) +static void fuse_release_user_pages(struct fuse_args_pages *ap, + bool should_dirty) { - unsigned i; + unsigned int i; - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; + for (i = 0; i < ap->num_pages; i++) { if (should_dirty) - set_page_dirty_lock(page); - put_page(page); + set_page_dirty_lock(ap->pages[i]); + put_page(ap->pages[i]); } } @@ -621,64 +642,94 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) kref_put(&io->refcnt, fuse_io_release); } -static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) +static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io, + unsigned int npages) { - struct fuse_io_priv *io = req->io; - ssize_t pos = -1; + struct fuse_io_args *ia; - fuse_release_user_pages(req, io->should_dirty); - - if (io->write) { - if (req->misc.write.in.size != req->misc.write.out.size) - pos = req->misc.write.in.offset - io->offset + - req->misc.write.out.size; - } else { - if (req->misc.read.in.size != req->out.args[0].size) - pos = req->misc.read.in.offset - io->offset + - req->out.args[0].size; + ia = kzalloc(sizeof(*ia), GFP_KERNEL); + if (ia) { + ia->io = io; + ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL, + &ia->ap.descs); + if (!ia->ap.pages) { + kfree(ia); + ia = NULL; + } } - - fuse_aio_complete(io, req->out.h.error, pos); + return ia; } -static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, - size_t num_bytes, struct fuse_io_priv *io) +static void fuse_io_free(struct fuse_io_args *ia) { + kfree(ia->ap.pages); + kfree(ia); +} + +static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args, + int err) +{ + struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); + struct fuse_io_priv *io = ia->io; + ssize_t pos = -1; + + fuse_release_user_pages(&ia->ap, io->should_dirty); + + if (err) { + /* Nothing */ + } else if (io->write) { + if (ia->write.out.size > ia->write.in.size) { + err = -EIO; + } else if (ia->write.in.size != ia->write.out.size) { + pos = ia->write.in.offset - io->offset + + ia->write.out.size; + } + } else { + u32 outsize = args->out_args[0].size; + + if (ia->read.in.size != outsize) + pos = ia->read.in.offset - io->offset + outsize; + } + + fuse_aio_complete(io, err, pos); + fuse_io_free(ia); +} + +static ssize_t fuse_async_req_send(struct fuse_conn *fc, + struct fuse_io_args *ia, size_t num_bytes) +{ + ssize_t err; + struct fuse_io_priv *io = ia->io; + spin_lock(&io->lock); kref_get(&io->refcnt); io->size += num_bytes; io->reqs++; spin_unlock(&io->lock); - req->io = io; - req->end = fuse_aio_complete_req; + ia->ap.args.end = fuse_aio_complete_req; + err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); - __fuse_get_request(req); - fuse_request_send_background(fc, req); - - return num_bytes; + return err ?: num_bytes; } -static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io, - loff_t pos, size_t count, fl_owner_t owner) +static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count, + fl_owner_t owner) { - struct file *file = io->iocb->ki_filp; + struct file *file = ia->io->iocb->ki_filp; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; - fuse_read_fill(req, file, pos, count, FUSE_READ); + fuse_read_args_fill(ia, file, pos, count, FUSE_READ); if (owner != NULL) { - struct fuse_read_in *inarg = &req->misc.read.in; - - inarg->read_flags |= FUSE_READ_LOCKOWNER; - inarg->lock_owner = fuse_lock_owner_id(fc, owner); + ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; + ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner); } - if (io->async) - return fuse_async_req_send(fc, req, count, io); + if (ia->io->async) + return fuse_async_req_send(fc, ia, count); - fuse_request_send(fc, req); - return req->out.args[0].size; + return fuse_simple_request(fc, &ia->ap.args); } static void fuse_read_update_size(struct inode *inode, loff_t size, @@ -696,10 +747,9 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, spin_unlock(&fi->lock); } -static void fuse_short_read(struct fuse_req *req, struct inode *inode, - u64 attr_ver) +static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read, + struct fuse_args_pages *ap) { - size_t num_read = req->out.args[0].size; struct fuse_conn *fc = get_fuse_conn(inode); if (fc->writeback_cache) { @@ -712,28 +762,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, int start_idx = num_read >> PAGE_SHIFT; size_t off = num_read & (PAGE_SIZE - 1); - for (i = start_idx; i < req->num_pages; i++) { - zero_user_segment(req->pages[i], off, PAGE_SIZE); + for (i = start_idx; i < ap->num_pages; i++) { + zero_user_segment(ap->pages[i], off, PAGE_SIZE); off = 0; } } else { - loff_t pos = page_offset(req->pages[0]) + num_read; + loff_t pos = page_offset(ap->pages[0]) + num_read; fuse_read_update_size(inode, pos, attr_ver); } } static int fuse_do_readpage(struct file *file, struct page *page) { - struct kiocb iocb; - struct fuse_io_priv io; struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; - size_t num_read; loff_t pos = page_offset(page); - size_t count = PAGE_SIZE; + struct fuse_page_desc desc = { .length = PAGE_SIZE }; + struct fuse_io_args ia = { + .ap.args.page_zeroing = true, + .ap.args.out_pages = true, + .ap.num_pages = 1, + .ap.pages = &page, + .ap.descs = &desc, + }; + ssize_t res; u64 attr_ver; - int err; /* * Page writeback can extend beyond the lifetime of the @@ -742,35 +795,21 @@ static int fuse_do_readpage(struct file *file, struct page *page) */ fuse_wait_on_page_writeback(inode, page->index); - req = fuse_get_req(fc, 1); - if (IS_ERR(req)) - return PTR_ERR(req); - attr_ver = fuse_get_attr_version(fc); - req->out.page_zeroing = 1; - req->out.argpages = 1; - req->num_pages = 1; - req->pages[0] = page; - req->page_descs[0].length = count; - init_sync_kiocb(&iocb, file); - io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb); - num_read = fuse_send_read(req, &io, pos, count, NULL); - err = req->out.h.error; + fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); + res = fuse_simple_request(fc, &ia.ap.args); + if (res < 0) + return res; + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (res < desc.length) + fuse_short_read(inode, attr_ver, res, &ia.ap); - if (!err) { - /* - * Short read means EOF. If file size is larger, truncate it - */ - if (num_read < count) - fuse_short_read(req, inode, attr_ver); + SetPageUptodate(page); - SetPageUptodate(page); - } - - fuse_put_request(fc, req); - - return err; + return 0; } static int fuse_readpage(struct file *file, struct page *page) @@ -789,15 +828,18 @@ static int fuse_readpage(struct file *file, struct page *page) return err; } -static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args, + int err) { int i; - size_t count = req->misc.read.in.size; - size_t num_read = req->out.args[0].size; + struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); + struct fuse_args_pages *ap = &ia->ap; + size_t count = ia->read.in.size; + size_t num_read = args->out_args[0].size; struct address_space *mapping = NULL; - for (i = 0; mapping == NULL && i < req->num_pages; i++) - mapping = req->pages[i]->mapping; + for (i = 0; mapping == NULL && i < ap->num_pages; i++) + mapping = ap->pages[i]->mapping; if (mapping) { struct inode *inode = mapping->host; @@ -805,93 +847,97 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) /* * Short read means EOF. If file size is larger, truncate it */ - if (!req->out.h.error && num_read < count) - fuse_short_read(req, inode, req->misc.read.attr_ver); + if (!err && num_read < count) + fuse_short_read(inode, ia->read.attr_ver, num_read, ap); fuse_invalidate_atime(inode); } - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; - if (!req->out.h.error) + for (i = 0; i < ap->num_pages; i++) { + struct page *page = ap->pages[i]; + + if (!err) SetPageUptodate(page); else SetPageError(page); unlock_page(page); put_page(page); } - if (req->ff) - fuse_file_put(req->ff, false, false); + if (ia->ff) + fuse_file_put(ia->ff, false, false); + + fuse_io_free(ia); } -static void fuse_send_readpages(struct fuse_req *req, struct file *file) +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; - loff_t pos = page_offset(req->pages[0]); - size_t count = req->num_pages << PAGE_SHIFT; + struct fuse_args_pages *ap = &ia->ap; + loff_t pos = page_offset(ap->pages[0]); + size_t count = ap->num_pages << PAGE_SHIFT; + int err; - req->out.argpages = 1; - req->out.page_zeroing = 1; - req->out.page_replace = 1; - fuse_read_fill(req, file, pos, count, FUSE_READ); - req->misc.read.attr_ver = fuse_get_attr_version(fc); + ap->args.out_pages = true; + ap->args.page_zeroing = true; + ap->args.page_replace = true; + fuse_read_args_fill(ia, file, pos, count, FUSE_READ); + ia->read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { - req->ff = fuse_file_get(ff); - req->end = fuse_readpages_end; - fuse_request_send_background(fc, req); + ia->ff = fuse_file_get(ff); + ap->args.end = fuse_readpages_end; + err = fuse_simple_background(fc, &ap->args, GFP_KERNEL); + if (!err) + return; } else { - fuse_request_send(fc, req); - fuse_readpages_end(fc, req); - fuse_put_request(fc, req); + err = fuse_simple_request(fc, &ap->args); } + fuse_readpages_end(fc, &ap->args, err); } struct fuse_fill_data { - struct fuse_req *req; + struct fuse_io_args *ia; struct file *file; struct inode *inode; - unsigned nr_pages; + unsigned int nr_pages; + unsigned int max_pages; }; static int fuse_readpages_fill(void *_data, struct page *page) { struct fuse_fill_data *data = _data; - struct fuse_req *req = data->req; + struct fuse_io_args *ia = data->ia; + struct fuse_args_pages *ap = &ia->ap; struct inode *inode = data->inode; struct fuse_conn *fc = get_fuse_conn(inode); fuse_wait_on_page_writeback(inode, page->index); - if (req->num_pages && - (req->num_pages == fc->max_pages || - (req->num_pages + 1) * PAGE_SIZE > fc->max_read || - req->pages[req->num_pages - 1]->index + 1 != page->index)) { - unsigned int nr_alloc = min_t(unsigned int, data->nr_pages, - fc->max_pages); - fuse_send_readpages(req, data->file); - if (fc->async_read) - req = fuse_get_req_for_background(fc, nr_alloc); - else - req = fuse_get_req(fc, nr_alloc); - - data->req = req; - if (IS_ERR(req)) { + if (ap->num_pages && + (ap->num_pages == fc->max_pages || + (ap->num_pages + 1) * PAGE_SIZE > fc->max_read || + ap->pages[ap->num_pages - 1]->index + 1 != page->index)) { + data->max_pages = min_t(unsigned int, data->nr_pages, + fc->max_pages); + fuse_send_readpages(ia, data->file); + data->ia = ia = fuse_io_alloc(NULL, data->max_pages); + if (!ia) { unlock_page(page); - return PTR_ERR(req); + return -ENOMEM; } + ap = &ia->ap; } - if (WARN_ON(req->num_pages >= req->max_pages)) { + if (WARN_ON(ap->num_pages >= data->max_pages)) { unlock_page(page); - fuse_put_request(fc, req); + fuse_io_free(ia); return -EIO; } get_page(page); - req->pages[req->num_pages] = page; - req->page_descs[req->num_pages].length = PAGE_SIZE; - req->num_pages++; + ap->pages[ap->num_pages] = page; + ap->descs[ap->num_pages].length = PAGE_SIZE; + ap->num_pages++; data->nr_pages--; return 0; } @@ -903,7 +949,6 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_fill_data data; int err; - unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages); err = -EIO; if (is_bad_inode(inode)) @@ -911,21 +956,20 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - if (fc->async_read) - data.req = fuse_get_req_for_background(fc, nr_alloc); - else - data.req = fuse_get_req(fc, nr_alloc); data.nr_pages = nr_pages; - err = PTR_ERR(data.req); - if (IS_ERR(data.req)) + data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages); +; + data.ia = fuse_io_alloc(NULL, data.max_pages); + err = -ENOMEM; + if (!data.ia) goto out; err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { - if (data.req->num_pages) - fuse_send_readpages(data.req, file); + if (data.ia->ap.num_pages) + fuse_send_readpages(data.ia, file); else - fuse_put_request(fc, data.req); + fuse_io_free(data.ia); } out: return err; @@ -952,54 +996,65 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } -static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, - loff_t pos, size_t count) +static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff, + loff_t pos, size_t count) { - struct fuse_write_in *inarg = &req->misc.write.in; - struct fuse_write_out *outarg = &req->misc.write.out; + struct fuse_args *args = &ia->ap.args; - inarg->fh = ff->fh; - inarg->offset = pos; - inarg->size = count; - req->in.h.opcode = FUSE_WRITE; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 2; + ia->write.in.fh = ff->fh; + ia->write.in.offset = pos; + ia->write.in.size = count; + args->opcode = FUSE_WRITE; + args->nodeid = ff->nodeid; + args->in_numargs = 2; if (ff->fc->minor < 9) - req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; + args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; else - req->in.args[0].size = sizeof(struct fuse_write_in); - req->in.args[0].value = inarg; - req->in.args[1].size = count; - req->out.numargs = 1; - req->out.args[0].size = sizeof(struct fuse_write_out); - req->out.args[0].value = outarg; + args->in_args[0].size = sizeof(ia->write.in); + args->in_args[0].value = &ia->write.in; + args->in_args[1].size = count; + args->out_numargs = 1; + args->out_args[0].size = sizeof(ia->write.out); + args->out_args[0].value = &ia->write.out; } -static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, - loff_t pos, size_t count, fl_owner_t owner) +static unsigned int fuse_write_flags(struct kiocb *iocb) { - struct kiocb *iocb = io->iocb; + unsigned int flags = iocb->ki_filp->f_flags; + + if (iocb->ki_flags & IOCB_DSYNC) + flags |= O_DSYNC; + if (iocb->ki_flags & IOCB_SYNC) + flags |= O_SYNC; + + return flags; +} + +static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, + size_t count, fl_owner_t owner) +{ + struct kiocb *iocb = ia->io->iocb; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; - struct fuse_write_in *inarg = &req->misc.write.in; + struct fuse_write_in *inarg = &ia->write.in; + ssize_t err; - fuse_write_fill(req, ff, pos, count); - inarg->flags = file->f_flags; - if (iocb->ki_flags & IOCB_DSYNC) - inarg->flags |= O_DSYNC; - if (iocb->ki_flags & IOCB_SYNC) - inarg->flags |= O_SYNC; + fuse_write_args_fill(ia, ff, pos, count); + inarg->flags = fuse_write_flags(iocb); if (owner != NULL) { inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } - if (io->async) - return fuse_async_req_send(fc, req, count, io); + if (ia->io->async) + return fuse_async_req_send(fc, ia, count); - fuse_request_send(fc, req); - return req->misc.write.out.size; + err = fuse_simple_request(fc, &ia->ap.args); + if (!err && ia->write.out.size > count) + err = -EIO; + + return err ?: ia->write.out.size; } bool fuse_write_update_size(struct inode *inode, loff_t pos) @@ -1019,26 +1074,31 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos) return ret; } -static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb, - struct inode *inode, loff_t pos, - size_t count) +static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, + struct kiocb *iocb, struct inode *inode, + loff_t pos, size_t count) { - size_t res; - unsigned offset; - unsigned i; - struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); + struct fuse_args_pages *ap = &ia->ap; + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + unsigned int offset, i; + int err; - for (i = 0; i < req->num_pages; i++) - fuse_wait_on_page_writeback(inode, req->pages[i]->index); + for (i = 0; i < ap->num_pages; i++) + fuse_wait_on_page_writeback(inode, ap->pages[i]->index); - res = fuse_send_write(req, &io, pos, count, NULL); + fuse_write_args_fill(ia, ff, pos, count); + ia->write.in.flags = fuse_write_flags(iocb); - offset = req->page_descs[0].offset; - count = res; - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; + err = fuse_simple_request(fc, &ap->args); - if (!req->out.h.error && !offset && count >= PAGE_SIZE) + offset = ap->descs[0].offset; + count = ia->write.out.size; + for (i = 0; i < ap->num_pages; i++) { + struct page *page = ap->pages[i]; + + if (!err && !offset && count >= PAGE_SIZE) SetPageUptodate(page); if (count > PAGE_SIZE - offset) @@ -1051,20 +1111,21 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb, put_page(page); } - return res; + return err; } -static ssize_t fuse_fill_write_pages(struct fuse_req *req, - struct address_space *mapping, - struct iov_iter *ii, loff_t pos) +static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap, + struct address_space *mapping, + struct iov_iter *ii, loff_t pos, + unsigned int max_pages) { struct fuse_conn *fc = get_fuse_conn(mapping->host); unsigned offset = pos & (PAGE_SIZE - 1); size_t count = 0; int err; - req->in.argpages = 1; - req->page_descs[0].offset = offset; + ap->args.in_pages = true; + ap->descs[0].offset = offset; do { size_t tmp; @@ -1100,9 +1161,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, } err = 0; - req->pages[req->num_pages] = page; - req->page_descs[req->num_pages].length = tmp; - req->num_pages++; + ap->pages[ap->num_pages] = page; + ap->descs[ap->num_pages].length = tmp; + ap->num_pages++; count += tmp; pos += tmp; @@ -1113,7 +1174,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (!fc->big_writes) break; } while (iov_iter_count(ii) && count < fc->max_write && - req->num_pages < req->max_pages && offset == 0); + ap->num_pages < max_pages && offset == 0); return count > 0 ? count : err; } @@ -1141,27 +1202,27 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); do { - struct fuse_req *req; ssize_t count; + struct fuse_io_args ia = {}; + struct fuse_args_pages *ap = &ia.ap; unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii), fc->max_pages); - req = fuse_get_req(fc, nr_pages); - if (IS_ERR(req)) { - err = PTR_ERR(req); + ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs); + if (!ap->pages) { + err = -ENOMEM; break; } - count = fuse_fill_write_pages(req, mapping, ii, pos); + count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages); if (count <= 0) { err = count; } else { - size_t num_written; - - num_written = fuse_send_write_pages(req, iocb, inode, - pos, count); - err = req->out.h.error; + err = fuse_send_write_pages(&ia, iocb, inode, + pos, count); if (!err) { + size_t num_written = ia.write.out.size; + res += num_written; pos += num_written; @@ -1170,7 +1231,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, err = -EIO; } } - fuse_put_request(fc, req); + kfree(ap->pages); } while (!err && iov_iter_count(ii)); if (res > 0) @@ -1258,14 +1319,14 @@ out: return written ? written : err; } -static inline void fuse_page_descs_length_init(struct fuse_req *req, - unsigned index, unsigned nr_pages) +static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs, + unsigned int index, + unsigned int nr_pages) { int i; for (i = index; i < index + nr_pages; i++) - req->page_descs[i].length = PAGE_SIZE - - req->page_descs[i].offset; + descs[i].length = PAGE_SIZE - descs[i].offset; } static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) @@ -1279,8 +1340,9 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii, return min(iov_iter_single_seg_count(ii), max_size); } -static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, - size_t *nbytesp, int write) +static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, + size_t *nbytesp, int write, + unsigned int max_pages) { size_t nbytes = 0; /* # bytes already packed in req */ ssize_t ret = 0; @@ -1291,21 +1353,21 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t frag_size = fuse_get_frag_size(ii, *nbytesp); if (write) - req->in.args[1].value = (void *) user_addr; + ap->args.in_args[1].value = (void *) user_addr; else - req->out.args[0].value = (void *) user_addr; + ap->args.out_args[0].value = (void *) user_addr; iov_iter_advance(ii, frag_size); *nbytesp = frag_size; return 0; } - while (nbytes < *nbytesp && req->num_pages < req->max_pages) { + while (nbytes < *nbytesp && ap->num_pages < max_pages) { unsigned npages; size_t start; - ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], + ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages], *nbytesp - nbytes, - req->max_pages - req->num_pages, + max_pages - ap->num_pages, &start); if (ret < 0) break; @@ -1316,18 +1378,18 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, ret += start; npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; - req->page_descs[req->num_pages].offset = start; - fuse_page_descs_length_init(req, req->num_pages, npages); + ap->descs[ap->num_pages].offset = start; + fuse_page_descs_length_init(ap->descs, ap->num_pages, npages); - req->num_pages += npages; - req->page_descs[req->num_pages - 1].length -= + ap->num_pages += npages; + ap->descs[ap->num_pages - 1].length -= (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } if (write) - req->in.argpages = 1; + ap->args.in_pages = 1; else - req->out.argpages = 1; + ap->args.out_pages = 1; *nbytesp = nbytes; @@ -1349,17 +1411,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, pgoff_t idx_from = pos >> PAGE_SHIFT; pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT; ssize_t res = 0; - struct fuse_req *req; int err = 0; + struct fuse_io_args *ia; + unsigned int max_pages; - if (io->async) - req = fuse_get_req_for_background(fc, iov_iter_npages(iter, - fc->max_pages)); - else - req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages)); - if (IS_ERR(req)) - return PTR_ERR(req); + max_pages = iov_iter_npages(iter, fc->max_pages); + ia = fuse_io_alloc(io, max_pages); + if (!ia) + return -ENOMEM; + ia->io = io; if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { if (!write) inode_lock(inode); @@ -1370,54 +1431,49 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, io->should_dirty = !write && iter_is_iovec(iter); while (count) { - size_t nres; + ssize_t nres; fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); - err = fuse_get_user_pages(req, iter, &nbytes, write); + + err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write, + max_pages); if (err && !nbytes) break; if (write) { - if (!capable(CAP_FSETID)) { - struct fuse_write_in *inarg; + if (!capable(CAP_FSETID)) + ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV; - inarg = &req->misc.write.in; - inarg->write_flags |= FUSE_WRITE_KILL_PRIV; - } - nres = fuse_send_write(req, io, pos, nbytes, owner); + nres = fuse_send_write(ia, pos, nbytes, owner); } else { - nres = fuse_send_read(req, io, pos, nbytes, owner); + nres = fuse_send_read(ia, pos, nbytes, owner); } - if (!io->async) - fuse_release_user_pages(req, io->should_dirty); - if (req->out.h.error) { - err = req->out.h.error; - break; - } else if (nres > nbytes) { - res = 0; - err = -EIO; + if (!io->async || nres < 0) { + fuse_release_user_pages(&ia->ap, io->should_dirty); + fuse_io_free(ia); + } + ia = NULL; + if (nres < 0) { + err = nres; break; } + WARN_ON(nres > nbytes); + count -= nres; res += nres; pos += nres; if (nres != nbytes) break; if (count) { - fuse_put_request(fc, req); - if (io->async) - req = fuse_get_req_for_background(fc, - iov_iter_npages(iter, fc->max_pages)); - else - req = fuse_get_req(fc, iov_iter_npages(iter, - fc->max_pages)); - if (IS_ERR(req)) + max_pages = iov_iter_npages(iter, fc->max_pages); + ia = fuse_io_alloc(io, max_pages); + if (!ia) break; } } - if (!IS_ERR(req)) - fuse_put_request(fc, req); + if (ia) + fuse_io_free(ia); if (res > 0) *ppos = pos; @@ -1509,45 +1565,53 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) return fuse_direct_write_iter(iocb, from); } -static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_writepage_free(struct fuse_writepage_args *wpa) { + struct fuse_args_pages *ap = &wpa->ia.ap; int i; - for (i = 0; i < req->num_pages; i++) - __free_page(req->pages[i]); + for (i = 0; i < ap->num_pages; i++) + __free_page(ap->pages[i]); - if (req->ff) - fuse_file_put(req->ff, false, false); + if (wpa->ia.ff) + fuse_file_put(wpa->ia.ff, false, false); + + kfree(ap->pages); + kfree(wpa); } -static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_writepage_finish(struct fuse_conn *fc, + struct fuse_writepage_args *wpa) { - struct inode *inode = req->inode; + struct fuse_args_pages *ap = &wpa->ia.ap; + struct inode *inode = wpa->inode; struct fuse_inode *fi = get_fuse_inode(inode); struct backing_dev_info *bdi = inode_to_bdi(inode); int i; - list_del(&req->writepages_entry); - for (i = 0; i < req->num_pages; i++) { + list_del(&wpa->writepages_entry); + for (i = 0; i < ap->num_pages; i++) { dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP); + dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); } wake_up(&fi->page_waitq); } /* Called under fi->lock, may release and reacquire it */ -static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, - loff_t size) +static void fuse_send_writepage(struct fuse_conn *fc, + struct fuse_writepage_args *wpa, loff_t size) __releases(fi->lock) __acquires(fi->lock) { - struct fuse_req *aux, *next; - struct fuse_inode *fi = get_fuse_inode(req->inode); - struct fuse_write_in *inarg = &req->misc.write.in; - __u64 data_size = req->num_pages * PAGE_SIZE; - bool queued; + struct fuse_writepage_args *aux, *next; + struct fuse_inode *fi = get_fuse_inode(wpa->inode); + struct fuse_write_in *inarg = &wpa->ia.write.in; + struct fuse_args *args = &wpa->ia.ap.args; + __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE; + int err; + fi->writectr++; if (inarg->offset + data_size <= size) { inarg->size = data_size; } else if (inarg->offset < size) { @@ -1557,29 +1621,36 @@ __acquires(fi->lock) goto out_free; } - req->in.args[1].size = inarg->size; - queued = fuse_request_queue_background(fc, req); + args->in_args[1].size = inarg->size; + args->force = true; + args->nocreds = true; + + err = fuse_simple_background(fc, args, GFP_ATOMIC); + if (err == -ENOMEM) { + spin_unlock(&fi->lock); + err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL); + spin_lock(&fi->lock); + } + /* Fails on broken connection only */ - if (unlikely(!queued)) + if (unlikely(err)) goto out_free; - fi->writectr++; return; out_free: - fuse_writepage_finish(fc, req); + fi->writectr--; + fuse_writepage_finish(fc, wpa); spin_unlock(&fi->lock); /* After fuse_writepage_finish() aux request list is private */ - for (aux = req->misc.write.next; aux; aux = next) { - next = aux->misc.write.next; - aux->misc.write.next = NULL; - fuse_writepage_free(fc, aux); - fuse_put_request(fc, aux); + for (aux = wpa->next; aux; aux = next) { + next = aux->next; + aux->next = NULL; + fuse_writepage_free(aux); } - fuse_writepage_free(fc, req); - fuse_put_request(fc, req); + fuse_writepage_free(wpa); spin_lock(&fi->lock); } @@ -1596,29 +1667,34 @@ __acquires(fi->lock) struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); loff_t crop = i_size_read(inode); - struct fuse_req *req; + struct fuse_writepage_args *wpa; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { - req = list_entry(fi->queued_writes.next, struct fuse_req, list); - list_del_init(&req->list); - fuse_send_writepage(fc, req, crop); + wpa = list_entry(fi->queued_writes.next, + struct fuse_writepage_args, queue_entry); + list_del_init(&wpa->queue_entry); + fuse_send_writepage(fc, wpa, crop); } } -static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, + int error) { - struct inode *inode = req->inode; + struct fuse_writepage_args *wpa = + container_of(args, typeof(*wpa), ia.ap.args); + struct inode *inode = wpa->inode; struct fuse_inode *fi = get_fuse_inode(inode); - mapping_set_error(inode->i_mapping, req->out.h.error); + mapping_set_error(inode->i_mapping, error); spin_lock(&fi->lock); - while (req->misc.write.next) { + while (wpa->next) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_write_in *inarg = &req->misc.write.in; - struct fuse_req *next = req->misc.write.next; - req->misc.write.next = next->misc.write.next; - next->misc.write.next = NULL; - next->ff = fuse_file_get(req->ff); + struct fuse_write_in *inarg = &wpa->ia.write.in; + struct fuse_writepage_args *next = wpa->next; + + wpa->next = next->next; + next->next = NULL; + next->ia.ff = fuse_file_get(wpa->ia.ff); list_add(&next->writepages_entry, &fi->writepages); /* @@ -1647,9 +1723,9 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) fuse_send_writepage(fc, next, inarg->offset + inarg->size); } fi->writectr--; - fuse_writepage_finish(fc, req); + fuse_writepage_finish(fc, wpa); spin_unlock(&fi->lock); - fuse_writepage_free(fc, req); + fuse_writepage_free(wpa); } static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, @@ -1691,52 +1767,71 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) return err; } +static struct fuse_writepage_args *fuse_writepage_args_alloc(void) +{ + struct fuse_writepage_args *wpa; + struct fuse_args_pages *ap; + + wpa = kzalloc(sizeof(*wpa), GFP_NOFS); + if (wpa) { + ap = &wpa->ia.ap; + ap->num_pages = 0; + ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs); + if (!ap->pages) { + kfree(wpa); + wpa = NULL; + } + } + return wpa; + +} + static int fuse_writepage_locked(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_req *req; + struct fuse_writepage_args *wpa; + struct fuse_args_pages *ap; struct page *tmp_page; int error = -ENOMEM; set_page_writeback(page); - req = fuse_request_alloc_nofs(1); - if (!req) + wpa = fuse_writepage_args_alloc(); + if (!wpa) goto err; + ap = &wpa->ia.ap; - /* writeback always goes to bg_queue */ - __set_bit(FR_BACKGROUND, &req->flags); tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!tmp_page) goto err_free; error = -EIO; - req->ff = fuse_write_file_get(fc, fi); - if (!req->ff) + wpa->ia.ff = fuse_write_file_get(fc, fi); + if (!wpa->ia.ff) goto err_nofile; - fuse_write_fill(req, req->ff, page_offset(page), 0); + fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0); copy_highpage(tmp_page, page); - req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; - req->misc.write.next = NULL; - req->in.argpages = 1; - req->num_pages = 1; - req->pages[0] = tmp_page; - req->page_descs[0].offset = 0; - req->page_descs[0].length = PAGE_SIZE; - req->end = fuse_writepage_end; - req->inode = inode; + wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; + wpa->next = NULL; + ap->args.in_pages = true; + ap->num_pages = 1; + ap->pages[0] = tmp_page; + ap->descs[0].offset = 0; + ap->descs[0].length = PAGE_SIZE; + ap->args.end = fuse_writepage_end; + wpa->inode = inode; inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); spin_lock(&fi->lock); - list_add(&req->writepages_entry, &fi->writepages); - list_add_tail(&req->list, &fi->queued_writes); + list_add(&wpa->writepages_entry, &fi->writepages); + list_add_tail(&wpa->queue_entry, &fi->queued_writes); fuse_flush_writepages(inode); spin_unlock(&fi->lock); @@ -1747,7 +1842,7 @@ static int fuse_writepage_locked(struct page *page) err_nofile: __free_page(tmp_page); err_free: - fuse_request_free(req); + kfree(wpa); err: mapping_set_error(page->mapping, error); end_page_writeback(page); @@ -1767,6 +1862,7 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc) WARN_ON(wbc->sync_mode == WB_SYNC_ALL); redirty_page_for_writepage(wbc, page); + unlock_page(page); return 0; } @@ -1777,23 +1873,50 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc) } struct fuse_fill_wb_data { - struct fuse_req *req; + struct fuse_writepage_args *wpa; struct fuse_file *ff; struct inode *inode; struct page **orig_pages; + unsigned int max_pages; }; +static bool fuse_pages_realloc(struct fuse_fill_wb_data *data) +{ + struct fuse_args_pages *ap = &data->wpa->ia.ap; + struct fuse_conn *fc = get_fuse_conn(data->inode); + struct page **pages; + struct fuse_page_desc *descs; + unsigned int npages = min_t(unsigned int, + max_t(unsigned int, data->max_pages * 2, + FUSE_DEFAULT_MAX_PAGES_PER_REQ), + fc->max_pages); + WARN_ON(npages <= data->max_pages); + + pages = fuse_pages_alloc(npages, GFP_NOFS, &descs); + if (!pages) + return false; + + memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages); + memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages); + kfree(ap->pages); + ap->pages = pages; + ap->descs = descs; + data->max_pages = npages; + + return true; +} + static void fuse_writepages_send(struct fuse_fill_wb_data *data) { - struct fuse_req *req = data->req; + struct fuse_writepage_args *wpa = data->wpa; struct inode *inode = data->inode; struct fuse_inode *fi = get_fuse_inode(inode); - int num_pages = req->num_pages; + int num_pages = wpa->ia.ap.num_pages; int i; - req->ff = fuse_file_get(data->ff); + wpa->ia.ff = fuse_file_get(data->ff); spin_lock(&fi->lock); - list_add_tail(&req->list, &fi->queued_writes); + list_add_tail(&wpa->queue_entry, &fi->queued_writes); fuse_flush_writepages(inode); spin_unlock(&fi->lock); @@ -1808,54 +1931,52 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) * this new request onto the auxiliary list, otherwise reuse the existing one by * copying the new page contents over to the old temporary page. */ -static bool fuse_writepage_in_flight(struct fuse_req *new_req, +static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, struct page *page) { - struct fuse_conn *fc = get_fuse_conn(new_req->inode); - struct fuse_inode *fi = get_fuse_inode(new_req->inode); - struct fuse_req *tmp; - struct fuse_req *old_req; + struct fuse_inode *fi = get_fuse_inode(new_wpa->inode); + struct fuse_writepage_args *tmp; + struct fuse_writepage_args *old_wpa; + struct fuse_args_pages *new_ap = &new_wpa->ia.ap; - WARN_ON(new_req->num_pages != 0); + WARN_ON(new_ap->num_pages != 0); spin_lock(&fi->lock); - list_del(&new_req->writepages_entry); - old_req = fuse_find_writeback(fi, page->index, page->index); - if (!old_req) { - list_add(&new_req->writepages_entry, &fi->writepages); + list_del(&new_wpa->writepages_entry); + old_wpa = fuse_find_writeback(fi, page->index, page->index); + if (!old_wpa) { + list_add(&new_wpa->writepages_entry, &fi->writepages); spin_unlock(&fi->lock); return false; } - new_req->num_pages = 1; - for (tmp = old_req->misc.write.next; tmp; tmp = tmp->misc.write.next) { + new_ap->num_pages = 1; + for (tmp = old_wpa->next; tmp; tmp = tmp->next) { pgoff_t curr_index; - WARN_ON(tmp->inode != new_req->inode); - curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT; + WARN_ON(tmp->inode != new_wpa->inode); + curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT; if (curr_index == page->index) { - WARN_ON(tmp->num_pages != 1); - WARN_ON(!test_bit(FR_PENDING, &tmp->flags)); - swap(tmp->pages[0], new_req->pages[0]); + WARN_ON(tmp->ia.ap.num_pages != 1); + swap(tmp->ia.ap.pages[0], new_ap->pages[0]); break; } } if (!tmp) { - new_req->misc.write.next = old_req->misc.write.next; - old_req->misc.write.next = new_req; + new_wpa->next = old_wpa->next; + old_wpa->next = new_wpa; } spin_unlock(&fi->lock); if (tmp) { - struct backing_dev_info *bdi = inode_to_bdi(new_req->inode); + struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode); dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); + dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); - fuse_writepage_free(fc, new_req); - fuse_request_free(new_req); + fuse_writepage_free(new_wpa); } return true; @@ -1865,7 +1986,8 @@ static int fuse_writepages_fill(struct page *page, struct writeback_control *wbc, void *_data) { struct fuse_fill_wb_data *data = _data; - struct fuse_req *req = data->req; + struct fuse_writepage_args *wpa = data->wpa; + struct fuse_args_pages *ap = &wpa->ia.ap; struct inode *inode = data->inode; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); @@ -1888,16 +2010,16 @@ static int fuse_writepages_fill(struct page *page, */ is_writeback = fuse_page_is_writeback(inode, page->index); - if (req && req->num_pages && - (is_writeback || req->num_pages == fc->max_pages || - (req->num_pages + 1) * PAGE_SIZE > fc->max_write || - data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { + if (wpa && ap->num_pages && + (is_writeback || ap->num_pages == fc->max_pages || + (ap->num_pages + 1) * PAGE_SIZE > fc->max_write || + data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) { fuse_writepages_send(data); - data->req = NULL; - } else if (req && req->num_pages == req->max_pages) { - if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) { + data->wpa = NULL; + } else if (wpa && ap->num_pages == data->max_pages) { + if (!fuse_pages_realloc(data)) { fuse_writepages_send(data); - req = data->req = NULL; + data->wpa = NULL; } } @@ -1915,59 +2037,60 @@ static int fuse_writepages_fill(struct page *page, * This is ensured by holding the page lock in page_mkwrite() while * checking fuse_page_is_writeback(). We already hold the page lock * since clear_page_dirty_for_io() and keep it held until we add the - * request to the fi->writepages list and increment req->num_pages. + * request to the fi->writepages list and increment ap->num_pages. * After this fuse_page_is_writeback() will indicate that the page is * under writeback, so we can release the page lock. */ - if (data->req == NULL) { + if (data->wpa == NULL) { struct fuse_inode *fi = get_fuse_inode(inode); err = -ENOMEM; - req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES); - if (!req) { + wpa = fuse_writepage_args_alloc(); + if (!wpa) { __free_page(tmp_page); goto out_unlock; } + data->max_pages = 1; - fuse_write_fill(req, data->ff, page_offset(page), 0); - req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; - req->misc.write.next = NULL; - req->in.argpages = 1; - __set_bit(FR_BACKGROUND, &req->flags); - req->num_pages = 0; - req->end = fuse_writepage_end; - req->inode = inode; + ap = &wpa->ia.ap; + fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0); + wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; + wpa->next = NULL; + ap->args.in_pages = true; + ap->args.end = fuse_writepage_end; + ap->num_pages = 0; + wpa->inode = inode; spin_lock(&fi->lock); - list_add(&req->writepages_entry, &fi->writepages); + list_add(&wpa->writepages_entry, &fi->writepages); spin_unlock(&fi->lock); - data->req = req; + data->wpa = wpa; } set_page_writeback(page); copy_highpage(tmp_page, page); - req->pages[req->num_pages] = tmp_page; - req->page_descs[req->num_pages].offset = 0; - req->page_descs[req->num_pages].length = PAGE_SIZE; + ap->pages[ap->num_pages] = tmp_page; + ap->descs[ap->num_pages].offset = 0; + ap->descs[ap->num_pages].length = PAGE_SIZE; inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); err = 0; - if (is_writeback && fuse_writepage_in_flight(req, page)) { + if (is_writeback && fuse_writepage_in_flight(wpa, page)) { end_page_writeback(page); - data->req = NULL; + data->wpa = NULL; goto out_unlock; } - data->orig_pages[req->num_pages] = page; + data->orig_pages[ap->num_pages] = page; /* * Protected by fi->lock against concurrent access by * fuse_page_is_writeback(). */ spin_lock(&fi->lock); - req->num_pages++; + ap->num_pages++; spin_unlock(&fi->lock); out_unlock: @@ -1989,7 +2112,7 @@ static int fuse_writepages(struct address_space *mapping, goto out; data.inode = inode; - data.req = NULL; + data.wpa = NULL; data.ff = NULL; err = -ENOMEM; @@ -2000,9 +2123,9 @@ static int fuse_writepages(struct address_space *mapping, goto out; err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); - if (data.req) { + if (data.wpa) { /* Ignore errors if we can write at least one page */ - BUG_ON(!data.req->num_pages); + WARN_ON(!data.wpa->ia.ap.num_pages); fuse_writepages_send(&data); err = 0; } @@ -2222,11 +2345,11 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file, inarg->lk.pid = pid; if (flock) inarg->lk_flags |= FUSE_LK_FLOCK; - args->in.h.opcode = opcode; - args->in.h.nodeid = get_node_id(inode); - args->in.numargs = 1; - args->in.args[0].size = sizeof(*inarg); - args->in.args[0].value = inarg; + args->opcode = opcode; + args->nodeid = get_node_id(inode); + args->in_numargs = 1; + args->in_args[0].size = sizeof(*inarg); + args->in_args[0].value = inarg; } static int fuse_getlk(struct file *file, struct file_lock *fl) @@ -2239,9 +2362,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) int err; fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) err = convert_fuse_file_lock(fc, &outarg.lk, fl); @@ -2336,14 +2459,14 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) memset(&inarg, 0, sizeof(inarg)); inarg.block = block; inarg.blocksize = inode->i_sb->s_blocksize; - args.in.h.opcode = FUSE_BMAP; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.opcode = FUSE_BMAP; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (err == -ENOSYS) fc->no_bmap = 1; @@ -2368,14 +2491,14 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) if (fc->no_lseek) goto fallback; - args.in.h.opcode = FUSE_LSEEK; - args.in.h.nodeid = ff->nodeid; - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.opcode = FUSE_LSEEK; + args.nodeid = ff->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (err) { if (err == -ENOSYS) { @@ -2573,14 +2696,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, .flags = flags }; struct fuse_ioctl_out outarg; - struct fuse_req *req = NULL; - struct page **pages = NULL; struct iovec *iov_page = NULL; struct iovec *in_iov = NULL, *out_iov = NULL; - unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; - size_t in_size, out_size, transferred, c; + unsigned int in_iovs = 0, out_iovs = 0, max_pages; + size_t in_size, out_size, c; + ssize_t transferred; int err, i; struct iov_iter ii; + struct fuse_args_pages ap = {}; #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; @@ -2598,11 +2721,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); err = -ENOMEM; - pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL); + ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); - if (!pages || !iov_page) + if (!ap.pages || !iov_page) goto out; + fuse_page_descs_length_init(ap.descs, 0, fc->max_pages); + /* * If restricted, initialize IO parameters as encoded in @cmd. * RETRY from server is not allowed. @@ -2639,56 +2764,44 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, err = -ENOMEM; if (max_pages > fc->max_pages) goto out; - while (num_pages < max_pages) { - pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); - if (!pages[num_pages]) + while (ap.num_pages < max_pages) { + ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!ap.pages[ap.num_pages]) goto out; - num_pages++; + ap.num_pages++; } - req = fuse_get_req(fc, num_pages); - if (IS_ERR(req)) { - err = PTR_ERR(req); - req = NULL; - goto out; - } - memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); - req->num_pages = num_pages; - fuse_page_descs_length_init(req, 0, req->num_pages); /* okay, let's send it to the client */ - req->in.h.opcode = FUSE_IOCTL; - req->in.h.nodeid = ff->nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; + ap.args.opcode = FUSE_IOCTL; + ap.args.nodeid = ff->nodeid; + ap.args.in_numargs = 1; + ap.args.in_args[0].size = sizeof(inarg); + ap.args.in_args[0].value = &inarg; if (in_size) { - req->in.numargs++; - req->in.args[1].size = in_size; - req->in.argpages = 1; + ap.args.in_numargs++; + ap.args.in_args[1].size = in_size; + ap.args.in_pages = true; err = -EFAULT; iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); - for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) { - c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii); + for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { + c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) goto out; } } - req->out.numargs = 2; - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; - req->out.args[1].size = out_size; - req->out.argpages = 1; - req->out.argvar = 1; + ap.args.out_numargs = 2; + ap.args.out_args[0].size = sizeof(outarg); + ap.args.out_args[0].value = &outarg; + ap.args.out_args[1].size = out_size; + ap.args.out_pages = true; + ap.args.out_argvar = true; - fuse_request_send(fc, req); - err = req->out.h.error; - transferred = req->out.args[1].size; - fuse_put_request(fc, req); - req = NULL; - if (err) + transferred = fuse_simple_request(fc, &ap.args); + err = transferred; + if (transferred < 0) goto out; /* did it ask for retry? */ @@ -2713,7 +2826,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) goto out; - vaddr = kmap_atomic(pages[0]); + vaddr = kmap_atomic(ap.pages[0]); err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); @@ -2741,19 +2854,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, err = -EFAULT; iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); - for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) { - c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii); + for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { + c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) goto out; } err = 0; out: - if (req) - fuse_put_request(fc, req); free_page((unsigned long) iov_page); - while (num_pages) - __free_page(pages[--num_pages]); - kfree(pages); + while (ap.num_pages) + __free_page(ap.pages[--ap.num_pages]); + kfree(ap.pages); return err ? err : outarg.result; } @@ -2861,14 +2972,14 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait) fuse_register_polled_file(fc, ff); } - args.in.h.opcode = FUSE_POLL; - args.in.h.nodeid = ff->nodeid; - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.opcode = FUSE_POLL; + args.nodeid = ff->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) @@ -3076,11 +3187,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - args.in.h.opcode = FUSE_FALLOCATE; - args.in.h.nodeid = ff->nodeid; - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; + args.opcode = FUSE_FALLOCATE; + args.nodeid = ff->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_fallocate = 1; @@ -3168,14 +3279,14 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (is_unstable) set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); - args.in.h.opcode = FUSE_COPY_FILE_RANGE; - args.in.h.nodeid = ff_in->nodeid; - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.opcode = FUSE_COPY_FILE_RANGE; + args.nodeid = ff_in->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_copy_file_range = 1; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 24dbca777775..fc89cb40e874 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -47,9 +47,6 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 -/** Number of page pointers embedded in fuse_req */ -#define FUSE_REQ_INLINE_PAGES 1 - /** List of active connections */ extern struct list_head fuse_conn_list; @@ -164,17 +161,15 @@ enum { }; struct fuse_conn; +struct fuse_release_args; /** FUSE specific file data */ struct fuse_file { /** Fuse connection for this file */ struct fuse_conn *fc; - /* - * Request reserved for flush and release. - * Modified under relative fuse_inode::lock. - */ - struct fuse_req *reserved_req; + /* Argument space reserved for release */ + struct fuse_release_args *release_args; /** Kernel file handle guaranteed to be unique */ u64 kh; @@ -229,57 +224,12 @@ struct fuse_in_arg { const void *value; }; -/** The request input */ -struct fuse_in { - /** The request header */ - struct fuse_in_header h; - - /** True if the data for the last argument is in req->pages */ - unsigned argpages:1; - - /** Number of arguments */ - unsigned numargs; - - /** Array of arguments */ - struct fuse_in_arg args[3]; -}; - /** One output argument of a request */ struct fuse_arg { unsigned size; void *value; }; -/** The request output */ -struct fuse_out { - /** Header returned from userspace */ - struct fuse_out_header h; - - /* - * The following bitfields are not changed during the request - * processing - */ - - /** Last argument is variable length (can be shorter than - arg->size) */ - unsigned argvar:1; - - /** Last argument is a list of pages to copy data to */ - unsigned argpages:1; - - /** Zero partially or not copied pages */ - unsigned page_zeroing:1; - - /** Pages may be replaced with new ones */ - unsigned page_replace:1; - - /** Number or arguments */ - unsigned numargs; - - /** Array of arguments */ - struct fuse_arg args[2]; -}; - /** FUSE page descriptor */ struct fuse_page_desc { unsigned int length; @@ -287,20 +237,28 @@ struct fuse_page_desc { }; struct fuse_args { - struct { - struct { - uint32_t opcode; - uint64_t nodeid; - } h; - unsigned numargs; - struct fuse_in_arg args[3]; + uint64_t nodeid; + uint32_t opcode; + unsigned short in_numargs; + unsigned short out_numargs; + bool force:1; + bool noreply:1; + bool nocreds:1; + bool in_pages:1; + bool out_pages:1; + bool out_argvar:1; + bool page_zeroing:1; + bool page_replace:1; + struct fuse_in_arg in_args[3]; + struct fuse_arg out_args[2]; + void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); +}; - } in; - struct { - unsigned argvar:1; - unsigned numargs; - struct fuse_arg args[2]; - } out; +struct fuse_args_pages { + struct fuse_args args; + struct page **pages; + struct fuse_page_desc *descs; + unsigned int num_pages; }; #define FUSE_ARGS(args) struct fuse_args args = {} @@ -373,83 +331,70 @@ struct fuse_req { /** Entry on the interrupts list */ struct list_head intr_entry; + /* Input/output arguments */ + struct fuse_args *args; + /** refcount */ refcount_t count; /* Request flags, updated with test/set/clear_bit() */ unsigned long flags; - /** The request input */ - struct fuse_in in; + /* The request input header */ + struct { + struct fuse_in_header h; + } in; - /** The request output */ - struct fuse_out out; + /* The request output header */ + struct { + struct fuse_out_header h; + } out; /** Used to wake up the task waiting for completion of request*/ wait_queue_head_t waitq; - /** Data for asynchronous requests */ - union { - struct { - struct fuse_release_in in; - struct inode *inode; - } release; - struct fuse_init_in init_in; - struct fuse_init_out init_out; - struct cuse_init_in cuse_init_in; - struct { - struct fuse_read_in in; - u64 attr_ver; - } read; - struct { - struct fuse_write_in in; - struct fuse_write_out out; - struct fuse_req *next; - } write; - struct fuse_notify_retrieve_in retrieve_in; - } misc; - - /** page vector */ - struct page **pages; - - /** page-descriptor vector */ - struct fuse_page_desc *page_descs; - - /** size of the 'pages' array */ - unsigned max_pages; - - /** inline page vector */ - struct page *inline_pages[FUSE_REQ_INLINE_PAGES]; - - /** inline page-descriptor vector */ - struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES]; - - /** number of pages in vector */ - unsigned num_pages; - - /** File used in the request (or NULL) */ - struct fuse_file *ff; - - /** Inode used in the request or NULL */ - struct inode *inode; - - /** AIO control block */ - struct fuse_io_priv *io; - - /** Link on fi->writepages */ - struct list_head writepages_entry; - - /** Request completion callback */ - void (*end)(struct fuse_conn *, struct fuse_req *); - - /** Request is stolen from fuse_file->reserved_req */ - struct file *stolen_file; }; +struct fuse_iqueue; + +/** + * Input queue callbacks + * + * Input queue signalling is device-specific. For example, the /dev/fuse file + * uses fiq->waitq and fasync to wake processes that are waiting on queue + * readiness. These callbacks allow other device types to respond to input + * queue activity. + */ +struct fuse_iqueue_ops { + /** + * Signal that a forget has been queued + */ + void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) + __releases(fiq->lock); + + /** + * Signal that an INTERRUPT request has been queued + */ + void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) + __releases(fiq->lock); + + /** + * Signal that a request has been queued + */ + void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) + __releases(fiq->lock); +}; + +/** /dev/fuse input queue operations */ +extern const struct fuse_iqueue_ops fuse_dev_fiq_ops; + struct fuse_iqueue { /** Connection established */ unsigned connected; + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + /** Readers of the connection are waiting on this */ wait_queue_head_t waitq; @@ -471,6 +416,12 @@ struct fuse_iqueue { /** O_ASYNC requests */ struct fasync_struct *fasync; + + /** Device-specific callbacks */ + const struct fuse_iqueue_ops *ops; + + /** Device-specific state */ + void *priv; }; #define FUSE_PQ_HASH_BITS 8 @@ -504,6 +455,29 @@ struct fuse_dev { struct list_head entry; }; +struct fuse_fs_context { + int fd; + unsigned int rootmode; + kuid_t user_id; + kgid_t group_id; + bool is_bdev:1; + bool fd_present:1; + bool rootmode_present:1; + bool user_id_present:1; + bool group_id_present:1; + bool default_permissions:1; + bool allow_other:1; + bool destroy:1; + bool no_control:1; + bool no_force_umount:1; + unsigned int max_read; + unsigned int blksize; + const char *subtype; + + /* fuse_dev pointer to fill in, should contain NULL on entry */ + void **fudptr; +}; + /** * A Fuse connection. * @@ -584,9 +558,6 @@ struct fuse_conn { /** waitq for blocked connection */ wait_queue_head_t blocked_waitq; - /** waitq for reserved requests */ - wait_queue_head_t reserved_req_waitq; - /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -721,6 +692,18 @@ struct fuse_conn { /** Does the filesystem support copy_file_range? */ unsigned no_copy_file_range:1; + /* Send DESTROY request */ + unsigned int destroy:1; + + /* Delete dentries that have gone stale */ + unsigned int delete_stale:1; + + /** Do not create entry in fusectl fs */ + unsigned int no_control:1; + + /** Do not allow MNT_FORCE umount */ + unsigned int no_force_umount:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -742,9 +725,6 @@ struct fuse_conn { /** Key for lock owner ID scrambling */ u32 scramble_key[4]; - /** Reserved request for the DESTROY message */ - struct fuse_req *destroy_req; - /** Version counter for attribute changes */ atomic64_t attr_version; @@ -820,14 +800,32 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, struct fuse_forget_link *fuse_alloc_forget(void); -/* Used by READDIRPLUS */ -void fuse_force_forget(struct file *file, u64 nodeid); +struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, + unsigned int max, + unsigned int *countp); -/** +/* * Initialize READ or READDIR request */ -void fuse_read_fill(struct fuse_req *req, struct file *file, - loff_t pos, size_t count, int opcode); +struct fuse_io_args { + union { + struct { + struct fuse_read_in in; + u64 attr_ver; + } read; + struct { + struct fuse_write_in in; + struct fuse_write_out out; + } write; + }; + struct fuse_args_pages ap; + struct fuse_io_priv *io; + struct fuse_file *ff; +}; + +void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, + size_t count, int opcode); + /** * Send OPEN or OPENDIR request @@ -899,62 +897,17 @@ void fuse_dev_cleanup(void); int fuse_ctl_init(void); void __exit fuse_ctl_cleanup(void); -/** - * Allocate a request - */ -struct fuse_req *fuse_request_alloc(unsigned npages); - -struct fuse_req *fuse_request_alloc_nofs(unsigned npages); - -bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req, - gfp_t flags); - - -/** - * Free a request - */ -void fuse_request_free(struct fuse_req *req); - -/** - * Get a request, may fail with -ENOMEM, - * caller should specify # elements in req->pages[] explicitly - */ -struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); -struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, - unsigned npages); - -/* - * Increment reference count on request - */ -void __fuse_get_request(struct fuse_req *req); - -/** - * Gets a requests for a file operation, always succeeds - */ -struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, - struct file *file); - -/** - * Decrement reference count of a request. If count goes to zero free - * the request. - */ -void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); - -/** - * Send a request (synchronous) - */ -void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); - /** * Simple request sending that does request allocation and freeing */ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); +int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, + gfp_t gfp_flags); /** - * Send a request in the background + * End a finished request */ -void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); -bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req); +void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req); /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); @@ -980,15 +933,33 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); /** * Initialize fuse_conn */ -void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns); +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, + const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); /** * Release reference to fuse_conn */ void fuse_conn_put(struct fuse_conn *fc); -struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc); +struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); +struct fuse_dev *fuse_dev_alloc(void); +void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); void fuse_dev_free(struct fuse_dev *fud); +void fuse_send_init(struct fuse_conn *fc); + +/** + * Fill in superblock and initialize fuse connection + * @sb: partially-initialized superblock to fill in + * @ctx: mount context + */ +int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); + +/** + * Disassociate fuse connection from superblock and kill the superblock + * + * Calls kill_anon_super(), do not use with bdev mounts. + */ +void fuse_kill_sb_anon(struct super_block *sb); /** * Add connection to control filesystem @@ -1093,4 +1064,15 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type); /* readdir.c */ int fuse_readdir(struct file *file, struct dir_context *ctx); +/** + * Return the number of bytes in an arguments list + */ +unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); + +/** + * Get the next unique ID for a request + */ +u64 fuse_get_unique(struct fuse_iqueue *fiq); +void fuse_free_conn(struct fuse_conn *fc); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4bb885b0f032..51cb471f4dc3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -59,24 +60,13 @@ MODULE_PARM_DESC(max_user_congthresh, /** Congestion starts at 75% of maximum */ #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) -struct fuse_mount_data { - int fd; - unsigned rootmode; - kuid_t user_id; - kgid_t group_id; - unsigned fd_present:1; - unsigned rootmode_present:1; - unsigned user_id_present:1; - unsigned group_id_present:1; - unsigned default_permissions:1; - unsigned allow_other:1; - unsigned max_read; - unsigned blksize; -}; +#ifdef CONFIG_BLOCK +static struct file_system_type fuseblk_fs_type; +#endif struct fuse_forget_link *fuse_alloc_forget(void) { - return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); + return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } static struct inode *fuse_alloc_inode(struct super_block *sb) @@ -374,19 +364,21 @@ void fuse_unlock_inode(struct inode *inode, bool locked) static void fuse_umount_begin(struct super_block *sb) { - fuse_abort_conn(get_fuse_conn_super(sb)); + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (!fc->no_force_umount) + fuse_abort_conn(fc); } static void fuse_send_destroy(struct fuse_conn *fc) { - struct fuse_req *req = fc->destroy_req; - if (req && fc->conn_init) { - fc->destroy_req = NULL; - req->in.h.opcode = FUSE_DESTROY; - __set_bit(FR_FORCE, &req->flags); - __clear_bit(FR_BACKGROUND, &req->flags); - fuse_request_send(fc, req); - fuse_put_request(fc, req); + if (fc->conn_init) { + FUSE_ARGS(args); + + args.opcode = FUSE_DESTROY; + args.force = true; + args.nocreds = true; + fuse_simple_request(fc, &args); } } @@ -430,12 +422,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) } memset(&outarg, 0, sizeof(outarg)); - args.in.numargs = 0; - args.in.h.opcode = FUSE_STATFS; - args.in.h.nodeid = get_node_id(d_inode(dentry)); - args.out.numargs = 1; - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.in_numargs = 0; + args.opcode = FUSE_STATFS; + args.nodeid = get_node_id(d_inode(dentry)); + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) convert_fuse_statfs(buf, &outarg.st); @@ -443,6 +435,8 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) } enum { + OPT_SOURCE, + OPT_SUBTYPE, OPT_FD, OPT_ROOTMODE, OPT_USER_ID, @@ -454,111 +448,109 @@ enum { OPT_ERR }; -static const match_table_t tokens = { - {OPT_FD, "fd=%u"}, - {OPT_ROOTMODE, "rootmode=%o"}, - {OPT_USER_ID, "user_id=%u"}, - {OPT_GROUP_ID, "group_id=%u"}, - {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, - {OPT_ALLOW_OTHER, "allow_other"}, - {OPT_MAX_READ, "max_read=%u"}, - {OPT_BLKSIZE, "blksize=%u"}, - {OPT_ERR, NULL} +static const struct fs_parameter_spec fuse_param_specs[] = { + fsparam_string ("source", OPT_SOURCE), + fsparam_u32 ("fd", OPT_FD), + fsparam_u32oct ("rootmode", OPT_ROOTMODE), + fsparam_u32 ("user_id", OPT_USER_ID), + fsparam_u32 ("group_id", OPT_GROUP_ID), + fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), + fsparam_flag ("allow_other", OPT_ALLOW_OTHER), + fsparam_u32 ("max_read", OPT_MAX_READ), + fsparam_u32 ("blksize", OPT_BLKSIZE), + fsparam_string ("subtype", OPT_SUBTYPE), + {} }; -static int fuse_match_uint(substring_t *s, unsigned int *res) +static const struct fs_parameter_description fuse_fs_parameters = { + .name = "fuse", + .specs = fuse_param_specs, +}; + +static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) { - int err = -ENOMEM; - char *buf = match_strdup(s); - if (buf) { - err = kstrtouint(buf, 10, res); - kfree(buf); - } - return err; -} + struct fs_parse_result result; + struct fuse_fs_context *ctx = fc->fs_private; + int opt; -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev, - struct user_namespace *user_ns) -{ - char *p; - memset(d, 0, sizeof(struct fuse_mount_data)); - d->max_read = ~0; - d->blksize = FUSE_DEFAULT_BLKSIZE; + opt = fs_parse(fc, &fuse_fs_parameters, param, &result); + if (opt < 0) + return opt; - while ((p = strsep(&opt, ",")) != NULL) { - int token; - int value; - unsigned uv; - substring_t args[MAX_OPT_ARGS]; - if (!*p) - continue; + switch (opt) { + case OPT_SOURCE: + if (fc->source) + return invalf(fc, "fuse: Multiple sources specified"); + fc->source = param->string; + param->string = NULL; + break; - token = match_token(p, tokens, args); - switch (token) { - case OPT_FD: - if (match_int(&args[0], &value)) - return 0; - d->fd = value; - d->fd_present = 1; - break; - - case OPT_ROOTMODE: - if (match_octal(&args[0], &value)) - return 0; - if (!fuse_valid_type(value)) - return 0; - d->rootmode = value; - d->rootmode_present = 1; - break; - - case OPT_USER_ID: - if (fuse_match_uint(&args[0], &uv)) - return 0; - d->user_id = make_kuid(user_ns, uv); - if (!uid_valid(d->user_id)) - return 0; - d->user_id_present = 1; - break; - - case OPT_GROUP_ID: - if (fuse_match_uint(&args[0], &uv)) - return 0; - d->group_id = make_kgid(user_ns, uv); - if (!gid_valid(d->group_id)) - return 0; - d->group_id_present = 1; - break; - - case OPT_DEFAULT_PERMISSIONS: - d->default_permissions = 1; - break; - - case OPT_ALLOW_OTHER: - d->allow_other = 1; - break; - - case OPT_MAX_READ: - if (match_int(&args[0], &value)) - return 0; - d->max_read = value; - break; - - case OPT_BLKSIZE: - if (!is_bdev || match_int(&args[0], &value)) - return 0; - d->blksize = value; - break; - - default: - return 0; - } - } - - if (!d->fd_present || !d->rootmode_present || - !d->user_id_present || !d->group_id_present) + case OPT_SUBTYPE: + if (ctx->subtype) + return invalf(fc, "fuse: Multiple subtypes specified"); + ctx->subtype = param->string; + param->string = NULL; return 0; - return 1; + case OPT_FD: + ctx->fd = result.uint_32; + ctx->fd_present = 1; + break; + + case OPT_ROOTMODE: + if (!fuse_valid_type(result.uint_32)) + return invalf(fc, "fuse: Invalid rootmode"); + ctx->rootmode = result.uint_32; + ctx->rootmode_present = 1; + break; + + case OPT_USER_ID: + ctx->user_id = make_kuid(fc->user_ns, result.uint_32); + if (!uid_valid(ctx->user_id)) + return invalf(fc, "fuse: Invalid user_id"); + ctx->user_id_present = 1; + break; + + case OPT_GROUP_ID: + ctx->group_id = make_kgid(fc->user_ns, result.uint_32); + if (!gid_valid(ctx->group_id)) + return invalf(fc, "fuse: Invalid group_id"); + ctx->group_id_present = 1; + break; + + case OPT_DEFAULT_PERMISSIONS: + ctx->default_permissions = 1; + break; + + case OPT_ALLOW_OTHER: + ctx->allow_other = 1; + break; + + case OPT_MAX_READ: + ctx->max_read = result.uint_32; + break; + + case OPT_BLKSIZE: + if (!ctx->is_bdev) + return invalf(fc, "fuse: blksize only supported for fuseblk"); + ctx->blksize = result.uint_32; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static void fuse_free_fc(struct fs_context *fc) +{ + struct fuse_fs_context *ctx = fc->fs_private; + + if (ctx) { + kfree(ctx->subtype); + kfree(ctx); + } } static int fuse_show_options(struct seq_file *m, struct dentry *root) @@ -579,14 +571,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void fuse_iqueue_init(struct fuse_iqueue *fiq) +static void fuse_iqueue_init(struct fuse_iqueue *fiq, + const struct fuse_iqueue_ops *ops, + void *priv) { memset(fiq, 0, sizeof(struct fuse_iqueue)); + spin_lock_init(&fiq->lock); init_waitqueue_head(&fiq->waitq); INIT_LIST_HEAD(&fiq->pending); INIT_LIST_HEAD(&fiq->interrupts); fiq->forget_list_tail = &fiq->forget_list_head; fiq->connected = 1; + fiq->ops = ops; + fiq->priv = priv; } static void fuse_pqueue_init(struct fuse_pqueue *fpq) @@ -600,7 +597,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq) fpq->connected = 1; } -void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, + const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); @@ -609,8 +607,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) refcount_set(&fc->count, 1); atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); - init_waitqueue_head(&fc->reserved_req_waitq); - fuse_iqueue_init(&fc->iq); + fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); INIT_LIST_HEAD(&fc->devices); @@ -633,8 +630,6 @@ EXPORT_SYMBOL_GPL(fuse_conn_init); void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { - if (fc->destroy_req) - fuse_request_free(fc->destroy_req); put_pid_ns(fc->pid_ns); put_user_ns(fc->user_ns); fc->release(fc); @@ -822,9 +817,12 @@ static const struct super_operations fuse_super_operations = { static void sanitize_global_limit(unsigned *limit) { + /* + * The default maximum number of async requests is calculated to consume + * 1/2^13 of the total memory, assuming 392 bytes per request. + */ if (*limit == 0) - *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / - sizeof(struct fuse_req); + *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; if (*limit >= 1 << 16) *limit = (1 << 16) - 1; @@ -870,11 +868,19 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) spin_unlock(&fc->bg_lock); } -static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) -{ - struct fuse_init_out *arg = &req->misc.init_out; +struct fuse_init_args { + struct fuse_args args; + struct fuse_init_in in; + struct fuse_init_out out; +}; - if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) +static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, + int error) +{ + struct fuse_init_args *ia = container_of(args, typeof(*ia), args); + struct fuse_init_out *arg = &ia->out; + + if (error || arg->major != FUSE_KERNEL_VERSION) fc->conn_error = 1; else { unsigned long ra_pages; @@ -951,18 +957,23 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } + kfree(ia); + fuse_set_initialized(fc); wake_up_all(&fc->blocked_waitq); } -static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) +void fuse_send_init(struct fuse_conn *fc) { - struct fuse_init_in *arg = &req->misc.init_in; + struct fuse_init_args *ia; - arg->major = FUSE_KERNEL_VERSION; - arg->minor = FUSE_KERNEL_MINOR_VERSION; - arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; - arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | + ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); + + ia->in.major = FUSE_KERNEL_VERSION; + ia->in.minor = FUSE_KERNEL_MINOR_VERSION; + ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; + ia->in.flags |= + FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | @@ -971,26 +982,32 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; - req->in.h.opcode = FUSE_INIT; - req->in.numargs = 1; - req->in.args[0].size = sizeof(*arg); - req->in.args[0].value = arg; - req->out.numargs = 1; + ia->args.opcode = FUSE_INIT; + ia->args.in_numargs = 1; + ia->args.in_args[0].size = sizeof(ia->in); + ia->args.in_args[0].value = &ia->in; + ia->args.out_numargs = 1; /* Variable length argument used for backward compatibility with interface version < 7.5. Rest of init_out is zeroed by do_get_request(), so a short reply is not a problem */ - req->out.argvar = 1; - req->out.args[0].size = sizeof(struct fuse_init_out); - req->out.args[0].value = &req->misc.init_out; - req->end = process_init_reply; - fuse_request_send_background(fc, req); -} + ia->args.out_argvar = 1; + ia->args.out_args[0].size = sizeof(ia->out); + ia->args.out_args[0].value = &ia->out; + ia->args.force = true; + ia->args.nocreds = true; + ia->args.end = process_init_reply; -static void fuse_free_conn(struct fuse_conn *fc) + if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0) + process_init_reply(fc, &ia->args, -ENOTCONN); +} +EXPORT_SYMBOL_GPL(fuse_send_init); + +void fuse_free_conn(struct fuse_conn *fc) { WARN_ON(!list_empty(&fc->devices)); kfree_rcu(fc, rcu); } +EXPORT_SYMBOL_GPL(fuse_free_conn); static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) { @@ -1032,7 +1049,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return 0; } -struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) +struct fuse_dev *fuse_dev_alloc(void) { struct fuse_dev *fud; struct list_head *pq; @@ -1048,17 +1065,34 @@ struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) } fud->pq.processing = pq; - fud->fc = fuse_conn_get(fc); fuse_pqueue_init(&fud->pq); - spin_lock(&fc->lock); - list_add_tail(&fud->entry, &fc->devices); - spin_unlock(&fc->lock); - return fud; } EXPORT_SYMBOL_GPL(fuse_dev_alloc); +void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) +{ + fud->fc = fuse_conn_get(fc); + spin_lock(&fc->lock); + list_add_tail(&fud->entry, &fc->devices); + spin_unlock(&fc->lock); +} +EXPORT_SYMBOL_GPL(fuse_dev_install); + +struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) +{ + struct fuse_dev *fud; + + fud = fuse_dev_alloc(); + if (!fud) + return NULL; + + fuse_dev_install(fud, fc); + return fud; +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); + void fuse_dev_free(struct fuse_dev *fud) { struct fuse_conn *fc = fud->fc; @@ -1075,17 +1109,13 @@ void fuse_dev_free(struct fuse_dev *fud) } EXPORT_SYMBOL_GPL(fuse_dev_free); -static int fuse_fill_super(struct super_block *sb, void *data, int silent) +int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) { struct fuse_dev *fud; - struct fuse_conn *fc; + struct fuse_conn *fc = get_fuse_conn_super(sb); struct inode *root; - struct fuse_mount_data d; - struct file *file; struct dentry *root_dentry; - struct fuse_req *init_req; int err; - int is_bdev = sb->s_bdev != NULL; err = -EINVAL; if (sb->s_flags & SB_MANDLOCK) @@ -1093,19 +1123,19 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); - if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns)) - goto err; - - if (is_bdev) { + if (ctx->is_bdev) { #ifdef CONFIG_BLOCK err = -EINVAL; - if (!sb_set_blocksize(sb, d.blksize)) + if (!sb_set_blocksize(sb, ctx->blksize)) goto err; #endif } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; } + + sb->s_subtype = ctx->subtype; + ctx->subtype = NULL; sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_xattr = fuse_xattr_handlers; @@ -1116,19 +1146,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; - file = fget(d.fd); - err = -EINVAL; - if (!file) - goto err; - - /* - * Require mount to happen from the same user namespace which - * opened /dev/fuse to prevent potential attacks. - */ - if (file->f_op != &fuse_dev_operations || - file->f_cred->user_ns != sb->s_user_ns) - goto err_fput; - /* * If we are not in the initial user namespace posix * acls must be translated. @@ -1136,17 +1153,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_user_ns != &init_user_ns) sb->s_xattr = fuse_no_acl_xattr_handlers; - fc = kmalloc(sizeof(*fc), GFP_KERNEL); - err = -ENOMEM; - if (!fc) - goto err_fput; - - fuse_conn_init(fc, sb->s_user_ns); - fc->release = fuse_free_conn; - - fud = fuse_dev_alloc(fc); + fud = fuse_dev_alloc_install(fc); if (!fud) - goto err_put_conn; + goto err; fc->dev = sb->s_dev; fc->sb = sb; @@ -1159,17 +1168,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= SB_POSIXACL; - fc->default_permissions = d.default_permissions; - fc->allow_other = d.allow_other; - fc->user_id = d.user_id; - fc->group_id = d.group_id; - fc->max_read = max_t(unsigned, 4096, d.max_read); - - /* Used by get_root_inode() */ - sb->s_fs_info = fc; + fc->default_permissions = ctx->default_permissions; + fc->allow_other = ctx->allow_other; + fc->user_id = ctx->user_id; + fc->group_id = ctx->group_id; + fc->max_read = max_t(unsigned, 4096, ctx->max_read); + fc->destroy = ctx->destroy; + fc->no_control = ctx->no_control; + fc->no_force_umount = ctx->no_force_umount; err = -ENOMEM; - root = fuse_get_root_inode(sb, d.rootmode); + root = fuse_get_root_inode(sb, ctx->rootmode); sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) @@ -1177,20 +1186,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; - init_req = fuse_request_alloc(0); - if (!init_req) - goto err_put_root; - __set_bit(FR_BACKGROUND, &init_req->flags); - - if (is_bdev) { - fc->destroy_req = fuse_request_alloc(0); - if (!fc->destroy_req) - goto err_free_init_req; - } - mutex_lock(&fuse_mutex); err = -EINVAL; - if (file->private_data) + if (*ctx->fudptr) goto err_unlock; err = fuse_ctl_add_conn(fc); @@ -1199,27 +1197,62 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - file->private_data = fud; + *ctx->fudptr = fud; mutex_unlock(&fuse_mutex); + return 0; + + err_unlock: + mutex_unlock(&fuse_mutex); + dput(root_dentry); + err_dev_free: + fuse_dev_free(fud); + err: + return err; +} +EXPORT_SYMBOL_GPL(fuse_fill_super_common); + +static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) +{ + struct fuse_fs_context *ctx = fsc->fs_private; + struct file *file; + int err; + struct fuse_conn *fc; + + err = -EINVAL; + file = fget(ctx->fd); + if (!file) + goto err; + + /* + * Require mount to happen from the same user namespace which + * opened /dev/fuse to prevent potential attacks. + */ + if ((file->f_op != &fuse_dev_operations) || + (file->f_cred->user_ns != sb->s_user_ns)) + goto err_fput; + ctx->fudptr = &file->private_data; + + fc = kmalloc(sizeof(*fc), GFP_KERNEL); + err = -ENOMEM; + if (!fc) + goto err_fput; + + fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); + fc->release = fuse_free_conn; + sb->s_fs_info = fc; + + err = fuse_fill_super_common(sb, ctx); + if (err) + goto err_put_conn; /* * atomic_dec_and_test() in fput() provides the necessary * memory barrier for file->private_data to be visible on all * CPUs after this */ fput(file); - - fuse_send_init(fc, init_req); - + fuse_send_init(get_fuse_conn_super(sb)); return 0; - err_unlock: - mutex_unlock(&fuse_mutex); - err_free_init_req: - fuse_request_free(init_req); - err_put_root: - dput(root_dentry); - err_dev_free: - fuse_dev_free(fud); err_put_conn: fuse_conn_put(fc); sb->s_fs_info = NULL; @@ -1229,11 +1262,52 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return err; } -static struct dentry *fuse_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *raw_data) +static int fuse_get_tree(struct fs_context *fc) { - return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); + struct fuse_fs_context *ctx = fc->fs_private; + + if (!ctx->fd_present || !ctx->rootmode_present || + !ctx->user_id_present || !ctx->group_id_present) + return -EINVAL; + +#ifdef CONFIG_BLOCK + if (ctx->is_bdev) + return get_tree_bdev(fc, fuse_fill_super); +#endif + + return get_tree_nodev(fc, fuse_fill_super); +} + +static const struct fs_context_operations fuse_context_ops = { + .free = fuse_free_fc, + .parse_param = fuse_parse_param, + .get_tree = fuse_get_tree, +}; + +/* + * Set up the filesystem mount context. + */ +static int fuse_init_fs_context(struct fs_context *fc) +{ + struct fuse_fs_context *ctx; + + ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->max_read = ~0; + ctx->blksize = FUSE_DEFAULT_BLKSIZE; + +#ifdef CONFIG_BLOCK + if (fc->fs_type == &fuseblk_fs_type) { + ctx->is_bdev = true; + ctx->destroy = true; + } +#endif + + fc->fs_private = ctx; + fc->ops = &fuse_context_ops; + return 0; } static void fuse_sb_destroy(struct super_block *sb) @@ -1241,7 +1315,8 @@ static void fuse_sb_destroy(struct super_block *sb) struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc) { - fuse_send_destroy(fc); + if (fc->destroy) + fuse_send_destroy(fc); fuse_abort_conn(fc); fuse_wait_aborted(fc); @@ -1252,29 +1327,24 @@ static void fuse_sb_destroy(struct super_block *sb) } } -static void fuse_kill_sb_anon(struct super_block *sb) +void fuse_kill_sb_anon(struct super_block *sb) { fuse_sb_destroy(sb); kill_anon_super(sb); } +EXPORT_SYMBOL_GPL(fuse_kill_sb_anon); static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, - .mount = fuse_mount, + .init_fs_context = fuse_init_fs_context, + .parameters = &fuse_fs_parameters, .kill_sb = fuse_kill_sb_anon, }; MODULE_ALIAS_FS("fuse"); #ifdef CONFIG_BLOCK -static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *raw_data) -{ - return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super); -} - static void fuse_kill_sb_blk(struct super_block *sb) { fuse_sb_destroy(sb); @@ -1284,7 +1354,8 @@ static void fuse_kill_sb_blk(struct super_block *sb) static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", - .mount = fuse_mount_blk, + .init_fs_context = fuse_init_fs_context, + .parameters = &fuse_fs_parameters, .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 574d03f8a573..5c38b9d84c6e 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -249,6 +249,27 @@ retry: return 0; } +static void fuse_force_forget(struct file *file, u64 nodeid) +{ + struct inode *inode = file_inode(file); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_forget_in inarg; + FUSE_ARGS(args); + + memset(&inarg, 0, sizeof(inarg)); + inarg.nlookup = 1; + args.opcode = FUSE_FORGET; + args.nodeid = nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.force = true; + args.noreply = true; + + fuse_simple_request(fc, &args); + /* ignore errors */ +} + static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, struct dir_context *ctx, u64 attr_version) { @@ -295,62 +316,55 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) { - int plus, err; - size_t nbytes; + int plus; + ssize_t res; struct page *page; struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req; + struct fuse_io_args ia = {}; + struct fuse_args_pages *ap = &ia.ap; + struct fuse_page_desc desc = { .length = PAGE_SIZE }; u64 attr_version = 0; bool locked; - req = fuse_get_req(fc, 1); - if (IS_ERR(req)) - return PTR_ERR(req); - page = alloc_page(GFP_KERNEL); - if (!page) { - fuse_put_request(fc, req); + if (!page) return -ENOMEM; - } plus = fuse_use_readdirplus(inode, ctx); - req->out.argpages = 1; - req->num_pages = 1; - req->pages[0] = page; - req->page_descs[0].length = PAGE_SIZE; + ap->args.out_pages = 1; + ap->num_pages = 1; + ap->pages = &page; + ap->descs = &desc; if (plus) { attr_version = fuse_get_attr_version(fc); - fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, - FUSE_READDIRPLUS); + fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, + FUSE_READDIRPLUS); } else { - fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, - FUSE_READDIR); + fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, + FUSE_READDIR); } locked = fuse_lock_inode(inode); - fuse_request_send(fc, req); + res = fuse_simple_request(fc, &ap->args); fuse_unlock_inode(inode, locked); - nbytes = req->out.args[0].size; - err = req->out.h.error; - fuse_put_request(fc, req); - if (!err) { - if (!nbytes) { + if (res >= 0) { + if (!res) { struct fuse_file *ff = file->private_data; if (ff->open_flags & FOPEN_CACHE_DIR) fuse_readdir_cache_end(file, ctx->pos); } else if (plus) { - err = parse_dirplusfile(page_address(page), nbytes, + res = parse_dirplusfile(page_address(page), res, file, ctx, attr_version); } else { - err = parse_dirfile(page_address(page), nbytes, file, + res = parse_dirfile(page_address(page), res, file, ctx); } } __free_page(page); fuse_invalidate_atime(inode); - return err; + return res; } enum fuse_parse_result { @@ -372,11 +386,13 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, for (;;) { struct fuse_dirent *dirent = addr + offset; unsigned int nbytes = size - offset; - size_t reclen = FUSE_DIRENT_SIZE(dirent); + size_t reclen; if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen) break; + reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */ + if (WARN_ON(dirent->namelen > FUSE_NAME_MAX)) return FOUND_ERR; if (WARN_ON(reclen > nbytes)) diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 433717640f78..20d052e08b3b 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -25,15 +25,15 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, memset(&inarg, 0, sizeof(inarg)); inarg.size = size; inarg.flags = flags; - args.in.h.opcode = FUSE_SETXATTR; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 3; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.in.args[1].size = strlen(name) + 1; - args.in.args[1].value = name; - args.in.args[2].size = size; - args.in.args[2].value = value; + args.opcode = FUSE_SETXATTR; + args.nodeid = get_node_id(inode); + args.in_numargs = 3; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = strlen(name) + 1; + args.in_args[1].value = name; + args.in_args[2].size = size; + args.in_args[2].value = value; err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_setxattr = 1; @@ -60,22 +60,22 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, memset(&inarg, 0, sizeof(inarg)); inarg.size = size; - args.in.h.opcode = FUSE_GETXATTR; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 2; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; - args.in.args[1].size = strlen(name) + 1; - args.in.args[1].value = name; + args.opcode = FUSE_GETXATTR; + args.nodeid = get_node_id(inode); + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = strlen(name) + 1; + args.in_args[1].value = name; /* This is really two different operations rolled into one */ - args.out.numargs = 1; + args.out_numargs = 1; if (size) { - args.out.argvar = 1; - args.out.args[0].size = size; - args.out.args[0].value = value; + args.out_argvar = true; + args.out_args[0].size = size; + args.out_args[0].value = value; } else { - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; } ret = fuse_simple_request(fc, &args); if (!ret && !size) @@ -121,20 +121,20 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) memset(&inarg, 0, sizeof(inarg)); inarg.size = size; - args.in.h.opcode = FUSE_LISTXATTR; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; + args.opcode = FUSE_LISTXATTR; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; /* This is really two different operations rolled into one */ - args.out.numargs = 1; + args.out_numargs = 1; if (size) { - args.out.argvar = 1; - args.out.args[0].size = size; - args.out.args[0].value = list; + args.out_argvar = true; + args.out_args[0].size = size; + args.out_args[0].value = list; } else { - args.out.args[0].size = sizeof(outarg); - args.out.args[0].value = &outarg; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; } ret = fuse_simple_request(fc, &args); if (!ret && !size) @@ -157,11 +157,11 @@ int fuse_removexattr(struct inode *inode, const char *name) if (fc->no_removexattr) return -EOPNOTSUPP; - args.in.h.opcode = FUSE_REMOVEXATTR; - args.in.h.nodeid = get_node_id(inode); - args.in.numargs = 1; - args.in.args[0].size = strlen(name) + 1; - args.in.args[0].value = name; + args.opcode = FUSE_REMOVEXATTR; + args.nodeid = get_node_id(inode); + args.in_numargs = 1; + args.in_args[0].size = strlen(name) + 1; + args.in_args[0].value = name; err = fuse_simple_request(fc, &args); if (err == -ENOSYS) { fc->no_removexattr = 1; diff --git a/fs/namespace.c b/fs/namespace.c index 93c043245c46..b75d458f817d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2802,8 +2802,6 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, put_filesystem(type); return -EINVAL; } - } else { - subtype = ""; } } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index e16fb8f2049e..273ee82d8aa9 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -88,7 +88,7 @@ static inline void mangle(struct seq_file *m, const char *s) static void show_type(struct seq_file *m, struct super_block *sb) { mangle(m, sb->s_type->name); - if (sb->s_subtype && sb->s_subtype[0]) { + if (sb->s_subtype) { seq_putc(m, '.'); mangle(m, sb->s_subtype); } diff --git a/fs/super.c b/fs/super.c index 8020974b2a68..f627b7c53d2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1555,11 +1555,6 @@ int vfs_get_tree(struct fs_context *fc) sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); - if (fc->subtype && !sb->s_subtype) { - sb->s_subtype = fc->subtype; - fc->subtype = NULL; - } - /* * Write barrier is for super_cache_count(). We place it before setting * SB_BORN as the data dependency between the two functions is the diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 0424df7f6e6b..e5c14e2c53d3 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -95,7 +95,6 @@ struct fs_context { const struct cred *cred; /* The mounter's credentials */ struct fc_log *log; /* Logging buffer */ const char *source; /* The source name (eg. dev path) */ - const char *subtype; /* The subtype to set on the superblock */ void *security; /* Linux S&M options */ void *s_fs_info; /* Proposed s_fs_info */ unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2971d29a42e4..df2e12fb3381 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -425,6 +425,10 @@ enum fuse_opcode { /* CUSE specific operations */ CUSE_INIT = 4096, + + /* Reserved opcodes: helpful to detect structure endian-ness */ + CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ + FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ }; enum fuse_notify_code {