diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index c35c5c614e38..06ea5cd05cd9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -239,23 +239,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) return err; } -/** - * Must be called with lock_flocks() already held. Fills in the passed - * counter variables, so you can prepare pagelist metadata before calling - * ceph_encode_locks. +/* + * Fills in the passed counter variables, so you can prepare pagelist metadata + * before calling ceph_encode_locks. */ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) { - struct file_lock *lock; + struct file_lock_context *ctx; *fcntl_count = 0; *flock_count = 0; - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_POSIX) - ++(*fcntl_count); - else if (lock->fl_flags & FL_FLOCK) - ++(*flock_count); + ctx = inode->i_flctx; + if (ctx) { + *fcntl_count = ctx->flc_posix_cnt; + *flock_count = ctx->flc_flock_cnt; } dout("counted %d flock locks and %d fcntl locks", *flock_count, *fcntl_count); @@ -271,6 +269,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, int num_fcntl_locks, int num_flock_locks) { struct file_lock *lock; + struct file_lock_context *ctx = inode->i_flctx; int err = 0; int seen_fcntl = 0; int seen_flock = 0; @@ -279,33 +278,34 @@ int ceph_encode_locks_to_buffer(struct inode *inode, dout("encoding %d flock and %d fcntl locks", num_flock_locks, num_fcntl_locks); - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_POSIX) { - ++seen_fcntl; - if (seen_fcntl > num_fcntl_locks) { - err = -ENOSPC; - goto fail; - } - err = lock_to_ceph_filelock(lock, &flocks[l]); - if (err) - goto fail; - ++l; + if (!ctx) + return 0; + + spin_lock(&ctx->flc_lock); + list_for_each_entry(lock, &ctx->flc_posix, fl_list) { + ++seen_fcntl; + if (seen_fcntl > num_fcntl_locks) { + err = -ENOSPC; + goto fail; } + err = lock_to_ceph_filelock(lock, &flocks[l]); + if 
(err) + goto fail; + ++l; } - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_FLOCK) { - ++seen_flock; - if (seen_flock > num_flock_locks) { - err = -ENOSPC; - goto fail; - } - err = lock_to_ceph_filelock(lock, &flocks[l]); - if (err) - goto fail; - ++l; + list_for_each_entry(lock, &ctx->flc_flock, fl_list) { + ++seen_flock; + if (seen_flock > num_flock_locks) { + err = -ENOSPC; + goto fail; } + err = lock_to_ceph_filelock(lock, &flocks[l]); + if (err) + goto fail; + ++l; } fail: + spin_unlock(&ctx->flc_lock); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d2171f4a6980..5f62fb7a5d0a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2700,20 +2700,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_filelock *flocks; encode_again: - spin_lock(&inode->i_lock); ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - spin_unlock(&inode->i_lock); flocks = kmalloc((num_fcntl_locks+num_flock_locks) * sizeof(struct ceph_filelock), GFP_NOFS); if (!flocks) { err = -ENOMEM; goto out_free; } - spin_lock(&inode->i_lock); err = ceph_encode_locks_to_buffer(inode, flocks, num_fcntl_locks, num_flock_locks); - spin_unlock(&inode->i_lock); if (err) { kfree(flocks); if (err == -ENOSPC) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 74f12877493a..c1a86764bbf7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1113,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } -/* copied from fs/locks.c with a name change */ -#define cifs_for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; \ - lockp = &(*lockp)->fl_next) - struct lock_to_push { struct list_head llist; __u64 offset; @@ -1132,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) { struct inode *inode = cfile->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct file_lock *flock, **before; - unsigned int count = 0, 
i = 0; + struct file_lock *flock; + struct file_lock_context *flctx = inode->i_flctx; + unsigned int i; int rc = 0, xid, type; struct list_head locks_to_send, *el; struct lock_to_push *lck, *tmp; @@ -1141,21 +1137,17 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) xid = get_xid(); - spin_lock(&inode->i_lock); - cifs_for_each_lock(inode, before) { - if ((*before)->fl_flags & FL_POSIX) - count++; - } - spin_unlock(&inode->i_lock); + if (!flctx) + goto out; INIT_LIST_HEAD(&locks_to_send); /* - * Allocating count locks is enough because no FL_POSIX locks can be - * added to the list while we are holding cinode->lock_sem that + * Allocating flc_posix_cnt locks is enough because no FL_POSIX locks + * can be added to the list while we are holding cinode->lock_sem that * protects locking operations of this inode. */ - for (; i < count; i++) { + for (i = 0; i < flctx->flc_posix_cnt; i++) { lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); if (!lck) { rc = -ENOMEM; @@ -1165,11 +1157,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) } el = locks_to_send.next; - spin_lock(&inode->i_lock); - cifs_for_each_lock(inode, before) { - flock = *before; - if ((flock->fl_flags & FL_POSIX) == 0) - continue; + spin_lock(&flctx->flc_lock); + list_for_each_entry(flock, &flctx->flc_posix, fl_list) { if (el == &locks_to_send) { /* * The list ended. 
We don't have enough allocated @@ -1189,9 +1178,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->length = length; lck->type = type; lck->offset = flock->fl_start; - el = el->next; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { int stored_rc; diff --git a/fs/inode.c b/fs/inode.c index aa149e7262ac..f30872ade6d7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -194,7 +194,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; #endif - + inode->i_flctx = NULL; this_cpu_inc(nr_inodes); return 0; @@ -237,6 +237,7 @@ void __destroy_inode(struct inode *inode) BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); fsnotify_inode_delete(inode); + locks_free_lock_context(inode->i_flctx); if (!inode->i_nlink) { WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); atomic_long_dec(&inode->i_sb->s_remove_count); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d12ff4e2dbe7..665ef5a05183 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; struct nlm_host *lockhost; + if (!flctx || list_empty_careful(&flctx->flc_posix)) + return 0; again: file->f_locks = 0; - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl; fl = fl->fl_next) { + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { if (fl->fl_lmops != &nlmsvc_lock_operations) continue; @@ -180,7 +183,7 @@ again: if (match(lockhost, host)) { struct file_lock lock = *fl; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; @@ -192,7 +195,7 @@ again: goto again; } } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); 
return 0; } @@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl; fl = fl->fl_next) { - if (fl->fl_lmops == &nlmsvc_lock_operations) { - spin_unlock(&inode->i_lock); - return 1; + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_lmops == &nlmsvc_lock_operations) { + spin_unlock(&flctx->flc_lock); + return 1; + } } + spin_unlock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); file->f_locks = 0; return 0; } diff --git a/fs/locks.c b/fs/locks.c index 59e2f905e4ff..4d0d41163a50 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl) int leases_enable = 1; int lease_break_time = 45; -#define for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) - /* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock via * the file_lock_lglock. Note that alterations to the list also require that - * the relevant i_lock is held. + * the relevant flc_lock is held. */ DEFINE_STATIC_LGLOCK(file_lock_lglock); static DEFINE_PER_CPU(struct hlist_head, file_lock_list); @@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, - * we often hold the i_lock as well. In certain cases, when reading the fields + * we often hold the flc_lock as well. 
In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the - * i_lock. + * flc_lock. * * In particular, adding an entry to the fl_block list requires that you hold - * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting - * an entry from the list however only requires the file_lock_lock. + * both the flc_lock and the blocked_lock_lock (acquired in that order). + * Deleting an entry from the list however only requires the blocked_lock_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); +static struct kmem_cache *flctx_cache __read_mostly; static struct kmem_cache *filelock_cache __read_mostly; +static struct file_lock_context * +locks_get_lock_context(struct inode *inode) +{ + struct file_lock_context *new; + + if (likely(inode->i_flctx)) + goto out; + + new = kmem_cache_alloc(flctx_cache, GFP_KERNEL); + if (!new) + goto out; + + spin_lock_init(&new->flc_lock); + INIT_LIST_HEAD(&new->flc_flock); + INIT_LIST_HEAD(&new->flc_posix); + INIT_LIST_HEAD(&new->flc_lease); + + /* + * Assign the pointer if it's not already assigned. If it is, then + * free the context we just allocated. 
+ */ + spin_lock(&inode->i_lock); + if (likely(!inode->i_flctx)) { + inode->i_flctx = new; + new = NULL; + } + spin_unlock(&inode->i_lock); + + if (new) + kmem_cache_free(flctx_cache, new); +out: + return inode->i_flctx; +} + +void +locks_free_lock_context(struct file_lock_context *ctx) +{ + if (ctx) { + WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); + WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); + WARN_ON_ONCE(!list_empty(&ctx->flc_lease)); + kmem_cache_free(flctx_cache, ctx); + } +} + static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_list); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); } @@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private); void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); + BUG_ON(!list_empty(&fl->fl_list)); BUG_ON(!list_empty(&fl->fl_block)); BUG_ON(!hlist_unhashed(&fl->fl_link)); @@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose) struct file_lock *fl; while (!list_empty(dispose)) { - fl = list_first_entry(dispose, struct file_lock, fl_block); - list_del_init(&fl->fl_block); + fl = list_first_entry(dispose, struct file_lock, fl_list); + list_del_init(&fl->fl_list); locks_free_lock(fl); } } @@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner; } -/* Must be called with the i_lock held! */ +/* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock *fl) { lg_local_lock(&file_lock_lglock); @@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl) lg_local_unlock(&file_lock_lglock); } -/* Must be called with the i_lock held! */ +/* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock *fl) { /* * Avoid taking lock if already unhashed. 
This is safe since this check - * is done while holding the i_lock, and new insertions into the list + * is done while holding the flc_lock, and new insertions into the list * also require that it be held. */ if (hlist_unhashed(&fl->fl_link)) @@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter) * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * - * Must be called with both the i_lock and blocked_lock_lock held. The fl_block - * list itself is protected by the blocked_lock_lock, but by ensuring that the - * i_lock is also held on insertions we can avoid taking the blocked_lock_lock - * in some cases when we see that the fl_block list is empty. + * Must be called with both the flc_lock and blocked_lock_lock held. The + * fl_block list itself is protected by the blocked_lock_lock, but by ensuring + * that the flc_lock is also held on insertions we can avoid taking the + * blocked_lock_lock in some cases when we see that the fl_block list is empty. */ static void __locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) @@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker, locks_insert_global_blocked(waiter); } -/* Must be called with i_lock held. */ +/* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { @@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker, /* * Wake up processes blocked waiting for blocker. * - * Must be called with the inode->i_lock held! + * Must be called with the flc_lock held! */ static void locks_wake_up_blocks(struct file_lock *blocker) { /* * Avoid taking global lock if list is empty. This is safe since new - * blocked requests are only added to the list under the i_lock, and - * the i_lock is always held here. 
Note that removal from the fl_block - * list does not require the i_lock, so we must recheck list_empty() + * blocked requests are only added to the list under the flc_lock, and + * the flc_lock is always held here. Note that removal from the fl_block + * list does not require the flc_lock, so we must recheck list_empty() * after acquiring the blocked_lock_lock. */ if (list_empty(&blocker->fl_block)) @@ -635,63 +680,36 @@ static void locks_wake_up_blocks(struct file_lock *blocker) spin_unlock(&blocked_lock_lock); } -/* Insert file lock fl into an inode's lock list at the position indicated - * by pos. At the same time add the lock to the global file lock list. - * - * Must be called with the i_lock held! - */ -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +static void +locks_insert_lock_ctx(struct file_lock *fl, int *counter, + struct list_head *before) { fl->fl_nspid = get_pid(task_tgid(current)); - - /* insert into file's list */ - fl->fl_next = *pos; - *pos = fl; - + list_add_tail(&fl->fl_list, before); + ++*counter; locks_insert_global_locks(fl); } -/** - * locks_delete_lock - Delete a lock and then free it. - * @thisfl_p: pointer that points to the fl_next field of the previous - * inode->i_flock list entry - * - * Unlink a lock from all lists and free the namespace reference, but don't - * free it yet. Wake up processes that are blocked waiting for this lock and - * notify the FS that the lock has been cleared. - * - * Must be called with the i_lock held! - */ -static void locks_unlink_lock(struct file_lock **thisfl_p) +static void +locks_unlink_lock_ctx(struct file_lock *fl, int *counter) { - struct file_lock *fl = *thisfl_p; - locks_delete_global_locks(fl); - - *thisfl_p = fl->fl_next; - fl->fl_next = NULL; - + list_del_init(&fl->fl_list); + --*counter; if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; } - locks_wake_up_blocks(fl); } -/* - * Unlink a lock from all lists and free it. 
- * - * Must be called with i_lock held! - */ -static void locks_delete_lock(struct file_lock **thisfl_p, - struct list_head *dispose) +static void +locks_delete_lock_ctx(struct file_lock *fl, int *counter, + struct list_head *dispose) { - struct file_lock *fl = *thisfl_p; - - locks_unlink_lock(thisfl_p); + locks_unlink_lock_ctx(fl, counter); if (dispose) - list_add(&fl->fl_block, dispose); + list_add(&fl->fl_list, dispose); else locks_free_lock(fl); } @@ -746,22 +764,27 @@ void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; + struct file_lock_context *ctx; struct inode *inode = file_inode(filp); - spin_lock(&inode->i_lock); - for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { - if (!IS_POSIX(cfl)) - continue; - if (posix_locks_conflict(fl, cfl)) - break; - } - if (cfl) { - locks_copy_conflock(fl, cfl); - if (cfl->fl_nspid) - fl->fl_pid = pid_vnr(cfl->fl_nspid); - } else + ctx = inode->i_flctx; + if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; - spin_unlock(&inode->i_lock); + return; + } + + spin_lock(&ctx->flc_lock); + list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { + if (posix_locks_conflict(fl, cfl)) { + locks_copy_conflock(fl, cfl); + if (cfl->fl_nspid) + fl->fl_pid = pid_vnr(cfl->fl_nspid); + goto out; + } + } + fl->fl_type = F_UNLCK; +out: + spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); @@ -845,34 +868,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, static int flock_lock_file(struct file *filp, struct file_lock *request) { struct file_lock *new_fl = NULL; - struct file_lock **before; - struct inode * inode = file_inode(filp); + struct file_lock *fl; + struct file_lock_context *ctx; + struct inode *inode = file_inode(filp); int error = 0; - int found = 0; + bool found = false; LIST_HEAD(dispose); + ctx = locks_get_lock_context(inode); + if (!ctx) + return -ENOMEM; + if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 
new_fl = locks_alloc_lock(); if (!new_fl) return -ENOMEM; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; - for_each_lock(inode, before) { - struct file_lock *fl = *before; - if (IS_POSIX(fl)) - break; - if (IS_LEASE(fl)) - continue; + list_for_each_entry(fl, &ctx->flc_flock, fl_list) { if (filp != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; - found = 1; - locks_delete_lock(before, &dispose); + found = true; + locks_delete_lock_ctx(fl, &ctx->flc_flock_cnt, &dispose); break; } @@ -887,18 +910,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) { - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); cond_resched(); - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); } find_conflict: - for_each_lock(inode, before) { - struct file_lock *fl = *before; - if (IS_POSIX(fl)) - break; - if (IS_LEASE(fl)) - continue; + list_for_each_entry(fl, &ctx->flc_flock, fl_list) { if (!flock_locks_conflict(request, fl)) continue; error = -EAGAIN; @@ -911,12 +929,12 @@ find_conflict: if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &ctx->flc_flock_cnt, &ctx->flc_flock); new_fl = NULL; error = 0; out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); @@ -925,16 +943,20 @@ out: static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { - struct file_lock *fl; + struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; - struct file_lock **before; + struct file_lock_context *ctx; int error; bool added = false; LIST_HEAD(dispose); + ctx = locks_get_lock_context(inode); + if (!ctx) + 
return -ENOMEM; + /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. @@ -948,15 +970,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str new_fl2 = locks_alloc_lock(); } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the * blocker's list of waiters and the global blocked_hash. */ if (request->fl_type != F_UNLCK) { - for_each_lock(inode, before) { - fl = *before; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) @@ -986,29 +1007,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (request->fl_flags & FL_ACCESS) goto out; - /* - * Find the first old lock with the same owner as the new lock. - */ - - before = &inode->i_flock; - - /* First skip locks owned by other processes. */ - while ((fl = *before) && (!IS_POSIX(fl) || - !posix_same_owner(request, fl))) { - before = &fl->fl_next; + /* Find the first old lock with the same owner as the new lock */ + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { + if (posix_same_owner(request, fl)) + break; } /* Process locks with this owner. */ - while ((fl = *before) && posix_same_owner(request, fl)) { - /* Detect adjacent or overlapping regions (if same lock type) - */ + list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) { + if (!posix_same_owner(request, fl)) + break; + + /* Detect adjacent or overlapping regions (if same lock type) */ if (request->fl_type == fl->fl_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. 
*/ if (fl->fl_end < request->fl_start - 1) - goto next_lock; + continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ @@ -1029,18 +1046,18 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str else request->fl_end = fl->fl_end; if (added) { - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, &ctx->flc_posix_cnt, + &dispose); continue; } request = fl; added = true; - } - else { + } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) - goto next_lock; + continue; if (fl->fl_start > request->fl_end) break; if (request->fl_type == F_UNLCK) @@ -1059,7 +1076,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str * one (This may happen several times). */ if (added) { - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, + &ctx->flc_posix_cnt, &dispose); continue; } /* @@ -1075,15 +1093,13 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_copy_lock(new_fl, request); request = new_fl; new_fl = NULL; - locks_delete_lock(before, &dispose); - locks_insert_lock(before, request); + locks_insert_lock_ctx(request, + &ctx->flc_posix_cnt, &fl->fl_list); + locks_delete_lock_ctx(fl, + &ctx->flc_posix_cnt, &dispose); added = true; } } - /* Go on to next lock. 
- */ - next_lock: - before = &fl->fl_next; } /* @@ -1108,7 +1124,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str goto out; } locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &ctx->flc_posix_cnt, + &fl->fl_list); new_fl = NULL; } if (right) { @@ -1119,7 +1136,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); - locks_insert_lock(before, left); + locks_insert_lock_ctx(left, &ctx->flc_posix_cnt, + &fl->fl_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(right); @@ -1129,7 +1147,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_wake_up_blocks(left); } out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); /* * Free any unused locks. */ @@ -1199,22 +1217,29 @@ EXPORT_SYMBOL(posix_lock_file_wait); */ int locks_mandatory_locked(struct file *file) { + int ret; struct inode *inode = file_inode(file); + struct file_lock_context *ctx; struct file_lock *fl; + ctx = inode->i_flctx; + if (!ctx || list_empty_careful(&ctx->flc_posix)) + return 0; + /* * Search the lock list for this inode for any POSIX locks. */ - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!IS_POSIX(fl)) - continue; + spin_lock(&ctx->flc_lock); + ret = 0; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (fl->fl_owner != current->files && - fl->fl_owner != file) + fl->fl_owner != file) { + ret = -EAGAIN; break; + } } - spin_unlock(&inode->i_lock); - return fl ? 
-EAGAIN : 0; + spin_unlock(&ctx->flc_lock); + return ret; } /** @@ -1294,9 +1319,9 @@ static void lease_clear_pending(struct file_lock *fl, int arg) } /* We already had a lease on this file; just change its type */ -int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) +int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) { - struct file_lock *fl = *before; + struct file_lock_context *flctx; int error = assign_type(fl, arg); if (error) @@ -1306,6 +1331,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) if (arg == F_UNLCK) { struct file *filp = fl->fl_file; + flctx = file_inode(filp)->i_flctx; f_delown(filp); filp->f_owner.signum = 0; fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); @@ -1313,7 +1339,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } - locks_delete_lock(before, dispose); + locks_delete_lock_ctx(fl, &flctx->flc_lease_cnt, dispose); } return 0; } @@ -1329,20 +1355,17 @@ static bool past_time(unsigned long then) static void time_out_leases(struct inode *inode, struct list_head *dispose) { - struct file_lock **before; - struct file_lock *fl; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl, *tmp; - lockdep_assert_held(&inode->i_lock); + lockdep_assert_held(&ctx->flc_lock); - before = &inode->i_flock; - while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) - lease_modify(before, F_RDLCK, dispose); + lease_modify(fl, F_RDLCK, dispose); if (past_time(fl->fl_break_time)) - lease_modify(before, F_UNLCK, dispose); - if (fl == *before) /* lease_modify may have freed fl */ - before = &fl->fl_next; + lease_modify(fl, F_UNLCK, dispose); } } @@ -1356,11 +1379,12 @@ static bool 
leases_conflict(struct file_lock *lease, struct file_lock *breaker) static bool any_leases_conflict(struct inode *inode, struct file_lock *breaker) { + struct file_lock_context *ctx = inode->i_flctx; struct file_lock *fl; - lockdep_assert_held(&inode->i_lock); + lockdep_assert_held(&ctx->flc_lock); - for (fl = inode->i_flock ; fl && IS_LEASE(fl); fl = fl->fl_next) { + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (leases_conflict(fl, breaker)) return true; } @@ -1384,7 +1408,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock *new_fl; - struct file_lock *fl, **before; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); @@ -1394,7 +1419,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) return PTR_ERR(new_fl); new_fl->fl_flags = type; - spin_lock(&inode->i_lock); + /* typically we will check that ctx is non-NULL before calling */ + if (!ctx) { + WARN_ON_ONCE(1); + return error; + } + + spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); @@ -1408,9 +1439,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) break_time++; /* so that 0 means no break time */ } - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (!leases_conflict(fl, new_fl)) continue; if (want_write) { @@ -1419,17 +1448,17 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) fl->fl_flags |= FL_UNLOCK_PENDING; fl->fl_break_time = break_time; } else { - if (lease_breaking(inode->i_flock)) + if (lease_breaking(fl)) continue; fl->fl_flags |= FL_DOWNGRADE_PENDING; fl->fl_downgrade_time = break_time; } if (fl->fl_lmops->lm_break(fl)) - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, &ctx->flc_lease_cnt, 
+ &dispose); } - fl = inode->i_flock; - if (!fl || !IS_LEASE(fl)) + if (list_empty(&ctx->flc_lease)) goto out; if (mode & O_NONBLOCK) { @@ -1439,18 +1468,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) } restart: - break_time = inode->i_flock->fl_break_time; + fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list); + break_time = fl->fl_break_time; if (break_time != 0) break_time -= jiffies; if (break_time == 0) break_time++; - locks_insert_block(inode->i_flock, new_fl); + locks_insert_block(fl, new_fl); trace_break_lease_block(inode, new_fl); - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, !new_fl->fl_next, break_time); - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); locks_delete_block(new_fl); if (error >= 0) { @@ -1462,12 +1492,10 @@ restart: time_out_leases(inode, &dispose); if (any_leases_conflict(inode, new_fl)) goto restart; - error = 0; } - out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; @@ -1487,14 +1515,18 @@ EXPORT_SYMBOL(__break_lease); void lease_get_mtime(struct inode *inode, struct timespec *time) { bool has_lease = false; - struct file_lock *flock; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl; - if (inode->i_flock) { - spin_lock(&inode->i_lock); - flock = inode->i_flock; - if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) - has_lease = true; - spin_unlock(&inode->i_lock); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + spin_lock(&ctx->flc_lock); + if (!list_empty(&ctx->flc_lease)) { + fl = list_first_entry(&ctx->flc_lease, + struct file_lock, fl_list); + if (fl->fl_type == F_WRLCK) + has_lease = true; + } + spin_unlock(&ctx->flc_lock); } if (has_lease) @@ -1532,20 +1564,22 @@ int fcntl_getlease(struct file *filp) { struct 
file_lock *fl; struct inode *inode = file_inode(filp); + struct file_lock_context *ctx = inode->i_flctx; int type = F_UNLCK; LIST_HEAD(dispose); - spin_lock(&inode->i_lock); - time_out_leases(file_inode(filp), &dispose); - for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); - fl = fl->fl_next) { - if (fl->fl_file == filp) { + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + spin_lock(&ctx->flc_lock); + time_out_leases(file_inode(filp), &dispose); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file != filp) + continue; type = target_leasetype(fl); break; } + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); } - spin_unlock(&inode->i_lock); - locks_dispose_list(&dispose); return type; } @@ -1578,9 +1612,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg) static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { - struct file_lock *fl, **before, **my_before = NULL, *lease; + struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; LIST_HEAD(dispose); @@ -1588,6 +1623,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr lease = *flp; trace_generic_add_lease(inode, lease); + ctx = locks_get_lock_context(inode); + if (!ctx) + return -ENOMEM; + /* * In the delegation case we need mutual exclusion with * a number of operations that take the i_mutex. We trylock @@ -1606,7 +1645,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr return -EINVAL; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(dentry, arg); if (error) @@ -1621,13 +1660,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr * except for this filp. 
*/ error = -EAGAIN; - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file == filp) { - my_before = before; + my_fl = fl; continue; } + /* * No exclusive leases if someone else has a lease on * this file: @@ -1642,9 +1680,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr goto out; } - if (my_before != NULL) { - lease = *my_before; - error = lease->fl_lmops->lm_change(my_before, arg, &dispose); + if (my_fl != NULL) { + error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); if (error) goto out; goto out_setup; @@ -1654,7 +1691,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr if (!leases_enable) goto out; - locks_insert_lock(before, lease); + locks_insert_lock_ctx(lease, &ctx->flc_lease_cnt, &ctx->flc_lease); /* * The check in break_lease() is lockless. It's possible for another * open to race in after we did the earlier check for a conflicting @@ -1666,45 +1703,49 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr */ smp_mb(); error = check_conflicting_open(dentry, arg); - if (error) - goto out_unlink; + if (error) { + locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt); + goto out; + } out_setup: if (lease->fl_lmops->lm_setup) lease->fl_lmops->lm_setup(lease, priv); out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); if (is_deleg) mutex_unlock(&inode->i_mutex); - if (!error && !my_before) + if (!error && !my_fl) *flp = NULL; return error; -out_unlink: - locks_unlink_lock(before); - goto out; } static int generic_delete_lease(struct file *filp) { int error = -EAGAIN; - struct file_lock *fl, **before; + struct file_lock *fl, *victim = NULL; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + struct file_lock_context *ctx = inode->i_flctx; LIST_HEAD(dispose); - 
spin_lock(&inode->i_lock); - time_out_leases(inode, &dispose); - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { - if (fl->fl_file == filp) + if (!ctx) { + trace_generic_delete_lease(inode, NULL); + return error; + } + + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file == filp) { + victim = fl; break; + } } trace_generic_delete_lease(inode, fl); - if (fl && IS_LEASE(fl)) - error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose); - spin_unlock(&inode->i_lock); + if (victim) + error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); return error; } @@ -2171,7 +2212,7 @@ again: */ /* * we need that spin_lock here - it prevents reordering between - * update of inode->i_flock and check for it done in close(). + * update of i_flctx->flc_posix and check for it done in close(). * rcu_read_lock() wouldn't do. */ spin_lock(¤t->files->file_lock); @@ -2331,13 +2372,14 @@ out: void locks_remove_posix(struct file *filp, fl_owner_t owner) { struct file_lock lock; + struct file_lock_context *ctx = file_inode(filp)->i_flctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. 
*/ - if (!file_inode(filp)->i_flock) + if (!ctx || list_empty(&ctx->flc_posix)) return; lock.fl_type = F_UNLCK; @@ -2358,67 +2400,67 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) EXPORT_SYMBOL(locks_remove_posix); +/* The i_flctx must be valid when calling into here */ +static void +locks_remove_flock(struct file *filp) +{ + struct file_lock fl = { + .fl_owner = filp, + .fl_pid = current->tgid, + .fl_file = filp, + .fl_flags = FL_FLOCK, + .fl_type = F_UNLCK, + .fl_end = OFFSET_MAX, + }; + struct file_lock_context *flctx = file_inode(filp)->i_flctx; + + if (list_empty(&flctx->flc_flock)) + return; + + if (filp->f_op->flock) + filp->f_op->flock(filp, F_SETLKW, &fl); + else + flock_lock_file(filp, &fl); + + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); +} + +/* The i_flctx must be valid when calling into here */ +static void +locks_remove_lease(struct file *filp) +{ + struct inode *inode = file_inode(filp); + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl, *tmp; + LIST_HEAD(dispose); + + if (list_empty(&ctx->flc_lease)) + return; + + spin_lock(&ctx->flc_lock); + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) + lease_modify(fl, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); +} + /* * This function is called on the last close of an open file. 
*/ void locks_remove_file(struct file *filp) { - struct inode * inode = file_inode(filp); - struct file_lock *fl; - struct file_lock **before; - LIST_HEAD(dispose); - - if (!inode->i_flock) + if (!file_inode(filp)->i_flctx) return; + /* remove any OFD locks */ locks_remove_posix(filp, filp); - if (filp->f_op->flock) { - struct file_lock fl = { - .fl_owner = filp, - .fl_pid = current->tgid, - .fl_file = filp, - .fl_flags = FL_FLOCK, - .fl_type = F_UNLCK, - .fl_end = OFFSET_MAX, - }; - filp->f_op->flock(filp, F_SETLKW, &fl); - if (fl.fl_ops && fl.fl_ops->fl_release_private) - fl.fl_ops->fl_release_private(&fl); - } + /* remove flock locks */ + locks_remove_flock(filp); - spin_lock(&inode->i_lock); - before = &inode->i_flock; - - while ((fl = *before) != NULL) { - if (fl->fl_file == filp) { - if (IS_LEASE(fl)) { - lease_modify(before, F_UNLCK, &dispose); - continue; - } - - /* - * There's a leftover lock on the list of a type that - * we didn't expect to see. Most likely a classic - * POSIX lock that ended up not getting released - * properly, or that raced onto the list somehow. Log - * some info about it and then just remove it from - * the list. 
- */ - WARN(!IS_FLOCK(fl), - "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", - MAJOR(inode->i_sb->s_dev), - MINOR(inode->i_sb->s_dev), inode->i_ino, - fl->fl_type, fl->fl_flags, - fl->fl_start, fl->fl_end); - - locks_delete_lock(before, &dispose); - continue; - } - before = &fl->fl_next; - } - spin_unlock(&inode->i_lock); - locks_dispose_list(&dispose); + /* remove any leases */ + locks_remove_lease(filp); } /** @@ -2621,6 +2663,9 @@ static int __init filelock_init(void) { int i; + flctx_cache = kmem_cache_create("file_lock_ctx", + sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); + filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f3f60641344..8cdb2b28a104 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ { struct inode *inode = state->inode; struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; + struct list_head *list; int status = 0; - if (inode->i_flock == NULL) + if (flctx == NULL) goto out; - /* Protect inode->i_flock using the i_lock */ - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) - continue; + list = &flctx->flc_posix; + spin_lock(&flctx->flc_lock); +restart: + list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file) != ctx) continue; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); + if (list == &flctx->flc_posix) { + list = &flctx->flc_flock; + goto restart; + } + spin_unlock(&flctx->flc_lock); out: return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 
5194933ed419..a3bb22ab68c5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1366,49 +1366,55 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ struct nfs_inode *nfsi = NFS_I(inode); struct file_lock *fl; int status = 0; + struct file_lock_context *flctx = inode->i_flctx; + struct list_head *list; - if (inode->i_flock == NULL) + if (flctx == NULL) return 0; + list = &flctx->flc_posix; + /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); - /* Protect inode->i_flock using the BKL */ - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) - continue; + spin_lock(&flctx->flc_lock); +restart: + list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file)->state != state) continue; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); status = ops->recover_lock(state, fl); switch (status) { - case 0: - break; - case -ESTALE: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - goto out; - default: - printk(KERN_ERR "NFS: %s: unhandled error %d\n", - __func__, status); - case -ENOMEM: - case -NFS4ERR_DENIED: - case -NFS4ERR_RECLAIM_BAD: - case -NFS4ERR_RECLAIM_CONFLICT: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - status = 0; + case 0: + break; + case -ESTALE: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto out; + default: + pr_err("NFS: %s: unhandled error %d\n", + __func__, 
status); + case -ENOMEM: + case -NFS4ERR_DENIED: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + status = 0; } - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); + if (list == &flctx->flc_posix) { + list = &flctx->flc_flock; + goto restart; + } + spin_unlock(&flctx->flc_lock); out: up_write(&nfsi->rwsem); return status; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2b5e769beb16..29c7f33c9cf1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -826,11 +826,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct nfs_pageio_descriptor *pgio) { size_t size; + struct file_lock_context *flctx; if (prev) { if (!nfs_match_open_context(req->wb_context, prev->wb_context)) return false; - if (req->wb_context->dentry->d_inode->i_flock != NULL && + flctx = req->wb_context->dentry->d_inode->i_flctx; + if (flctx != NULL && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock)) && !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) return false; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..4ae66f416eb9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_lock_context *l_ctx; + struct file_lock_context *flctx = file_inode(file)->i_flctx; struct nfs_page *req; int do_flush, status; /* @@ -1109,7 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) do_flush = req->wb_page != page || req->wb_context != ctx; /* for now, flush if more than 1 request in page_group */ do_flush |= req->wb_this_page != req; - if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { + if (l_ctx && flctx && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock))) { do_flush |= l_ctx->lockowner.l_owner != 
current->files || l_ctx->lockowner.l_pid != current->tgid; } @@ -1170,6 +1173,13 @@ out: return PageUptodate(page) != 0; } +static bool +is_whole_file_wrlock(struct file_lock *fl) +{ + return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && + fl->fl_type == F_WRLCK; +} + /* If we know the page is up to date, and we're not using byte range locks (or * if we have the whole file locked for writing), it may be more efficient to * extend the write to cover the entire page in order to avoid fragmentation @@ -1180,17 +1190,36 @@ out: */ static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) { + int ret; + struct file_lock_context *flctx = inode->i_flctx; + struct file_lock *fl; + if (file->f_flags & O_DSYNC) return 0; if (!nfs_write_pageuptodate(page, inode)) return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; - if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && - inode->i_flock->fl_end == OFFSET_MAX && - inode->i_flock->fl_type != F_RDLCK)) - return 1; - return 0; + if (!flctx || (list_empty_careful(&flctx->flc_flock) && + list_empty_careful(&flctx->flc_posix))) + return 0; + + /* Check to see if there are whole file write locks */ + ret = 0; + spin_lock(&flctx->flc_lock); + if (!list_empty(&flctx->flc_posix)) { + fl = list_first_entry(&flctx->flc_posix, struct file_lock, + fl_list); + if (is_whole_file_wrlock(fl)) + ret = 1; + } else if (!list_empty(&flctx->flc_flock)) { + fl = list_first_entry(&flctx->flc_flock, struct file_lock, + fl_list); + if (fl->fl_type == F_WRLCK) + ret = 1; + } + spin_unlock(&flctx->flc_lock); + return ret; } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c06a1ba80d73..532a60cca2fb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3477,7 +3477,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) } static int -nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) +nfsd_change_deleg_cb(struct file_lock *onlist, int 
arg, + struct list_head *dispose) { if (arg & F_UNLCK) return lease_modify(onlist, arg, dispose); @@ -5556,10 +5557,11 @@ out_nfserr: static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { - struct file_lock **flpp; + struct file_lock *fl; int status = false; struct file *filp = find_any_file(fp); struct inode *inode; + struct file_lock_context *flctx; if (!filp) { /* Any valid lock stateid should have some sort of access */ @@ -5568,15 +5570,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } inode = file_inode(filp); + flctx = inode->i_flctx; - spin_lock(&inode->i_lock); - for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { - if ((*flpp)->fl_owner == (fl_owner_t)lowner) { - status = true; - break; + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_owner == (fl_owner_t)lowner) { + status = true; + break; + } } + spin_unlock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); fput(filp); return status; } diff --git a/fs/read_write.c b/fs/read_write.c index c0805c93b6fa..4060691e78f7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t return retval; } - if (unlikely(inode->i_flock && mandatory_lock(inode))) { + if (unlikely(inode->i_flctx && mandatory_lock(inode))) { retval = locks_mandatory_area( read_write == READ ? 
FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); diff --git a/include/linux/fs.h b/include/linux/fs.h index 42efe13077b6..ddd2fa7cefd3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -625,7 +625,7 @@ struct inode { atomic_t i_readcount; /* struct files open RO */ #endif const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct file_lock *i_flock; + struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { @@ -885,6 +885,8 @@ static inline struct file *get_file(struct file *f) /* legacy typedef, should eventually be removed */ typedef void *fl_owner_t; +struct file_lock; + struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); @@ -898,7 +900,7 @@ struct lock_manager_operations { void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock **, int, struct list_head *); + int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); }; @@ -923,17 +925,17 @@ int locks_in_grace(struct net *); * FIXME: should we create a separate "struct lock_request" to help distinguish * these two uses? * - * The i_flock list is ordered by: + * The various i_flctx lists are ordered by: * - * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX - * 2) lock owner - * 3) lock range start - * 4) lock range end + * 1) lock owner + * 2) lock range start + * 3) lock range end * * Obviously, the last two criteria only matter for POSIX locks.
*/ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ + struct list_head fl_list; /* link into file_lock_context */ struct hlist_node fl_link; /* node in global lists */ struct list_head fl_block; /* circular list of blocked processes */ fl_owner_t fl_owner; @@ -964,6 +966,16 @@ struct file_lock { } fl_u; }; +struct file_lock_context { + spinlock_t flc_lock; + struct list_head flc_flock; + struct list_head flc_posix; + struct list_head flc_lease; + int flc_flock_cnt; + int flc_posix_cnt; + int flc_lease_cnt; +}; + /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) @@ -990,6 +1002,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ +void locks_free_lock_context(struct file_lock_context *ctx); void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); @@ -1010,7 +1023,7 @@ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int t extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); extern int vfs_setlease(struct file *, long, struct file_lock **, void **); -extern int lease_modify(struct file_lock **, int, struct list_head *); +extern int lease_modify(struct file_lock *, int, struct list_head *); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1047,6 +1060,11 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline void +locks_free_lock_context(struct file_lock_context *ctx) +{ +} + static inline void locks_init_lock(struct file_lock *fl) { return; @@ -1137,7 +1155,7 @@ static inline int vfs_setlease(struct file *filp, long arg, 
return -EINVAL; } -static inline int lease_modify(struct file_lock **before, int arg, +static inline int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) { return -EINVAL; @@ -1959,7 +1977,7 @@ static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) { - if (inode->i_flock && mandatory_lock(inode)) + if (inode->i_flctx && mandatory_lock(inode)) return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? size : inode->i_size, @@ -1973,11 +1991,12 @@ static inline int break_lease(struct inode *inode, unsigned int mode) { /* * Since this check is lockless, we must ensure that any refcounts - * taken are done before checking inode->i_flock. Otherwise, we could - * end up racing with tasks trying to set a new lease on this file. + * taken are done before checking i_flctx->flc_lease. Otherwise, we + * could end up racing with tasks trying to set a new lease on this + * file. */ smp_mb(); - if (inode->i_flock) + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } @@ -1986,11 +2005,12 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) { /* * Since this check is lockless, we must ensure that any refcounts - * taken are done before checking inode->i_flock. Otherwise, we could - * end up racing with tasks trying to set a new lease on this file. + * taken are done before checking i_flctx->flc_lease. Otherwise, we + * could end up racing with tasks trying to set a new lease on this + * file. */ smp_mb(); - if (inode->i_flock) + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; }