File locking related changes for v4.5 (pile #1)

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJWkwhdAAoJEAAOaEEZVoIVgpUQAMhB2+ryZtlJy4s7lkfI3Wwi
 ni7lAuJ6xXB0FIA8wqNzz6fVDY0pbsfwR45OS11fh+hU2FnM8REHCDPC47E8MQYx
 ft0Kfp7Z0tLAPni7XTVd/gFy8zTDGOKXBlu44PNaVEdtPJzIXwVzm2QkT7F3ExOz
 mkXSCta7lFemBQ0DhbafiWbfQ8yav1HFGZG7XN06A76y8ZET+Uu1oyiPPI4jvHlO
 vHrxpwia2ROnQHeG0pLR7KvOmN3ZSTJZuH6LiMZH0QFqyocYzmhR9rQ/hrxBg0rU
 IDzcMjP0ybU9Fu/o7sDShnkTawRuVLt0zasfdlYtGVCTYBx8f7WqkJnLTCwWYVDG
 MLQM7y8xWHM1f7uLhgT8WHg8O/e5saVUQ/djBqPI/ubGG1/LHDxyxH/GPVbeKa66
 G8jChyPmIdxdsjIapzefOjnTIi2vhZqv9I1gSKCj+x554GahoYQe7l0YbNnZGmNS
 O12QQ7dUpkzgDQEiTh73S3Ay2Ng95K2DztuHs6NXFdbiwpFMZqVATLXBEOYryBx/
 n487ZqrsTV7T3jH/ekxth1+j0Hpmigj8FNy21/nZ0Nr0OaTJFwsLEdN4Vi7LIM+H
 jBMEBk5dGIHODMvB/8NCud0eWzB671iLgVto7or/rT1YmaFapl/KR7FEWNv19sLN
 tshSViTosLGffQMpObOk
 =wJUS
 -----END PGP SIGNATURE-----

Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux

Pull file locking updates from Jeff Layton:
 "File locking related changes for v4.5 (pile #1)

  Highlights:
   - new Kconfig option to allow disabling mandatory locking (which is
     racy anyway)
   - new tracepoints for setlk and close codepaths
   - fix for a long-standing bug in code that handles races between
     setting a POSIX lock and close()"

* tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux:
  locks: rename __posix_lock_file to posix_lock_inode
  locks: prink more detail when there are leaked locks
  locks: pass inode pointer to locks_free_lock_context
  locks: sprinkle some tracepoints around the file locking code
  locks: don't check for race with close when setting OFD lock
  locks: fix unlock when fcntl_setlk races with a close
  fs: make locks.c explicitly non-modular
  locks: use list_first_entry_or_null()
  locks: Don't allow mounts in user namespaces to enable mandatory locking
  locks: Allow disabling mandatory locking at compile time
This commit is contained in:
Linus Torvalds 2016-01-12 15:46:17 -08:00
commit 065019a38f
6 changed files with 233 additions and 84 deletions

View File

@ -73,6 +73,16 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
config MANDATORY_FILE_LOCKING
bool "Enable Mandatory file locking"
depends on FILE_LOCKING
default y
help
This option enables appropriately marked files on appropriately
mounted filesystems to support mandatory locking.
To the best of my knowledge this is dead code that no one cares about.
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"

View File

@ -225,7 +225,7 @@ void __destroy_inode(struct inode *inode)
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
locks_free_lock_context(inode->i_flctx);
locks_free_lock_context(inode);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count);

View File

@ -119,7 +119,6 @@
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
@ -230,16 +229,44 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
void
locks_free_lock_context(struct file_lock_context *ctx)
static void
locks_dump_ctx_list(struct list_head *list, char *list_type)
{
if (ctx) {
WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
struct file_lock *fl;
list_for_each_entry(fl, list, fl_list) {
pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
}
}
/*
 * Report locks that are still queued on @inode's lock context.
 *
 * When any of the flock, POSIX or lease lists hanging off inode->i_flctx is
 * non-empty, a "Leaked locks" header naming the device and inode number is
 * logged and each of the three lists is dumped via locks_dump_ctx_list().
 * Nothing is printed when all three lists are empty.
 *
 * inode->i_flctx is dereferenced unconditionally, so the caller must already
 * have checked that it is non-NULL.
 */
static void
locks_check_ctx_lists(struct inode *inode)
{
	struct file_lock_context *flctx = inode->i_flctx;

	/* Common case: nothing left behind on any list. */
	if (!unlikely(!(list_empty(&flctx->flc_flock) &&
			list_empty(&flctx->flc_posix) &&
			list_empty(&flctx->flc_lease))))
		return;

	pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
		MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
		inode->i_ino);

	/* Dump every list, including empty ones, in a fixed order. */
	locks_dump_ctx_list(&flctx->flc_flock, "FLOCK");
	locks_dump_ctx_list(&flctx->flc_posix, "POSIX");
	locks_dump_ctx_list(&flctx->flc_lease, "LEASE");
}
/*
 * Release the lock context attached to @inode, if it has one.
 *
 * Before the context is handed back to flctx_cache, locks_check_ctx_lists()
 * is called so that any locks still sitting on its lists get reported.
 * inode->i_flctx itself is not cleared here.
 */
void
locks_free_lock_context(struct inode *inode)
{
	struct file_lock_context *flctx = inode->i_flctx;

	if (unlikely(flctx)) {
		/* Complain about leftovers first, then free the context. */
		locks_check_ctx_lists(inode);
		kmem_cache_free(flctx_cache, flctx);
	}
}
@ -934,7 +961,8 @@ out:
return error;
}
static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
static int posix_lock_inode(struct inode *inode, struct file_lock *request,
struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@ -1142,6 +1170,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
trace_posix_lock_inode(inode, request, error);
return error;
}
@ -1162,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
return __posix_lock_file(file_inode(filp), fl, conflock);
return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@ -1178,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
error = __posix_lock_file(inode, fl, NULL);
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@ -1191,6 +1221,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
#ifdef CONFIG_MANDATORY_FILE_LOCKING
/**
* locks_mandatory_locked - Check for an active lock
* @file: the file to check
@ -1260,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
error = __posix_lock_file(inode, &fl, NULL);
error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@ -1268,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
error = __posix_lock_file(inode, &fl, NULL);
error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@ -1289,6 +1320,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
}
EXPORT_SYMBOL(locks_mandatory_area);
#endif /* CONFIG_MANDATORY_FILE_LOCKING */
static void lease_clear_pending(struct file_lock *fl, int arg)
{
@ -1503,12 +1535,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
if (!list_empty(&ctx->flc_lease)) {
fl = list_first_entry(&ctx->flc_lease,
struct file_lock, fl_list);
if (fl->fl_type == F_WRLCK)
has_lease = true;
}
fl = list_first_entry_or_null(&ctx->flc_lease,
struct file_lock, fl_list);
if (fl && (fl->fl_type == F_WRLCK))
has_lease = true;
spin_unlock(&ctx->flc_lock);
}
@ -2165,6 +2195,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
inode = file_inode(filp);
/*
* This might block, so we do it before checking the inode.
*/
@ -2172,8 +2204,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
inode = file_inode(filp);
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@ -2182,7 +2212,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
again:
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
@ -2221,23 +2250,29 @@ again:
error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
* Attempt to detect a close/fcntl race and recover by releasing the
* lock that was just acquired. There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
/*
* we need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in close().
* rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
error = -EBADF;
}
}
out:
trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@ -2322,7 +2357,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
again:
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
@ -2361,17 +2395,27 @@ again:
error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
* Attempt to detect a close/fcntl race and recover by releasing the
* lock that was just acquired. There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
error = -EBADF;
}
}
out:
locks_free_lock(file_lock);
return error;
@ -2385,6 +2429,7 @@ out:
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
int error;
struct file_lock lock;
struct file_lock_context *ctx;
@ -2407,10 +2452,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
vfs_lock_file(filp, F_SETLK, &lock, NULL);
error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
@ -2706,7 +2752,7 @@ static int __init proc_locks_init(void)
proc_create("locks", 0, NULL, &proc_locks_operations);
return 0;
}
module_init(proc_locks_init);
fs_initcall(proc_locks_init);
#endif
static int __init filelock_init(void)

View File

@ -1584,6 +1584,14 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
/*
 * May the caller request mandatory locking (MS_MANDLOCK) on a mount?
 *
 * Always false when CONFIG_MANDATORY_FILE_LOCKING is not set; otherwise the
 * answer is whatever capable(CAP_SYS_ADMIN) returns.
 */
static inline bool may_mandlock(void)
{
#ifdef CONFIG_MANDATORY_FILE_LOCKING
	return capable(CAP_SYS_ADMIN);
#else
	return false;
#endif
}
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
@ -2677,6 +2685,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
type_page, flags, data_page);
if (!retval && !may_mount())
retval = -EPERM;
if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
retval = -EPERM;
if (retval)
goto dput_out;

View File

@ -1043,7 +1043,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */
void locks_free_lock_context(struct file_lock_context *ctx);
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void);
@ -1104,7 +1104,7 @@ static inline int fcntl_getlease(struct file *filp)
}
static inline void
locks_free_lock_context(struct file_lock_context *ctx)
locks_free_lock_context(struct inode *inode)
{
}
@ -2030,7 +2030,7 @@ extern struct kobject *fs_kobj;
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
#ifdef CONFIG_FILE_LOCKING
#ifdef CONFIG_MANDATORY_FILE_LOCKING
extern int locks_mandatory_locked(struct file *);
extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
@ -2075,6 +2075,45 @@ static inline int locks_verify_truncate(struct inode *inode,
return 0;
}
#else /* !CONFIG_MANDATORY_FILE_LOCKING */
/*
 * Stub for builds without CONFIG_MANDATORY_FILE_LOCKING: ignores @file and
 * always returns 0.
 */
static inline int locks_mandatory_locked(struct file *file)
{
return 0;
}
/*
 * Stub for builds without CONFIG_MANDATORY_FILE_LOCKING: all arguments are
 * ignored and 0 is always returned.
 */
static inline int locks_mandatory_area(int rw, struct inode *inode,
struct file *filp, loff_t offset,
size_t count)
{
return 0;
}
/*
 * Stub for builds without CONFIG_MANDATORY_FILE_LOCKING: always returns 0
 * regardless of @inode (presumably "mandatory locking does not apply" --
 * confirm against the CONFIG_MANDATORY_FILE_LOCKING=y variant).
 */
static inline int __mandatory_lock(struct inode *inode)
{
return 0;
}
/*
 * Stub for builds without CONFIG_MANDATORY_FILE_LOCKING: always returns 0
 * regardless of @inode (presumably "mandatory locking does not apply" --
 * confirm against the CONFIG_MANDATORY_FILE_LOCKING=y variant).
 */
static inline int mandatory_lock(struct inode *inode)
{
return 0;
}
/*
 * Stub for builds without CONFIG_MANDATORY_FILE_LOCKING: ignores @file and
 * always returns 0.
 */
static inline int locks_verify_locked(struct file *file)
{
return 0;
}
/*
 * Stub for builds without CONFIG_MANDATORY_FILE_LOCKING: all arguments are
 * ignored and 0 is always returned.
 */
static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
size_t size)
{
return 0;
}
#endif /* CONFIG_MANDATORY_FILE_LOCKING */
#ifdef CONFIG_FILE_LOCKING
static inline int break_lease(struct inode *inode, unsigned int mode)
{
/*
@ -2136,39 +2175,6 @@ static inline int break_layout(struct inode *inode, bool wait)
}
#else /* !CONFIG_FILE_LOCKING */
static inline int locks_mandatory_locked(struct file *file)
{
return 0;
}
static inline int locks_mandatory_area(int rw, struct inode *inode,
struct file *filp, loff_t offset,
size_t count)
{
return 0;
}
static inline int __mandatory_lock(struct inode *inode)
{
return 0;
}
static inline int mandatory_lock(struct inode *inode)
{
return 0;
}
static inline int locks_verify_locked(struct file *file)
{
return 0;
}
static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
size_t size)
{
return 0;
}
static inline int break_lease(struct inode *inode, unsigned int mode)
{
return 0;

View File

@ -34,6 +34,83 @@
{ F_WRLCK, "F_WRLCK" }, \
{ F_UNLCK, "F_UNLCK" })
/*
 * locks_get_lock_context tracepoint.
 *
 * Records the inode's device number and inode number, the requested lock
 * type, and the lock context pointer passed in. Note that the int @type
 * argument is stored in an unsigned char field. The output shows the device
 * as major:minor in hex and the type as a symbolic name via show_fl_type().
 */
TRACE_EVENT(locks_get_lock_context,
TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
TP_ARGS(inode, type, ctx),
TP_STRUCT__entry(
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
__field(unsigned char, type)
__field(struct file_lock_context *, ctx)
),
TP_fast_assign(
__entry->s_dev = inode->i_sb->s_dev;
__entry->i_ino = inode->i_ino;
__entry->type = type;
__entry->ctx = ctx;
),
TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
__entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
);
/*
 * Event class for tracepoints that report an operation on a single
 * struct file_lock together with its integer result.
 *
 * Captured: the file_lock pointer, the inode's device and inode numbers,
 * and a snapshot of the lock's fl_next, fl_owner, fl_pid, fl_flags, fl_type,
 * fl_start and fl_end, plus @ret. @fl may be NULL, in which case every
 * lock-derived field is recorded as NULL or 0. @inode is dereferenced
 * unconditionally and so must be valid.
 */
DECLARE_EVENT_CLASS(filelock_lock,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret),
TP_STRUCT__entry(
__field(struct file_lock *, fl)
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
__field(struct file_lock *, fl_next)
__field(fl_owner_t, fl_owner)
__field(unsigned int, fl_pid)
__field(unsigned int, fl_flags)
__field(unsigned char, fl_type)
__field(loff_t, fl_start)
__field(loff_t, fl_end)
__field(int, ret)
),
TP_fast_assign(
__entry->fl = fl ? fl : NULL;
__entry->s_dev = inode->i_sb->s_dev;
__entry->i_ino = inode->i_ino;
__entry->fl_next = fl ? fl->fl_next : NULL;
__entry->fl_owner = fl ? fl->fl_owner : NULL;
__entry->fl_pid = fl ? fl->fl_pid : 0;
__entry->fl_flags = fl ? fl->fl_flags : 0;
__entry->fl_type = fl ? fl->fl_type : 0;
__entry->fl_start = fl ? fl->fl_start : 0;
__entry->fl_end = fl ? fl->fl_end : 0;
__entry->ret = ret;
),
TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
__entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
__entry->i_ino, __entry->fl_next, __entry->fl_owner,
__entry->fl_pid, show_fl_flags(__entry->fl_flags),
show_fl_type(__entry->fl_type),
__entry->fl_start, __entry->fl_end, __entry->ret)
);
/*
 * filelock_lock instance emitted by posix_lock_inode() just before it
 * returns; @ret carries that function's error value.
 */
DEFINE_EVENT(filelock_lock, posix_lock_inode,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret));
/*
 * filelock_lock instance emitted on fcntl_setlk()'s exit path, just before
 * the file_lock is freed; @ret carries the value fcntl_setlk() returns.
 */
DEFINE_EVENT(filelock_lock, fcntl_setlk,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret));
/*
 * filelock_lock instance emitted at the end of locks_remove_posix(); @ret
 * carries the result of its vfs_lock_file() call.
 */
DEFINE_EVENT(filelock_lock, locks_remove_posix,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret));
DECLARE_EVENT_CLASS(filelock_lease,
TP_PROTO(struct inode *inode, struct file_lock *fl),