Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull filesystem fixes from Jan Kara:
 "Notification, writeback, udf, quota fixes

  The notification patches are (with one exception) fallout from my
  fsnotify rework which went into -rc1 (I've extended LTP to cover these
  corner cases to avoid similar breakage in the future).

  The UDF patch fixes a nasty data corruption Al has recently reported,
  the revert of the writeback patch is due to the possibility of
  violating sync(2) guarantees, and the quota patch fixes a bug that can
  lead to corruption of quota files in ocfs2"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: Allocate overflow events with proper type
  fanotify: Handle overflow in case of permission events
  fsnotify: Fix detection whether overflow event is queued
  Revert "writeback: do not sync data dirtied after sync start"
  quota: Fix race between dqput() and dquot_scan_active()
  udf: Fix data corruption on file type conversion
  inotify: Fix reporting of cookies for inotify events
This commit is contained in:
Linus Torvalds 2014-02-27 10:37:22 -08:00
commit 8d7531825c
20 changed files with 108 additions and 59 deletions

View File

@ -40,18 +40,13 @@
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
/*
* Write only inodes dirtied before this time. Don't forget to set
* older_than_this_is_set when you set this.
*/
unsigned long older_than_this;
unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
unsigned int older_than_this_is_set:1;
enum wb_reason reason; /* why was writeback initiated? */
struct list_head list; /* pending work list */
@ -252,10 +247,10 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
WARN_ON_ONCE(!work->older_than_this_is_set);
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
if (inode_dirtied_after(inode, work->older_than_this))
if (work->older_than_this &&
inode_dirtied_after(inode, *work->older_than_this))
break;
list_move(&inode->i_wb_list, &tmp);
moved++;
@ -742,8 +737,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
.sync_mode = WB_SYNC_NONE,
.range_cyclic = 1,
.reason = reason,
.older_than_this = jiffies,
.older_than_this_is_set = 1,
};
spin_lock(&wb->list_lock);
@ -802,13 +795,12 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
unsigned long oldest_jif;
struct inode *inode;
long progress;
if (!work->older_than_this_is_set) {
work->older_than_this = jiffies;
work->older_than_this_is_set = 1;
}
oldest_jif = jiffies;
work->older_than_this = &oldest_jif;
spin_lock(&wb->list_lock);
for (;;) {
@ -842,10 +834,10 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
work->older_than_this = jiffies -
oldest_jif = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
work->older_than_this = jiffies;
oldest_jif = jiffies;
trace_writeback_start(wb->bdi, work);
if (list_empty(&wb->b_io))
@ -1357,21 +1349,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
/**
* sync_inodes_sb - sync sb inode pages
* @sb: the superblock
* @older_than_this: timestamp
* @sb: the superblock
*
* This function writes and waits on any dirty inode belonging to this
* superblock that has been dirtied before given timestamp.
* super_block.
*/
void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this)
void sync_inodes_sb(struct super_block *sb)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
.older_than_this = older_than_this,
.older_than_this_is_set = 1,
.range_cyclic = 0,
.done = &done,
.reason = WB_REASON_SYNC,

View File

@ -86,7 +86,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, void *data, int data_type,
const unsigned char *file_name)
const unsigned char *file_name, u32 cookie)
{
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;

View File

@ -147,7 +147,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
u32 mask, void *data, int data_type,
const unsigned char *file_name)
const unsigned char *file_name, u32 cookie)
{
int ret = 0;
struct fanotify_event_info *event;
@ -192,10 +192,12 @@ static int fanotify_handle_event(struct fsnotify_group *group,
ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge);
if (ret) {
BUG_ON(mask & FAN_ALL_PERM_EVENTS);
/* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
ret = 0;
return 0;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS

View File

@ -698,6 +698,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
struct fsnotify_group *group;
int f_flags, fd;
struct user_struct *user;
struct fanotify_event_info *oevent;
pr_debug("%s: flags=%d event_f_flags=%d\n",
__func__, flags, event_f_flags);
@ -730,8 +731,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.user = user;
atomic_inc(&user->fanotify_listeners);
oevent = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
if (unlikely(!oevent)) {
fd = -ENOMEM;
goto out_destroy_group;
}
group->overflow_event = &oevent->fse;
fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
oevent->tgid = get_pid(task_tgid(current));
oevent->path.mnt = NULL;
oevent->path.dentry = NULL;
group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
oevent->response = 0;
mutex_init(&group->fanotify_data.access_mutex);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);

View File

@ -179,7 +179,7 @@ static int send_to_group(struct inode *to_tell,
return group->ops->handle_event(group, to_tell, inode_mark,
vfsmount_mark, mask, data, data_is,
file_name);
file_name, cookie);
}
/*

View File

@ -55,6 +55,13 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
/* clear the notification queue of all events */
fsnotify_flush_notify(group);
/*
* Destroy overflow event (we cannot use fsnotify_destroy_event() as
* that deliberately ignores overflow events).
*/
if (group->overflow_event)
group->ops->free_event(group->overflow_event);
fsnotify_put_group(group);
}
@ -99,7 +106,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW);
return group;
}

View File

@ -27,6 +27,6 @@ extern int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, void *data, int data_type,
const unsigned char *file_name);
const unsigned char *file_name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;

View File

@ -67,7 +67,7 @@ int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, void *data, int data_type,
const unsigned char *file_name)
const unsigned char *file_name, u32 cookie)
{
struct inotify_inode_mark *i_mark;
struct inotify_event_info *event;
@ -103,6 +103,7 @@ int inotify_handle_event(struct fsnotify_group *group,
fsn_event = &event->fse;
fsnotify_init_event(fsn_event, inode, mask);
event->wd = i_mark->wd;
event->sync_cookie = cookie;
event->name_len = len;
if (len)
strcpy(event->name, file_name);

View File

@ -495,7 +495,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
/* Queue ignore event for the watch */
inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
NULL, FSNOTIFY_EVENT_NONE, NULL);
NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
@ -633,11 +633,23 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
static struct fsnotify_group *inotify_new_group(unsigned int max_events)
{
struct fsnotify_group *group;
struct inotify_event_info *oevent;
group = fsnotify_alloc_group(&inotify_fsnotify_ops);
if (IS_ERR(group))
return group;
oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL);
if (unlikely(!oevent)) {
fsnotify_destroy_group(group);
return ERR_PTR(-ENOMEM);
}
group->overflow_event = &oevent->fse;
fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
oevent->wd = -1;
oevent->sync_cookie = 0;
oevent->name_len = 0;
group->max_events = max_events;
spin_lock_init(&group->inotify_data.idr_lock);

View File

@ -80,7 +80,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
/*
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. The function returns 0 if the event was
* added to the queue, 1 if the event was merged with some other queued event.
* added to the queue, 1 if the event was merged with some other queued event,
* 2 if the queue of events has overflowed.
*/
int fsnotify_add_notify_event(struct fsnotify_group *group,
struct fsnotify_event *event,
@ -95,10 +96,14 @@ int fsnotify_add_notify_event(struct fsnotify_group *group,
mutex_lock(&group->notification_mutex);
if (group->q_len >= group->max_events) {
ret = 2;
/* Queue overflow event only if it isn't already queued */
if (list_empty(&group->overflow_event.list))
event = &group->overflow_event;
ret = 1;
if (!list_empty(&group->overflow_event->list)) {
mutex_unlock(&group->notification_mutex);
return ret;
}
event = group->overflow_event;
goto queue;
}
if (!list_empty(list) && merge) {
@ -109,6 +114,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group,
}
}
queue:
group->q_len++;
list_add_tail(&event->list, list);
mutex_unlock(&group->notification_mutex);
@ -132,7 +138,11 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
event = list_first_entry(&group->notification_list,
struct fsnotify_event, list);
list_del(&event->list);
/*
* We need to init list head for the case of overflow event so that
* check in fsnotify_add_notify_event() works
*/
list_del_init(&event->list);
group->q_len--;
return event;

View File

@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb,
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
ret = fn(dquot, priv);
if (ret < 0)
goto out;
/*
* ->release_dquot() can be racing with us. Our reference
* protects us from new calls to it so just wait for any
* outstanding call and recheck the DQ_ACTIVE_B after that.
*/
wait_on_dquot(dquot);
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
ret = fn(dquot, priv);
if (ret < 0)
goto out;
}
spin_lock(&dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */

View File

@ -27,11 +27,10 @@
* wait == 1 case since in that case write_inode() functions do
* sync_dirty_buffer() and thus effectively write one block at a time.
*/
static int __sync_filesystem(struct super_block *sb, int wait,
unsigned long start)
static int __sync_filesystem(struct super_block *sb, int wait)
{
if (wait)
sync_inodes_sb(sb, start);
sync_inodes_sb(sb);
else
writeback_inodes_sb(sb, WB_REASON_SYNC);
@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait,
int sync_filesystem(struct super_block *sb)
{
int ret;
unsigned long start = jiffies;
/*
* We need to be protected against the filesystem going from
@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
ret = __sync_filesystem(sb, 0, start);
ret = __sync_filesystem(sb, 0);
if (ret < 0)
return ret;
return __sync_filesystem(sb, 1, start);
return __sync_filesystem(sb, 1);
}
EXPORT_SYMBOL_GPL(sync_filesystem);
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
if (!(sb->s_flags & MS_RDONLY))
sync_inodes_sb(sb, *((unsigned long *)arg));
sync_inodes_sb(sb);
}
static void sync_fs_one_sb(struct super_block *sb, void *arg)
@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
SYSCALL_DEFINE0(sync)
{
int nowait = 0, wait = 1;
unsigned long start = jiffies;
wakeup_flusher_threads(0, WB_REASON_SYNC);
iterate_supers(sync_inodes_one_sb, &start);
iterate_supers(sync_inodes_one_sb, NULL);
iterate_supers(sync_fs_one_sb, &nowait);
iterate_supers(sync_fs_one_sb, &wait);
iterate_bdevs(fdatawrite_one_bdev, NULL);

View File

@ -144,6 +144,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
size_t count = iocb->ki_nbytes;
struct udf_inode_info *iinfo = UDF_I(inode);
mutex_lock(&inode->i_mutex);
down_write(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
if (file->f_flags & O_APPEND)
@ -156,6 +157,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
pos + count)) {
err = udf_expand_file_adinicb(inode);
if (err) {
mutex_unlock(&inode->i_mutex);
udf_debug("udf_expand_adinicb: err=%d\n", err);
return err;
}
@ -169,9 +171,17 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
} else
up_write(&iinfo->i_data_sem);
retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
if (retval > 0)
retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (retval > 0) {
ssize_t err;
mark_inode_dirty(inode);
err = generic_write_sync(file, iocb->ki_pos - retval, retval);
if (err < 0)
retval = err;
}
return retval;
}

View File

@ -265,6 +265,7 @@ int udf_expand_file_adinicb(struct inode *inode)
.nr_to_write = 1,
};
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
if (!iinfo->i_lenAlloc) {
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;

View File

@ -913,7 +913,7 @@ xfs_flush_inodes(
struct super_block *sb = mp->m_super;
if (down_read_trylock(&sb->s_umount)) {
sync_inodes_sb(sb, jiffies);
sync_inodes_sb(sb);
up_read(&sb->s_umount);
}
}

View File

@ -99,7 +99,7 @@ struct fsnotify_ops {
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, void *data, int data_type,
const unsigned char *file_name);
const unsigned char *file_name, u32 cookie);
void (*free_group_priv)(struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
void (*free_event)(struct fsnotify_event *event);
@ -160,7 +160,7 @@ struct fsnotify_group {
struct fasync_struct *fsn_fa; /* async notification */
struct fsnotify_event overflow_event; /* Event we queue when the
struct fsnotify_event *overflow_event; /* Event we queue when the
* notification list is too
* full */

View File

@ -97,7 +97,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
enum wb_reason reason);
void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this);
void sync_inodes_sb(struct super_block *);
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);

View File

@ -287,11 +287,11 @@ TRACE_EVENT(writeback_queue_io,
__field(int, reason)
),
TP_fast_assign(
unsigned long older_than_this = work->older_than_this;
unsigned long *older_than_this = work->older_than_this;
strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
__entry->older = older_than_this;
__entry->older = older_than_this ? *older_than_this : 0;
__entry->age = older_than_this ?
(jiffies - older_than_this) * 1000 / HZ : -1;
(jiffies - *older_than_this) * 1000 / HZ : -1;
__entry->moved = moved;
__entry->reason = work->reason;
),

View File

@ -916,7 +916,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, void *data, int data_type,
const unsigned char *file_name)
const unsigned char *file_name, u32 cookie)
{
return 0;
}

View File

@ -471,7 +471,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, void *data, int data_type,
const unsigned char *dname)
const unsigned char *dname, u32 cookie)
{
struct inode *inode;
struct audit_parent *parent;