diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3b8b6c212701..a421c32c6cfe 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3797,7 +3797,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - if (trans->transaction->in_commit) { + if (trans->transaction->in_commit || + trans->transaction->delayed_refs.flushing) { ret = -EAGAIN; break; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d4f948bc22a..13a17477c4f4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1502,6 +1502,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_path *path; int compressed = 0; int ret; @@ -1509,6 +1510,23 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (!ret) return 0; + /* + * before we join the transaction, try to do some of our IO. + * This will limit the amount of IO that we have to do with + * the transaction running. We're unlikely to need to do any + * IO if the file extents are new, the disk_i_size checks + * covers the most common case. + */ + if (start < BTRFS_I(inode)->disk_i_size) { + path = btrfs_alloc_path(); + if (path) { + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, + start, 0); + btrfs_free_path(path); + } + } + trans = btrfs_join_transaction(root, 1); ordered_extent = btrfs_lookup_ordered_extent(inode, start); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 903edab3659a..01c9620bb001 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -192,6 +192,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->alloc_exclude_nr = 0; h->alloc_exclude_start = 0; h->delayed_ref_updates = 0; + root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); return h; @@ -281,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root) if (!root->fs_info->open_ioctl_trans) wait_current_trans(root); mutex_unlock(&root->fs_info->trans_mutex); - throttle_on_drops(root); } @@ -298,6 +298,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (cur && trans->transaction->delayed_refs.num_heads_ready > 64) { trans->delayed_ref_updates = 0; + + /* + * do a full flush if the transaction is trying + * to close + */ + if (trans->transaction->delayed_refs.flushing) + cur = 0; btrfs_run_delayed_refs(trans, root, cur); } else { break; @@ -665,6 +672,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +/* + * when dropping snapshots, we generate a ton of delayed refs, and it makes + * sense not to join the transaction while it is trying to flush the current + * queue of delayed refs out. + * + * This is used by the drop snapshot code only + */ +static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) +{ + DEFINE_WAIT(wait); + + mutex_lock(&info->trans_mutex); + while (info->running_transaction && + info->running_transaction->delayed_refs.flushing) { + prepare_to_wait(&info->transaction_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&info->trans_mutex); + schedule(); + mutex_lock(&info->trans_mutex); + finish_wait(&info->transaction_wait, &wait); + } + mutex_unlock(&info->trans_mutex); + return 0; +} + /* * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on * all of them @@ -692,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, atomic_inc(&root->fs_info->throttles); while (1) { + /* + * we don't want to jump in and create a bunch of + * delayed refs if the transaction is starting to close + */ + wait_transaction_pre_flush(tree_root->fs_info); trans = btrfs_start_transaction(tree_root, 1); + + /* + * we've joined a transaction, make sure it isn't + * closing right now + */ + if (trans->transaction->delayed_refs.flushing) { + btrfs_end_transaction(trans, tree_root); + continue; + } + mutex_lock(&root->fs_info->drop_mutex); ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) @@ -932,20 +979,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); + cur_trans = trans->transaction; /* * set the flushing flag so procs in this transaction have to * start sending their work down. */ - trans->transaction->delayed_refs.flushing = 1; + cur_trans->delayed_refs.flushing = 1; ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); - INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); - if (trans->transaction->in_commit) { - cur_trans = trans->transaction; - trans->transaction->use_count++; + INIT_LIST_HEAD(&dirty_fs_roots); + if (cur_trans->in_commit) { + cur_trans->use_count++; mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); @@ -968,7 +1015,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, trans->transaction->in_commit = 1; trans->transaction->blocked = 1; - cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); @@ -1081,6 +1127,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); trans->transaction->blocked = 0; + wake_up(&root->fs_info->transaction_throttle); wake_up(&root->fs_info->transaction_wait); @@ -1107,6 +1154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; + root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 94876709217f..94f5bde2b58d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -23,7 +23,12 @@ struct btrfs_transaction { u64 transid; + /* + * total writers in this transaction, it must be zero before the + * transaction can end + */ unsigned long num_writers; + unsigned long num_joined; int in_commit; int use_count;