/*
 * Patch overview (commentary only; the patch text that follows is unchanged).
 *
 * fs/btrfs/disk-io.c
 *   - Removes the hash_lock-protected btrfs_set_header_flag(eb,
 *     BTRFS_HEADER_FLAG_WRITTEN) sequence from the csum_dirty_buffer hunk.
 *   - Adds btree_lock_page_hook(page).  When page->private is not
 *     EXTENT_PAGE_PRIVATE it takes len = page->private >> 2, looks up the
 *     extent buffer at page_offset(page) with find_extent_buffer(), and if one
 *     is found: takes the tree lock, sets BTRFS_HEADER_FLAG_WRITTEN under
 *     fs_info->hash_lock, drops the tree lock and releases the buffer
 *     reference.  Every path ends with lock_page(page) and returns 0.
 *   - Registers the hook as .write_cache_pages_lock_hook in
 *     btree_extent_io_ops.
 * fs/btrfs/disk-io.h
 *   - Declares btree_lock_page_hook().
 * fs/btrfs/extent-tree.c
 *   - pin_down_bytes() gains an is_data argument; when it is set and pending
 *     is 0 the function jumps to the new pinit label, skipping the
 *     btrfs_find_tree_block() lookup and going straight to
 *     btrfs_update_pinned_extents().
 *   - __free_extent() passes owner_objectid >= BTRFS_FIRST_FREE_OBJECTID as
 *     is_data; the extent_root path in __btrfs_free_extent() passes 0.
 *   - __btrfs_free_extent() forces pin = 1 when owner_objectid is below
 *     BTRFS_FIRST_FREE_OBJECTID or when ref_generation != trans->transid.
 * fs/btrfs/extent_io.c
 *   - leak_lock and the leak_list add/del bookkeeping in alloc_extent_state(),
 *     free_extent_state(), __alloc_extent_buffer() and __free_extent_buffer()
 *     are wrapped in #ifdef LEAK_DEBUG.
 *   - write_cache_pages() becomes the non-static, exported
 *     extent_write_cache_pages() with a leading extent_io_tree argument; the
 *     kernel-version #endif now closes right after the writepage_t typedef.
 *     Pages are locked through tree->ops->write_cache_pages_lock_hook when
 *     that hook is set, otherwise with lock_page().  When wbc->range_cont is
 *     set, wbc->range_start is updated to index << PAGE_CACHE_SHIFT.
 *   - extent_write_full_page() and extent_writepages() call the renamed
 *     function.
 * fs/btrfs/extent_io.h
 *   - Adds the write_cache_pages_lock_hook member to struct extent_io_ops.
 * fs/btrfs/transaction.c
 *   - start_transaction() skips wait_current_trans() while
 *     fs_info->log_root_recovering is set.
 *   - btrfs_write_and_wait_transaction() switches from find_lock_page() to
 *     find_get_page() + btree_lock_page_hook(), then unlocks and releases the
 *     page if page->mapping is NULL.  The second loop locks through the hook
 *     and calls wait_on_page_writeback() before write_one_page().
 * fs/btrfs/tree-log.c
 *   - replay_one_name() replaces the read_one_inode()/iput() existence probe
 *     with an up-front btrfs_lookup_inode() on log_key (return 0 means the
 *     inode exists), releasing the path afterwards.
 *
 * NOTE(review): btree_lock_page_hook() reads page->mapping->host in its
 *   initializers, before lock_page(); the transaction.c caller only tests
 *   page->mapping for NULL after the hook returns.  If the mapping can be
 *   cleared in that window the hook would dereference NULL - confirm.
 * NOTE(review): the __alloc_extent_buffer hunk shows the kmem_cache_zalloc()
 *   result being written through with no visible NULL check - confirm.
 * NOTE(review): the line breaks of this patch appear to have been collapsed;
 *   presumably the @@ hunk counts only match once they are restored.
 */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a4373db5967a..42bf99168056 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -307,9 +307,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) goto err; } found_level = btrfs_header_level(eb); - spin_lock(&root->fs_info->hash_lock); - btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_unlock(&root->fs_info->hash_lock); + csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); @@ -1998,7 +1996,36 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return ret; } +int btree_lock_page_hook(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_buffer *eb; + unsigned long len; + u64 bytenr = page_offset(page); + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + + len = page->private >> 2; + eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); + if (!eb) + goto out; + + btrfs_tree_lock(eb); + spin_lock(&root->fs_info->hash_lock); + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); +out: + lock_page(page); + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { + .write_cache_pages_lock_hook = btree_lock_page_hook, .writepage_io_hook = btree_writepage_io_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 6b6fdc697f31..f84f5058dbbb 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -80,4 +80,5 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); +int btree_lock_page_hook(struct page *page); #endif diff --git a/fs/btrfs/extent-tree.c 
b/fs/btrfs/extent-tree.c index 646b9148ca21..3181759da1cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1590,13 +1590,17 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, } static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, - int pending) + int is_data, int pending) { int err = 0; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (!pending) { struct extent_buffer *buf; + + if (is_data) + goto pinit; + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { /* we can reuse a block if it hasn't been written @@ -1624,6 +1628,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } +pinit: btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, @@ -1744,7 +1749,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root #endif if (pin) { - ret = pin_down_bytes(root, bytenr, num_bytes, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, + owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 0); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -1862,9 +1868,17 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ref_generation = 0; if (root == extent_root) { - pin_down_bytes(root, bytenr, num_bytes, 1); + pin_down_bytes(root, bytenr, num_bytes, 0, 1); return 0; } + /* if metadata always pin */ + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + pin = 1; + + /* if data pin when any transaction has committed this */ + if (ref_generation != trans->transid) + pin = 1; + ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, ref_generation, owner_objectid, owner_offset, pin, pin == 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 239e7c908abf..319a0c7a4a58 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -29,7 +29,10 @@ static struct kmem_cache *extent_buffer_cache; static LIST_HEAD(buffers); static 
LIST_HEAD(states); + +#ifdef LEAK_DEBUG static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; +#endif #define BUFFER_LRU_MAX 64 @@ -106,7 +109,9 @@ EXPORT_SYMBOL(extent_io_tree_init); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; +#ifdef LEAK_DEBUG unsigned long flags; +#endif state = kmem_cache_alloc(extent_state_cache, mask); if (!state) @@ -114,10 +119,11 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->private = 0; state->tree = NULL; +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&state->leak_list, &states); spin_unlock_irqrestore(&leak_lock, flags); - +#endif atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -129,11 +135,15 @@ void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { +#ifdef LEAK_DEBUG unsigned long flags; +#endif WARN_ON(state->tree); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); +#endif kmem_cache_free(extent_state_cache, state); } } @@ -2070,13 +2080,13 @@ done: } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -/* Taken directly from 2.6.23 for 2.6.18 back port */ +/* Taken directly from 2.6.23 with a mod for a lockpage hook */ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); +#endif /** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. + * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @writepage: function called for each page @@ -2090,9 +2100,10 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. 
*/ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) +int extent_write_cache_pages(struct extent_io_tree *tree, + struct address_space *mapping, + struct writeback_control *wbc, + writepage_t writepage, void *data) { struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; @@ -2138,7 +2149,10 @@ retry: * swizzled back from swapper_space to tmpfs file * mapping */ - lock_page(page); + if (tree->ops && tree->ops->write_cache_pages_lock_hook) + tree->ops->write_cache_pages_lock_hook(page); + else + lock_page(page); if (unlikely(page->mapping != mapping)) { unlock_page(page); @@ -2187,9 +2201,12 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; + + if (wbc->range_cont) + wbc->range_start = index << PAGE_CACHE_SHIFT; return ret; } -#endif +EXPORT_SYMBOL(extent_write_cache_pages); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, @@ -2214,7 +2231,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + extent_write_cache_pages(tree, mapping, &wbc_writepages, + __extent_writepage, &epd); if (epd.bio) { submit_one_bio(WRITE, epd.bio, 0); } @@ -2235,7 +2253,8 @@ int extent_writepages(struct extent_io_tree *tree, .get_extent = get_extent, }; - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + ret = extent_write_cache_pages(tree, mapping, wbc, + __extent_writepage, &epd); if (epd.bio) { submit_one_bio(WRITE, epd.bio, 0); } @@ -2567,15 +2586,19 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; +#ifdef LEAK_DEBUG unsigned long flags; +#endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; 
mutex_init(&eb->mutex); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); +#endif atomic_set(&eb->refs, 1); return eb; @@ -2583,10 +2606,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void __free_extent_buffer(struct extent_buffer *eb) { +#ifdef LEAK_DEBUG unsigned long flags; spin_lock_irqsave(&leak_lock, flags); list_del(&eb->leak_list); spin_unlock_irqrestore(&leak_lock, flags); +#endif kmem_cache_free(extent_buffer_cache, eb); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 315cfceae312..3cb411a5f4d3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -50,6 +50,7 @@ struct extent_io_ops { unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); + int (*write_cache_pages_lock_hook)(struct page *page); }; struct extent_io_tree { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 49c4f5b40ed6..61a377bcb2fb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -161,7 +161,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, int ret; mutex_lock(&root->fs_info->trans_mutex); - if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2) + if (!root->fs_info->log_root_recovering && + ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -328,9 +329,17 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_lock_page(btree_inode->i_mapping, index); + page = find_get_page(btree_inode->i_mapping, index); if (!page) continue; + + btree_lock_page_hook(page); + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + continue; + } + if (PageWriteback(page)) { if 
(PageDirty(page)) wait_on_page_writeback(page); @@ -360,7 +369,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, if (!page) continue; if (PageDirty(page)) { - lock_page(page); + btree_lock_page_hook(page); + wait_on_page_writeback(page); err = write_one_page(page, 0); if (err) werr = err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d1ce8314b948..13d7ee8e0c52 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1176,8 +1176,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_key found_key; struct btrfs_key log_key; struct inode *dir; - struct inode *inode; u8 log_type; + int exists; int ret; dir = read_one_inode(root, key->objectid); @@ -1190,6 +1190,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len); btrfs_dir_item_key_to_cpu(eb, di, &log_key); + exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); + if (exists == 0) + exists = 1; + else + exists = 0; + btrfs_release_path(root, path); + if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, name, name_len, 1); @@ -1224,11 +1231,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, * don't drop the conflicting directory entry if the inode * for the new entry doesn't exist */ - inode = read_one_inode(root, log_key.objectid); - if (!inode) + if (!exists) goto out; - iput(inode); ret = drop_one_dir_item(trans, root, path, dir, dst_di); BUG_ON(ret);