Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fix up some long-standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and other areas.

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6.  It's not clear yet if this is just made easier to
  trigger with this pull or if it's a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
Linus Torvalds, 2015-02-19 14:36:00 -08:00
commit 2b9fb532d4
34 changed files with 1065 additions and 863 deletions

fs/btrfs/backref.c

@@ -1246,25 +1246,6 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-/*
- * this makes the path point to (inum INODE_ITEM ioff)
- */
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-			struct btrfs_path *path)
-{
-	struct btrfs_key key;
-	return btrfs_find_item(fs_root, path, inum, ioff,
-			BTRFS_INODE_ITEM_KEY, &key);
-}
-
-static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-			struct btrfs_path *path,
-			struct btrfs_key *found_key)
-{
-	return btrfs_find_item(fs_root, path, inum, ioff,
-			BTRFS_INODE_REF_KEY, found_key);
-}
-
 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			u64 start_off, struct btrfs_path *path,
 			struct btrfs_inode_extref **ret_extref,
@@ -1374,7 +1355,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
 		}
-		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+		ret = btrfs_find_item(fs_root, path, parent, 0,
+				BTRFS_INODE_REF_KEY, &found_key);
 		if (ret > 0)
 			ret = -ENOENT;
 		if (ret)
@@ -1727,8 +1709,10 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 	struct btrfs_key found_key;
 
 	while (!ret) {
-		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
-				&found_key);
+		ret = btrfs_find_item(fs_root, path, inum,
+				parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
+				&found_key);
 		if (ret < 0)
 			break;
 		if (ret) {

fs/btrfs/backref.h

@@ -32,9 +32,6 @@ struct inode_fs_paths {
 typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
 				      void *ctx);
 
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
-		    struct btrfs_path *path);
-
 int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 			struct btrfs_path *path, struct btrfs_key *found_key,
 			u64 *flags);

fs/btrfs/btrfs_inode.h

@@ -185,6 +185,9 @@ struct btrfs_inode {
 	struct btrfs_delayed_node *delayed_node;
 
+	/* File creation time. */
+	struct timespec i_otime;
+
 	struct inode vfs_inode;
 };

fs/btrfs/ctree.c

@@ -213,11 +213,19 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
  */
 static void add_root_to_dirty_list(struct btrfs_root *root)
 {
+	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
+	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
+		return;
+
 	spin_lock(&root->fs_info->trans_lock);
-	if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
-	    list_empty(&root->dirty_list)) {
-		list_add(&root->dirty_list,
-			 &root->fs_info->dirty_cowonly_roots);
+	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
+		/* Want the extent tree to be the last on the list */
+		if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
+			list_move_tail(&root->dirty_list,
+				       &root->fs_info->dirty_cowonly_roots);
+		else
+			list_move(&root->dirty_list,
+				  &root->fs_info->dirty_cowonly_roots);
 	}
 	spin_unlock(&root->fs_info->trans_lock);
 }
@@ -1363,8 +1371,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
 		BUG_ON(tm->slot != 0);
-		eb_rewin = alloc_dummy_extent_buffer(eb->start,
-						fs_info->tree_root->nodesize);
+		eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
 		if (!eb_rewin) {
 			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
@@ -1444,7 +1451,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	} else if (old_root) {
 		btrfs_tree_read_unlock(eb_root);
 		free_extent_buffer(eb_root);
-		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+		eb = alloc_dummy_extent_buffer(root->fs_info, logical);
 	} else {
 		btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
 		eb = btrfs_clone_extent_buffer(eb_root);
@@ -2282,7 +2289,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			readahead_tree_block(root, search, blocksize);
+			readahead_tree_block(root, search);
 			nread += blocksize;
 		}
 		nscan++;
@@ -2301,7 +2308,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	u64 gen;
 	u64 block1 = 0;
 	u64 block2 = 0;
-	int blocksize;
 
 	parent = path->nodes[level + 1];
 	if (!parent)
@@ -2309,7 +2315,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	nritems = btrfs_header_nritems(parent);
 	slot = path->slots[level + 1];
-	blocksize = root->nodesize;
 
 	if (slot > 0) {
 		block1 = btrfs_node_blockptr(parent, slot - 1);
@@ -2334,9 +2339,9 @@ static noinline void reada_for_balance(struct btrfs_root *root,
 	}
 
 	if (block1)
-		readahead_tree_block(root, block1, blocksize);
+		readahead_tree_block(root, block1);
 	if (block2)
-		readahead_tree_block(root, block2, blocksize);
+		readahead_tree_block(root, block2);
 }
@@ -2609,32 +2614,24 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
 	return 0;
 }
 
-int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
 		u64 iobjectid, u64 ioff, u8 key_type,
 		struct btrfs_key *found_key)
 {
 	int ret;
 	struct btrfs_key key;
 	struct extent_buffer *eb;
-	struct btrfs_path *path;
+
+	ASSERT(path);
+	ASSERT(found_key);
 
 	key.type = key_type;
 	key.objectid = iobjectid;
 	key.offset = ioff;
 
-	if (found_path == NULL) {
-		path = btrfs_alloc_path();
-		if (!path)
-			return -ENOMEM;
-	} else
-		path = found_path;
-
 	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
-	if ((ret < 0) || (found_key == NULL)) {
-		if (path != found_path)
-			btrfs_free_path(path);
+	if (ret < 0)
 		return ret;
-	}
 
 	eb = path->nodes[0];
 	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
@@ -3383,7 +3380,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = BTRFS_WRITE_LOCK;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -4356,13 +4353,15 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 	path->search_for_split = 1;
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 	path->search_for_split = 0;
+	if (ret > 0)
+		ret = -EAGAIN;
 	if (ret < 0)
 		goto err;
 
 	ret = -EAGAIN;
 	leaf = path->nodes[0];
-	/* if our item isn't there or got smaller, return now */
-	if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+	/* if our item isn't there, return now */
+	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
 		goto err;
 
 	/* the leaf has changed, it now has room. return now */
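
The btrfs_find_item() rework above tightens the calling convention: the caller must now pass a pre-allocated path and a non-NULL found_key, and a positive return value still means the key was not found. A minimal caller sketch under that contract; find_inode_ref_key is a hypothetical name, not a helper from this series:

    /* Hypothetical caller following the reworked btrfs_find_item() contract:
     * the caller owns the path, and ret > 0 means the key was not found. */
    static int find_inode_ref_key(struct btrfs_root *fs_root, u64 inum,
                                  struct btrfs_key *found_key)
    {
        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();  /* the helper no longer allocates */
        if (!path)
            return -ENOMEM;

        ret = btrfs_find_item(fs_root, path, inum, 0,
                              BTRFS_INODE_REF_KEY, found_key);
        if (ret > 0)                /* not found: map it to an errno */
            ret = -ENOENT;

        btrfs_free_path(path);      /* the caller frees in all cases */
        return ret;
    }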

fs/btrfs/ctree.h

@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 
 #define BTRFS_DIRTY_METADATA_THRESH	(32 * 1024 * 1024)
 
+#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
@@ -1020,6 +1022,9 @@ enum btrfs_raid_types {
 				 BTRFS_BLOCK_GROUP_RAID6 |   \
 				 BTRFS_BLOCK_GROUP_DUP |     \
 				 BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
+					 BTRFS_BLOCK_GROUP_RAID6)
+
 /*
  * We need a bit for restriper to be able to tell when chunks of type
  * SINGLE are available.  This "extended" profile format is used in
@@ -1239,7 +1244,6 @@ enum btrfs_disk_cache_state {
 	BTRFS_DC_ERROR		= 1,
 	BTRFS_DC_CLEAR		= 2,
 	BTRFS_DC_SETUP		= 3,
-	BTRFS_DC_NEED_WRITE	= 4,
 };
 
 struct btrfs_caching_control {
@@ -1277,7 +1281,6 @@ struct btrfs_block_group_cache {
 	unsigned long full_stripe_len;
 
 	unsigned int ro:1;
-	unsigned int dirty:1;
 	unsigned int iref:1;
 	unsigned int has_caching_ctl:1;
 	unsigned int removed:1;
@@ -1315,6 +1318,9 @@ struct btrfs_block_group_cache {
 	struct list_head ro_list;
 
 	atomic_t trimming;
+
+	/* For dirty block groups */
+	struct list_head dirty_list;
 };
@@ -1741,6 +1747,7 @@ struct btrfs_fs_info {
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
+	struct mutex unused_bg_unpin_mutex;
 
 	/* For btrfs to record security options */
 	struct security_mnt_opts security_opts;
@@ -1776,6 +1783,7 @@ struct btrfs_subvolume_writers {
 #define BTRFS_ROOT_DEFRAG_RUNNING	6
 #define BTRFS_ROOT_FORCE_COW		7
 #define BTRFS_ROOT_MULTI_LOG_TASKS	8
+#define BTRFS_ROOT_DIRTY		9
 
 /*
  * in ram representation of the tree. extent_root is used for all allocations
@@ -1794,8 +1802,6 @@ struct btrfs_root {
 	struct btrfs_fs_info *fs_info;
 	struct extent_io_tree dirty_log_pages;
 
-	struct kobject root_kobj;
-	struct completion kobj_unregister;
 	struct mutex objectid_mutex;
 
 	spinlock_t accounting_lock;
@@ -2465,31 +2471,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-static inline struct btrfs_timespec *
-btrfs_inode_atime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, atime);
-	return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, mtime);
-	return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, ctime);
-	return (struct btrfs_timespec *)ptr;
-}
-
 BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
 BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
 BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
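
The btrfs_inode_atime()/mtime()/ctime() helpers deleted above were only open-coded offsetof arithmetic, so callers can take the member address directly. A sketch of the equivalence (illustrative, not code from the series):

    /* The removed helper computed the field address by hand ... */
    struct btrfs_timespec *a = (struct btrfs_timespec *)
            ((unsigned long)inode_item +
             offsetof(struct btrfs_inode_item, atime));

    /* ... which, by the definition of offsetof, is simply: */
    struct btrfs_timespec *b = &inode_item->atime;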

fs/btrfs/delayed-inode.c

@@ -1755,27 +1755,31 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
 	btrfs_set_stack_inode_block_group(inode_item, 0);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->atime,
 				     inode->i_atime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->atime,
 				      inode->i_atime.tv_nsec);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->mtime,
 				     inode->i_mtime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->mtime,
 				      inode->i_mtime.tv_nsec);
 
-	btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
+	btrfs_set_stack_timespec_sec(&inode_item->ctime,
 				     inode->i_ctime.tv_sec);
-	btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
+	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
 				      inode->i_ctime.tv_nsec);
+
+	btrfs_set_stack_timespec_sec(&inode_item->otime,
+				     BTRFS_I(inode)->i_otime.tv_sec);
+	btrfs_set_stack_timespec_nsec(&inode_item->otime,
+				      BTRFS_I(inode)->i_otime.tv_nsec);
 }
 
 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 {
 	struct btrfs_delayed_node *delayed_node;
 	struct btrfs_inode_item *inode_item;
-	struct btrfs_timespec *tspec;
 
 	delayed_node = btrfs_get_delayed_node(inode);
 	if (!delayed_node)
@@ -1802,17 +1806,19 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	*rdev = btrfs_stack_inode_rdev(inode_item);
 	BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
 
-	tspec = btrfs_inode_atime(inode_item);
-	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
+	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
 
-	tspec = btrfs_inode_mtime(inode_item);
-	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
+	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
 
-	tspec = btrfs_inode_ctime(inode_item);
-	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
-	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
+	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
+
+	BTRFS_I(inode)->i_otime.tv_sec =
+		btrfs_stack_timespec_sec(&inode_item->otime);
+	BTRFS_I(inode)->i_otime.tv_nsec =
+		btrfs_stack_timespec_nsec(&inode_item->otime);
 
 	inode->i_generation = BTRFS_I(inode)->generation;
 	BTRFS_I(inode)->index_cnt = (u64)-1;

fs/btrfs/dev-replace.c

@@ -440,18 +440,9 @@ leave:
  */
 static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
 {
-	s64 writers;
-	DEFINE_WAIT(wait);
-
 	set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
-	do {
-		prepare_to_wait(&fs_info->replace_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		writers = percpu_counter_sum(&fs_info->bio_counter);
-		if (writers)
-			schedule();
-		finish_wait(&fs_info->replace_wait, &wait);
-	} while (writers);
+	wait_event(fs_info->replace_wait, !percpu_counter_sum(
+					&fs_info->bio_counter));
 }
 
 /*
@@ -932,15 +923,15 @@ void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
 {
-	DEFINE_WAIT(wait);
-again:
-	percpu_counter_inc(&fs_info->bio_counter);
-	if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
+	while (1) {
+		percpu_counter_inc(&fs_info->bio_counter);
+		if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
+				     &fs_info->fs_state)))
+			break;
+
 		btrfs_bio_counter_dec(fs_info);
 		wait_event(fs_info->replace_wait,
 			   !test_bit(BTRFS_FS_STATE_DEV_REPLACING,
 				     &fs_info->fs_state));
-		goto again;
 	}
 }
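
Both dev-replace hunks above collapse an open-coded prepare_to_wait()/schedule()/finish_wait() loop into wait_event(), which re-checks its condition after every wakeup and so handles spurious wakeups for free. A minimal sketch of that pattern, with made-up names (demo_wq, demo_busy):

    #include <linux/wait.h>
    #include <linux/atomic.h>

    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);     /* hypothetical waitqueue */
    static atomic_t demo_busy = ATOMIC_INIT(1);  /* hypothetical condition */

    static void demo_wait_until_idle(void)
    {
        /* Sleeps in TASK_UNINTERRUPTIBLE until the condition is true;
         * the condition is re-evaluated after every wakeup. */
        wait_event(demo_wq, atomic_read(&demo_busy) == 0);
    }

    static void demo_mark_idle(void)
    {
        atomic_set(&demo_busy, 0);
        wake_up(&demo_wq);  /* wake anyone blocked in wait_event() */
    }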

fs/btrfs/disk-io.c

@@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 			memcpy(&found, result, csum_size);
 
 			read_extent_buffer(buf, &val, 0, csum_size);
-			printk_ratelimited(KERN_INFO
+			printk_ratelimited(KERN_WARNING
 				"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
 				"level %d\n",
 				root->fs_info->sb->s_id, buf->start,
@@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 		ret = 0;
 		goto out;
 	}
-	printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
+	printk_ratelimited(KERN_ERR
+	    "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
 			eb->fs_info->sb->s_id, eb->start,
 			parent_transid, btrfs_header_generation(eb));
 	ret = 1;
@@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != eb->start) {
-		printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
+		printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
 			       "%llu %llu\n",
 			       eb->fs_info->sb->s_id, found_start, eb->start);
 		ret = -EIO;
 		goto err;
 	}
 	if (check_tree_block_fsid(root, eb)) {
-		printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
+		printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
			       eb->fs_info->sb->s_id, eb->start);
 		ret = -EIO;
 		goto err;
 	}
 	found_level = btrfs_header_level(eb);
 	if (found_level >= BTRFS_MAX_LEVEL) {
-		btrfs_info(root->fs_info, "bad tree block level %d",
+		btrfs_err(root->fs_info, "bad tree block level %d",
 			   (int)btrfs_header_level(eb));
 		ret = -EIO;
 		goto err;
@@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
 	.set_page_dirty = btree_set_page_dirty,
 };
 
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
 {
 	struct extent_buffer *buf = NULL;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
 	free_extent_buffer(buf);
 }
 
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb)
 {
 	struct extent_buffer *buf = NULL;
@@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 	struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
 	int ret;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return 0;
 
@@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 }
 
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
-						 u64 bytenr, u32 blocksize)
+						 u64 bytenr)
 {
 	if (btrfs_test_is_dummy_root(root))
-		return alloc_test_extent_buffer(root->fs_info, bytenr,
-				blocksize);
-	return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
+		return alloc_test_extent_buffer(root->fs_info, bytenr);
+	return alloc_extent_buffer(root->fs_info, bytenr);
 }
@@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 	struct extent_buffer *buf = NULL;
 	int ret;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return NULL;
@@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
 	memset(&root->root_key, 0, sizeof(root->root_key));
 	memset(&root->root_item, 0, sizeof(root->root_item));
 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
-	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
 	if (fs_info)
 		root->defrag_trans_start = fs_info->generation;
 	else
 		root->defrag_trans_start = 0;
-	init_completion(&root->kobj_unregister);
 	root->root_key.objectid = objectid;
 	root->anon_dev = 0;
@@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 				     bool check_ref)
 {
 	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
 	int ret;
 
 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@@ -1669,8 +1669,17 @@ again:
 	if (ret)
 		goto fail;
 
-	ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
-			location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	key.type = BTRFS_ORPHAN_ITEM_KEY;
+	key.offset = location->objectid;
+
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+	btrfs_free_path(path);
 	if (ret < 0)
 		goto fail;
 	if (ret == 0)
@@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->qgroup_op_lock);
 	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
+	mutex_init(&fs_info->unused_bg_unpin_mutex);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
@@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb,
 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
 	if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
-		printk(KERN_ERR "BTRFS: has skinny extents\n");
+		printk(KERN_INFO "BTRFS: has skinny extents\n");
 
 	/*
	 * flag our filesystem as having big metadata blocks if
@@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb,
	 */
 	if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
 	    (sectorsize != nodesize)) {
-		printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
+		printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
 				"are not allowed for mixed block groups on %s\n",
 				sb->s_id);
 		goto fail_alloc;
@@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb,
 	sb->s_blocksize_bits = blksize_bits(sectorsize);
 
 	if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-		printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
+		printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
 
 	if (sectorsize != PAGE_SIZE) {
-		printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
+		printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
 		       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
 		goto fail_sb_buffer;
 	}
@@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb,
 	ret = btrfs_read_sys_array(tree_root);
 	mutex_unlock(&fs_info->chunk_mutex);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to read the system "
+		printk(KERN_ERR "BTRFS: failed to read the system "
 		       "array on %s\n", sb->s_id);
 		goto fail_sb_buffer;
 	}
@@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb,
 					   generation);
 	if (!chunk_root->node ||
 	    !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
-		printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb,
 
 	ret = btrfs_read_chunk_tree(chunk_root);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb,
 	btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
 	if (!fs_devices->latest_bdev) {
-		printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
+		printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
 		       sb->s_id);
 		goto fail_tree_roots;
 	}
@@ -2765,7 +2775,7 @@ retry_root_backup:
 
 	ret = btrfs_recover_balance(fs_info);
 	if (ret) {
-		printk(KERN_WARNING "BTRFS: failed to recover balance\n");
+		printk(KERN_ERR "BTRFS: failed to recover balance\n");
 		goto fail_block_groups;
 	}
@@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
 				btrfs_super_log_root(sb));
 
+	/*
+	 * Check the lower bound, the alignment and other constraints are
+	 * checked later.
+	 */
+	if (btrfs_super_nodesize(sb) < 4096) {
+		printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
+				btrfs_super_nodesize(sb));
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sectorsize(sb) < 4096) {
+		printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
+				btrfs_super_sectorsize(sb));
+		ret = -EINVAL;
+	}
+
 	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
 		printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
 				fs_info->fsid, sb->dev_item.fsid);
@@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	if (btrfs_super_num_devices(sb) > (1UL << 31))
 		printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
 				btrfs_super_num_devices(sb));
+	if (btrfs_super_num_devices(sb) == 0) {
+		printk(KERN_ERR "BTRFS: number of devices is 0\n");
+		ret = -EINVAL;
+	}
 
 	if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
 		printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@@ -3880,6 +3909,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}
 
+	/*
+	 * Obvious sys_chunk_array corruptions, it must hold at least one key
+	 * and one chunk
+	 */
+	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
+				btrfs_super_sys_array_size(sb),
+				BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+			+ sizeof(struct btrfs_chunk)) {
+		printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+				btrfs_super_sys_array_size(sb),
+				sizeof(struct btrfs_disk_key)
+				+ sizeof(struct btrfs_chunk));
+		ret = -EINVAL;
+	}
+
 	/*
	 * The generation is a global counter, we'll trust it more than the others
	 * but it's still possible that it's the one that's wrong.
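
Note the style of the new superblock checks: each failed check records -EINVAL but the function keeps going, so a single pass logs every problem instead of stopping at the first one. A condensed sketch of that accumulate-and-continue pattern (check_super and its parameters are illustrative, not the kernel function):

    /* Condensed, hypothetical sketch of the validation style used by
     * btrfs_check_super_valid(): remember failures, keep checking. */
    static int check_super(u32 nodesize, u32 sectorsize, u64 num_devices)
    {
        int ret = 0;

        if (nodesize < 4096) {
            pr_err("nodesize too small: %u < 4096\n", nodesize);
            ret = -EINVAL;      /* record the failure, keep going */
        }
        if (sectorsize < 4096) {
            pr_err("sectorsize too small: %u < 4096\n", sectorsize);
            ret = -EINVAL;
        }
        if (num_devices == 0) {
            pr_err("number of devices is 0\n");
            ret = -EINVAL;
        }
        return ret;             /* nonzero if any check failed */
    }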

fs/btrfs/disk-io.h

@@ -46,11 +46,11 @@ struct btrfs_fs_devices;
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 				      u64 parent_transid);
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize);
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr);
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb);
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
-						   u64 bytenr, u32 blocksize);
+						   u64 bytenr);
 void clean_tree_block(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, struct extent_buffer *buf);
 int open_ctree(struct super_block *sb,

fs/btrfs/extent-tree.c

@@ -74,8 +74,9 @@ enum {
 	RESERVE_ALLOC_NO_ACCOUNT = 2,
 };
 
-static int update_block_group(struct btrfs_root *root,
-			      u64 bytenr, u64 num_bytes, int alloc);
+static int update_block_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 bytenr,
+			      u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -1925,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 			 */
 			ret = 0;
 		}
-		kfree(bbio);
+		btrfs_put_bbio(bbio);
 	}
 
 	if (actual_bytes)
@@ -2768,7 +2769,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_head *head;
 	int ret;
 	int run_all = count == (unsigned long)-1;
-	int run_most = 0;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2778,10 +2778,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		root = root->fs_info->tree_root;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	if (count == 0) {
+	if (count == 0)
 		count = atomic_read(&delayed_refs->num_entries) * 2;
-		run_most = 1;
-	}
 
 again:
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -3315,120 +3313,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
 	struct btrfs_block_group_cache *cache;
-	int err = 0;
+	struct btrfs_transaction *cur_trans = trans->transaction;
+	int ret = 0;
 	struct btrfs_path *path;
-	u64 last = 0;
+
+	if (list_empty(&cur_trans->dirty_bgs))
+		return 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
-again:
-	while (1) {
-		cache = btrfs_lookup_first_block_group(root->fs_info, last);
-		while (cache) {
-			if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-				break;
-			cache = next_block_group(root, cache);
-		}
-		if (!cache) {
-			if (last == 0)
-				break;
-			last = 0;
-			continue;
-		}
-		err = cache_save_setup(cache, trans, path);
-		last = cache->key.objectid + cache->key.offset;
-		btrfs_put_block_group(cache);
-	}
-
-	while (1) {
-		if (last == 0) {
-			err = btrfs_run_delayed_refs(trans, root,
-						     (unsigned long)-1);
-			if (err) /* File system offline */
-				goto out;
-		}
-
-		cache = btrfs_lookup_first_block_group(root->fs_info, last);
-		while (cache) {
-			if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
-				btrfs_put_block_group(cache);
-				goto again;
-			}
-
-			if (cache->dirty)
-				break;
-			cache = next_block_group(root, cache);
-		}
-		if (!cache) {
-			if (last == 0)
-				break;
-			last = 0;
-			continue;
-		}
-
-		if (cache->disk_cache_state == BTRFS_DC_SETUP)
-			cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
-		cache->dirty = 0;
-		last = cache->key.objectid + cache->key.offset;
-
-		err = write_one_cache_group(trans, root, path, cache);
-		btrfs_put_block_group(cache);
-		if (err) /* File system offline */
-			goto out;
-	}
-
-	while (1) {
-		/*
-		 * I don't think this is needed since we're just marking our
-		 * preallocated extent as written, but just in case it can't
-		 * hurt.
-		 */
-		if (last == 0) {
-			err = btrfs_run_delayed_refs(trans, root,
-						     (unsigned long)-1);
-			if (err) /* File system offline */
-				goto out;
-		}
-
-		cache = btrfs_lookup_first_block_group(root->fs_info, last);
-		while (cache) {
-			/*
-			 * Really this shouldn't happen, but it could if we
-			 * couldn't write the entire preallocated extent and
-			 * splitting the extent resulted in a new block.
-			 */
-			if (cache->dirty) {
-				btrfs_put_block_group(cache);
-				goto again;
-			}
-			if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
-				break;
-			cache = next_block_group(root, cache);
-		}
-		if (!cache) {
-			if (last == 0)
-				break;
-			last = 0;
-			continue;
-		}
-
-		err = btrfs_write_out_cache(root, trans, cache, path);
-
-		/*
-		 * If we didn't have an error then the cache state is still
-		 * NEED_WRITE, so we can set it to WRITTEN.
-		 */
-		if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
-			cache->disk_cache_state = BTRFS_DC_WRITTEN;
-		last = cache->key.objectid + cache->key.offset;
+	/*
+	 * We don't need the lock here since we are protected by the transaction
+	 * commit. We want to do the cache_save_setup first and then run the
+	 * delayed refs to make sure we have the best chance at doing this all
+	 * in one shot.
+	 */
+	while (!list_empty(&cur_trans->dirty_bgs)) {
+		cache = list_first_entry(&cur_trans->dirty_bgs,
+					 struct btrfs_block_group_cache,
+					 dirty_list);
+		list_del_init(&cache->dirty_list);
+		if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+			cache_save_setup(cache, trans, path);
+		if (!ret)
+			ret = btrfs_run_delayed_refs(trans, root,
+						     (unsigned long) -1);
+		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
+			btrfs_write_out_cache(root, trans, cache, path);
+		if (!ret)
+			ret = write_one_cache_group(trans, root, path, cache);
 		btrfs_put_block_group(cache);
 	}
-out:
 
 	btrfs_free_path(path);
-	return err;
+	return ret;
 }
@@ -5043,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
 /**
  * drop_outstanding_extent - drop an outstanding extent
  * @inode: the inode we're dropping the extent for
+ * @num_bytes: the number of bytes we're relaseing.
  *
  * This is called when we are freeing up an outstanding extent, either called
  * after an error or after an extent is written. This will return the number of
  * reserved extents that need to be freed. This must be called with
  * BTRFS_I(inode)->lock held.
  */
-static unsigned drop_outstanding_extent(struct inode *inode)
+static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
 {
 	unsigned drop_inode_space = 0;
 	unsigned dropped_extents = 0;
+	unsigned num_extents = 0;
 
-	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
-	BTRFS_I(inode)->outstanding_extents--;
+	num_extents = (unsigned)div64_u64(num_bytes +
+					  BTRFS_MAX_EXTENT_SIZE - 1,
+					  BTRFS_MAX_EXTENT_SIZE);
+	ASSERT(num_extents);
+	ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
+	BTRFS_I(inode)->outstanding_extents -= num_extents;
 
 	if (BTRFS_I(inode)->outstanding_extents == 0 &&
 	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5226,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
 out_fail:
 	spin_lock(&BTRFS_I(inode)->lock);
-	dropped = drop_outstanding_extent(inode);
+	dropped = drop_outstanding_extent(inode, num_bytes);
 	/*
	 * If the inodes csum_bytes is the same as the original
	 * csum_bytes then we know we haven't raced with any free()ers
@@ -5305,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 	spin_lock(&BTRFS_I(inode)->lock);
-	dropped = drop_outstanding_extent(inode);
+	dropped = drop_outstanding_extent(inode, num_bytes);
 
 	if (num_bytes)
 		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
@@ -5375,8 +5301,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
 	btrfs_free_reserved_data_space(inode, num_bytes);
 }
 
-static int update_block_group(struct btrfs_root *root,
-			      u64 bytenr, u64 num_bytes, int alloc)
+static int update_block_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, u64 bytenr,
+			      u64 num_bytes, int alloc)
 {
 	struct btrfs_block_group_cache *cache = NULL;
 	struct btrfs_fs_info *info = root->fs_info;
@@ -5414,6 +5341,14 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		if (!alloc && cache->cached == BTRFS_CACHE_NO)
 			cache_block_group(cache, 1);
 
+		spin_lock(&trans->transaction->dirty_bgs_lock);
+		if (list_empty(&cache->dirty_list)) {
+			list_add_tail(&cache->dirty_list,
+				      &trans->transaction->dirty_bgs);
+			btrfs_get_block_group(cache);
+		}
+		spin_unlock(&trans->transaction->dirty_bgs_lock);
+
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
@@ -5424,7 +5359,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		    cache->disk_cache_state < BTRFS_DC_CLEAR)
 			cache->disk_cache_state = BTRFS_DC_CLEAR;
 
-		cache->dirty = 1;
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
 		if (alloc) {
@@ -5807,10 +5741,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 		unpin = &fs_info->freed_extents[0];
 
 	while (1) {
+		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
 					    EXTENT_DIRTY, NULL);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
+		}
 
 		if (btrfs_test_opt(root, DISCARD))
 			ret = btrfs_discard_extent(root, start,
@@ -5818,6 +5755,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		unpin_extent_range(root, start, end, true);
+		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		cond_resched();
 	}
@@ -6103,7 +6041,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			}
 		}
 
-		ret = update_block_group(root, bytenr, num_bytes, 0);
+		ret = update_block_group(trans, root, bytenr, num_bytes, 0);
 		if (ret) {
 			btrfs_abort_transaction(trans, extent_root, ret);
 			goto out;
@@ -6205,7 +6143,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			   struct extent_buffer *buf,
 			   u64 parent, int last_ref)
 {
-	struct btrfs_block_group_cache *cache = NULL;
 	int pin = 1;
 	int ret;
@@ -6221,17 +6158,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 	if (!last_ref)
 		return;
 
-	cache = btrfs_lookup_block_group(root->fs_info, buf->start);
-
 	if (btrfs_header_generation(buf) == trans->transid) {
+		struct btrfs_block_group_cache *cache;
+
 		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
 			ret = check_ref_cleanup(trans, root, buf->start);
 			if (!ret)
 				goto out;
 		}
 
+		cache = btrfs_lookup_block_group(root->fs_info, buf->start);
+
 		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 			pin_down_extent(root, cache, buf->start, buf->len, 1);
+			btrfs_put_block_group(cache);
 			goto out;
 		}
@@ -6239,6 +6179,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 		btrfs_add_free_space(cache, buf->start, buf->len);
 		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+		btrfs_put_block_group(cache);
 		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
 		pin = 0;
 	}
@@ -6253,7 +6194,6 @@ out:
	 * anymore.
	 */
 	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
-	btrfs_put_block_group(cache);
 }
 
 /* Can return -ENOMEM */
@@ -7063,7 +7003,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
-	ret = update_block_group(root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
 			ins->objectid, ins->offset);
@@ -7152,7 +7092,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	ret = update_block_group(root, ins->objectid, root->nodesize, 1);
+	ret = update_block_group(trans, root, ins->objectid, root->nodesize,
+				 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
 			ins->objectid, ins->offset);
@@ -7217,11 +7158,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 
 static struct extent_buffer *
 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		      u64 bytenr, u32 blocksize, int level)
+		      u64 bytenr, int level)
 {
 	struct extent_buffer *buf;
 
-	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 	btrfs_set_header_generation(buf, trans->transid);
@@ -7340,7 +7281,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
 	if (btrfs_test_is_dummy_root(root)) {
 		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
-					    blocksize, level);
+					    level);
 		if (!IS_ERR(buf))
 			root->alloc_bytenr += blocksize;
 		return buf;
@@ -7357,8 +7298,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		return ERR_PTR(ret);
 	}
 
-	buf = btrfs_init_new_buffer(trans, root, ins.objectid,
-				    blocksize, level);
+	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
 	BUG_ON(IS_ERR(buf)); /* -ENOMEM */
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
@@ -7487,7 +7427,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 			continue;
 		}
 reada:
-		readahead_tree_block(root, bytenr, blocksize);
+		readahead_tree_block(root, bytenr);
 		nread++;
 	}
 	wc->reada_slot = slot;
@@ -7828,7 +7768,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
 	next = btrfs_find_tree_block(root, bytenr);
 	if (!next) {
-		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+		next = btrfs_find_create_tree_block(root, bytenr);
 		if (!next)
 			return -ENOMEM;
 		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
@@ -8548,14 +8488,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
 
-	alloc_flags = update_block_group_flags(root, cache->flags);
-	if (alloc_flags != cache->flags) {
-		ret = do_chunk_alloc(trans, root, alloc_flags,
-				     CHUNK_ALLOC_FORCE);
-		if (ret < 0)
-			goto out;
-	}
-
 	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
@@ -8566,6 +8498,11 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 		goto out;
 	ret = set_block_group_ro(cache, 0);
 out:
+	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+		alloc_flags = update_block_group_flags(root, cache->flags);
+		check_system_chunk(trans, root, alloc_flags);
+	}
+
 	btrfs_end_transaction(trans, root);
 	return ret;
 }
@@ -9005,6 +8942,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
 	INIT_LIST_HEAD(&cache->cluster_list);
 	INIT_LIST_HEAD(&cache->bg_list);
 	INIT_LIST_HEAD(&cache->ro_list);
+	INIT_LIST_HEAD(&cache->dirty_list);
 	btrfs_init_free_space_ctl(cache);
 	atomic_set(&cache->trimming, 0);
@@ -9068,9 +9006,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
			 * b) Setting 'dirty flag' makes sure that we flush
			 *    the new space cache info onto disk.
			 */
-			cache->disk_cache_state = BTRFS_DC_CLEAR;
 			if (btrfs_test_opt(root, SPACE_CACHE))
-				cache->dirty = 1;
+				cache->disk_cache_state = BTRFS_DC_CLEAR;
 		}
 
 		read_extent_buffer(leaf, &cache->item,
@@ -9460,6 +9397,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		}
 	}
 
+	spin_lock(&trans->transaction->dirty_bgs_lock);
+	if (!list_empty(&block_group->dirty_list)) {
+		list_del_init(&block_group->dirty_list);
+		btrfs_put_block_group(block_group);
+	}
+	spin_unlock(&trans->transaction->dirty_bgs_lock);
+
 	btrfs_remove_free_space_cache(block_group);
 
 	spin_lock(&block_group->space_info->lock);
@@ -9611,7 +9555,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		 * Want to do this before we do anything else so we can recover
		 * properly if we fail to join the transaction.
		 */
-		trans = btrfs_join_transaction(root);
+		/* 1 for btrfs_orphan_reserve_metadata() */
+		trans = btrfs_start_transaction(root, 1);
 		if (IS_ERR(trans)) {
 			btrfs_set_block_group_rw(root, block_group);
 			ret = PTR_ERR(trans);
@@ -9624,18 +9569,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		 */
 		start = block_group->key.objectid;
 		end = start + block_group->key.offset - 1;
+		/*
+		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
+		 * btrfs_finish_extent_commit(). If we are at transaction N,
+		 * another task might be running finish_extent_commit() for the
+		 * previous transaction N - 1, and have seen a range belonging
+		 * to the block group in freed_extents[] before we were able to
+		 * clear the whole block group range from freed_extents[]. This
+		 * means that task can lookup for the block group after we
+		 * unpinned it from freed_extents[] and removed it, leading to
+		 * a BUG_ON() at btrfs_unpin_extent_range().
+		 */
+		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
 				  EXTENT_DIRTY, GFP_NOFS);
 		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			btrfs_set_block_group_rw(root, block_group);
 			goto end_trans;
 		}
 		ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
 				  EXTENT_DIRTY, GFP_NOFS);
 		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			btrfs_set_block_group_rw(root, block_group);
 			goto end_trans;
 		}
+		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 
 		/* Reset pinned so btrfs_put_block_group doesn't complain */
 		block_group->pinned = 0;
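
The reworked drop_outstanding_extent() above rounds up: freeing num_bytes releases ceil(num_bytes / BTRFS_MAX_EXTENT_SIZE) outstanding extents, matching how large writes are accounted as multiple 128 MiB extents elsewhere in this series. A small standalone userspace check of that round-up arithmetic (an illustration, not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    /* From the ctree.h hunk above: 128 MiB per outstanding extent. */
    #define BTRFS_MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

    /* Userspace stand-in for the kernel's div64_u64-based round-up. */
    static unsigned num_outstanding_extents(uint64_t num_bytes)
    {
        return (unsigned)((num_bytes + BTRFS_MAX_EXTENT_SIZE - 1) /
                          BTRFS_MAX_EXTENT_SIZE);
    }

    int main(void)
    {
        /* 1 byte up to 128 MiB is 1 extent; one byte more is 2. */
        printf("%u\n", num_outstanding_extents(1));                         /* 1 */
        printf("%u\n", num_outstanding_extents(BTRFS_MAX_EXTENT_SIZE));     /* 1 */
        printf("%u\n", num_outstanding_extents(BTRFS_MAX_EXTENT_SIZE + 1)); /* 2 */
        return 0;
    }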

fs/btrfs/extent_io.c

@@ -64,7 +64,7 @@ void btrfs_leak_debug_check(void)
 	while (!list_empty(&states)) {
 		state = list_entry(states.next, struct extent_state, leak_list);
-		pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n",
+		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
 		       state->start, state->end, state->state,
 		       extent_state_in_tree(state),
 		       atomic_read(&state->refs));
@@ -396,21 +396,21 @@ static void merge_state(struct extent_io_tree *tree,
 }
 
 static void set_state_cb(struct extent_io_tree *tree,
-			 struct extent_state *state, unsigned long *bits)
+			 struct extent_state *state, unsigned *bits)
 {
 	if (tree->ops && tree->ops->set_bit_hook)
 		tree->ops->set_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void clear_state_cb(struct extent_io_tree *tree,
-			   struct extent_state *state, unsigned long *bits)
+			   struct extent_state *state, unsigned *bits)
 {
 	if (tree->ops && tree->ops->clear_bit_hook)
 		tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void set_state_bits(struct extent_io_tree *tree,
-			   struct extent_state *state, unsigned long *bits);
+			   struct extent_state *state, unsigned *bits);
 
 /*
  * insert an extent_state struct into the tree. 'bits' are set on the
@@ -426,7 +426,7 @@ static int insert_state(struct extent_io_tree *tree,
 			struct extent_state *state, u64 start, u64 end,
 			struct rb_node ***p,
 			struct rb_node **parent,
-			unsigned long *bits)
+			unsigned *bits)
 {
 	struct rb_node *node;
 
@@ -511,10 +511,10 @@ static struct extent_state *next_state(struct extent_state *state)
  */
 static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
 					    struct extent_state *state,
-					    unsigned long *bits, int wake)
+					    unsigned *bits, int wake)
 {
 	struct extent_state *next;
-	unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
+	unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
 
 	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
 		u64 range = state->end - state->start + 1;
@@ -570,7 +570,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		     unsigned long bits, int wake, int delete,
+		     unsigned bits, int wake, int delete,
 		     struct extent_state **cached_state,
 		     gfp_t mask)
 {
@@ -789,9 +789,9 @@ out:
 
 static void set_state_bits(struct extent_io_tree *tree,
 			   struct extent_state *state,
-			   unsigned long *bits)
+			   unsigned *bits)
 {
-	unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
+	unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
 
 	set_state_cb(tree, state, bits);
 	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -803,7 +803,7 @@ static void set_state_bits(struct extent_io_tree *tree,
static void cache_state_if_flags(struct extent_state *state, static void cache_state_if_flags(struct extent_state *state,
struct extent_state **cached_ptr, struct extent_state **cached_ptr,
const u64 flags) unsigned flags)
{ {
if (cached_ptr && !(*cached_ptr)) { if (cached_ptr && !(*cached_ptr)) {
if (!flags || (state->state & flags)) { if (!flags || (state->state & flags)) {
@ -833,7 +833,7 @@ static void cache_state(struct extent_state *state,
static int __must_check static int __must_check
__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long exclusive_bits, unsigned bits, unsigned exclusive_bits,
u64 *failed_start, struct extent_state **cached_state, u64 *failed_start, struct extent_state **cached_state,
gfp_t mask) gfp_t mask)
{ {
@ -1034,7 +1034,7 @@ search_again:
} }
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, u64 * failed_start, unsigned bits, u64 * failed_start,
struct extent_state **cached_state, gfp_t mask) struct extent_state **cached_state, gfp_t mask)
{ {
return __set_extent_bit(tree, start, end, bits, 0, failed_start, return __set_extent_bit(tree, start, end, bits, 0, failed_start,
@ -1060,7 +1060,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* boundary bits like LOCK. * boundary bits like LOCK.
*/ */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long clear_bits, unsigned bits, unsigned clear_bits,
struct extent_state **cached_state, gfp_t mask) struct extent_state **cached_state, gfp_t mask)
{ {
struct extent_state *state; struct extent_state *state;
@ -1268,14 +1268,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
} }
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask) unsigned bits, gfp_t mask)
{ {
return set_extent_bit(tree, start, end, bits, NULL, return set_extent_bit(tree, start, end, bits, NULL,
NULL, mask); NULL, mask);
} }
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask) unsigned bits, gfp_t mask)
{ {
return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
} }
@ -1330,10 +1330,11 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
* us if waiting is desired. * us if waiting is desired.
*/ */
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, struct extent_state **cached_state) unsigned bits, struct extent_state **cached_state)
{ {
int err; int err;
u64 failed_start; u64 failed_start;
while (1) { while (1) {
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
EXTENT_LOCKED, &failed_start, EXTENT_LOCKED, &failed_start,
@ -1440,7 +1441,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
*/ */
static struct extent_state * static struct extent_state *
find_first_extent_bit_state(struct extent_io_tree *tree, find_first_extent_bit_state(struct extent_io_tree *tree,
u64 start, unsigned long bits) u64 start, unsigned bits)
{ {
struct rb_node *node; struct rb_node *node;
struct extent_state *state; struct extent_state *state;
@ -1474,7 +1475,7 @@ out:
* If nothing was found, 1 is returned. If found something, return 0. * If nothing was found, 1 is returned. If found something, return 0.
*/ */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start, int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned long bits, u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state) struct extent_state **cached_state)
{ {
struct extent_state *state; struct extent_state *state;
@ -1753,7 +1754,7 @@ out_failed:
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page, struct page *locked_page,
unsigned long clear_bits, unsigned clear_bits,
unsigned long page_ops) unsigned long page_ops)
{ {
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@ -1810,7 +1811,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
*/ */
u64 count_range_bits(struct extent_io_tree *tree, u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes, u64 *start, u64 search_end, u64 max_bytes,
unsigned long bits, int contig) unsigned bits, int contig)
{ {
struct rb_node *node; struct rb_node *node;
struct extent_state *state; struct extent_state *state;
@ -1928,7 +1929,7 @@ out:
* range is found set. * range is found set.
*/ */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int filled, struct extent_state *cached) unsigned bits, int filled, struct extent_state *cached)
{ {
struct extent_state *state = NULL; struct extent_state *state = NULL;
struct rb_node *node; struct rb_node *node;
@ -2057,7 +2058,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
sector = bbio->stripes[mirror_num-1].physical >> 9; sector = bbio->stripes[mirror_num-1].physical >> 9;
bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[mirror_num-1].dev; dev = bbio->stripes[mirror_num-1].dev;
kfree(bbio); btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) { if (!dev || !dev->bdev || !dev->writeable) {
bio_put(bio); bio_put(bio);
return -EIO; return -EIO;
@ -2816,8 +2817,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset) < page_size) { bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num, ret = submit_one_bio(rw, bio, mirror_num,
prev_bio_flags); prev_bio_flags);
if (ret < 0) if (ret < 0) {
*bio_ret = NULL;
return ret; return ret;
}
bio = NULL; bio = NULL;
} else { } else {
return 0; return 0;
@ -3239,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
page, page,
&delalloc_start, &delalloc_start,
&delalloc_end, &delalloc_end,
128 * 1024 * 1024); BTRFS_MAX_EXTENT_SIZE);
if (nr_delalloc == 0) { if (nr_delalloc == 0) {
delalloc_start = delalloc_end + 1; delalloc_start = delalloc_end + 1;
continue; continue;
@ -4598,11 +4601,11 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
static struct extent_buffer * static struct extent_buffer *
__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
unsigned long len, gfp_t mask) unsigned long len)
{ {
struct extent_buffer *eb = NULL; struct extent_buffer *eb = NULL;
eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS);
if (eb == NULL) if (eb == NULL)
return NULL; return NULL;
eb->start = start; eb->start = start;
@ -4643,7 +4646,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
struct extent_buffer *new; struct extent_buffer *new;
unsigned long num_pages = num_extent_pages(src->start, src->len); unsigned long num_pages = num_extent_pages(src->start, src->len);
new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
if (new == NULL) if (new == NULL)
return NULL; return NULL;
@ -4666,13 +4669,26 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
return new; return new;
} }
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{ {
struct extent_buffer *eb; struct extent_buffer *eb;
unsigned long num_pages = num_extent_pages(0, len); unsigned long len;
unsigned long num_pages;
unsigned long i; unsigned long i;
eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); if (!fs_info) {
/*
* Called only from tests that don't always have a fs_info
* available, but we know that nodesize is 4096
*/
len = 4096;
} else {
len = fs_info->tree_root->nodesize;
}
num_pages = num_extent_pages(0, len);
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb) if (!eb)
return NULL; return NULL;
@ -4762,7 +4778,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len) u64 start)
{ {
struct extent_buffer *eb, *exists = NULL; struct extent_buffer *eb, *exists = NULL;
int ret; int ret;
@ -4770,7 +4786,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
eb = find_extent_buffer(fs_info, start); eb = find_extent_buffer(fs_info, start);
if (eb) if (eb)
return eb; return eb;
eb = alloc_dummy_extent_buffer(start, len); eb = alloc_dummy_extent_buffer(fs_info, start);
if (!eb) if (!eb)
return NULL; return NULL;
eb->fs_info = fs_info; eb->fs_info = fs_info;
@ -4808,8 +4824,9 @@ free_eb:
#endif #endif
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len) u64 start)
{ {
unsigned long len = fs_info->tree_root->nodesize;
unsigned long num_pages = num_extent_pages(start, len); unsigned long num_pages = num_extent_pages(start, len);
unsigned long i; unsigned long i;
unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long index = start >> PAGE_CACHE_SHIFT;
@ -4824,7 +4841,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (eb) if (eb)
return eb; return eb;
eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS); eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb) if (!eb)
return NULL; return NULL;

View File

@ -4,22 +4,22 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
/* bits for the extent state */ /* bits for the extent state */
#define EXTENT_DIRTY 1 #define EXTENT_DIRTY (1U << 0)
#define EXTENT_WRITEBACK (1 << 1) #define EXTENT_WRITEBACK (1U << 1)
#define EXTENT_UPTODATE (1 << 2) #define EXTENT_UPTODATE (1U << 2)
#define EXTENT_LOCKED (1 << 3) #define EXTENT_LOCKED (1U << 3)
#define EXTENT_NEW (1 << 4) #define EXTENT_NEW (1U << 4)
#define EXTENT_DELALLOC (1 << 5) #define EXTENT_DELALLOC (1U << 5)
#define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG (1U << 6)
#define EXTENT_BOUNDARY (1 << 9) #define EXTENT_BOUNDARY (1U << 9)
#define EXTENT_NODATASUM (1 << 10) #define EXTENT_NODATASUM (1U << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11) #define EXTENT_DO_ACCOUNTING (1U << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12) #define EXTENT_FIRST_DELALLOC (1U << 12)
#define EXTENT_NEED_WAIT (1 << 13) #define EXTENT_NEED_WAIT (1U << 13)
#define EXTENT_DAMAGED (1 << 14) #define EXTENT_DAMAGED (1U << 14)
#define EXTENT_NORESERVE (1 << 15) #define EXTENT_NORESERVE (1U << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
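The switch from 1 << n to 1U << n goes hand in hand with narrowing the state word from unsigned long to a 32-bit unsigned: once the flags are stored and passed as unsigned, the constants should be unsigned too, so masks like ~EXTENT_CTLBITS never pass through a signed intermediate. A tiny standalone sketch of the same pattern, using illustrative EX_* names rather than the kernel's:

#include <stdio.h>

#define EX_DIRTY    (1U << 0)   /* unsigned constants, as above */
#define EX_LOCKED   (1U << 3)
#define EX_DELALLOC (1U << 5)
#define EX_CTLBITS  (EX_DIRTY)

int main(void)
{
	unsigned state = 0;                 /* 32-bit word, like extent_state */

	state |= EX_DIRTY | EX_DELALLOC;    /* set bits */
	state &= ~EX_CTLBITS;               /* clear: the mask stays unsigned */

	printf("delalloc=%d locked=%d\n",
	       !!(state & EX_DELALLOC), !!(state & EX_LOCKED));
	return 0;
}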
/* /*
* flags for bio submission. The high bits indicate the compression * flags for bio submission. The high bits indicate the compression
@ -81,9 +81,9 @@ struct extent_io_ops {
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate); struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state, void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long *bits); unsigned *bits);
void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long *bits); unsigned *bits);
void (*merge_extent_hook)(struct inode *inode, void (*merge_extent_hook)(struct inode *inode,
struct extent_state *new, struct extent_state *new,
struct extent_state *other); struct extent_state *other);
@ -108,7 +108,7 @@ struct extent_state {
/* ADD NEW ELEMENTS AFTER THIS */ /* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq; wait_queue_head_t wq;
atomic_t refs; atomic_t refs;
unsigned long state; unsigned state;
/* for use by the FS */ /* for use by the FS */
u64 private; u64 private;
@ -188,7 +188,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
int try_release_extent_buffer(struct page *page); int try_release_extent_buffer(struct page *page);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, struct extent_state **cached); unsigned bits, struct extent_state **cached);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached, gfp_t mask); struct extent_state **cached, gfp_t mask);
@ -202,21 +202,21 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree, u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 *start, u64 search_end,
u64 max_bytes, unsigned long bits, int contig); u64 max_bytes, unsigned bits, int contig);
void free_extent_state(struct extent_state *state); void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int filled, unsigned bits, int filled,
struct extent_state *cached_state); struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask); unsigned bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int wake, int delete, unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask); struct extent_state **cached, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask); unsigned bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, u64 *failed_start, unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask); struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask); struct extent_state **cached_state, gfp_t mask);
@ -229,14 +229,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask); gfp_t mask);
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long clear_bits, unsigned bits, unsigned clear_bits,
struct extent_state **cached_state, gfp_t mask); struct extent_state **cached_state, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask); struct extent_state **cached_state, gfp_t mask);
int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask); struct extent_state **cached_state, gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start, int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned long bits, u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state); struct extent_state **cached_state);
int extent_invalidatepage(struct extent_io_tree *tree, int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset); struct page *page, unsigned long offset);
@ -262,8 +262,9 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page); void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len); u64 start);
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start); u64 start);
@ -322,7 +323,7 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page, struct page *locked_page,
unsigned long bits_to_clear, unsigned bits_to_clear,
unsigned long page_ops); unsigned long page_ops);
struct bio * struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
@ -377,5 +378,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
u64 *end, u64 max_bytes); u64 *end, u64 max_bytes);
#endif #endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len); u64 start);
#endif #endif

View File

@ -651,15 +651,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct io_ctl io_ctl; struct io_ctl io_ctl;
struct btrfs_key key; struct btrfs_key key;
struct btrfs_free_space *e, *n; struct btrfs_free_space *e, *n;
struct list_head bitmaps; LIST_HEAD(bitmaps);
u64 num_entries; u64 num_entries;
u64 num_bitmaps; u64 num_bitmaps;
u64 generation; u64 generation;
u8 type; u8 type;
int ret = 0; int ret = 0;
INIT_LIST_HEAD(&bitmaps);
/* Nothing in the space cache, goodbye */ /* Nothing in the space cache, goodbye */
if (!i_size_read(inode)) if (!i_size_read(inode))
return 0; return 0;
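LIST_HEAD(bitmaps) declares and initializes the list head in one step, which is why the separate INIT_LIST_HEAD() calls in this file can go away. A self-contained sketch of the two equivalent forms, with the kernel macros reproduced in miniature:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* miniature of the kernel's list macros */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)

static void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list;
	list->prev = list;
}

int main(void)
{
	LIST_HEAD(bitmaps);           /* declare + initialize at once */

	struct list_head legacy;      /* the two-step form being removed */
	INIT_LIST_HEAD(&legacy);

	printf("%d %d\n", bitmaps.next == &bitmaps, legacy.next == &legacy);
	return 0;
}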
@ -1243,6 +1241,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode; struct inode *inode;
int ret = 0; int ret = 0;
enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
root = root->fs_info->tree_root; root = root->fs_info->tree_root;
@ -1266,9 +1265,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
path, block_group->key.objectid); path, block_group->key.objectid);
if (ret) { if (ret) {
spin_lock(&block_group->lock); dcs = BTRFS_DC_ERROR;
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
ret = 0; ret = 0;
#ifdef DEBUG #ifdef DEBUG
btrfs_err(root->fs_info, btrfs_err(root->fs_info,
@ -1277,6 +1274,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
#endif #endif
} }
spin_lock(&block_group->lock);
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
iput(inode); iput(inode);
return ret; return ret;
} }
@ -2903,7 +2903,6 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
min_bytes); min_bytes);
INIT_LIST_HEAD(&bitmaps);
ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
bytes + empty_size, bytes + empty_size,
cont1_bytes, min_bytes); cont1_bytes, min_bytes);

View File

@ -344,6 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
return -ENOMEM; return -ENOMEM;
path->leave_spinning = 1; path->leave_spinning = 1;
path->skip_release_on_error = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key, ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len); ins_len);
if (ret == -EEXIST) { if (ret == -EEXIST) {
@ -362,8 +363,12 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
ptr = (unsigned long)(ref + 1); ptr = (unsigned long)(ref + 1);
ret = 0; ret = 0;
} else if (ret < 0) { } else if (ret < 0) {
if (ret == -EOVERFLOW) if (ret == -EOVERFLOW) {
ret = -EMLINK; if (find_name_in_backref(path, name, name_len, &ref))
ret = -EEXIST;
else
ret = -EMLINK;
}
goto out; goto out;
} else { } else {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0], ref = btrfs_item_ptr(path->nodes[0], path->slots[0],

View File

@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
static void btrfs_split_extent_hook(struct inode *inode, static void btrfs_split_extent_hook(struct inode *inode,
struct extent_state *orig, u64 split) struct extent_state *orig, u64 split)
{ {
u64 size;
/* not delalloc, ignore it */ /* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC)) if (!(orig->state & EXTENT_DELALLOC))
return; return;
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
u64 num_extents;
u64 new_size;
/*
* We need the largest size of the remaining extent to see if we
* need to add a new outstanding extent. Think of the following
* case
*
* [MAX_EXTENT_SIZEx2 - 4k][4k]
*
* The new_size would just be 4k and we'd think we had enough
* outstanding extents for this if we only took one side of the
* split, same goes for the other direction. We need to see if
* the larger size still spans the same number of extents as the
* original size, because if it does we need to add a new
* outstanding extent. But if we split up and the larger size
* is less than the original then we are good to go since we've
* already accounted for the extra extent in our original
* accounting.
*/
new_size = orig->end - split + 1;
if ((split - orig->start) > new_size)
new_size = split - orig->start;
num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) < num_extents)
return;
}
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++; BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
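The split accounting above boils down to ceiling division by BTRFS_MAX_EXTENT_SIZE (128MiB in this series): a new outstanding extent is only added when the larger remaining piece still rounds up to as many max-sized extents as the original range did. A hedged arithmetic sketch of that check, with local names standing in for the kernel helpers:

#include <stdint.h>
#include <stdio.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)  /* BTRFS_MAX_EXTENT_SIZE */

/* ceil(len / MAX_EXTENT_SIZE) -- the role div64_u64() plays above */
static uint64_t nr_extents(uint64_t len)
{
	return (len + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
}

int main(void)
{
	/* the [MAX_EXTENT_SIZEx2 - 4k][4k] case from the comment above */
	uint64_t orig   = 2 * MAX_EXTENT_SIZE - 4096;  /* rounds up to 2 */
	uint64_t larger = 2 * MAX_EXTENT_SIZE - 8192;  /* still 2 */

	/* not smaller than the original count -> account one more extent */
	printf("orig=%llu larger=%llu add_new=%d\n",
	       (unsigned long long)nr_extents(orig),
	       (unsigned long long)nr_extents(larger),
	       nr_extents(larger) >= nr_extents(orig));
	return 0;
}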
@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode,
struct extent_state *new, struct extent_state *new,
struct extent_state *other) struct extent_state *other)
{ {
u64 new_size, old_size;
u64 num_extents;
/* not delalloc, ignore it */ /* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC)) if (!(other->state & EXTENT_DELALLOC))
return; return;
old_size = other->end - other->start + 1;
new_size = old_size + (new->end - new->start + 1);
/* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
return;
}
/*
* If we grew by another max_extent, just return; we want to keep that
* reserved amount.
*/
num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) > num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--; BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
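The merge side applies the same rounding in reverse, as in the sketch above: merging a 4k piece into a (BTRFS_MAX_EXTENT_SIZE - 4k) extent yields exactly one max-sized extent, so one reservation is dropped, while merging 8k into it crosses the boundary (the count rounds up from one extent to two), so both reservations are kept.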
@ -1604,7 +1663,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
* have pending delalloc work to be done. * have pending delalloc work to be done.
*/ */
static void btrfs_set_bit_hook(struct inode *inode, static void btrfs_set_bit_hook(struct inode *inode,
struct extent_state *state, unsigned long *bits) struct extent_state *state, unsigned *bits)
{ {
if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@ -1645,9 +1704,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
*/ */
static void btrfs_clear_bit_hook(struct inode *inode, static void btrfs_clear_bit_hook(struct inode *inode,
struct extent_state *state, struct extent_state *state,
unsigned long *bits) unsigned *bits)
{ {
u64 len = state->end + 1 - state->start; u64 len = state->end + 1 - state->start;
u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
*bits &= ~EXTENT_FIRST_DELALLOC; *bits &= ~EXTENT_FIRST_DELALLOC;
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) { } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--; BTRFS_I(inode)->outstanding_extents -= num_extents;
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
} }
@ -2945,7 +3006,7 @@ static int __readpage_endio_check(struct inode *inode,
return 0; return 0;
zeroit: zeroit:
if (__ratelimit(&_rs)) if (__ratelimit(&_rs))
btrfs_info(BTRFS_I(inode)->root->fs_info, btrfs_warn(BTRFS_I(inode)->root->fs_info,
"csum failed ino %llu off %llu csum %u expected csum %u", "csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum, csum_expected); btrfs_ino(inode), start, csum, csum_expected);
memset(kaddr + pgoff, 1, len); memset(kaddr + pgoff, 1, len);
@ -3407,7 +3468,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
out: out:
if (ret) if (ret)
btrfs_crit(root->fs_info, btrfs_err(root->fs_info,
"could not do orphan cleanup %d", ret); "could not do orphan cleanup %d", ret);
btrfs_free_path(path); btrfs_free_path(path);
return ret; return ret;
@ -3490,7 +3551,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
struct btrfs_path *path; struct btrfs_path *path;
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item; struct btrfs_inode_item *inode_item;
struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location; struct btrfs_key location;
unsigned long ptr; unsigned long ptr;
@ -3527,17 +3587,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
tspec = btrfs_inode_atime(inode_item); inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
tspec = btrfs_inode_mtime(inode_item); inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
tspec = btrfs_inode_ctime(inode_item); inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_timespec_sec(leaf, &inode_item->otime);
BTRFS_I(inode)->i_otime.tv_nsec =
btrfs_timespec_nsec(leaf, &inode_item->otime);
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
@ -3656,21 +3718,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), btrfs_set_token_timespec_sec(leaf, &item->atime,
inode->i_atime.tv_sec, &token); inode->i_atime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), btrfs_set_token_timespec_nsec(leaf, &item->atime,
inode->i_atime.tv_nsec, &token); inode->i_atime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), btrfs_set_token_timespec_sec(leaf, &item->mtime,
inode->i_mtime.tv_sec, &token); inode->i_mtime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), btrfs_set_token_timespec_nsec(leaf, &item->mtime,
inode->i_mtime.tv_nsec, &token); inode->i_mtime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), btrfs_set_token_timespec_sec(leaf, &item->ctime,
inode->i_ctime.tv_sec, &token); inode->i_ctime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), btrfs_set_token_timespec_nsec(leaf, &item->ctime,
inode->i_ctime.tv_nsec, &token); inode->i_ctime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, &item->otime,
BTRFS_I(inode)->i_otime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, &item->otime,
BTRFS_I(inode)->i_otime.tv_nsec, &token);
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token); &token);
btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
@ -5007,6 +5074,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
struct btrfs_root *new_root; struct btrfs_root *new_root;
struct btrfs_root_ref *ref; struct btrfs_root_ref *ref;
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct btrfs_key key;
int ret; int ret;
int err = 0; int err = 0;
@ -5017,9 +5085,12 @@ static int fixup_tree_root_location(struct btrfs_root *root,
} }
err = -ENOENT; err = -ENOENT;
ret = btrfs_find_item(root->fs_info->tree_root, path, key.objectid = BTRFS_I(dir)->root->root_key.objectid;
BTRFS_I(dir)->root->root_key.objectid, key.type = BTRFS_ROOT_REF_KEY;
location->objectid, BTRFS_ROOT_REF_KEY, NULL); key.offset = location->objectid;
ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
0, 0);
if (ret) { if (ret) {
if (ret < 0) if (ret < 0)
err = ret; err = ret;
@ -5258,7 +5329,10 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_op = &btrfs_dir_ro_inode_operations; inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations; inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = CURRENT_TIME;
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode; return inode;
} }
@ -5826,7 +5900,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_init_owner(inode, dir, mode); inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0); inode_set_bytes(inode, 0);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item); struct btrfs_inode_item);
memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
@ -7134,11 +7213,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits; u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend; u64 lockstart, lockend;
u64 len = bh_result->b_size; u64 len = bh_result->b_size;
u64 orig_len = len;
int unlock_bits = EXTENT_LOCKED; int unlock_bits = EXTENT_LOCKED;
int ret = 0; int ret = 0;
if (create) if (create)
unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; unlock_bits |= EXTENT_DIRTY;
else else
len = min_t(u64, len, root->sectorsize); len = min_t(u64, len, root->sectorsize);
@ -7269,14 +7349,12 @@ unlock:
if (start + len > i_size_read(inode)) if (start + len > i_size_read(inode))
i_size_write(inode, start + len); i_size_write(inode, start + len);
spin_lock(&BTRFS_I(inode)->lock); if (len < orig_len) {
BTRFS_I(inode)->outstanding_extents++; spin_lock(&BTRFS_I(inode)->lock);
spin_unlock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, }
lockstart + len - 1, EXTENT_DELALLOC, NULL, btrfs_free_reserved_data_space(inode, len);
&cached_state, GFP_NOFS);
BUG_ON(ret);
} }
/* /*
@ -7805,8 +7883,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
} }
/* async crcs make it difficult to collect full stripe writes. */ /* async crcs make it difficult to collect full stripe writes. */
if (btrfs_get_alloc_profile(root, 1) & if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
(BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
async_submit = 0; async_submit = 0;
else else
async_submit = 1; async_submit = 1;
@ -8053,8 +8130,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
else if (ret >= 0 && (size_t)ret < count) else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, btrfs_delalloc_release_space(inode,
count - (size_t)ret); count - (size_t)ret);
else
btrfs_delalloc_release_metadata(inode, 0);
} }
out: out:
if (wakeup) if (wakeup)
@ -8575,6 +8650,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->delayed_node = NULL; ei->delayed_node = NULL;
ei->i_otime.tv_sec = 0;
ei->i_otime.tv_nsec = 0;
inode = &ei->vfs_inode; inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree); extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(&ei->io_tree, &inode->i_data); extent_io_tree_init(&ei->io_tree, &inode->i_data);

View File

@ -1431,9 +1431,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup = u64_to_ptr(unode->aux); qgroup = u64_to_ptr(unode->aux);
qgroup->rfer += sign * oper->num_bytes; qgroup->rfer += sign * oper->num_bytes;
qgroup->rfer_cmpr += sign * oper->num_bytes; qgroup->rfer_cmpr += sign * oper->num_bytes;
WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
qgroup->excl += sign * oper->num_bytes; qgroup->excl += sign * oper->num_bytes;
if (sign < 0)
WARN_ON(qgroup->excl < oper->num_bytes);
qgroup->excl_cmpr += sign * oper->num_bytes; qgroup->excl_cmpr += sign * oper->num_bytes;
qgroup_dirty(fs_info, qgroup); qgroup_dirty(fs_info, qgroup);

View File

@ -58,15 +58,6 @@
*/ */
#define RBIO_CACHE_READY_BIT 3 #define RBIO_CACHE_READY_BIT 3
/*
* bbio and raid_map is managed by the caller, so we shouldn't free
* them here. And besides that, all rbios with this flag should not
* be cached, because we need raid_map to check the rbios' stripe
* is the same or not, but it is very likely that the caller has
* free raid_map, so don't cache those rbios.
*/
#define RBIO_HOLD_BBIO_MAP_BIT 4
#define RBIO_CACHE_SIZE 1024 #define RBIO_CACHE_SIZE 1024
enum btrfs_rbio_ops { enum btrfs_rbio_ops {
@ -79,13 +70,6 @@ struct btrfs_raid_bio {
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
struct btrfs_bio *bbio; struct btrfs_bio *bbio;
/*
* logical block numbers for the start of each stripe
* The last one or two are p/q. These are sorted,
* so raid_map[0] is the start of our full stripe
*/
u64 *raid_map;
/* while we're doing rmw on a stripe /* while we're doing rmw on a stripe
* we put it into a hash table so we can * we put it into a hash table so we can
* lock the stripe and merge more rbios * lock the stripe and merge more rbios
@ -303,7 +287,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
*/ */
static int rbio_bucket(struct btrfs_raid_bio *rbio) static int rbio_bucket(struct btrfs_raid_bio *rbio)
{ {
u64 num = rbio->raid_map[0]; u64 num = rbio->bbio->raid_map[0];
/* /*
* we shift down quite a bit. We're using byte * we shift down quite a bit. We're using byte
@ -606,8 +590,8 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
test_bit(RBIO_CACHE_BIT, &cur->flags)) test_bit(RBIO_CACHE_BIT, &cur->flags))
return 0; return 0;
if (last->raid_map[0] != if (last->bbio->raid_map[0] !=
cur->raid_map[0]) cur->bbio->raid_map[0])
return 0; return 0;
/* we can't merge with different operations */ /* we can't merge with different operations */
@ -689,7 +673,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
spin_lock_irqsave(&h->lock, flags); spin_lock_irqsave(&h->lock, flags);
list_for_each_entry(cur, &h->hash_list, hash_list) { list_for_each_entry(cur, &h->hash_list, hash_list) {
walk++; walk++;
if (cur->raid_map[0] == rbio->raid_map[0]) { if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
spin_lock(&cur->bio_list_lock); spin_lock(&cur->bio_list_lock);
/* can we steal this cached rbio's pages? */ /* can we steal this cached rbio's pages? */
@ -841,21 +825,6 @@ done_nolock:
remove_rbio_from_cache(rbio); remove_rbio_from_cache(rbio);
} }
static inline void
__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
{
if (need) {
kfree(raid_map);
kfree(bbio);
}
}
static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
{
__free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
!test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
}
static void __free_raid_bio(struct btrfs_raid_bio *rbio) static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{ {
int i; int i;
@ -875,8 +844,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
} }
} }
free_bbio_and_raid_map(rbio); btrfs_put_bbio(rbio->bbio);
kfree(rbio); kfree(rbio);
} }
@ -985,8 +953,7 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
* this does not allocate any pages for rbio->pages. * this does not allocate any pages for rbio->pages.
*/ */
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len)
u64 stripe_len)
{ {
struct btrfs_raid_bio *rbio; struct btrfs_raid_bio *rbio;
int nr_data = 0; int nr_data = 0;
@ -1007,7 +974,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
INIT_LIST_HEAD(&rbio->stripe_cache); INIT_LIST_HEAD(&rbio->stripe_cache);
INIT_LIST_HEAD(&rbio->hash_list); INIT_LIST_HEAD(&rbio->hash_list);
rbio->bbio = bbio; rbio->bbio = bbio;
rbio->raid_map = raid_map;
rbio->fs_info = root->fs_info; rbio->fs_info = root->fs_info;
rbio->stripe_len = stripe_len; rbio->stripe_len = stripe_len;
rbio->nr_pages = num_pages; rbio->nr_pages = num_pages;
@ -1028,10 +994,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
rbio->bio_pages = p + sizeof(struct page *) * num_pages; rbio->bio_pages = p + sizeof(struct page *) * num_pages;
rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
nr_data = real_stripes - 1;
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
nr_data = real_stripes - 2; nr_data = real_stripes - 2;
else else
nr_data = real_stripes - 1; BUG();
rbio->nr_data = nr_data; rbio->nr_data = nr_data;
return rbio; return rbio;
@ -1182,7 +1150,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
spin_lock_irq(&rbio->bio_list_lock); spin_lock_irq(&rbio->bio_list_lock);
bio_list_for_each(bio, &rbio->bio_list) { bio_list_for_each(bio, &rbio->bio_list) {
start = (u64)bio->bi_iter.bi_sector << 9; start = (u64)bio->bi_iter.bi_sector << 9;
stripe_offset = start - rbio->raid_map[0]; stripe_offset = start - rbio->bbio->raid_map[0];
page_index = stripe_offset >> PAGE_CACHE_SHIFT; page_index = stripe_offset >> PAGE_CACHE_SHIFT;
for (i = 0; i < bio->bi_vcnt; i++) { for (i = 0; i < bio->bi_vcnt; i++) {
@ -1402,7 +1370,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
logical <<= 9; logical <<= 9;
for (i = 0; i < rbio->nr_data; i++) { for (i = 0; i < rbio->nr_data; i++) {
stripe_start = rbio->raid_map[i]; stripe_start = rbio->bbio->raid_map[i];
if (logical >= stripe_start && if (logical >= stripe_start &&
logical < stripe_start + rbio->stripe_len) { logical < stripe_start + rbio->stripe_len) {
return i; return i;
@ -1776,17 +1744,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
* our main entry point for writes from the rest of the FS. * our main entry point for writes from the rest of the FS.
*/ */
int raid56_parity_write(struct btrfs_root *root, struct bio *bio, int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len)
u64 stripe_len)
{ {
struct btrfs_raid_bio *rbio; struct btrfs_raid_bio *rbio;
struct btrfs_plug_cb *plug = NULL; struct btrfs_plug_cb *plug = NULL;
struct blk_plug_cb *cb; struct blk_plug_cb *cb;
int ret; int ret;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len); rbio = alloc_rbio(root, bbio, stripe_len);
if (IS_ERR(rbio)) { if (IS_ERR(rbio)) {
__free_bbio_and_raid_map(bbio, raid_map, 1); btrfs_put_bbio(bbio);
return PTR_ERR(rbio); return PTR_ERR(rbio);
} }
bio_list_add(&rbio->bio_list, bio); bio_list_add(&rbio->bio_list, bio);
@ -1885,9 +1852,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
} }
/* all raid6 handling here */ /* all raid6 handling here */
if (rbio->raid_map[rbio->real_stripes - 1] == if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
RAID6_Q_STRIPE) {
/* /*
* single failure, rebuild from parity raid5 * single failure, rebuild from parity raid5
* style * style
@ -1922,8 +1887,9 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
* here due to a crc mismatch and we can't give them the * here due to a crc mismatch and we can't give them the
* data they want * data they want
*/ */
if (rbio->raid_map[failb] == RAID6_Q_STRIPE) { if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->raid_map[faila] == RAID5_P_STRIPE) { if (rbio->bbio->raid_map[faila] ==
RAID5_P_STRIPE) {
err = -EIO; err = -EIO;
goto cleanup; goto cleanup;
} }
@ -1934,7 +1900,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
goto pstripe; goto pstripe;
} }
if (rbio->raid_map[failb] == RAID5_P_STRIPE) { if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
raid6_datap_recov(rbio->real_stripes, raid6_datap_recov(rbio->real_stripes,
PAGE_SIZE, faila, pointers); PAGE_SIZE, faila, pointers);
} else { } else {
@ -2001,8 +1967,7 @@ cleanup:
cleanup_io: cleanup_io:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
if (err == 0 && if (err == 0)
!test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
cache_rbio_pages(rbio); cache_rbio_pages(rbio);
else else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@ -2156,15 +2121,16 @@ cleanup:
* of the drive. * of the drive.
*/ */
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len,
u64 stripe_len, int mirror_num, int generic_io) int mirror_num, int generic_io)
{ {
struct btrfs_raid_bio *rbio; struct btrfs_raid_bio *rbio;
int ret; int ret;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len); rbio = alloc_rbio(root, bbio, stripe_len);
if (IS_ERR(rbio)) { if (IS_ERR(rbio)) {
__free_bbio_and_raid_map(bbio, raid_map, generic_io); if (generic_io)
btrfs_put_bbio(bbio);
return PTR_ERR(rbio); return PTR_ERR(rbio);
} }
@ -2175,7 +2141,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
rbio->faila = find_logical_bio_stripe(rbio, bio); rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) { if (rbio->faila == -1) {
BUG(); BUG();
__free_bbio_and_raid_map(bbio, raid_map, generic_io); if (generic_io)
btrfs_put_bbio(bbio);
kfree(rbio); kfree(rbio);
return -EIO; return -EIO;
} }
@ -2184,7 +2151,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
btrfs_bio_counter_inc_noblocked(root->fs_info); btrfs_bio_counter_inc_noblocked(root->fs_info);
rbio->generic_bio_cnt = 1; rbio->generic_bio_cnt = 1;
} else { } else {
set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); btrfs_get_bbio(bbio);
} }
/* /*
@ -2240,14 +2207,14 @@ static void read_rebuild_work(struct btrfs_work *work)
struct btrfs_raid_bio * struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len,
u64 stripe_len, struct btrfs_device *scrub_dev, struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors) unsigned long *dbitmap, int stripe_nsectors)
{ {
struct btrfs_raid_bio *rbio; struct btrfs_raid_bio *rbio;
int i; int i;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len); rbio = alloc_rbio(root, bbio, stripe_len);
if (IS_ERR(rbio)) if (IS_ERR(rbio))
return NULL; return NULL;
bio_list_add(&rbio->bio_list, bio); bio_list_add(&rbio->bio_list, bio);
@ -2279,10 +2246,10 @@ void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
int stripe_offset; int stripe_offset;
int index; int index;
ASSERT(logical >= rbio->raid_map[0]); ASSERT(logical >= rbio->bbio->raid_map[0]);
ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
rbio->stripe_len * rbio->nr_data); rbio->stripe_len * rbio->nr_data);
stripe_offset = (int)(logical - rbio->raid_map[0]); stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
index = stripe_offset >> PAGE_CACHE_SHIFT; index = stripe_offset >> PAGE_CACHE_SHIFT;
rbio->bio_pages[index] = page; rbio->bio_pages[index] = page;
} }

View File

@ -43,16 +43,15 @@ struct btrfs_raid_bio;
struct btrfs_device; struct btrfs_device;
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len,
u64 stripe_len, int mirror_num, int generic_io); int mirror_num, int generic_io);
int raid56_parity_write(struct btrfs_root *root, struct bio *bio, int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len);
u64 stripe_len);
struct btrfs_raid_bio * struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map, struct btrfs_bio *bbio, u64 stripe_len,
u64 stripe_len, struct btrfs_device *scrub_dev, struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors); unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
struct page *page, u64 logical); struct page *page, u64 logical);

View File

@@ -66,7 +66,6 @@ struct reada_extctl {
 struct reada_extent {
 	u64			logical;
 	struct btrfs_key	top;
-	u32			blocksize;
 	int			err;
 	struct list_head	extctl;
 	int			refcnt;
@@ -349,7 +348,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 	blocksize = root->nodesize;
 	re->logical = logical;
-	re->blocksize = blocksize;
 	re->top = *top;
 	INIT_LIST_HEAD(&re->extctl);
 	spin_lock_init(&re->lock);
@@ -463,7 +461,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 	spin_unlock(&fs_info->reada_lock);
 	btrfs_dev_replace_unlock(&fs_info->dev_replace);

-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 	return re;

error:
@@ -488,7 +486,7 @@ error:
 		kref_put(&zone->refcnt, reada_zone_release);
 		spin_unlock(&fs_info->reada_lock);
 	}
-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 	kfree(re);
 	return re_exist;
 }
@@ -660,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
 	int mirror_num = 0;
 	struct extent_buffer *eb = NULL;
 	u64 logical;
-	u32 blocksize;
 	int ret;
 	int i;
 	int need_kick = 0;
@@ -694,7 +691,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
 		spin_unlock(&fs_info->reada_lock);
 		return 0;
 	}
-	dev->reada_next = re->logical + re->blocksize;
+	dev->reada_next = re->logical + fs_info->tree_root->nodesize;
 	re->refcnt++;

 	spin_unlock(&fs_info->reada_lock);
@@ -709,7 +706,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
 		}
 	}
 	logical = re->logical;
-	blocksize = re->blocksize;

 	spin_lock(&re->lock);
 	if (re->scheduled_for == NULL) {
@@ -724,8 +720,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
 		return 0;

 	atomic_inc(&dev->reada_in_flight);
-	ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize,
-			mirror_num, &eb);
+	ret = reada_tree_block_flagged(fs_info->extent_root, logical,
+			mirror_num, &eb);
 	if (ret)
 		__readahead_hook(fs_info->extent_root, NULL, logical, ret);
 	else if (eb)
@@ -851,7 +847,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
 				break;
 			printk(KERN_DEBUG
 				"  re: logical %llu size %u empty %d for %lld",
-				re->logical, re->blocksize,
+				re->logical, fs_info->tree_root->nodesize,
 				list_empty(&re->extctl), re->scheduled_for ?
 				re->scheduled_for->devid : -1);
@@ -886,7 +882,8 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
 		}
 		printk(KERN_DEBUG
 			"re: logical %llu size %u list empty %d for %lld",
-			re->logical, re->blocksize, list_empty(&re->extctl),
+			re->logical, fs_info->tree_root->nodesize,
+			list_empty(&re->extctl),
 			re->scheduled_for ? re->scheduled_for->devid : -1);
 		for (i = 0; i < re->nzones; ++i) {
 			printk(KERN_CONT " zone %llu-%llu devs",

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c

@@ -2855,9 +2855,10 @@ static void update_processed_blocks(struct reloc_control *rc,
 	}
 }

-static int tree_block_processed(u64 bytenr, u32 blocksize,
-				struct reloc_control *rc)
+static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
 {
+	u32 blocksize = rc->extent_root->nodesize;
+
 	if (test_range_bit(&rc->processed_blocks, bytenr,
 			bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
 		return 1;
@@ -2965,8 +2966,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
 	while (rb_node) {
 		block = rb_entry(rb_node, struct tree_block, rb_node);
 		if (!block->key_ready)
-			readahead_tree_block(rc->extent_root, block->bytenr,
-					block->key.objectid);
+			readahead_tree_block(rc->extent_root, block->bytenr);
 		rb_node = rb_next(rb_node);
 	}
@@ -3353,7 +3353,7 @@ static int __add_tree_block(struct reloc_control *rc,
 	bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info,
 					SKINNY_METADATA);

-	if (tree_block_processed(bytenr, blocksize, rc))
+	if (tree_block_processed(bytenr, rc))
 		return 0;

 	if (tree_search(blocks, bytenr))
@@ -3611,7 +3611,7 @@ static int find_data_references(struct reloc_control *rc,
 		if (added)
 			goto next;

-		if (!tree_block_processed(leaf->start, leaf->len, rc)) {
+		if (!tree_block_processed(leaf->start, rc)) {
 			block = kmalloc(sizeof(*block), GFP_NOFS);
 			if (!block) {
 				err = -ENOMEM;

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

@@ -66,7 +66,6 @@ struct scrub_ctx;
 struct scrub_recover {
 	atomic_t		refs;
 	struct btrfs_bio	*bbio;
-	u64			*raid_map;
 	u64			map_length;
 };
@@ -80,7 +79,7 @@ struct scrub_page {
 	u64			logical;
 	u64			physical;
 	u64			physical_for_dev_replace;
-	atomic_t		ref_count;
+	atomic_t		refs;
 	struct {
 		unsigned int	mirror_num:8;
 		unsigned int	have_csum:1;
@@ -113,7 +112,7 @@ struct scrub_block {
 	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
-	atomic_t		ref_count; /* free mem on transition to zero */
+	atomic_t		refs; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
 	struct scrub_parity	*sparity;
 	struct {
@@ -142,7 +141,7 @@ struct scrub_parity {
 	int			stripe_len;

-	atomic_t		ref_count;
+	atomic_t		refs;

 	struct list_head	spages;
@@ -194,6 +193,15 @@ struct scrub_ctx {
 	 */
 	struct btrfs_scrub_progress stat;
 	spinlock_t		stat_lock;
+
+	/*
+	 * Use a ref counter to avoid use-after-free issues. Scrub workers
+	 * decrement bios_in_flight and workers_pending and then do a wakeup
+	 * on the list_wait wait queue. We must ensure the main scrub task
+	 * doesn't free the scrub context before or while the workers are
+	 * doing the wakeup() call.
+	 */
+	atomic_t		refs;
 };

 struct scrub_fixup_nodatasum {
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-				     struct btrfs_fs_info *fs_info,
-				     struct scrub_block *original_sblock,
-				     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck);
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 				struct scrub_block *sblock, int is_metadata,
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 					 const u8 *csum, u64 generation,
 					 u16 csum_size);
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-					     struct scrub_block *sblock_good,
-					     int force_write);
+					     struct scrub_block *sblock_good);
 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write);
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_put_ctx(struct scrub_ctx *sctx);

 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
+	atomic_inc(&sctx->refs);
 	atomic_inc(&sctx->bios_in_flight);
 }
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
 {
 	atomic_dec(&sctx->bios_in_flight);
 	wake_up(&sctx->list_wait);
+	scrub_put_ctx(sctx);
 }

 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
 {
 	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

+	atomic_inc(&sctx->refs);
 	/*
 	 * increment scrubs_running to prevent cancel requests from
 	 * completing as long as a worker is running. we must also
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
 	atomic_dec(&sctx->workers_pending);
 	wake_up(&fs_info->scrub_pause_wait);
 	wake_up(&sctx->list_wait);
+	scrub_put_ctx(sctx);
 }

 static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 	kfree(sctx);
 }

+static void scrub_put_ctx(struct scrub_ctx *sctx)
+{
+	if (atomic_dec_and_test(&sctx->refs))
+		scrub_free_ctx(sctx);
+}
+
 static noinline_for_stack
 struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 {
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
 	if (!sctx)
 		goto nomem;
+	atomic_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
 	sctx->pages_per_rd_bio = pages_per_rd_bio;
 	sctx->curr = -1;
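The refs counter introduced above enforces a simple lifetime rule: the scrub context may only be freed once every in-flight bio and pending worker has finished its final wake_up() on list_wait. The stand-alone userspace model below shows the shape of that rule with C11 atomics instead of kernel atomics; every identifier in it is invented for illustration and none of it is btrfs API.

/* Userspace model of the scrub_ctx refcount pattern; not kernel code. */
#include <stdlib.h>
#include <stdatomic.h>

struct ctx {
	atomic_int refs;		/* one ref for the owner, one per worker */
	atomic_int bios_in_flight;
};

static struct ctx *ctx_alloc(void)
{
	struct ctx *c = calloc(1, sizeof(*c));

	atomic_init(&c->refs, 1);	/* the owner's reference */
	return c;
}

static void ctx_put(struct ctx *c)
{
	/* free only when the last user (owner or worker) drops its ref */
	if (atomic_fetch_sub(&c->refs, 1) == 1)
		free(c);
}

static void worker_start(struct ctx *c)
{
	atomic_fetch_add(&c->refs, 1);	/* pin the ctx for this worker */
	atomic_fetch_add(&c->bios_in_flight, 1);
}

static void worker_finish(struct ctx *c)
{
	atomic_fetch_sub(&c->bios_in_flight, 1);
	/* ... wake up waiters here; the ctx is still pinned ... */
	ctx_put(c);			/* safe: the wakeup already happened */
}

int main(void)
{
	struct ctx *c = ctx_alloc();

	worker_start(c);
	worker_finish(c);
	ctx_put(c);			/* owner drops the last reference */
	return 0;
}

The point of taking the extra reference before the wakeup is that the waiter may free the structure the instant the counter it sleeps on drops; holding a reference across the wakeup keeps the memory valid until the very last user lets go.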
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	struct inode_fs_paths *ipath = NULL;
 	struct btrfs_root *local_root;
 	struct btrfs_key root_key;
+	struct btrfs_key key;

 	root_key.objectid = root;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -530,7 +547,14 @@
 		goto err;
 	}

-	ret = inode_item_info(inum, 0, local_root, swarn->path);
+	/*
+	 * this makes the path point to (inum INODE_ITEM ioff)
+	 */
+	key.objectid = inum;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
 	if (ret) {
 		btrfs_release_path(swarn->path);
 		goto err;
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
 static inline void scrub_put_recover(struct scrub_recover *recover)
 {
 	if (atomic_dec_and_test(&recover->refs)) {
-		kfree(recover->bbio);
-		kfree(recover->raid_map);
+		btrfs_put_bbio(recover->bbio);
 		kfree(recover);
 	}
 }
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	}

 	/* setup the context, map the logical blocks and alloc the pages */
-	ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
-					logical, sblocks_for_recheck);
+	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
 	if (ret) {
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.read_errors++;
@@ -1030,9 +1052,10 @@
 	if (!is_metadata && !have_csum) {
 		struct scrub_fixup_nodatasum *fixup_nodatasum;

-nodatasum_case:
 		WARN_ON(sctx->is_dev_replace);

+nodatasum_case:
+
 		/*
 		 * !is_metadata and !have_csum, this means that the data
 		 * might not be COW'ed, that it might be modified
@@ -1091,76 +1114,20 @@ nodatasum_case:
 		    sblock_other->no_io_error_seen) {
 			if (sctx->is_dev_replace) {
 				scrub_write_block_to_dev_replace(sblock_other);
-			} else {
-				int force_write = is_metadata || have_csum;
-
-				ret = scrub_repair_block_from_good_copy(
-						sblock_bad, sblock_other,
-						force_write);
-			}
-			if (0 == ret)
 				goto corrected_error;
+			} else {
+				ret = scrub_repair_block_from_good_copy(
+						sblock_bad, sblock_other);
+				if (!ret)
+					goto corrected_error;
+			}
 		}
 	}

-	/*
-	 * for dev_replace, pick good pages and write to the target device.
-	 */
-	if (sctx->is_dev_replace) {
-		success = 1;
-		for (page_num = 0; page_num < sblock_bad->page_count;
-		     page_num++) {
-			int sub_success;
-
-			sub_success = 0;
-			for (mirror_index = 0;
-			     mirror_index < BTRFS_MAX_MIRRORS &&
-			     sblocks_for_recheck[mirror_index].page_count > 0;
-			     mirror_index++) {
-				struct scrub_block *sblock_other =
-					sblocks_for_recheck + mirror_index;
-				struct scrub_page *page_other =
-					sblock_other->pagev[page_num];
-
-				if (!page_other->io_error) {
-					ret = scrub_write_page_to_dev_replace(
-							sblock_other, page_num);
-					if (ret == 0) {
-						/* succeeded for this page */
-						sub_success = 1;
-						break;
-					} else {
-						btrfs_dev_replace_stats_inc(
-							&sctx->dev_root->
-							fs_info->dev_replace.
-							num_write_errors);
-					}
-				}
-			}
-
-			if (!sub_success) {
-				/*
-				 * did not find a mirror to fetch the page
-				 * from. scrub_write_page_to_dev_replace()
-				 * handles this case (page->io_error), by
-				 * filling the block with zeros before
-				 * submitting the write request
-				 */
-				success = 0;
-				ret = scrub_write_page_to_dev_replace(
-						sblock_bad, page_num);
-				if (ret)
-					btrfs_dev_replace_stats_inc(
-						&sctx->dev_root->fs_info->
-						dev_replace.num_write_errors);
-			}
-		}
-
-		goto out;
-	}
+	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
+		goto did_not_correct_error;

 	/*
-	 * for regular scrub, repair those pages that are errored.
 	 * In case of I/O errors in the area that is supposed to be
 	 * repaired, continue by picking good copies of those pages.
 	 * Select the good pages from mirrors to rewrite bad pages from
@@ -1184,44 +1151,64 @@ nodatasum_case:
 	 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
 	 * area are unreadable.
 	 */
-
-	/* can only fix I/O errors from here on */
-	if (sblock_bad->no_io_error_seen)
-		goto did_not_correct_error;
-
 	success = 1;
-	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
+	for (page_num = 0; page_num < sblock_bad->page_count;
+	     page_num++) {
 		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+		struct scrub_block *sblock_other = NULL;

-		if (!page_bad->io_error)
+		/* skip no-io-error page in scrub */
+		if (!page_bad->io_error && !sctx->is_dev_replace)
 			continue;

-		for (mirror_index = 0;
-		     mirror_index < BTRFS_MAX_MIRRORS &&
-		     sblocks_for_recheck[mirror_index].page_count > 0;
-		     mirror_index++) {
-			struct scrub_block *sblock_other = sblocks_for_recheck +
-							   mirror_index;
-			struct scrub_page *page_other = sblock_other->pagev[
-							page_num];
-
-			if (!page_other->io_error) {
-				ret = scrub_repair_page_from_good_copy(
-					sblock_bad, sblock_other, page_num, 0);
-				if (0 == ret) {
-					page_bad->io_error = 0;
-					break; /* succeeded for this page */
+		/* try to find no-io-error page in mirrors */
+		if (page_bad->io_error) {
+			for (mirror_index = 0;
+			     mirror_index < BTRFS_MAX_MIRRORS &&
+			     sblocks_for_recheck[mirror_index].page_count > 0;
+			     mirror_index++) {
+				if (!sblocks_for_recheck[mirror_index].
+				    pagev[page_num]->io_error) {
+					sblock_other = sblocks_for_recheck +
+						       mirror_index;
+					break;
 				}
 			}
+			if (!sblock_other)
+				success = 0;
 		}

-		if (page_bad->io_error) {
-			/* did not find a mirror to copy the page from */
-			success = 0;
+		if (sctx->is_dev_replace) {
+			/*
+			 * did not find a mirror to fetch the page
+			 * from. scrub_write_page_to_dev_replace()
+			 * handles this case (page->io_error), by
+			 * filling the block with zeros before
+			 * submitting the write request
+			 */
+			if (!sblock_other)
+				sblock_other = sblock_bad;
+
+			if (scrub_write_page_to_dev_replace(sblock_other,
+							    page_num) != 0) {
+				btrfs_dev_replace_stats_inc(
+					&sctx->dev_root->
					fs_info->dev_replace.
+					num_write_errors);
+				success = 0;
+			}
+		} else if (sblock_other) {
+			ret = scrub_repair_page_from_good_copy(sblock_bad,
+							       sblock_other,
+							       page_num, 0);
+			if (0 == ret)
+				page_bad->io_error = 0;
+			else
+				success = 0;
 		}
 	}

-	if (success) {
+	if (success && !sctx->is_dev_replace) {
 		if (is_metadata || have_csum) {
 			/*
 			 * need to verify the checksum now that all
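The rewritten loop above folds the old dev-replace-only pass and the plain scrub repair pass into a single per-page walk: find any mirror whose copy of the page read cleanly, then either copy it over the bad page (scrub) or write the best available copy to the replacement target (dev-replace, falling back to the bad page itself, which scrub_write_page_to_dev_replace() zero-fills). A compilable toy model of that selection logic follows; the types, sizes and helper names are all invented for illustration.

/* Toy model of the unified per-page repair loop; not kernel code. */
#include <stdbool.h>

struct page_state { bool io_error; };

/* return the index of a mirror whose copy of page p is readable, or -1 */
static int find_good_mirror(struct page_state mirrors[][16],
			    int nmirrors, int p)
{
	for (int m = 0; m < nmirrors; m++)
		if (!mirrors[m][p].io_error)
			return m;
	return -1;
}

/* dev_replace mode rewrites every page (falling back to the bad copy);
 * plain scrub mode only touches pages that actually failed to read. */
static bool repair_block(struct page_state bad[16],
			 struct page_state mirrors[][16],
			 int nmirrors, int npages, bool dev_replace)
{
	bool success = true;

	for (int p = 0; p < npages; p++) {
		int good = -1;

		if (!bad[p].io_error && !dev_replace)
			continue;		/* nothing to do for scrub */
		if (bad[p].io_error) {
			good = find_good_mirror(mirrors, nmirrors, p);
			if (good < 0)
				success = false; /* no readable copy exists */
		}
		if (dev_replace) {
			/* write the best available copy to the new device */
		} else if (good >= 0) {
			bad[p] = mirrors[good][p]; /* copy the good page */
		}
	}
	return success;
}

int main(void)
{
	struct page_state bad[16] = { [2] = { .io_error = true } };
	struct page_state mirrors[2][16] = {{{ false }}};

	return repair_block(bad, mirrors, 2, 16, false) ? 0 : 1;
}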
@@ -1288,19 +1275,18 @@ out:
 	return 0;
 }

-static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
 {
-	if (raid_map) {
-		if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
-			return 3;
-		else
-			return 2;
-	} else {
+	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+		return 2;
+	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+		return 3;
+	else
 		return (int)bbio->num_stripes;
-	}
 }

-static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
+						 u64 *raid_map,
 						 u64 mapped_length,
 						 int nstripes, int mirror,
 						 int *stripe_index,
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
 {
 	int i;

-	if (raid_map) {
+	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		/* RAID5/6 */
 		for (i = 0; i < nstripes; i++) {
 			if (raid_map[i] == RAID6_Q_STRIPE ||
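For RAID5/6 the raid_map records the starting logical address of each stripe, with sentinel values in the parity slots, so mapping a logical address to a (stripe index, offset within stripe) pair is a linear scan that skips parity entries. A worked toy version of that address math, in userspace C; the sentinel macros and sizes below are stand-ins invented for illustration, not the kernel's definitions.

/* Toy RAID5/6 address math: given the start logical of each stripe in
 * raid_map[] and a target logical, find the stripe and the offset in it. */
#include <stdint.h>
#include <stdio.h>

#define Q_STRIPE ((uint64_t)-1)	/* stand-in for the RAID6 Q parity slot */
#define P_STRIPE ((uint64_t)-2)	/* stand-in for the P parity slot */

static int stripe_index_and_offset(uint64_t logical, const uint64_t *raid_map,
				   uint64_t stripe_len, int nstripes,
				   uint64_t *offset)
{
	for (int i = 0; i < nstripes; i++) {
		if (raid_map[i] == Q_STRIPE || raid_map[i] == P_STRIPE)
			continue;	/* parity holds no logical bytes */
		if (logical >= raid_map[i] &&
		    logical < raid_map[i] + stripe_len) {
			*offset = logical - raid_map[i];
			return i;
		}
	}
	return -1;
}

int main(void)
{
	/* two 64K data stripes plus P and Q parity */
	uint64_t raid_map[] = { 0, 65536, P_STRIPE, Q_STRIPE };
	uint64_t off;
	int idx = stripe_index_and_offset(70000, raid_map, 65536, 4, &off);

	/* prints "stripe 1 offset 4464" */
	printf("stripe %d offset %llu\n", idx, (unsigned long long)off);
	return 0;
}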
@@ -1329,72 +1315,65 @@
 	}
 }

-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-				     struct btrfs_fs_info *fs_info,
-				     struct scrub_block *original_sblock,
-				     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck)
 {
+	struct scrub_ctx *sctx = original_sblock->sctx;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	u64 length = original_sblock->page_count * PAGE_SIZE;
+	u64 logical = original_sblock->pagev[0]->logical;
 	struct scrub_recover *recover;
 	struct btrfs_bio *bbio;
-	u64 *raid_map;
 	u64 sublen;
 	u64 mapped_length;
 	u64 stripe_offset;
 	int stripe_index;
-	int page_index;
+	int page_index = 0;
 	int mirror_index;
 	int nmirrors;
 	int ret;

 	/*
-	 * note: the two members ref_count and outstanding_pages
+	 * note: the two members refs and outstanding_pages
 	 * are not used (and not set) in the blocks that are used for
 	 * the recheck procedure
 	 */

-	page_index = 0;
 	while (length > 0) {
 		sublen = min_t(u64, length, PAGE_SIZE);
 		mapped_length = sublen;
 		bbio = NULL;
-		raid_map = NULL;

 		/*
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
 		ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
-				       &mapped_length, &bbio, 0, &raid_map);
+				       &mapped_length, &bbio, 0, 1);
 		if (ret || !bbio || mapped_length < sublen) {
-			kfree(bbio);
-			kfree(raid_map);
+			btrfs_put_bbio(bbio);
 			return -EIO;
 		}

 		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
 		if (!recover) {
-			kfree(bbio);
-			kfree(raid_map);
+			btrfs_put_bbio(bbio);
 			return -ENOMEM;
 		}

 		atomic_set(&recover->refs, 1);
 		recover->bbio = bbio;
-		recover->raid_map = raid_map;
 		recover->map_length = mapped_length;

 		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);

-		nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
 		for (mirror_index = 0; mirror_index < nmirrors;
 		     mirror_index++) {
 			struct scrub_block *sblock;
 			struct scrub_page *page;

-			if (mirror_index >= BTRFS_MAX_MIRRORS)
-				continue;
-
 			sblock = sblocks_for_recheck + mirror_index;
 			sblock->sctx = sctx;
 			page = kzalloc(sizeof(*page), GFP_NOFS);
@@ -1410,9 +1389,12 @@ leave_nomem:
 			sblock->pagev[page_index] = page;
 			page->logical = logical;

-			scrub_stripe_index_and_offset(logical, raid_map,
+			scrub_stripe_index_and_offset(logical,
+						      bbio->map_type,
+						      bbio->raid_map,
 						      mapped_length,
-						      bbio->num_stripes,
+						      bbio->num_stripes -
+						      bbio->num_tgtdevs,
 						      mirror_index,
 						      &stripe_index,
 						      &stripe_offset);
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)

 static inline int scrub_is_page_on_raid56(struct scrub_page *page)
 {
-	return page->recover && page->recover->raid_map;
+	return page->recover &&
+	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
 }

 static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	bio->bi_end_io = scrub_bio_wait_endio;

 	ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
-				    page->recover->raid_map,
 				    page->recover->map_length,
 				    page->mirror_num, 0);
 	if (ret)
@@ -1615,8 +1597,7 @@
 }

 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-					     struct scrub_block *sblock_good,
-					     int force_write)
+					     struct scrub_block *sblock_good)
 {
 	int page_num;
 	int ret = 0;
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
 		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
 							   sblock_good,
-							   page_num,
-							   force_write);
+							   page_num, 1);
 		if (ret_sub)
 			ret = ret_sub;
 	}
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)

 static void scrub_block_get(struct scrub_block *sblock)
 {
-	atomic_inc(&sblock->ref_count);
+	atomic_inc(&sblock->refs);
 }

 static void scrub_block_put(struct scrub_block *sblock)
 {
-	if (atomic_dec_and_test(&sblock->ref_count)) {
+	if (atomic_dec_and_test(&sblock->refs)) {
 		int i;

 		if (sblock->sparity)
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)

 static void scrub_page_get(struct scrub_page *spage)
 {
-	atomic_inc(&spage->ref_count);
+	atomic_inc(&spage->refs);
 }

 static void scrub_page_put(struct scrub_page *spage)
 {
-	if (atomic_dec_and_test(&spage->ref_count)) {
+	if (atomic_dec_and_test(&spage->refs)) {
 		if (spage->page)
 			__free_page(spage->page);
 		kfree(spage);
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,

 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->ref_count, 1);
+	atomic_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,

 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->ref_count, 1);
+	atomic_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 	sblock->sparity = sparity;
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	struct btrfs_raid_bio *rbio;
 	struct scrub_page *spage;
 	struct btrfs_bio *bbio = NULL;
-	u64 *raid_map = NULL;
 	u64 length;
 	int ret;
@@ -2716,8 +2695,8 @@
 	length = sparity->logic_end - sparity->logic_start + 1;
 	ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
 			       sparity->logic_start,
-			       &length, &bbio, 0, &raid_map);
-	if (ret || !bbio || !raid_map)
+			       &length, &bbio, 0, 1);
+	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;

 	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@@ -2729,8 +2708,7 @@
 	bio->bi_end_io = scrub_parity_bio_endio;

 	rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
-					      raid_map, length,
-					      sparity->scrub_dev,
+					      length, sparity->scrub_dev,
 					      sparity->dbitmap,
 					      sparity->nsectors);
 	if (!rbio)
@@ -2747,8 +2725,7 @@
 rbio_out:
 	bio_put(bio);
 bbio_out:
-	kfree(bbio);
-	kfree(raid_map);
+	btrfs_put_bbio(bbio);
 	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 		  sparity->nsectors);
 	spin_lock(&sctx->stat_lock);
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)

 static void scrub_parity_get(struct scrub_parity *sparity)
 {
-	atomic_inc(&sparity->ref_count);
+	atomic_inc(&sparity->refs);
 }

 static void scrub_parity_put(struct scrub_parity *sparity)
 {
-	if (!atomic_dec_and_test(&sparity->ref_count))
+	if (!atomic_dec_and_test(&sparity->refs))
 		return;

 	scrub_parity_check_and_repair(sparity);
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	sparity->scrub_dev = sdev;
 	sparity->logic_start = logic_start;
 	sparity->logic_end = logic_end;
-	atomic_set(&sparity->ref_count, 1);
+	atomic_set(&sparity->refs, 1);
 	INIT_LIST_HEAD(&sparity->spages);
 	sparity->dbitmap = sparity->bitmap;
 	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 		increment = map->stripe_len;
 		mirror_num = num % map->num_stripes + 1;
-	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				BTRFS_BLOCK_GROUP_RAID6)) {
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical, num, map, &offset, NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
 		mirror_num = 1;
@@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	 */
 	logical = base + offset;
 	physical_end = physical + nstripes * map->stripe_len;
-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-			 BTRFS_BLOCK_GROUP_RAID6)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical_end, num,
 					map, &logic_end, NULL);
 		logic_end += base;
@@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	ret = 0;
 	while (physical < physical_end) {
 		/* for raid56, we skip parity stripe */
-		if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				 BTRFS_BLOCK_GROUP_RAID6)) {
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 			ret = get_raid56_logic_offset(physical, num,
 					map, &logical, &stripe_logical);
 			logical += base;
@@ -3280,8 +3254,7 @@ again:
 			scrub_free_csums(sctx);
 			if (extent_logical + extent_len <
 			    key.objectid + bytes) {
-				if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-					BTRFS_BLOCK_GROUP_RAID6)) {
+				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 					/*
 					 * loop until we find next data stripe
 					 * or we have finished all stripes.
@@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	scrub_workers_put(fs_info);
 	mutex_unlock(&fs_info->scrub_lock);

-	scrub_free_ctx(sctx);
+	scrub_put_ctx(sctx);

 	return ret;
 }
@@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
 			      &mapped_length, &bbio, 0);
 	if (ret || !bbio || mapped_length < extent_len ||
 	    !bbio->stripes[0].dev->bdev) {
-		kfree(bbio);
+		btrfs_put_bbio(bbio);
 		return;
 	}

 	*extent_physical = bbio->stripes[0].physical;
 	*extent_mirror_num = bbio->mirror_num;
 	*extent_dev = bbio->stripes[0].dev;
-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 }

 static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

@@ -2471,12 +2471,9 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
 	if (ret < 0)
 		goto out;
 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
-	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
-			btrfs_inode_atime(ii));
-	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
-			btrfs_inode_mtime(ii));
-	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
-			btrfs_inode_ctime(ii));
+	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
+	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
+	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
 	/* TODO Add otime support when the otime patches get into upstream */

 	ret = send_cmd(sctx);

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

@@ -1958,11 +1958,6 @@ static int btrfs_freeze(struct super_block *sb)
 	return btrfs_commit_transaction(trans, root);
 }

-static int btrfs_unfreeze(struct super_block *sb)
-{
-	return 0;
-}
-
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2011,7 +2006,6 @@ static const struct super_operations btrfs_super_ops = {
 	.statfs		= btrfs_statfs,
 	.remount_fs	= btrfs_remount,
 	.freeze_fs	= btrfs_freeze,
-	.unfreeze_fs	= btrfs_unfreeze,
 };

 static const struct file_operations btrfs_ctl_fops = {

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c

@@ -733,10 +733,18 @@ int btrfs_init_sysfs(void)

 	ret = btrfs_init_debugfs();
 	if (ret)
-		return ret;
+		goto out1;

 	init_feature_attrs();
 	ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
+	if (ret)
+		goto out2;
+
+	return 0;
+out2:
+	debugfs_remove_recursive(btrfs_debugfs_root_dentry);
+out1:
+	kset_unregister(btrfs_kset);

 	return ret;
 }
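The fix above is the usual kernel unwind ladder: each setup step gets a label, and a failure jumps to the label that tears down exactly the steps that have already succeeded, in reverse order. A minimal standalone model of the shape, with dummy functions standing in for the kset/debugfs/sysfs steps:

/* Userspace model of the out1/out2 unwind ladder; all names invented. */
#include <stdio.h>

static int step_a(void) { return 0; }	/* e.g. register a kset */
static int step_b(void) { return 0; }	/* e.g. init debugfs */
static int step_c(void) { return -1; }	/* e.g. create a sysfs group */
static void undo_b(void) { puts("undo b"); }
static void undo_a(void) { puts("undo a"); }

static int init(void)
{
	int ret = step_a();

	if (ret)
		return ret;		/* nothing to undo yet */
	ret = step_b();
	if (ret)
		goto out1;		/* only step_a needs undoing */
	ret = step_c();
	if (ret)
		goto out2;		/* undo in reverse order of setup */
	return 0;
out2:
	undo_b();
out1:
	undo_a();
	return ret;
}

int main(void) { return init() ? 1 : 0; }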

diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c

@@ -53,7 +53,7 @@ static int test_btrfs_split_item(void)
 		return -ENOMEM;
 	}

-	path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
+	path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
 	if (!eb) {
 		test_msg("Could not allocate dummy buffer\n");
 		ret = -ENOMEM;

diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c

@@ -258,8 +258,7 @@ static int test_find_delalloc(void)
 	}
 	ret = 0;
out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1,
-			  (unsigned long)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
out:
 	if (locked_page)
 		page_cache_release(locked_page);

diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c

@@ -255,7 +255,7 @@ static noinline int test_btrfs_get_extent(void)
 		goto out;
 	}

-	root->node = alloc_dummy_extent_buffer(0, 4096);
+	root->node = alloc_dummy_extent_buffer(NULL, 4096);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		goto out;
@@ -843,7 +843,7 @@ static int test_hole_first(void)
 		goto out;
 	}

-	root->node = alloc_dummy_extent_buffer(0, 4096);
+	root->node = alloc_dummy_extent_buffer(NULL, 4096);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		goto out;

diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c

@@ -404,12 +404,22 @@ int btrfs_test_qgroups(void)
 		ret = -ENOMEM;
 		goto out;
 	}
+	/* We are using this root as our extent root */
+	root->fs_info->extent_root = root;
+
+	/*
+	 * Some of the paths we test assume we have a filled out fs_info, so we
+	 * just need to add the root in there so we don't panic.
+	 */
+	root->fs_info->tree_root = root;
+	root->fs_info->quota_root = root;
+	root->fs_info->quota_enabled = 1;
+
 	/*
 	 * Can't use bytenr 0, some things freak out
 	 * *cough*backref walking code*cough*
 	 */
-	root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096);
+	root->node = alloc_test_extent_buffer(root->fs_info, 4096);
 	if (!root->node) {
 		test_msg("Couldn't allocate dummy buffer\n");
 		ret = -ENOMEM;
@@ -448,17 +458,6 @@ int btrfs_test_qgroups(void)
 		goto out;
 	}

-	/* We are using this root as our extent root */
-	root->fs_info->extent_root = root;
-
-	/*
-	 * Some of the paths we test assume we have a filled out fs_info, so we
-	 * just need to addt he root in there so we don't panic.
-	 */
-	root->fs_info->tree_root = root;
-	root->fs_info->quota_root = root;
-	root->fs_info->quota_enabled = 1;
-
 	test_msg("Running qgroup tests\n");
 	ret = test_no_shared_qgroup(root);
 	if (ret)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

@@ -220,6 +220,7 @@ loop:
 	 * commit the transaction.
 	 */
 	atomic_set(&cur_trans->use_count, 2);
+	cur_trans->have_free_bgs = 0;
 	cur_trans->start_time = get_seconds();

 	cur_trans->delayed_refs.href_root = RB_ROOT;
@@ -248,6 +249,8 @@ loop:
 	INIT_LIST_HEAD(&cur_trans->pending_chunks);
 	INIT_LIST_HEAD(&cur_trans->switch_commits);
 	INIT_LIST_HEAD(&cur_trans->pending_ordered);
+	INIT_LIST_HEAD(&cur_trans->dirty_bgs);
+	spin_lock_init(&cur_trans->dirty_bgs_lock);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(&cur_trans->dirty_pages,
 			     fs_info->btree_inode->i_mapping);
@@ -1020,6 +1023,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 	u64 old_root_bytenr;
 	u64 old_root_used;
 	struct btrfs_root *tree_root = root->fs_info->tree_root;
+	bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);

 	old_root_used = btrfs_root_used(&root->root_item);
 	btrfs_write_dirty_block_groups(trans, root);
@@ -1027,7 +1031,9 @@
 	while (1) {
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 		if (old_root_bytenr == root->node->start &&
-		    old_root_used == btrfs_root_used(&root->root_item))
+		    old_root_used == btrfs_root_used(&root->root_item) &&
+		    (!extent_root ||
+		     list_empty(&trans->transaction->dirty_bgs)))
 			break;

 		btrfs_set_root_node(&root->root_item, root->node);
@@ -1038,7 +1044,15 @@
 			return ret;

 		old_root_used = btrfs_root_used(&root->root_item);
-		ret = btrfs_write_dirty_block_groups(trans, root);
+		if (extent_root) {
+			ret = btrfs_write_dirty_block_groups(trans, root);
+			if (ret)
+				return ret;
+		}
+		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+		if (ret)
+			return ret;
+		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 		if (ret)
 			return ret;
 	}
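The reworked loop in update_cowonly_root() is a fixpoint iteration: writing dirty block groups COWs tree blocks and moves the root, and running delayed refs can dirty further block groups, so the loop only exits once a pass leaves the root where it was and (for the extent root) no dirty block groups remain queued. A toy model of that convergence condition, in userspace C with invented state:

/* Toy model of the commit-until-stable loop; not kernel code. */
#include <assert.h>

struct state {
	unsigned long bytenr;	/* where the root currently lives */
	int dirty_bgs;		/* pending block-group updates */
};

/* writing a dirty block group COWs tree blocks, moving the root */
static void write_dirty_bgs(struct state *s)
{
	if (s->dirty_bgs) {
		s->dirty_bgs--;
		s->bytenr++;
	}
}

/* stop only when a pass changed nothing and no dirty work remains */
static void commit_root(struct state *s)
{
	unsigned long last = (unsigned long)-1;

	while (last != s->bytenr || s->dirty_bgs) {
		last = s->bytenr;
		write_dirty_bgs(s);
	}
}

int main(void)
{
	struct state s = { .bytenr = 100, .dirty_bgs = 3 };

	commit_root(&s);
	assert(s.dirty_bgs == 0);	/* converged */
	return 0;
}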
@@ -1061,10 +1075,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 	struct extent_buffer *eb;
 	int ret;

-	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-	if (ret)
-		return ret;
-
 	eb = btrfs_lock_root_node(fs_info->tree_root);
 	ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
 			      0, &eb);
@@ -1097,6 +1107,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 		next = fs_info->dirty_cowonly_roots.next;
 		list_del_init(next);
 		root = list_entry(next, struct btrfs_root, dirty_list);
+		clear_bit(BTRFS_ROOT_DIRTY, &root->state);

 		if (root != fs_info->extent_root)
 			list_add_tail(&root->dirty_list,
@@ -1983,6 +1994,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,

 	switch_commit_roots(cur_trans, root->fs_info);

 	assert_qgroups_uptodate(trans);
+	ASSERT(list_empty(&cur_trans->dirty_bgs));
 	update_super_roots(root);

 	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
@@ -2026,6 +2038,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,

 	btrfs_finish_extent_commit(trans, root);

+	if (cur_trans->have_free_bgs)
+		btrfs_clear_space_info_full(root->fs_info);
+
 	root->fs_info->last_trans_committed = cur_trans->transid;
 	/*
 	 * We needn't acquire the lock here because there is no other task

diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h

@@ -47,6 +47,11 @@ struct btrfs_transaction {
 	atomic_t num_writers;
 	atomic_t use_count;

+	/*
+	 * true if there is free bgs operations in this transaction
+	 */
+	int have_free_bgs;
+
 	/* Be protected by fs_info->trans_lock when we want to change it. */
 	enum btrfs_trans_state state;
 	struct list_head list;
@@ -58,6 +63,8 @@ struct btrfs_transaction {
 	struct list_head pending_chunks;
 	struct list_head pending_ordered;
 	struct list_head switch_commits;
+	struct list_head dirty_bgs;
+	spinlock_t dirty_bgs_lock;
 	struct btrfs_delayed_ref_root delayed_refs;
 	int aborted;
 };
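The dirty_bgs_lock added alongside the list exists so a block group can be queued on the per-transaction dirty_bgs list from multiple contexts without ever being linked twice. A userspace model of the queue-at-most-once idiom behind it, with a pthread mutex standing in for the spinlock and all types invented for illustration:

/* Toy model of lock-protected queue-at-most-once; not kernel code. */
#include <pthread.h>
#include <stdbool.h>

struct node {
	struct node *next;
	bool queued;
};

struct txn {
	pthread_mutex_t lock;	/* models dirty_bgs_lock */
	struct node *dirty;	/* models the dirty_bgs list */
};

static void mark_dirty(struct txn *t, struct node *n)
{
	pthread_mutex_lock(&t->lock);
	if (!n->queued) {	/* link the node at most once */
		n->queued = true;
		n->next = t->dirty;
		t->dirty = n;
	}
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct txn t = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct node n = { NULL, false };

	mark_dirty(&t, &n);
	mark_dirty(&t, &n);	/* second call is a no-op */
	return t.dirty == &n ? 0 : 1;
}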

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
insert:
 	btrfs_release_path(path);
 	/* try to insert the key into the destination tree */
+	path->skip_release_on_error = 1;
 	ret = btrfs_insert_empty_item(trans, root, path,
 				      key, item_size);
+	path->skip_release_on_error = 0;

 	/* make sure any existing item is the correct size */
-	if (ret == -EEXIST) {
+	if (ret == -EEXIST || ret == -EOVERFLOW) {
 		u32 found_size;
 		found_size = btrfs_item_size_nr(path->nodes[0],
 						path->slots[0]);
@@ -488,8 +490,20 @@ insert:
 		src_item = (struct btrfs_inode_item *)src_ptr;
 		dst_item = (struct btrfs_inode_item *)dst_ptr;

-		if (btrfs_inode_generation(eb, src_item) == 0)
+		if (btrfs_inode_generation(eb, src_item) == 0) {
+			struct extent_buffer *dst_eb = path->nodes[0];
+
+			if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
+			    S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
+				struct btrfs_map_token token;
+				u64 ino_size = btrfs_inode_size(eb, src_item);
+
+				btrfs_init_map_token(&token);
+				btrfs_set_token_inode_size(dst_eb, dst_item,
+							   ino_size, &token);
+			}
 			goto no_copy;
+		}

 		if (overwrite_root &&
 		    S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
@@ -844,7 +858,7 @@ out:
 static noinline int backref_in_log(struct btrfs_root *log,
 				   struct btrfs_key *key,
 				   u64 ref_objectid,
-				   char *name, int namelen)
+				   const char *name, int namelen)
 {
 	struct btrfs_path *path;
 	struct btrfs_inode_ref *ref;
@@ -1254,13 +1268,14 @@ out:
 }

 static int insert_orphan_item(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root, u64 offset)
+			      struct btrfs_root *root, u64 ino)
 {
 	int ret;
-	ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
-			offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
-	if (ret > 0)
-		ret = btrfs_insert_orphan_item(trans, root, offset);
+
+	ret = btrfs_insert_orphan_item(trans, root, ino);
+	if (ret == -EEXIST)
+		ret = 0;
+
 	return ret;
 }
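The new insert_orphan_item() drops the search-then-insert dance for a single idempotent insert: attempt the insertion and treat -EEXIST as success, saving one tree lookup. A toy model of the idiom, in userspace C with invented storage:

/* Toy model of insert-and-tolerate-EEXIST; not kernel code. */
#include <errno.h>
#include <stdbool.h>

static bool present[16];	/* stand-in for the orphan items */

static int tree_insert(unsigned ino)
{
	if (present[ino])
		return -EEXIST;
	present[ino] = true;
	return 0;
}

static int insert_orphan(unsigned ino)
{
	int ret = tree_insert(ino);

	if (ret == -EEXIST)	/* already there: that is fine */
		ret = 0;
	return ret;
}

int main(void)
{
	/* the second insert is a no-op, not an error */
	return (insert_orphan(3) == 0 && insert_orphan(3) == 0) ? 0 : 1;
}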
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
 		leaf = path->nodes[0];
 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		cur_offset = 0;

 		while (cur_offset < item_size) {
 			extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
 	}
 	btrfs_release_path(path);

-	if (ret < 0)
+	if (ret < 0 && ret != -ENOENT)
 		return ret;
 	return nlink;
 }
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
 	nlink = ret;

 	ret = count_inode_extrefs(root, inode, path);
-	if (ret == -ENOENT)
-		ret = 0;
-
 	if (ret < 0)
 		goto out;
@@ -1556,6 +1569,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
 	return ret;
 }

+/*
+ * Return true if an inode reference exists in the log for the given name,
+ * inode and parent inode.
+ */
+static bool name_in_log_ref(struct btrfs_root *log_root,
+			    const char *name, const int name_len,
+			    const u64 dirid, const u64 ino)
+{
+	struct btrfs_key search_key;
+
+	search_key.objectid = ino;
+	search_key.type = BTRFS_INODE_REF_KEY;
+	search_key.offset = dirid;
+	if (backref_in_log(log_root, &search_key, dirid, name, name_len))
+		return true;
+
+	search_key.type = BTRFS_INODE_EXTREF_KEY;
+	search_key.offset = btrfs_extref_hash(dirid, name, name_len);
+	if (backref_in_log(log_root, &search_key, dirid, name, name_len))
+		return true;
+
+	return false;
+}
+
 /*
  * take a single entry in a log directory item and replay it into
  * the subvolume.
@@ -1666,10 +1703,17 @@ out:
 	return ret;

insert:
+	if (name_in_log_ref(root->log_root, name, name_len,
+			    key->objectid, log_key.objectid)) {
+		/* The dentry will be added later. */
+		ret = 0;
+		update_size = false;
+		goto out;
+	}
 	btrfs_release_path(path);
 	ret = insert_one_name(trans, root, path, key->objectid, key->offset,
 			      name, name_len, log_type, &log_key);
-	if (ret && ret != -ENOENT)
+	if (ret && ret != -ENOENT && ret != -EEXIST)
 		goto out;
 	update_size = false;
 	ret = 0;
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 		parent = path->nodes[*level];
 		root_owner = btrfs_header_owner(parent);

-		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+		next = btrfs_find_create_tree_block(root, bytenr);
 		if (!next)
 			return -ENOMEM;
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
 		mutex_unlock(&root->log_mutex);
 		if (atomic_read(&root->log_writers))
 			schedule();
-		mutex_lock(&root->log_mutex);
 		finish_wait(&root->log_writer_wait, &wait);
+		mutex_lock(&root->log_mutex);
 	}
 }
@@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
 static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct extent_buffer *leaf,
 			    struct btrfs_inode_item *item,
-			    struct inode *inode, int log_inode_only)
+			    struct inode *inode, int log_inode_only,
+			    u64 logged_isize)
 {
 	struct btrfs_map_token token;
@@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 		 * to say 'update this inode with these values'
 		 */
 		btrfs_set_token_inode_generation(leaf, item, 0, &token);
-		btrfs_set_token_inode_size(leaf, item, 0, &token);
+		btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
 	} else {
 		btrfs_set_token_inode_generation(leaf, item,
 						 BTRFS_I(inode)->generation,
@@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
 	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);

-	btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+	btrfs_set_token_timespec_sec(leaf, &item->atime,
 				     inode->i_atime.tv_sec, &token);
-	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+	btrfs_set_token_timespec_nsec(leaf, &item->atime,
 				      inode->i_atime.tv_nsec, &token);

-	btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+	btrfs_set_token_timespec_sec(leaf, &item->mtime,
 				     inode->i_mtime.tv_sec, &token);
-	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+	btrfs_set_token_timespec_nsec(leaf, &item->mtime,
 				      inode->i_mtime.tv_nsec, &token);

-	btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+	btrfs_set_token_timespec_sec(leaf, &item->ctime,
 				     inode->i_ctime.tv_sec, &token);
-	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+	btrfs_set_token_timespec_nsec(leaf, &item->ctime,
 				      inode->i_ctime.tv_nsec, &token);

 	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
@@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
 		return ret;
 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				    struct btrfs_inode_item);
-	fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
+	fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
 	btrfs_release_path(path);
 	return 0;
 }
@@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			       struct inode *inode,
 			       struct btrfs_path *dst_path,
 			       struct btrfs_path *src_path, u64 *last_extent,
-			       int start_slot, int nr, int inode_only)
+			       int start_slot, int nr, int inode_only,
+			       u64 logged_isize)
 {
 	unsigned long src_offset;
 	unsigned long dst_offset;
@@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 						    dst_path->slots[0],
 						    struct btrfs_inode_item);
 			fill_inode_item(trans, dst_path->nodes[0], inode_item,
-					inode, inode_only == LOG_INODE_EXISTS);
+					inode, inode_only == LOG_INODE_EXISTS,
+					logged_isize);
 		} else {
 			copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
 					   src_offset, ins_sizes[i]);
@@ -3902,6 +3949,33 @@ process:
 	return ret;
 }

+static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
+			     struct btrfs_path *path, u64 *size_ret)
+{
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
+	if (ret < 0) {
+		return ret;
+	} else if (ret > 0) {
+		*size_ret = i_size_read(inode);
+	} else {
+		struct btrfs_inode_item *item;
+
+		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				      struct btrfs_inode_item);
+		*size_ret = btrfs_inode_size(path->nodes[0], item);
+	}
+
+	btrfs_release_path(path);
+	return 0;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
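logged_inode_size() above prefers the i_size already recorded in the log tree and falls back to the in-memory size only when the log holds no inode item yet; the long comment later in this file spells out the 4K/8K replay scenario this prevents. A toy model of just that decision, in userspace C with invented types:

/* Toy model of the logged-isize preference; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct logged { int valid; uint64_t isize; };

static uint64_t logged_inode_size(const struct logged *log_item,
				  uint64_t current_isize)
{
	/* using current_isize when a smaller size was already logged
	 * would replay as an expanding truncate (a bogus trailing hole) */
	return log_item->valid ? log_item->isize : current_isize;
}

int main(void)
{
	struct logged li = { .valid = 1, .isize = 4096 };

	/* the inode grew to 8K after the first fsync; keep the logged 4K */
	printf("%llu\n",
	       (unsigned long long)logged_inode_size(&li, 8192));
	return 0;
}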
@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
bool fast_search = false; bool fast_search = false;
u64 ino = btrfs_ino(inode); u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 logged_isize = 0;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
max_key.type = (u8)-1; max_key.type = (u8)-1;
max_key.offset = (u64)-1; max_key.offset = (u64)-1;
/* Only run delayed items if we are a dir or a new file */ /*
* Only run delayed items if we are a dir or a new file.
* Otherwise commit the delayed inode only, which is needed in
* order for the log replay code to mark inodes for link count
* fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
*/
if (S_ISDIR(inode->i_mode) || if (S_ISDIR(inode->i_mode) ||
BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { BTRFS_I(inode)->generation > root->fs_info->last_trans_committed)
ret = btrfs_commit_inode_delayed_items(trans, inode); ret = btrfs_commit_inode_delayed_items(trans, inode);
if (ret) { else
btrfs_free_path(path); ret = btrfs_commit_inode_delayed_inode(inode);
btrfs_free_path(dst_path);
return ret; if (ret) {
} btrfs_free_path(path);
btrfs_free_path(dst_path);
return ret;
} }
mutex_lock(&BTRFS_I(inode)->log_mutex); mutex_lock(&BTRFS_I(inode)->log_mutex);
@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (S_ISDIR(inode->i_mode)) { if (S_ISDIR(inode->i_mode)) {
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
if (inode_only == LOG_INODE_EXISTS) if (inode_only == LOG_INODE_EXISTS) {
max_key_type = BTRFS_XATTR_ITEM_KEY; max_key_type = BTRFS_INODE_EXTREF_KEY;
max_key.type = max_key_type;
}
ret = drop_objectid_items(trans, log, path, ino, max_key_type); ret = drop_objectid_items(trans, log, path, ino, max_key_type);
} else { } else {
if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, if (inode_only == LOG_INODE_EXISTS) {
&BTRFS_I(inode)->runtime_flags)) { /*
clear_bit(BTRFS_INODE_COPY_EVERYTHING, * Make sure the new inode item we write to the log has
&BTRFS_I(inode)->runtime_flags); * the same isize as the current one (if it exists).
ret = btrfs_truncate_inode_items(trans, log, * This is necessary to prevent data loss after log
inode, 0, 0); * replay, and also to prevent doing a wrong expanding
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, * truncate - for e.g. create file, write 4K into offset
&BTRFS_I(inode)->runtime_flags) || * 0, fsync, write 4K into offset 4096, add hard link,
* fsync some other file (to sync log), power fail - if
* we use the inode's current i_size, after log replay
* we get a 8Kb file, with the last 4Kb extent as a hole
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
err = logged_inode_size(log, inode, path,
&logged_isize);
if (err)
goto out_unlock;
}
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_INODE_EXTREF_KEY;
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
ret = btrfs_truncate_inode_items(trans, log,
inode, 0, 0);
}
} else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags) ||
inode_only == LOG_INODE_EXISTS) { inode_only == LOG_INODE_EXISTS) {
if (inode_only == LOG_INODE_ALL) if (inode_only == LOG_INODE_ALL) {
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
fast_search = true; fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY; max_key.type = BTRFS_XATTR_ITEM_KEY;
} else {
max_key.type = BTRFS_INODE_EXTREF_KEY;
}
ret = drop_objectid_items(trans, log, path, ino, ret = drop_objectid_items(trans, log, path, ino,
max_key.type); max_key.type);
} else { } else {
@@ -4047,7 +4163,8 @@ again:
 		}

 		ret = copy_items(trans, inode, dst_path, path, &last_extent,
-				 ins_start_slot, ins_nr, inode_only);
+				 ins_start_slot, ins_nr, inode_only,
+				 logged_isize);
 		if (ret < 0) {
 			err = ret;
 			goto out_unlock;
@@ -4071,7 +4188,7 @@ next_slot:
 		if (ins_nr) {
 			ret = copy_items(trans, inode, dst_path, path,
 					 &last_extent, ins_start_slot,
-					 ins_nr, inode_only);
+					 ins_nr, inode_only, logged_isize);
 			if (ret < 0) {
 				err = ret;
 				goto out_unlock;
@@ -4092,7 +4209,8 @@ next_slot:
 	}
 	if (ins_nr) {
 		ret = copy_items(trans, inode, dst_path, path, &last_extent,
-				 ins_start_slot, ins_nr, inode_only);
+				 ins_start_slot, ins_nr, inode_only,
+				 logged_isize);
 		if (ret < 0) {
 			err = ret;
 			goto out_unlock;
@@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 	struct dentry *old_parent = NULL;
 	int ret = 0;
 	u64 last_committed = root->fs_info->last_trans_committed;
+	const struct dentry * const first_parent = parent;
+	const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
+				 last_committed);

 	sb = inode->i_sb;
@@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		goto end_trans;
 	}

-	inode_only = LOG_INODE_EXISTS;
 	while (1) {
 		if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
 			break;
@@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		if (root != BTRFS_I(inode)->root)
 			break;

+		/*
+		 * On unlink we must make sure our immediate parent directory
+		 * inode is fully logged. This is to prevent leaving dangling
+		 * directory index entries and a wrong directory inode's i_size.
+		 * Not doing so can result in a directory being impossible to
+		 * delete after log replay (rmdir will always fail with error
+		 * -ENOTEMPTY).
+		 */
+		if (did_unlink && parent == first_parent)
+			inode_only = LOG_INODE_ALL;
+		else
+			inode_only = LOG_INODE_EXISTS;
+
 		if (BTRFS_I(inode)->generation >
-		    root->fs_info->last_trans_committed) {
+		    root->fs_info->last_trans_committed ||
+		    inode_only == LOG_INODE_ALL) {
 			ret = btrfs_log_inode(trans, root, inode, inode_only,
 					      0, LLONG_MAX, ctx);
 			if (ret)
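A plausible userspace sequence for the dangling-index problem the comment describes, not part of this commit and possibly differing from the exact reproducer behind the change; paths are arbitrary and a power failure plus remount is needed to exercise log replay:

	/* Illustrative reproducer for the rmdir -ENOTEMPTY scenario above. */
	#include <fcntl.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		int fd;

		mkdir("/mnt/dir", 0755);
		fd = open("/mnt/dir/foo", O_CREAT | O_WRONLY, 0644);
		if (fd < 0)
			return 1;
		link("/mnt/dir/foo", "/mnt/dir/bar");	/* 2nd name, same dir */
		sync();				/* commit the transaction */

		unlink("/mnt/dir/bar");		/* bumps last_unlink_trans */
		fsync(fd);			/* the immediate parent dir must
						 * now be fully logged too */
		close(fd);
		/*
		 * Power fail + remount here. Without the fix, replay could
		 * leave a dangling index entry for "bar", after which
		 * unlink("/mnt/dir/foo") + rmdir("/mnt/dir") fails with
		 * -ENOTEMPTY forever.
		 */
		return 0;
	}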

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

@@ -1310,6 +1310,8 @@ again:
 	if (ret) {
 		btrfs_error(root->fs_info, ret,
 			    "Failed to remove dev extent item");
+	} else {
+		trans->transaction->have_free_bgs = 1;
 	}
 out:
 	btrfs_free_path(path);
@@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
 static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 {
-	if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
+	if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
 		return;

 	btrfs_set_fs_incompat(info, RAID56);
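For reference, BTRFS_BLOCK_GROUP_RAID56_MASK, which this and the following hunks switch to, is introduced elsewhere in this series (in ctree.h) as simply the two RAID5/6 profile bits OR'd together:

	#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
						 BTRFS_BLOCK_GROUP_RAID6)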
@@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
 	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = (struct map_lookup *)em->bdev;

-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-			 BTRFS_BLOCK_GROUP_RAID6)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		len = map->stripe_len * nr_data_stripes(map);
-	}
 	free_extent_map(em);
 	return len;
 }
@@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
 	BUG_ON(em->start > logical || em->start + em->len < logical);
 	map = (struct map_lookup *)em->bdev;

-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-			 BTRFS_BLOCK_GROUP_RAID6))
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
 		ret = 1;
 	free_extent_map(em);
 	return ret;
@@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b)
 }

 /* Bubble-sort the stripe set to put the parity/syndrome stripes last */
-static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
+static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
 {
 	struct btrfs_bio_stripe s;
-	int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
 	int i;
 	u64 l;
 	int again = 1;
-	int m;

 	while (again) {
 		again = 0;
-		for (i = 0; i < real_stripes - 1; i++) {
-			if (parity_smaller(raid_map[i], raid_map[i+1])) {
+		for (i = 0; i < num_stripes - 1; i++) {
+			if (parity_smaller(bbio->raid_map[i],
+					   bbio->raid_map[i+1])) {
 				s = bbio->stripes[i];
-				l = raid_map[i];
+				l = bbio->raid_map[i];
 				bbio->stripes[i] = bbio->stripes[i+1];
-				raid_map[i] = raid_map[i+1];
+				bbio->raid_map[i] = bbio->raid_map[i+1];
 				bbio->stripes[i+1] = s;
-				raid_map[i+1] = l;
-				if (bbio->tgtdev_map) {
-					m = bbio->tgtdev_map[i];
-					bbio->tgtdev_map[i] =
-						bbio->tgtdev_map[i + 1];
-					bbio->tgtdev_map[i + 1] = m;
-				}
+				bbio->raid_map[i+1] = l;
 				again = 1;
 			}
@@ -4909,10 +4900,41 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
 	}
 }

+static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
+{
+	struct btrfs_bio *bbio = kzalloc(
+		sizeof(struct btrfs_bio) +
+		sizeof(struct btrfs_bio_stripe) * (total_stripes) +
+		sizeof(int) * (real_stripes) +
+		sizeof(u64) * (real_stripes),
+		GFP_NOFS);
+	if (!bbio)
+		return NULL;
+
+	atomic_set(&bbio->error, 0);
+	atomic_set(&bbio->refs, 1);
+
+	return bbio;
+}
+
+void btrfs_get_bbio(struct btrfs_bio *bbio)
+{
+	WARN_ON(!atomic_read(&bbio->refs));
+	atomic_inc(&bbio->refs);
+}
+
+void btrfs_put_bbio(struct btrfs_bio *bbio)
+{
+	if (!bbio)
+		return;
+	if (atomic_dec_and_test(&bbio->refs))
+		kfree(bbio);
+}
+
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			     u64 logical, u64 *length,
 			     struct btrfs_bio **bbio_ret,
-			     int mirror_num, u64 **raid_map_ret)
+			     int mirror_num, int need_raid_map)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
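The btrfs_get_bbio()/btrfs_put_bbio() pair added just above gives the btrfs_bio a real lifetime, so scrub and the end-io path can each hold a reference instead of guessing who may kfree() it (the use-after-free hunt mentioned in the pull text is in this area). A standalone model of the same pattern using C11 atomics; a userspace sketch, not the kernel code:

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdlib.h>

	struct obj {
		atomic_int refs;
		/* ... payload ... */
	};

	static struct obj *obj_alloc(void)
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (o)
			atomic_store(&o->refs, 1);	/* creator owns one ref */
		return o;
	}

	static void obj_get(struct obj *o)
	{
		assert(atomic_load(&o->refs) > 0);	/* mirrors the WARN_ON() */
		atomic_fetch_add(&o->refs, 1);
	}

	static void obj_put(struct obj *o)
	{
		if (!o)
			return;
		if (atomic_fetch_sub(&o->refs, 1) == 1)	/* dropped last ref */
			free(o);
	}

	int main(void)
	{
		struct obj *o = obj_alloc();

		obj_get(o);	/* e.g. hand a second ref to the end-io path */
		obj_put(o);	/* end-io done */
		obj_put(o);	/* creator done: frees */
		return 0;
	}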
@@ -4925,7 +4947,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 	u64 stripe_nr_orig;
 	u64 stripe_nr_end;
 	u64 stripe_len;
-	u64 *raid_map = NULL;
 	int stripe_index;
 	int i;
 	int ret = 0;
@@ -4976,7 +4997,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 	stripe_offset = offset - stripe_offset;

 	/* if we're here for raid56, we need to know the stripe aligned start */
-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
 		raid56_full_stripe_start = offset;
@@ -4989,8 +5010,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,

 	if (rw & REQ_DISCARD) {
 		/* we don't discard raid56 yet */
-		if (map->type &
-		    (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 			ret = -EOPNOTSUPP;
 			goto out;
 		}
@@ -5000,7 +5020,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 	/* For writes to RAID[56], allow a full stripeset across all disks.
 	   For other RAID types and for RAID[56] reads, just allow a single
 	   stripe (on a single disk). */
-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) &&
+	if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 	    (rw & REQ_WRITE)) {
 		max_len = stripe_len * nr_data_stripes(map) -
 			  (offset - raid56_full_stripe_start);
@@ -5047,7 +5067,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		u64 physical_of_found = 0;

 		ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
-				logical, &tmp_length, &tmp_bbio, 0, NULL);
+				logical, &tmp_length, &tmp_bbio, 0, 0);
 		if (ret) {
 			WARN_ON(tmp_bbio != NULL);
 			goto out;
@@ -5061,7 +5081,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			 * is not left of the left cursor
 			 */
 			ret = -EIO;
-			kfree(tmp_bbio);
+			btrfs_put_bbio(tmp_bbio);
 			goto out;
 		}
@@ -5096,11 +5116,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		} else {
 			WARN_ON(1);
 			ret = -EIO;
-			kfree(tmp_bbio);
+			btrfs_put_bbio(tmp_bbio);
 			goto out;
 		}

-		kfree(tmp_bbio);
+		btrfs_put_bbio(tmp_bbio);
 	} else if (mirror_num > map->num_stripes) {
 		mirror_num = 0;
 	}
@@ -5166,15 +5186,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			mirror_num = stripe_index - old_stripe_index + 1;
 		}

-	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				BTRFS_BLOCK_GROUP_RAID6)) {
-		u64 tmp;
-
-		if (raid_map_ret &&
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		if (need_raid_map &&
 		    ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
 		     mirror_num > 1)) {
-			int i, rot;
-
 			/* push stripe_nr back to the start of the full stripe */
 			stripe_nr = raid56_full_stripe_start;
 			do_div(stripe_nr, stripe_len * nr_data_stripes(map));
@@ -5183,32 +5198,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			num_stripes = map->num_stripes;
 			max_errors = nr_parity_stripes(map);

-			raid_map = kmalloc_array(num_stripes, sizeof(u64),
-					   GFP_NOFS);
-			if (!raid_map) {
-				ret = -ENOMEM;
-				goto out;
-			}
-
-			/* Work out the disk rotation on this stripe-set */
-			tmp = stripe_nr;
-			rot = do_div(tmp, num_stripes);
-
-			/* Fill in the logical address of each stripe */
-			tmp = stripe_nr * nr_data_stripes(map);
-			for (i = 0; i < nr_data_stripes(map); i++)
-				raid_map[(i+rot) % num_stripes] =
-					em->start + (tmp + i) * map->stripe_len;
-
-			raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
-			if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-				raid_map[(i+rot+1) % num_stripes] =
-					RAID6_Q_STRIPE;
-
 			*length = map->stripe_len;
 			stripe_index = 0;
 			stripe_offset = 0;
 		} else {
+			u64 tmp;
+
 			/*
 			 * Mirror #0 or #1 means the original data block.
 			 * Mirror #2 is RAID5 parity block.
@@ -5246,17 +5241,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		tgtdev_indexes = num_stripes;
 	}

-	bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes),
-		       GFP_NOFS);
+	bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
 	if (!bbio) {
-		kfree(raid_map);
 		ret = -ENOMEM;
 		goto out;
 	}
-	atomic_set(&bbio->error, 0);
 	if (dev_replace_is_ongoing)
 		bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);

+	/* build raid_map */
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
+	    need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
+	    mirror_num > 1)) {
+		u64 tmp;
+		int i, rot;
+
+		bbio->raid_map = (u64 *)((void *)bbio->stripes +
+				 sizeof(struct btrfs_bio_stripe) *
+				 num_alloc_stripes +
+				 sizeof(int) * tgtdev_indexes);
+
+		/* Work out the disk rotation on this stripe-set */
+		tmp = stripe_nr;
+		rot = do_div(tmp, num_stripes);
+
+		/* Fill in the logical address of each stripe */
+		tmp = stripe_nr * nr_data_stripes(map);
+		for (i = 0; i < nr_data_stripes(map); i++)
+			bbio->raid_map[(i+rot) % num_stripes] =
+				em->start + (tmp + i) * map->stripe_len;
+
+		bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
+		if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+			bbio->raid_map[(i+rot+1) % num_stripes] =
+				RAID6_Q_STRIPE;
+	}
+
 	if (rw & REQ_DISCARD) {
 		int factor = 0;
 		int sub_stripes = 0;
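The raid_map construction above rotates which device holds the parity (and Q) stripe as stripe_nr advances, then records the logical start of every data stripe. A standalone sketch of the same arithmetic for a hypothetical 2-data-disk RAID6 chunk; the sentinel values, chunk start, and stripe length are made up for illustration:

	#include <inttypes.h>
	#include <stdio.h>

	#define P_STRIPE ((uint64_t)-2)		/* stands in for RAID5_P_STRIPE */
	#define Q_STRIPE ((uint64_t)-1)		/* stands in for RAID6_Q_STRIPE */

	int main(void)
	{
		const int num_stripes = 4;		/* 2 data disks + P + Q */
		const int nr_data = 2;
		const uint64_t chunk_start = 0;		/* plays em->start */
		const uint64_t stripe_len = 65536;	/* plays map->stripe_len */
		uint64_t raid_map[4];
		uint64_t stripe_nr;

		for (stripe_nr = 0; stripe_nr < 4; stripe_nr++) {
			/* disk rotation on this stripe-set (do_div remainder) */
			int rot = stripe_nr % num_stripes;
			uint64_t tmp = stripe_nr * nr_data;
			int i;

			/* logical address of each data stripe */
			for (i = 0; i < nr_data; i++)
				raid_map[(i + rot) % num_stripes] =
					chunk_start + (tmp + i) * stripe_len;
			/* parity and q land after the data, rotated */
			raid_map[(i + rot) % num_stripes] = P_STRIPE;
			raid_map[(i + rot + 1) % num_stripes] = Q_STRIPE;

			printf("full stripe %" PRIu64 ":", stripe_nr);
			for (i = 0; i < num_stripes; i++) {
				if (raid_map[i] == P_STRIPE)
					printf("      P");
				else if (raid_map[i] == Q_STRIPE)
					printf("      Q");
				else
					printf(" %6" PRIu64, raid_map[i]);
			}
			printf("\n");
		}
		return 0;
	}

Running it shows P and Q walking one disk to the right per full stripe, which is exactly the order sort_parity_stripes() later normalizes so the parity entries come last.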
@@ -5340,6 +5360,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 	if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
 		max_errors = btrfs_chunk_max_errors(map);

+	if (bbio->raid_map)
+		sort_parity_stripes(bbio, num_stripes);
+
 	tgtdev_indexes = 0;
 	if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
 	    dev_replace->tgtdev != NULL) {
@@ -5427,6 +5450,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 	}

 	*bbio_ret = bbio;
+	bbio->map_type = map->type;
 	bbio->num_stripes = num_stripes;
 	bbio->max_errors = max_errors;
 	bbio->mirror_num = mirror_num;
@@ -5443,10 +5467,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
 		bbio->mirror_num = map->num_stripes + 1;
 	}
-	if (raid_map) {
-		sort_parity_stripes(bbio, raid_map);
-		*raid_map_ret = raid_map;
-	}
 out:
 	if (dev_replace_is_ongoing)
 		btrfs_dev_replace_unlock(dev_replace);
@@ -5459,17 +5479,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		    struct btrfs_bio **bbio_ret, int mirror_num)
 {
 	return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
-				 mirror_num, NULL);
+				 mirror_num, 0);
 }

 /* For Scrub/replace */
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
 		     u64 logical, u64 *length,
 		     struct btrfs_bio **bbio_ret, int mirror_num,
-		     u64 **raid_map_ret)
+		     int need_raid_map)
 {
 	return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
-				 mirror_num, raid_map_ret);
+				 mirror_num, need_raid_map);
 }

 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -5511,8 +5531,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 		do_div(length, map->num_stripes / map->sub_stripes);
 	else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
 		do_div(length, map->num_stripes);
-	else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-			      BTRFS_BLOCK_GROUP_RAID6)) {
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		do_div(length, nr_data_stripes(map));
 		rmap_len = map->stripe_len * nr_data_stripes(map);
 	}
@@ -5565,7 +5584,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
 		bio_endio_nodec(bio, err);
 	else
 		bio_endio(bio, err);
-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 }

 static void btrfs_end_bio(struct bio *bio, int err)
@@ -5808,7 +5827,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
 	u64 length = 0;
 	u64 map_length;
-	u64 *raid_map = NULL;
 	int ret;
 	int dev_nr = 0;
 	int total_devs = 1;
@@ -5819,7 +5837,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,

 	btrfs_bio_counter_inc_blocked(root->fs_info);
 	ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
-			      mirror_num, &raid_map);
+			      mirror_num, 1);
 	if (ret) {
 		btrfs_bio_counter_dec(root->fs_info);
 		return ret;
@@ -5832,15 +5850,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 	bbio->fs_info = root->fs_info;
 	atomic_set(&bbio->stripes_pending, bbio->num_stripes);

-	if (raid_map) {
+	if (bbio->raid_map) {
 		/* In this case, map_length has been set to the length of
 		   a single stripe; not the whole write */
 		if (rw & WRITE) {
-			ret = raid56_parity_write(root, bio, bbio,
-						  raid_map, map_length);
+			ret = raid56_parity_write(root, bio, bbio, map_length);
 		} else {
-			ret = raid56_parity_recover(root, bio, bbio,
-						    raid_map, map_length,
+			ret = raid56_parity_recover(root, bio, bbio, map_length,
 						    mirror_num, 1);
 		}
@@ -6238,17 +6254,22 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	struct extent_buffer *sb;
 	struct btrfs_disk_key *disk_key;
 	struct btrfs_chunk *chunk;
-	u8 *ptr;
-	unsigned long sb_ptr;
+	u8 *array_ptr;
+	unsigned long sb_array_offset;
 	int ret = 0;
 	u32 num_stripes;
 	u32 array_size;
 	u32 len = 0;
-	u32 cur;
+	u32 cur_offset;
 	struct btrfs_key key;

-	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
-					  BTRFS_SUPER_INFO_SIZE);
+	ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
+	/*
+	 * This will create extent buffer of nodesize, superblock size is
+	 * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
+	 * overallocate but we can keep it as-is, only the first page is used.
+	 */
+	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
 	if (!sb)
 		return -ENOMEM;
 	btrfs_set_buffer_uptodate(sb);
@@ -6271,35 +6292,56 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
 	array_size = btrfs_super_sys_array_size(super_copy);

-	ptr = super_copy->sys_chunk_array;
-	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
-	cur = 0;
+	array_ptr = super_copy->sys_chunk_array;
+	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+	cur_offset = 0;
+
+	while (cur_offset < array_size) {
+		disk_key = (struct btrfs_disk_key *)array_ptr;
+		len = sizeof(*disk_key);
+		if (cur_offset + len > array_size)
+			goto out_short_read;

-	while (cur < array_size) {
-		disk_key = (struct btrfs_disk_key *)ptr;
 		btrfs_disk_key_to_cpu(&key, disk_key);

-		len = sizeof(*disk_key); ptr += len;
-		sb_ptr += len;
-		cur += len;
+		array_ptr += len;
+		sb_array_offset += len;
+		cur_offset += len;

 		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
-			chunk = (struct btrfs_chunk *)sb_ptr;
+			chunk = (struct btrfs_chunk *)sb_array_offset;
+			/*
+			 * At least one btrfs_chunk with one stripe must be
+			 * present, exact stripe count check comes afterwards
+			 */
+			len = btrfs_chunk_item_size(1);
+			if (cur_offset + len > array_size)
+				goto out_short_read;
+
+			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+			len = btrfs_chunk_item_size(num_stripes);
+			if (cur_offset + len > array_size)
+				goto out_short_read;
+
 			ret = read_one_chunk(root, &key, sb, chunk);
 			if (ret)
 				break;
-			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
-			len = btrfs_chunk_item_size(num_stripes);
 		} else {
 			ret = -EIO;
 			break;
 		}
-		ptr += len;
-		sb_ptr += len;
-		cur += len;
+		array_ptr += len;
+		sb_array_offset += len;
+		cur_offset += len;
 	}
 	free_extent_buffer(sb);
 	return ret;
+
+out_short_read:
+	printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
+			len, cur_offset);
+	free_extent_buffer(sb);
+	return -EIO;
 }

 int btrfs_read_chunk_tree(struct btrfs_root *root)
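The new out_short_read path validates every key and chunk header against array_size before dereferencing it, probing each chunk twice: first assuming a single stripe, so that reading num_stripes itself is safe, then again with the real stripe count. A standalone model of that two-step validation over a byte buffer; the structures and sizes are simplified stand-ins, not the btrfs on-disk format:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* simplified stand-ins for the on-disk structures */
	struct disk_key { uint64_t objectid; uint8_t type; uint64_t offset; };
	struct chunk_hdr { uint16_t num_stripes; };

	/* plays btrfs_chunk_item_size(): header plus n fixed-size stripes */
	static uint32_t chunk_item_size(unsigned int n)
	{
		return sizeof(struct chunk_hdr) + n * 32;
	}

	static int parse_sys_array(const uint8_t *array, uint32_t array_size)
	{
		uint32_t cur_offset = 0;
		uint32_t len = 0;

		while (cur_offset < array_size) {
			struct chunk_hdr hdr;

			len = sizeof(struct disk_key);
			if (cur_offset + len > array_size)
				goto out_short_read;
			cur_offset += len;

			/* probe with one stripe so reading num_stripes is safe */
			len = chunk_item_size(1);
			if (cur_offset + len > array_size)
				goto out_short_read;
			memcpy(&hdr, array + cur_offset, sizeof(hdr));

			/* now re-check against the real stripe count */
			len = chunk_item_size(hdr.num_stripes);
			if (cur_offset + len > array_size)
				goto out_short_read;

			/* the item is fully inside the buffer, consume it */
			cur_offset += len;
		}
		return 0;

	out_short_read:
		fprintf(stderr, "sys_array too short to read %u bytes at offset %u\n",
			len, cur_offset);
		return -1;
	}

	int main(void)
	{
		uint8_t buf[24] = { 0 };	/* deliberately truncated */

		return parse_sys_array(buf, sizeof(buf)) ? 0 : 1;
	}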

diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h

@@ -295,8 +295,10 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
 #define BTRFS_BIO_ORIG_BIO_SUBMITTED	(1 << 0)

 struct btrfs_bio {
+	atomic_t refs;
 	atomic_t stripes_pending;
 	struct btrfs_fs_info *fs_info;
+	u64 map_type; /* get from map_lookup->type */
 	bio_end_io_t *end_io;
 	struct bio *orig_bio;
 	unsigned long flags;
@@ -307,6 +309,12 @@ struct btrfs_bio {
 	int mirror_num;
 	int num_tgtdevs;
 	int *tgtdev_map;
+	/*
+	 * logical block numbers for the start of each stripe
+	 * The last one or two are p/q.  These are sorted,
+	 * so raid_map[0] is the start of our full stripe
+	 */
+	u64 *raid_map;
 	struct btrfs_bio_stripe stripes[];
 };
@@ -388,19 +396,15 @@ struct btrfs_balance_control {
 int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
 				   u64 end, u64 *length);

-#define btrfs_bio_size(total_stripes, real_stripes)		\
-	(sizeof(struct btrfs_bio) +				\
-	 (sizeof(struct btrfs_bio_stripe) * (total_stripes)) +	\
-	 (sizeof(int) * (real_stripes)))
-
+void btrfs_get_bbio(struct btrfs_bio *bbio);
+void btrfs_put_bbio(struct btrfs_bio *bbio);
 int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 		    u64 logical, u64 *length,
 		    struct btrfs_bio **bbio_ret, int mirror_num);
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
 		     u64 logical, u64 *length,
 		     struct btrfs_bio **bbio_ret, int mirror_num,
-		     u64 **raid_map_ret);
+		     int need_raid_map);
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 		     u64 chunk_start, u64 physical, u64 devid,
 		     u64 **logical, int *naddrs, int *stripe_len);
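With the btrfs_bio_size() macro gone, every per-bbio array (stripes, tgtdev_map, and now raid_map) lives inside the single allocation made by alloc_btrfs_bio(), which is why __btrfs_map_block() can fill bbio->raid_map with plain pointer arithmetic. A userspace sketch of the same single-allocation trick with simplified types; note the u64 array is carved out first here to keep it naturally aligned, whereas the kernel packs stripes, then the int map, then raid_map:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct stripe { uint64_t physical; };

	struct bbio {
		int num_stripes;
		uint64_t *raid_map;		/* points into this allocation */
		int *tgtdev_map;		/* ditto */
		struct stripe stripes[];	/* flexible array member */
	};

	static struct bbio *bbio_alloc(int total_stripes, int real_stripes)
	{
		struct bbio *b = calloc(1, sizeof(*b) +
					sizeof(struct stripe) * total_stripes +
					sizeof(uint64_t) * real_stripes +
					sizeof(int) * real_stripes);

		if (!b)
			return NULL;
		b->num_stripes = total_stripes;
		/* carve the trailing arrays out of the single block */
		b->raid_map = (uint64_t *)(b->stripes + total_stripes);
		b->tgtdev_map = (int *)(b->raid_map + real_stripes);
		return b;
	}

	int main(void)
	{
		struct bbio *b = bbio_alloc(4, 3);

		if (!b)
			return 1;
		b->raid_map[0] = 65536;
		b->tgtdev_map[0] = 2;
		printf("%p %p %p\n", (void *)b->stripes, (void *)b->raid_map,
		       (void *)b->tgtdev_map);
		free(b);	/* one free() releases everything */
		return 0;
	}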

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h

@@ -495,8 +495,7 @@ struct btrfs_ioctl_send_args {

 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
-	notused,
-	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
+	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
 	BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
 	BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
 	BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,