Btrfs: Fix misuse of chunk mutex

There were several problems with chunk mutex usage:
- We locked the chunk mutex when updating metadata. This could cause a
  nested deadlock, because updating metadata might need to allocate new
  chunks, which in turn needs to acquire the chunk mutex. We remove the
  chunk mutex in this case, because the b-tree locks and other locking
  mechanisms can protect us.
- An ABBA deadlock occurred between device_list_mutex and chunk_mutex.
  When we update the device status, we must acquire device_list_mutex at
  the beginning, and then we might take chunk_mutex during the device
  status update because we need to allocate new chunks for metadata COW.
  But in most places, we acquired chunk_mutex first and then acquired
  device_list_mutex. We need to change the lock order.
- In some places we needn't acquire chunk_mutex at all. For example, we
  needn't take chunk_mutex when we free an empty seed fs_devices structure.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
Miao Xie 2014-09-03 21:35:41 +08:00 committed by Chris Mason
parent 15484377f5
commit 2196d6e8a7
3 changed files with 66 additions and 73 deletions

View File

@ -510,8 +510,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
WARN_ON(ret); WARN_ON(ret);
/* keep away write_all_supers() during the finishing procedure */ /* keep away write_all_supers() during the finishing procedure */
mutex_lock(&root->fs_info->chunk_mutex);
mutex_lock(&root->fs_info->fs_devices->device_list_mutex); mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
mutex_lock(&root->fs_info->chunk_mutex);
btrfs_dev_replace_lock(dev_replace); btrfs_dev_replace_lock(dev_replace);
dev_replace->replace_state = dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
@ -534,8 +534,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
src_device->devid, src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret); rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace); btrfs_dev_replace_unlock(dev_replace);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&root->fs_info->chunk_mutex); mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (tgt_device) if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
@ -589,8 +589,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* superblock is scratched out so that it is no longer marked to * superblock is scratched out so that it is no longer marked to
* belong to this filesystem. * belong to this filesystem.
*/ */
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&root->fs_info->chunk_mutex); mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
/* write back the superblocks */ /* write back the superblocks */
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);

View File

@ -9415,8 +9415,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
memcpy(&key, &block_group->key, sizeof(key)); memcpy(&key, &block_group->key, sizeof(key));
btrfs_clear_space_info_full(root->fs_info);
btrfs_put_block_group(block_group); btrfs_put_block_group(block_group);
btrfs_put_block_group(block_group); btrfs_put_block_group(block_group);

View File

@ -1264,7 +1264,7 @@ out:
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, struct btrfs_device *device,
u64 start) u64 start, u64 *dev_extent_len)
{ {
int ret; int ret;
struct btrfs_path *path; struct btrfs_path *path;
@ -1306,13 +1306,8 @@ again:
goto out; goto out;
} }
if (device->bytes_used > 0) { *dev_extent_len = btrfs_dev_extent_length(leaf, extent);
u64 len = btrfs_dev_extent_length(leaf, extent);
btrfs_device_set_bytes_used(device, device->bytes_used - len);
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space += len;
spin_unlock(&root->fs_info->free_chunk_lock);
}
ret = btrfs_del_item(trans, root, path); ret = btrfs_del_item(trans, root, path);
if (ret) { if (ret) {
btrfs_error(root->fs_info, ret, btrfs_error(root->fs_info, ret,
@ -1521,7 +1516,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY; key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid; key.offset = device->devid;
lock_chunks(root);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1); ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0) if (ret < 0)
@ -1537,7 +1531,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
goto out; goto out;
out: out:
btrfs_free_path(path); btrfs_free_path(path);
unlock_chunks(root);
btrfs_commit_transaction(trans, root); btrfs_commit_transaction(trans, root);
return ret; return ret;
} }
@ -1726,9 +1719,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
fs_devices = fs_devices->seed; fs_devices = fs_devices->seed;
} }
cur_devices->seed = NULL; cur_devices->seed = NULL;
lock_chunks(root);
__btrfs_close_devices(cur_devices); __btrfs_close_devices(cur_devices);
unlock_chunks(root);
free_fs_devices(cur_devices); free_fs_devices(cur_devices);
} }
@ -1990,11 +1981,12 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
mutex_lock(&root->fs_info->fs_devices->device_list_mutex); mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu); synchronize_rcu);
list_for_each_entry(device, &seed_devices->devices, dev_list)
list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
list_for_each_entry(device, &seed_devices->devices, dev_list) {
device->fs_devices = seed_devices; device->fs_devices = seed_devices;
}
lock_chunks(root);
list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
unlock_chunks(root);
fs_devices->seeding = 0; fs_devices->seeding = 0;
fs_devices->num_devices = 0; fs_devices->num_devices = 0;
@ -2155,8 +2147,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error; goto error;
} }
lock_chunks(root);
q = bdev_get_queue(bdev); q = bdev_get_queue(bdev);
if (blk_queue_discard(q)) if (blk_queue_discard(q))
device->can_discard = 1; device->can_discard = 1;
@ -2185,6 +2175,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->fs_devices = root->fs_info->fs_devices; device->fs_devices = root->fs_info->fs_devices;
mutex_lock(&root->fs_info->fs_devices->device_list_mutex); mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
lock_chunks(root);
list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
list_add(&device->dev_alloc_list, list_add(&device->dev_alloc_list,
&root->fs_info->fs_devices->alloc_list); &root->fs_info->fs_devices->alloc_list);
@ -2212,15 +2203,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
/* add sysfs device entry */ /* add sysfs device entry */
btrfs_kobj_add_device(root->fs_info, device); btrfs_kobj_add_device(root->fs_info, device);
/*
* we've got more storage, clear any full flags on the space
* infos
*/
btrfs_clear_space_info_full(root->fs_info);
unlock_chunks(root);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (seeding_dev) { if (seeding_dev) {
char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; lock_chunks(root);
ret = init_first_rw_device(trans, root, device); ret = init_first_rw_device(trans, root, device);
unlock_chunks(root);
if (ret) { if (ret) {
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
goto error_trans; goto error_trans;
} }
}
ret = btrfs_add_device(trans, root, device);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto error_trans;
}
if (seeding_dev) {
char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
ret = btrfs_finish_sprout(trans, root); ret = btrfs_finish_sprout(trans, root);
if (ret) { if (ret) {
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
@ -2234,21 +2244,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
root->fs_info->fsid); root->fs_info->fsid);
if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
goto error_trans; goto error_trans;
} else {
ret = btrfs_add_device(trans, root, device);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto error_trans;
}
} }
/*
* we've got more storage, clear any full flags on the space
* infos
*/
btrfs_clear_space_info_full(root->fs_info);
unlock_chunks(root);
root->fs_info->num_tolerated_disk_barrier_failures = root->fs_info->num_tolerated_disk_barrier_failures =
btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info);
ret = btrfs_commit_transaction(trans, root); ret = btrfs_commit_transaction(trans, root);
@ -2280,7 +2277,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
return ret; return ret;
error_trans: error_trans:
unlock_chunks(root);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
rcu_string_free(device->name); rcu_string_free(device->name);
btrfs_kobj_rm_device(root->fs_info, device); btrfs_kobj_rm_device(root->fs_info, device);
@ -2449,20 +2445,27 @@ out:
return ret; return ret;
} }
static int __btrfs_grow_device(struct btrfs_trans_handle *trans, int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 new_size) struct btrfs_device *device, u64 new_size)
{ {
struct btrfs_super_block *super_copy = struct btrfs_super_block *super_copy =
device->dev_root->fs_info->super_copy; device->dev_root->fs_info->super_copy;
struct btrfs_fs_devices *fs_devices; struct btrfs_fs_devices *fs_devices;
u64 old_total = btrfs_super_total_bytes(super_copy); u64 old_total;
u64 diff = new_size - device->total_bytes; u64 diff;
if (!device->writeable) if (!device->writeable)
return -EACCES; return -EACCES;
lock_chunks(device->dev_root);
old_total = btrfs_super_total_bytes(super_copy);
diff = new_size - device->total_bytes;
if (new_size <= device->total_bytes || if (new_size <= device->total_bytes ||
device->is_tgtdev_for_dev_replace) device->is_tgtdev_for_dev_replace) {
unlock_chunks(device->dev_root);
return -EINVAL; return -EINVAL;
}
fs_devices = device->dev_root->fs_info->fs_devices; fs_devices = device->dev_root->fs_info->fs_devices;
@ -2475,20 +2478,11 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
if (list_empty(&device->resized_list)) if (list_empty(&device->resized_list))
list_add_tail(&device->resized_list, list_add_tail(&device->resized_list,
&fs_devices->resized_devices); &fs_devices->resized_devices);
unlock_chunks(device->dev_root);
return btrfs_update_device(trans, device); return btrfs_update_device(trans, device);
} }
int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 new_size)
{
int ret;
lock_chunks(device->dev_root);
ret = __btrfs_grow_device(trans, device, new_size);
unlock_chunks(device->dev_root);
return ret;
}
static int btrfs_free_chunk(struct btrfs_trans_handle *trans, static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 chunk_tree, u64 chunk_objectid, u64 chunk_tree, u64 chunk_objectid,
@ -2540,6 +2534,7 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
u32 cur; u32 cur;
struct btrfs_key key; struct btrfs_key key;
lock_chunks(root);
array_size = btrfs_super_sys_array_size(super_copy); array_size = btrfs_super_sys_array_size(super_copy);
ptr = super_copy->sys_chunk_array; ptr = super_copy->sys_chunk_array;
@ -2569,6 +2564,7 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
cur += len; cur += len;
} }
} }
unlock_chunks(root);
return ret; return ret;
} }
@ -2579,8 +2575,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
struct extent_map_tree *em_tree; struct extent_map_tree *em_tree;
struct btrfs_root *extent_root; struct btrfs_root *extent_root;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct extent_map *em; struct extent_map *em;
struct map_lookup *map; struct map_lookup *map;
u64 dev_extent_len = 0;
int ret; int ret;
int i; int i;
@ -2604,8 +2602,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
return ret; return ret;
} }
lock_chunks(root);
/* /*
* step two, delete the device extents and the * step two, delete the device extents and the
* chunk tree entries * chunk tree entries
@ -2619,10 +2615,23 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
map = (struct map_lookup *)em->bdev; map = (struct map_lookup *)em->bdev;
for (i = 0; i < map->num_stripes; i++) { for (i = 0; i < map->num_stripes; i++) {
ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, device = map->stripes[i].dev;
map->stripes[i].physical); ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical,
&dev_extent_len);
BUG_ON(ret); BUG_ON(ret);
if (device->bytes_used > 0) {
lock_chunks(root);
btrfs_device_set_bytes_used(device,
device->bytes_used - dev_extent_len);
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space += dev_extent_len;
spin_unlock(&root->fs_info->free_chunk_lock);
btrfs_clear_space_info_full(root->fs_info);
unlock_chunks(root);
}
if (map->stripes[i].dev) { if (map->stripes[i].dev) {
ret = btrfs_update_device(trans, map->stripes[i].dev); ret = btrfs_update_device(trans, map->stripes[i].dev);
BUG_ON(ret); BUG_ON(ret);
@ -2652,7 +2661,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
/* once for us */ /* once for us */
free_extent_map(em); free_extent_map(em);
unlock_chunks(root);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
return 0; return 0;
} }
@ -4029,16 +4037,12 @@ again:
list_add_tail(&device->resized_list, list_add_tail(&device->resized_list,
&root->fs_info->fs_devices->resized_devices); &root->fs_info->fs_devices->resized_devices);
/* Now btrfs_update_device() will change the on-disk size. */
ret = btrfs_update_device(trans, device);
if (ret) {
unlock_chunks(root);
btrfs_end_transaction(trans, root);
goto done;
}
WARN_ON(diff > old_total); WARN_ON(diff > old_total);
btrfs_set_super_total_bytes(super_copy, old_total - diff); btrfs_set_super_total_bytes(super_copy, old_total - diff);
unlock_chunks(root); unlock_chunks(root);
/* Now btrfs_update_device() will change the on-disk size. */
ret = btrfs_update_device(trans, device);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
done: done:
btrfs_free_path(path); btrfs_free_path(path);
@ -4612,15 +4616,6 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset,
alloc_profile); alloc_profile);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
ret = btrfs_add_device(trans, fs_info->chunk_root, device);
if (ret)
btrfs_abort_transaction(trans, root, ret);
out:
return ret; return ret;
} }