diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0d1d966b0fe4..c3df14ce2cc2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, return ret; } +/* + * min slot controls the lowest index we're willing to push to the + * right. We'll push up to and including min_slot, but no lower + */ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size, int empty, struct extent_buffer *right, - int free_space, u32 left_nritems) + int free_space, u32 left_nritems, + u32 min_slot) { struct extent_buffer *left = path->nodes[0]; struct extent_buffer *upper = path->nodes[1]; @@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (empty) nr = 0; else - nr = 1; + nr = max_t(u32, 1, min_slot); if (path->slots[0] >= left_nritems) push_space += data_size; @@ -2469,10 +2474,14 @@ out_unlock: * * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. + * + * this will push starting from min_slot to the end of the leaf. It won't + * push any slot lower than min_slot */ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) + *root, struct btrfs_path *path, + int min_data_size, int data_size, + int empty, u32 min_slot) { struct extent_buffer *left = path->nodes[0]; struct extent_buffer *right; @@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (left_nritems == 0) goto out_unlock; - return __push_leaf_right(trans, root, path, data_size, empty, - right, free_space, left_nritems); + return __push_leaf_right(trans, root, path, min_data_size, empty, + right, free_space, left_nritems, min_slot); out_unlock: btrfs_tree_unlock(right); free_extent_buffer(right); @@ -2525,12 +2534,17 @@ out_unlock: /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * max_slot can put a limit on how far into the leaf we'll push items. The + * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the + * items */ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size, int empty, struct extent_buffer *left, - int free_space, int right_nritems) + int free_space, u32 right_nritems, + u32 max_slot) { struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; @@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, slot = path->slots[1]; if (empty) - nr = right_nritems; + nr = min(right_nritems, max_slot); else - nr = right_nritems - 1; + nr = min(right_nritems - 1, max_slot); for (i = 0; i < nr; i++) { item = btrfs_item_nr(right, i); @@ -2712,10 +2726,14 @@ out: /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * max_slot can put a limit on how far into the leaf we'll push items. The + * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the + * items */ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) + *root, struct btrfs_path *path, int min_data_size, + int data_size, int empty, u32 max_slot) { struct extent_buffer *right = path->nodes[0]; struct extent_buffer *left; @@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - return __push_leaf_left(trans, root, path, data_size, - empty, left, free_space, right_nritems); + return __push_leaf_left(trans, root, path, min_data_size, + empty, left, free_space, right_nritems, + max_slot); out: btrfs_tree_unlock(left); free_extent_buffer(left); @@ -2854,6 +2873,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, return ret; } +/* + * double splits happen when we need to insert a big item in the middle + * of a leaf. A double split can leave us with 3 mostly empty leaves: + * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] + * A B C + * + * We avoid this by trying to push the items on either side of our target + * into the adjacent leaves. If all goes well we can avoid the double split + * completely. + */ +static noinline int push_for_double_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + int data_size) +{ + int ret; + int progress = 0; + int slot; + u32 nritems; + + slot = path->slots[0]; + + /* + * try to push all the items after our slot into the + * right leaf + */ + ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); + if (ret < 0) + return ret; + + if (ret == 0) + progress++; + + nritems = btrfs_header_nritems(path->nodes[0]); + /* + * our goal is to get our slot at the start or end of a leaf. If + * we've done so we're done + */ + if (path->slots[0] == 0 || path->slots[0] == nritems) + return 0; + + if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) + return 0; + + /* try to push all the items before our slot into the next leaf */ + slot = path->slots[0]; + ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); + if (ret < 0) + return ret; + + if (ret == 0) + progress++; + + if (progress) + return 0; + return 1; +} + /* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. @@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int wret; int split; int num_doubles = 0; + int tried_avoid_double = 0; l = path->nodes[0]; slot = path->slots[0]; @@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, return -EOVERFLOW; /* first try to make some room by pushing left and right */ - if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { - wret = push_leaf_right(trans, root, path, data_size, 0); + if (data_size) { + wret = push_leaf_right(trans, root, path, data_size, + data_size, 0, 0); if (wret < 0) return wret; if (wret) { - wret = push_leaf_left(trans, root, path, data_size, 0); + wret = push_leaf_left(trans, root, path, data_size, + data_size, 0, (u32)-1); if (wret < 0) return wret; } @@ -2923,6 +3003,8 @@ again: if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + if (data_size && !tried_avoid_double) + goto push_for_double; split = 2; } } @@ -2939,6 +3021,8 @@ again: if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + if (data_size && !tried_avoid_double) + goto push_for_double; split = 2 ; } } @@ -3019,6 +3103,13 @@ again: } return ret; + +push_for_double: + push_for_double_split(trans, root, path, data_size); + tried_avoid_double = 1; + if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) + return 0; + goto again; } static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, @@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, extent_buffer_get(leaf); btrfs_set_path_blocking(path); - wret = push_leaf_left(trans, root, path, 1, 1); + wret = push_leaf_left(trans, root, path, 1, 1, + 1, (u32)-1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { - wret = push_leaf_right(trans, root, path, 1, 1); + wret = push_leaf_right(trans, root, path, 1, + 1, 1, 0); if (wret < 0 && wret != -ENOSPC) ret = wret; }