diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 94660063a162..670973025048 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1279,6 +1279,16 @@ struct btrfs_root { #endif }; +struct btrfs_clone_extent_info { + u64 disk_offset; + u64 disk_len; + u64 data_offset; + u64 data_len; + u64 file_offset; + char *extent_buf; + u32 item_size; +}; + struct btrfs_file_private { void *filldir_buf; }; @@ -3233,6 +3243,10 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, int drop_cache); +int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, + const u64 start, const u64 end, + struct btrfs_clone_extent_info *clone_info, + struct btrfs_trans_handle **trans_out); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 16dc09736310..474ff1cac640 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2439,13 +2439,76 @@ static int btrfs_punch_hole_lock_range(struct inode *inode, return 0; } +static int btrfs_insert_clone_extent(struct btrfs_trans_handle *trans, + struct inode *inode, + struct btrfs_path *path, + struct btrfs_clone_extent_info *clone_info, + const u64 clone_len) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_file_extent_item *extent; + struct extent_buffer *leaf; + struct btrfs_key key; + int slot; + struct btrfs_ref ref = { 0 }; + u64 ref_offset; + int ret; + + if (clone_len == 0) + return 0; + + if (clone_info->disk_offset == 0 && + btrfs_fs_incompat(fs_info, NO_HOLES)) + return 0; + + key.objectid = btrfs_ino(BTRFS_I(inode)); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = clone_info->file_offset; + ret = btrfs_insert_empty_item(trans, root, path, &key, + clone_info->item_size); + if (ret) + return ret; + leaf = path->nodes[0]; + slot = path->slots[0]; + write_extent_buffer(leaf, clone_info->extent_buf, + btrfs_item_ptr_offset(leaf, slot), + clone_info->item_size); + extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + btrfs_set_file_extent_offset(leaf, extent, clone_info->data_offset); + btrfs_set_file_extent_num_bytes(leaf, extent, clone_len); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); + + /* If it's a hole, nothing more needs to be done. */ + if (clone_info->disk_offset == 0) + return 0; + + inode_add_bytes(inode, clone_len); + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, + clone_info->disk_offset, + clone_info->disk_len, 0); + ref_offset = clone_info->file_offset - clone_info->data_offset; + btrfs_init_data_ref(&ref, root->root_key.objectid, + btrfs_ino(BTRFS_I(inode)), ref_offset); + ret = btrfs_inc_extent_ref(trans, &ref); + + return ret; +} + /* * The respective range must have been previously locked, as well as the inode. * The end offset is inclusive (last byte of the range). + * @clone_info is NULL for fallocate's hole punching and non-NULL for extent + * cloning. + * When cloning, we don't want to end up in a state where we dropped extents + * without inserting a new one, so we must abort the transaction to avoid a + * corruption. */ -static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, - const u64 start, const u64 end, - struct btrfs_trans_handle **trans_out) +int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, + const u64 start, const u64 end, + struct btrfs_clone_extent_info *clone_info, + struct btrfs_trans_handle **trans_out) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1); @@ -2473,9 +2536,14 @@ static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, /* * 1 - update the inode * 1 - removing the extents in the range - * 1 - adding the hole extent if no_holes isn't set + * 1 - adding the hole extent if no_holes isn't set or if we are cloning + * an extent */ - rsv_count = btrfs_fs_incompat(fs_info, NO_HOLES) ? 2 : 3; + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || clone_info) + rsv_count = 3; + else + rsv_count = 2; + trans = btrfs_start_transaction(root, rsv_count); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -2493,12 +2561,23 @@ static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, ret = __btrfs_drop_extents(trans, root, inode, path, cur_offset, end + 1, &drop_end, 1, 0, 0, NULL); - if (ret != -ENOSPC) + if (ret != -ENOSPC) { + /* + * When cloning we want to avoid transaction aborts when + * nothing was done and we are attempting to clone parts + * of inline extents, in such cases -EOPNOTSUPP is + * returned by __btrfs_drop_extents() without having + * changed anything in the file. + */ + if (clone_info && ret && ret != -EOPNOTSUPP) + btrfs_abort_transaction(trans, ret); break; + } trans->block_rsv = &fs_info->trans_block_rsv; - if (cur_offset < drop_end && cur_offset < ino_size) { + if (!clone_info && cur_offset < drop_end && + cur_offset < ino_size) { ret = fill_holes(trans, BTRFS_I(inode), path, cur_offset, drop_end); if (ret) { @@ -2513,6 +2592,20 @@ static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, } } + if (clone_info) { + u64 clone_len = drop_end - cur_offset; + + ret = btrfs_insert_clone_extent(trans, inode, path, + clone_info, clone_len); + if (ret) { + btrfs_abort_transaction(trans, ret); + break; + } + clone_info->data_len -= clone_len; + clone_info->data_offset += clone_len; + clone_info->file_offset += clone_len; + } + cur_offset = drop_end; ret = btrfs_update_inode(trans, root, inode); @@ -2534,15 +2627,29 @@ static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, BUG_ON(ret); /* shouldn't happen */ trans->block_rsv = rsv; - ret = find_first_non_hole(inode, &cur_offset, &len); - if (unlikely(ret < 0)) - break; - if (ret && !len) { - ret = 0; - break; + if (!clone_info) { + ret = find_first_non_hole(inode, &cur_offset, &len); + if (unlikely(ret < 0)) + break; + if (ret && !len) { + ret = 0; + break; + } } } + /* + * If we were cloning, force the next fsync to be a full one since we + * we replaced (or just dropped in the case of cloning holes when + * NO_HOLES is enabled) extents and extent maps. + * This is for the sake of simplicity, and cloning into files larger + * than 16Mb would force the full fsync any way (when + * try_release_extent_mapping() is invoked during page cache truncation. + */ + if (clone_info) + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + if (ret) goto out_trans; @@ -2565,7 +2672,7 @@ static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, * (because it's useless) or if it represents a 0 bytes range (when * cur_offset == drop_end). */ - if (cur_offset < ino_size && cur_offset < drop_end) { + if (!clone_info && cur_offset < ino_size && cur_offset < drop_end) { ret = fill_holes(trans, BTRFS_I(inode), path, cur_offset, drop_end); if (ret) { @@ -2574,6 +2681,14 @@ static int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, goto out_trans; } } + if (clone_info) { + ret = btrfs_insert_clone_extent(trans, inode, path, clone_info, + clone_info->data_len); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_trans; + } + } out_trans: if (!trans) @@ -2710,7 +2825,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out; } - ret = btrfs_punch_hole_range(inode, path, lockstart, lockend, &trans); + ret = btrfs_punch_hole_range(inode, path, lockstart, lockend, NULL, + &trans); btrfs_free_path(path); if (ret) goto out; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 818f7ec8bb0e..0a0c54e99b22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3324,61 +3324,6 @@ out: return ret; } -static void clone_update_extent_map(struct btrfs_inode *inode, - const struct btrfs_trans_handle *trans, - const struct btrfs_path *path, - const u64 hole_offset, - const u64 hole_len) -{ - struct extent_map_tree *em_tree = &inode->extent_tree; - struct extent_map *em; - int ret; - - em = alloc_extent_map(); - if (!em) { - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); - return; - } - - if (path) { - struct btrfs_file_extent_item *fi; - - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - btrfs_extent_item_to_extent_map(inode, path, fi, false, em); - em->generation = -1; - if (btrfs_file_extent_type(path->nodes[0], fi) == - BTRFS_FILE_EXTENT_INLINE) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &inode->runtime_flags); - } else { - em->start = hole_offset; - em->len = hole_len; - em->ram_bytes = em->len; - em->orig_start = hole_offset; - em->block_start = EXTENT_MAP_HOLE; - em->block_len = 0; - em->orig_block_len = 0; - em->compress_type = BTRFS_COMPRESS_NONE; - em->generation = trans->transid; - } - - while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 1); - write_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, em->start, - em->start + em->len - 1, 0); - } - - if (ret) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); -} - /* * Make sure we do not end up inserting an inline extent into a file that has * already other (non-inline) extents. If a file has an inline extent it can @@ -3519,6 +3464,7 @@ copy_inline_extent: path->slots[0]), size); inode_add_bytes(dst, datal); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags); return 0; } @@ -3678,19 +3624,10 @@ process_slot: else drop_start = new_key.offset; - /* - * 1 - adjusting old extent (we may have to split it) - * 1 - add new extent - * 1 - inode update - */ - trans = btrfs_start_transaction(root, 3); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { + struct btrfs_clone_extent_info clone_info; + /* * a | --- range to clone ---| b * | ------------- extent ------------- | @@ -3706,63 +3643,19 @@ process_slot: datal -= off - key.offset; } - ret = btrfs_drop_extents(trans, root, inode, - drop_start, - new_key.offset + datal, - 1); - if (ret) { - if (ret != -EOPNOTSUPP) - btrfs_abort_transaction(trans, - ret); - btrfs_end_transaction(trans); + clone_info.disk_offset = disko; + clone_info.disk_len = diskl; + clone_info.data_offset = datao; + clone_info.data_len = datal; + clone_info.file_offset = new_key.offset; + clone_info.extent_buf = buf; + clone_info.item_size = size; + ret = btrfs_punch_hole_range(inode, path, + drop_start, + new_key.offset + datal - 1, + &clone_info, &trans); + if (ret) goto out; - } - - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - goto out; - } - - leaf = path->nodes[0]; - slot = path->slots[0]; - write_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - - /* disko == 0 means it's a hole */ - if (!disko) - datao = 0; - - btrfs_set_file_extent_offset(leaf, extent, - datao); - btrfs_set_file_extent_num_bytes(leaf, extent, - datal); - - if (disko) { - struct btrfs_ref ref = { 0 }; - inode_add_bytes(inode, datal); - btrfs_init_generic_ref(&ref, - BTRFS_ADD_DELAYED_REF, disko, - diskl, 0); - btrfs_init_data_ref(&ref, - root->root_key.objectid, - btrfs_ino(BTRFS_I(inode)), - new_key.offset - datao); - ret = btrfs_inc_extent_ref(trans, &ref); - if (ret) { - btrfs_abort_transaction(trans, - ret); - btrfs_end_transaction(trans); - goto out; - - } - } } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; @@ -3777,12 +3670,27 @@ process_slot: if (comp && (skip || trim)) { ret = -EINVAL; - btrfs_end_transaction(trans); goto out; } size -= skip + trim; datal -= skip + trim; + /* + * If our extent is inline, we know we will drop + * or adjust at most 1 extent item in the + * destination root. + * + * 1 - adjusting old extent (we may have to + * split it) + * 1 - add new extent + * 1 - inode update + */ + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = clone_copy_inline_extent(inode, trans, path, &new_key, @@ -3796,20 +3704,8 @@ process_slot: btrfs_end_transaction(trans); goto out; } - leaf = path->nodes[0]; - slot = path->slots[0]; } - /* If we have an implicit hole (NO_HOLES feature). */ - if (drop_start < new_key.offset) - clone_update_extent_map(BTRFS_I(inode), trans, - NULL, drop_start, - new_key.offset - drop_start); - - clone_update_extent_map(BTRFS_I(inode), trans, - path, 0, 0); - - btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); last_dest_end = ALIGN(new_key.offset + datal, @@ -3834,32 +3730,27 @@ process_slot: ret = 0; if (last_dest_end < destoff + len) { + struct btrfs_clone_extent_info clone_info = { 0 }; /* * We have an implicit hole (NO_HOLES feature is enabled) that * fully or partially overlaps our cloning range at its end. */ btrfs_release_path(path); + path->leave_spinning = 0; /* - * 1 - remove extent(s) - * 1 - inode update + * We are dealing with a hole and our clone_info already has a + * disk_offset of 0, we only need to fill the data length and + * file offset. */ - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); + clone_info.data_len = destoff + len - last_dest_end; + clone_info.file_offset = last_dest_end; + ret = btrfs_punch_hole_range(inode, path, + last_dest_end, destoff + len - 1, + &clone_info, &trans); + if (ret) goto out; - } - ret = btrfs_drop_extents(trans, root, inode, - last_dest_end, destoff + len, 1); - if (ret) { - if (ret != -EOPNOTSUPP) - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - goto out; - } - clone_update_extent_map(BTRFS_I(inode), trans, NULL, - last_dest_end, - destoff + len - last_dest_end); + ret = clone_finish_inode_update(trans, inode, destoff + len, destoff, olen, no_time_update); }