e7cdb60fd2
Pull zstd support from Chris Mason: "Nick Terrell's patch series to add zstd support to the kernel has been floating around for a while. After talking with Dave Sterba, Herbert and Phillip, we decided to send the whole thing in as one pull request. zstd is a big win in speed over zlib and in compression ratio over lzo, and the compression team here at FB has gotten great results using it in production. Nick will continue to update the kernel side with new improvements from the open source zstd userland code. Nick has a number of benchmarks for the main zstd code in his lib/zstd commit: I ran the benchmarks on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and an SSD. I benchmarked using `silesia.tar` [3], which is 211,988,480 B large. Run the following commands for the benchmark: sudo modprobe zstd_compress_test sudo mknod zstd_compress_test c 245 0 sudo cp silesia.tar zstd_compress_test The time is reported by the time of the userland `cp`. The MB/s is computed with 211,988,480 B / time(method) which includes the time to copy from userland. The Adjusted MB/s is computed with 211,988,480 B / (time(method) - time(none)). The memory reported is the amount of memory the compressor requests. 
| Method | Size (B) | Time (s) | Ratio | MB/s | Adj MB/s | Mem (MB) | |----------|----------|----------|-------|---------|----------|----------| | none | 211988480 | 0.100 | 1 | 2119.88 | - | - | | zstd -1 | 73645762 | 1.044 | 2.878 | 203.05 | 224.56 | 1.23 | | zstd -3 | 66988878 | 1.761 | 3.165 | 120.38 | 127.63 | 2.47 | | zstd -5 | 65001259 | 2.563 | 3.261 | 82.71 | 86.07 | 2.86 | | zstd -10 | 60165346 | 13.242 | 3.523 | 16.01 | 16.13 | 13.22 | | zstd -15 | 58009756 | 47.601 | 3.654 | 4.45 | 4.46 | 21.61 | | zstd -19 | 54014593 | 102.835 | 3.925 | 2.06 | 2.06 | 60.15 | | zlib -1 | 77260026 | 2.895 | 2.744 | 73.23 | 75.85 | 0.27 | | zlib -3 | 72972206 | 4.116 | 2.905 | 51.50 | 52.79 | 0.27 | | zlib -6 | 68190360 | 9.633 | 3.109 | 22.01 | 22.24 | 0.27 | | zlib -9 | 67613382 | 22.554 | 3.135 | 9.40 | 9.44 | 0.27 | I benchmarked zstd decompression using the same method on the same machine. The benchmark file is located in the upstream zstd repo under `contrib/linux-kernel/zstd_decompress_test.c` [4]. The memory reported is the amount of memory required to decompress data compressed with the given compression level. If you know the maximum size of your input, you can reduce the memory usage of decompression irrespective of the compression level. 
| Method | Time (s) | MB/s | Adjusted MB/s | Memory (MB) | |----------|----------|---------|---------------|-------------| | none | 0.025 | 8479.54 | - | - | | zstd -1 | 0.358 | 592.15 | 636.60 | 0.84 | | zstd -3 | 0.396 | 535.32 | 571.40 | 1.46 | | zstd -5 | 0.396 | 535.32 | 571.40 | 1.46 | | zstd -10 | 0.374 | 566.81 | 607.42 | 2.51 | | zstd -15 | 0.379 | 559.34 | 598.84 | 4.61 | | zstd -19 | 0.412 | 514.54 | 547.77 | 8.80 | | zlib -1 | 0.940 | 225.52 | 231.68 | 0.04 | | zlib -3 | 0.883 | 240.08 | 247.07 | 0.04 | | zlib -6 | 0.844 | 251.17 | 258.84 | 0.04 | | zlib -9 | 0.837 | 253.27 | 261.07 | 0.04 | I ran a long series of tests and benchmarks on the btrfs side and the gains are very similar to the core benchmarks Nick ran" * 'zstd-minimal' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: squashfs: Add zstd support btrfs: Add zstd support lib: Add zstd modules lib: Add xxhash module
444 lines
10 KiB
C
444 lines
10 KiB
C
/*
|
|
* Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include <linux/hashtable.h>
|
|
#include "props.h"
|
|
#include "btrfs_inode.h"
|
|
#include "hash.h"
|
|
#include "transaction.h"
|
|
#include "xattr.h"
|
|
#include "compression.h"
|
|
|
|
#define BTRFS_PROP_HANDLERS_HT_BITS 8
|
|
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
|
|
|
|
struct prop_handler {
|
|
struct hlist_node node;
|
|
const char *xattr_name;
|
|
int (*validate)(const char *value, size_t len);
|
|
int (*apply)(struct inode *inode, const char *value, size_t len);
|
|
const char *(*extract)(struct inode *inode);
|
|
int inheritable;
|
|
};
|
|
|
|
static int prop_compression_validate(const char *value, size_t len);
|
|
static int prop_compression_apply(struct inode *inode,
|
|
const char *value,
|
|
size_t len);
|
|
static const char *prop_compression_extract(struct inode *inode);
|
|
|
|
static struct prop_handler prop_handlers[] = {
|
|
{
|
|
.xattr_name = XATTR_BTRFS_PREFIX "compression",
|
|
.validate = prop_compression_validate,
|
|
.apply = prop_compression_apply,
|
|
.extract = prop_compression_extract,
|
|
.inheritable = 1
|
|
},
|
|
};
|
|
|
|
/*
 * Initialise prop_handlers_ht and insert every entry of prop_handlers[],
 * keyed by the btrfs name hash of the entry's xattr name.
 */
void __init btrfs_props_init(void)
{
	int idx = 0;

	hash_init(prop_handlers_ht);

	while (idx < ARRAY_SIZE(prop_handlers)) {
		struct prop_handler *handler = &prop_handlers[idx++];
		const char *name = handler->xattr_name;
		/* Kept as a u64 variable: hash_add() is given the same key type as before. */
		u64 key = btrfs_name_hash(name, strlen(name));

		hash_add(prop_handlers_ht, &handler->node, key);
	}
}
|
|
|
|
static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
|
|
{
|
|
struct hlist_head *h;
|
|
|
|
h = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];
|
|
if (hlist_empty(h))
|
|
return NULL;
|
|
|
|
return h;
|
|
}
|
|
|
|
static const struct prop_handler *
|
|
find_prop_handler(const char *name,
|
|
const struct hlist_head *handlers)
|
|
{
|
|
struct prop_handler *h;
|
|
|
|
if (!handlers) {
|
|
u64 hash = btrfs_name_hash(name, strlen(name));
|
|
|
|
handlers = find_prop_handlers_by_hash(hash);
|
|
if (!handlers)
|
|
return NULL;
|
|
}
|
|
|
|
hlist_for_each_entry(h, handlers, node)
|
|
if (!strcmp(h->xattr_name, name))
|
|
return h;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
|
|
struct inode *inode,
|
|
const char *name,
|
|
const char *value,
|
|
size_t value_len,
|
|
int flags)
|
|
{
|
|
const struct prop_handler *handler;
|
|
int ret;
|
|
|
|
if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
|
|
return -EINVAL;
|
|
|
|
handler = find_prop_handler(name, NULL);
|
|
if (!handler)
|
|
return -EINVAL;
|
|
|
|
if (value_len == 0) {
|
|
ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
|
|
NULL, 0, flags);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = handler->apply(inode, NULL, 0);
|
|
ASSERT(ret == 0);
|
|
|
|
return ret;
|
|
}
|
|
|
|
ret = handler->validate(value, value_len);
|
|
if (ret)
|
|
return ret;
|
|
ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
|
|
value, value_len, flags);
|
|
if (ret)
|
|
return ret;
|
|
ret = handler->apply(inode, value, value_len);
|
|
if (ret) {
|
|
__btrfs_setxattr(trans, inode, handler->xattr_name,
|
|
NULL, 0, flags);
|
|
return ret;
|
|
}
|
|
|
|
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int btrfs_set_prop(struct inode *inode,
|
|
const char *name,
|
|
const char *value,
|
|
size_t value_len,
|
|
int flags)
|
|
{
|
|
return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
|
|
}
|
|
|
|
static int iterate_object_props(struct btrfs_root *root,
|
|
struct btrfs_path *path,
|
|
u64 objectid,
|
|
void (*iterator)(void *,
|
|
const struct prop_handler *,
|
|
const char *,
|
|
size_t),
|
|
void *ctx)
|
|
{
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
int ret;
|
|
char *name_buf = NULL;
|
|
char *value_buf = NULL;
|
|
int name_buf_len = 0;
|
|
int value_buf_len = 0;
|
|
|
|
while (1) {
|
|
struct btrfs_key key;
|
|
struct btrfs_dir_item *di;
|
|
struct extent_buffer *leaf;
|
|
u32 total_len, cur, this_len;
|
|
int slot;
|
|
const struct hlist_head *handlers;
|
|
|
|
slot = path->slots[0];
|
|
leaf = path->nodes[0];
|
|
|
|
if (slot >= btrfs_header_nritems(leaf)) {
|
|
ret = btrfs_next_leaf(root, path);
|
|
if (ret < 0)
|
|
goto out;
|
|
else if (ret > 0)
|
|
break;
|
|
continue;
|
|
}
|
|
|
|
btrfs_item_key_to_cpu(leaf, &key, slot);
|
|
if (key.objectid != objectid)
|
|
break;
|
|
if (key.type != BTRFS_XATTR_ITEM_KEY)
|
|
break;
|
|
|
|
handlers = find_prop_handlers_by_hash(key.offset);
|
|
if (!handlers)
|
|
goto next_slot;
|
|
|
|
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
|
|
cur = 0;
|
|
total_len = btrfs_item_size_nr(leaf, slot);
|
|
|
|
while (cur < total_len) {
|
|
u32 name_len = btrfs_dir_name_len(leaf, di);
|
|
u32 data_len = btrfs_dir_data_len(leaf, di);
|
|
unsigned long name_ptr, data_ptr;
|
|
const struct prop_handler *handler;
|
|
|
|
this_len = sizeof(*di) + name_len + data_len;
|
|
name_ptr = (unsigned long)(di + 1);
|
|
data_ptr = name_ptr + name_len;
|
|
|
|
if (verify_dir_item(fs_info, leaf,
|
|
path->slots[0], di)) {
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
|
|
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
|
|
name_ptr,
|
|
XATTR_BTRFS_PREFIX_LEN))
|
|
goto next_dir_item;
|
|
|
|
if (name_len >= name_buf_len) {
|
|
kfree(name_buf);
|
|
name_buf_len = name_len + 1;
|
|
name_buf = kmalloc(name_buf_len, GFP_NOFS);
|
|
if (!name_buf) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
}
|
|
read_extent_buffer(leaf, name_buf, name_ptr, name_len);
|
|
name_buf[name_len] = '\0';
|
|
|
|
handler = find_prop_handler(name_buf, handlers);
|
|
if (!handler)
|
|
goto next_dir_item;
|
|
|
|
if (data_len > value_buf_len) {
|
|
kfree(value_buf);
|
|
value_buf_len = data_len;
|
|
value_buf = kmalloc(data_len, GFP_NOFS);
|
|
if (!value_buf) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
}
|
|
read_extent_buffer(leaf, value_buf, data_ptr, data_len);
|
|
|
|
iterator(ctx, handler, value_buf, data_len);
|
|
next_dir_item:
|
|
cur += this_len;
|
|
di = (struct btrfs_dir_item *)((char *) di + this_len);
|
|
}
|
|
|
|
next_slot:
|
|
path->slots[0]++;
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
btrfs_release_path(path);
|
|
kfree(name_buf);
|
|
kfree(value_buf);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void inode_prop_iterator(void *ctx,
|
|
const struct prop_handler *handler,
|
|
const char *value,
|
|
size_t len)
|
|
{
|
|
struct inode *inode = ctx;
|
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
int ret;
|
|
|
|
ret = handler->apply(inode, value, len);
|
|
if (unlikely(ret))
|
|
btrfs_warn(root->fs_info,
|
|
"error applying prop %s to ino %llu (root %llu): %d",
|
|
handler->xattr_name, btrfs_ino(BTRFS_I(inode)),
|
|
root->root_key.objectid, ret);
|
|
else
|
|
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
|
|
}
|
|
|
|
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
|
|
{
|
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
u64 ino = btrfs_ino(BTRFS_I(inode));
|
|
int ret;
|
|
|
|
ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int inherit_props(struct btrfs_trans_handle *trans,
|
|
struct inode *inode,
|
|
struct inode *parent)
|
|
{
|
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
int ret;
|
|
int i;
|
|
|
|
if (!test_bit(BTRFS_INODE_HAS_PROPS,
|
|
&BTRFS_I(parent)->runtime_flags))
|
|
return 0;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
|
|
const struct prop_handler *h = &prop_handlers[i];
|
|
const char *value;
|
|
u64 num_bytes;
|
|
|
|
if (!h->inheritable)
|
|
continue;
|
|
|
|
value = h->extract(parent);
|
|
if (!value)
|
|
continue;
|
|
|
|
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
|
|
ret = btrfs_block_rsv_add(root, trans->block_rsv,
|
|
num_bytes, BTRFS_RESERVE_NO_FLUSH);
|
|
if (ret)
|
|
goto out;
|
|
ret = __btrfs_set_prop(trans, inode, h->xattr_name,
|
|
value, strlen(value), 0);
|
|
btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes);
|
|
if (ret)
|
|
goto out;
|
|
}
|
|
ret = 0;
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Inherit properties for @inode from the directory @dir.  A NULL @dir means
 * there is nothing to inherit and 0 is returned.
 */
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
			      struct inode *inode,
			      struct inode *dir)
{
	return dir ? inherit_props(trans, inode, dir) : 0;
}
|
|
|
|
int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_root *parent_root)
|
|
{
|
|
struct super_block *sb = root->fs_info->sb;
|
|
struct btrfs_key key;
|
|
struct inode *parent_inode, *child_inode;
|
|
int ret;
|
|
|
|
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
|
|
key.type = BTRFS_INODE_ITEM_KEY;
|
|
key.offset = 0;
|
|
|
|
parent_inode = btrfs_iget(sb, &key, parent_root, NULL);
|
|
if (IS_ERR(parent_inode))
|
|
return PTR_ERR(parent_inode);
|
|
|
|
child_inode = btrfs_iget(sb, &key, root, NULL);
|
|
if (IS_ERR(child_inode)) {
|
|
iput(parent_inode);
|
|
return PTR_ERR(child_inode);
|
|
}
|
|
|
|
ret = inherit_props(trans, child_inode, parent_inode);
|
|
iput(child_inode);
|
|
iput(parent_inode);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Validate a candidate value for the "compression" property.
 *
 * @value: candidate value; it does not have to be NUL terminated.
 * @len:   number of bytes in @value.
 *
 * Returns 0 when @value holds "lzo", "zlib" or "zstd" (a NUL byte directly
 * after the name is tolerated), -EINVAL otherwise.
 */
static int prop_compression_validate(const char *value, size_t len)
{
	static const char * const names[] = { "lzo", "zlib", "zstd", NULL };
	const char * const *name;

	for (name = names; *name; name++) {
		/*
		 * strncmp() stops after @len bytes, so on its own it reports
		 * a match for any leading fragment of a name ("l", "zs") and
		 * for an empty value.  Insist that the whole name is there.
		 */
		if (len >= strlen(*name) && !strncmp(*name, value, len))
			return 0;
	}

	return -EINVAL;
}
|
|
|
|
static int prop_compression_apply(struct inode *inode,
|
|
const char *value,
|
|
size_t len)
|
|
{
|
|
int type;
|
|
|
|
if (len == 0) {
|
|
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
|
|
BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
|
|
BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
|
|
|
|
return 0;
|
|
}
|
|
|
|
if (!strncmp("lzo", value, 3))
|
|
type = BTRFS_COMPRESS_LZO;
|
|
else if (!strncmp("zlib", value, 4))
|
|
type = BTRFS_COMPRESS_ZLIB;
|
|
else if (!strncmp("zstd", value, len))
|
|
type = BTRFS_COMPRESS_ZSTD;
|
|
else
|
|
return -EINVAL;
|
|
|
|
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
|
|
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
|
|
BTRFS_I(inode)->prop_compress = type;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const char *prop_compression_extract(struct inode *inode)
|
|
{
|
|
switch (BTRFS_I(inode)->prop_compress) {
|
|
case BTRFS_COMPRESS_ZLIB:
|
|
return "zlib";
|
|
case BTRFS_COMPRESS_LZO:
|
|
return "lzo";
|
|
case BTRFS_COMPRESS_ZSTD:
|
|
return "zstd";
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|