linux/fs/btrfs/props.c
Linus Torvalds e7cdb60fd2 Merge branch 'zstd-minimal' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull zstd support from Chris Mason:
 "Nick Terrell's patch series to add zstd support to the kernel has been
  floating around for a while. After talking with Dave Sterba, Herbert
  and Phillip, we decided to send the whole thing in as one pull
  request.

  zstd is a big win in speed over zlib and in compression ratio over
  lzo, and the compression team here at FB has gotten great results
  using it in production. Nick will continue to update the kernel side
  with new improvements from the open source zstd userland code.

  Nick has a number of benchmarks for the main zstd code in his lib/zstd
  commit:

      I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB
      of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel
      Core i7 processor, 16 GB of RAM, and a SSD. I benchmarked using
      `silesia.tar` [3], which is 211,988,480 B large. Run the following
      commands for the benchmark:

        sudo modprobe zstd_compress_test
        sudo mknod zstd_compress_test c 245 0
        sudo cp silesia.tar zstd_compress_test

      The time is reported by the time of the userland `cp`.
      The MB/s is computed with

        1,536,217,008 B / time(buffer size, hash)

      which includes the time to copy from userland.
      The Adjusted MB/s is computed with

        1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)).

      The memory reported is the amount of memory the compressor
      requests.

        | Method   | Size (B) | Time (s) | Ratio | MB/s    | Adj MB/s | Mem (MB) |
        |----------|----------|----------|-------|---------|----------|----------|
        | none     | 211988480 |    0.100 |     1 | 2119.88 |        - |        - |
        | zstd -1  | 73645762 |    1.044 | 2.878 |  203.05 |   224.56 |     1.23 |
        | zstd -3  | 66988878 |    1.761 | 3.165 |  120.38 |   127.63 |     2.47 |
        | zstd -5  | 65001259 |    2.563 | 3.261 |   82.71 |    86.07 |     2.86 |
        | zstd -10 | 60165346 |   13.242 | 3.523 |   16.01 |    16.13 |    13.22 |
        | zstd -15 | 58009756 |   47.601 | 3.654 |    4.45 |     4.46 |    21.61 |
        | zstd -19 | 54014593 |  102.835 | 3.925 |    2.06 |     2.06 |    60.15 |
        | zlib -1  | 77260026 |    2.895 | 2.744 |   73.23 |    75.85 |     0.27 |
        | zlib -3  | 72972206 |    4.116 | 2.905 |   51.50 |    52.79 |     0.27 |
        | zlib -6  | 68190360 |    9.633 | 3.109 |   22.01 |    22.24 |     0.27 |
        | zlib -9  | 67613382 |   22.554 | 3.135 |    9.40 |     9.44 |     0.27 |

      I benchmarked zstd decompression using the same method on the same
      machine. The benchmark file is located in the upstream zstd repo
      under `contrib/linux-kernel/zstd_decompress_test.c` [4]. The
      memory reported is the amount of memory required to decompress
      data compressed with the given compression level. If you know the
      maximum size of your input, you can reduce the memory usage of
      decompression irrespective of the compression level.

        | Method   | Time (s) | MB/s    | Adjusted MB/s | Memory (MB) |
        |----------|----------|---------|---------------|-------------|
        | none     |    0.025 | 8479.54 |             - |           - |
        | zstd -1  |    0.358 |  592.15 |        636.60 |        0.84 |
        | zstd -3  |    0.396 |  535.32 |        571.40 |        1.46 |
        | zstd -5  |    0.396 |  535.32 |        571.40 |        1.46 |
        | zstd -10 |    0.374 |  566.81 |        607.42 |        2.51 |
        | zstd -15 |    0.379 |  559.34 |        598.84 |        4.61 |
        | zstd -19 |    0.412 |  514.54 |        547.77 |        8.80 |
        | zlib -1  |    0.940 |  225.52 |        231.68 |        0.04 |
        | zlib -3  |    0.883 |  240.08 |        247.07 |        0.04 |
        | zlib -6  |    0.844 |  251.17 |        258.84 |        0.04 |
        | zlib -9  |    0.837 |  253.27 |        287.64 |        0.04 |

  I ran a long series of tests and benchmarks on the btrfs side and the
  gains are very similar to the core benchmarks Nick ran"

* 'zstd-minimal' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  squashfs: Add zstd support
  btrfs: Add zstd support
  lib: Add zstd modules
  lib: Add xxhash module
2017-09-14 17:30:49 -07:00

444 lines
10 KiB
C

/*
* Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <linux/hashtable.h>
#include "props.h"
#include "btrfs_inode.h"
#include "hash.h"
#include "transaction.h"
#include "xattr.h"
#include "compression.h"
/*
 * Lookup table for property handlers, sized by BTRFS_PROP_HANDLERS_HT_BITS.
 * btrfs_props_init() inserts every entry of prop_handlers[] keyed by the
 * btrfs_name_hash() of its xattr name; find_prop_handlers_by_hash() indexes
 * the buckets directly with hash_min().
 */
#define BTRFS_PROP_HANDLERS_HT_BITS 8
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
/*
 * Description of one btrfs property: the xattr that stores it and the
 * callbacks used to check, apply and read back its value.
 */
struct prop_handler {
/* Linkage into prop_handlers_ht. */
struct hlist_node node;
/* Full xattr name, including the XATTR_BTRFS_PREFIX prefix. */
const char *xattr_name;
/*
 * Called by __btrfs_set_prop() before a non-empty value is stored;
 * a non-zero return is passed back to the caller and nothing is stored.
 */
int (*validate)(const char *value, size_t len);
/*
 * Makes the value take effect on the in-memory inode. Called with
 * (NULL, 0) when the property is being removed.
 */
int (*apply)(struct inode *inode, const char *value, size_t len);
/*
 * Returns the inode's current value as a NUL terminated string, or NULL
 * when there is nothing to inherit (see inherit_props()).
 */
const char *(*extract)(struct inode *inode);
/* Non-zero if new inodes copy this property from their parent. */
int inheritable;
};
/*
 * Forward declarations of the compression property callbacks so that the
 * prop_handlers[] table can reference them before their definitions.
 */
static int prop_compression_validate(const char *value, size_t len);
static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len);
static const char *prop_compression_extract(struct inode *inode);
/*
 * All properties known to this file. Not const: btrfs_props_init() links
 * each entry into prop_handlers_ht through its embedded hlist node.
 */
static struct prop_handler prop_handlers[] = {
{
.xattr_name = XATTR_BTRFS_PREFIX "compression",
.validate = prop_compression_validate,
.apply = prop_compression_apply,
.extract = prop_compression_extract,
.inheritable = 1
},
};
/*
 * Populate prop_handlers_ht: every entry of prop_handlers[] is inserted
 * under the btrfs name hash of its xattr name.
 */
void __init btrfs_props_init(void)
{
	struct prop_handler *p = prop_handlers;
	struct prop_handler *const end =
		prop_handlers + ARRAY_SIZE(prop_handlers);

	hash_init(prop_handlers_ht);

	for (; p < end; p++) {
		u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));

		hash_add(prop_handlers_ht, &p->node, h);
	}
}
/*
 * Return the hash bucket that @hash maps to, or NULL when that bucket holds
 * no handlers at all.
 */
static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
{
	struct hlist_head *bucket;

	bucket = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];

	return hlist_empty(bucket) ? NULL : bucket;
}
static const struct prop_handler *
find_prop_handler(const char *name,
const struct hlist_head *handlers)
{
struct prop_handler *h;
if (!handlers) {
u64 hash = btrfs_name_hash(name, strlen(name));
handlers = find_prop_handlers_by_hash(hash);
if (!handlers)
return NULL;
}
hlist_for_each_entry(h, handlers, node)
if (!strcmp(h->xattr_name, name))
return h;
return NULL;
}
/*
 * Set or delete the property @name on @inode.
 *
 * A zero @value_len deletes the property: the xattr is removed and the
 * handler's apply callback is invoked with (NULL, 0). Otherwise the value is
 * validated, written as an xattr and then applied to the inode; if applying
 * fails the xattr is removed again and the error is returned.
 *
 * Returns 0 on success, -EINVAL for an unknown or too short property name,
 * or the error reported by the validate/setxattr/apply step that failed.
 */
static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
			    struct inode *inode,
			    const char *name,
			    const char *value,
			    size_t value_len,
			    int flags)
{
	const struct prop_handler *handler;
	int ret;

	/* The name must carry something after the btrfs xattr prefix. */
	if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
		return -EINVAL;

	handler = find_prop_handler(name, NULL);
	if (!handler)
		return -EINVAL;

	if (value_len != 0) {
		ret = handler->validate(value, value_len);
		if (ret)
			return ret;

		ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
				       value, value_len, flags);
		if (ret)
			return ret;

		ret = handler->apply(inode, value, value_len);
		if (ret) {
			/* Undo the xattr write so disk and inode agree. */
			__btrfs_setxattr(trans, inode, handler->xattr_name,
					 NULL, 0, flags);
			return ret;
		}

		set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
		return 0;
	}

	/* Empty value: remove the xattr and reset the in-memory state. */
	ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
			       NULL, 0, flags);
	if (ret)
		return ret;

	ret = handler->apply(inode, NULL, 0);
	ASSERT(ret == 0);

	return ret;
}
/*
 * Public entry point for setting (or, with @value_len == 0, deleting) the
 * property @name on @inode.
 *
 * Forwards to __btrfs_set_prop() with a NULL transaction handle; the return
 * value is whatever __btrfs_set_prop() returns.
 * NOTE(review): presumably a NULL handle makes __btrfs_setxattr() manage its
 * own transaction - confirm against xattr.c.
 */
int btrfs_set_prop(struct inode *inode,
const char *name,
const char *value,
size_t value_len,
int flags)
{
return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
}
/*
 * Walk the xattr items of @objectid and call @iterator for every xattr whose
 * name matches a registered property handler.
 *
 * The walk starts at the slot @path currently points to; no tree search is
 * done here. It stops at the first item that belongs to another object or is
 * not an xattr item, or when btrfs_next_leaf() reports no further leaf.
 *
 * @iterator receives @ctx, the matching handler and the raw xattr value with
 * its length. The value buffer is NOT NUL terminated.
 *
 * Returns 0 on success, a negative value from btrfs_next_leaf(), -EIO if
 * verify_dir_item() rejects an entry, or -ENOMEM. @path is released before
 * returning in all cases.
 */
static int iterate_object_props(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid,
void (*iterator)(void *,
const struct prop_handler *,
const char *,
size_t),
void *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
/* Scratch buffers, grown on demand and reused across entries. */
char *name_buf = NULL;
char *value_buf = NULL;
int name_buf_len = 0;
int value_buf_len = 0;
while (1) {
struct btrfs_key key;
struct btrfs_dir_item *di;
struct extent_buffer *leaf;
u32 total_len, cur, this_len;
int slot;
const struct hlist_head *handlers;
slot = path->slots[0];
leaf = path->nodes[0];
/* Ran off the end of this leaf: move on to the next one. */
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
else if (ret > 0)
break;
continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != objectid)
break;
if (key.type != BTRFS_XATTR_ITEM_KEY)
break;
/*
 * The key offset is used as the name hash; an empty bucket means no
 * handler can match anything stored in this item.
 */
handlers = find_prop_handlers_by_hash(key.offset);
if (!handlers)
goto next_slot;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
cur = 0;
total_len = btrfs_item_size_nr(leaf, slot);
/* One item may pack several dir items back to back. */
while (cur < total_len) {
u32 name_len = btrfs_dir_name_len(leaf, di);
u32 data_len = btrfs_dir_data_len(leaf, di);
unsigned long name_ptr, data_ptr;
const struct prop_handler *handler;
/* Layout of one entry: header, then name, then value. */
this_len = sizeof(*di) + name_len + data_len;
name_ptr = (unsigned long)(di + 1);
data_ptr = name_ptr + name_len;
if (verify_dir_item(fs_info, leaf,
path->slots[0], di)) {
ret = -EIO;
goto out;
}
/* Skip xattrs that do not start with the btrfs prefix. */
if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
name_ptr,
XATTR_BTRFS_PREFIX_LEN))
goto next_dir_item;
/* Need room for the name plus a terminating NUL. */
if (name_len >= name_buf_len) {
kfree(name_buf);
name_buf_len = name_len + 1;
name_buf = kmalloc(name_buf_len, GFP_NOFS);
if (!name_buf) {
ret = -ENOMEM;
goto out;
}
}
read_extent_buffer(leaf, name_buf, name_ptr, name_len);
name_buf[name_len] = '\0';
handler = find_prop_handler(name_buf, handlers);
if (!handler)
goto next_dir_item;
/* The value is copied raw, without a terminating NUL. */
if (data_len > value_buf_len) {
kfree(value_buf);
value_buf_len = data_len;
value_buf = kmalloc(data_len, GFP_NOFS);
if (!value_buf) {
ret = -ENOMEM;
goto out;
}
}
read_extent_buffer(leaf, value_buf, data_ptr, data_len);
iterator(ctx, handler, value_buf, data_len);
next_dir_item:
cur += this_len;
di = (struct btrfs_dir_item *)((char *) di + this_len);
}
next_slot:
path->slots[0]++;
}
ret = 0;
out:
btrfs_release_path(path);
kfree(name_buf);
kfree(value_buf);
return ret;
}
/*
 * iterate_object_props() callback used when loading an inode: @ctx is the
 * inode, and the stored property value is applied to it. A failure is only
 * logged; on success the inode is flagged as having properties.
 */
static void inode_prop_iterator(void *ctx,
				const struct prop_handler *handler,
				const char *value,
				size_t len)
{
	struct inode *inode = ctx;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = handler->apply(inode, value, len);

	if (unlikely(ret)) {
		btrfs_warn(root->fs_info,
			   "error applying prop %s to ino %llu (root %llu): %d",
			   handler->xattr_name, btrfs_ino(BTRFS_I(inode)),
			   root->root_key.objectid, ret);
		return;
	}

	set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
}
/*
 * Apply every property stored in the xattrs of @inode to its in-memory
 * state. @path is handed to iterate_object_props(), which starts from the
 * path's current position and releases it before returning. Returns that
 * function's result.
 */
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
{
	return iterate_object_props(BTRFS_I(inode)->root, path,
				    btrfs_ino(BTRFS_I(inode)),
				    inode_prop_iterator, inode);
}
static int inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode,
struct inode *parent)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int i;
if (!test_bit(BTRFS_INODE_HAS_PROPS,
&BTRFS_I(parent)->runtime_flags))
return 0;
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
const struct prop_handler *h = &prop_handlers[i];
const char *value;
u64 num_bytes;
if (!h->inheritable)
continue;
value = h->extract(parent);
if (!value)
continue;
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
num_bytes, BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
ret = __btrfs_set_prop(trans, inode, h->xattr_name,
value, strlen(value), 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes);
if (ret)
goto out;
}
ret = 0;
out:
return ret;
}
/*
 * Inherit properties from directory @dir into the new @inode. A NULL @dir
 * means there is nothing to inherit from and counts as success.
 */
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
			      struct inode *inode,
			      struct inode *dir)
{
	return dir ? inherit_props(trans, inode, dir) : 0;
}
int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_root *parent_root)
{
struct super_block *sb = root->fs_info->sb;
struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
parent_inode = btrfs_iget(sb, &key, parent_root, NULL);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
child_inode = btrfs_iget(sb, &key, root, NULL);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
}
ret = inherit_props(trans, child_inode, parent_inode);
iput(child_inode);
iput(parent_inode);
return ret;
}
/*
 * Check that @value (of @len bytes, not necessarily NUL terminated) names a
 * supported compression algorithm: "lzo", "zlib" or "zstd".
 *
 * The complete algorithm name must be present. Comparing only @len bytes
 * would accept any truncated prefix such as "z" or "zli"; such a value would
 * be written to the xattr and only rejected later when it is applied. A
 * value that continues after the name is accepted only if the next byte is
 * a NUL, so NUL terminated strings passed with a larger length still work.
 *
 * Returns 0 if the value is valid, -EINVAL otherwise.
 */
static int prop_compression_validate(const char *value, size_t len)
{
	static const char * const names[] = { "lzo", "zlib", "zstd" };
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		const size_t name_len = strlen(names[i]);

		/* Too short to contain this name: never read past @len. */
		if (len < name_len)
			continue;
		if (strncmp(names[i], value, name_len))
			continue;
		if (len == name_len || value[name_len] == '\0')
			return 0;
	}

	return -EINVAL;
}
/*
 * Make a compression property value take effect on the in-memory inode.
 *
 * @len == 0 means the property is being removed: the inode is marked
 * NOCOMPRESS, the COMPRESS flag is cleared and prop_compress is reset to
 * BTRFS_COMPRESS_NONE. Otherwise @value must start with "lzo", "zlib" or
 * "zstd"; the inode is then marked COMPRESS and prop_compress records the
 * algorithm.
 *
 * Returns 0 on success or -EINVAL if the value names no known algorithm.
 */
static int prop_compression_apply(struct inode *inode,
				  const char *value,
				  size_t len)
{
	int type;

	if (len == 0) {
		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
		BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
		return 0;
	}

	/*
	 * @value is not guaranteed to be NUL terminated (values loaded from
	 * disk are copied into a buffer of exactly @len bytes), so check @len
	 * before comparing. Each algorithm is matched on its full name; a
	 * comparison limited to @len bytes would treat "z" or "zs" as zstd.
	 */
	if (len >= 3 && !strncmp("lzo", value, 3))
		type = BTRFS_COMPRESS_LZO;
	else if (len >= 4 && !strncmp("zlib", value, 4))
		type = BTRFS_COMPRESS_ZLIB;
	else if (len >= 4 && !strncmp("zstd", value, 4))
		type = BTRFS_COMPRESS_ZSTD;
	else
		return -EINVAL;

	BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
	BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
	BTRFS_I(inode)->prop_compress = type;

	return 0;
}
/*
 * Return the name of the compression algorithm recorded in the inode's
 * prop_compress field ("zlib", "lzo" or "zstd"), or NULL when it holds any
 * other value.
 */
static const char *prop_compression_extract(struct inode *inode)
{
	const char *name = NULL;

	switch (BTRFS_I(inode)->prop_compress) {
	case BTRFS_COMPRESS_ZLIB:
		name = "zlib";
		break;
	case BTRFS_COMPRESS_LZO:
		name = "lzo";
		break;
	case BTRFS_COMPRESS_ZSTD:
		name = "zstd";
		break;
	}

	return name;
}