A set of device-mapper fixes for 3.9.

Fix reported data loss with discards and thin snapshots; avoid a
 deadlock observed in dm verity; fix a race in the new dm cache code
 along with some other minor bugs; store the cache policy version on disk
 to make the stored hints format future-proof.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJRSfH2AAoJEK2W1qbAHj1nENIP+wQCLqY5Sl1uBbtJ1s1T102U
 RDDqSNQKRvsRlR+lfL4QR+kAeUyOyU8C4K/RPh3G4BUXjT+3UxMtQ1ZeliUQX8qa
 Vt2gywtAS+eyydOHgUmywnDc/u9OT5JRcbww/uEoeUj9GnhBZJQ205t4eqPxcP6q
 cx0S3pBAQqwMu2BTmBQY1CrDP/7LQ545TU/PPbu/Y7VStgd+XyNkPA5VcuyBuwl1
 MlK1666x2QMQ5whJBSs8diGZYXZo4rRLUWQuaytJdyvANdip7Kv1jZyx/M1nMBk2
 /dtBY79RoOQEeUrptwF/BltuFrXY1YGnX5f/lhCaVtpaKusKcLX19foIM+0jnlVg
 qigMn0RVtXKckWfSVa0rA5I+XFpRwjgPtqAIbeFWYrOG6qcjamvlIB4fz2ARMYDD
 clymfIzPk/HRrbgmrFvZSa/LCwLWrU6ZXchVuPKzqjtj1doBBdKT4tgrLeIlsNJE
 Cx1q5bLKJAUXGpoW9yfNGWrAujF6rmMU0xhOk+SjmUQU5JZguCpX+5TsCqB43tup
 wW8/BamGWijlsIJzFi0VyvtqspXL/nwfXO+hx8KitXaszduj5EulDJ9dopqJOV1p
 N9VfCQ6xbWW+iTBsnkCf5bgNPSZpPa2V/CQrXM6eSmYQhnK1YMTKXmYRfCnBJtjC
 rmvMV/VpWddO82NA4bq0
 =7+/X
 -----END PGP SIGNATURE-----

Merge tag 'dm-3.9-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper fixes from Alasdair G Kergon:
 "Fix reported data loss with discards and thin snapshots; avoid a
  deadlock observed in dm verity; fix a race in the new dm cache code
  along with some other minor bugs; store the cache policy version on
  disk to make the stored hints format future-proof."

* tag 'dm-3.9-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm cache: policy ignore hints if generated by different version
  dm cache: policy change version from string to integer set
  dm cache: fix race in writethrough implementation
  dm cache: metadata clear dirty bits on clean shutdown
  dm cache: avoid calling policy destructor twice on error
  dm cache: detect cache_create failure
  dm cache: avoid 64 bit division on 32 bit
  dm verity: avoid deadlock
  dm thin: fix non power of two discard granularity calc
  dm thin: fix discard corruption
This commit is contained in:
Linus Torvalds 2013-03-21 08:27:03 -07:00
commit 85ab3c4617
12 changed files with 252 additions and 108 deletions

View File

@ -1025,6 +1025,8 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
{ {
struct blk_plug plug; struct blk_plug plug;
BUG_ON(dm_bufio_in_request());
blk_start_plug(&plug); blk_start_plug(&plug);
dm_bufio_lock(c); dm_bufio_lock(c);

View File

@ -83,6 +83,8 @@ struct cache_disk_superblock {
__le32 read_misses; __le32 read_misses;
__le32 write_hits; __le32 write_hits;
__le32 write_misses; __le32 write_misses;
__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
} __packed; } __packed;
struct dm_cache_metadata { struct dm_cache_metadata {
@ -109,6 +111,7 @@ struct dm_cache_metadata {
bool clean_when_opened:1; bool clean_when_opened:1;
char policy_name[CACHE_POLICY_NAME_SIZE]; char policy_name[CACHE_POLICY_NAME_SIZE];
unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
size_t policy_hint_size; size_t policy_hint_size;
struct dm_cache_statistics stats; struct dm_cache_statistics stats;
}; };
@ -268,7 +271,8 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
disk_super->version = cpu_to_le32(CACHE_VERSION); disk_super->version = cpu_to_le32(CACHE_VERSION);
memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE); memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
disk_super->policy_hint_size = 0; disk_super->policy_hint_size = 0;
r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
@ -284,7 +288,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0); disk_super->cache_blocks = cpu_to_le32(0);
memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
disk_super->read_hits = cpu_to_le32(0); disk_super->read_hits = cpu_to_le32(0);
disk_super->read_misses = cpu_to_le32(0); disk_super->read_misses = cpu_to_le32(0);
@ -478,6 +481,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
@ -572,6 +578,9 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
@ -854,18 +863,43 @@ struct thunk {
bool hints_valid; bool hints_valid;
}; };
/*
 * Report whether @policy agrees with the policy details recorded in @cmd.
 *
 * Three things must match for the policy to count as unchanged: the policy
 * name, the major version (element 0 of the version array) and the hint
 * size.  Returns true only when all three agree.
 */
static bool policy_unchanged(struct dm_cache_metadata *cmd,
			     struct dm_cache_policy *policy)
{
	const char *name = dm_cache_policy_get_name(policy);
	const unsigned *version = dm_cache_policy_get_version(policy);
	size_t hint_size = dm_cache_policy_get_hint_size(policy);

	/* The name comparison is bounded by the size of the stored name. */
	bool same_name = !strncmp(cmd->policy_name, name,
				  sizeof(cmd->policy_name));

	/* Only the first version element is compared. */
	bool same_major = (cmd->policy_version[0] == version[0]);

	bool same_hint_size = (cmd->policy_hint_size == hint_size);

	return same_name && same_major && same_hint_size;
}
static bool hints_array_initialized(struct dm_cache_metadata *cmd) static bool hints_array_initialized(struct dm_cache_metadata *cmd)
{ {
return cmd->hint_root && cmd->policy_hint_size; return cmd->hint_root && cmd->policy_hint_size;
} }
static bool hints_array_available(struct dm_cache_metadata *cmd, static bool hints_array_available(struct dm_cache_metadata *cmd,
const char *policy_name) struct dm_cache_policy *policy)
{ {
bool policy_names_match = !strncmp(cmd->policy_name, policy_name, return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
sizeof(cmd->policy_name));
return cmd->clean_when_opened && policy_names_match &&
hints_array_initialized(cmd); hints_array_initialized(cmd);
} }
@ -899,7 +933,8 @@ static int __load_mapping(void *context, uint64_t cblock, void *leaf)
return r; return r;
} }
static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, static int __load_mappings(struct dm_cache_metadata *cmd,
struct dm_cache_policy *policy,
load_mapping_fn fn, void *context) load_mapping_fn fn, void *context)
{ {
struct thunk thunk; struct thunk thunk;
@ -909,18 +944,19 @@ static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_nam
thunk.cmd = cmd; thunk.cmd = cmd;
thunk.respect_dirty_flags = cmd->clean_when_opened; thunk.respect_dirty_flags = cmd->clean_when_opened;
thunk.hints_valid = hints_array_available(cmd, policy_name); thunk.hints_valid = hints_array_available(cmd, policy);
return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
} }
int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
struct dm_cache_policy *policy,
load_mapping_fn fn, void *context) load_mapping_fn fn, void *context)
{ {
int r; int r;
down_read(&cmd->root_lock); down_read(&cmd->root_lock);
r = __load_mappings(cmd, policy_name, fn, context); r = __load_mappings(cmd, policy, fn, context);
up_read(&cmd->root_lock); up_read(&cmd->root_lock);
return r; return r;
@ -979,7 +1015,7 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty
/* nothing to be done */ /* nothing to be done */
return 0; return 0;
value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0)); value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
__dm_bless_for_disk(&value); __dm_bless_for_disk(&value);
r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
@ -1070,13 +1106,15 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
__le32 value; __le32 value;
size_t hint_size; size_t hint_size;
const char *policy_name = dm_cache_policy_get_name(policy); const char *policy_name = dm_cache_policy_get_name(policy);
const unsigned *policy_version = dm_cache_policy_get_version(policy);
if (!policy_name[0] || if (!policy_name[0] ||
(strlen(policy_name) > sizeof(cmd->policy_name) - 1)) (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
return -EINVAL; return -EINVAL;
if (strcmp(cmd->policy_name, policy_name)) { if (!policy_unchanged(cmd, policy)) {
strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
hint_size = dm_cache_policy_get_hint_size(policy); hint_size = dm_cache_policy_get_hint_size(policy);
if (!hint_size) if (!hint_size)

View File

@ -89,7 +89,7 @@ typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock,
dm_cblock_t cblock, bool dirty, dm_cblock_t cblock, bool dirty,
uint32_t hint, bool hint_valid); uint32_t hint, bool hint_valid);
int dm_cache_load_mappings(struct dm_cache_metadata *cmd, int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
const char *policy_name, struct dm_cache_policy *policy,
load_mapping_fn fn, load_mapping_fn fn,
void *context); void *context);

View File

@ -17,7 +17,6 @@
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/
#define DM_MSG_PREFIX "cache cleaner" #define DM_MSG_PREFIX "cache cleaner"
#define CLEANER_VERSION "1.0.0"
/* Cache entry struct. */ /* Cache entry struct. */
struct wb_cache_entry { struct wb_cache_entry {
@ -434,6 +433,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
static struct dm_cache_policy_type wb_policy_type = { static struct dm_cache_policy_type wb_policy_type = {
.name = "cleaner", .name = "cleaner",
.version = {1, 0, 0},
.hint_size = 0, .hint_size = 0,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.create = wb_create .create = wb_create
@ -446,7 +446,10 @@ static int __init wb_init(void)
if (r < 0) if (r < 0)
DMERR("register failed %d", r); DMERR("register failed %d", r);
else else
DMINFO("version " CLEANER_VERSION " loaded"); DMINFO("version %u.%u.%u loaded",
wb_policy_type.version[0],
wb_policy_type.version[1],
wb_policy_type.version[2]);
return r; return r;
} }

View File

@ -117,6 +117,8 @@ void dm_cache_policy_destroy(struct dm_cache_policy *p);
*/ */
const char *dm_cache_policy_get_name(struct dm_cache_policy *p); const char *dm_cache_policy_get_name(struct dm_cache_policy *p);
const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p);
size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p); size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p);
/*----------------------------------------------------------------*/ /*----------------------------------------------------------------*/

View File

@ -14,7 +14,6 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#define DM_MSG_PREFIX "cache-policy-mq" #define DM_MSG_PREFIX "cache-policy-mq"
#define MQ_VERSION "1.0.0"
static struct kmem_cache *mq_entry_cache; static struct kmem_cache *mq_entry_cache;
@ -1133,6 +1132,7 @@ bad_cache_alloc:
static struct dm_cache_policy_type mq_policy_type = { static struct dm_cache_policy_type mq_policy_type = {
.name = "mq", .name = "mq",
.version = {1, 0, 0},
.hint_size = 4, .hint_size = 4,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.create = mq_create .create = mq_create
@ -1140,6 +1140,7 @@ static struct dm_cache_policy_type mq_policy_type = {
static struct dm_cache_policy_type default_policy_type = { static struct dm_cache_policy_type default_policy_type = {
.name = "default", .name = "default",
.version = {1, 0, 0},
.hint_size = 4, .hint_size = 4,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.create = mq_create .create = mq_create
@ -1164,7 +1165,10 @@ static int __init mq_init(void)
r = dm_cache_policy_register(&default_policy_type); r = dm_cache_policy_register(&default_policy_type);
if (!r) { if (!r) {
DMINFO("version " MQ_VERSION " loaded"); DMINFO("version %u.%u.%u loaded",
mq_policy_type.version[0],
mq_policy_type.version[1],
mq_policy_type.version[2]);
return 0; return 0;
} }

View File

@ -150,6 +150,14 @@ const char *dm_cache_policy_get_name(struct dm_cache_policy *p)
} }
EXPORT_SYMBOL_GPL(dm_cache_policy_get_name); EXPORT_SYMBOL_GPL(dm_cache_policy_get_name);
const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p)
{
struct dm_cache_policy_type *t = p->private;
return t->version;
}
EXPORT_SYMBOL_GPL(dm_cache_policy_get_version);
size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p) size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p)
{ {
struct dm_cache_policy_type *t = p->private; struct dm_cache_policy_type *t = p->private;

View File

@ -196,6 +196,7 @@ struct dm_cache_policy {
* We maintain a little register of the different policy types. * We maintain a little register of the different policy types.
*/ */
#define CACHE_POLICY_NAME_SIZE 16 #define CACHE_POLICY_NAME_SIZE 16
#define CACHE_POLICY_VERSION_SIZE 3
struct dm_cache_policy_type { struct dm_cache_policy_type {
/* For use by the register code only. */ /* For use by the register code only. */
@ -206,6 +207,7 @@ struct dm_cache_policy_type {
* what gets passed on the target line to select your policy. * what gets passed on the target line to select your policy.
*/ */
char name[CACHE_POLICY_NAME_SIZE]; char name[CACHE_POLICY_NAME_SIZE];
unsigned version[CACHE_POLICY_VERSION_SIZE];
/* /*
* Policies may store a hint for each cache block.

View File

@ -142,6 +142,7 @@ struct cache {
spinlock_t lock; spinlock_t lock;
struct bio_list deferred_bios; struct bio_list deferred_bios;
struct bio_list deferred_flush_bios; struct bio_list deferred_flush_bios;
struct bio_list deferred_writethrough_bios;
struct list_head quiesced_migrations; struct list_head quiesced_migrations;
struct list_head completed_migrations; struct list_head completed_migrations;
struct list_head need_commit_migrations; struct list_head need_commit_migrations;
@ -158,7 +159,7 @@ struct cache {
/* /*
* origin_blocks entries, discarded if set. * origin_blocks entries, discarded if set.
*/ */
sector_t discard_block_size; /* a power of 2 times sectors per block */ uint32_t discard_block_size; /* a power of 2 times sectors per block */
dm_dblock_t discard_nr_blocks; dm_dblock_t discard_nr_blocks;
unsigned long *discard_bitset; unsigned long *discard_bitset;
@ -199,6 +200,11 @@ struct per_bio_data {
bool tick:1; bool tick:1;
unsigned req_nr:2; unsigned req_nr:2;
struct dm_deferred_entry *all_io_entry; struct dm_deferred_entry *all_io_entry;
/* writethrough fields */
struct cache *cache;
dm_cblock_t cblock;
bio_end_io_t *saved_bi_end_io;
}; };
struct dm_cache_migration { struct dm_cache_migration {
@ -412,17 +418,24 @@ static bool block_size_is_power_of_two(struct cache *cache)
return cache->sectors_per_block_shift >= 0; return cache->sectors_per_block_shift >= 0;
} }
/*
 * Divide block count @b by @n and return the quotient.
 *
 * The division goes through do_div() rather than the '/' operator; the
 * result of the do_div() expression itself is not used here, only the
 * updated first argument.
 */
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	dm_block_t quotient = b;

	(void) do_div(quotient, n);

	return quotient;
}
static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{ {
sector_t discard_blocks = cache->discard_block_size; uint32_t discard_blocks = cache->discard_block_size;
dm_block_t b = from_oblock(oblock); dm_block_t b = from_oblock(oblock);
if (!block_size_is_power_of_two(cache)) if (!block_size_is_power_of_two(cache))
(void) sector_div(discard_blocks, cache->sectors_per_block); discard_blocks = discard_blocks / cache->sectors_per_block;
else else
discard_blocks >>= cache->sectors_per_block_shift; discard_blocks >>= cache->sectors_per_block_shift;
(void) sector_div(b, discard_blocks); b = block_div(b, discard_blocks);
return to_dblock(b); return to_dblock(b);
} }
@ -609,6 +622,56 @@ static void issue(struct cache *cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags); spin_unlock_irqrestore(&cache->lock, flags);
} }
/*
 * Queue @bio on the cache's deferred writethrough list and wake the worker.
 *
 * The list is modified under cache->lock using the irqsave/irqrestore lock
 * variants; the worker is woken after the lock has been dropped.
 */
static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long irq_flags;
	struct bio_list *deferred = &cache->deferred_writethrough_bios;

	spin_lock_irqsave(&cache->lock, irq_flags);
	bio_list_add(deferred, bio);
	spin_unlock_irqrestore(&cache->lock, irq_flags);

	wake_worker(cache);
}
/*
 * Completion handler installed on a writethrough bio by
 * remap_to_origin_then_cache().
 *
 * The bio's original completion handler is restored first.  On error the
 * bio is completed immediately with that error.  Otherwise the bio is
 * remapped to the cache block recorded in its per-bio data and handed to
 * the worker thread.
 */
static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	bio->bi_end_io = pb->saved_bi_end_io;

	if (!err) {
		remap_to_cache(pb->cache, bio, pb->cblock);

		/*
		 * We can't issue this bio directly, since we're in interrupt
		 * context.  So it gets put on a bio list for processing by
		 * the worker thread.
		 */
		defer_writethrough_bio(pb->cache, bio);
		return;
	}

	bio_endio(bio, err);
}
/*
 * In writethrough mode a write to a clean block has to reach both the
 * origin device and the cache device.  Cloning the bio and issuing both
 * writes in parallel is a possible future improvement; for now the two
 * writes are done in series because that is simpler.
 *
 * This function sets up the first leg: it records the cache, the cache
 * block and the bio's current completion handler in the per-bio data,
 * installs writethrough_endio() as the completion handler, and remaps the
 * bio to the origin.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	/* Save the existing handler before replacing it. */
	pb->saved_bi_end_io = bio->bi_end_io;
	pb->cblock = cblock;
	pb->cache = cache;

	bio->bi_end_io = writethrough_endio;

	remap_to_origin_clear_discard(cache, bio, oblock);
}
/*---------------------------------------------------------------- /*----------------------------------------------------------------
* Migration processing * Migration processing
* *
@ -1002,7 +1065,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio)
dm_block_t end_block = bio->bi_sector + bio_sectors(bio); dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
dm_block_t b; dm_block_t b;
(void) sector_div(end_block, cache->discard_block_size); end_block = block_div(end_block, cache->discard_block_size);
for (b = start_block; b < end_block; b++) for (b = start_block; b < end_block; b++)
set_discard(cache, to_dblock(b)); set_discard(cache, to_dblock(b));
@ -1070,14 +1133,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
inc_hit_counter(cache, bio); inc_hit_counter(cache, bio);
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
if (is_writethrough_io(cache, bio, lookup_result.cblock)) { if (is_writethrough_io(cache, bio, lookup_result.cblock))
/* remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
* No need to mark anything dirty in write through mode. else
*/
pb->req_nr == 0 ?
remap_to_cache(cache, bio, lookup_result.cblock) :
remap_to_origin_clear_discard(cache, bio, block);
} else
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
issue(cache, bio); issue(cache, bio);
@ -1086,17 +1144,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
case POLICY_MISS: case POLICY_MISS:
inc_miss_counter(cache, bio); inc_miss_counter(cache, bio);
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
if (pb->req_nr != 0) { issue(cache, bio);
/*
* This is a duplicate writethrough io that is no
* longer needed because the block has been demoted.
*/
bio_endio(bio, 0);
} else {
remap_to_origin_clear_discard(cache, bio, block);
issue(cache, bio);
}
break; break;
case POLICY_NEW: case POLICY_NEW:
@ -1217,6 +1266,23 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
submit_bios ? generic_make_request(bio) : bio_io_error(bio); submit_bios ? generic_make_request(bio) : bio_io_error(bio);
} }
static void process_deferred_writethrough_bios(struct cache *cache)
{
unsigned long flags;
struct bio_list bios;
struct bio *bio;
bio_list_init(&bios);
spin_lock_irqsave(&cache->lock, flags);
bio_list_merge(&bios, &cache->deferred_writethrough_bios);
bio_list_init(&cache->deferred_writethrough_bios);
spin_unlock_irqrestore(&cache->lock, flags);
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
static void writeback_some_dirty_blocks(struct cache *cache) static void writeback_some_dirty_blocks(struct cache *cache)
{ {
int r = 0; int r = 0;
@ -1313,6 +1379,7 @@ static int more_work(struct cache *cache)
else else
return !bio_list_empty(&cache->deferred_bios) || return !bio_list_empty(&cache->deferred_bios) ||
!bio_list_empty(&cache->deferred_flush_bios) || !bio_list_empty(&cache->deferred_flush_bios) ||
!bio_list_empty(&cache->deferred_writethrough_bios) ||
!list_empty(&cache->quiesced_migrations) || !list_empty(&cache->quiesced_migrations) ||
!list_empty(&cache->completed_migrations) || !list_empty(&cache->completed_migrations) ||
!list_empty(&cache->need_commit_migrations); !list_empty(&cache->need_commit_migrations);
@ -1331,6 +1398,8 @@ static void do_worker(struct work_struct *ws)
writeback_some_dirty_blocks(cache); writeback_some_dirty_blocks(cache);
process_deferred_writethrough_bios(cache);
if (commit_if_needed(cache)) { if (commit_if_needed(cache)) {
process_deferred_flush_bios(cache, false); process_deferred_flush_bios(cache, false);
@ -1756,8 +1825,11 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
} }
r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
if (r) if (r) {
*error = "Error setting cache policy's config values";
dm_cache_policy_destroy(cache->policy); dm_cache_policy_destroy(cache->policy);
cache->policy = NULL;
}
return r; return r;
} }
@ -1793,8 +1865,6 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size,
#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) #define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)
static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio);
static int cache_create(struct cache_args *ca, struct cache **result) static int cache_create(struct cache_args *ca, struct cache **result)
{ {
int r = 0; int r = 0;
@ -1821,9 +1891,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
memcpy(&cache->features, &ca->features, sizeof(cache->features)); memcpy(&cache->features, &ca->features, sizeof(cache->features));
if (cache->features.write_through)
ti->num_write_bios = cache_num_write_bios;
cache->callbacks.congested_fn = cache_is_congested; cache->callbacks.congested_fn = cache_is_congested;
dm_table_add_target_callbacks(ti->table, &cache->callbacks); dm_table_add_target_callbacks(ti->table, &cache->callbacks);
@ -1835,7 +1902,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
/* FIXME: factor out this whole section */ /* FIXME: factor out this whole section */
origin_blocks = cache->origin_sectors = ca->origin_sectors; origin_blocks = cache->origin_sectors = ca->origin_sectors;
(void) sector_div(origin_blocks, ca->block_size); origin_blocks = block_div(origin_blocks, ca->block_size);
cache->origin_blocks = to_oblock(origin_blocks); cache->origin_blocks = to_oblock(origin_blocks);
cache->sectors_per_block = ca->block_size; cache->sectors_per_block = ca->block_size;
@ -1848,7 +1915,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
dm_block_t cache_size = ca->cache_sectors; dm_block_t cache_size = ca->cache_sectors;
cache->sectors_per_block_shift = -1; cache->sectors_per_block_shift = -1;
(void) sector_div(cache_size, ca->block_size); cache_size = block_div(cache_size, ca->block_size);
cache->cache_size = to_cblock(cache_size); cache->cache_size = to_cblock(cache_size);
} else { } else {
cache->sectors_per_block_shift = __ffs(ca->block_size); cache->sectors_per_block_shift = __ffs(ca->block_size);
@ -1873,6 +1940,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
spin_lock_init(&cache->lock); spin_lock_init(&cache->lock);
bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_bios);
bio_list_init(&cache->deferred_flush_bios); bio_list_init(&cache->deferred_flush_bios);
bio_list_init(&cache->deferred_writethrough_bios);
INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->quiesced_migrations);
INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->completed_migrations);
INIT_LIST_HEAD(&cache->need_commit_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations);
@ -2002,6 +2070,8 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto out; goto out;
r = cache_create(ca, &cache); r = cache_create(ca, &cache);
if (r)
goto out;
r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
if (r) { if (r) {
@ -2016,20 +2086,6 @@ out:
return r; return r;
} }
static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio)
{
int r;
struct cache *cache = ti->private;
dm_oblock_t block = get_bio_block(cache, bio);
dm_cblock_t cblock;
r = policy_lookup(cache->policy, block, &cblock);
if (r < 0)
return 2; /* assume the worst */
return (!r && !is_dirty(cache, cblock)) ? 2 : 1;
}
static int cache_map(struct dm_target *ti, struct bio *bio) static int cache_map(struct dm_target *ti, struct bio *bio)
{ {
struct cache *cache = ti->private; struct cache *cache = ti->private;
@ -2097,18 +2153,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
inc_hit_counter(cache, bio); inc_hit_counter(cache, bio);
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
if (is_writethrough_io(cache, bio, lookup_result.cblock)) { if (is_writethrough_io(cache, bio, lookup_result.cblock))
/* remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
* No need to mark anything dirty in write through mode. else
*/
pb->req_nr == 0 ?
remap_to_cache(cache, bio, lookup_result.cblock) :
remap_to_origin_clear_discard(cache, bio, block);
cell_defer(cache, cell, false);
} else {
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
cell_defer(cache, cell, false);
} cell_defer(cache, cell, false);
break; break;
case POLICY_MISS: case POLICY_MISS:
@ -2319,8 +2369,7 @@ static int cache_preresume(struct dm_target *ti)
} }
if (!cache->loaded_mappings) { if (!cache->loaded_mappings) {
r = dm_cache_load_mappings(cache->cmd, r = dm_cache_load_mappings(cache->cmd, cache->policy,
dm_cache_policy_get_name(cache->policy),
load_mapping, cache); load_mapping, cache);
if (r) { if (r) {
DMERR("could not load cache mappings"); DMERR("could not load cache mappings");
@ -2535,7 +2584,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = { static struct target_type cache_target = {
.name = "cache", .name = "cache",
.version = {1, 0, 0}, .version = {1, 1, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = cache_ctr, .ctr = cache_ctr,
.dtr = cache_dtr, .dtr = cache_dtr,

View File

@ -1577,6 +1577,11 @@ static bool data_dev_supports_discard(struct pool_c *pt)
return q && blk_queue_discard(q); return q && blk_queue_discard(q);
} }
/*
 * Return true when sector_div(block_size, n) yields zero, i.e. when @n
 * divides @block_size with nothing left over.
 */
static bool is_factor(sector_t block_size, uint32_t n)
{
	return sector_div(block_size, n) == 0;
}
/* /*
* If discard_passdown was enabled verify that the data device * If discard_passdown was enabled verify that the data device
* supports discards. Disable discard_passdown if not. * supports discards. Disable discard_passdown if not.
@ -1602,7 +1607,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
else if (data_limits->discard_granularity > block_size) else if (data_limits->discard_granularity > block_size)
reason = "discard granularity larger than a block"; reason = "discard granularity larger than a block";
else if (block_size & (data_limits->discard_granularity - 1)) else if (!is_factor(block_size, data_limits->discard_granularity))
reason = "discard granularity not a factor of block size"; reason = "discard granularity not a factor of block size";
if (reason) { if (reason) {
@ -2544,7 +2549,7 @@ static struct target_type pool_target = {
.name = "thin-pool", .name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE, DM_TARGET_IMMUTABLE,
.version = {1, 6, 1}, .version = {1, 7, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = pool_ctr, .ctr = pool_ctr,
.dtr = pool_dtr, .dtr = pool_dtr,
@ -2831,7 +2836,7 @@ static int thin_iterate_devices(struct dm_target *ti,
static struct target_type thin_target = { static struct target_type thin_target = {
.name = "thin", .name = "thin",
.version = {1, 7, 1}, .version = {1, 8, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = thin_ctr, .ctr = thin_ctr,
.dtr = thin_dtr, .dtr = thin_dtr,

View File

@ -93,6 +93,13 @@ struct dm_verity_io {
*/ */
}; };
/*
 * One deferred hash-prefetch request.
 *
 * verity_submit_prefetch() allocates an instance, fills it in from the
 * dm_verity_io being mapped and queues it on v->verify_wq;
 * verity_prefetch_io() recovers it from the work item via container_of()
 * and frees it with kfree() when it has finished.
 */
struct dm_verity_prefetch_work {
	/* Work item; initialised with verity_prefetch_io as its function. */
	struct work_struct work;
	/* The verity instance whose hash blocks are to be prefetched. */
	struct dm_verity *v;
	/* Copied from io->block when the request is submitted. */
	sector_t block;
	/* Copied from io->n_blocks when the request is submitted. */
	unsigned n_blocks;
};
static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
{ {
return (struct shash_desc *)(io + 1); return (struct shash_desc *)(io + 1);
@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bio, int error)
* The root buffer is not prefetched, it is assumed that it will be cached * The root buffer is not prefetched, it is assumed that it will be cached
* all the time. * all the time.
*/ */
static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io) static void verity_prefetch_io(struct work_struct *work)
{ {
struct dm_verity_prefetch_work *pw =
container_of(work, struct dm_verity_prefetch_work, work);
struct dm_verity *v = pw->v;
int i; int i;
for (i = v->levels - 2; i >= 0; i--) { for (i = v->levels - 2; i >= 0; i--) {
sector_t hash_block_start; sector_t hash_block_start;
sector_t hash_block_end; sector_t hash_block_end;
verity_hash_at_level(v, io->block, i, &hash_block_start, NULL); verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL); verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
if (!i) { if (!i) {
unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
@ -452,6 +462,25 @@ no_prefetch_cluster:
dm_bufio_prefetch(v->bufio, hash_block_start, dm_bufio_prefetch(v->bufio, hash_block_start,
hash_block_end - hash_block_start + 1); hash_block_end - hash_block_start + 1);
} }
kfree(pw);
}
/*
 * Queue a hash prefetch for the block range described by io.
 *
 * The range (io->block, io->n_blocks) and the device pointer are copied into
 * a newly allocated dm_verity_prefetch_work, which is queued on v->verify_wq
 * with verity_prefetch_io() as the handler. The handler frees the work item.
 *
 * Returns nothing; if the allocation fails, no prefetch is queued and the
 * caller is not told.
 */
static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
{
struct dm_verity_prefetch_work *pw;
/*
 * Failure is tolerated here (see the !pw check below). Going by their
 * names, the extra GFP flags ask the allocator not to retry, not to use
 * emergency reserves and not to warn -- confirm against the kernel GFP
 * documentation.
 */
pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
/* No memory: skip the prefetch. */
if (!pw)
return;
INIT_WORK(&pw->work, verity_prefetch_io);
/* Snapshot the request parameters so the handler does not touch io. */
pw->v = v;
pw->block = io->block;
pw->n_blocks = io->n_blocks;
queue_work(v->verify_wq, &pw->work);
} }
/* /*
@ -498,7 +527,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
memcpy(io->io_vec, bio_iovec(bio), memcpy(io->io_vec, bio_iovec(bio),
io->io_vec_size * sizeof(struct bio_vec)); io->io_vec_size * sizeof(struct bio_vec));
verity_prefetch_io(v, io); verity_submit_prefetch(v, io);
generic_make_request(bio); generic_make_request(bio);
@ -858,7 +887,7 @@ bad:
static struct target_type verity_target = { static struct target_type verity_target = {
.name = "verity", .name = "verity",
.version = {1, 1, 1}, .version = {1, 2, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = verity_ctr, .ctr = verity_ctr,
.dtr = verity_dtr, .dtr = verity_dtr,

View File

@ -139,15 +139,8 @@ struct child {
struct btree_node *n; struct btree_node *n;
}; };
static struct dm_btree_value_type le64_type = { static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
.context = NULL, struct btree_node *parent,
.size = sizeof(__le64),
.inc = NULL,
.dec = NULL,
.equal = NULL
};
static int init_child(struct dm_btree_info *info, struct btree_node *parent,
unsigned index, struct child *result) unsigned index, struct child *result)
{ {
int r, inc; int r, inc;
@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent,
result->n = dm_block_data(result->block); result->n = dm_block_data(result->block);
if (inc) if (inc)
inc_children(info->tm, result->n, &le64_type); inc_children(info->tm, result->n, vt);
*((__le64 *) value_ptr(parent, index)) = *((__le64 *) value_ptr(parent, index)) =
cpu_to_le64(dm_block_location(result->block)); cpu_to_le64(dm_block_location(result->block));
@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
} }
static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
unsigned left_index) struct dm_btree_value_type *vt, unsigned left_index)
{ {
int r; int r;
struct btree_node *parent; struct btree_node *parent;
@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
parent = dm_block_data(shadow_current(s)); parent = dm_block_data(shadow_current(s));
r = init_child(info, parent, left_index, &left); r = init_child(info, vt, parent, left_index, &left);
if (r) if (r)
return r; return r;
r = init_child(info, parent, left_index + 1, &right); r = init_child(info, vt, parent, left_index + 1, &right);
if (r) { if (r) {
exit_child(info, &left); exit_child(info, &left);
return r; return r;
@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
} }
static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
unsigned left_index) struct dm_btree_value_type *vt, unsigned left_index)
{ {
int r; int r;
struct btree_node *parent = dm_block_data(shadow_current(s)); struct btree_node *parent = dm_block_data(shadow_current(s));
@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
/* /*
* FIXME: fill out an array? * FIXME: fill out an array?
*/ */
r = init_child(info, parent, left_index, &left); r = init_child(info, vt, parent, left_index, &left);
if (r) if (r)
return r; return r;
r = init_child(info, parent, left_index + 1, &center); r = init_child(info, vt, parent, left_index + 1, &center);
if (r) { if (r) {
exit_child(info, &left); exit_child(info, &left);
return r; return r;
} }
r = init_child(info, parent, left_index + 2, &right); r = init_child(info, vt, parent, left_index + 2, &right);
if (r) { if (r) {
exit_child(info, &left); exit_child(info, &left);
exit_child(info, &center); exit_child(info, &center);
@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm,
} }
static int rebalance_children(struct shadow_spine *s, static int rebalance_children(struct shadow_spine *s,
struct dm_btree_info *info, uint64_t key) struct dm_btree_info *info,
struct dm_btree_value_type *vt, uint64_t key)
{ {
int i, r, has_left_sibling, has_right_sibling; int i, r, has_left_sibling, has_right_sibling;
uint32_t child_entries; uint32_t child_entries;
@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s,
has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
if (!has_left_sibling) if (!has_left_sibling)
r = rebalance2(s, info, i); r = rebalance2(s, info, vt, i);
else if (!has_right_sibling) else if (!has_right_sibling)
r = rebalance2(s, info, i - 1); r = rebalance2(s, info, vt, i - 1);
else else
r = rebalance3(s, info, i - 1); r = rebalance3(s, info, vt, i - 1);
return r; return r;
} }
@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
if (le32_to_cpu(n->header.flags) & LEAF_NODE) if (le32_to_cpu(n->header.flags) & LEAF_NODE)
return do_leaf(n, key, index); return do_leaf(n, key, index);
r = rebalance_children(s, info, key); r = rebalance_children(s, info, vt, key);
if (r) if (r)
break; break;
@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
return r; return r;
} }
/*
 * Value type describing plain __le64 values: the size is sizeof(__le64) and
 * no context or inc/dec/equal callbacks are supplied.
 */
static struct dm_btree_value_type le64_type = {
.context = NULL,
.size = sizeof(__le64),
.inc = NULL,
.dec = NULL,
.equal = NULL
};
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, dm_block_t *new_root) uint64_t *keys, dm_block_t *new_root)
{ {