md-cluster: Use a small window for resync

Suspending the entire device for resync could take too long. Resync
in small chunks.

The cluster's resync window (32M) is maintained in r1conf as
cluster_sync_low and cluster_sync_high and processed in
raid1's sync_request(). If the current resync is outside the cluster
resync window:

1. Set cluster_sync_low to curr_resync_completed.
2. Check whether the sync will fit in the new window; if not, issue a
   wait_barrier() and set cluster_sync_low to sector_nr.
3. Set cluster_sync_high to cluster_sync_low + resync_window.
4. Send a message to all nodes so they may add it to their suspension
   list (a condensed sketch of this flow follows the list).
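
For illustration only, a minimal user-space sketch of that decision
follows. The names toy_conf, advance_window and TOY_WINDOW_SECTORS (and
the scaffolding around them) are invented for the sketch, step 2's
wait_barrier() handling is reduced to simply restarting the window, and
sending the RESYNCING message is modeled as a printf; none of this is
the kernel code itself.

#include <stdio.h>
#include <stdbool.h>

typedef unsigned long long sector_t;

/* 32M expressed in 512-byte sectors, mirroring CLUSTER_RESYNC_WINDOW_SECTORS */
#define TOY_WINDOW_SECTORS ((32ULL * 1024 * 1024) >> 9)

struct toy_conf {
	sector_t cluster_sync_low;
	sector_t cluster_sync_high;
};

/* Returns true when the window had to move, i.e. when the caller would
 * broadcast a RESYNCING message with the new low/high to all nodes. */
static bool advance_window(struct toy_conf *conf,
			   sector_t curr_resync_completed,
			   sector_t sector_nr, sector_t nr_sectors)
{
	if (conf->cluster_sync_high >= sector_nr + nr_sectors)
		return false;			/* chunk fits in the current window */

	/* 1. Start the new window at the last completed sector. */
	conf->cluster_sync_low = curr_resync_completed;
	/* 2. If the chunk still would not fit (the kernel also issues a
	 *    wait_barrier() at this point), restart the window at sector_nr. */
	if (conf->cluster_sync_low + TOY_WINDOW_SECTORS < sector_nr + nr_sectors)
		conf->cluster_sync_low = sector_nr;
	/* 3. The window is always one full resync window wide. */
	conf->cluster_sync_high = conf->cluster_sync_low + TOY_WINDOW_SECTORS;
	return true;				/* 4. caller sends the message */
}

int main(void)
{
	struct toy_conf conf = { 0, 0 };
	sector_t completed = 0;
	const sector_t chunk = 2048;		/* 1M resync chunks */

	for (sector_t s = 0; s < 10 * TOY_WINDOW_SECTORS; s += chunk) {
		if (advance_window(&conf, completed, s, chunk))
			printf("RESYNCING low=%llu high=%llu\n",
			       conf.cluster_sync_low, conf.cluster_sync_high);
		completed = s + chunk;		/* pretend the chunk finished */
	}
	return 0;
}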

bitmap_cond_end_sync is modified to allow a forced sync in order
to bring curr_resync_completed up to date with the sector passed in.
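
As a rough user-space sketch of what the new flag changes (the
wraparound-safe time_before() is simplified to a plain comparison, and
should_end_sync is a name made up here, not a kernel function):

#include <stdbool.h>

/* Toy model of the throttle in bitmap_cond_end_sync(): the end-of-sync
 * bookkeeping normally runs at most once per daemon_sleep interval;
 * force makes it run immediately so that curr_resync_completed is
 * brought up to date before the cluster resync window is moved. */
bool should_end_sync(unsigned long now, unsigned long last_end_sync,
		     unsigned long daemon_sleep, bool force)
{
	if (!force && now < last_end_sync + daemon_sleep)
		return false;	/* throttled, and nothing is forcing it */
	return true;
}

In raid1's sync_request() the flag is only set for clustered arrays,
and only when the chunk being built comes within 2 * RESYNC_SECTORS of
cluster_sync_high, so the forced update happens just before the window
has to move (see the raid1.c hunk below).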

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Goldwyn Rodrigues 2015-08-19 08:14:42 +10:00
parent 3c462c880b
commit c40f341f1e
9 changed files with 43 additions and 53 deletions

drivers/md/bitmap.c

@@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
 }
 EXPORT_SYMBOL(bitmap_close_sync);
-void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
 {
 	sector_t s = 0;
 	sector_t blocks;
@@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
 		bitmap->last_end_sync = jiffies;
 		return;
 	}
-	if (time_before(jiffies, (bitmap->last_end_sync
+	if (!force && time_before(jiffies, (bitmap->last_end_sync
 				  + bitmap->mddev->bitmap_info.daemon_sleep)))
 		return;
 	wait_event(bitmap->mddev->recovery_wait,

drivers/md/bitmap.h

@@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
-void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
 void bitmap_unplug(struct bitmap *bitmap);
 void bitmap_daemon_work(struct mddev *mddev);

drivers/md/md-cluster.c

@@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev)
 	return cinfo->slot_number - 1;
 }
-static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
-{
-	struct md_cluster_info *cinfo = mddev->cluster_info;
-	add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
-	/* Re-acquire the lock to refresh LVB */
-	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
-}
 static int metadata_update_start(struct mddev *mddev)
 {
 	return lock_comm(mddev->cluster_info);
@@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev)
 	return dlm_unlock_sync(cinfo->token_lockres);
 }
-static int resync_send(struct mddev *mddev, enum msg_type type,
-		sector_t lo, sector_t hi)
+static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
 	struct cluster_msg cmsg;
 	int slot = cinfo->slot_number - 1;
+	add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+	/* Re-acquire the lock to refresh LVB */
+	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
 	pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
 			(unsigned long long)lo,
 			(unsigned long long)hi);
-	resync_info_update(mddev, lo, hi);
-	cmsg.type = cpu_to_le32(type);
+	cmsg.type = cpu_to_le32(RESYNCING);
 	cmsg.slot = cpu_to_le32(slot);
 	cmsg.low = cpu_to_le64(lo);
 	cmsg.high = cpu_to_le64(hi);
 	return sendmsg(cinfo, &cmsg);
 }
-static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
-{
-	pr_info("%s:%d\n", __func__, __LINE__);
-	return resync_send(mddev, RESYNCING, lo, hi);
-}
-static void resync_finish(struct mddev *mddev)
-{
-	struct md_cluster_info *cinfo = mddev->cluster_info;
-	struct cluster_msg cmsg;
-	int slot = cinfo->slot_number - 1;
-	pr_info("%s:%d\n", __func__, __LINE__);
-	resync_send(mddev, RESYNCING, 0, 0);
-	if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
-		cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
-		cmsg.slot = cpu_to_le32(slot);
-		sendmsg(cinfo, &cmsg);
-	}
-}
 static int area_resyncing(struct mddev *mddev, int direction,
 		sector_t lo, sector_t hi)
 {
@@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = {
 	.leave = leave,
 	.slot_number = slot_number,
 	.resync_info_update = resync_info_update,
-	.resync_start = resync_start,
-	.resync_finish = resync_finish,
 	.metadata_update_start = metadata_update_start,
 	.metadata_update_finish = metadata_update_finish,
 	.metadata_update_cancel = metadata_update_cancel,

drivers/md/md-cluster.h

@@ -12,9 +12,7 @@ struct md_cluster_operations {
 	int (*join)(struct mddev *mddev, int nodes);
 	int (*leave)(struct mddev *mddev);
 	int (*slot_number)(struct mddev *mddev);
-	void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
-	int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
-	void (*resync_finish)(struct mddev *mddev);
+	int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
 	int (*metadata_update_start)(struct mddev *mddev);
 	int (*metadata_update_finish)(struct mddev *mddev);
 	int (*metadata_update_cancel)(struct mddev *mddev);

drivers/md/md.c

@@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread)
 	md_new_event(mddev);
 	update_time = jiffies;
-	if (mddev_is_clustered(mddev))
-		md_cluster_ops->resync_start(mddev, j, max_sectors);
 	blk_start_plug(&plug);
 	while (j < max_sectors) {
 		sector_t sectors;
@@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread)
 				j = max_sectors;
 		if (j > 2)
 			mddev->curr_resync = j;
-		if (mddev_is_clustered(mddev))
-			md_cluster_ops->resync_info_update(mddev, j, max_sectors);
 		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
 			/* this is the earliest that rebuild will be
@@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	if (mddev_is_clustered(mddev))
-		md_cluster_ops->resync_finish(mddev);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	spin_lock(&mddev->lock);

drivers/md/raid1.c

@@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
 #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
 #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
 #define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
@@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		bitmap_close_sync(mddev->bitmap);
 		close_sync(conf);
+		if (mddev_is_clustered(mddev)) {
+			conf->cluster_sync_low = 0;
+			conf->cluster_sync_high = 0;
+			/* Send zeros to mark end of resync */
+			md_cluster_ops->resync_info_update(mddev, 0, 0);
+		}
 		return 0;
 	}
@@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		return sync_blocks;
 	}
-	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+	/* we are incrementing sector_nr below. To be safe, we check against
+	 * sector_nr + two times RESYNC_SECTORS
+	 */
+	bitmap_cond_end_sync(mddev->bitmap, sector_nr,
+		mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
 	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 	raise_barrier(conf, sector_nr);
@@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
  bio_full:
 	r1_bio->sectors = nr_sectors;
+	if (mddev_is_clustered(mddev) &&
+			conf->cluster_sync_high < sector_nr + nr_sectors) {
+		conf->cluster_sync_low = mddev->curr_resync_completed;
+		conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
+		/* Send resync message */
+		md_cluster_ops->resync_info_update(mddev,
+				conf->cluster_sync_low,
+				conf->cluster_sync_high);
+	}
 	/* For a user-requested sync, we read all readable devices and do a
 	 * compare
 	 */

drivers/md/raid1.h

@@ -111,6 +111,13 @@ struct r1conf {
 	 * the new thread here until we fully activate the array.
 	 */
 	struct md_thread *thread;
+	/* Keep track of cluster resync window to send to other
+	 * nodes.
+	 */
+	sector_t cluster_sync_low;
+	sector_t cluster_sync_high;
 };
 /*

drivers/md/raid10.c

@@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 		/* resync. Schedule a read for every block at this virt offset */
 		int count = 0;
-		bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+		bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
 		if (!bitmap_start_sync(mddev->bitmap, sector_nr,
 				       &sync_blocks, mddev->degraded) &&

drivers/md/raid5.c

@@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
 		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
 	}
-	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+	bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
 	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
 	if (sh == NULL) {