md-cluster: Use a small window for resync
Suspending the entire device for resync could take too long. Resync in small chunks. The cluster's resync window (32M) is maintained in r1conf as cluster_sync_low and cluster_sync_high and processed in raid1's sync_request(). If the current resync is outside the cluster resync window: 1. Set cluster_sync_low to curr_resync_completed. 2. Check if the sync will fit in the new window; if not, issue a wait_barrier() and set cluster_sync_low to sector_nr. 3. Set cluster_sync_high to cluster_sync_low + resync_window. 4. Send a message to all nodes so they may add it to their suspension list. bitmap_cond_end_sync is modified to allow forcing a sync in order to bring curr_resync_completed up to date with the sector passed. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
3c462c880b
commit
c40f341f1e
|
@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(bitmap_close_sync);
|
EXPORT_SYMBOL(bitmap_close_sync);
|
||||||
|
|
||||||
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
|
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
|
||||||
{
|
{
|
||||||
sector_t s = 0;
|
sector_t s = 0;
|
||||||
sector_t blocks;
|
sector_t blocks;
|
||||||
|
@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
|
||||||
bitmap->last_end_sync = jiffies;
|
bitmap->last_end_sync = jiffies;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (time_before(jiffies, (bitmap->last_end_sync
|
if (!force && time_before(jiffies, (bitmap->last_end_sync
|
||||||
+ bitmap->mddev->bitmap_info.daemon_sleep)))
|
+ bitmap->mddev->bitmap_info.daemon_sleep)))
|
||||||
return;
|
return;
|
||||||
wait_event(bitmap->mddev->recovery_wait,
|
wait_event(bitmap->mddev->recovery_wait,
|
||||||
|
|
|
@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
|
||||||
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
|
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
|
||||||
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
|
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
|
||||||
void bitmap_close_sync(struct bitmap *bitmap);
|
void bitmap_close_sync(struct bitmap *bitmap);
|
||||||
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
|
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
|
||||||
|
|
||||||
void bitmap_unplug(struct bitmap *bitmap);
|
void bitmap_unplug(struct bitmap *bitmap);
|
||||||
void bitmap_daemon_work(struct mddev *mddev);
|
void bitmap_daemon_work(struct mddev *mddev);
|
||||||
|
|
|
@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev)
|
||||||
return cinfo->slot_number - 1;
|
return cinfo->slot_number - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
|
|
||||||
{
|
|
||||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
|
||||||
|
|
||||||
add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
|
|
||||||
/* Re-acquire the lock to refresh LVB */
|
|
||||||
dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int metadata_update_start(struct mddev *mddev)
|
static int metadata_update_start(struct mddev *mddev)
|
||||||
{
|
{
|
||||||
return lock_comm(mddev->cluster_info);
|
return lock_comm(mddev->cluster_info);
|
||||||
|
@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev)
|
||||||
return dlm_unlock_sync(cinfo->token_lockres);
|
return dlm_unlock_sync(cinfo->token_lockres);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int resync_send(struct mddev *mddev, enum msg_type type,
|
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
|
||||||
sector_t lo, sector_t hi)
|
|
||||||
{
|
{
|
||||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||||
struct cluster_msg cmsg;
|
struct cluster_msg cmsg;
|
||||||
int slot = cinfo->slot_number - 1;
|
int slot = cinfo->slot_number - 1;
|
||||||
|
|
||||||
|
add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
|
||||||
|
/* Re-acquire the lock to refresh LVB */
|
||||||
|
dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
|
||||||
pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
|
pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
|
||||||
(unsigned long long)lo,
|
(unsigned long long)lo,
|
||||||
(unsigned long long)hi);
|
(unsigned long long)hi);
|
||||||
resync_info_update(mddev, lo, hi);
|
cmsg.type = cpu_to_le32(RESYNCING);
|
||||||
cmsg.type = cpu_to_le32(type);
|
|
||||||
cmsg.slot = cpu_to_le32(slot);
|
cmsg.slot = cpu_to_le32(slot);
|
||||||
cmsg.low = cpu_to_le64(lo);
|
cmsg.low = cpu_to_le64(lo);
|
||||||
cmsg.high = cpu_to_le64(hi);
|
cmsg.high = cpu_to_le64(hi);
|
||||||
return sendmsg(cinfo, &cmsg);
|
return sendmsg(cinfo, &cmsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
|
|
||||||
{
|
|
||||||
pr_info("%s:%d\n", __func__, __LINE__);
|
|
||||||
return resync_send(mddev, RESYNCING, lo, hi);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void resync_finish(struct mddev *mddev)
|
|
||||||
{
|
|
||||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
|
||||||
struct cluster_msg cmsg;
|
|
||||||
int slot = cinfo->slot_number - 1;
|
|
||||||
|
|
||||||
pr_info("%s:%d\n", __func__, __LINE__);
|
|
||||||
resync_send(mddev, RESYNCING, 0, 0);
|
|
||||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
|
||||||
cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
|
|
||||||
cmsg.slot = cpu_to_le32(slot);
|
|
||||||
sendmsg(cinfo, &cmsg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int area_resyncing(struct mddev *mddev, int direction,
|
static int area_resyncing(struct mddev *mddev, int direction,
|
||||||
sector_t lo, sector_t hi)
|
sector_t lo, sector_t hi)
|
||||||
{
|
{
|
||||||
|
@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = {
|
||||||
.leave = leave,
|
.leave = leave,
|
||||||
.slot_number = slot_number,
|
.slot_number = slot_number,
|
||||||
.resync_info_update = resync_info_update,
|
.resync_info_update = resync_info_update,
|
||||||
.resync_start = resync_start,
|
|
||||||
.resync_finish = resync_finish,
|
|
||||||
.metadata_update_start = metadata_update_start,
|
.metadata_update_start = metadata_update_start,
|
||||||
.metadata_update_finish = metadata_update_finish,
|
.metadata_update_finish = metadata_update_finish,
|
||||||
.metadata_update_cancel = metadata_update_cancel,
|
.metadata_update_cancel = metadata_update_cancel,
|
||||||
|
|
|
@ -12,9 +12,7 @@ struct md_cluster_operations {
|
||||||
int (*join)(struct mddev *mddev, int nodes);
|
int (*join)(struct mddev *mddev, int nodes);
|
||||||
int (*leave)(struct mddev *mddev);
|
int (*leave)(struct mddev *mddev);
|
||||||
int (*slot_number)(struct mddev *mddev);
|
int (*slot_number)(struct mddev *mddev);
|
||||||
void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
|
int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
|
||||||
int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
|
|
||||||
void (*resync_finish)(struct mddev *mddev);
|
|
||||||
int (*metadata_update_start)(struct mddev *mddev);
|
int (*metadata_update_start)(struct mddev *mddev);
|
||||||
int (*metadata_update_finish)(struct mddev *mddev);
|
int (*metadata_update_finish)(struct mddev *mddev);
|
||||||
int (*metadata_update_cancel)(struct mddev *mddev);
|
int (*metadata_update_cancel)(struct mddev *mddev);
|
||||||
|
|
|
@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread)
|
||||||
md_new_event(mddev);
|
md_new_event(mddev);
|
||||||
update_time = jiffies;
|
update_time = jiffies;
|
||||||
|
|
||||||
if (mddev_is_clustered(mddev))
|
|
||||||
md_cluster_ops->resync_start(mddev, j, max_sectors);
|
|
||||||
|
|
||||||
blk_start_plug(&plug);
|
blk_start_plug(&plug);
|
||||||
while (j < max_sectors) {
|
while (j < max_sectors) {
|
||||||
sector_t sectors;
|
sector_t sectors;
|
||||||
|
@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread)
|
||||||
j = max_sectors;
|
j = max_sectors;
|
||||||
if (j > 2)
|
if (j > 2)
|
||||||
mddev->curr_resync = j;
|
mddev->curr_resync = j;
|
||||||
if (mddev_is_clustered(mddev))
|
|
||||||
md_cluster_ops->resync_info_update(mddev, j, max_sectors);
|
|
||||||
mddev->curr_mark_cnt = io_sectors;
|
mddev->curr_mark_cnt = io_sectors;
|
||||||
if (last_check == 0)
|
if (last_check == 0)
|
||||||
/* this is the earliest that rebuild will be
|
/* this is the earliest that rebuild will be
|
||||||
|
@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
skip:
|
skip:
|
||||||
if (mddev_is_clustered(mddev))
|
|
||||||
md_cluster_ops->resync_finish(mddev);
|
|
||||||
|
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
|
||||||
spin_lock(&mddev->lock);
|
spin_lock(&mddev->lock);
|
||||||
|
|
|
@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
|
||||||
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
|
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
|
||||||
#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
|
#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
|
||||||
#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
|
#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
|
||||||
|
#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
|
||||||
|
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
|
||||||
#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
|
#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
|
||||||
|
|
||||||
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
|
||||||
|
@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
|
|
||||||
bitmap_close_sync(mddev->bitmap);
|
bitmap_close_sync(mddev->bitmap);
|
||||||
close_sync(conf);
|
close_sync(conf);
|
||||||
|
|
||||||
|
if (mddev_is_clustered(mddev)) {
|
||||||
|
conf->cluster_sync_low = 0;
|
||||||
|
conf->cluster_sync_high = 0;
|
||||||
|
/* Send zeros to mark end of resync */
|
||||||
|
md_cluster_ops->resync_info_update(mddev, 0, 0);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
return sync_blocks;
|
return sync_blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
|
/* we are incrementing sector_nr below. To be safe, we check against
|
||||||
|
* sector_nr + two times RESYNC_SECTORS
|
||||||
|
*/
|
||||||
|
|
||||||
|
bitmap_cond_end_sync(mddev->bitmap, sector_nr,
|
||||||
|
mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
|
||||||
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
|
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
|
||||||
|
|
||||||
raise_barrier(conf, sector_nr);
|
raise_barrier(conf, sector_nr);
|
||||||
|
@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
|
||||||
bio_full:
|
bio_full:
|
||||||
r1_bio->sectors = nr_sectors;
|
r1_bio->sectors = nr_sectors;
|
||||||
|
|
||||||
|
if (mddev_is_clustered(mddev) &&
|
||||||
|
conf->cluster_sync_high < sector_nr + nr_sectors) {
|
||||||
|
conf->cluster_sync_low = mddev->curr_resync_completed;
|
||||||
|
conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
|
||||||
|
/* Send resync message */
|
||||||
|
md_cluster_ops->resync_info_update(mddev,
|
||||||
|
conf->cluster_sync_low,
|
||||||
|
conf->cluster_sync_high);
|
||||||
|
}
|
||||||
|
|
||||||
/* For a user-requested sync, we read all readable devices and do a
|
/* For a user-requested sync, we read all readable devices and do a
|
||||||
* compare
|
* compare
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -111,6 +111,13 @@ struct r1conf {
|
||||||
* the new thread here until we fully activate the array.
|
* the new thread here until we fully activate the array.
|
||||||
*/
|
*/
|
||||||
struct md_thread *thread;
|
struct md_thread *thread;
|
||||||
|
|
||||||
|
/* Keep track of cluster resync window to send to other
|
||||||
|
* nodes.
|
||||||
|
*/
|
||||||
|
sector_t cluster_sync_low;
|
||||||
|
sector_t cluster_sync_high;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||||
/* resync. Schedule a read for every block at this virt offset */
|
/* resync. Schedule a read for every block at this virt offset */
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
|
bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
|
||||||
|
|
||||||
if (!bitmap_start_sync(mddev->bitmap, sector_nr,
|
if (!bitmap_start_sync(mddev->bitmap, sector_nr,
|
||||||
&sync_blocks, mddev->degraded) &&
|
&sync_blocks, mddev->degraded) &&
|
||||||
|
|
|
@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
|
||||||
return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
|
return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
|
||||||
}
|
}
|
||||||
|
|
||||||
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
|
bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
|
||||||
|
|
||||||
sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
|
sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
|
||||||
if (sh == NULL) {
|
if (sh == NULL) {
|
||||||
|
|
Loading…
Reference in New Issue