diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 78da3392f577..d9ee4edd7a53 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -66,7 +66,8 @@ */ static int max_queued_requests = 1024; -static void allow_barrier(struct r1conf *conf); +static void allow_barrier(struct r1conf *conf, sector_t start_next_window, + sector_t bi_sector); static void lower_barrier(struct r1conf *conf); static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) @@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio) struct bio *bio = r1_bio->master_bio; int done; struct r1conf *conf = r1_bio->mddev->private; + sector_t start_next_window = r1_bio->start_next_window; + sector_t bi_sector = bio->bi_sector; if (bio->bi_phys_segments) { unsigned long flags; @@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio) bio->bi_phys_segments--; done = (bio->bi_phys_segments == 0); spin_unlock_irqrestore(&conf->device_lock, flags); + /* + * make_request() might be waiting for + * bi_phys_segments to decrease + */ + wake_up(&conf->wait_barrier); } else done = 1; @@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio) * Wake up any possible resync thread that waits for the device * to go idle. */ - allow_barrier(conf); + allow_barrier(conf, start_next_window, bi_sector); } } @@ -827,10 +835,19 @@ static void raise_barrier(struct r1conf *conf) /* block any new IO from starting */ conf->barrier++; - /* Now wait for all pending IO to complete */ + /* For these conditions we must wait: + * A: while the array is in frozen state + * B: while barrier >= RESYNC_DEPTH, meaning resync reach + * the max count which allowed. + * C: next_resync + RESYNC_SECTORS > start_next_window, meaning + * next resync will reach to the window which normal bios are + * handling. + */ wait_event_lock_irq(conf->wait_barrier, !conf->array_frozen && - !conf->nr_pending && conf->barrier < RESYNC_DEPTH, + conf->barrier < RESYNC_DEPTH && + (conf->start_next_window >= + conf->next_resync + RESYNC_SECTORS), conf->resync_lock); spin_unlock_irq(&conf->resync_lock); @@ -846,10 +863,33 @@ static void lower_barrier(struct r1conf *conf) wake_up(&conf->wait_barrier); } -static void wait_barrier(struct r1conf *conf) +static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio) { + bool wait = false; + + if (conf->array_frozen || !bio) + wait = true; + else if (conf->barrier && bio_data_dir(bio) == WRITE) { + if (conf->next_resync < RESYNC_WINDOW_SECTORS) + wait = true; + else if ((conf->next_resync - RESYNC_WINDOW_SECTORS + >= bio_end_sector(bio)) || + (conf->next_resync + NEXT_NORMALIO_DISTANCE + <= bio->bi_sector)) + wait = false; + else + wait = true; + } + + return wait; +} + +static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) +{ + sector_t sector = 0; + spin_lock_irq(&conf->resync_lock); - if (conf->barrier) { + if (need_to_wait_for_sync(conf, bio)) { conf->nr_waiting++; /* Wait for the barrier to drop. * However if there are already pending @@ -863,21 +903,65 @@ static void wait_barrier(struct r1conf *conf) wait_event_lock_irq(conf->wait_barrier, !conf->array_frozen && (!conf->barrier || - (conf->nr_pending && + ((conf->start_next_window < + conf->next_resync + RESYNC_SECTORS) && current->bio_list && !bio_list_empty(current->bio_list))), conf->resync_lock); conf->nr_waiting--; } + + if (bio && bio_data_dir(bio) == WRITE) { + if (conf->next_resync + NEXT_NORMALIO_DISTANCE + <= bio->bi_sector) { + if (conf->start_next_window == MaxSector) + conf->start_next_window = + conf->next_resync + + NEXT_NORMALIO_DISTANCE; + + if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE) + <= bio->bi_sector) + conf->next_window_requests++; + else + conf->current_window_requests++; + } + if (bio->bi_sector >= conf->start_next_window) + sector = conf->start_next_window; + } + conf->nr_pending++; spin_unlock_irq(&conf->resync_lock); + return sector; } -static void allow_barrier(struct r1conf *conf) +static void allow_barrier(struct r1conf *conf, sector_t start_next_window, + sector_t bi_sector) { unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); conf->nr_pending--; + if (start_next_window) { + if (start_next_window == conf->start_next_window) { + if (conf->start_next_window + NEXT_NORMALIO_DISTANCE + <= bi_sector) + conf->next_window_requests--; + else + conf->current_window_requests--; + } else + conf->current_window_requests--; + + if (!conf->current_window_requests) { + if (conf->next_window_requests) { + conf->current_window_requests = + conf->next_window_requests; + conf->next_window_requests = 0; + conf->start_next_window += + NEXT_NORMALIO_DISTANCE; + } else + conf->start_next_window = MaxSector; + } + } spin_unlock_irqrestore(&conf->resync_lock, flags); wake_up(&conf->wait_barrier); } @@ -1012,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) int first_clone; int sectors_handled; int max_sectors; + sector_t start_next_window; /* * Register the new request and wait if the reconstruction @@ -1041,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) finish_wait(&conf->wait_barrier, &w); } - wait_barrier(conf); + start_next_window = wait_barrier(conf, bio); bitmap = mddev->bitmap; @@ -1162,6 +1247,7 @@ read_again: disks = conf->raid_disks * 2; retry_write: + r1_bio->start_next_window = start_next_window; blocked_rdev = NULL; rcu_read_lock(); max_sectors = r1_bio->sectors; @@ -1230,14 +1316,24 @@ read_again: if (unlikely(blocked_rdev)) { /* Wait for this device to become unblocked */ int j; + sector_t old = start_next_window; for (j = 0; j < i; j++) if (r1_bio->bios[j]) rdev_dec_pending(conf->mirrors[j].rdev, mddev); r1_bio->state = 0; - allow_barrier(conf); + allow_barrier(conf, start_next_window, bio->bi_sector); md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf); + start_next_window = wait_barrier(conf, bio); + /* + * We must make sure the multi r1bios of bio have + * the same value of bi_phys_segments + */ + if (bio->bi_phys_segments && old && + old != start_next_window) + /* Wait for the former r1bio(s) to complete */ + wait_event(conf->wait_barrier, + bio->bi_phys_segments == 1); goto retry_write; } @@ -1437,11 +1533,14 @@ static void print_conf(struct r1conf *conf) static void close_sync(struct r1conf *conf) { - wait_barrier(conf); - allow_barrier(conf); + wait_barrier(conf, NULL); + allow_barrier(conf, 0, 0); mempool_destroy(conf->r1buf_pool); conf->r1buf_pool = NULL; + + conf->next_resync = 0; + conf->start_next_window = MaxSector; } static int raid1_spare_active(struct mddev *mddev) @@ -2713,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) conf->pending_count = 0; conf->recovery_disabled = mddev->recovery_disabled - 1; + conf->start_next_window = MaxSector; + conf->current_window_requests = conf->next_window_requests = 0; + err = -EIO; for (i = 0; i < conf->raid_disks * 2; i++) { diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 331a98a231b4..9bebca7bff2f 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -41,6 +41,19 @@ struct r1conf { */ sector_t next_resync; + /* When raid1 starts resync, we divide array into four partitions + * |---------|--------------|---------------------|-------------| + * next_resync start_next_window end_window + * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE + * end_window = start_next_window + NEXT_NORMALIO_DISTANCE + * current_window_requests means the count of normalIO between + * start_next_window and end_window. + * next_window_requests means the count of normalIO after end_window. + * */ + sector_t start_next_window; + int current_window_requests; + int next_window_requests; + spinlock_t device_lock; /* list of 'struct r1bio' that need to be processed by raid1d, @@ -112,6 +125,7 @@ struct r1bio { * in this BehindIO request */ sector_t sector; + sector_t start_next_window; int sectors; unsigned long state; struct mddev *mddev;