diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index d6dde7d2cf76..4c1e14615001 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -85,6 +85,7 @@ config VIRTIO_MEM
 	depends on VIRTIO
 	depends on MEMORY_HOTPLUG_SPARSE
 	depends on MEMORY_HOTREMOVE
+	select CONTIG_ALLOC
 	help
 	 This driver provides access to virtio-mem paravirtualized memory
 	 devices, allowing to hotplug and hotunplug memory.
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index a3ec795be8be..74f0d3cb1d22 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -23,6 +23,10 @@
 
 #include <acpi/acpi_numa.h>
 
+static bool unplug_online = true;
+module_param(unplug_online, bool, 0644);
+MODULE_PARM_DESC(unplug_online, "Try to unplug online memory");
+
 enum virtio_mem_mb_state {
 	/* Unplugged, not added to Linux. Can be reused later. */
 	VIRTIO_MEM_MB_STATE_UNUSED = 0,
@@ -654,23 +658,35 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 }
 
 /*
- * Set a range of pages PG_offline.
+ * Set a range of pages PG_offline. Remember pages that were never onlined
+ * (via generic_online_page()) using PageDirty().
  */
 static void virtio_mem_set_fake_offline(unsigned long pfn,
-					unsigned int nr_pages)
+					unsigned int nr_pages, bool onlined)
 {
-	for (; nr_pages--; pfn++)
-		__SetPageOffline(pfn_to_page(pfn));
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		__SetPageOffline(page);
+		if (!onlined)
+			SetPageDirty(page);
+	}
 }
 
 /*
- * Clear PG_offline from a range of pages.
+ * Clear PG_offline from a range of pages. If the pages were never onlined
+ * (via generic_online_page()), clear PageDirty().
  */
 static void virtio_mem_clear_fake_offline(unsigned long pfn,
-					  unsigned int nr_pages)
+					  unsigned int nr_pages, bool onlined)
 {
-	for (; nr_pages--; pfn++)
-		__ClearPageOffline(pfn_to_page(pfn));
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		__ClearPageOffline(page);
+		if (!onlined)
+			ClearPageDirty(page);
+	}
 }
 
 /*
@@ -686,10 +702,26 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
 	 * We are always called with subblock granularity, which is at least
 	 * aligned to MAX_ORDER - 1.
 	 */
-	virtio_mem_clear_fake_offline(pfn, nr_pages);
+	for (i = 0; i < nr_pages; i += 1 << order) {
+		struct page *page = pfn_to_page(pfn + i);
 
-	for (i = 0; i < nr_pages; i += 1 << order)
-		generic_online_page(pfn_to_page(pfn + i), order);
+		/*
+		 * If the page is PageDirty(), it was kept fake-offline when
+		 * onlining the memory block. Otherwise, it was allocated
+		 * using alloc_contig_range(). All pages in a subblock are
+		 * alike.
+		 */
+		if (PageDirty(page)) {
+			virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+						      false);
+			generic_online_page(page, order);
+		} else {
+			virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+						      true);
+			free_contig_range(pfn + i, 1 << order);
+			adjust_managed_page_count(page, 1 << order);
+		}
+	}
 }
 
 static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
@@ -718,7 +750,8 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
 		if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
 			generic_online_page(page, order);
 		else
-			virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order);
+			virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
						    false);
 		rcu_read_unlock();
 		return;
 	}
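
The bookkeeping above boils down to one extra bit per page: every fake-offline page carries PG_offline, and PageDirty() additionally marks pages that were never handed to the buddy (kept back when the memory block was onlined), as opposed to pages pulled out of the buddy later via alloc_contig_range(). When fake-onlining, that bit selects whether the subblock goes to generic_online_page() or back through free_contig_range(). The userspace sketch below only models that decision; struct fake_page and its fields are illustrative stand-ins for the kernel's page flags, not driver code.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct page; only the two flags the driver cares about. */
struct fake_page {
        bool offline;   /* models PG_offline */
        bool dirty;     /* models PageDirty(): page was never onlined */
};

/* Models virtio_mem_set_fake_offline(). */
static void set_fake_offline(struct fake_page *p, unsigned int nr, bool onlined)
{
        for (unsigned int i = 0; i < nr; i++) {
                p[i].offline = true;
                if (!onlined)
                        p[i].dirty = true;
        }
}

/* Models virtio_mem_fake_online(): the dirty bit selects the release path. */
static void fake_online(struct fake_page *p, unsigned int nr)
{
        for (unsigned int i = 0; i < nr; i++) {
                p[i].offline = false;
                if (p[i].dirty) {
                        p[i].dirty = false;
                        puts("never onlined -> generic_online_page()");
                } else {
                        puts("from alloc_contig_range() -> free_contig_range()");
                }
        }
}

int main(void)
{
        struct fake_page sb[2] = { { false, false }, { false, false } };

        set_fake_offline(&sb[0], 1, false); /* kept back while onlining the block */
        set_fake_offline(&sb[1], 1, true);  /* unplugged from an online block */
        fake_online(sb, 2);
        return 0;
}
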
@@ -1186,6 +1219,72 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm,
 	return 0;
 }
 
+/*
+ * Unplug the desired number of plugged subblocks of an online memory block.
+ * Will skip subblocks that are busy.
+ *
+ * Will modify the state of the memory block.
+ *
+ * Note: Can fail after some subblocks were successfully unplugged. Can
+ *       return 0 even if subblocks were busy and could not get unplugged.
+ */
+static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm,
+					      unsigned long mb_id,
+					      uint64_t *nb_sb)
+{
+	const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+	unsigned long start_pfn;
+	int rc, sb_id;
+
+	/*
+	 * TODO: To increase the performance we want to try bigger, consecutive
+	 * subblocks first before falling back to single subblocks. Also,
+	 * we should sense via something like is_mem_section_removable()
+	 * first if it makes sense to go ahead and try to allocate.
+	 */
+	for (sb_id = 0; sb_id < vm->nb_sb_per_mb && *nb_sb; sb_id++) {
+		/* Find the next candidate subblock */
+		while (sb_id < vm->nb_sb_per_mb &&
+		       !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+			sb_id++;
+		if (sb_id >= vm->nb_sb_per_mb)
+			break;
+
+		start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
+				     sb_id * vm->subblock_size);
+		rc = alloc_contig_range(start_pfn, start_pfn + nr_pages,
+					MIGRATE_MOVABLE, GFP_KERNEL);
+		if (rc == -ENOMEM)
+			/* whoops, out of memory */
+			return rc;
+		if (rc)
+			/* memory busy, we can't unplug this chunk */
+			continue;
+
+		/* Mark it as fake-offline before unplugging it */
+		virtio_mem_set_fake_offline(start_pfn, nr_pages, true);
+		adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
+
+		/* Try to unplug the allocated memory */
+		rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, 1);
+		if (rc) {
+			/* Return the memory to the buddy. */
+			virtio_mem_fake_online(start_pfn, nr_pages);
+			return rc;
+		}
+
+		virtio_mem_mb_set_state(vm, mb_id,
+					VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
+		*nb_sb -= 1;
+	}
+
+	/*
+	 * TODO: Once all subblocks of a memory block were unplugged, we want
+	 * to offline the memory block and remove it.
+	 */
+	return 0;
+}
+
 /*
  * Try to unplug the requested amount of memory.
  */
@@ -1225,8 +1324,37 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
 		cond_resched();
 	}
 
+	if (!unplug_online) {
+		mutex_unlock(&vm->hotplug_mutex);
+		return 0;
+	}
+
+	/* Try to unplug subblocks of partially plugged online blocks. */
+	virtio_mem_for_each_mb_state_rev(vm, mb_id,
+					 VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
+		rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
+							&nb_sb);
+		if (rc || !nb_sb)
+			goto out_unlock;
+		mutex_unlock(&vm->hotplug_mutex);
+		cond_resched();
+		mutex_lock(&vm->hotplug_mutex);
+	}
+
+	/* Try to unplug subblocks of plugged online blocks. */
+	virtio_mem_for_each_mb_state_rev(vm, mb_id,
+					 VIRTIO_MEM_MB_STATE_ONLINE) {
+		rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
+							&nb_sb);
+		if (rc || !nb_sb)
+			goto out_unlock;
+		mutex_unlock(&vm->hotplug_mutex);
+		cond_resched();
+		mutex_lock(&vm->hotplug_mutex);
+	}
+
 	mutex_unlock(&vm->hotplug_mutex);
-	return 0;
+	return nb_sb ? -EBUSY : 0;
 out_unlock:
 	mutex_unlock(&vm->hotplug_mutex);
 	return rc;
@@ -1332,7 +1460,8 @@ retry:
 	case -EBUSY:
 		/*
 		 * The hypervisor cannot process our request right now
-		 * (e.g., out of memory, migrating).
+		 * (e.g., out of memory, migrating) or we cannot free up
+		 * any memory to unplug it (all plugged memory is busy).
 		 */
 	case -ENOMEM:
 		/* Out of memory, try again later. */
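
The request path above is best-effort: each per-block pass may silently skip busy subblocks (rc == 0 despite partial progress), the hotplug mutex is dropped between blocks so onlining/offlining can make progress, and only after every candidate block has been visited does a leftover nb_sb turn into -EBUSY, which the retry logic then treats like a hypervisor-side busy condition. A compressed userspace model of that contract, with illustrative names and hard-coded per-block results:

#include <stdint.h>
#include <stdio.h>

#define NB_BLOCKS 3

/*
 * Models virtio_mem_mb_unplug_any_sb_online(): busy subblocks are
 * skipped silently, so partial progress still returns 0.
 */
static int unplug_any_sb_online(uint64_t *nb_sb, uint64_t unpluggable)
{
        uint64_t done = unpluggable < *nb_sb ? unpluggable : *nb_sb;

        *nb_sb -= done;
        return 0;       /* a real -ENOMEM or unplug failure would propagate */
}

int main(void)
{
        uint64_t nb_sb = 8;     /* subblocks we were asked to unplug */
        /* Unpluggable subblocks per memory block; 0 = everything busy. */
        const uint64_t per_block[NB_BLOCKS] = { 3, 0, 2 };
        int rc = 0;

        for (int i = 0; i < NB_BLOCKS && !rc && nb_sb; i++)
                rc = unplug_any_sb_online(&nb_sb, per_block[i]);
        if (!rc)
                rc = nb_sb ? -16 /* -EBUSY */ : 0;

        /* Prints "unplugged 5 of 8, rc=-16"; the caller retries later. */
        printf("unplugged %ju of 8, rc=%d\n", (uintmax_t)(8 - nb_sb), rc);
        return 0;
}
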
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 13cc653122b7..ce1c9df54eac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8603,6 +8603,7 @@ done:
 				pfn_max_align_up(end), migratetype);
 	return ret;
 }
+EXPORT_SYMBOL(alloc_contig_range);
 
 static int __alloc_contig_pages(unsigned long start_pfn,
 				unsigned long nr_pages, gfp_t gfp_mask)
@@ -8718,6 +8719,7 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
 	}
 	WARN(count != 0, "%d pages are still in use!\n", count);
 }
+EXPORT_SYMBOL(free_contig_range);
 
 /*
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
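
The two EXPORT_SYMBOL() lines are needed because VIRTIO_MEM is tristate and may be built as a module; without them a modular virtio_mem.ko could not link against alloc_contig_range()/free_contig_range(). The hypothetical demo module below shows the same allocate-then-release pattern the driver uses. Everything here is illustrative: the start_pfn module parameter must name a MAX_ORDER-aligned range of online, movable memory (e.g. looked up by hand via /proc/zoneinfo), and nothing validates it, so treat this as a sketch of the contract rather than production code.

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/gfp.h>

/* PFN of a MAX_ORDER-aligned, online, movable range; not validated. */
static unsigned long start_pfn;
module_param(start_pfn, ulong, 0444);

static unsigned long nr_pages = 1UL << (MAX_ORDER - 1);

static int __init contig_demo_init(void)
{
        int rc;

        /* Migrate everything out of [start_pfn, start_pfn + nr_pages). */
        rc = alloc_contig_range(start_pfn, start_pfn + nr_pages,
                                MIGRATE_MOVABLE, GFP_KERNEL);
        if (rc)
                return rc;      /* -EBUSY/-ENOMEM, just like in virtio-mem */

        pr_info("allocated %lu contiguous pages at pfn %lu\n",
                nr_pages, start_pfn);

        /* Hand the range straight back to the buddy. */
        free_contig_range(start_pfn, nr_pages);
        return 0;
}

static void __exit contig_demo_exit(void)
{
}

module_init(contig_demo_init);
module_exit(contig_demo_exit);
MODULE_LICENSE("GPL");
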