diff --git a/mm/vmscan.c b/mm/vmscan.c index 26ad67f1962c..1c10ee512215 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, /* * kswapd shrinks the zone by the number of pages required to reach * the high watermark. + * + * Returns true if kswapd scanned at least the requested number of pages to + * reclaim. This is used to determine if the scanning priority needs to be + * raised. */ -static void kswapd_shrink_zone(struct zone *zone, +static bool kswapd_shrink_zone(struct zone *zone, struct scan_control *sc, unsigned long lru_pages) { @@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone, if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; + + return sc->nr_scanned >= sc->nr_to_reclaim; } /* @@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone, static unsigned long balance_pgdat(pg_data_t *pgdat, int order, int *classzone_idx) { - bool pgdat_is_balanced = false; int i; int end_zone = 0; /* Inclusive. 
0 = ZONE_DMA */ unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, + .priority = DEF_PRIORITY, .may_unmap = 1, .may_swap = 1, + .may_writepage = !laptop_mode, .order = order, .target_mem_cgroup = NULL, }; -loop_again: - sc.priority = DEF_PRIORITY; - sc.nr_reclaimed = 0; - sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); do { unsigned long lru_pages = 0; + bool raise_priority = true; + + sc.nr_reclaimed = 0; /* * Scan in the highmem->dma direction for the highest @@ -2762,10 +2768,8 @@ loop_again: } } - if (i < 0) { - pgdat_is_balanced = true; + if (i < 0) goto out; - } for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; @@ -2832,8 +2836,16 @@ loop_again: if ((buffer_heads_over_limit && is_highmem_idx(i)) || !zone_balanced(zone, testorder, - balance_gap, end_zone)) - kswapd_shrink_zone(zone, &sc, lru_pages); + balance_gap, end_zone)) { + /* + * There should be no need to raise the + * scanning priority if enough pages are + * already being scanned that high + * watermark would be met at 100% efficiency. + */ + if (kswapd_shrink_zone(zone, &sc, lru_pages)) + raise_priority = false; + } /* * If we're getting trouble reclaiming, start doing @@ -2868,46 +2880,29 @@ loop_again: pfmemalloc_watermark_ok(pgdat)) wake_up(&pgdat->pfmemalloc_wait); - if (pgdat_balanced(pgdat, order, *classzone_idx)) { - pgdat_is_balanced = true; - break; /* kswapd: all done */ - } - /* - * We do this so kswapd doesn't build up large priorities for - * example when it is freeing in parallel with allocators. It - * matches the direct reclaim path behaviour in terms of impact - * on zone->*_priority. + * Fragmentation may mean that the system cannot be rebalanced + * for high-order allocations in all zones. If twice the + * allocation size has been reclaimed and the zones are still + * not balanced then recheck the watermarks at order-0 to + * prevent kswapd reclaiming excessively. 
Assume that a + * process requesting a high-order allocation can direct reclaim/compact. */ - if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) - break; - } while (--sc.priority >= 0); - -out: - if (!pgdat_is_balanced) { - cond_resched(); - - try_to_freeze(); - - /* - * Fragmentation may mean that the system cannot be - * rebalanced for high-order allocations in all zones. - * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, - * it means the zones have been fully scanned and are still - * not balanced. For high-order allocations, there is - * little point trying all over again as kswapd may - * infinite loop. - * - * Instead, recheck all watermarks at order-0 as they - * are the most important. If watermarks are ok, kswapd will go - * back to sleep. High-order users can still perform direct - * reclaim if they wish. - */ - if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) + if (order && sc.nr_reclaimed >= 2UL << order) order = sc.order = 0; - goto loop_again; - } + /* Check if kswapd should be suspending */ + if (try_to_freeze() || kthread_should_stop()) + break; + + /* + * Raise priority if scanning rate is too low or there was no + * progress in reclaiming pages + */ + if (raise_priority || !sc.nr_reclaimed) + sc.priority--; + } while (sc.priority >= 0 && + !pgdat_balanced(pgdat, order, *classzone_idx)); /* * If kswapd was reclaiming at a higher order, it has the option of @@ -2936,6 +2931,7 @@ out: compact_pgdat(pgdat, order); } +out: /* * Return the order we were reclaiming at so prepare_kswapd_sleep() * makes a decision on the order we were last reclaiming at. However,