diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 309b1d551f25..95b944ecf7e4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2155,6 +2155,7 @@ void task_numa_work(struct callback_head *work) unsigned long migrate, next_scan, now = jiffies; struct task_struct *p = current; struct mm_struct *mm = p->mm; + u64 runtime = p->se.sum_exec_runtime; struct vm_area_struct *vma; unsigned long start, end; unsigned long nr_pte_updates = 0; @@ -2277,6 +2278,17 @@ out: else reset_ptenuma_scan(p); up_read(&mm->mmap_sem); + + /* + * Make sure tasks use at least 32x as much time to run other code + * than they used here, to limit NUMA PTE scanning overhead to 3% max. + * Usually update_task_scan_period slows down scanning enough; on an + * overloaded system we need to limit overhead on a per task basis. + */ + if (unlikely(p->se.sum_exec_runtime != runtime)) { + u64 diff = p->se.sum_exec_runtime - runtime; + p->node_stamp += 32 * diff; + } } /*