sched, nohz: keep per-runqueue nohz state in an rq->nohz_flags bit word

Replace the 'unsigned char nohz_balance_kick' member of struct rq with an
'unsigned long nohz_flags' word that is accessed through the kernel bitops
(test_bit, set_bit, clear_bit, test_and_set_bit) and the new
nohz_flags(cpu) accessor macro.  Two bits are defined in sched.h:

  NOHZ_TICK_STOPPED  set in select_nohz_load_balancer() just before its
                     '} else {' arm; cleared in nohz_kick_needed() once
                     the cpu is seen to be non-idle.
  NOHZ_BALANCE_KICK  set by nohz_balancer_kick() on the chosen ilb_cpu
                     before the reschedule IPI is sent; tested by
                     got_nohz_idle_kick() and nohz_idle_balance(); cleared
                     at the 'end:' label of nohz_idle_balance().

Other changes visible in the hunks below:

  - nohz_balancer_kick() no longer open-codes "test flag, store 1,
    smp_mb()"; it uses a single test_and_set_bit() and returns early when
    the bit was already set.
  - nohz_idle_balance() now clears NOHZ_BALANCE_KICK on its early-exit
    path as well (the former 'return' became 'goto end'), and no longer
    clears the flag inside the loop before 'break'.
  - nohz_kick_needed() checks idle_cpu() before the nohz.next_balance
    time check.
  - trigger_load_balance() evaluates nohz_kick_needed() unconditionally
    ('else if' became 'if').

Review note: the kick bit must be set on ilb_cpu, the cpu that receives
the IPI, exactly as the removed code wrote cpu_rq(ilb_cpu)->
nohz_balance_kick.  Setting it on the local 'cpu' would leave the target's
got_nohz_idle_kick() without a pending kick and would leave the local
cpu's bit set, so its later test_and_set_bit() calls would return early.
The post-image blob id on the fair.c 'index' line predates this
one-identifier correction.

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07f1e9935f21..7f1da77b83f3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -575,7 +575,8 @@ void wake_up_idle_cpu(int cpu)
 
 static inline bool got_nohz_idle_kick(void)
 {
-	return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick;
+	int cpu = smp_processor_id();
+	return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
 }
 
 #else /* CONFIG_NO_HZ */
@@ -6840,7 +6841,7 @@ void __init sched_init(void)
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		rq_attach_root(rq, &def_root_domain);
 #ifdef CONFIG_NO_HZ
-		rq->nohz_balance_kick = 0;
+		rq->nohz_flags = 0;
 #endif
 #endif
 		init_rq_hrtick(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81ccb811afb4..50c06b0e9fab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4889,18 +4889,15 @@ static void nohz_balancer_kick(int cpu)
 			return;
 	}
 
-	if (!cpu_rq(ilb_cpu)->nohz_balance_kick) {
-		cpu_rq(ilb_cpu)->nohz_balance_kick = 1;
-
-		smp_mb();
-		/*
-		 * Use smp_send_reschedule() instead of resched_cpu().
-		 * This way we generate a sched IPI on the target cpu which
-		 * is idle. And the softirq performing nohz idle load balance
-		 * will be run before returning from the IPI.
-		 */
-		smp_send_reschedule(ilb_cpu);
-	}
+	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))
+		return;
+	/*
+	 * Use smp_send_reschedule() instead of resched_cpu().
+	 * This way we generate a sched IPI on the target cpu which
+	 * is idle. And the softirq performing nohz idle load balance
+	 * will be run before returning from the IPI.
+	 */
+	smp_send_reschedule(ilb_cpu);
 	return;
 }
 
@@ -4964,6 +4961,8 @@ void select_nohz_load_balancer(int stop_tick)
 			}
 			return;
 		}
+
+		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 	} else {
 		if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
 			return;
@@ -5079,8 +5078,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 	struct rq *rq;
 	int balance_cpu;
 
-	if (idle != CPU_IDLE || !this_rq->nohz_balance_kick)
-		return;
+	if (idle != CPU_IDLE ||
+	    !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
+		goto end;
 
 	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
 		if (balance_cpu == this_cpu)
@@ -5091,10 +5091,8 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 		 * work being done for other cpus. Next load
 		 * balancing owner will pick it up.
 		 */
-		if (need_resched()) {
-			this_rq->nohz_balance_kick = 0;
+		if (need_resched())
 			break;
-		}
 
 		raw_spin_lock_irq(&this_rq->lock);
 		update_rq_clock(this_rq);
@@ -5108,7 +5106,8 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 			this_rq->next_balance = rq->next_balance;
 	}
 	nohz.next_balance = this_rq->next_balance;
-	this_rq->nohz_balance_kick = 0;
+end:
+	clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
 }
 
 /*
@@ -5129,10 +5128,17 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	int ret;
 	int first_pick_cpu, second_pick_cpu;
 
-	if (time_before(now, nohz.next_balance))
+	if (unlikely(idle_cpu(cpu)))
 		return 0;
 
-	if (idle_cpu(cpu))
+	/*
+	 * We may be recently in ticked or tickless idle mode. At the first
+	 * busy tick after returning from idle, we will update the busy stats.
+	 */
+	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
+		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+
+	if (time_before(now, nohz.next_balance))
 		return 0;
 
 	first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
@@ -5196,7 +5202,7 @@ void trigger_load_balance(struct rq *rq, int cpu)
 	    likely(!on_null_domain(cpu)))
 		raise_softirq(SCHED_SOFTIRQ);
 #ifdef CONFIG_NO_HZ
-	else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
+	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
 		nohz_balancer_kick(cpu);
 #endif
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8715055979d1..cf7d02662bc2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -371,7 +371,7 @@ struct rq {
 	unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ
 	u64 nohz_stamp;
-	unsigned char nohz_balance_kick;
+	unsigned long nohz_flags;
 #endif
 	int skip_clock_update;
 
@@ -1064,3 +1064,12 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
 extern void unthrottle_offline_cfs_rqs(struct rq *rq);
 
 extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
+
+#ifdef CONFIG_NO_HZ
+enum rq_nohz_flag_bits {
+	NOHZ_TICK_STOPPED,
+	NOHZ_BALANCE_KICK,
+};
+
+#define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+#endif