sched/fair: Move rebalance_domains()
This pure code movement results in two #ifdef CONFIG_NO_HZ_COMMON
sections landing next to each other.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 63928384fa
commit af3fe03c56
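The commit message's point is easiest to see in outline. The sketch below is hypothetical and heavily elided; only the preprocessor structure and function names are taken from the diff that follows:

/* Before: the moved block sat between the two NO_HZ_COMMON sections. */
#ifdef CONFIG_NO_HZ_COMMON
/* ... nohz_balancer_kick() and the other nohz machinery ... */
#endif

static DEFINE_SPINLOCK(balancing);
/* ... update_max_interval() and rebalance_domains() ... */

#ifdef CONFIG_NO_HZ_COMMON
/* ... nohz idle balancing ... */
#endif

/* After: with that block hoisted up in front of on_null_domain(), the
 * #endif of the first section and the #ifdef of the second meet back
 * to back. */
#ifdef CONFIG_NO_HZ_COMMON
/* ... */
#endif
#ifdef CONFIG_NO_HZ_COMMON
/* ... */
#endif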
@@ -9121,6 +9121,124 @@ out_unlock:
 	return 0;
 }
 
+static DEFINE_SPINLOCK(balancing);
+
+/*
+ * Scale the max load_balance interval with the number of CPUs in the system.
+ * This trades load-balance latency on larger machines for less cross talk.
+ */
+void update_max_interval(void)
+{
+	max_load_balance_interval = HZ*num_online_cpus()/10;
+}
+
+/*
+ * It checks each scheduling domain to see if it is due to be balanced,
+ * and initiates a balancing operation if so.
+ *
+ * Balancing parameters are set up in init_sched_domains.
+ */
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
+{
+	int continue_balancing = 1;
+	int cpu = rq->cpu;
+	unsigned long interval;
+	struct sched_domain *sd;
+	/* Earliest time when we have to do rebalance again */
+	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
+		if (!(sd->flags & SD_LOAD_BALANCE))
+			continue;
+
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+
+		need_serialize = sd->flags & SD_SERIALIZE;
+		if (need_serialize) {
+			if (!spin_trylock(&balancing))
+				goto out;
+		}
+
+		if (time_after_eq(jiffies, sd->last_balance + interval)) {
+			if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
+				/*
+				 * The LBF_DST_PINNED logic could have changed
+				 * env->dst_cpu, so we can't know our idle
+				 * state even if we migrated tasks. Update it.
+				 */
+				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+			}
+			sd->last_balance = jiffies;
+			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+		}
+		if (need_serialize)
+			spin_unlock(&balancing);
+out:
+		if (time_after(next_balance, sd->last_balance + interval)) {
+			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
+	}
+	if (need_decay) {
+		/*
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
+		 */
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
+	}
+	rcu_read_unlock();
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the cpu is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance)) {
+		rq->next_balance = next_balance;
+
+#ifdef CONFIG_NO_HZ_COMMON
+		/*
+		 * If this CPU has been elected to perform the nohz idle
+		 * balance. Other idle CPUs have already rebalanced with
+		 * nohz_idle_balance() and nohz.next_balance has been
+		 * updated accordingly. This CPU is now running the idle load
+		 * balance for itself and we need to update the
+		 * nohz.next_balance accordingly.
+		 */
+		if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
+			nohz.next_balance = rq->next_balance;
+#endif
+	}
+}
+
 static inline int on_null_domain(struct rq *rq)
 {
 	return unlikely(!rcu_dereference_sched(rq->sd));
@@ -9373,124 +9491,6 @@ out:
 static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
-static DEFINE_SPINLOCK(balancing);
-
-/*
- * Scale the max load_balance interval with the number of CPUs in the system.
- * This trades load-balance latency on larger machines for less cross talk.
- */
-void update_max_interval(void)
-{
-	max_load_balance_interval = HZ*num_online_cpus()/10;
-}
-
-/*
- * It checks each scheduling domain to see if it is due to be balanced,
- * and initiates a balancing operation if so.
- *
- * Balancing parameters are set up in init_sched_domains.
- */
-static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
-{
-	int continue_balancing = 1;
-	int cpu = rq->cpu;
-	unsigned long interval;
-	struct sched_domain *sd;
-	/* Earliest time when we have to do rebalance again */
-	unsigned long next_balance = jiffies + 60*HZ;
-	int update_next_balance = 0;
-	int need_serialize, need_decay = 0;
-	u64 max_cost = 0;
-
-	rcu_read_lock();
-	for_each_domain(cpu, sd) {
-		/*
-		 * Decay the newidle max times here because this is a regular
-		 * visit to all the domains. Decay ~1% per second.
-		 */
-		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
-			sd->max_newidle_lb_cost =
-				(sd->max_newidle_lb_cost * 253) / 256;
-			sd->next_decay_max_lb_cost = jiffies + HZ;
-			need_decay = 1;
-		}
-		max_cost += sd->max_newidle_lb_cost;
-
-		if (!(sd->flags & SD_LOAD_BALANCE))
-			continue;
-
-		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
-		 */
-		if (!continue_balancing) {
-			if (need_decay)
-				continue;
-			break;
-		}
-
-		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-
-		need_serialize = sd->flags & SD_SERIALIZE;
-		if (need_serialize) {
-			if (!spin_trylock(&balancing))
-				goto out;
-		}
-
-		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
-				/*
-				 * The LBF_DST_PINNED logic could have changed
-				 * env->dst_cpu, so we can't know our idle
-				 * state even if we migrated tasks. Update it.
-				 */
-				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
-			}
-			sd->last_balance = jiffies;
-			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-		}
-		if (need_serialize)
-			spin_unlock(&balancing);
-out:
-		if (time_after(next_balance, sd->last_balance + interval)) {
-			next_balance = sd->last_balance + interval;
-			update_next_balance = 1;
-		}
-	}
-	if (need_decay) {
-		/*
-		 * Ensure the rq-wide value also decays but keep it at a
-		 * reasonable floor to avoid funnies with rq->avg_idle.
-		 */
-		rq->max_idle_balance_cost =
-			max((u64)sysctl_sched_migration_cost, max_cost);
-	}
-	rcu_read_unlock();
-
-	/*
-	 * next_balance will be updated only when there is a need.
-	 * When the cpu is attached to null domain for ex, it will not be
-	 * updated.
-	 */
-	if (likely(update_next_balance)) {
-		rq->next_balance = next_balance;
-
-#ifdef CONFIG_NO_HZ_COMMON
-		/*
-		 * If this CPU has been elected to perform the nohz idle
-		 * balance. Other idle CPUs have already rebalanced with
-		 * nohz_idle_balance() and nohz.next_balance has been
-		 * updated accordingly. This CPU is now running the idle load
-		 * balance for itself and we need to update the
-		 * nohz.next_balance accordingly.
-		 */
-		if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
-			nohz.next_balance = rq->next_balance;
-#endif
-	}
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the