kernel/sched/core: add migrate_disable()

[bristot@redhat.com: rt: Increase/decrease the nr of migratory tasks when enabling/disabling migration
 Link: https://lkml.kernel.org/r/e981d271cbeca975bca710e2fbcc6078c09741b0.1498482127.git.bristot@redhat.com
]
[swood@redhat.com: fixups and optimisations
 Link: https://lkml.kernel.org/r/20190727055638.20443-1-swood@redhat.com
 Link: https://lkml.kernel.org/r/20191012065214.28109-1-swood@redhat.com
]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
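
Editorial note: migrate_disable() pins the calling task to its current CPU without disabling preemption, so the pinned section stays schedulable and may sleep on PREEMPT_RT spinlocks. A minimal caller-side sketch (hypothetical example, not part of the patch; example_pinned_work() is a made-up name):

#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/smp.h>

/* Hypothetical helper: work that needs a stable CPU number but may sleep. */
static void example_pinned_work(void)
{
	int cpu;

	migrate_disable();		/* pin to this CPU, stay preemptible */
	cpu = smp_processor_id();	/* stable until migrate_enable() */
	/*
	 * Work that must keep running on 'cpu' and may block, e.g. on a
	 * spinlock_t, which sleeps under PREEMPT_RT.
	 */
	pr_debug("pinned on CPU %d\n", cpu);
	migrate_enable();		/* unpin; the task may migrate again */
}

Unlike preempt_disable(), this keeps latency bounded on RT: higher-priority tasks can still preempt the pinned section.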

@ -201,6 +201,31 @@ do { \
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
extern void migrate_disable(void);
extern void migrate_enable(void);
int __migrate_disabled(struct task_struct *p);
#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
extern void migrate_disable(void);
extern void migrate_enable(void);
static inline int __migrate_disabled(struct task_struct *p)
{
return 0;
}
#else
#define migrate_disable() preempt_disable()
#define migrate_enable() preempt_enable()
static inline int __migrate_disabled(struct task_struct *p)
{
return 0;
}
#endif
#ifdef CONFIG_PREEMPTION
#define preempt_enable() \
do { \
@ -270,6 +295,13 @@ do { \
#define preempt_check_resched_rt() barrier()
#define preemptible() 0
#define migrate_disable() barrier()
#define migrate_enable() barrier()
static inline int __migrate_disabled(struct task_struct *p)
{
return 0;
}
#endif /* CONFIG_PREEMPT_COUNT */
#ifdef MODULE

@ -233,6 +233,8 @@ extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);
int cpu_nr_pinned(int cpu);
/**
* struct prev_cputime - snapshot of system and user cputime
* @utime: time spent in user mode
@ -705,6 +707,20 @@ struct task_struct {
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
cpumask_t cpus_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
int migrate_disable;
bool migrate_disable_scheduled;
# ifdef CONFIG_SCHED_DEBUG
int pinned_on_cpu;
# endif
#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
# ifdef CONFIG_SCHED_DEBUG
int migrate_disable;
# endif
#endif
#ifdef CONFIG_PREEMPT_RT
int sleeping_lock;
#endif
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@ -1865,6 +1881,23 @@ static __always_inline bool need_resched(void)
return unlikely(tif_need_resched());
}
#ifdef CONFIG_PREEMPT_RT
static inline void sleeping_lock_inc(void)
{
current->sleeping_lock++;
}
static inline void sleeping_lock_dec(void)
{
current->sleeping_lock--;
}
#else
static inline void sleeping_lock_inc(void) { }
static inline void sleeping_lock_dec(void) { }
#endif
/*
* Wrappers for p->thread_info->cpu access. No-op on UP.
*/
@ -2056,4 +2089,6 @@ int sched_trace_rq_cpu(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
extern struct task_struct *takedown_cpu_task;
#endif

@ -221,6 +221,9 @@ static inline int get_boot_cpu_id(void)
#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
#define put_cpu() preempt_enable()
#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); })
#define put_cpu_light() migrate_enable()
/*
* Callback to arch code if there's nosmp or maxcpus=0 on the
* boot command line:
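
Editorial note: get_cpu_light()/put_cpu_light(), added above, are the migrate-disable counterparts of get_cpu()/put_cpu() for sections that may sleep on PREEMPT_RT. A hedged caller sketch (hypothetical names, not from this patch):

#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);	/* spinlock_t: sleeps under PREEMPT_RT */

static void example_cpu_local_section(void)
{
	int cpu = get_cpu_light();	/* migrate_disable() + smp_processor_id() */

	spin_lock(&example_lock);	/* not allowed inside get_cpu()/put_cpu() on RT */
	pr_debug("serialised work on CPU %d\n", cpu);
	spin_unlock(&example_lock);
	put_cpu_light();		/* migrate_enable() */
}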

@ -74,6 +74,10 @@ struct task_struct init_task
.cpus_ptr = &init_task.cpus_mask,
.cpus_mask = CPU_MASK_ALL,
.nr_cpus_allowed= NR_CPUS,
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) && \
defined(CONFIG_SCHED_DEBUG)
.pinned_on_cpu = -1,
#endif
.mm = NULL,
.active_mm = &init_mm,
.restart_block = {

@ -915,6 +915,15 @@ static int take_cpu_down(void *_param)
int err, cpu = smp_processor_id();
int ret;
#ifdef CONFIG_PREEMPT_RT
/*
* If any tasks disabled migration before we got here,
* go back and sleep again.
*/
if (cpu_nr_pinned(cpu))
return -EAGAIN;
#endif
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
@ -944,6 +953,8 @@ static int take_cpu_down(void *_param)
return 0;
}
struct task_struct *takedown_cpu_task;
static int takedown_cpu(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@ -958,11 +969,39 @@ static int takedown_cpu(unsigned int cpu)
*/
irq_lock_sparse();
#ifdef CONFIG_PREEMPT_RT
WARN_ON_ONCE(takedown_cpu_task);
takedown_cpu_task = current;
again:
/*
* If a task pins this CPU after we pass this check, take_cpu_down
* will return -EAGAIN.
*/
for (;;) {
int nr_pinned;
set_current_state(TASK_UNINTERRUPTIBLE);
nr_pinned = cpu_nr_pinned(cpu);
if (nr_pinned == 0)
break;
schedule();
}
set_current_state(TASK_RUNNING);
#endif
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
#ifdef CONFIG_PREEMPT_RT
if (err == -EAGAIN)
goto again;
#endif
if (err) {
#ifdef CONFIG_PREEMPT_RT
takedown_cpu_task = NULL;
#endif
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
@ -981,6 +1020,9 @@ static int takedown_cpu(unsigned int cpu)
wait_for_ap_thread(st, false);
BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
#ifdef CONFIG_PREEMPT_RT
takedown_cpu_task = NULL;
#endif
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();
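
Editorial note: the hotplug side and migrate_enable() form a handshake. takedown_cpu() publishes itself in takedown_cpu_task and sleeps until no task is pinned to the outgoing CPU, the last migrate_enable() on an inactive CPU wakes it, and take_cpu_down() returns -EAGAIN if a task managed to pin itself in the meantime. Condensed into a generic waiter sketch (hypothetical helper name; the real loop is in takedown_cpu() above):

#include <linux/sched.h>

/* Sleep until no task on 'cpu' is inside a migrate_disable() section. */
static void example_wait_for_unpinned(int cpu)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!cpu_nr_pinned(cpu))	/* re-check after every wakeup */
			break;
		schedule();			/* woken by the last migrate_enable() */
	}
	__set_current_state(TASK_RUNNING);
}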

@ -1140,6 +1140,7 @@ void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
void __lockfunc rt_spin_lock(spinlock_t *lock)
{
sleeping_lock_inc();
migrate_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
@ -1154,6 +1155,7 @@ void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
sleeping_lock_inc();
migrate_disable();
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
@ -1167,6 +1169,7 @@ void __lockfunc rt_spin_unlock(spinlock_t *lock)
spin_release(&lock->dep_map, 1, _RET_IP_);
rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
migrate_enable();
sleeping_lock_dec();
}
EXPORT_SYMBOL(rt_spin_unlock);
@ -1192,12 +1195,15 @@ int __lockfunc rt_spin_trylock(spinlock_t *lock)
{
int ret;
sleeping_lock_inc();
migrate_disable();
ret = __rt_mutex_trylock(&lock->lock);
if (ret)
if (ret) {
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
else
} else {
migrate_enable();
sleeping_lock_dec();
}
return ret;
}
EXPORT_SYMBOL(rt_spin_trylock);
@ -1209,6 +1215,7 @@ int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
local_bh_disable();
ret = __rt_mutex_trylock(&lock->lock);
if (ret) {
sleeping_lock_inc();
migrate_disable();
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
} else
@ -1224,6 +1231,7 @@ int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
*flags = 0;
ret = __rt_mutex_trylock(&lock->lock);
if (ret) {
sleeping_lock_inc();
migrate_disable();
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
}
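
Editorial note: on failure, the trylock paths above undo sleeping_lock_inc() and migrate_disable() so the per-task counters stay balanced; only a successful trylock leaves the task pinned until the matching unlock. A caller-side sketch (hypothetical; assumes spinlock_t maps to the rt_spin_* implementation on PREEMPT_RT):

#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(example_lock);
static int example_count;

/* Opportunistic update: a failed trylock must not leak CPU pinning. */
static bool example_try_update(void)
{
	if (!spin_trylock(&example_lock))
		return false;		/* counters untouched on failure */
	example_count++;
	spin_unlock(&example_lock);	/* drops the pinning taken on success */
	return true;
}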

@ -305,12 +305,15 @@ int __lockfunc rt_read_trylock(rwlock_t *rwlock)
{
int ret;
sleeping_lock_inc();
migrate_disable();
ret = do_read_rt_trylock(rwlock);
if (ret)
if (ret) {
rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
else
} else {
migrate_enable();
sleeping_lock_dec();
}
return ret;
}
EXPORT_SYMBOL(rt_read_trylock);
@ -319,18 +322,22 @@ int __lockfunc rt_write_trylock(rwlock_t *rwlock)
{
int ret;
sleeping_lock_inc();
migrate_disable();
ret = do_write_rt_trylock(rwlock);
if (ret)
if (ret) {
rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
else
} else {
migrate_enable();
sleeping_lock_dec();
}
return ret;
}
EXPORT_SYMBOL(rt_write_trylock);
void __lockfunc rt_read_lock(rwlock_t *rwlock)
{
sleeping_lock_inc();
migrate_disable();
rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
do_read_rt_lock(rwlock);
@ -339,6 +346,7 @@ EXPORT_SYMBOL(rt_read_lock);
void __lockfunc rt_write_lock(rwlock_t *rwlock)
{
sleeping_lock_inc();
migrate_disable();
rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
do_write_rt_lock(rwlock);
@ -350,6 +358,7 @@ void __lockfunc rt_read_unlock(rwlock_t *rwlock)
rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
do_read_rt_unlock(rwlock);
migrate_enable();
sleeping_lock_dec();
}
EXPORT_SYMBOL(rt_read_unlock);
@ -358,6 +367,7 @@ void __lockfunc rt_write_unlock(rwlock_t *rwlock)
rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
do_write_rt_unlock(rwlock);
migrate_enable();
sleeping_lock_dec();
}
EXPORT_SYMBOL(rt_write_unlock);

@ -287,10 +287,14 @@ void rcu_note_context_switch(bool preempt)
struct task_struct *t = current;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
int sleeping_l = 0;
trace_rcu_utilization(TPS("Start context switch"));
lockdep_assert_irqs_disabled();
WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
#if defined(CONFIG_PREEMPT_RT)
sleeping_l = t->sleeping_lock;
#endif
WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);
if (t->rcu_read_lock_nesting > 0 &&
!t->rcu_read_unlock_special.b.blocked) {
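
Editorial note: the extra sleeping_lock check above keeps the debug warning meaningful on PREEMPT_RT, where a spinlock_t taken inside an RCU read-side critical section may block and schedule. A sketch of such a (legal) reader, with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct example_obj {
	int data;
};

static DEFINE_SPINLOCK(example_lock);

static int example_reader(struct example_obj __rcu **slot)
{
	struct example_obj *obj;
	int val = 0;

	rcu_read_lock();
	obj = rcu_dereference(*slot);
	/*
	 * May sleep on PREEMPT_RT; sleeping_lock_inc() in rt_spin_lock()
	 * tells rcu_note_context_switch() not to warn about the resulting
	 * context switch inside the read-side section.
	 */
	spin_lock(&example_lock);
	if (obj)
		val = obj->data;
	spin_unlock(&example_lock);
	rcu_read_unlock();
	return val;
}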

@ -1582,7 +1582,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
return false;
if (is_per_cpu_kthread(p))
if (is_per_cpu_kthread(p) || __migrate_disabled(p))
return cpu_online(cpu);
return cpu_active(cpu);
@ -1706,9 +1706,18 @@ static int migration_cpu_stop(void *data)
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
if (p->cpus_ptr == &p->cpus_mask)
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
int __migrate_disabled(struct task_struct *p)
{
return p->migrate_disable;
}
EXPORT_SYMBOL_GPL(__migrate_disabled);
#endif
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
@ -1797,7 +1806,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
}
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
if (cpumask_test_cpu(task_cpu(p), new_mask) ||
p->cpus_ptr != &p->cpus_mask)
goto out;
if (task_running(rq, p) || p->state == TASK_WAKING) {
@ -4130,6 +4140,8 @@ restart:
BUG();
}
static void migrate_disabled_sched(struct task_struct *p);
/*
* __schedule() is the main scheduler function.
*
@ -4200,6 +4212,9 @@ static void __sched notrace __schedule(bool preempt)
rq_lock(rq, &rf);
smp_mb__after_spinlock();
if (__migrate_disabled(prev))
migrate_disabled_sched(prev);
/* Promote REQ to ACT */
rq->clock_update_flags <<= 1;
update_rq_clock(rq);
@ -6437,6 +6452,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
break;
next = __pick_migrate_task(rq);
WARN_ON_ONCE(__migrate_disabled(next));
/*
* Rules for changing task_struct::cpus_mask are holding
@ -8156,3 +8172,162 @@ const u32 sched_prio_to_wmult[40] = {
};
#undef CREATE_TRACE_POINTS
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
static inline void
update_nr_migratory(struct task_struct *p, long delta)
{
if (unlikely((p->sched_class == &rt_sched_class ||
p->sched_class == &dl_sched_class) &&
p->nr_cpus_allowed > 1)) {
if (p->sched_class == &rt_sched_class)
task_rq(p)->rt.rt_nr_migratory += delta;
else
task_rq(p)->dl.dl_nr_migratory += delta;
}
}
static inline void
migrate_disable_update_cpus_allowed(struct task_struct *p)
{
p->cpus_ptr = cpumask_of(smp_processor_id());
update_nr_migratory(p, -1);
p->nr_cpus_allowed = 1;
}
static inline void
migrate_enable_update_cpus_allowed(struct task_struct *p)
{
struct rq *rq;
struct rq_flags rf;
rq = task_rq_lock(p, &rf);
p->cpus_ptr = &p->cpus_mask;
p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
update_nr_migratory(p, 1);
task_rq_unlock(rq, p, &rf);
}
void migrate_disable(void)
{
preempt_disable();
if (++current->migrate_disable == 1) {
this_rq()->nr_pinned++;
#ifdef CONFIG_SCHED_DEBUG
WARN_ON_ONCE(current->pinned_on_cpu >= 0);
current->pinned_on_cpu = smp_processor_id();
#endif
}
preempt_enable();
}
EXPORT_SYMBOL(migrate_disable);
static void migrate_disabled_sched(struct task_struct *p)
{
if (p->migrate_disable_scheduled)
return;
migrate_disable_update_cpus_allowed(p);
p->migrate_disable_scheduled = 1;
}
void migrate_enable(void)
{
struct task_struct *p = current;
struct rq *rq = this_rq();
int cpu = task_cpu(p);
WARN_ON_ONCE(p->migrate_disable <= 0);
if (p->migrate_disable > 1) {
p->migrate_disable--;
return;
}
preempt_disable();
#ifdef CONFIG_SCHED_DEBUG
WARN_ON_ONCE(current->pinned_on_cpu != cpu);
current->pinned_on_cpu = -1;
#endif
WARN_ON_ONCE(rq->nr_pinned < 1);
p->migrate_disable = 0;
rq->nr_pinned--;
if (rq->nr_pinned == 0 && unlikely(!cpu_active(cpu)) &&
takedown_cpu_task)
wake_up_process(takedown_cpu_task);
if (!p->migrate_disable_scheduled)
goto out;
p->migrate_disable_scheduled = 0;
migrate_enable_update_cpus_allowed(p);
WARN_ON(smp_processor_id() != cpu);
if (!is_cpu_allowed(p, cpu)) {
struct migration_arg arg = { p };
struct rq_flags rf;
rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
arg.dest_cpu = select_fallback_rq(cpu, p);
task_rq_unlock(rq, p, &rf);
preempt_enable();
sleeping_lock_inc();
stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
sleeping_lock_dec();
return;
}
out:
preempt_enable();
}
EXPORT_SYMBOL(migrate_enable);
int cpu_nr_pinned(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return rq->nr_pinned;
}
#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
static void migrate_disabled_sched(struct task_struct *p)
{
}
void migrate_disable(void)
{
#ifdef CONFIG_SCHED_DEBUG
current->migrate_disable++;
#endif
barrier();
}
EXPORT_SYMBOL(migrate_disable);
void migrate_enable(void)
{
#ifdef CONFIG_SCHED_DEBUG
struct task_struct *p = current;
WARN_ON_ONCE(p->migrate_disable <= 0);
p->migrate_disable--;
#endif
barrier();
}
EXPORT_SYMBOL(migrate_enable);
#else
static void migrate_disabled_sched(struct task_struct *p)
{
}
#endif
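
Editorial note: migrate_disable()/migrate_enable() nest through the per-task counter; only the outermost enable unpins the task, drops rq->nr_pinned and, if the CPU is no longer allowed, pushes the task away via stop_one_cpu(). A nesting sketch (hypothetical helpers, not part of the patch):

#include <linux/preempt.h>

/* Helpers may pin unconditionally; the counter makes the calls nest. */
static void example_inner(void)
{
	migrate_disable();	/* ->migrate_disable: 1 -> 2 */
	/* ... still pinned to the same CPU ... */
	migrate_enable();	/* 2 -> 1: fast path, task stays pinned */
}

static void example_outer(void)
{
	migrate_disable();	/* 0 -> 1: pins the task to this CPU */
	example_inner();
	migrate_enable();	/* 1 -> 0: unpins; may move the task if the
				 * CPU went inactive or was masked out */
}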

@ -974,6 +974,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(dl.runtime);
P(dl.deadline);
}
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
P(migrate_disable);
#endif
P(nr_cpus_allowed);
#undef PN_SCHEDSTAT
#undef PN
#undef __PN

@ -1004,6 +1004,10 @@ struct rq {
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
#endif
#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
int nr_pinned;
#endif
};
#ifdef CONFIG_FAIR_GROUP_SCHED

@ -23,6 +23,11 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
* Kernel threads bound to a single CPU can safely use
* smp_processor_id():
*/
#if defined(CONFIG_PREEMPT_RT) && (defined(CONFIG_SMP) || defined(CONFIG_SCHED_DEBUG))
if (current->migrate_disable)
goto out;
#endif
if (current->nr_cpus_allowed == 1)
goto out;