Simplify stop_machine

stop_machine creates a kthread which creates kernel threads.  We can
create those threads directly and simplify things a little.  Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
This commit is contained in:
Rusty Russell 2008-07-28 12:16:28 -05:00
parent 5c2aed6225
commit ffdb5976c4
3 changed files with 144 additions and 198 deletions

View File

@ -17,13 +17,12 @@
* @data: the data ptr for the @fn() * @data: the data ptr for the @fn()
* @cpu: if @cpu == n, run @fn() on cpu n * @cpu: if @cpu == n, run @fn() on cpu n
* if @cpu == NR_CPUS, run @fn() on any cpu * if @cpu == NR_CPUS, run @fn() on any cpu
* if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then * if @cpu == ALL_CPUS, run @fn() on every online CPU.
* concurrently on all the other cpus
* *
* Description: This causes a thread to be scheduled on every other cpu, * Description: This causes a thread to be scheduled on every cpu,
* each of which disables interrupts, and finally interrupts are disabled * each of which disables interrupts. The result is that no one is
* on the current CPU. The result is that no one is holding a spinlock * holding a spinlock or inside any other preempt-disabled region when
* or inside any other preempt-disabled region when @fn() runs. * @fn() runs.
* *
* This can be thought of as a very heavy write lock, equivalent to * This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */ * grabbing every spinlock in the kernel. */
@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
* @data: the data ptr for the @fn * @data: the data ptr for the @fn
* @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS). * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS).
* *
* Description: This is a special version of the above, which returns the * Description: This is a special version of the above, which assumes cpus
* thread which has run @fn(): kthread_stop will return the return value * won't come or go while it's being called. Used by hotplug cpu.
* of @fn(). Used by hotplug cpu.
*/ */
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
unsigned int cpu);
#else #else
static inline int stop_machine_run(int (*fn)(void *), void *data, static inline int stop_machine_run(int (*fn)(void *), void *data,

View File

@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param)
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{ {
int err, nr_calls = 0; int err, nr_calls = 0;
struct task_struct *p;
cpumask_t old_allowed, tmp; cpumask_t old_allowed, tmp;
void *hcpu = (void *)(long)cpu; void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
cpu_clear(cpu, tmp); cpu_clear(cpu, tmp);
set_cpus_allowed_ptr(current, &tmp); set_cpus_allowed_ptr(current, &tmp);
p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
if (IS_ERR(p) || cpu_online(cpu)) { if (err || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */ /* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD) hcpu) == NOTIFY_BAD)
BUG(); BUG();
if (IS_ERR(p)) { goto out_allowed;
err = PTR_ERR(p);
goto out_allowed;
}
goto out_thread;
} }
/* Wait for it to sleep (leaving idle task). */ /* Wait for it to sleep (leaving idle task). */
@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
check_for_tasks(cpu); check_for_tasks(cpu);
out_thread:
err = kthread_stop(p);
out_allowed: out_allowed:
set_cpus_allowed_ptr(current, &old_allowed); set_cpus_allowed_ptr(current, &old_allowed);
out_release: out_release:

View File

@ -1,4 +1,4 @@
/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. /* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
* GPL v2 and any later version. * GPL v2 and any later version.
*/ */
#include <linux/cpu.h> #include <linux/cpu.h>
@ -13,220 +13,177 @@
#include <asm/atomic.h> #include <asm/atomic.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
/* Since we affect priority and affinity (both of which are visible /* This controls the threads on each CPU. */
* to, and settable by outside processes) we do indirection via a
* kthread. */
/* Thread to stop each CPU in user context. */
enum stopmachine_state { enum stopmachine_state {
STOPMACHINE_WAIT, /* Dummy starting state for thread. */
STOPMACHINE_NONE,
/* Awaiting everyone to be scheduled. */
STOPMACHINE_PREPARE, STOPMACHINE_PREPARE,
/* Disable interrupts. */
STOPMACHINE_DISABLE_IRQ, STOPMACHINE_DISABLE_IRQ,
/* Run the function */
STOPMACHINE_RUN, STOPMACHINE_RUN,
/* Exit */
STOPMACHINE_EXIT, STOPMACHINE_EXIT,
}; };
static enum stopmachine_state state;
struct stop_machine_data { struct stop_machine_data {
int (*fn)(void *); int (*fn)(void *);
void *data; void *data;
struct completion done; int fnret;
int run_all; };
} smdata;
static enum stopmachine_state stopmachine_state; /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
static unsigned int stopmachine_num_threads; static unsigned int num_threads;
static atomic_t stopmachine_thread_ack; static atomic_t thread_ack;
static struct completion finished;
static DEFINE_MUTEX(lock);
static int stopmachine(void *cpu) static void set_state(enum stopmachine_state newstate)
{ {
int irqs_disabled = 0; /* Reset ack counter. */
int prepared = 0; atomic_set(&thread_ack, num_threads);
int ran = 0; smp_wmb();
cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); state = newstate;
}
set_cpus_allowed_ptr(current, cpumask); /* Last one to ack a state moves to the next state. */
static void ack_state(void)
{
if (atomic_dec_and_test(&thread_ack)) {
/* If we're the last one to ack the EXIT, we're finished. */
if (state == STOPMACHINE_EXIT)
complete(&finished);
else
set_state(state + 1);
}
}
/* Ack: we are alive */ /* This is the actual thread which stops the CPU. It exits by itself rather
smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
atomic_inc(&stopmachine_thread_ack); static int stop_cpu(struct stop_machine_data *smdata)
{
enum stopmachine_state curstate = STOPMACHINE_NONE;
int uninitialized_var(ret);
/* Simple state machine */ /* Simple state machine */
while (stopmachine_state != STOPMACHINE_EXIT) { do {
if (stopmachine_state == STOPMACHINE_DISABLE_IRQ /* Chill out and ensure we re-read state. */
&& !irqs_disabled) {
local_irq_disable();
hard_irq_disable();
irqs_disabled = 1;
/* Ack: irqs disabled. */
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
} else if (stopmachine_state == STOPMACHINE_PREPARE
&& !prepared) {
/* Everyone is in place, hold CPU. */
preempt_disable();
prepared = 1;
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
smdata.fn(smdata.data);
ran = 1;
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
}
/* Yield in first stage: migration threads need to
* help our sisters onto their CPUs. */
if (!prepared && !irqs_disabled)
yield();
cpu_relax(); cpu_relax();
} if (state != curstate) {
curstate = state;
switch (curstate) {
case STOPMACHINE_DISABLE_IRQ:
local_irq_disable();
hard_irq_disable();
break;
case STOPMACHINE_RUN:
/* |= allows error detection if functions run on
* multiple CPUs. */
smdata->fnret |= smdata->fn(smdata->data);
break;
default:
break;
}
ack_state();
}
} while (curstate != STOPMACHINE_EXIT);
/* Ack: we are exiting. */ local_irq_enable();
smp_mb(); /* Must read state first. */ do_exit(0);
atomic_inc(&stopmachine_thread_ack); }
if (irqs_disabled)
local_irq_enable();
if (prepared)
preempt_enable();
/* Callback for CPUs which aren't supposed to do anything. */
static int chill(void *unused)
{
return 0; return 0;
} }
/* Change the thread state */ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
static void stopmachine_set_state(enum stopmachine_state state)
{ {
atomic_set(&stopmachine_thread_ack, 0); int i, err;
smp_wmb(); struct stop_machine_data active, idle;
stopmachine_state = state; struct task_struct **threads;
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
cpu_relax();
}
static int stop_machine(void) active.fn = fn;
{ active.data = data;
int i, ret = 0; active.fnret = 0;
idle.fn = chill;
idle.data = NULL;
atomic_set(&stopmachine_thread_ack, 0); /* If they don't care which cpu fn runs on, just pick one. */
stopmachine_num_threads = 0; if (cpu == NR_CPUS)
stopmachine_state = STOPMACHINE_WAIT; cpu = any_online_cpu(cpu_online_map);
/* This could be too big for stack on large machines. */
threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
if (!threads)
return -ENOMEM;
/* Set up initial state. */
mutex_lock(&lock);
init_completion(&finished);
num_threads = num_online_cpus();
set_state(STOPMACHINE_PREPARE);
for_each_online_cpu(i) { for_each_online_cpu(i) {
if (i == raw_smp_processor_id()) struct stop_machine_data *smdata;
continue;
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
if (ret < 0)
break;
stopmachine_num_threads++;
}
/* Wait for them all to come to life. */
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
yield();
cpu_relax();
}
/* If some failed, kill them all. */
if (ret < 0) {
stopmachine_set_state(STOPMACHINE_EXIT);
return ret;
}
/* Now they are all started, make them hold the CPUs, ready. */
preempt_disable();
stopmachine_set_state(STOPMACHINE_PREPARE);
/* Make them disable irqs. */
local_irq_disable();
hard_irq_disable();
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
return 0;
}
static void restart_machine(void)
{
stopmachine_set_state(STOPMACHINE_EXIT);
local_irq_enable();
preempt_enable_no_resched();
}
static void run_other_cpus(void)
{
stopmachine_set_state(STOPMACHINE_RUN);
}
static int do_stop(void *_smdata)
{
struct stop_machine_data *smdata = _smdata;
int ret;
ret = stop_machine();
if (ret == 0) {
ret = smdata->fn(smdata->data);
if (smdata->run_all)
run_other_cpus();
restart_machine();
}
/* We're done: you can kthread_stop us now */
complete(&smdata->done);
/* Wait for kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return ret;
}
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
unsigned int cpu)
{
static DEFINE_MUTEX(stopmachine_mutex);
struct stop_machine_data smdata;
struct task_struct *p;
mutex_lock(&stopmachine_mutex);
smdata.fn = fn;
smdata.data = data;
smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
init_completion(&smdata.done);
smp_wmb(); /* make sure other cpus see smdata updates */
/* If they don't care which CPU fn runs on, bind to any online one. */
if (cpu == NR_CPUS || cpu == ALL_CPUS)
cpu = raw_smp_processor_id();
p = kthread_create(do_stop, &smdata, "kstopmachine");
if (!IS_ERR(p)) {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
/* One high-prio thread per cpu. We'll do this one. */ if (cpu == ALL_CPUS || i == cpu)
sched_setscheduler_nocheck(p, SCHED_FIFO, &param); smdata = &active;
kthread_bind(p, cpu); else
wake_up_process(p); smdata = &idle;
wait_for_completion(&smdata.done);
threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
i);
if (IS_ERR(threads[i])) {
err = PTR_ERR(threads[i]);
threads[i] = NULL;
goto kill_threads;
}
/* Place it onto correct cpu. */
kthread_bind(threads[i], i);
/* Make it highest prio. */
if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
BUG();
} }
mutex_unlock(&stopmachine_mutex);
return p; /* We've created all the threads. Wake them all: hold this CPU so one
* doesn't hit this CPU until we're ready. */
cpu = get_cpu();
for_each_online_cpu(i)
wake_up_process(threads[i]);
/* This will release the thread on our CPU. */
put_cpu();
wait_for_completion(&finished);
mutex_unlock(&lock);
kfree(threads);
return active.fnret;
kill_threads:
for_each_online_cpu(i)
if (threads[i])
kthread_stop(threads[i]);
mutex_unlock(&lock);
kfree(threads);
return err;
} }
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{ {
struct task_struct *p;
int ret; int ret;
/* No CPUs can come up or down during this. */ /* No CPUs can come up or down during this. */
get_online_cpus(); get_online_cpus();
p = __stop_machine_run(fn, data, cpu); ret = __stop_machine_run(fn, data, cpu);
if (!IS_ERR(p))
ret = kthread_stop(p);
else
ret = PTR_ERR(p);
put_online_cpus(); put_online_cpus();
return ret; return ret;