From eef6a7d5c2f38adadab8240fabf43730fe796482 Mon Sep 17 00:00:00 2001
From: Alan Stern
Date: Fri, 12 Feb 2010 17:39:21 +0900
Subject: [PATCH 1/2] workqueue: warn about flush_scheduled_work()

This patch (as1319) adds kerneldoc and a pointed warning to
flush_scheduled_work().

Signed-off-by: Alan Stern
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5bfb213984b2..0225fea89340 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -845,6 +845,30 @@ int schedule_on_each_cpu(work_func_t func)
 	return 0;
 }
 
+/**
+ * flush_scheduled_work - ensure that any scheduled work has run to completion.
+ *
+ * Forces execution of the kernel-global workqueue and blocks until its
+ * completion.
+ *
+ * Think twice before calling this function! It's very easy to get into
+ * trouble if you don't take great care. Either of the following situations
+ * will lead to deadlock:
+ *
+ *	One of the work items currently on the workqueue needs to acquire
+ *	a lock held by your code or its caller.
+ *
+ *	Your code is running in the context of a work routine.
+ *
+ * They will be detected by lockdep when they occur, but the first might not
+ * occur very often. It depends on what work items are on the workqueue and
+ * what locks they need, which you have no control over.
+ *
+ * In most situations flushing the entire workqueue is overkill; you merely
+ * need to know that a particular work item isn't queued and isn't running.
+ * In such cases you should use cancel_delayed_work_sync() or
+ * cancel_work_sync() instead.
+ */
 void flush_scheduled_work(void)
 {
 	flush_workqueue(keventd_wq);

From 4d707b9f48e2c4aa94b96f1133813b73df71fb55 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 23 Apr 2010 17:40:40 +0200
Subject: [PATCH 2/2] workqueue: change cancel_work_sync() to clear work->data

In short: change cancel_work_sync(work) to mark this work as "never
queued" upon return.

When cancel_work_sync(work) succeeds, we know that this work can't be
queued or running, and since we own WORK_STRUCT_PENDING nobody can
change the bits in work->data under us. This means we can also clear
the "cwq" part along with the _PENDING bit, locklessly, before
returning: unless the work is queued, nobody may assume get_wq_data()
is stable even under cwq->lock.

This change can speed up subsequent cancel/flush requests, and as
Dmitry pointed out it simplifies the usage of work_structs which can be
queued on different workqueues. Consider this pseudo-code from the
input subsystem:

	struct workqueue_struct *WQ;
	struct work_struct *WORK;

	for (;;) {
		WQ = create_workqueue();
		...
		if (condition())
			queue_work(WQ, WORK);
		...
		cancel_work_sync(WORK);
		destroy_workqueue(WQ);
	}

If condition() returns T and then F, cancel_work_sync() will crash the
kernel because WORK->data still points to the already-destroyed
workqueue. With this patch, code like the above becomes correct.

Suggested-by: Dmitry Torokhov
Signed-off-by: Oleg Nesterov
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0225fea89340..77dabbf64b8f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -229,6 +229,16 @@ static inline void set_wq_data(struct work_struct *work,
 	atomic_long_set(&work->data, new);
 }
 
+/*
+ * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
+ */
+static inline void clear_wq_data(struct work_struct *work)
+{
+	unsigned long flags = *work_data_bits(work) &
+				(1UL << WORK_STRUCT_STATIC);
+	atomic_long_set(&work->data, flags);
+}
+
 static inline
 struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 {
@@ -671,7 +681,7 @@ static int __cancel_work_timer(struct work_struct *work,
 		wait_on_work(work);
 	} while (unlikely(ret < 0));
 
-	work_clear_pending(work);
+	clear_wq_data(work);
 	return ret;
 }
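
As a usage note on the first patch: the new kerneldoc steers callers toward
cancelling their own work item rather than flushing the entire global
workqueue. Below is a minimal sketch of that pattern; the "foo" driver, its
work function, and the module boilerplate are hypothetical illustrations, not
code from the patches above.

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static void foo_work_fn(struct work_struct *work)
	{
		pr_info("foo: doing deferred work\n");
	}

	/* one work item owned by this driver, queued on the global workqueue */
	static DECLARE_WORK(foo_work, foo_work_fn);

	static int __init foo_init(void)
	{
		schedule_work(&foo_work);
		return 0;
	}

	static void __exit foo_exit(void)
	{
		/*
		 * Only this driver's work item must be idle here, so cancel it
		 * directly; flushing the whole global workqueue with
		 * flush_scheduled_work() would risk the deadlocks described in
		 * the new kerneldoc.
		 */
		cancel_work_sync(&foo_work);
	}

	module_init(foo_init);
	module_exit(foo_exit);
	MODULE_LICENSE("GPL");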
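
And a sketch of the input-subsystem pattern from the second changelog, again
with hypothetical "bar" names rather than real input code: with clear_wq_data()
in place, cancel_work_sync() also drops the workqueue reference stored in the
work item's data word, so the next iteration can queue the same work on a
freshly created workqueue.

	#include <linux/errno.h>
	#include <linux/types.h>
	#include <linux/workqueue.h>

	static void bar_work_fn(struct work_struct *work)
	{
		/* ... */
	}

	static DECLARE_WORK(bar_work, bar_work_fn);

	static int bar_cycle(bool need_work)
	{
		struct workqueue_struct *wq;

		wq = create_workqueue("bar");
		if (!wq)
			return -ENOMEM;

		if (need_work)
			queue_work(wq, &bar_work);

		/*
		 * After this patch, cancel_work_sync() clears the workqueue
		 * part of bar_work's data as well as the pending bit, so a
		 * later bar_cycle() call may queue the same work on a new
		 * workqueue; previously, if need_work was true in an earlier
		 * cycle and false now, the stale pointer to the destroyed
		 * workqueue could crash the kernel.
		 */
		cancel_work_sync(&bar_work);
		destroy_workqueue(wq);
		return 0;
	}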