From: Nick Piggin When a process sleeps on contention, a "waiter" object is allocated on its stack, and put onto a list. Right before a sleeping process is woken, it gets a flag cleared which allows it to break out of its sleeping loop: ------------- /* wait to be given the lock */ for (;;) { if (!waiter->flags) break; schedule(); WARN_ON(waiter->flags); set_task_state(tsk, TASK_UNINTERRUPTIBLE); } tsk->state = TASK_RUNNING; ------------- So, a spurious wakeup after the flag is cleared and before the "real" wakeup can cause the waiter object to go off the stack while still in use. Or, if the flag is cleared and the real wake occurs *after* a spurious wakeup but before schedule(), we lose that wakeup. So fix this by using the completion API. Significantly simplifies the code. (Such a spurious wakeup can happen if, say, a process parks itself on a waitqueue and then executes down_read()) --- 25-akpm/lib/rwsem-spinlock.c | 46 ++++++++++--------------------------------- 25-akpm/lib/rwsem.c | 22 ++++++-------------- 2 files changed, 18 insertions(+), 50 deletions(-) diff -puN lib/rwsem.c~de-racify-rwsem-take-2 lib/rwsem.c --- 25/lib/rwsem.c~de-racify-rwsem-take-2 2004-04-14 19:39:05.574640824 -0700 +++ 25-akpm/lib/rwsem.c 2004-04-14 19:39:10.724857872 -0700 @@ -7,10 +7,12 @@ #include #include #include +#include struct rwsem_waiter { struct list_head list; struct task_struct *task; + struct completion granted; unsigned int flags; #define RWSEM_WAITING_FOR_READ 0x00000001 #define RWSEM_WAITING_FOR_WRITE 0x00000002 @@ -72,8 +74,7 @@ try_again: goto readers_only; list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + complete(&waiter->granted); goto out; /* don't want to wake any writers */ @@ -109,8 +110,7 @@ readers_only: for (; loop > 0; loop--) { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; - waiter->flags = 0; - wake_up_process(waiter->task); + complete(&waiter->granted); } sem->wait_list.next = next; @@ -130,18 +130,17 @@ undo: /* * wait for a lock to be granted */ -static inline struct rw_semaphore * +static struct rw_semaphore * rwsem_down_failed_common(struct rw_semaphore *sem, struct rwsem_waiter *waiter, signed long adjustment) { struct task_struct *tsk = current; signed long count; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ spin_lock(&sem->wait_lock); waiter->task = tsk; + init_completion(&waiter->granted); list_add_tail(&waiter->list, &sem->wait_list); @@ -159,14 +158,7 @@ rwsem_down_failed_common(struct rw_semap spin_unlock(&sem->wait_lock); /* wait to be given the lock */ - for (;;) { - if (!waiter->flags) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; + wait_for_completion(&waiter->granted); return sem; } diff -puN lib/rwsem-spinlock.c~de-racify-rwsem-take-2 lib/rwsem-spinlock.c --- 25/lib/rwsem-spinlock.c~de-racify-rwsem-take-2 2004-04-14 19:39:05.575640672 -0700 +++ 25-akpm/lib/rwsem-spinlock.c 2004-04-14 19:39:05.581639760 -0700 @@ -8,10 +8,12 @@ #include #include #include +#include struct rwsem_waiter { struct list_head list; struct task_struct *task; + struct completion granted; unsigned int flags; #define RWSEM_WAITING_FOR_READ 0x00000001 #define RWSEM_WAITING_FOR_WRITE 0x00000002 @@ -73,8 +75,7 @@ __rwsem_do_wake(struct rw_semaphore *sem if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { sem->activity = -1; list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + complete(&waiter->granted); goto out; } @@ -86,8 +87,7 @@ dont_wake_writers: struct list_head *next = waiter->list.next; list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + complete(&waiter->granted); woken++; if (list_empty(&sem->wait_list)) break; @@ -114,8 +114,7 @@ __rwsem_wake_one_writer(struct rw_semaph waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + complete(&waiter->granted); return sem; } @@ -125,7 +124,6 @@ __rwsem_wake_one_writer(struct rw_semaph void fastcall __down_read(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - struct task_struct *tsk; rwsemtrace(sem, "Entering __down_read"); @@ -138,11 +136,9 @@ void fastcall __down_read(struct rw_sema goto out; } - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - waiter.task = tsk; + init_completion(&waiter.granted); + waiter.task = current; waiter.flags = RWSEM_WAITING_FOR_READ; list_add_tail(&waiter.list, &sem->wait_list); @@ -150,15 +146,7 @@ void fastcall __down_read(struct rw_sema /* we don't need to touch the semaphore struct anymore */ spin_unlock(&sem->wait_lock); - /* wait to be given the lock */ - for (;;) { - if (!waiter.flags) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; + wait_for_completion(&waiter.granted); out: rwsemtrace(sem, "Leaving __down_read"); @@ -194,7 +182,6 @@ int fastcall __down_read_trylock(struct void fastcall __down_write(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - struct task_struct *tsk; rwsemtrace(sem, "Entering __down_write"); @@ -207,11 +194,9 @@ void fastcall __down_write(struct rw_sem goto out; } - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - waiter.task = tsk; + init_completion(&waiter.granted); + waiter.task = current; waiter.flags = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list, &sem->wait_list); @@ -219,16 +204,7 @@ void fastcall __down_write(struct rw_sem /* we don't need to touch the semaphore struct anymore */ spin_unlock(&sem->wait_lock); - /* wait to be given the lock */ - for (;;) { - if (!waiter.flags) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; - + wait_for_completion(&waiter.granted); out: rwsemtrace(sem, "Leaving __down_write"); } _