From: Sebastian Andrzej Siewior Date: Fri, 7 Jun 2013 22:37:06 +0200 Subject: kernel/cpu: fix cpu down problem if kthread's cpu is going down If kthread is pinned to CPUx and CPUx is going down then we get into trouble: - first the unplug thread is created - it will set itself to hp->unplug. As a result, every task that is going to take a lock, has to leave the CPU. - the CPU_DOWN_PREPARE notifiers are started. The worker thread will start a new process for the "high priority worker". Now kthread would like to take a lock but since it can't leave the CPU it will never complete its task. We could fire the unplug thread after the notifiers but then the cpu is no longer marked "online" and the unplug thread will run on CPU0 which was fixed before :) So instead the unplug thread is started and kept waiting until the notifiers complete their work. Signed-off-by: Sebastian Andrzej Siewior --- kernel/cpu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -109,6 +109,7 @@ struct hotplug_pcp { int refcount; int grab_lock; struct completion synced; + struct completion unplug_wait; #ifdef CONFIG_PREEMPT_RT_FULL /* * Note, on PREEMPT_RT, the hotplug lock must save the state of @@ -212,6 +213,7 @@ static int sync_unplug_thread(void *data { struct hotplug_pcp *hp = data; + wait_for_completion(&hp->unplug_wait); preempt_disable(); hp->unplug = current; wait_for_pinned_cpus(hp); @@ -277,6 +279,14 @@ static void __cpu_unplug_sync(struct hot wait_for_completion(&hp->synced); } +static void __cpu_unplug_wait(unsigned int cpu) +{ + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); + + complete(&hp->unplug_wait); + wait_for_completion(&hp->synced); +} + /* * Start the sync_unplug_thread on the target cpu and wait for it to * complete. 
@@ -300,6 +310,7 @@ static int cpu_unplug_begin(unsigned int tell_sched_cpu_down_begin(cpu); init_completion(&hp->synced); + init_completion(&hp->unplug_wait); hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); if (IS_ERR(hp->sync_tsk)) { @@ -315,8 +326,7 @@ static int cpu_unplug_begin(unsigned int * wait for tasks that are going to enter these sections and * we must not have them block. */ - __cpu_unplug_sync(hp); - + wake_up_process(hp->sync_tsk); return 0; } @@ -671,6 +681,7 @@ static int _cpu_down(unsigned int cpu, i else synchronize_rcu(); + __cpu_unplug_wait(cpu); smpboot_park_threads(cpu); /* Notifiers are done. Don't let any more tasks pin this CPU. */