xen: fix hang on suspend.
In 65f63384
"xen: improve error handling in do_suspend" I said:
- xs_suspend()/xs_resume() and dpm_suspend_noirq()/dpm_resume_noirq() were not
nested in the obvious way.
and changed the ordering of the calls as so:
BEFORE AFTER
xs_suspend dpm_suspend_noirq
dpm_suspend_noirq xs_suspend
*SUSPEND* *SUSPEND*
dpm_resume_noirq dpm_resume_noirq
xs_resume xs_resume
Clearly this is not an improvement and I was talking rubbish.
In particular the new ordering is susceptible to a hang if a xenstore write is
in progress at the point at which the suspend kicks in. When the suspend
process calls xs_suspend it tries to take the request_mutex but if a write is
in progress it could be looping in xenbus_xs.c:read_reply() waiting for
something to arrive on &xs_state.reply_list while holding the request_mutex
(taken in the caller of read_reply).
However if we have done dpm_suspend_noirq before xs_suspend then we won't get
any more xenstore interrupts and process_msg() will never be woken up to add
anything to the reply_list.
Fix this by calling xs_suspend before dpm_suspend_noirq. If dpm_suspend_noirq
fails then make sure we go through the xs_suspend_cancel() code path.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stable Kernel <stable@kernel.org>
This commit is contained in:
parent
7284ce6c9f
commit
c5cae661d6
@ -102,15 +102,15 @@ static void do_suspend(void)
|
||||
goto out_thaw;
|
||||
}
|
||||
|
||||
printk(KERN_DEBUG "suspending xenstore...\n");
|
||||
xs_suspend();
|
||||
|
||||
err = dpm_suspend_noirq(PMSG_SUSPEND);
|
||||
if (err) {
|
||||
printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
|
||||
goto out_resume;
|
||||
}
|
||||
|
||||
printk(KERN_DEBUG "suspending xenstore...\n");
|
||||
xs_suspend();
|
||||
|
||||
err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
|
||||
|
||||
dpm_resume_noirq(PMSG_RESUME);
|
||||
@ -120,13 +120,13 @@ static void do_suspend(void)
|
||||
cancelled = 1;
|
||||
}
|
||||
|
||||
out_resume:
|
||||
if (!cancelled) {
|
||||
xen_arch_resume();
|
||||
xs_resume();
|
||||
} else
|
||||
xs_suspend_cancel();
|
||||
|
||||
out_resume:
|
||||
dpm_resume_end(PMSG_RESUME);
|
||||
|
||||
/* Make sure timer events get retriggered on all CPUs */
|
||||
|
Loading…
Reference in New Issue
Block a user