7407054209
Tetsuo has reported the following potential oom_killer_disable vs.
oom_reaper race:
(1) freeze_processes() starts freezing user space threads.
(2) Somebody (maybe a kenrel thread) calls out_of_memory().
(3) The OOM killer calls mark_oom_victim() on a user space thread
P1 which is already in __refrigerator().
(4) oom_killer_disable() sets oom_killer_disabled = true.
(5) P1 leaves __refrigerator() and enters do_exit().
(6) The OOM reaper calls exit_oom_victim(P1) before P1 can call
exit_oom_victim(P1).
(7) oom_killer_disable() returns while P1 not yet finished
(8) P1 perform IO/interfere with the freezer.
This situation is unfortunate. We cannot move oom_killer_disable after
all the freezable kernel threads are frozen because the oom victim might
depend on some of those kthreads to make a forward progress to exit so
we could deadlock. It is also far from trivial to teach the oom_reaper
to not call exit_oom_victim() because then we would lose a guarantee of
the OOM killer and oom_killer_disable forward progress because
exit_mm->mmput might block and never call exit_oom_victim.
It seems the easiest way forward is to workaround this race by calling
try_to_freeze_tasks again after oom_killer_disable. This will make sure
that all the tasks are frozen or it bails out.
Fixes: 449d777d7a
("mm, oom_reaper: clear TIF_MEMDIE for all tasks queued for oom_reaper")
Link: http://lkml.kernel.org/r/1466597634-16199-1-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
248 lines
5.6 KiB
C
248 lines
5.6 KiB
C
/*
|
|
* drivers/power/process.c - Functions for starting/stopping processes on
|
|
* suspend transitions.
|
|
*
|
|
* Originally from swsusp.
|
|
*/
|
|
|
|
|
|
#undef DEBUG
|
|
|
|
#include <linux/interrupt.h>
|
|
#include <linux/oom.h>
|
|
#include <linux/suspend.h>
|
|
#include <linux/module.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/freezer.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/kmod.h>
|
|
#include <trace/events/power.h>
|
|
|
|
/*
|
|
* Timeout for stopping processes
|
|
*/
|
|
unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
|
|
|
|
static int try_to_freeze_tasks(bool user_only)
|
|
{
|
|
struct task_struct *g, *p;
|
|
unsigned long end_time;
|
|
unsigned int todo;
|
|
bool wq_busy = false;
|
|
ktime_t start, end, elapsed;
|
|
unsigned int elapsed_msecs;
|
|
bool wakeup = false;
|
|
int sleep_usecs = USEC_PER_MSEC;
|
|
|
|
start = ktime_get_boottime();
|
|
|
|
end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);
|
|
|
|
if (!user_only)
|
|
freeze_workqueues_begin();
|
|
|
|
while (true) {
|
|
todo = 0;
|
|
read_lock(&tasklist_lock);
|
|
for_each_process_thread(g, p) {
|
|
if (p == current || !freeze_task(p))
|
|
continue;
|
|
|
|
if (!freezer_should_skip(p))
|
|
todo++;
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
|
|
if (!user_only) {
|
|
wq_busy = freeze_workqueues_busy();
|
|
todo += wq_busy;
|
|
}
|
|
|
|
if (!todo || time_after(jiffies, end_time))
|
|
break;
|
|
|
|
if (pm_wakeup_pending()) {
|
|
wakeup = true;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* We need to retry, but first give the freezing tasks some
|
|
* time to enter the refrigerator. Start with an initial
|
|
* 1 ms sleep followed by exponential backoff until 8 ms.
|
|
*/
|
|
usleep_range(sleep_usecs / 2, sleep_usecs);
|
|
if (sleep_usecs < 8 * USEC_PER_MSEC)
|
|
sleep_usecs *= 2;
|
|
}
|
|
|
|
end = ktime_get_boottime();
|
|
elapsed = ktime_sub(end, start);
|
|
elapsed_msecs = ktime_to_ms(elapsed);
|
|
|
|
if (todo) {
|
|
pr_cont("\n");
|
|
pr_err("Freezing of tasks %s after %d.%03d seconds "
|
|
"(%d tasks refusing to freeze, wq_busy=%d):\n",
|
|
wakeup ? "aborted" : "failed",
|
|
elapsed_msecs / 1000, elapsed_msecs % 1000,
|
|
todo - wq_busy, wq_busy);
|
|
|
|
if (!wakeup) {
|
|
read_lock(&tasklist_lock);
|
|
for_each_process_thread(g, p) {
|
|
if (p != current && !freezer_should_skip(p)
|
|
&& freezing(p) && !frozen(p))
|
|
sched_show_task(p);
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
}
|
|
} else {
|
|
pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
|
|
elapsed_msecs % 1000);
|
|
}
|
|
|
|
return todo ? -EBUSY : 0;
|
|
}
|
|
|
|
/**
|
|
* freeze_processes - Signal user space processes to enter the refrigerator.
|
|
* The current thread will not be frozen. The same process that calls
|
|
* freeze_processes must later call thaw_processes.
|
|
*
|
|
* On success, returns 0. On failure, -errno and system is fully thawed.
|
|
*/
|
|
int freeze_processes(void)
|
|
{
|
|
int error;
|
|
|
|
error = __usermodehelper_disable(UMH_FREEZING);
|
|
if (error)
|
|
return error;
|
|
|
|
/* Make sure this task doesn't get frozen */
|
|
current->flags |= PF_SUSPEND_TASK;
|
|
|
|
if (!pm_freezing)
|
|
atomic_inc(&system_freezing_cnt);
|
|
|
|
pm_wakeup_clear();
|
|
pr_info("Freezing user space processes ... ");
|
|
pm_freezing = true;
|
|
error = try_to_freeze_tasks(true);
|
|
if (!error) {
|
|
__usermodehelper_set_disable_depth(UMH_DISABLED);
|
|
pr_cont("done.");
|
|
}
|
|
pr_cont("\n");
|
|
BUG_ON(in_atomic());
|
|
|
|
/*
|
|
* Now that the whole userspace is frozen we need to disbale
|
|
* the OOM killer to disallow any further interference with
|
|
* killable tasks.
|
|
*/
|
|
if (!error && !oom_killer_disable())
|
|
error = -EBUSY;
|
|
|
|
/*
|
|
* There is a hard to fix race between oom_reaper kernel thread
|
|
* and oom_killer_disable. oom_reaper calls exit_oom_victim
|
|
* before the victim reaches exit_mm so try to freeze all the tasks
|
|
* again and catch such a left over task.
|
|
*/
|
|
if (!error) {
|
|
pr_info("Double checking all user space processes after OOM killer disable... ");
|
|
error = try_to_freeze_tasks(true);
|
|
pr_cont("\n");
|
|
}
|
|
|
|
if (error)
|
|
thaw_processes();
|
|
return error;
|
|
}
|
|
|
|
/**
|
|
* freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
|
|
*
|
|
* On success, returns 0. On failure, -errno and only the kernel threads are
|
|
* thawed, so as to give a chance to the caller to do additional cleanups
|
|
* (if any) before thawing the userspace tasks. So, it is the responsibility
|
|
* of the caller to thaw the userspace tasks, when the time is right.
|
|
*/
|
|
int freeze_kernel_threads(void)
|
|
{
|
|
int error;
|
|
|
|
pr_info("Freezing remaining freezable tasks ... ");
|
|
|
|
pm_nosig_freezing = true;
|
|
error = try_to_freeze_tasks(false);
|
|
if (!error)
|
|
pr_cont("done.");
|
|
|
|
pr_cont("\n");
|
|
BUG_ON(in_atomic());
|
|
|
|
if (error)
|
|
thaw_kernel_threads();
|
|
return error;
|
|
}
|
|
|
|
void thaw_processes(void)
|
|
{
|
|
struct task_struct *g, *p;
|
|
struct task_struct *curr = current;
|
|
|
|
trace_suspend_resume(TPS("thaw_processes"), 0, true);
|
|
if (pm_freezing)
|
|
atomic_dec(&system_freezing_cnt);
|
|
pm_freezing = false;
|
|
pm_nosig_freezing = false;
|
|
|
|
oom_killer_enable();
|
|
|
|
pr_info("Restarting tasks ... ");
|
|
|
|
__usermodehelper_set_disable_depth(UMH_FREEZING);
|
|
thaw_workqueues();
|
|
|
|
read_lock(&tasklist_lock);
|
|
for_each_process_thread(g, p) {
|
|
/* No other threads should have PF_SUSPEND_TASK set */
|
|
WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
|
|
__thaw_task(p);
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
|
|
WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
|
|
curr->flags &= ~PF_SUSPEND_TASK;
|
|
|
|
usermodehelper_enable();
|
|
|
|
schedule();
|
|
pr_cont("done.\n");
|
|
trace_suspend_resume(TPS("thaw_processes"), 0, false);
|
|
}
|
|
|
|
void thaw_kernel_threads(void)
|
|
{
|
|
struct task_struct *g, *p;
|
|
|
|
pm_nosig_freezing = false;
|
|
pr_info("Restarting kernel threads ... ");
|
|
|
|
thaw_workqueues();
|
|
|
|
read_lock(&tasklist_lock);
|
|
for_each_process_thread(g, p) {
|
|
if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
|
|
__thaw_task(p);
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
|
|
schedule();
|
|
pr_cont("done.\n");
|
|
}
|