Merge branch 'for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue updates from Tejun Heo:

 - The code around workqueue scheduler hooks got reorganized in early
   2019, which unfortunately introduced a couple of subtle and rare race
   conditions where preemption can mangle internal workqueue state,
   triggering a WARN and possibly causing a stall or at least a delay in
   execution. Frederic fixed both in early December and the fixes were
   sitting in for-5.16-fixes, which I forgot to push. They are here now;
   I'll forward them to stable after they land.

 - The scheduler hook reorganization has further implications for
   workqueue code in that the hooks are now more strictly synchronized,
   so the interacting operations can become more straightforward. Lai is
   in the process of simplifying the workqueue code and this pull
   request contains some of those patches.

* 'for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Remove the cacheline_aligned for nr_running
  workqueue: Move the code of waking a worker up in unbind_workers()
  workqueue: Remove schedule() in unbind_workers()
  workqueue: Remove outdated comment about exceptional workers in unbind_workers()
  workqueue: Remove the advanced kicking of the idle workers in rebind_workers()
  workqueue: Remove the outdated comment before wq_worker_sleeping()
  workqueue: Fix unbind_workers() VS wq_worker_sleeping() race
  workqueue: Fix unbind_workers() VS wq_worker_running() race
  workqueue: Upgrade queue_work_on() comment
commit e9e64f85b4
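
Both Frederic fixes below close the same shape of race: a scheduler hook checks WORKER_NOT_RUNNING and then updates pool->nr_running, and unbind_workers() can slip in between the check and the update and ruin the nr_running reset. The snippet that follows is only a hypothetical userspace analogue in plain C with pthreads, not kernel code; the names NOT_RUNNING, nr_running, worker(), unbind() and use_fix are invented for illustration, and the mutex merely stands in for the preempt_disable()/recheck that the real patches add around the kernel's check-then-update sequences.

/*
 * Hypothetical userspace analogue of the wq_worker_running() vs
 * unbind_workers() race -- illustration only, not kernel code.
 * Build with: gcc -O2 -pthread race-demo.c
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NOT_RUNNING 0x1

static int flags;                       /* stands in for worker->flags     */
static int nr_running;                  /* stands in for pool->nr_running  */
static int use_fix;                     /* 1: emulate the added protection */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
	if (use_fix)
		pthread_mutex_lock(&lock);      /* analogue of preempt_disable() */

	if (!(flags & NOT_RUNNING)) {
		usleep(1000);                   /* widen the race window */
		nr_running++;                   /* may land after the reset below */
	}

	if (use_fix)
		pthread_mutex_unlock(&lock);    /* analogue of preempt_enable() */
	return NULL;
}

static void *unbind(void *arg)
{
	usleep(500);                            /* slip in between check and inc */
	if (use_fix)
		pthread_mutex_lock(&lock);
	flags |= NOT_RUNNING;                   /* WORKER_UNBOUND in the kernel  */
	nr_running = 0;                         /* the reset the race can ruin   */
	if (use_fix)
		pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, unbind, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* Without the fix this typically prints 1 on the freshly "unbound" pool. */
	printf("nr_running after unbind: %d\n", nr_running);
	return 0;
}

With use_fix = 0 the program typically ends with nr_running == 1 after the "unbind"; with use_fix = 1 the check and increment can no longer be split by the reset, which is the property the kernel fixes restore.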
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -154,6 +154,9 @@ struct worker_pool {
 
 	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
 
+	/* The current concurrency level. */
+	atomic_t		nr_running;
+
 	struct list_head	worklist;	/* L: list of pending works */
 
 	int			nr_workers;	/* L: total number of workers */
@@ -177,19 +180,12 @@ struct worker_pool {
 	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
 	int			refcnt;		/* PL: refcnt for unbound pools */
 
-	/*
-	 * The current concurrency level. As it's likely to be accessed
-	 * from other CPUs during try_to_wake_up(), put it in a separate
-	 * cacheline.
-	 */
-	atomic_t		nr_running ____cacheline_aligned_in_smp;
-
 	/*
 	 * Destruction of pool is RCU protected to allow dereferences
 	 * from get_work_pool().
 	 */
 	struct rcu_head		rcu;
-} ____cacheline_aligned_in_smp;
+};
 
 /*
  * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
@@ -868,8 +864,17 @@ void wq_worker_running(struct task_struct *task)
 
 	if (!worker->sleeping)
 		return;
+
+	/*
+	 * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+	 * and the nr_running increment below, we may ruin the nr_running reset
+	 * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+	 * pool. Protect against such race.
+	 */
+	preempt_disable();
 	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(&worker->pool->nr_running);
+	preempt_enable();
 	worker->sleeping = 0;
 }
 
@@ -878,8 +883,7 @@ void wq_worker_running(struct task_struct *task)
  * @task: task going to sleep
  *
  * This function is called from schedule() when a busy worker is
- * going to sleep. Preemption needs to be disabled to protect ->sleeping
- * assignment.
+ * going to sleep.
  */
 void wq_worker_sleeping(struct task_struct *task)
 {
@@ -903,6 +907,16 @@ void wq_worker_sleeping(struct task_struct *task)
 	worker->sleeping = 1;
 	raw_spin_lock_irq(&pool->lock);
 
+	/*
+	 * Recheck in case unbind_workers() preempted us. We don't
+	 * want to decrement nr_running after the worker is unbound
+	 * and nr_running has been reset.
+	 */
+	if (worker->flags & WORKER_NOT_RUNNING) {
+		raw_spin_unlock_irq(&pool->lock);
+		return;
+	}
+
 	/*
 	 * The counterpart of the following dec_and_test, implied mb,
 	 * worklist not empty test sequence is in insert_work().
@@ -1531,7 +1545,8 @@ out:
  * @work: work to queue
  *
  * We queue the work to a specific CPU, the caller must ensure it
- * can't go away.
+ * can't go away. Callers that fail to ensure that the specified
+ * CPU cannot go away will execute on a randomly chosen CPU.
  *
  * Return: %false if @work was already on a queue, %true otherwise.
  */
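
The hunk just above is the queue_work_on() comment upgrade. As a hedged aside that is not part of this series, one way a caller might satisfy the "caller must ensure it can't go away" requirement is to hold the CPU hotplug read lock around the call; my_work_fn() and queue_on_cpu() below are invented names for a sketch only.

/*
 * Hypothetical caller of queue_work_on() -- illustration only, not part of
 * this series. cpus_read_lock() keeps @cpu from being hot-unplugged while
 * the work item is queued.
 */
#include <linux/cpu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
	pr_info("my_work running on CPU %d\n", raw_smp_processor_id());
}

static DECLARE_WORK(my_work, my_work_fn);

static void queue_on_cpu(int cpu)
{
	cpus_read_lock();		/* pin CPU hotplug around the queueing */
	if (cpu_online(cpu))
		queue_work_on(cpu, system_wq, &my_work);
	cpus_read_unlock();
}

Per the updated comment, even a caller that fails to pin the CPU does not lose the work item; it simply executes on some other CPU.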
@@ -1811,14 +1826,8 @@ static void worker_enter_idle(struct worker *worker)
 	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
 		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
 
-	/*
-	 * Sanity check nr_running. Because unbind_workers() releases
-	 * pool->lock between setting %WORKER_UNBOUND and zapping
-	 * nr_running, the warning may trigger spuriously. Check iff
-	 * unbind is not in progress.
-	 */
-	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
-		     pool->nr_workers == pool->nr_idle &&
+	/* Sanity check nr_running. */
+	WARN_ON_ONCE(pool->nr_workers == pool->nr_idle &&
 		     atomic_read(&pool->nr_running));
 }
 
@@ -4979,38 +4988,22 @@ static void unbind_workers(int cpu)
 		/*
 		 * We've blocked all attach/detach operations. Make all workers
 		 * unbound and set DISASSOCIATED. Before this, all workers
-		 * except for the ones which are still executing works from
-		 * before the last CPU down must be on the cpu. After
-		 * this, they may become diasporas.
+		 * must be on the cpu. After this, they may become diasporas.
+		 * And the preemption disabled section in their sched callbacks
+		 * are guaranteed to see WORKER_UNBOUND since the code here
+		 * is on the same cpu.
 		 */
 		for_each_pool_worker(worker, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
 
-		raw_spin_unlock_irq(&pool->lock);
-
-		for_each_pool_worker(worker, pool) {
-			kthread_set_per_cpu(worker->task, -1);
-			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
-		}
-
-		mutex_unlock(&wq_pool_attach_mutex);
-
 		/*
-		 * Call schedule() so that we cross rq->lock and thus can
-		 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
-		 * This is necessary as scheduler callbacks may be invoked
-		 * from other cpus.
-		 */
-		schedule();
-
-		/*
-		 * Sched callbacks are disabled now. Zap nr_running.
-		 * After this, nr_running stays zero and need_more_worker()
-		 * and keep_working() are always true as long as the
-		 * worklist is not empty. This pool now behaves as an
-		 * unbound (in terms of concurrency management) pool which
+		 * The handling of nr_running in sched callbacks are disabled
+		 * now. Zap nr_running. After this, nr_running stays zero and
+		 * need_more_worker() and keep_working() are always true as
+		 * long as the worklist is not empty. This pool now behaves as
+		 * an unbound (in terms of concurrency management) pool which
 		 * are served by workers tied to the pool.
 		 */
 		atomic_set(&pool->nr_running, 0);
@@ -5020,9 +5013,16 @@ static void unbind_workers(int cpu)
 		 * worker blocking could lead to lengthy stalls. Kick off
 		 * unbound chain execution of currently pending work items.
 		 */
-		raw_spin_lock_irq(&pool->lock);
 		wake_up_worker(pool);
+
 		raw_spin_unlock_irq(&pool->lock);
+
+		for_each_pool_worker(worker, pool) {
+			kthread_set_per_cpu(worker->task, -1);
+			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
+		}
+
+		mutex_unlock(&wq_pool_attach_mutex);
 	}
 }
 
@@ -5058,17 +5058,6 @@ static void rebind_workers(struct worker_pool *pool)
 	for_each_pool_worker(worker, pool) {
 		unsigned int worker_flags = worker->flags;
 
-		/*
-		 * A bound idle worker should actually be on the runqueue
-		 * of the associated CPU for local wake-ups targeting it to
-		 * work. Kick all idle workers so that they migrate to the
-		 * associated CPU. Doing this in the same loop as
-		 * replacing UNBOUND with REBOUND is safe as no worker will
-		 * be bound before @pool->lock is released.
-		 */
-		if (worker_flags & WORKER_IDLE)
-			wake_up_process(worker->task);
-
 		/*
 		 * We want to clear UNBOUND but can't directly call
 		 * worker_clr_flags() or adjust nr_running. Atomically