io-wq: check max_worker limits if a worker transitions bound state

For the two places where new workers are created, we diligently check if
we are allowed to create a new worker. If we're currently at the limit
of how many workers of a given type we can have, then we don't create
any new ones.

If you have a mixed workload with various types of bound and unbounded
work, then it can happen that a worker finishes one type of work and
is then transitioned to the other type. For this case, we don't check
if we are actually allowed to do so. This can cause io-wq to temporarily
exceed the allowed number of workers for a given type.

When retrieving work, check that the types match. If they don't, check
if we are allowed to transition to the other type. If not, then don't
handle the new work.

Cc: stable@vger.kernel.org
Reported-by: Johannes Lundberg <johalun0@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2021-08-29 16:13:03 -06:00
parent f1042b6ccb
commit ecc53c48c1

View File

@ -424,7 +424,28 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
spin_unlock(&wq->hash->wait.lock); spin_unlock(&wq->hash->wait.lock);
} }
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) /*
* We can always run the work if the worker is currently the same type as
* the work (eg both are bound, or both are unbound). If they are not the
* same, only allow it if incrementing the worker count would be allowed.
*/
static bool io_worker_can_run_work(struct io_worker *worker,
struct io_wq_work *work)
{
struct io_wqe_acct *acct;
if (!(worker->flags & IO_WORKER_F_BOUND) !=
!(work->flags & IO_WQ_WORK_UNBOUND))
return true;
/* not the same type, check if we'd go over the limit */
acct = io_work_get_acct(worker->wqe, work);
return acct->nr_workers < acct->max_workers;
}
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
struct io_worker *worker,
bool *stalled)
__must_hold(wqe->lock) __must_hold(wqe->lock)
{ {
struct io_wq_work_node *node, *prev; struct io_wq_work_node *node, *prev;
@ -436,6 +457,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
work = container_of(node, struct io_wq_work, list); work = container_of(node, struct io_wq_work, list);
if (!io_worker_can_run_work(worker, work))
break;
/* not hashed, can run anytime */ /* not hashed, can run anytime */
if (!io_wq_is_hashed(work)) { if (!io_wq_is_hashed(work)) {
wq_list_del(&wqe->work_list, node, prev); wq_list_del(&wqe->work_list, node, prev);
@ -462,6 +486,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
raw_spin_unlock(&wqe->lock); raw_spin_unlock(&wqe->lock);
io_wait_on_hash(wqe, stall_hash); io_wait_on_hash(wqe, stall_hash);
raw_spin_lock(&wqe->lock); raw_spin_lock(&wqe->lock);
*stalled = true;
} }
return NULL; return NULL;
@ -501,6 +526,7 @@ static void io_worker_handle_work(struct io_worker *worker)
do { do {
struct io_wq_work *work; struct io_wq_work *work;
bool stalled;
get_next: get_next:
/* /*
* If we got some work, mark us as busy. If we didn't, but * If we got some work, mark us as busy. If we didn't, but
@ -509,10 +535,11 @@ get_next:
* can't make progress, any work completion or insertion will * can't make progress, any work completion or insertion will
* clear the stalled flag. * clear the stalled flag.
*/ */
work = io_get_next_work(wqe); stalled = false;
work = io_get_next_work(wqe, worker, &stalled);
if (work) if (work)
__io_worker_busy(wqe, worker, work); __io_worker_busy(wqe, worker, work);
else if (!wq_list_empty(&wqe->work_list)) else if (stalled)
wqe->flags |= IO_WQE_FLAG_STALLED; wqe->flags |= IO_WQE_FLAG_STALLED;
raw_spin_unlock_irq(&wqe->lock); raw_spin_unlock_irq(&wqe->lock);