io_uring: allow user configurable IO thread CPU affinity
io-wq defaults to per-node masks for IO workers. This works fine by default, but isn't particularly handy for workloads that prefer more specific affinities, for either performance or isolation reasons. This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU mask that is then applied to IO thread workers, and an IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the default of per-node. Note that no care is given to existing IO threads, they will need to go through a reschedule before the affinity is correct if they are already running or sleeping. Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
0e03496d19
commit
fe76421d1d
17
fs/io-wq.c
17
fs/io-wq.c
@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
|
|||||||
return __io_wq_cpu_online(wq, cpu, false);
|
return __io_wq_cpu_online(wq, cpu, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Set the CPU mask stored in every per-node io_wqe of @wq.
 *
 * @wq:   io-wq instance whose per-node masks are rewritten.
 * @mask: mask to copy into each node's wqe->cpu_mask, or NULL to restore
 *        each node's mask to cpumask_of_node() for that node.
 *
 * Only the stored masks are updated here; nothing in this function touches
 * workers that already exist. Always returns 0.
 */
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
{
	int node;

	rcu_read_lock();
	for_each_node(node) {
		struct io_wqe *wqe = wq->wqes[node];
		const struct cpumask *src;

		/* A NULL mask means "reset": fall back to this node's own CPUs. */
		if (mask)
			src = mask;
		else
			src = cpumask_of_node(node);

		cpumask_copy(wqe->cpu_mask, src);
	}
	rcu_read_unlock();

	return 0;
}
|
||||||
|
|
||||||
static __init int io_wq_init(void)
|
static __init int io_wq_init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
|
|||||||
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
|
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
|
||||||
void io_wq_hash_work(struct io_wq_work *work, void *val);
|
void io_wq_hash_work(struct io_wq_work *work, void *val);
|
||||||
|
|
||||||
|
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
|
||||||
|
|
||||||
static inline bool io_wq_is_hashed(struct io_wq_work *work)
|
static inline bool io_wq_is_hashed(struct io_wq_work *work)
|
||||||
{
|
{
|
||||||
return work->flags & IO_WQ_WORK_HASHED;
|
return work->flags & IO_WQ_WORK_HASHED;
|
||||||
|
@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * IORING_REGISTER_IOWQ_AFF: apply a user-supplied CPU mask to the io-wq of
 * the calling task.
 *
 * @ctx: ring context (not used by this function).
 * @arg: userspace pointer to the CPU mask.
 * @len: number of bytes to copy from @arg; clamped to cpumask_size().
 *
 * Returns -EINVAL if the current task has no io_uring task context or no
 * io-wq, -ENOMEM if the temporary mask cannot be allocated, -EFAULT if the
 * copy from userspace fails, otherwise the result of io_wq_cpu_affinity().
 */
static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
				unsigned len)
{
	struct io_uring_task *task_ctx = current->io_uring;
	cpumask_var_t user_mask;
	int ret;

	if (!task_ctx || !task_ctx->io_wq)
		return -EINVAL;

	if (!alloc_cpumask_var(&user_mask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Start from an all-clear mask so that a short user buffer leaves the
	 * remaining bits zero, and never copy more than the mask can hold.
	 */
	cpumask_clear(user_mask);
	if (len > cpumask_size())
		len = cpumask_size();

	if (copy_from_user(user_mask, arg, len))
		ret = -EFAULT;
	else
		ret = io_wq_cpu_affinity(task_ctx->io_wq, user_mask);

	/* The temporary mask is released on both the success and fault paths. */
	free_cpumask_var(user_mask);
	return ret;
}
|
||||||
|
|
||||||
|
static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct io_uring_task *tctx = current->io_uring;
|
||||||
|
|
||||||
|
if (!tctx || !tctx->io_wq)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return io_wq_cpu_affinity(tctx->io_wq, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
static bool io_register_op_must_quiesce(int op)
|
static bool io_register_op_must_quiesce(int op)
|
||||||
{
|
{
|
||||||
switch (op) {
|
switch (op) {
|
||||||
@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
|
|||||||
case IORING_REGISTER_FILES_UPDATE2:
|
case IORING_REGISTER_FILES_UPDATE2:
|
||||||
case IORING_REGISTER_BUFFERS2:
|
case IORING_REGISTER_BUFFERS2:
|
||||||
case IORING_REGISTER_BUFFERS_UPDATE:
|
case IORING_REGISTER_BUFFERS_UPDATE:
|
||||||
|
case IORING_REGISTER_IOWQ_AFF:
|
||||||
|
case IORING_UNREGISTER_IOWQ_AFF:
|
||||||
return false;
|
return false;
|
||||||
default:
|
default:
|
||||||
return true;
|
return true;
|
||||||
@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
|
|||||||
ret = io_register_rsrc_update(ctx, arg, nr_args,
|
ret = io_register_rsrc_update(ctx, arg, nr_args,
|
||||||
IORING_RSRC_BUFFER);
|
IORING_RSRC_BUFFER);
|
||||||
break;
|
break;
|
||||||
|
case IORING_REGISTER_IOWQ_AFF:
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (!arg || !nr_args)
|
||||||
|
break;
|
||||||
|
ret = io_register_iowq_aff(ctx, arg, nr_args);
|
||||||
|
break;
|
||||||
|
case IORING_UNREGISTER_IOWQ_AFF:
|
||||||
|
ret = -EINVAL;
|
||||||
|
if (arg || nr_args)
|
||||||
|
break;
|
||||||
|
ret = io_unregister_iowq_aff(ctx);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
break;
|
break;
|
||||||
|
@ -306,6 +306,10 @@ enum {
|
|||||||
IORING_REGISTER_BUFFERS2 = 15,
|
IORING_REGISTER_BUFFERS2 = 15,
|
||||||
IORING_REGISTER_BUFFERS_UPDATE = 16,
|
IORING_REGISTER_BUFFERS_UPDATE = 16,
|
||||||
|
|
||||||
|
/* set/clear io-wq thread affinities */
|
||||||
|
IORING_REGISTER_IOWQ_AFF = 17,
|
||||||
|
IORING_UNREGISTER_IOWQ_AFF = 18,
|
||||||
|
|
||||||
/* this goes last */
|
/* this goes last */
|
||||||
IORING_REGISTER_LAST
|
IORING_REGISTER_LAST
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user