nvme-tcp: optimize queue io_cpu assignment for multiple queue maps
Currently, queue io_cpu assignment is done sequentially for default,
read and poll queues based on queue id. This causes misalignment between
the context of the CPU initiating I/O and the I/O worker thread
processing queued requests or completions.

Modify the queue io_cpu assignment to take the queue map offset into
account. Each queue map's io_cpu now starts at zero, which essentially
aligns the read/poll queues to start over the same CPU range as the
default queues.

Testing performed by Mark with:
- ram device (nvmet)
- single CPU core (pinned)
- 100% 4k reads
- engine io_uring (not using sq_thread option)
- hipri flag set

Micro-benchmark results show a net gain of:
- 18%-29% increase in IOPs
- 16%-22% reduction in average latency
- 7%-23% reduction in 99.99% latency

Baseline:
========
QDepth/Batch | IOPs [k] | Avg. Lat [us] | 99.99% Lat [us]
---------------------------------------------------------
 1/1         |  32.4    |  30.11        |  50.94
32/8         | 179      | 168.20        | 371

CPU alignment:
=============
QDepth/Batch | IOPs [k] | Avg. Lat [us] | 99.99% Lat [us]
---------------------------------------------------------
 1/1         |  38.5    |  25.18        |  39.16
32/8         | 231      | 130.75        | 343

Reported-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
commit 40510a639e
parent fa059b856a
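To make the before/after behavior concrete, here is a minimal userspace C sketch (not kernel code) of the two assignment schemes. The queue counts and CPU count are made-up example values, and the plain modulo stands in for the kernel's cpumask_next_wrap() walk over cpu_online_mask:

#include <stdio.h>

#define NUM_CPUS	8	/* assumed online CPU count */
#define NUM_DEFAULT	4	/* assumed HCTX_TYPE_DEFAULT queue count */
#define NUM_READ	4	/* assumed HCTX_TYPE_READ queue count */

int main(void)
{
	/* I/O queue ids start at 1; qid 0 is the admin queue. */
	for (int qid = 1; qid <= NUM_DEFAULT + NUM_READ; qid++) {
		/* Old scheme: one sequential range across all queue maps. */
		int old_n = qid - 1;
		/* New scheme: the offset restarts at zero per queue map. */
		int new_n = qid <= NUM_DEFAULT ? qid - 1
					       : qid - NUM_DEFAULT - 1;

		printf("qid %2d: old io_cpu %d, new io_cpu %d\n",
		       qid, old_n % NUM_CPUS, new_n % NUM_CPUS);
	}
	return 0;
}

With these example counts, read queues 5-8 previously landed on CPUs 4-7 but now start over on CPUs 0-3, the same range the default queues use, so the CPU that submits on default queue 1 also runs the I/O work for read queue 5.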
@@ -1258,13 +1258,67 @@ free_icreq:
 	return ret;
 }
 
+static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
+{
+	return nvme_tcp_queue_id(queue) == 0;
+}
+
+static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+
+	return !nvme_tcp_admin_queue(queue) &&
+		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
+}
+
+static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+
+	return !nvme_tcp_admin_queue(queue) &&
+		!nvme_tcp_default_queue(queue) &&
+		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ];
+}
+
+static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+
+	return !nvme_tcp_admin_queue(queue) &&
+		!nvme_tcp_default_queue(queue) &&
+		!nvme_tcp_read_queue(queue) &&
+		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ] +
+			ctrl->io_queues[HCTX_TYPE_POLL];
+}
+
+static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+	int n = 0;
+
+	if (nvme_tcp_default_queue(queue))
+		n = qid - 1;
+	else if (nvme_tcp_read_queue(queue))
+		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
+	else if (nvme_tcp_poll_queue(queue))
+		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
+				ctrl->io_queues[HCTX_TYPE_READ] - 1;
+	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+}
+
 static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 		int qid, size_t queue_size)
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
 	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
 	struct linger sol = { .l_onoff = 1, .l_linger = 0 };
-	int ret, opt, rcv_pdu_size, n;
+	int ret, opt, rcv_pdu_size;
 
 	queue->ctrl = ctrl;
 	INIT_LIST_HEAD(&queue->send_list);
@@ -1343,11 +1397,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	}
 
 	queue->sock->sk->sk_allocation = GFP_ATOMIC;
-	if (!qid)
-		n = 0;
-	else
-		n = (qid - 1) % num_online_cpus();
-	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+	nvme_tcp_set_queue_io_cpu(queue);
 	queue->request = NULL;
 	queue->data_remaining = 0;
 	queue->ddgst_remaining = 0;
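For reference, the per-map index n computed above is resolved to a CPU by cpumask_next_wrap(n - 1, cpu_online_mask, -1, false), which effectively picks the nth online CPU and wraps once past the end of the mask. A rough userspace model, under the simplifying assumption that CPUs 0..num_online-1 are all online and n does not exceed the online count (in nvme-tcp each map's queue count stays within the online CPU count, so the plain modulo is an adequate stand-in here):

#include <stdio.h>

/*
 * Simplified model of cpumask_next_wrap(n - 1, cpu_online_mask, -1, false),
 * assuming CPUs 0..num_online-1 are all online. The real kernel helper
 * walks a possibly sparse online mask and wraps at most once, so this
 * modulo only matches it for n <= num_online.
 */
static int io_cpu_for_index(int n, int num_online)
{
	return n % num_online;	/* nth online CPU, wrapping past the end */
}

int main(void)
{
	/* With 4 online CPUs, per-map index 4 wraps back to CPU 0. */
	for (int n = 0; n <= 4; n++)
		printf("n=%d -> io_cpu %d\n", n, io_cpu_for_index(n, 4));
	return 0;
}

The wrap keeps the assignment valid even when a queue map has as many queues as online CPUs.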