for-5.16/io_uring-2021-10-29
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmF8KHcQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgphvVEADHMsZP3fOGyJNqnIibIrDL5ZdUGtr5iH3c 0UIi9It0jo9xOyPX/aY2n1pInXK4vvND9ULC+XGYttSJZXWuYEbMGYQ34du2EP0r dypN4JPwO6X+mFkJND6x8IeDCzj/fy6LCFbWbRlDNsndTZ/gavVTOybMpOLdCJx9 IyXE1iHismaIaD7I3Q77zvN0ei87cEwBfg9R0vRAXKBKUh5raSiLWsOYOiXQkZH4 8iUeDmOLlaWghgXwweODxARXuWq+gWZgiBMd0tp0QCECXMv+NIpfJYauvLHJDa/u QScr9uRMrJS3KgRgt61o+Z2fcpzJF/bL0e0s5Ul9CgflRWucARbgodUMl4rZCi9D WOwxPxv8Oab8IT7Qc/ZHdY3ULJsULRgbtmc/9OqPL5Y/Ww9/9E63Is8O4q/QFc7T xJ1p5yZKw3G+G7oG0YBYE0U+x3RUzi4b/Ob+ECeLcAAAcp+XFg6epK6Aj8HDWd8K kGYlEBKEq1hILM44K59YTwAT/Cp+fkwe+x7pNQ3JjqtPpVpqGT7RoMUuCduofT1J ROtB+S8/AwhdABL6KKUYSVF8zlfoXbQpQs3SUKjaBtPVjwXLZwXERy7ttD/4STtT QjC+5/qAWnMR8CYADE0E3rlicUkHJm1+AHukYLz0REphDcNO8GuB9PCDzX4SX/ol SGJ6hoprYQ== =5U4u -----END PGP SIGNATURE----- Merge tag 'for-5.16/io_uring-2021-10-29' of git://git.kernel.dk/linux-block Pull io_uring updates from Jens Axboe: "Light on new features - basically just the hybrid mode support. Outside of that it's just fixes, cleanups, and performance improvements. 
In detail: - Add ring related information to the fdinfo output (Hao) - Hybrid async mode (Hao) - Support for batched issue on block (me) - sqe error trace improvement (me) - IOPOLL efficiency improvements (Pavel) - submit state cleanups and improvements (Pavel) - Completion side improvements (Pavel) - Drain improvements (Pavel) - Buffer selection cleanups (Pavel) - Fixed file node improvements (Pavel) - io-wq setup cancelation fix (Pavel) - Various other performance improvements and cleanups (Pavel) - Misc fixes (Arnd, Bixuan, Changcheng, Hao, me, Noah)" * tag 'for-5.16/io_uring-2021-10-29' of git://git.kernel.dk/linux-block: (97 commits) io-wq: remove worker to owner tw dependency io_uring: harder fdinfo sq/cq ring iterating io_uring: don't assign write hint in the read path io_uring: clusterise ki_flags access in rw_prep io_uring: kill unused param from io_file_supports_nowait io_uring: clean up timeout async_data allocation io_uring: don't try io-wq polling if not supported io_uring: check if opcode needs poll first on arming io_uring: clean iowq submit work cancellation io_uring: clean io_wq_submit_work()'s main loop io-wq: use helper for worker refcounting io_uring: implement async hybrid mode for pollable requests io_uring: Use ERR_CAST() instead of ERR_PTR(PTR_ERR()) io_uring: split logic of force_nonblock io_uring: warning about unused-but-set parameter io_uring: inform block layer of how many requests we are submitting io_uring: simplify io_file_supports_nowait() io_uring: combine REQ_F_NOWAIT_{READ,WRITE} flags io_uring: arm poll for non-nowait files fs/io_uring: Prioritise checking faster conditions first in io_write ...
This commit is contained in:
commit
8d1f01775f
58
fs/io-wq.c
58
fs/io-wq.c
@ -140,6 +140,7 @@ static void io_wqe_dec_running(struct io_worker *worker);
|
||||
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
|
||||
struct io_wqe_acct *acct,
|
||||
struct io_cb_cancel_data *match);
|
||||
static void create_worker_cb(struct callback_head *cb);
|
||||
|
||||
static bool io_worker_get(struct io_worker *worker)
|
||||
{
|
||||
@ -174,12 +175,46 @@ static void io_worker_ref_put(struct io_wq *wq)
|
||||
complete(&wq->worker_done);
|
||||
}
|
||||
|
||||
static void io_worker_cancel_cb(struct io_worker *worker)
|
||||
{
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&worker->wqe->lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&worker->wqe->lock);
|
||||
io_worker_ref_put(wq);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
}
|
||||
|
||||
static bool io_task_worker_match(struct callback_head *cb, void *data)
|
||||
{
|
||||
struct io_worker *worker;
|
||||
|
||||
if (cb->func != create_worker_cb)
|
||||
return false;
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
return worker == data;
|
||||
}
|
||||
|
||||
static void io_worker_exit(struct io_worker *worker)
|
||||
{
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
if (refcount_dec_and_test(&worker->ref))
|
||||
complete(&worker->ref_done);
|
||||
while (1) {
|
||||
struct callback_head *cb = task_work_cancel_match(wq->task,
|
||||
io_task_worker_match, worker);
|
||||
|
||||
if (!cb)
|
||||
break;
|
||||
io_worker_cancel_cb(worker);
|
||||
}
|
||||
|
||||
io_worker_release(worker);
|
||||
wait_for_completion(&worker->ref_done);
|
||||
|
||||
raw_spin_lock(&wqe->lock);
|
||||
@ -323,8 +358,10 @@ static bool io_queue_worker_create(struct io_worker *worker,
|
||||
|
||||
init_task_work(&worker->create_work, func);
|
||||
worker->create_index = acct->index;
|
||||
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
|
||||
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
return true;
|
||||
}
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
fail_release:
|
||||
io_worker_release(worker);
|
||||
@ -716,11 +753,8 @@ static void io_workqueue_create(struct work_struct *work)
|
||||
struct io_worker *worker = container_of(work, struct io_worker, work);
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
|
||||
if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
if (!io_queue_worker_create(worker, acct, create_worker_cont))
|
||||
kfree(worker);
|
||||
}
|
||||
}
|
||||
|
||||
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
|
||||
@ -1150,17 +1184,9 @@ static void io_wq_exit_workers(struct io_wq *wq)
|
||||
|
||||
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
|
||||
struct io_worker *worker;
|
||||
struct io_wqe_acct *acct;
|
||||
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
acct = io_wqe_get_acct(worker);
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&worker->wqe->lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&worker->wqe->lock);
|
||||
io_worker_ref_put(wq);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
io_worker_cancel_cb(worker);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
|
59
fs/io-wq.h
59
fs/io-wq.h
@ -29,6 +29,17 @@ struct io_wq_work_list {
|
||||
struct io_wq_work_node *last;
|
||||
};
|
||||
|
||||
/*
 * Walk the list from (head)->first. @prv is NULL on the first pass and
 * afterwards holds the node that was visited just before @pos.
 */
#define wq_list_for_each(pos, prv, head) \
|
||||
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
|
||||
|
||||
/*
 * Continue a walk from the current values of @pos and @prv; neither is
 * re-initialised here.
 */
#define wq_list_for_each_resume(pos, prv) \
|
||||
for (; pos; prv = pos, pos = (pos)->next)
|
||||
|
||||
/* True when the list has no first node (read with READ_ONCE). */
#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
|
||||
/* Mark the list empty by clearing ->first; ->last is not touched here. */
#define INIT_WQ_LIST(list) do { \
|
||||
(list)->first = NULL; \
|
||||
} while (0)
|
||||
|
||||
static inline void wq_list_add_after(struct io_wq_work_node *node,
|
||||
struct io_wq_work_node *pos,
|
||||
struct io_wq_work_list *list)
|
||||
@ -54,6 +65,15 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void wq_list_add_head(struct io_wq_work_node *node,
|
||||
struct io_wq_work_list *list)
|
||||
{
|
||||
node->next = list->first;
|
||||
if (!node->next)
|
||||
list->last = node;
|
||||
WRITE_ONCE(list->first, node);
|
||||
}
|
||||
|
||||
static inline void wq_list_cut(struct io_wq_work_list *list,
|
||||
struct io_wq_work_node *last,
|
||||
struct io_wq_work_node *prev)
|
||||
@ -69,6 +89,31 @@ static inline void wq_list_cut(struct io_wq_work_list *list,
|
||||
last->next = NULL;
|
||||
}
|
||||
|
||||
static inline void __wq_list_splice(struct io_wq_work_list *list,
|
||||
struct io_wq_work_node *to)
|
||||
{
|
||||
list->last->next = to->next;
|
||||
to->next = list->first;
|
||||
INIT_WQ_LIST(list);
|
||||
}
|
||||
|
||||
static inline bool wq_list_splice(struct io_wq_work_list *list,
|
||||
struct io_wq_work_node *to)
|
||||
{
|
||||
if (!wq_list_empty(list)) {
|
||||
__wq_list_splice(list, to);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void wq_stack_add_head(struct io_wq_work_node *node,
|
||||
struct io_wq_work_node *stack)
|
||||
{
|
||||
node->next = stack->next;
|
||||
stack->next = node;
|
||||
}
|
||||
|
||||
static inline void wq_list_del(struct io_wq_work_list *list,
|
||||
struct io_wq_work_node *node,
|
||||
struct io_wq_work_node *prev)
|
||||
@ -76,14 +121,14 @@ static inline void wq_list_del(struct io_wq_work_list *list,
|
||||
wq_list_cut(list, node, prev);
|
||||
}
|
||||
|
||||
#define wq_list_for_each(pos, prv, head) \
|
||||
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
|
||||
static inline
|
||||
struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
|
||||
{
|
||||
struct io_wq_work_node *node = stack->next;
|
||||
|
||||
#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
|
||||
#define INIT_WQ_LIST(list) do { \
|
||||
(list)->first = NULL; \
|
||||
(list)->last = NULL; \
|
||||
} while (0)
|
||||
stack->next = node->next;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct io_wq_work {
|
||||
struct io_wq_work_node list;
|
||||
|
1738
fs/io_uring.c
1738
fs/io_uring.c
File diff suppressed because it is too large
Load Diff
@ -6,6 +6,7 @@
|
||||
#define _TRACE_IO_URING_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
struct io_wq_work;
|
||||
|
||||
@ -497,6 +498,66 @@ TRACE_EVENT(io_uring_task_run,
|
||||
(unsigned long long) __entry->user_data)
|
||||
);
|
||||
|
||||
/*
|
||||
* io_uring_req_failed - called when an sqe is errored dring submission
|
||||
*
|
||||
* @sqe: pointer to the io_uring_sqe that failed
|
||||
* @error: error it failed with
|
||||
*
|
||||
* Allows easier diagnosing of malformed requests in production systems.
|
||||
*/
|
||||
TRACE_EVENT(io_uring_req_failed,
|
||||
|
||||
TP_PROTO(const struct io_uring_sqe *sqe, int error),
|
||||
|
||||
TP_ARGS(sqe, error),
|
||||
|
||||
TP_STRUCT__entry (
|
||||
__field( u8, opcode )
|
||||
__field( u8, flags )
|
||||
__field( u8, ioprio )
|
||||
__field( u64, off )
|
||||
__field( u64, addr )
|
||||
__field( u32, len )
|
||||
__field( u32, op_flags )
|
||||
__field( u64, user_data )
|
||||
__field( u16, buf_index )
|
||||
__field( u16, personality )
|
||||
__field( u32, file_index )
|
||||
__field( u64, pad1 )
|
||||
__field( u64, pad2 )
|
||||
__field( int, error )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->opcode = sqe->opcode;
|
||||
__entry->flags = sqe->flags;
|
||||
__entry->ioprio = sqe->ioprio;
|
||||
__entry->off = sqe->off;
|
||||
__entry->addr = sqe->addr;
|
||||
__entry->len = sqe->len;
|
||||
__entry->op_flags = sqe->rw_flags;
|
||||
__entry->user_data = sqe->user_data;
|
||||
__entry->buf_index = sqe->buf_index;
|
||||
__entry->personality = sqe->personality;
|
||||
__entry->file_index = sqe->file_index;
|
||||
__entry->pad1 = sqe->__pad2[0];
|
||||
__entry->pad2 = sqe->__pad2[1];
|
||||
__entry->error = error;
|
||||
),
|
||||
|
||||
TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
|
||||
"len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
|
||||
"personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
|
||||
__entry->opcode, __entry->flags, __entry->ioprio,
|
||||
(unsigned long long)__entry->off,
|
||||
(unsigned long long) __entry->addr, __entry->len,
|
||||
__entry->op_flags, (unsigned long long) __entry->user_data,
|
||||
__entry->buf_index, __entry->personality, __entry->file_index,
|
||||
(unsigned long long) __entry->pad1,
|
||||
(unsigned long long) __entry->pad2, __entry->error)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_IO_URING_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -158,6 +158,7 @@ enum {
|
||||
#define IORING_TIMEOUT_BOOTTIME (1U << 2)
|
||||
#define IORING_TIMEOUT_REALTIME (1U << 3)
|
||||
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
|
||||
#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
|
||||
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
|
||||
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user