io_uring-5.10-2020-10-20

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+O9WEQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpqajEADSD5PiO94YWTtVNWFUjn5RW+GlCE70/0VV
XImdasmvM8nb48E+z2EW0Ky4vKXeVy5r+WZAeIYqPUHy/ogQDVpEn00NL7tFQmOz
8UYrlZ3LLE/bSeWM5iavgG7TldVs/ZfspJ0hj3/Ac7jJpzuRGEI5TClsxJ0mWV39
b2qT4OYBDhwvwVPZ/qhWgEwJXJFzFywckouIbMw8gPkveebUYeyu/yuScNGwYuiQ
46YPEk/XIuOy8iUvQjqhLY+NNlAKJwt3z9WZgt5F+TZIhkpp7z6h20+jezFQcuFP
GXzIDN+EADpsbw7MWJYIZVffxDEMlDpkJlAVMT1hsYLDfTXoEzmFwRddoFh2Fjf6
ghWqhOKffUuAOX2xs1MrS2xLaxd0ot7QqZJVTYk7zEljkaRANlstSZZ+PpI+Sad/
rNieQvs6jnsmTODDEaV3qyFX5aBQ2NdvyndZNU9wz0GZAWAdz+wxE0A1FVD0A37i
p6m8sIvhNg3/cW89G04IDYUkAygi8knVDnEDHRwaJtswZQ4pRSGMp+N4qZ0GpnK7
BviaAhofGaYlqruavO6Ug2YyomYpWGlUxTaB9ZKh0HkEDlDM945+0sgQRdxfsE8d
OboycqJn3puOl/wh5Fc4oGYrWLsDbaA/5kksC4lm85Z+HUf+UXMS4QFdoPJYjhuM
H6oMz1w2bQ==
=v56S
-----END PGP SIGNATURE-----

Merge tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "A mix of fixes and a few stragglers. In detail:

  - Revert the bogus __read_mostly that we discussed for the initial
    pull request.

  - Fix a merge window regression with fixed file registration error
    path handling.

  - Fix io-wq numa node affinities.

  - Series abstracting out an io_identity struct, making it both easier
    to see what the personality items are, and also easier to adopt
    more. Use this to cover audit logging.

  - Fix for read-ahead disabled block condition in async buffered
    reads, and using single page read-ahead to unify which
    generic_file_buffered_read() path is used.

  - Series for REQ_F_COMP_LOCKED fix and removal of it (Pavel)

  - Poll fix (Pavel)"

* tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: use blk_queue_nowait() to check if NOWAIT supported
  mm: use limited read-ahead to satisfy read
  mm: mark async iocb read as NOWAIT once some data has been copied
  io_uring: fix double poll mask init
  io-wq: inherit audit loginuid and sessionid
  io_uring: use percpu counters to track inflight requests
  io_uring: assign new io_identity for task if members have changed
  io_uring: store io_identity in io_uring_task
  io_uring: COW io_identity on mismatch
  io_uring: move io identity items into separate struct
  io_uring: rely solely on work flags to determine personality.
  io_uring: pass required context in as flags
  io-wq: assign NUMA node locality if appropriate
  io_uring: fix error path cleanup in io_sqe_files_register()
  Revert "io_uring: mark io_uring_fops/io_op_defs as __read_mostly"
  io_uring: fix REQ_F_COMP_LOCKED by killing it
  io_uring: dig out COMP_LOCK from deep call chain
  io_uring: don't put a poll req under spinlock
  io_uring: don't unnecessarily clear F_LINK_TIMEOUT
  io_uring: don't set COMP_LOCKED if won't put
  ...
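Several of the commits above ("io_uring: store io_identity in io_uring_task", "io_uring: COW io_identity on mismatch") implement a copy-on-write scheme: requests keep sharing the task's default identity until one of its members changes, at which point a refcounted private copy is taken so in-flight work keeps the old view. A minimal userspace sketch of that pattern, with a hypothetical io_cow_identity() helper and malloc() standing in for kernel allocation:

#include <stdbool.h>
#include <stdlib.h>

struct cred;				/* opaque stand-in for the kernel's struct cred */

/* Trimmed io_identity: only the members this sketch exercises. */
struct io_identity {
	const struct cred *creds;
	int count;			/* simplified refcount */
};

/*
 * Hypothetical helper (not a real kernel function): if the task's
 * current creds no longer match what the shared identity captured,
 * take a private copy instead of mutating the shared one.
 */
static bool io_cow_identity(struct io_identity **idp,
			    const struct cred *cur_creds)
{
	struct io_identity *id = *idp;
	struct io_identity *copy;

	if (id->creds == cur_creds)
		return true;			/* still matches, keep sharing */

	copy = malloc(sizeof(*copy));		/* kernel code would kmalloc() */
	if (!copy)
		return false;
	*copy = *id;				/* start from the old identity */
	copy->creds = cur_creds;		/* apply the changed member */
	copy->count = 1;
	if (--id->count == 0)			/* drop our reference on the old one */
		free(id);
	*idp = copy;
	return true;
}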
commit 4962a85696
fs/io-wq.c | 51 changed lines
@@ -18,6 +18,7 @@
 #include <linux/fs_struct.h>
 #include <linux/task_work.h>
 #include <linux/blk-cgroup.h>
+#include <linux/audit.h>

 #include "io-wq.h"

@@ -429,14 +430,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
 		mmput(worker->mm);
 		worker->mm = NULL;
 	}
-	if (!work->mm)
-		return;

-	if (mmget_not_zero(work->mm)) {
-		kthread_use_mm(work->mm);
-		worker->mm = work->mm;
-		/* hang on to this mm */
-		work->mm = NULL;
+	if (mmget_not_zero(work->identity->mm)) {
+		kthread_use_mm(work->identity->mm);
+		worker->mm = work->identity->mm;
 		return;
 	}

@@ -448,9 +445,11 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
 				      struct io_wq_work *work)
 {
 #ifdef CONFIG_BLK_CGROUP
-	if (work->blkcg_css != worker->blkcg_css) {
-		kthread_associate_blkcg(work->blkcg_css);
-		worker->blkcg_css = work->blkcg_css;
+	if (!(work->flags & IO_WQ_WORK_BLKCG))
+		return;
+	if (work->identity->blkcg_css != worker->blkcg_css) {
+		kthread_associate_blkcg(work->identity->blkcg_css);
+		worker->blkcg_css = work->identity->blkcg_css;
 	}
 #endif
 }

@@ -458,9 +457,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
 static void io_wq_switch_creds(struct io_worker *worker,
 			       struct io_wq_work *work)
 {
-	const struct cred *old_creds = override_creds(work->creds);
+	const struct cred *old_creds = override_creds(work->identity->creds);

-	worker->cur_creds = work->creds;
+	worker->cur_creds = work->identity->creds;
 	if (worker->saved_creds)
 		put_cred(old_creds); /* creds set by previous switch */
 	else

@@ -470,20 +469,26 @@ static void io_wq_switch_creds(struct io_worker *worker,
 static void io_impersonate_work(struct io_worker *worker,
 				struct io_wq_work *work)
 {
-	if (work->files && current->files != work->files) {
+	if ((work->flags & IO_WQ_WORK_FILES) &&
+	    current->files != work->identity->files) {
 		task_lock(current);
-		current->files = work->files;
-		current->nsproxy = work->nsproxy;
+		current->files = work->identity->files;
+		current->nsproxy = work->identity->nsproxy;
 		task_unlock(current);
 	}
-	if (work->fs && current->fs != work->fs)
-		current->fs = work->fs;
-	if (work->mm != worker->mm)
+	if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
+		current->fs = work->identity->fs;
+	if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
 		io_wq_switch_mm(worker, work);
-	if (worker->cur_creds != work->creds)
+	if ((work->flags & IO_WQ_WORK_CREDS) &&
+	    worker->cur_creds != work->identity->creds)
 		io_wq_switch_creds(worker, work);
-	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
+	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
 	io_wq_switch_blkcg(worker, work);
+#ifdef CONFIG_AUDIT
+	current->loginuid = work->identity->loginuid;
+	current->sessionid = work->identity->sessionid;
+#endif
 }

 static void io_assign_current_work(struct io_worker *worker,
@@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker,
 		cond_resched();
 	}

+#ifdef CONFIG_AUDIT
+	current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
+	current->sessionid = AUDIT_SID_UNSET;
+#endif
+
 	spin_lock_irq(&worker->lock);
 	worker->cur_work = work;
 	spin_unlock_irq(&worker->lock);

@@ -676,6 +686,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 		kfree(worker);
 		return false;
 	}
+	kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));

 	raw_spin_lock_irq(&wqe->lock);
 	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
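With the flags in place, io_impersonate_work() above only switches the context a request actually declared. A compilable sketch of the declaration side, using the real IO_WQ_WORK_* values from fs/io-wq.h below but an assumed, simplified io_op_def shape (the kernel's per-opcode table in fs/io_uring.c is more involved):

#include <stdio.h>

/* These values match the enum added to fs/io-wq.h in this merge. */
enum {
	IO_WQ_WORK_FILES = 32,
	IO_WQ_WORK_FS    = 64,
	IO_WQ_WORK_MM    = 128,
	IO_WQ_WORK_CREDS = 256,
	IO_WQ_WORK_BLKCG = 512,
};

/* Assumed, simplified shape of a per-opcode definition. */
struct io_op_def {
	unsigned work_flags;		/* context the op requires */
};

/* A buffered write would need an mm (user pages), creds, and blkcg. */
static const struct io_op_def writev_def = {
	.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_CREDS | IO_WQ_WORK_BLKCG,
};

int main(void)
{
	/* io_impersonate_work() only switches what these bits request. */
	unsigned flags = writev_def.work_flags;

	printf("needs mm:    %s\n", (flags & IO_WQ_WORK_MM)    ? "yes" : "no");
	printf("needs files: %s\n", (flags & IO_WQ_WORK_FILES) ? "yes" : "no");
	return 0;
}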
fs/io-wq.h | 18 changed lines
@@ -1,6 +1,8 @@
 #ifndef INTERNAL_IO_WQ_H
 #define INTERNAL_IO_WQ_H

+#include <linux/io_uring.h>
+
 struct io_wq;

 enum {
@@ -10,6 +12,12 @@ enum {
 	IO_WQ_WORK_NO_CANCEL	= 8,
 	IO_WQ_WORK_CONCURRENT	= 16,

+	IO_WQ_WORK_FILES	= 32,
+	IO_WQ_WORK_FS		= 64,
+	IO_WQ_WORK_MM		= 128,
+	IO_WQ_WORK_CREDS	= 256,
+	IO_WQ_WORK_BLKCG	= 512,
+
 	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
 };

@@ -85,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,

 struct io_wq_work {
 	struct io_wq_work_node list;
-	struct files_struct *files;
-	struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
-	struct cgroup_subsys_state *blkcg_css;
-#endif
-	const struct cred *creds;
-	struct nsproxy *nsproxy;
-	struct fs_struct *fs;
-	unsigned long fsize;
+	struct io_identity *identity;
 	unsigned flags;
 };
fs/io_uring.c | 670 changed lines (diff suppressed because it is too large)
fs/proc/base.c

@@ -1268,6 +1268,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 	kuid_t kloginuid;
 	int rv;

+	/* Don't let kthreads write their own loginuid */
+	if (current->flags & PF_KTHREAD)
+		return -EPERM;
+
 	rcu_read_lock();
 	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
 		rcu_read_unlock();
include/linux/io_uring.h

@@ -4,18 +4,33 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 #include <linux/percpu-refcount.h>

+struct io_identity {
+	struct files_struct		*files;
+	struct mm_struct		*mm;
+#ifdef CONFIG_BLK_CGROUP
+	struct cgroup_subsys_state	*blkcg_css;
+#endif
+	const struct cred		*creds;
+	struct nsproxy			*nsproxy;
+	struct fs_struct		*fs;
+	unsigned long			fsize;
+#ifdef CONFIG_AUDIT
+	kuid_t				loginuid;
+	unsigned int			sessionid;
+#endif
+	refcount_t			count;
+};
+
 struct io_uring_task {
 	/* submission side */
 	struct xarray		xa;
 	struct wait_queue_head	wait;
 	struct file		*last;
-	atomic_long_t		req_issue;
-
-	/* completion side */
-	bool			in_idle ____cacheline_aligned_in_smp;
-	atomic_long_t		req_complete;
+	struct percpu_counter	inflight;
+	struct io_identity	__identity;
+	struct io_identity	*identity;
+	bool			in_idle;
 };

 #if defined(CONFIG_IO_URING)
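The atomic_long_t req_issue/req_complete pair is replaced by a single percpu_counter, which keeps the issue and complete fast paths per-CPU and pays the cross-CPU summation cost only when the total is actually needed (e.g. when the task goes idle). A minimal kernel-style sketch of the pattern, with illustrative example_* names rather than io_uring's own functions:

#include <linux/percpu_counter.h>
#include <linux/gfp.h>

/* Illustrative only: an "inflight" counter like io_uring_task's. */
static struct percpu_counter inflight;

static int example_init(void)
{
	/* Allocates the per-CPU counters with GFP_KERNEL. */
	return percpu_counter_init(&inflight, 0, GFP_KERNEL);
}

static void example_issue(void)
{
	percpu_counter_inc(&inflight);	/* cheap per-CPU fast path */
}

static void example_complete(void)
{
	percpu_counter_dec(&inflight);
}

static s64 example_pending(void)
{
	/* Slow path: fold all per-CPU deltas into an exact total. */
	return percpu_counter_sum(&inflight);
}

static void example_exit(void)
{
	percpu_counter_destroy(&inflight);
}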
mm/filemap.c

@@ -2199,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 	last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
 	offset = *ppos & ~PAGE_MASK;

+	/*
+	 * If we've already successfully copied some data, then we
+	 * can no longer safely return -EIOCBQUEUED. Hence mark
+	 * an async read NOWAIT at that point.
+	 */
+	if (written && (iocb->ki_flags & IOCB_WAITQ))
+		iocb->ki_flags |= IOCB_NOWAIT;
+
 	for (;;) {
 		struct page *page;
 		pgoff_t end_index;
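The comment in this hunk encodes a general rule worth spelling out: once an async buffered read has copied some bytes, the call must complete with that partial count rather than punt the whole operation to a retry path. A standalone sketch of the rule (userspace C, with -EAGAIN standing in for the kernel-internal -EIOCBQUEUED):

#include <errno.h>
#include <sys/types.h>

/*
 * Sketch, not kernel code: decide how a retry-based async read ends.
 * Once written > 0, a would-block condition must not turn the whole
 * call into an "I'll finish later" error; the partial count wins.
 */
static ssize_t finish_or_queue(ssize_t written, int would_block)
{
	if (!would_block)
		return written;		/* everything requested was copied */
	if (written)
		return written;		/* partial result beats a retry error */
	return -EAGAIN;			/* nothing copied yet: safe to punt */
}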
mm/readahead.c

@@ -552,15 +552,23 @@ readit:
 void page_cache_sync_ra(struct readahead_control *ractl,
 		struct file_ra_state *ra, unsigned long req_count)
 {
-	/* no read-ahead */
-	if (!ra->ra_pages)
-		return;
+	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);

-	if (blk_cgroup_congested())
-		return;
+	/*
+	 * Even if read-ahead is disabled, issue this request as read-ahead
+	 * as we'll need it to satisfy the requested range. The forced
+	 * read-ahead will do the right thing and limit the read to just the
+	 * requested range, which we'll set to 1 page for this case.
+	 */
+	if (!ra->ra_pages || blk_cgroup_congested()) {
+		if (!ractl->file)
+			return;
+		req_count = 1;
+		do_forced_ra = true;
+	}

 	/* be dumb */
-	if (ractl->file && (ractl->file->f_mode & FMODE_RANDOM)) {
+	if (do_forced_ra) {
 		force_page_cache_ra(ractl, ra, req_count);
 		return;
 	}
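From userspace, the forced read-ahead path above is also reachable via POSIX_FADV_RANDOM, which sets FMODE_RANDOM on the struct file. A small illustrative program (the file path is a placeholder):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	/* "data.bin" is a placeholder path for this illustration. */
	int fd = open("data.bin", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* POSIX_FADV_RANDOM sets FMODE_RANDOM, so subsequent reads take
	 * the "be dumb" forced read-ahead branch shown in the hunk. */
	if (posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM) != 0)
		perror("posix_fadvise");

	if (read(fd, buf, sizeof(buf)) < 0)
		perror("read");
	close(fd);
	return 0;
}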