diff --git a/block/bsg.c b/block/bsg.c index 8a05a404ae70..a57046de2f07 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -655,6 +655,9 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) dprintk("%s: write %Zd bytes\n", bd->name, count); + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + bsg_set_block(bd, file); bytes_written = 0; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 070332eb41f3..dbe5b4b95df0 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -581,6 +581,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, diff --git a/fs/aio.c b/fs/aio.c index 8edf253484af..8c79e1a53af9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1367,6 +1367,39 @@ out: return ret; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) +{ + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; + + ret = get_user(ctx, ctx32p); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: ctx %lu nr_events %u\n", + ctx, nr_events); + goto out; + } + + ioctx = ioctx_alloc(nr_events); + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + /* truncating is ok because it's a user address */ + ret = put_user((u32)ioctx->user_id, ctx32p); + if (ret) + kill_ioctx(current->mm, ioctx, NULL); + percpu_ref_put(&ioctx->users); + } + +out: + return ret; +} +#endif + /* sys_io_destroy: * Destroy the aio_context specified. May cancel any outstanding * AIOs and block on completion. Will fail with -ENOSYS if not @@ -1591,8 +1624,8 @@ out_put_req: return ret; } -long do_io_submit(aio_context_t ctx_id, long nr, - struct iocb __user *__user *iocbpp, bool compat) +static long do_io_submit(aio_context_t ctx_id, long nr, + struct iocb __user *__user *iocbpp, bool compat) { struct kioctx *ctx; long ret = 0; @@ -1662,6 +1695,44 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, return do_io_submit(ctx_id, nr, iocbpp, 0); } +#ifdef CONFIG_COMPAT +static inline long +copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) +{ + compat_uptr_t uptr; + int i; + + for (i = 0; i < nr; ++i) { + if (get_user(uptr, ptr32 + i)) + return -EFAULT; + if (put_user(compat_ptr(uptr), ptr64 + i)) + return -EFAULT; + } + return 0; +} + +#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) + +COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, + int, nr, u32 __user *, iocb) +{ + struct iocb __user * __user *iocb64; + long ret; + + if (unlikely(nr < 0)) + return -EINVAL; + + if (nr > MAX_AIO_SUBMITS) + nr = MAX_AIO_SUBMITS; + + iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); + ret = copy_iocb(nr, iocb, iocb64); + if (!ret) + ret = do_io_submit(ctx_id, nr, iocb64, 1); + return ret; +} +#endif + /* lookup_kiocb * Finds a given iocb for cancellation. */ @@ -1761,3 +1832,25 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, } return ret; } + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout) +{ + struct timespec t; + struct timespec __user *ut = NULL; + + if (timeout) { + if (compat_get_timespec(&t, timeout)) + return -EFAULT; + + ut = compat_alloc_user_space(sizeof(*ut)); + if (copy_to_user(ut, &t, sizeof(t))) + return -EFAULT; + } + return sys_io_getevents(ctx_id, min_nr, nr, events, ut); +} +#endif diff --git a/fs/compat.c b/fs/compat.c index 543b48c29ac3..3f4908c28698 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -487,45 +487,6 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, return compat_sys_fcntl64(fd, cmd, arg); } -COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) -{ - long ret; - aio_context_t ctx64; - - mm_segment_t oldfs = get_fs(); - if (unlikely(get_user(ctx64, ctx32p))) - return -EFAULT; - - set_fs(KERNEL_DS); - /* The __user pointer cast is valid because of the set_fs() */ - ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64); - set_fs(oldfs); - /* truncating is ok because it's a user address */ - if (!ret) - ret = put_user((u32) ctx64, ctx32p); - return ret; -} - -COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, - compat_long_t, min_nr, - compat_long_t, nr, - struct io_event __user *, events, - struct compat_timespec __user *, timeout) -{ - struct timespec t; - struct timespec __user *ut = NULL; - - if (timeout) { - if (compat_get_timespec(&t, timeout)) - return -EFAULT; - - ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t)) ) - return -EFAULT; - } - return sys_io_getevents(ctx_id, min_nr, nr, events, ut); -} - /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) @@ -602,42 +563,6 @@ out: return ret; } -static inline long -copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) -{ - compat_uptr_t uptr; - int i; - - for (i = 0; i < nr; ++i) { - if (get_user(uptr, ptr32 + i)) - return -EFAULT; - if (put_user(compat_ptr(uptr), ptr64 + i)) - return -EFAULT; - } - return 0; -} - -#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) - -COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, - int, nr, u32 __user *, iocb) -{ - struct iocb __user * __user *iocb64; - long ret; - - if (unlikely(nr < 0)) - return -EINVAL; - - if (nr > MAX_AIO_SUBMITS) - nr = MAX_AIO_SUBMITS; - - iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); - ret = copy_iocb(nr, iocb, iocb64); - if (!ret) - ret = do_io_submit(ctx_id, nr, iocb64, 1); - return ret; -} - struct compat_ncp_mount_data { compat_int_t version; compat_uint_t ncp_fd; diff --git a/fs/exec.c b/fs/exec.c index 8112eacf10f3..eadbf5069c38 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -1268,6 +1268,13 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); current->personality &= ~bprm->per_clear; + /* + * We have to apply CLOEXEC before we change whether the process is + * dumpable (in setup_new_exec) to avoid a race with a process in userspace + * trying to access the should-be-closed file descriptors of a process + * undergoing exec(2). + */ + do_close_on_exec(current->files); return 0; out: @@ -1330,7 +1337,6 @@ void setup_new_exec(struct linux_binprm * bprm) group */ current->self_exec_id++; flush_signal_handlers(current, 0); - do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/namespace.c b/fs/namespace.c index f7e28f8ea04d..b5b1259e064f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -96,10 +96,6 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry) return &mountpoint_hashtable[tmp & mp_hash_mask]; } -/* - * allocation is serialized by namespace_sem, but we need the spinlock to - * serialize with freeing. - */ static int mnt_alloc_id(struct mount *mnt) { int res; @@ -1034,6 +1030,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (IS_MNT_SLAVE(old)) list_add(&mnt->mnt_slave, &old->mnt_slave); mnt->mnt_master = old->mnt_master; + } else { + CLEAR_MNT_SHARED(mnt); } if (flag & CL_MAKE_SHARED) set_mnt_shared(mnt); @@ -1828,9 +1826,7 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); - down_read(&namespace_sem); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); - up_read(&namespace_sem); if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index d171d2c53f7f..f8933cb53d68 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4834,7 +4834,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, &len, is_dedupe); - if (ret || len == 0) + if (ret <= 0) goto out_unlock; /* Lock out changes to the allocation maps and remap. */ diff --git a/fs/pnode.c b/fs/pnode.c index 234a9ac49958..06a793f4ae38 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -67,49 +67,47 @@ int get_dominating_id(struct mount *mnt, const struct path *root) static int do_make_slave(struct mount *mnt) { - struct mount *peer_mnt = mnt, *master = mnt->mnt_master; - struct mount *slave_mnt; + struct mount *master, *slave_mnt; - /* - * slave 'mnt' to a peer mount that has the - * same root dentry. If none is available then - * slave it to anything that is available. - */ - while ((peer_mnt = next_peer(peer_mnt)) != mnt && - peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ; - - if (peer_mnt == mnt) { - peer_mnt = next_peer(mnt); - if (peer_mnt == mnt) - peer_mnt = NULL; - } - if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) && - list_empty(&mnt->mnt_share)) - mnt_release_group_id(mnt); - - list_del_init(&mnt->mnt_share); - mnt->mnt_group_id = 0; - - if (peer_mnt) - master = peer_mnt; - - if (master) { - list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) - slave_mnt->mnt_master = master; - list_move(&mnt->mnt_slave, &master->mnt_slave_list); - list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); - INIT_LIST_HEAD(&mnt->mnt_slave_list); - } else { - struct list_head *p = &mnt->mnt_slave_list; - while (!list_empty(p)) { - slave_mnt = list_first_entry(p, - struct mount, mnt_slave); - list_del_init(&slave_mnt->mnt_slave); - slave_mnt->mnt_master = NULL; + if (list_empty(&mnt->mnt_share)) { + if (IS_MNT_SHARED(mnt)) { + mnt_release_group_id(mnt); + CLEAR_MNT_SHARED(mnt); } + master = mnt->mnt_master; + if (!master) { + struct list_head *p = &mnt->mnt_slave_list; + while (!list_empty(p)) { + slave_mnt = list_first_entry(p, + struct mount, mnt_slave); + list_del_init(&slave_mnt->mnt_slave); + slave_mnt->mnt_master = NULL; + } + return 0; + } + } else { + struct mount *m; + /* + * slave 'mnt' to a peer mount that has the + * same root dentry. If none is available then + * slave it to anything that is available. + */ + for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) { + if (m->mnt.mnt_root == mnt->mnt.mnt_root) { + master = m; + break; + } + } + list_del_init(&mnt->mnt_share); + mnt->mnt_group_id = 0; + CLEAR_MNT_SHARED(mnt); } + list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) + slave_mnt->mnt_master = master; + list_move(&mnt->mnt_slave, &master->mnt_slave_list); + list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); + INIT_LIST_HEAD(&mnt->mnt_slave_list); mnt->mnt_master = master; - CLEAR_MNT_SHARED(mnt); return 0; } diff --git a/fs/read_write.c b/fs/read_write.c index da6de12b5c46..7537b6b6b5a2 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1669,6 +1669,9 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) * Check that the two inodes are eligible for cloning, the ranges make * sense, and then flush all dirty data. Caller must ensure that the * inodes have been locked against any other modifications. + * + * Returns: 0 for "nothing to clone", 1 for "something to clone", or + * the usual negative error code. */ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, struct inode *inode_out, loff_t pos_out, @@ -1695,17 +1698,15 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, /* Are we going all the way to the end? */ isize = i_size_read(inode_in); - if (isize == 0) { - *len = 0; + if (isize == 0) return 0; - } /* Zero length dedupe exits immediately; reflink goes to EOF. */ if (*len == 0) { - if (is_dedupe) { - *len = 0; + if (is_dedupe || pos_in == isize) return 0; - } + if (pos_in > isize) + return -EINVAL; *len = isize - pos_in; } @@ -1769,7 +1770,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, return -EBADE; } - return 0; + return 1; } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); @@ -1955,6 +1956,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) goto out; ret = 0; + if (off + len > i_size_read(src)) + return -EINVAL; + /* pre-format output fields to sane values */ for (i = 0; i < count; i++) { same->info[i].bytes_deduped = 0ULL; diff --git a/fs/seq_file.c b/fs/seq_file.c index 368bfb92b115..a11f271800ef 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -190,6 +190,13 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) */ m->version = file->f_version; + /* + * if request is to read from zero offset, reset iterator to first + * record as it might have been already advanced by previous requests + */ + if (*ppos == 0) + m->index = 0; + /* Don't assume *ppos is where we left it */ if (unlikely(*ppos != m->read_pos)) { while ((err = traverse(m, *ppos)) == -EAGAIN) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 45ceb94e89e4..1bc0bd6a9848 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1191,7 +1191,7 @@ out: return err; } -void ufs_truncate_blocks(struct inode *inode) +static void ufs_truncate_blocks(struct inode *inode) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index aca2d4bd4303..07593a362cd0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1161,7 +1161,7 @@ xfs_reflink_remap_range( ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, &len, is_dedupe); - if (ret || len == 0) + if (ret <= 0) goto out_unlock; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); diff --git a/include/linux/aio.h b/include/linux/aio.h index 9eb42dbc5582..fdd0a343f455 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -14,14 +14,9 @@ typedef int (kiocb_cancel_fn)(struct kiocb *); /* prototypes */ #ifdef CONFIG_AIO extern void exit_aio(struct mm_struct *mm); -extern long do_io_submit(aio_context_t ctx_id, long nr, - struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else static inline void exit_aio(struct mm_struct *mm) { } -static inline long do_io_submit(aio_context_t ctx_id, long nr, - struct iocb __user * __user *iocbpp, - bool compat) { return 0; } static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 635482e60ca3..8acef8576ce9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -150,6 +150,9 @@ cond_syscall(sys_io_destroy); cond_syscall(sys_io_submit); cond_syscall(sys_io_cancel); cond_syscall(sys_io_getevents); +cond_syscall(compat_sys_io_setup); +cond_syscall(compat_sys_io_submit); +cond_syscall(compat_sys_io_getevents); cond_syscall(sys_sysfs); cond_syscall(sys_syslog); cond_syscall(sys_process_vm_readv); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 228892dabba6..25f572303801 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -73,19 +73,21 @@ } #define iterate_all_kinds(i, n, v, I, B, K) { \ - size_t skip = i->iov_offset; \ - if (unlikely(i->type & ITER_BVEC)) { \ - struct bio_vec v; \ - struct bvec_iter __bi; \ - iterate_bvec(i, n, v, __bi, skip, (B)) \ - } else if (unlikely(i->type & ITER_KVEC)) { \ - const struct kvec *kvec; \ - struct kvec v; \ - iterate_kvec(i, n, v, kvec, skip, (K)) \ - } else { \ - const struct iovec *iov; \ - struct iovec v; \ - iterate_iovec(i, n, v, iov, skip, (I)) \ + if (likely(n)) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + struct bio_vec v; \ + struct bvec_iter __bi; \ + iterate_bvec(i, n, v, __bi, skip, (B)) \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + } \ } \ } @@ -576,7 +578,7 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return false; } - if (unlikely(i->count < bytes)) \ + if (unlikely(i->count < bytes)) return false; iterate_all_kinds(i, bytes, v, ({ @@ -620,7 +622,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return false; } - if (unlikely(i->count < bytes)) \ + if (unlikely(i->count < bytes)) return false; iterate_all_kinds(i, bytes, v, ({ if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len, @@ -837,11 +839,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) unsigned long res = 0; size_t size = i->count; - if (!size) - return 0; - if (unlikely(i->type & ITER_PIPE)) { - if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) + if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) return size | i->iov_offset; return size; } @@ -856,10 +855,8 @@ EXPORT_SYMBOL(iov_iter_alignment); unsigned long iov_iter_gap_alignment(const struct iov_iter *i) { - unsigned long res = 0; + unsigned long res = 0; size_t size = i->count; - if (!size) - return 0; if (unlikely(i->type & ITER_PIPE)) { WARN_ON(1); @@ -874,7 +871,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) (res |= (!res ? 0 : (unsigned long)v.iov_base) | (size != v.iov_len ? size : 0)) ); - return res; + return res; } EXPORT_SYMBOL(iov_iter_gap_alignment); @@ -908,6 +905,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, size_t capacity; int idx; + if (!maxsize) + return 0; + if (!sanity(i)) return -EFAULT; @@ -926,9 +926,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (!maxsize) - return 0; - if (unlikely(i->type & ITER_PIPE)) return pipe_get_pages(i, pages, maxsize, maxpages, start); iterate_all_kinds(i, maxsize, v, ({ @@ -975,6 +972,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, int idx; int npages; + if (!maxsize) + return 0; + if (!sanity(i)) return -EFAULT; @@ -1006,9 +1006,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (!maxsize) - return 0; - if (unlikely(i->type & ITER_PIPE)) return pipe_get_pages_alloc(i, pages, maxsize, start); iterate_all_kinds(i, maxsize, v, ({