vfs-6.8.misc
-----BEGIN PGP SIGNATURE-----

iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZZUxRQAKCRCRxhvAZXjc
ov/QAQDzvge3oQ9MEymmOiyzzcF+HhAXBr+9oEsYJjFc1p0TsgEA61gXjZo7F1jY
KBqd6znOZCR+Waj0kIVJRAo/ISRBqQc=
=0bRl
-----END PGP SIGNATURE-----

Merge tag 'vfs-6.8.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "This contains the usual miscellaneous features, cleanups, and fixes
  for vfs and individual fses.

  Features:

   - Add Jan Kara as VFS reviewer

   - Show correct device and inode numbers in proc/<pid>/maps for vma
     files on stacked filesystems. This is now easily doable thanks to
     the backing file work from the last cycles. This comes with
     selftests

  Cleanups:

   - Remove a redundant might_sleep() from wait_on_inode()

   - Initialize pointer with NULL, not 0

   - Clarify comment on access_override_creds()

   - Rework and simplify eventfd_signal() and eventfd_signal_mask()
     helpers

   - Process aio completions in batches to avoid needless wakeups

   - Completely decouple struct mnt_idmap from namespaces. We now only
     keep the actual idmapping around and don't stash references to
     namespaces

   - Reformat maintainer entries to indicate that a given subsystem
     belongs to fs/

   - Simplify fput() for files that were never opened

   - Get rid of various pointless file helpers

   - Rename various file helpers

   - Rename struct file members after SLAB_TYPESAFE_BY_RCU switch from
     last cycle

   - Make relatime_need_update() return bool

   - Use GFP_KERNEL instead of GFP_USER when allocating superblocks

   - Replace deprecated ida_simple_*() calls with their current ida_*()
     counterparts

  Fixes:

   - Fix comments on user namespace id mapping helpers. They aren't
     kernel doc comments so they shouldn't be using /**

   - s/Retuns/Returns/g in various places

   - Add missing parameter documentation on can_move_mount_beneath()

   - Rename i_mapping->private_data to i_mapping->i_private_data

   - Fix a false-positive lockdep warning in pipe_write() for watch
     queues

   - Improve __fget_files_rcu() code generation to improve performance

   - Only notify writer that pipe resizing has finished after setting
     pipe->max_usage otherwise writers are never notified that the pipe
     has been resized and hang

   - Fix some kernel docs in hfsplus

   - s/passs/pass/g in various places

   - Fix kernel docs in ntfs

   - Fix kcalloc() arguments order reported by gcc 14

   - Fix uninitialized value in reiserfs"

* tag 'vfs-6.8.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (36 commits)
  reiserfs: fix uninit-value in comp_keys
  watch_queue: fix kcalloc() arguments order
  ntfs: dir.c: fix kernel-doc function parameter warnings
  fs: fix doc comment typo fs tree wide
  selftests/overlayfs: verify device and inode numbers in /proc/pid/maps
  fs/proc: show correct device and inode numbers in /proc/pid/maps
  eventfd: Remove usage of the deprecated ida_simple_xx() API
  fs: super: use GFP_KERNEL instead of GFP_USER for super block allocation
  fs/hfsplus: wrapper.c: fix kernel-doc warnings
  fs: add Jan Kara as reviewer
  fs/inode: Make relatime_need_update return bool
  pipe: wakeup wr_wait after setting max_usage
  file: remove __receive_fd()
  file: stop exposing receive_fd_user()
  fs: replace f_rcuhead with f_task_work
  file: remove pointless wrapper
  file: s/close_fd_get_file()/file_close_fd()/g
  Improve __fget_files_rcu() code generation (and thus __fget_light())
  file: massage cleanup of files that failed to open
  fs/pipe: Fix lockdep false-positive in watchqueue pipe_write()
  ...
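Most of the driver churn in the diff below is the eventfd_signal() rework called out above: the explicit count argument is gone and the helper no longer returns the amount added. A minimal before/after sketch of a caller (the handler name is made up; the two call forms are the ones used throughout the conversions below):

/*
 * Hedged illustration only: a hypothetical interrupt handler showing the
 * old and new eventfd_signal() calling conventions.
 */
static irqreturn_t foo_irq_handler(int irq, void *arg)
{
	struct eventfd_ctx *trigger = arg;

	/*
	 * Up to v6.7: eventfd_signal(trigger, 1) took an explicit count and
	 * returned how much was actually added to the counter.
	 *
	 * From this series on: one saturating increment, no return value.
	 */
	eventfd_signal(trigger);

	return IRQ_HANDLED;
}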
This commit is contained in:
commit c604110e66

MAINTAINERS: 21 lines changed
@@ -8103,6 +8103,7 @@ F: include/trace/events/fs_dax.h
 FILESYSTEMS (VFS and infrastructure)
 M: Alexander Viro <viro@zeniv.linux.org.uk>
 M: Christian Brauner <brauner@kernel.org>
+R: Jan Kara <jack@suse.cz>
 L: linux-fsdevel@vger.kernel.org
 S: Maintained
 F: fs/*
@@ -8123,6 +8124,16 @@ F: fs/exportfs/
 F: fs/fhandle.c
 F: include/linux/exportfs.h
 
+FILESYSTEMS [IDMAPPED MOUNTS]
+M: Christian Brauner <brauner@kernel.org>
+M: Seth Forshee <sforshee@kernel.org>
+L: linux-fsdevel@vger.kernel.org
+S: Maintained
+F: Documentation/filesystems/idmappings.rst
+F: fs/mnt_idmapping.c
+F: include/linux/mnt_idmapping.*
+F: tools/testing/selftests/mount_setattr/
+
 FILESYSTEMS [IOMAP]
 M: Christian Brauner <brauner@kernel.org>
 R: Darrick J. Wong <djwong@kernel.org>
@@ -10202,16 +10213,6 @@ S: Maintained
 W: https://github.com/o2genum/ideapad-slidebar
 F: drivers/input/misc/ideapad_slidebar.c
 
-IDMAPPED MOUNTS
-M: Christian Brauner <brauner@kernel.org>
-M: Seth Forshee <sforshee@kernel.org>
-L: linux-fsdevel@vger.kernel.org
-S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping.git
-F: Documentation/filesystems/idmappings.rst
-F: include/linux/mnt_idmapping.*
-F: tools/testing/selftests/mount_setattr/
-
 IDT VersaClock 5 CLOCK DRIVER
 M: Luca Ceresoli <luca@lucaceresoli.net>
 S: Maintained
@ -2388,7 +2388,7 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
|
||||
if (!eventfd)
|
||||
return HV_STATUS_INVALID_PORT_ID;
|
||||
|
||||
eventfd_signal(eventfd, 1);
|
||||
eventfd_signal(eventfd);
|
||||
return HV_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -2088,7 +2088,7 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
|
||||
if (ret < 0 && ret != -ENOTCONN)
|
||||
return false;
|
||||
} else {
|
||||
eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1);
|
||||
eventfd_signal(evtchnfd->deliver.eventfd.ctx);
|
||||
}
|
||||
|
||||
*r = 0;
|
||||
|
@ -2044,7 +2044,7 @@ static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64
|
||||
notifier_event->events_mask |= event_mask;
|
||||
|
||||
if (notifier_event->eventfd)
|
||||
eventfd_signal(notifier_event->eventfd, 1);
|
||||
eventfd_signal(notifier_event->eventfd);
|
||||
|
||||
mutex_unlock(¬ifier_event->lock);
|
||||
}
|
||||
|
@ -1921,7 +1921,7 @@ static void binder_deferred_fd_close(int fd)
|
||||
if (!twcb)
|
||||
return;
|
||||
init_task_work(&twcb->twork, binder_do_fd_close);
|
||||
twcb->file = close_fd_get_file(fd);
|
||||
twcb->file = file_close_fd(fd);
|
||||
if (twcb->file) {
|
||||
// pin it until binder_do_fd_close(); see comments there
|
||||
get_file(twcb->file);
|
||||
|
@ -1872,7 +1872,7 @@ static irqreturn_t dfl_irq_handler(int irq, void *arg)
|
||||
{
|
||||
struct eventfd_ctx *trigger = arg;
|
||||
|
||||
eventfd_signal(trigger, 1);
|
||||
eventfd_signal(trigger);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -1365,7 +1365,7 @@ static void syncobj_eventfd_entry_fence_func(struct dma_fence *fence,
|
||||
struct syncobj_eventfd_entry *entry =
|
||||
container_of(cb, struct syncobj_eventfd_entry, fence_cb);
|
||||
|
||||
eventfd_signal(entry->ev_fd_ctx, 1);
|
||||
eventfd_signal(entry->ev_fd_ctx);
|
||||
syncobj_eventfd_entry_free(entry);
|
||||
}
|
||||
|
||||
@ -1388,13 +1388,13 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
|
||||
entry->fence = fence;
|
||||
|
||||
if (entry->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) {
|
||||
eventfd_signal(entry->ev_fd_ctx, 1);
|
||||
eventfd_signal(entry->ev_fd_ctx);
|
||||
syncobj_eventfd_entry_free(entry);
|
||||
} else {
|
||||
ret = dma_fence_add_callback(fence, &entry->fence_cb,
|
||||
syncobj_eventfd_entry_fence_func);
|
||||
if (ret == -ENOENT) {
|
||||
eventfd_signal(entry->ev_fd_ctx, 1);
|
||||
eventfd_signal(entry->ev_fd_ctx);
|
||||
syncobj_eventfd_entry_free(entry);
|
||||
}
|
||||
}
|
||||
|
@ -422,7 +422,7 @@ static void init_irq_map(struct intel_gvt_irq *irq)
|
||||
#define MSI_CAP_DATA(offset) (offset + 8)
|
||||
#define MSI_CAP_EN 0x1
|
||||
|
||||
static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
|
||||
static void inject_virtual_interrupt(struct intel_vgpu *vgpu)
|
||||
{
|
||||
unsigned long offset = vgpu->gvt->device_info.msi_cap_offset;
|
||||
u16 control, data;
|
||||
@ -434,10 +434,10 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
|
||||
|
||||
/* Do not generate MSI if MSIEN is disabled */
|
||||
if (!(control & MSI_CAP_EN))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
if (WARN(control & GENMASK(15, 1), "only support one MSI format\n"))
|
||||
return -EINVAL;
|
||||
return;
|
||||
|
||||
trace_inject_msi(vgpu->id, addr, data);
|
||||
|
||||
@ -451,10 +451,9 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
|
||||
* returned and don't inject interrupt into guest.
|
||||
*/
|
||||
if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
|
||||
return -ESRCH;
|
||||
if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1)
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
return;
|
||||
if (vgpu->msi_trigger)
|
||||
eventfd_signal(vgpu->msi_trigger);
|
||||
}
|
||||
|
||||
static void propagate_event(struct intel_gvt_irq *irq,
|
||||
|
@ -2498,7 +2498,7 @@ static void dispatch_event_fd(struct list_head *fd_list,
|
||||
|
||||
list_for_each_entry_rcu(item, fd_list, xa_list) {
|
||||
if (item->eventfd)
|
||||
eventfd_signal(item->eventfd, 1);
|
||||
eventfd_signal(item->eventfd);
|
||||
else
|
||||
deliver_event(item, data);
|
||||
}
|
||||
|
@ -184,7 +184,7 @@ static irqreturn_t irq_handler(void *private)
|
||||
{
|
||||
struct eventfd_ctx *ev_ctx = private;
|
||||
|
||||
eventfd_signal(ev_ctx, 1);
|
||||
eventfd_signal(ev_ctx);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,7 @@ static ssize_t vfio_ccw_crw_region_read(struct vfio_ccw_private *private,
|
||||
|
||||
/* Notify the guest if more CRWs are on our queue */
|
||||
if (!list_empty(&private->crw) && private->crw_trigger)
|
||||
eventfd_signal(private->crw_trigger, 1);
|
||||
eventfd_signal(private->crw_trigger);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ void vfio_ccw_sch_io_todo(struct work_struct *work)
|
||||
private->state = VFIO_CCW_STATE_IDLE;
|
||||
|
||||
if (private->io_trigger)
|
||||
eventfd_signal(private->io_trigger, 1);
|
||||
eventfd_signal(private->io_trigger);
|
||||
}
|
||||
|
||||
void vfio_ccw_crw_todo(struct work_struct *work)
|
||||
@ -122,7 +122,7 @@ void vfio_ccw_crw_todo(struct work_struct *work)
|
||||
private = container_of(work, struct vfio_ccw_private, crw_work);
|
||||
|
||||
if (!list_empty(&private->crw) && private->crw_trigger)
|
||||
eventfd_signal(private->crw_trigger, 1);
|
||||
eventfd_signal(private->crw_trigger);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -421,7 +421,7 @@ static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private,
|
||||
case VFIO_IRQ_SET_DATA_NONE:
|
||||
{
|
||||
if (*ctx)
|
||||
eventfd_signal(*ctx, 1);
|
||||
eventfd_signal(*ctx);
|
||||
return 0;
|
||||
}
|
||||
case VFIO_IRQ_SET_DATA_BOOL:
|
||||
@ -432,7 +432,7 @@ static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private,
|
||||
return -EFAULT;
|
||||
|
||||
if (trigger && *ctx)
|
||||
eventfd_signal(*ctx, 1);
|
||||
eventfd_signal(*ctx);
|
||||
return 0;
|
||||
}
|
||||
case VFIO_IRQ_SET_DATA_EVENTFD:
|
||||
@ -612,7 +612,7 @@ static void vfio_ccw_mdev_request(struct vfio_device *vdev, unsigned int count)
|
||||
"Relaying device request to user (#%u)\n",
|
||||
count);
|
||||
|
||||
eventfd_signal(private->req_trigger, 1);
|
||||
eventfd_signal(private->req_trigger);
|
||||
} else if (count == 0) {
|
||||
dev_notice(dev,
|
||||
"No device request channel registered, blocked until released by user\n");
|
||||
|
@ -1794,7 +1794,7 @@ static void vfio_ap_mdev_request(struct vfio_device *vdev, unsigned int count)
|
||||
"Relaying device request to user (#%u)\n",
|
||||
count);
|
||||
|
||||
eventfd_signal(matrix_mdev->req_trigger, 1);
|
||||
eventfd_signal(matrix_mdev->req_trigger);
|
||||
} else if (count == 0) {
|
||||
dev_notice(dev,
|
||||
"No device request registered, blocked until released by user\n");
|
||||
|
@ -831,7 +831,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
|
||||
io_data->kiocb->ki_complete(io_data->kiocb, ret);
|
||||
|
||||
if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd)
|
||||
eventfd_signal(io_data->ffs->ffs_eventfd, 1);
|
||||
eventfd_signal(io_data->ffs->ffs_eventfd);
|
||||
|
||||
if (io_data->read)
|
||||
kfree(io_data->to_free);
|
||||
@ -2738,7 +2738,7 @@ static void __ffs_event_add(struct ffs_data *ffs,
|
||||
ffs->ev.types[ffs->ev.count++] = type;
|
||||
wake_up_locked(&ffs->ev.waitq);
|
||||
if (ffs->ffs_eventfd)
|
||||
eventfd_signal(ffs->ffs_eventfd, 1);
|
||||
eventfd_signal(ffs->ffs_eventfd);
|
||||
}
|
||||
|
||||
static void ffs_event_add(struct ffs_data *ffs,
|
||||
|
@ -493,7 +493,7 @@ static void vduse_vq_kick(struct vduse_virtqueue *vq)
|
||||
goto unlock;
|
||||
|
||||
if (vq->kickfd)
|
||||
eventfd_signal(vq->kickfd, 1);
|
||||
eventfd_signal(vq->kickfd);
|
||||
else
|
||||
vq->kicked = true;
|
||||
unlock:
|
||||
@ -911,7 +911,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
|
||||
eventfd_ctx_put(vq->kickfd);
|
||||
vq->kickfd = ctx;
|
||||
if (vq->ready && vq->kicked && vq->kickfd) {
|
||||
eventfd_signal(vq->kickfd, 1);
|
||||
eventfd_signal(vq->kickfd);
|
||||
vq->kicked = false;
|
||||
}
|
||||
spin_unlock(&vq->kick_lock);
|
||||
@ -960,7 +960,7 @@ static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
|
||||
|
||||
spin_lock_irq(&vq->irq_lock);
|
||||
if (vq->ready && vq->cb.trigger) {
|
||||
eventfd_signal(vq->cb.trigger, 1);
|
||||
eventfd_signal(vq->cb.trigger);
|
||||
signal = true;
|
||||
}
|
||||
spin_unlock_irq(&vq->irq_lock);
|
||||
@ -1157,7 +1157,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
fput(f);
|
||||
break;
|
||||
}
|
||||
ret = receive_fd(f, perm_to_file_flags(entry.perm));
|
||||
ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
|
||||
fput(f);
|
||||
break;
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
|
||||
{
|
||||
struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg;
|
||||
|
||||
eventfd_signal(mc_irq->trigger, 1);
|
||||
eventfd_signal(mc_irq->trigger);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -443,7 +443,7 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
|
||||
*/
|
||||
down_write(&vdev->memory_lock);
|
||||
if (vdev->pm_wake_eventfd_ctx) {
|
||||
eventfd_signal(vdev->pm_wake_eventfd_ctx, 1);
|
||||
eventfd_signal(vdev->pm_wake_eventfd_ctx);
|
||||
__vfio_pci_runtime_pm_exit(vdev);
|
||||
}
|
||||
up_write(&vdev->memory_lock);
|
||||
@ -1883,7 +1883,7 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
|
||||
pci_notice_ratelimited(pdev,
|
||||
"Relaying device request to user (#%u)\n",
|
||||
count);
|
||||
eventfd_signal(vdev->req_trigger, 1);
|
||||
eventfd_signal(vdev->req_trigger);
|
||||
} else if (count == 0) {
|
||||
pci_warn(pdev,
|
||||
"No device request channel registered, blocked until released by user\n");
|
||||
@ -2302,7 +2302,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
|
||||
mutex_lock(&vdev->igate);
|
||||
|
||||
if (vdev->err_trigger)
|
||||
eventfd_signal(vdev->err_trigger, 1);
|
||||
eventfd_signal(vdev->err_trigger);
|
||||
|
||||
mutex_unlock(&vdev->igate);
|
||||
|
||||
|
@ -94,7 +94,7 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
|
||||
ctx = vfio_irq_ctx_get(vdev, 0);
|
||||
if (WARN_ON_ONCE(!ctx))
|
||||
return;
|
||||
eventfd_signal(ctx->trigger, 1);
|
||||
eventfd_signal(ctx->trigger);
|
||||
}
|
||||
}
|
||||
|
||||
@ -342,7 +342,7 @@ static irqreturn_t vfio_msihandler(int irq, void *arg)
|
||||
{
|
||||
struct eventfd_ctx *trigger = arg;
|
||||
|
||||
eventfd_signal(trigger, 1);
|
||||
eventfd_signal(trigger);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
@ -689,11 +689,11 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
|
||||
if (!ctx)
|
||||
continue;
|
||||
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
||||
eventfd_signal(ctx->trigger, 1);
|
||||
eventfd_signal(ctx->trigger);
|
||||
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
||||
uint8_t *bools = data;
|
||||
if (bools[i - start])
|
||||
eventfd_signal(ctx->trigger, 1);
|
||||
eventfd_signal(ctx->trigger);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -707,7 +707,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
|
||||
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
||||
if (*ctx) {
|
||||
if (count) {
|
||||
eventfd_signal(*ctx, 1);
|
||||
eventfd_signal(*ctx);
|
||||
} else {
|
||||
eventfd_ctx_put(*ctx);
|
||||
*ctx = NULL;
|
||||
@ -722,7 +722,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
|
||||
|
||||
trigger = *(uint8_t *)data;
|
||||
if (trigger && *ctx)
|
||||
eventfd_signal(*ctx, 1);
|
||||
eventfd_signal(*ctx);
|
||||
|
||||
return 0;
|
||||
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
||||
|
@ -155,7 +155,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
|
||||
spin_unlock_irqrestore(&irq_ctx->lock, flags);
|
||||
|
||||
if (ret == IRQ_HANDLED)
|
||||
eventfd_signal(irq_ctx->trigger, 1);
|
||||
eventfd_signal(irq_ctx->trigger);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -164,7 +164,7 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
|
||||
{
|
||||
struct vfio_platform_irq *irq_ctx = dev_id;
|
||||
|
||||
eventfd_signal(irq_ctx->trigger, 1);
|
||||
eventfd_signal(irq_ctx->trigger);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
@ -178,7 +178,7 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
|
||||
struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
|
||||
|
||||
if (call_ctx)
|
||||
eventfd_signal(call_ctx, 1);
|
||||
eventfd_signal(call_ctx);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
@ -189,7 +189,7 @@ static irqreturn_t vhost_vdpa_config_cb(void *private)
|
||||
struct eventfd_ctx *config_ctx = v->config_ctx;
|
||||
|
||||
if (config_ctx)
|
||||
eventfd_signal(config_ctx, 1);
|
||||
eventfd_signal(config_ctx);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
@ -2248,7 +2248,7 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
|
||||
len -= l;
|
||||
if (!len) {
|
||||
if (vq->log_ctx)
|
||||
eventfd_signal(vq->log_ctx, 1);
|
||||
eventfd_signal(vq->log_ctx);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -2271,7 +2271,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
|
||||
log_used(vq, (used - (void __user *)vq->used),
|
||||
sizeof vq->used->flags);
|
||||
if (vq->log_ctx)
|
||||
eventfd_signal(vq->log_ctx, 1);
|
||||
eventfd_signal(vq->log_ctx);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -2289,7 +2289,7 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq)
|
||||
log_used(vq, (used - (void __user *)vq->used),
|
||||
sizeof *vhost_avail_event(vq));
|
||||
if (vq->log_ctx)
|
||||
eventfd_signal(vq->log_ctx, 1);
|
||||
eventfd_signal(vq->log_ctx);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -2715,7 +2715,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
|
||||
log_used(vq, offsetof(struct vring_used, idx),
|
||||
sizeof vq->used->idx);
|
||||
if (vq->log_ctx)
|
||||
eventfd_signal(vq->log_ctx, 1);
|
||||
eventfd_signal(vq->log_ctx);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
@ -2763,7 +2763,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
|
||||
{
|
||||
/* Signal the Guest tell them we used something up. */
|
||||
if (vq->call_ctx.ctx && vhost_notify(dev, vq))
|
||||
eventfd_signal(vq->call_ctx.ctx, 1);
|
||||
eventfd_signal(vq->call_ctx.ctx);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vhost_signal);
|
||||
|
||||
|
@ -249,7 +249,7 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
|
||||
#define vq_err(vq, fmt, ...) do { \
|
||||
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
|
||||
if ((vq)->error_ctx) \
|
||||
eventfd_signal((vq)->error_ctx, 1);\
|
||||
eventfd_signal((vq)->error_ctx);\
|
||||
} while (0)
|
||||
|
||||
enum {
|
||||
|
@ -223,7 +223,7 @@ static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client,
|
||||
mutex_lock(&client->vm->ioeventfds_lock);
|
||||
p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type);
|
||||
if (p)
|
||||
eventfd_signal(p->eventfd, 1);
|
||||
eventfd_signal(p->eventfd);
|
||||
mutex_unlock(&client->vm->ioeventfds_lock);
|
||||
|
||||
return 0;
|
||||
|
@ -1147,7 +1147,7 @@ static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id)
|
||||
if (ioreq->addr == kioeventfd->addr + VIRTIO_MMIO_QUEUE_NOTIFY &&
|
||||
ioreq->size == kioeventfd->addr_len &&
|
||||
(ioreq->data & QUEUE_NOTIFY_VQ_MASK) == kioeventfd->vq) {
|
||||
eventfd_signal(kioeventfd->eventfd, 1);
|
||||
eventfd_signal(kioeventfd->eventfd);
|
||||
state = STATE_IORESP_READY;
|
||||
break;
|
||||
}
|
||||
|
fs/aio.c: 85 lines changed
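The fs/aio.c hunks that follow implement the "process aio completions in batches" item from the merge message: aio_complete() now only wakes sleepers whose requested min_nr worth of events is available. Nothing changes for userspace, but the effect is easiest to picture from the io_getevents() side; a rough libaio-based sketch (context setup elided, the counts are arbitrary):

/*
 * Illustrative userspace consumer (libaio), not part of the kernel change:
 * with the batching in aio_complete(), a sleeper like this is woken once
 * eight completions are available rather than once per completion.
 */
#include <libaio.h>
#include <stdio.h>

static int drain_some(io_context_t ctx)
{
	struct io_event events[32];
	int n;

	/* min_nr = 8, nr = 32, no timeout: sleep until 8 events arrived */
	n = io_getevents(ctx, 8, 32, events, NULL);
	if (n < 0)
		return n;		/* negative errno */
	printf("reaped %d completions\n", n);
	return n;
}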
@ -266,7 +266,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
|
||||
return ERR_CAST(inode);
|
||||
|
||||
inode->i_mapping->a_ops = &aio_ctx_aops;
|
||||
inode->i_mapping->private_data = ctx;
|
||||
inode->i_mapping->i_private_data = ctx;
|
||||
inode->i_size = PAGE_SIZE * nr_pages;
|
||||
|
||||
file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
|
||||
@ -316,10 +316,10 @@ static void put_aio_ring_file(struct kioctx *ctx)
|
||||
|
||||
/* Prevent further access to the kioctx from migratepages */
|
||||
i_mapping = aio_ring_file->f_mapping;
|
||||
spin_lock(&i_mapping->private_lock);
|
||||
i_mapping->private_data = NULL;
|
||||
spin_lock(&i_mapping->i_private_lock);
|
||||
i_mapping->i_private_data = NULL;
|
||||
ctx->aio_ring_file = NULL;
|
||||
spin_unlock(&i_mapping->private_lock);
|
||||
spin_unlock(&i_mapping->i_private_lock);
|
||||
|
||||
fput(aio_ring_file);
|
||||
}
|
||||
@ -422,9 +422,9 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
|
||||
|
||||
rc = 0;
|
||||
|
||||
/* mapping->private_lock here protects against the kioctx teardown. */
|
||||
spin_lock(&mapping->private_lock);
|
||||
ctx = mapping->private_data;
|
||||
/* mapping->i_private_lock here protects against the kioctx teardown. */
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
ctx = mapping->i_private_data;
|
||||
if (!ctx) {
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
@ -476,7 +476,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
|
||||
out_unlock:
|
||||
mutex_unlock(&ctx->ring_lock);
|
||||
out:
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
return rc;
|
||||
}
|
||||
#else
|
||||
@ -1106,6 +1106,11 @@ static inline void iocb_destroy(struct aio_kiocb *iocb)
|
||||
kmem_cache_free(kiocb_cachep, iocb);
|
||||
}
|
||||
|
||||
struct aio_waiter {
|
||||
struct wait_queue_entry w;
|
||||
size_t min_nr;
|
||||
};
|
||||
|
||||
/* aio_complete
|
||||
* Called when the io request on the given iocb is complete.
|
||||
*/
|
||||
@ -1114,7 +1119,7 @@ static void aio_complete(struct aio_kiocb *iocb)
|
||||
struct kioctx *ctx = iocb->ki_ctx;
|
||||
struct aio_ring *ring;
|
||||
struct io_event *ev_page, *event;
|
||||
unsigned tail, pos, head;
|
||||
unsigned tail, pos, head, avail;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
@ -1156,6 +1161,10 @@ static void aio_complete(struct aio_kiocb *iocb)
|
||||
ctx->completed_events++;
|
||||
if (ctx->completed_events > 1)
|
||||
refill_reqs_available(ctx, head, tail);
|
||||
|
||||
avail = tail > head
|
||||
? tail - head
|
||||
: tail + ctx->nr_events - head;
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
|
||||
pr_debug("added to ring %p at [%u]\n", iocb, tail);
|
||||
@ -1166,7 +1175,7 @@ static void aio_complete(struct aio_kiocb *iocb)
|
||||
* from IRQ context.
|
||||
*/
|
||||
if (iocb->ki_eventfd)
|
||||
eventfd_signal(iocb->ki_eventfd, 1);
|
||||
eventfd_signal(iocb->ki_eventfd);
|
||||
|
||||
/*
|
||||
* We have to order our ring_info tail store above and test
|
||||
@ -1176,8 +1185,18 @@ static void aio_complete(struct aio_kiocb *iocb)
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
if (waitqueue_active(&ctx->wait))
|
||||
wake_up(&ctx->wait);
|
||||
if (waitqueue_active(&ctx->wait)) {
|
||||
struct aio_waiter *curr, *next;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ctx->wait.lock, flags);
|
||||
list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
|
||||
if (avail >= curr->min_nr) {
|
||||
list_del_init_careful(&curr->w.entry);
|
||||
wake_up_process(curr->w.private);
|
||||
}
|
||||
spin_unlock_irqrestore(&ctx->wait.lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void iocb_put(struct aio_kiocb *iocb)
|
||||
@ -1290,7 +1309,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
|
||||
struct io_event __user *event,
|
||||
ktime_t until)
|
||||
{
|
||||
long ret = 0;
|
||||
struct hrtimer_sleeper t;
|
||||
struct aio_waiter w;
|
||||
long ret = 0, ret2 = 0;
|
||||
|
||||
/*
|
||||
* Note that aio_read_events() is being called as the conditional - i.e.
|
||||
@ -1306,12 +1327,38 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
|
||||
* the ringbuffer empty. So in practice we should be ok, but it's
|
||||
* something to be aware of when touching this code.
|
||||
*/
|
||||
if (until == 0)
|
||||
aio_read_events(ctx, min_nr, nr, event, &ret);
|
||||
else
|
||||
wait_event_interruptible_hrtimeout(ctx->wait,
|
||||
aio_read_events(ctx, min_nr, nr, event, &ret),
|
||||
until);
|
||||
aio_read_events(ctx, min_nr, nr, event, &ret);
|
||||
if (until == 0 || ret < 0 || ret >= min_nr)
|
||||
return ret;
|
||||
|
||||
hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
if (until != KTIME_MAX) {
|
||||
hrtimer_set_expires_range_ns(&t.timer, until, current->timer_slack_ns);
|
||||
hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
init_wait(&w.w);
|
||||
|
||||
while (1) {
|
||||
unsigned long nr_got = ret;
|
||||
|
||||
w.min_nr = min_nr - ret;
|
||||
|
||||
ret2 = prepare_to_wait_event(&ctx->wait, &w.w, TASK_INTERRUPTIBLE);
|
||||
if (!ret2 && !t.task)
|
||||
ret2 = -ETIME;
|
||||
|
||||
if (aio_read_events(ctx, min_nr, nr, event, &ret) || ret2)
|
||||
break;
|
||||
|
||||
if (nr_got == ret)
|
||||
schedule();
|
||||
}
|
||||
|
||||
finish_wait(&ctx->wait, &w.w);
|
||||
hrtimer_cancel(&t.timer);
|
||||
destroy_hrtimer_on_stack(&t.timer);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@@ -157,7 +157,7 @@ static bool chgrp_ok(struct mnt_idmap *idmap,
  * the vfsmount must be passed through @idmap. This function will then
  * take care to map the inode according to @idmap before checking
  * permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs @nop_mnt_idmap.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
  *
  * Should be called as the first thing in ->setattr implementations,
  * possibly after taking additional locks.
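The comment fixed above spells out the idmapped-mount calling convention for the setattr helpers. As a rough illustration, a filesystem ->setattr method following that convention might look like the sketch below (foofs_setattr is hypothetical; setattr_prepare(), setattr_copy() and nop_mnt_idmap are the documented VFS symbols):

static int foofs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			 struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int err;

	/*
	 * Permission checks are mapped through the mount's idmapping; a
	 * caller that wants the raw inode checked would pass &nop_mnt_idmap
	 * instead of the idmap it was handed.
	 */
	err = setattr_prepare(idmap, dentry, attr);
	if (err)
		return err;

	setattr_copy(idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}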
@ -875,7 +875,7 @@ static int attach_extent_buffer_page(struct extent_buffer *eb,
|
||||
* will not race with any other ebs.
|
||||
*/
|
||||
if (page->mapping)
|
||||
lockdep_assert_held(&page->mapping->private_lock);
|
||||
lockdep_assert_held(&page->mapping->i_private_lock);
|
||||
|
||||
if (fs_info->nodesize >= PAGE_SIZE) {
|
||||
if (!PagePrivate(page))
|
||||
@ -1741,16 +1741,16 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
|
||||
* Take private lock to ensure the subpage won't be detached
|
||||
* in the meantime.
|
||||
*/
|
||||
spin_lock(&page->mapping->private_lock);
|
||||
spin_lock(&page->mapping->i_private_lock);
|
||||
if (!PagePrivate(page)) {
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
break;
|
||||
}
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
|
||||
subpage->bitmaps)) {
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
bit_start++;
|
||||
continue;
|
||||
}
|
||||
@ -1764,7 +1764,7 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
|
||||
*/
|
||||
eb = find_extent_buffer_nolock(fs_info, start);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
|
||||
/*
|
||||
* The eb has already reached 0 refs thus find_extent_buffer()
|
||||
@ -1816,9 +1816,9 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
|
||||
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
|
||||
return submit_eb_subpage(page, wbc);
|
||||
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
if (!PagePrivate(page)) {
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1829,16 +1829,16 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
|
||||
* crashing the machine for something we can survive anyway.
|
||||
*/
|
||||
if (WARN_ON(!eb)) {
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (eb == ctx->eb) {
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
return 0;
|
||||
}
|
||||
ret = atomic_inc_not_zero(&eb->refs);
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
@ -3062,7 +3062,7 @@ static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
|
||||
{
|
||||
struct btrfs_subpage *subpage;
|
||||
|
||||
lockdep_assert_held(&page->mapping->private_lock);
|
||||
lockdep_assert_held(&page->mapping->i_private_lock);
|
||||
|
||||
if (PagePrivate(page)) {
|
||||
subpage = (struct btrfs_subpage *)page->private;
|
||||
@ -3085,14 +3085,14 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
|
||||
|
||||
/*
|
||||
* For mapped eb, we're going to change the page private, which should
|
||||
* be done under the private_lock.
|
||||
* be done under the i_private_lock.
|
||||
*/
|
||||
if (mapped)
|
||||
spin_lock(&page->mapping->private_lock);
|
||||
spin_lock(&page->mapping->i_private_lock);
|
||||
|
||||
if (!PagePrivate(page)) {
|
||||
if (mapped)
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -3116,7 +3116,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
|
||||
detach_page_private(page);
|
||||
}
|
||||
if (mapped)
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -3139,7 +3139,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
|
||||
if (!page_range_has_eb(fs_info, page))
|
||||
btrfs_detach_subpage(fs_info, page);
|
||||
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
}
|
||||
|
||||
/* Release all pages attached to the extent buffer */
|
||||
@ -3520,7 +3520,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
|
||||
/*
|
||||
* Preallocate page->private for subpage case, so that we won't
|
||||
* allocate memory with private_lock nor page lock hold.
|
||||
* allocate memory with i_private_lock nor page lock hold.
|
||||
*
|
||||
* The memory will be freed by attach_extent_buffer_page() or freed
|
||||
* manually if we exit earlier.
|
||||
@ -3541,10 +3541,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
goto free_eb;
|
||||
}
|
||||
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
exists = grab_extent_buffer(fs_info, p);
|
||||
if (exists) {
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
unlock_page(p);
|
||||
put_page(p);
|
||||
mark_extent_buffer_accessed(exists, p);
|
||||
@ -3564,7 +3564,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
* Thus needs no special handling in error path.
|
||||
*/
|
||||
btrfs_page_inc_eb_refs(fs_info, p);
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
|
||||
WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
|
||||
eb->pages[i] = p;
|
||||
@ -4569,12 +4569,12 @@ static int try_release_subpage_extent_buffer(struct page *page)
|
||||
* Finally to check if we have cleared page private, as if we have
|
||||
* released all ebs in the page, the page private should be cleared now.
|
||||
*/
|
||||
spin_lock(&page->mapping->private_lock);
|
||||
spin_lock(&page->mapping->i_private_lock);
|
||||
if (!PagePrivate(page))
|
||||
ret = 1;
|
||||
else
|
||||
ret = 0;
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
return ret;
|
||||
|
||||
}
|
||||
@ -4590,9 +4590,9 @@ int try_release_extent_buffer(struct page *page)
|
||||
* We need to make sure nobody is changing page->private, as we rely on
|
||||
* page->private as the pointer to extent buffer.
|
||||
*/
|
||||
spin_lock(&page->mapping->private_lock);
|
||||
spin_lock(&page->mapping->i_private_lock);
|
||||
if (!PagePrivate(page)) {
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -4607,10 +4607,10 @@ int try_release_extent_buffer(struct page *page)
|
||||
spin_lock(&eb->refs_lock);
|
||||
if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
|
||||
spin_unlock(&eb->refs_lock);
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
spin_unlock(&page->mapping->i_private_lock);
|
||||
|
||||
/*
|
||||
* If tree ref isn't set then we know the ref on this eb is a real ref,
|
||||
|
@ -200,7 +200,7 @@ void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
|
||||
return;
|
||||
|
||||
ASSERT(PagePrivate(page) && page->mapping);
|
||||
lockdep_assert_held(&page->mapping->private_lock);
|
||||
lockdep_assert_held(&page->mapping->i_private_lock);
|
||||
|
||||
subpage = (struct btrfs_subpage *)page->private;
|
||||
atomic_inc(&subpage->eb_refs);
|
||||
@ -215,7 +215,7 @@ void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
|
||||
return;
|
||||
|
||||
ASSERT(PagePrivate(page) && page->mapping);
|
||||
lockdep_assert_held(&page->mapping->private_lock);
|
||||
lockdep_assert_held(&page->mapping->i_private_lock);
|
||||
|
||||
subpage = (struct btrfs_subpage *)page->private;
|
||||
ASSERT(atomic_read(&subpage->eb_refs));
|
||||
|
fs/buffer.c: 108 lines changed
@ -180,11 +180,11 @@ EXPORT_SYMBOL(end_buffer_write_sync);
|
||||
* Various filesystems appear to want __find_get_block to be non-blocking.
|
||||
* But it's the page lock which protects the buffers. To get around this,
|
||||
* we get exclusion from try_to_free_buffers with the blockdev mapping's
|
||||
* private_lock.
|
||||
* i_private_lock.
|
||||
*
|
||||
* Hack idea: for the blockdev mapping, private_lock contention
|
||||
* Hack idea: for the blockdev mapping, i_private_lock contention
|
||||
* may be quite high. This code could TryLock the page, and if that
|
||||
* succeeds, there is no need to take private_lock.
|
||||
* succeeds, there is no need to take i_private_lock.
|
||||
*/
|
||||
static struct buffer_head *
|
||||
__find_get_block_slow(struct block_device *bdev, sector_t block)
|
||||
@ -204,7 +204,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
|
||||
if (IS_ERR(folio))
|
||||
goto out;
|
||||
|
||||
spin_lock(&bd_mapping->private_lock);
|
||||
spin_lock(&bd_mapping->i_private_lock);
|
||||
head = folio_buffers(folio);
|
||||
if (!head)
|
||||
goto out_unlock;
|
||||
@ -236,7 +236,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
|
||||
1 << bd_inode->i_blkbits);
|
||||
}
|
||||
out_unlock:
|
||||
spin_unlock(&bd_mapping->private_lock);
|
||||
spin_unlock(&bd_mapping->i_private_lock);
|
||||
folio_put(folio);
|
||||
out:
|
||||
return ret;
|
||||
@ -467,25 +467,25 @@ EXPORT_SYMBOL(mark_buffer_async_write);
|
||||
*
|
||||
* The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
|
||||
* inode_has_buffers() and invalidate_inode_buffers() are provided for the
|
||||
* management of a list of dependent buffers at ->i_mapping->private_list.
|
||||
* management of a list of dependent buffers at ->i_mapping->i_private_list.
|
||||
*
|
||||
* Locking is a little subtle: try_to_free_buffers() will remove buffers
|
||||
* from their controlling inode's queue when they are being freed. But
|
||||
* try_to_free_buffers() will be operating against the *blockdev* mapping
|
||||
* at the time, not against the S_ISREG file which depends on those buffers.
|
||||
* So the locking for private_list is via the private_lock in the address_space
|
||||
* So the locking for i_private_list is via the i_private_lock in the address_space
|
||||
* which backs the buffers. Which is different from the address_space
|
||||
* against which the buffers are listed. So for a particular address_space,
|
||||
* mapping->private_lock does *not* protect mapping->private_list! In fact,
|
||||
* mapping->private_list will always be protected by the backing blockdev's
|
||||
* ->private_lock.
|
||||
* mapping->i_private_lock does *not* protect mapping->i_private_list! In fact,
|
||||
* mapping->i_private_list will always be protected by the backing blockdev's
|
||||
* ->i_private_lock.
|
||||
*
|
||||
* Which introduces a requirement: all buffers on an address_space's
|
||||
* ->private_list must be from the same address_space: the blockdev's.
|
||||
* ->i_private_list must be from the same address_space: the blockdev's.
|
||||
*
|
||||
* address_spaces which do not place buffers at ->private_list via these
|
||||
* utility functions are free to use private_lock and private_list for
|
||||
* whatever they want. The only requirement is that list_empty(private_list)
|
||||
* address_spaces which do not place buffers at ->i_private_list via these
|
||||
* utility functions are free to use i_private_lock and i_private_list for
|
||||
* whatever they want. The only requirement is that list_empty(i_private_list)
|
||||
* be true at clear_inode() time.
|
||||
*
|
||||
* FIXME: clear_inode should not call invalidate_inode_buffers(). The
|
||||
@ -508,7 +508,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
|
||||
*/
|
||||
|
||||
/*
|
||||
* The buffer's backing address_space's private_lock must be held
|
||||
* The buffer's backing address_space's i_private_lock must be held
|
||||
*/
|
||||
static void __remove_assoc_queue(struct buffer_head *bh)
|
||||
{
|
||||
@ -519,7 +519,7 @@ static void __remove_assoc_queue(struct buffer_head *bh)
|
||||
|
||||
int inode_has_buffers(struct inode *inode)
|
||||
{
|
||||
return !list_empty(&inode->i_data.private_list);
|
||||
return !list_empty(&inode->i_data.i_private_list);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -561,7 +561,7 @@ repeat:
|
||||
* sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
|
||||
* @mapping: the mapping which wants those buffers written
|
||||
*
|
||||
* Starts I/O against the buffers at mapping->private_list, and waits upon
|
||||
* Starts I/O against the buffers at mapping->i_private_list, and waits upon
|
||||
* that I/O.
|
||||
*
|
||||
* Basically, this is a convenience function for fsync().
|
||||
@ -570,13 +570,13 @@ repeat:
|
||||
*/
|
||||
int sync_mapping_buffers(struct address_space *mapping)
|
||||
{
|
||||
struct address_space *buffer_mapping = mapping->private_data;
|
||||
struct address_space *buffer_mapping = mapping->i_private_data;
|
||||
|
||||
if (buffer_mapping == NULL || list_empty(&mapping->private_list))
|
||||
if (buffer_mapping == NULL || list_empty(&mapping->i_private_list))
|
||||
return 0;
|
||||
|
||||
return fsync_buffers_list(&buffer_mapping->private_lock,
|
||||
&mapping->private_list);
|
||||
return fsync_buffers_list(&buffer_mapping->i_private_lock,
|
||||
&mapping->i_private_list);
|
||||
}
|
||||
EXPORT_SYMBOL(sync_mapping_buffers);
|
||||
|
||||
@ -673,17 +673,17 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
|
||||
struct address_space *buffer_mapping = bh->b_folio->mapping;
|
||||
|
||||
mark_buffer_dirty(bh);
|
||||
if (!mapping->private_data) {
|
||||
mapping->private_data = buffer_mapping;
|
||||
if (!mapping->i_private_data) {
|
||||
mapping->i_private_data = buffer_mapping;
|
||||
} else {
|
||||
BUG_ON(mapping->private_data != buffer_mapping);
|
||||
BUG_ON(mapping->i_private_data != buffer_mapping);
|
||||
}
|
||||
if (!bh->b_assoc_map) {
|
||||
spin_lock(&buffer_mapping->private_lock);
|
||||
spin_lock(&buffer_mapping->i_private_lock);
|
||||
list_move_tail(&bh->b_assoc_buffers,
|
||||
&mapping->private_list);
|
||||
&mapping->i_private_list);
|
||||
bh->b_assoc_map = mapping;
|
||||
spin_unlock(&buffer_mapping->private_lock);
|
||||
spin_unlock(&buffer_mapping->i_private_lock);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(mark_buffer_dirty_inode);
|
||||
@ -706,7 +706,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
|
||||
* bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
|
||||
* page on the dirty page list.
|
||||
*
|
||||
* We use private_lock to lock against try_to_free_buffers while using the
|
||||
* We use i_private_lock to lock against try_to_free_buffers while using the
|
||||
* page's buffer list. Also use this to protect against clean buffers being
|
||||
* added to the page after it was set dirty.
|
||||
*
|
||||
@ -718,7 +718,7 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
|
||||
struct buffer_head *head;
|
||||
bool newly_dirty;
|
||||
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
head = folio_buffers(folio);
|
||||
if (head) {
|
||||
struct buffer_head *bh = head;
|
||||
@ -734,7 +734,7 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
|
||||
*/
|
||||
folio_memcg_lock(folio);
|
||||
newly_dirty = !folio_test_set_dirty(folio);
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
|
||||
if (newly_dirty)
|
||||
__folio_mark_dirty(folio, mapping, 1);
|
||||
@ -827,7 +827,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
|
||||
smp_mb();
|
||||
if (buffer_dirty(bh)) {
|
||||
list_add(&bh->b_assoc_buffers,
|
||||
&mapping->private_list);
|
||||
&mapping->i_private_list);
|
||||
bh->b_assoc_map = mapping;
|
||||
}
|
||||
spin_unlock(lock);
|
||||
@ -851,7 +851,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
|
||||
* probably unmounting the fs, but that doesn't mean we have already
|
||||
* done a sync(). Just drop the buffers from the inode list.
|
||||
*
|
||||
* NOTE: we take the inode's blockdev's mapping's private_lock. Which
|
||||
* NOTE: we take the inode's blockdev's mapping's i_private_lock. Which
|
||||
* assumes that all the buffers are against the blockdev. Not true
|
||||
* for reiserfs.
|
||||
*/
|
||||
@ -859,13 +859,13 @@ void invalidate_inode_buffers(struct inode *inode)
|
||||
{
|
||||
if (inode_has_buffers(inode)) {
|
||||
struct address_space *mapping = &inode->i_data;
|
||||
struct list_head *list = &mapping->private_list;
|
||||
struct address_space *buffer_mapping = mapping->private_data;
|
||||
struct list_head *list = &mapping->i_private_list;
|
||||
struct address_space *buffer_mapping = mapping->i_private_data;
|
||||
|
||||
spin_lock(&buffer_mapping->private_lock);
|
||||
spin_lock(&buffer_mapping->i_private_lock);
|
||||
while (!list_empty(list))
|
||||
__remove_assoc_queue(BH_ENTRY(list->next));
|
||||
spin_unlock(&buffer_mapping->private_lock);
|
||||
spin_unlock(&buffer_mapping->i_private_lock);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(invalidate_inode_buffers);
|
||||
@ -882,10 +882,10 @@ int remove_inode_buffers(struct inode *inode)
|
||||
|
||||
if (inode_has_buffers(inode)) {
|
||||
struct address_space *mapping = &inode->i_data;
|
||||
struct list_head *list = &mapping->private_list;
|
||||
struct address_space *buffer_mapping = mapping->private_data;
|
||||
struct list_head *list = &mapping->i_private_list;
|
||||
struct address_space *buffer_mapping = mapping->i_private_data;
|
||||
|
||||
spin_lock(&buffer_mapping->private_lock);
|
||||
spin_lock(&buffer_mapping->i_private_lock);
|
||||
while (!list_empty(list)) {
|
||||
struct buffer_head *bh = BH_ENTRY(list->next);
|
||||
if (buffer_dirty(bh)) {
|
||||
@ -894,7 +894,7 @@ int remove_inode_buffers(struct inode *inode)
|
||||
}
|
||||
__remove_assoc_queue(bh);
|
||||
}
|
||||
spin_unlock(&buffer_mapping->private_lock);
|
||||
spin_unlock(&buffer_mapping->i_private_lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1064,11 +1064,11 @@ grow_dev_page(struct block_device *bdev, sector_t block,
|
||||
* lock to be atomic wrt __find_get_block(), which does not
|
||||
* run under the folio lock.
|
||||
*/
|
||||
spin_lock(&inode->i_mapping->private_lock);
|
||||
spin_lock(&inode->i_mapping->i_private_lock);
|
||||
link_dev_buffers(folio, bh);
|
||||
end_block = folio_init_buffers(folio, bdev,
|
||||
(sector_t)index << sizebits, size);
|
||||
spin_unlock(&inode->i_mapping->private_lock);
|
||||
spin_unlock(&inode->i_mapping->i_private_lock);
|
||||
done:
|
||||
ret = (block < end_block) ? 1 : -ENXIO;
|
||||
failed:
|
||||
@ -1168,7 +1168,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
|
||||
* and then attach the address_space's inode to its superblock's dirty
|
||||
* inode list.
|
||||
*
|
||||
* mark_buffer_dirty() is atomic. It takes bh->b_folio->mapping->private_lock,
|
||||
* mark_buffer_dirty() is atomic. It takes bh->b_folio->mapping->i_private_lock,
|
||||
* i_pages lock and mapping->host->i_lock.
|
||||
*/
|
||||
void mark_buffer_dirty(struct buffer_head *bh)
|
||||
@ -1246,10 +1246,10 @@ void __bforget(struct buffer_head *bh)
|
||||
if (bh->b_assoc_map) {
|
||||
struct address_space *buffer_mapping = bh->b_folio->mapping;
|
||||
|
||||
spin_lock(&buffer_mapping->private_lock);
|
||||
spin_lock(&buffer_mapping->i_private_lock);
|
||||
list_del_init(&bh->b_assoc_buffers);
|
||||
bh->b_assoc_map = NULL;
|
||||
spin_unlock(&buffer_mapping->private_lock);
|
||||
spin_unlock(&buffer_mapping->i_private_lock);
|
||||
}
|
||||
__brelse(bh);
|
||||
}
|
||||
@ -1638,7 +1638,7 @@ EXPORT_SYMBOL(block_invalidate_folio);
|
||||
|
||||
/*
|
||||
* We attach and possibly dirty the buffers atomically wrt
|
||||
* block_dirty_folio() via private_lock. try_to_free_buffers
|
||||
* block_dirty_folio() via i_private_lock. try_to_free_buffers
|
||||
* is already excluded via the folio lock.
|
||||
*/
|
||||
struct buffer_head *create_empty_buffers(struct folio *folio,
|
||||
@ -1656,7 +1656,7 @@ struct buffer_head *create_empty_buffers(struct folio *folio,
|
||||
} while (bh);
|
||||
tail->b_this_page = head;
|
||||
|
||||
spin_lock(&folio->mapping->private_lock);
|
||||
spin_lock(&folio->mapping->i_private_lock);
|
||||
if (folio_test_uptodate(folio) || folio_test_dirty(folio)) {
|
||||
bh = head;
|
||||
do {
|
||||
@ -1668,7 +1668,7 @@ struct buffer_head *create_empty_buffers(struct folio *folio,
|
||||
} while (bh != head);
|
||||
}
|
||||
folio_attach_private(folio, head);
|
||||
spin_unlock(&folio->mapping->private_lock);
|
||||
spin_unlock(&folio->mapping->i_private_lock);
|
||||
|
||||
return head;
|
||||
}
|
||||
@ -1715,7 +1715,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
|
||||
if (!folio_buffers(folio))
|
||||
continue;
|
||||
/*
|
||||
* We use folio lock instead of bd_mapping->private_lock
|
||||
* We use folio lock instead of bd_mapping->i_private_lock
|
||||
* to pin buffers here since we can afford to sleep and
|
||||
* it scales better than a global spinlock lock.
|
||||
*/
|
||||
@ -2883,7 +2883,7 @@ EXPORT_SYMBOL(sync_dirty_buffer);
|
||||
* are unused, and releases them if so.
|
||||
*
|
||||
* Exclusion against try_to_free_buffers may be obtained by either
|
||||
* locking the folio or by holding its mapping's private_lock.
|
||||
* locking the folio or by holding its mapping's i_private_lock.
|
||||
*
|
||||
* If the folio is dirty but all the buffers are clean then we need to
|
||||
* be sure to mark the folio clean as well. This is because the folio
|
||||
@ -2894,7 +2894,7 @@ EXPORT_SYMBOL(sync_dirty_buffer);
|
||||
* The same applies to regular filesystem folios: if all the buffers are
|
||||
* clean then we set the folio clean and proceed. To do that, we require
|
||||
* total exclusion from block_dirty_folio(). That is obtained with
|
||||
* private_lock.
|
||||
* i_private_lock.
|
||||
*
|
||||
* try_to_free_buffers() is non-blocking.
|
||||
*/
|
||||
@ -2946,7 +2946,7 @@ bool try_to_free_buffers(struct folio *folio)
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
ret = drop_buffers(folio, &buffers_to_free);
|
||||
|
||||
/*
|
||||
@ -2959,13 +2959,13 @@ bool try_to_free_buffers(struct folio *folio)
|
||||
* the folio's buffers clean. We discover that here and clean
|
||||
* the folio also.
|
||||
*
|
||||
* private_lock must be held over this entire operation in order
|
||||
* i_private_lock must be held over this entire operation in order
|
||||
* to synchronise against block_dirty_folio and prevent the
|
||||
* dirty bit from being lost.
|
||||
*/
|
||||
if (ret)
|
||||
folio_cancel_dirty(folio);
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
out:
|
||||
if (buffers_to_free) {
|
||||
struct buffer_head *bh = buffers_to_free;
|
||||
|
fs/dax.c: 2 lines changed

@@ -1128,7 +1128,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
 	/* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
 	bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
 			 srcmap->type == IOMAP_UNWRITTEN;
-	void *saddr = 0;
+	void *saddr = NULL;
 	int ret = 0;
 
 	if (!zero_edge) {

@@ -1114,7 +1114,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	loff_t offset = iocb->ki_pos;
 	const loff_t end = offset + count;
 	struct dio *dio;
-	struct dio_submit sdio = { 0, };
+	struct dio_submit sdio = { NULL, };
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
fs/eventfd.c: 46 lines changed

@@ -43,7 +43,17 @@ struct eventfd_ctx {
 	int id;
 };
 
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
+/**
+ * eventfd_signal_mask - Increment the event counter
+ * @ctx: [in] Pointer to the eventfd context.
+ * @mask: [in] poll mask
+ *
+ * This function is supposed to be called by the kernel in paths that do not
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+ * value, and we signal this as overflow condition by returning a EPOLLERR
+ * to poll(2).
+ */
+void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask)
 {
 	unsigned long flags;
 
@@ -56,45 +66,23 @@ __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
 	 * safe context.
 	 */
 	if (WARN_ON_ONCE(current->in_eventfd))
-		return 0;
+		return;
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
 	current->in_eventfd = 1;
-	if (ULLONG_MAX - ctx->count < n)
-		n = ULLONG_MAX - ctx->count;
-	ctx->count += n;
+	if (ctx->count < ULLONG_MAX)
+		ctx->count++;
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
 	current->in_eventfd = 0;
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
-
-	return n;
 }
-
-/**
- * eventfd_signal - Adds @n to the eventfd counter.
- * @ctx: [in] Pointer to the eventfd context.
- * @n: [in] Value of the counter to be added to the eventfd internal counter.
- *          The value cannot be negative.
- *
- * This function is supposed to be called by the kernel in paths that do not
- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
- * value, and we signal this as overflow condition by returning a EPOLLERR
- * to poll(2).
- *
- * Returns the amount by which the counter was incremented. This will be less
- * than @n if the counter has overflowed.
- */
-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
-{
-	return eventfd_signal_mask(ctx, n, 0);
-}
-EXPORT_SYMBOL_GPL(eventfd_signal);
+EXPORT_SYMBOL_GPL(eventfd_signal_mask);
 
 static void eventfd_free_ctx(struct eventfd_ctx *ctx)
 {
 	if (ctx->id >= 0)
-		ida_simple_remove(&eventfd_ida, ctx->id);
+		ida_free(&eventfd_ida, ctx->id);
 	kfree(ctx);
 }
 
@@ -407,7 +395,7 @@ static int do_eventfd(unsigned int count, int flags)
 	init_waitqueue_head(&ctx->wqh);
 	ctx->count = count;
 	ctx->flags = flags;
-	ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
+	ctx->id = ida_alloc(&eventfd_ida, GFP_KERNEL);
 
 	flags &= EFD_SHARED_FCNTL_FLAGS;
 	flags |= O_RDWR;
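Besides the signalling rework, the eventfd.c hunk above also carries the ida_simple_*() to ida_*() conversion mentioned in the merge message. For code doing the same migration, the mapping is roughly the sketch below (my_ida and the wrappers are made up; the ida_*() helpers are the real API):

#include <linux/idr.h>

/* Made-up example IDA, purely to show the ida_simple_*() -> ida_*() mapping. */
static DEFINE_IDA(my_ida);

static int my_get_id(void)
{
	/* Was: ida_simple_get(&my_ida, 0, 0, GFP_KERNEL), i.e. "any id >= 0". */
	return ida_alloc(&my_ida, GFP_KERNEL);	/* returns the id or -errno */
}

static void my_put_id(int id)
{
	/* Was: ida_simple_remove(&my_ida, id). */
	ida_free(&my_ida, id);
}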
@@ -1261,7 +1261,7 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
- * ext4 never places buffers on inode->i_mapping->private_list. metadata
+ * ext4 never places buffers on inode->i_mapping->i_private_list. metadata
  * buffers are managed internally.
  */
 static int ext4_write_end(struct file *file,
@@ -3213,7 +3213,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 	}
 
 	/* Any metadata buffers to write? */
-	if (!list_empty(&inode->i_mapping->private_list))
+	if (!list_empty(&inode->i_mapping->i_private_list))
 		return true;
 	return inode->i_state & I_DIRTY_DATASYNC;
 }
99
fs/file.c
99
fs/file.c
@ -629,19 +629,23 @@ void fd_install(unsigned int fd, struct file *file)
|
||||
EXPORT_SYMBOL(fd_install);
|
||||
|
||||
/**
|
||||
* pick_file - return file associatd with fd
|
||||
* file_close_fd_locked - return file associated with fd
|
||||
* @files: file struct to retrieve file from
|
||||
* @fd: file descriptor to retrieve file for
|
||||
*
|
||||
* Doesn't take a separate reference count.
|
||||
*
|
||||
* Context: files_lock must be held.
|
||||
*
|
||||
* Returns: The file associated with @fd (NULL if @fd is not open)
|
||||
*/
|
||||
static struct file *pick_file(struct files_struct *files, unsigned fd)
|
||||
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd)
|
||||
{
|
||||
struct fdtable *fdt = files_fdtable(files);
|
||||
struct file *file;
|
||||
|
||||
lockdep_assert_held(&files->file_lock);
|
||||
|
||||
if (fd >= fdt->max_fds)
|
||||
return NULL;
|
||||
|
||||
@ -660,7 +664,7 @@ int close_fd(unsigned fd)
|
||||
struct file *file;
|
||||
|
||||
spin_lock(&files->file_lock);
|
||||
file = pick_file(files, fd);
|
||||
file = file_close_fd_locked(files, fd);
|
||||
spin_unlock(&files->file_lock);
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
@ -707,7 +711,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
|
||||
max_fd = min(max_fd, n);
|
||||
|
||||
for (; fd <= max_fd; fd++) {
|
||||
file = pick_file(files, fd);
|
||||
file = file_close_fd_locked(files, fd);
|
||||
if (file) {
|
||||
spin_unlock(&files->file_lock);
|
||||
filp_close(file, files);
|
||||
@@ -795,26 +799,21 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
        return 0;
}

-/*
- * See close_fd_get_file() below, this variant assumes current->files->file_lock
- * is held.
+/**
+ * file_close_fd - return file associated with fd
+ * @fd: file descriptor to retrieve file for
+ *
+ * Doesn't take a separate reference count.
+ *
+ * Returns: The file associated with @fd (NULL if @fd is not open)
 */
-struct file *__close_fd_get_file(unsigned int fd)
-{
-       return pick_file(current->files, fd);
-}
-
-/*
- * variant of close_fd that gets a ref on the file for later fput.
- * The caller must ensure that filp_close() called on the file.
- */
-struct file *close_fd_get_file(unsigned int fd)
+struct file *file_close_fd(unsigned int fd)
{
        struct files_struct *files = current->files;
        struct file *file;

        spin_lock(&files->file_lock);
-       file = pick_file(files, fd);
+       file = file_close_fd_locked(files, fd);
        spin_unlock(&files->file_lock);

        return file;
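For code outside fs/file.c the rename is purely cosmetic: file_close_fd() still returns the file with the descriptor's reference transferred to the caller, which must hand it to filp_close(). A hedged sketch of the typical caller, following the sys_close() hunk later in this diff; example_close is an invented name:

        #include <linux/fs.h>
        #include <linux/file.h>
        #include <linux/sched.h>

        /* Sketch only: mirrors how sys_close() uses the renamed helper. */
        static int example_close(unsigned int fd)
        {
                struct file *file;

                file = file_close_fd(fd);       /* was close_fd_get_file(fd) */
                if (!file)
                        return -EBADF;

                /* The caller owns the descriptor's reference and must close it. */
                return filp_close(file, current->files);
        }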
@ -959,31 +958,45 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
|
||||
struct file *file;
|
||||
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
|
||||
struct file __rcu **fdentry;
|
||||
unsigned long nospec_mask;
|
||||
|
||||
if (unlikely(fd >= fdt->max_fds))
|
||||
return NULL;
|
||||
|
||||
fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
|
||||
/* Mask is a 0 for invalid fd's, ~0 for valid ones */
|
||||
nospec_mask = array_index_mask_nospec(fd, fdt->max_fds);
|
||||
|
||||
/*
|
||||
* Ok, we have a file pointer. However, because we do
|
||||
* this all locklessly under RCU, we may be racing with
|
||||
* that file being closed.
|
||||
* fdentry points to the 'fd' offset, or fdt->fd[0].
|
||||
* Loading from fdt->fd[0] is always safe, because the
|
||||
* array always exists.
|
||||
*/
|
||||
fdentry = fdt->fd + (fd & nospec_mask);
|
||||
|
||||
/* Do the load, then mask any invalid result */
|
||||
file = rcu_dereference_raw(*fdentry);
|
||||
file = (void *)(nospec_mask & (unsigned long)file);
|
||||
if (unlikely(!file))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Ok, we have a file pointer that was valid at
|
||||
* some point, but it might have become stale since.
|
||||
*
|
||||
* We need to confirm it by incrementing the refcount
|
||||
* and then check the lookup again.
|
||||
*
|
||||
* atomic_long_inc_not_zero() gives us a full memory
|
||||
* barrier. We only really need an 'acquire' one to
|
||||
* protect the loads below, but we don't have that.
|
||||
*/
|
||||
if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Such a race can take two forms:
|
||||
*
|
||||
* (a) the file ref already went down to zero and the
|
||||
* file hasn't been reused yet or the file count
|
||||
* isn't zero but the file has already been reused.
|
||||
*/
|
||||
file = __get_file_rcu(fdentry);
|
||||
if (unlikely(!file))
|
||||
return NULL;
|
||||
|
||||
if (unlikely(IS_ERR(file)))
|
||||
continue;
|
||||
|
||||
/*
|
||||
*
|
||||
* (b) the file table entry has changed under us.
|
||||
* Note that we don't need to re-check the 'fdt->fd'
|
||||
* pointer having changed, because it always goes
|
||||
@ -991,7 +1004,8 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
|
||||
*
|
||||
* If so, we need to put our ref and try again.
|
||||
*/
|
||||
if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
|
||||
if (unlikely(file != rcu_dereference_raw(*fdentry)) ||
|
||||
unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
|
||||
fput(file);
|
||||
continue;
|
||||
}
|
||||
@@ -1128,13 +1142,13 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
         * atomic_read_acquire() pairs with atomic_dec_and_test() in
         * put_files_struct().
         */
-       if (atomic_read_acquire(&files->count) == 1) {
+       if (likely(atomic_read_acquire(&files->count) == 1)) {
                file = files_lookup_fd_raw(files, fd);
                if (!file || unlikely(file->f_mode & mask))
                        return 0;
                return (unsigned long)file;
        } else {
-               file = __fget(fd, mask);
+               file = __fget_files(files, fd, mask);
                if (!file)
                        return 0;
                return FDPUT_FPUT | (unsigned long)file;
@ -1282,7 +1296,7 @@ out_unlock:
|
||||
}
|
||||
|
||||
/**
|
||||
* __receive_fd() - Install received file into file descriptor table
|
||||
* receive_fd() - Install received file into file descriptor table
|
||||
* @file: struct file that was received from another process
|
||||
* @ufd: __user pointer to write new fd number to
|
||||
* @o_flags: the O_* flags to apply to the new fd entry
|
||||
@ -1296,7 +1310,7 @@ out_unlock:
|
||||
*
|
||||
* Returns newly install fd or -ve on error.
|
||||
*/
|
||||
int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
|
||||
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
|
||||
{
|
||||
int new_fd;
|
||||
int error;
|
||||
@ -1321,6 +1335,7 @@ int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
|
||||
__receive_sock(file);
|
||||
return new_fd;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(receive_fd);
|
||||
|
||||
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
|
||||
{
|
||||
@ -1336,12 +1351,6 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
|
||||
return new_fd;
|
||||
}
|
||||
|
||||
int receive_fd(struct file *file, unsigned int o_flags)
|
||||
{
|
||||
return __receive_fd(file, NULL, o_flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(receive_fd);
|
||||
|
||||
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
|
||||
{
|
||||
int err = -EBADF;
|
||||
|
@ -75,18 +75,6 @@ static inline void file_free(struct file *f)
|
||||
}
|
||||
}
|
||||
|
||||
void release_empty_file(struct file *f)
|
||||
{
|
||||
WARN_ON_ONCE(f->f_mode & (FMODE_BACKING | FMODE_OPENED));
|
||||
if (atomic_long_dec_and_test(&f->f_count)) {
|
||||
security_file_free(f);
|
||||
put_cred(f->f_cred);
|
||||
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
|
||||
percpu_counter_dec(&nr_files);
|
||||
kmem_cache_free(filp_cachep, f);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the total number of open files in the system
|
||||
*/
|
||||
@ -419,7 +407,7 @@ static void delayed_fput(struct work_struct *unused)
|
||||
|
||||
static void ____fput(struct callback_head *work)
|
||||
{
|
||||
__fput(container_of(work, struct file, f_rcuhead));
|
||||
__fput(container_of(work, struct file, f_task_work));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -445,9 +433,13 @@ void fput(struct file *file)
|
||||
if (atomic_long_dec_and_test(&file->f_count)) {
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
|
||||
file_free(file);
|
||||
return;
|
||||
}
|
||||
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
|
||||
init_task_work(&file->f_rcuhead, ____fput);
|
||||
if (!task_work_add(task, &file->f_rcuhead, TWA_RESUME))
|
||||
init_task_work(&file->f_task_work, ____fput);
|
||||
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
|
||||
return;
|
||||
/*
|
||||
* After this task has run exit_task_work(),
|
||||
|
@ -1213,7 +1213,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
|
||||
mapping->host = s->s_bdev->bd_inode;
|
||||
mapping->flags = 0;
|
||||
mapping_set_gfp_mask(mapping, GFP_NOFS);
|
||||
mapping->private_data = NULL;
|
||||
mapping->i_private_data = NULL;
|
||||
mapping->writeback_index = 0;
|
||||
}
|
||||
|
||||
|
@ -117,7 +117,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
|
||||
mapping->host = sb->s_bdev->bd_inode;
|
||||
mapping->flags = 0;
|
||||
mapping_set_gfp_mask(mapping, GFP_NOFS);
|
||||
mapping->private_data = NULL;
|
||||
mapping->i_private_data = NULL;
|
||||
mapping->writeback_index = 0;
|
||||
|
||||
spin_lock_init(&sdp->sd_log_lock);
|
||||
|
@ -30,8 +30,7 @@ struct hfsplus_wd {
|
||||
* @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
|
||||
* @buf: buffer for I/O
|
||||
* @data: output pointer for location of requested data
|
||||
* @op: direction of I/O
|
||||
* @op_flags: request op flags
|
||||
* @opf: request op flags
|
||||
*
|
||||
* The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
|
||||
* HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
|
||||
@ -43,6 +42,8 @@ struct hfsplus_wd {
|
||||
* that starts at the rounded-down address. As long as the data was
|
||||
* read using hfsplus_submit_bio() and the same buffer is used things
|
||||
* will work correctly.
|
||||
*
|
||||
* Returns: %0 on success else -errno code
|
||||
*/
|
||||
int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
|
||||
void *buf, void **data, blk_opf_t opf)
|
||||
|
@ -686,7 +686,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
|
||||
* at inode creation time. If this is a device special inode,
|
||||
* i_mapping may not point to the original address space.
|
||||
*/
|
||||
resv_map = (struct resv_map *)(&inode->i_data)->private_data;
|
||||
resv_map = (struct resv_map *)(&inode->i_data)->i_private_data;
|
||||
/* Only regular and link inodes have associated reserve maps */
|
||||
if (resv_map)
|
||||
resv_map_release(&resv_map->refs);
|
||||
@ -1000,7 +1000,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
|
||||
&hugetlbfs_i_mmap_rwsem_key);
|
||||
inode->i_mapping->a_ops = &hugetlbfs_aops;
|
||||
simple_inode_init_ts(inode);
|
||||
inode->i_mapping->private_data = resv_map;
|
||||
inode->i_mapping->i_private_data = resv_map;
|
||||
info->seals = F_SEAL_SEAL;
|
||||
switch (mode & S_IFMT) {
|
||||
default:
|
||||
|
fs/inode.c (22 lines changed)
@ -209,7 +209,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
|
||||
atomic_set(&mapping->nr_thps, 0);
|
||||
#endif
|
||||
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
|
||||
mapping->private_data = NULL;
|
||||
mapping->i_private_data = NULL;
|
||||
mapping->writeback_index = 0;
|
||||
init_rwsem(&mapping->invalidate_lock);
|
||||
lockdep_set_class_and_name(&mapping->invalidate_lock,
|
||||
@ -398,8 +398,8 @@ static void __address_space_init_once(struct address_space *mapping)
|
||||
{
|
||||
xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
|
||||
init_rwsem(&mapping->i_mmap_rwsem);
|
||||
INIT_LIST_HEAD(&mapping->private_list);
|
||||
spin_lock_init(&mapping->private_lock);
|
||||
INIT_LIST_HEAD(&mapping->i_private_list);
|
||||
spin_lock_init(&mapping->i_private_lock);
|
||||
mapping->i_mmap = RB_ROOT_CACHED;
|
||||
}
|
||||
|
||||
@ -620,7 +620,7 @@ void clear_inode(struct inode *inode)
|
||||
* nor even WARN_ON(!mapping_empty).
|
||||
*/
|
||||
xa_unlock_irq(&inode->i_data.i_pages);
|
||||
BUG_ON(!list_empty(&inode->i_data.private_list));
|
||||
BUG_ON(!list_empty(&inode->i_data.i_private_list));
|
||||
BUG_ON(!(inode->i_state & I_FREEING));
|
||||
BUG_ON(inode->i_state & I_CLEAR);
|
||||
BUG_ON(!list_empty(&inode->i_wb_list));
|
||||
@ -1836,37 +1836,37 @@ EXPORT_SYMBOL(bmap);
|
||||
* earlier than or equal to either the ctime or mtime,
|
||||
* or if at least a day has passed since the last atime update.
|
||||
*/
|
||||
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
|
||||
static bool relatime_need_update(struct vfsmount *mnt, struct inode *inode,
|
||||
struct timespec64 now)
|
||||
{
|
||||
struct timespec64 atime, mtime, ctime;
|
||||
|
||||
if (!(mnt->mnt_flags & MNT_RELATIME))
|
||||
return 1;
|
||||
return true;
|
||||
/*
|
||||
* Is mtime younger than or equal to atime? If yes, update atime:
|
||||
*/
|
||||
atime = inode_get_atime(inode);
|
||||
mtime = inode_get_mtime(inode);
|
||||
if (timespec64_compare(&mtime, &atime) >= 0)
|
||||
return 1;
|
||||
return true;
|
||||
/*
|
||||
* Is ctime younger than or equal to atime? If yes, update atime:
|
||||
*/
|
||||
ctime = inode_get_ctime(inode);
|
||||
if (timespec64_compare(&ctime, &atime) >= 0)
|
||||
return 1;
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Is the previous atime value older than a day? If yes,
|
||||
* update atime:
|
||||
*/
|
||||
if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60)
|
||||
return 1;
|
||||
return true;
|
||||
/*
|
||||
* Good, we can skip the atime update:
|
||||
*/
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2404,7 +2404,7 @@ EXPORT_SYMBOL(inode_init_owner);
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
bool inode_owner_or_capable(struct mnt_idmap *idmap,
|
||||
const struct inode *inode)
|
||||
|
@ -94,7 +94,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
|
||||
struct file *alloc_empty_file(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
|
||||
void release_empty_file(struct file *f);
|
||||
|
||||
static inline void file_put_write_access(struct file *file)
|
||||
{
|
||||
@ -180,7 +179,7 @@ extern struct file *do_file_open_root(const struct path *,
|
||||
const char *, const struct open_flags *);
|
||||
extern struct open_how build_open_how(int flags, umode_t mode);
|
||||
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
|
||||
extern struct file *__close_fd_get_file(unsigned int fd);
|
||||
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
|
||||
|
||||
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
||||
int chmod_common(const struct path *path, umode_t mode);
|
||||
|
@ -9,8 +9,16 @@
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
|
||||
* never from raw values. These are just internal helpers.
|
||||
*/
|
||||
#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
|
||||
#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
|
||||
|
||||
struct mnt_idmap {
|
||||
struct user_namespace *owner;
|
||||
struct uid_gid_map uid_map;
|
||||
struct uid_gid_map gid_map;
|
||||
refcount_t count;
|
||||
};
|
||||
|
||||
@ -20,24 +28,10 @@ struct mnt_idmap {
|
||||
* mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
|
||||
*/
|
||||
struct mnt_idmap nop_mnt_idmap = {
|
||||
.owner = &init_user_ns,
|
||||
.count = REFCOUNT_INIT(1),
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(nop_mnt_idmap);
|
||||
|
||||
/**
|
||||
* check_fsmapping - check whether an mount idmapping is allowed
|
||||
* @idmap: idmap of the relevent mount
|
||||
* @sb: super block of the filesystem
|
||||
*
|
||||
* Return: true if @idmap is allowed, false if not.
|
||||
*/
|
||||
bool check_fsmapping(const struct mnt_idmap *idmap,
|
||||
const struct super_block *sb)
|
||||
{
|
||||
return idmap->owner != sb->s_user_ns;
|
||||
}
|
||||
|
||||
/**
|
||||
* initial_idmapping - check whether this is the initial mapping
|
||||
* @ns: idmapping to check
|
||||
@ -52,26 +46,6 @@ static inline bool initial_idmapping(const struct user_namespace *ns)
|
||||
return ns == &init_user_ns;
|
||||
}
|
||||
|
||||
/**
|
||||
* no_idmapping - check whether we can skip remapping a kuid/gid
|
||||
* @mnt_userns: the mount's idmapping
|
||||
* @fs_userns: the filesystem's idmapping
|
||||
*
|
||||
* This function can be used to check whether a remapping between two
|
||||
* idmappings is required.
|
||||
* An idmapped mount is a mount that has an idmapping attached to it that
|
||||
* is different from the filsystem's idmapping and the initial idmapping.
|
||||
* If the initial mapping is used or the idmapping of the mount and the
|
||||
* filesystem are identical no remapping is required.
|
||||
*
|
||||
* Return: true if remapping can be skipped, false if not.
|
||||
*/
|
||||
static inline bool no_idmapping(const struct user_namespace *mnt_userns,
|
||||
const struct user_namespace *fs_userns)
|
||||
{
|
||||
return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
|
||||
}
|
||||
|
||||
/**
|
||||
* make_vfsuid - map a filesystem kuid according to an idmapping
|
||||
* @idmap: the mount's idmapping
|
||||
@ -81,8 +55,8 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
|
||||
* Take a @kuid and remap it from @fs_userns into @idmap. Use this
|
||||
* function when preparing a @kuid to be reported to userspace.
|
||||
*
|
||||
* If no_idmapping() determines that this is not an idmapped mount we can
|
||||
* simply return @kuid unchanged.
|
||||
* If initial_idmapping() determines that this is not an idmapped mount
|
||||
* we can simply return @kuid unchanged.
|
||||
* If initial_idmapping() tells us that the filesystem is not mounted with an
|
||||
* idmapping we know the value of @kuid won't change when calling
|
||||
* from_kuid() so we can simply retrieve the value via __kuid_val()
|
||||
@ -94,13 +68,12 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
|
||||
*/
|
||||
|
||||
vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
|
||||
struct user_namespace *fs_userns,
|
||||
kuid_t kuid)
|
||||
struct user_namespace *fs_userns,
|
||||
kuid_t kuid)
|
||||
{
|
||||
uid_t uid;
|
||||
struct user_namespace *mnt_userns = idmap->owner;
|
||||
|
||||
if (no_idmapping(mnt_userns, fs_userns))
|
||||
if (idmap == &nop_mnt_idmap)
|
||||
return VFSUIDT_INIT(kuid);
|
||||
if (initial_idmapping(fs_userns))
|
||||
uid = __kuid_val(kuid);
|
||||
@ -108,7 +81,7 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
|
||||
uid = from_kuid(fs_userns, kuid);
|
||||
if (uid == (uid_t)-1)
|
||||
return INVALID_VFSUID;
|
||||
return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
|
||||
return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(make_vfsuid);
|
||||
|
||||
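The new body of make_vfsuid() above is representative of all four mapping helpers: instead of dereferencing idmap->owner and going through the user-namespace API, they consult the uid_gid_map copied into struct mnt_idmap. A simplified restatement of that flow; the initial_idmapping() fast path is omitted, example_map_uid is an invented name, and the sketch assumes the context of fs/mnt_idmapping.c, where struct mnt_idmap and VFSUIDT_INIT_RAW are defined:

        /* Sketch, not the in-tree function: shows only the decoupled lookup. */
        static vfsuid_t example_map_uid(struct mnt_idmap *idmap,
                                        struct user_namespace *fs_userns,
                                        kuid_t kuid)
        {
                uid_t uid;

                if (idmap == &nop_mnt_idmap)            /* mount is not idmapped */
                        return VFSUIDT_INIT(kuid);

                uid = from_kuid(fs_userns, kuid);       /* value as the fs sees it */
                if (uid == (uid_t)-1)
                        return INVALID_VFSUID;

                /* Translate via the copied map; no namespace reference needed. */
                return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
        }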
@ -121,8 +94,8 @@ EXPORT_SYMBOL_GPL(make_vfsuid);
|
||||
* Take a @kgid and remap it from @fs_userns into @idmap. Use this
|
||||
* function when preparing a @kgid to be reported to userspace.
|
||||
*
|
||||
* If no_idmapping() determines that this is not an idmapped mount we can
|
||||
* simply return @kgid unchanged.
|
||||
* If initial_idmapping() determines that this is not an idmapped mount
|
||||
* we can simply return @kgid unchanged.
|
||||
* If initial_idmapping() tells us that the filesystem is not mounted with an
|
||||
* idmapping we know the value of @kgid won't change when calling
|
||||
* from_kgid() so we can simply retrieve the value via __kgid_val()
|
||||
@ -136,9 +109,8 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
|
||||
struct user_namespace *fs_userns, kgid_t kgid)
|
||||
{
|
||||
gid_t gid;
|
||||
struct user_namespace *mnt_userns = idmap->owner;
|
||||
|
||||
if (no_idmapping(mnt_userns, fs_userns))
|
||||
if (idmap == &nop_mnt_idmap)
|
||||
return VFSGIDT_INIT(kgid);
|
||||
if (initial_idmapping(fs_userns))
|
||||
gid = __kgid_val(kgid);
|
||||
@ -146,7 +118,7 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
|
||||
gid = from_kgid(fs_userns, kgid);
|
||||
if (gid == (gid_t)-1)
|
||||
return INVALID_VFSGID;
|
||||
return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
|
||||
return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(make_vfsgid);
|
||||
|
||||
@ -165,11 +137,10 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
|
||||
struct user_namespace *fs_userns, vfsuid_t vfsuid)
|
||||
{
|
||||
uid_t uid;
|
||||
struct user_namespace *mnt_userns = idmap->owner;
|
||||
|
||||
if (no_idmapping(mnt_userns, fs_userns))
|
||||
if (idmap == &nop_mnt_idmap)
|
||||
return AS_KUIDT(vfsuid);
|
||||
uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
|
||||
uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
|
||||
if (uid == (uid_t)-1)
|
||||
return INVALID_UID;
|
||||
if (initial_idmapping(fs_userns))
|
||||
@ -193,11 +164,10 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
|
||||
struct user_namespace *fs_userns, vfsgid_t vfsgid)
|
||||
{
|
||||
gid_t gid;
|
||||
struct user_namespace *mnt_userns = idmap->owner;
|
||||
|
||||
if (no_idmapping(mnt_userns, fs_userns))
|
||||
if (idmap == &nop_mnt_idmap)
|
||||
return AS_KGIDT(vfsgid);
|
||||
gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
|
||||
gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
|
||||
if (gid == (gid_t)-1)
|
||||
return INVALID_GID;
|
||||
if (initial_idmapping(fs_userns))
|
||||
@ -228,16 +198,91 @@ int vfsgid_in_group_p(vfsgid_t vfsgid)
|
||||
#endif
|
||||
EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
|
||||
|
||||
static int copy_mnt_idmap(struct uid_gid_map *map_from,
|
||||
struct uid_gid_map *map_to)
|
||||
{
|
||||
struct uid_gid_extent *forward, *reverse;
|
||||
u32 nr_extents = READ_ONCE(map_from->nr_extents);
|
||||
/* Pairs with smp_wmb() when writing the idmapping. */
|
||||
smp_rmb();
|
||||
|
||||
/*
|
||||
* Don't blindly copy @map_to into @map_from if nr_extents is
|
||||
* smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we
|
||||
* read @nr_extents someone could have written an idmapping and
|
||||
* then we might end up with inconsistent data. So just don't do
|
||||
* anything at all.
|
||||
*/
|
||||
if (nr_extents == 0)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Here we know that nr_extents is greater than zero which means
|
||||
* a map has been written. Since idmappings can't be changed
|
||||
* once they have been written we know that we can safely copy
|
||||
* from @map_to into @map_from.
|
||||
*/
|
||||
|
||||
if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
|
||||
*map_to = *map_from;
|
||||
return 0;
|
||||
}
|
||||
|
||||
forward = kmemdup(map_from->forward,
|
||||
nr_extents * sizeof(struct uid_gid_extent),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!forward)
|
||||
return -ENOMEM;
|
||||
|
||||
reverse = kmemdup(map_from->reverse,
|
||||
nr_extents * sizeof(struct uid_gid_extent),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!reverse) {
|
||||
kfree(forward);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* The idmapping isn't exposed anywhere so we don't need to care
|
||||
* about ordering between extent pointers and @nr_extents
|
||||
* initialization.
|
||||
*/
|
||||
map_to->forward = forward;
|
||||
map_to->reverse = reverse;
|
||||
map_to->nr_extents = nr_extents;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_mnt_idmap(struct mnt_idmap *idmap)
|
||||
{
|
||||
if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
|
||||
kfree(idmap->uid_map.forward);
|
||||
kfree(idmap->uid_map.reverse);
|
||||
}
|
||||
if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
|
||||
kfree(idmap->gid_map.forward);
|
||||
kfree(idmap->gid_map.reverse);
|
||||
}
|
||||
kfree(idmap);
|
||||
}
|
||||
|
||||
struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
|
||||
{
|
||||
struct mnt_idmap *idmap;
|
||||
int ret;
|
||||
|
||||
idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
|
||||
if (!idmap)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
idmap->owner = get_user_ns(mnt_userns);
|
||||
refcount_set(&idmap->count, 1);
|
||||
ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
|
||||
if (!ret)
|
||||
ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
|
||||
if (ret) {
|
||||
free_mnt_idmap(idmap);
|
||||
idmap = ERR_PTR(ret);
|
||||
}
|
||||
return idmap;
|
||||
}
|
||||
|
||||
@ -267,9 +312,7 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
|
||||
*/
|
||||
void mnt_idmap_put(struct mnt_idmap *idmap)
|
||||
{
|
||||
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
|
||||
put_user_ns(idmap->owner);
|
||||
kfree(idmap);
|
||||
}
|
||||
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count))
|
||||
free_mnt_idmap(idmap);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mnt_idmap_put);
|
||||
|
fs/namei.c (31 lines changed)
@ -289,7 +289,7 @@ EXPORT_SYMBOL(putname);
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
static int check_acl(struct mnt_idmap *idmap,
|
||||
struct inode *inode, int mask)
|
||||
@ -334,7 +334,7 @@ static int check_acl(struct mnt_idmap *idmap,
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
static int acl_permission_check(struct mnt_idmap *idmap,
|
||||
struct inode *inode, int mask)
|
||||
@ -395,7 +395,7 @@ static int acl_permission_check(struct mnt_idmap *idmap,
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
|
||||
int mask)
|
||||
@ -2467,7 +2467,7 @@ static int handle_lookup_down(struct nameidata *nd)
|
||||
return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
|
||||
}
|
||||
|
||||
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
|
||||
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
|
||||
static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
|
||||
{
|
||||
const char *s = path_init(nd, flags);
|
||||
@ -2522,7 +2522,7 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
|
||||
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
|
||||
static int path_parentat(struct nameidata *nd, unsigned flags,
|
||||
struct path *parent)
|
||||
{
|
||||
@ -3158,7 +3158,7 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
|
||||
struct dentry *dentry, umode_t mode, bool want_excl)
|
||||
@ -3646,7 +3646,7 @@ static int do_open(struct nameidata *nd,
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
static int vfs_tmpfile(struct mnt_idmap *idmap,
|
||||
const struct path *parentpath,
|
||||
@ -3785,10 +3785,7 @@ static struct file *path_openat(struct nameidata *nd,
|
||||
WARN_ON(1);
|
||||
error = -EINVAL;
|
||||
}
|
||||
if (unlikely(file->f_mode & FMODE_OPENED))
|
||||
fput(file);
|
||||
else
|
||||
release_empty_file(file);
|
||||
fput(file);
|
||||
if (error == -EOPENSTALE) {
|
||||
if (flags & LOOKUP_RCU)
|
||||
error = -ECHILD;
|
||||
@ -3954,7 +3951,7 @@ EXPORT_SYMBOL(user_path_create);
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
|
||||
struct dentry *dentry, umode_t mode, dev_t dev)
|
||||
@ -4080,7 +4077,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
|
||||
struct dentry *dentry, umode_t mode)
|
||||
@ -4161,7 +4158,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
|
||||
struct dentry *dentry)
|
||||
@ -4290,7 +4287,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
|
||||
struct dentry *dentry, struct inode **delegated_inode)
|
||||
@ -4443,7 +4440,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
||||
struct dentry *dentry, const char *oldname)
|
||||
@ -4535,7 +4532,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
|
||||
* the vfsmount must be passed through @idmap. This function will then take
|
||||
* care to map the inode according to @idmap before checking permissions.
|
||||
* On non-idmapped mounts or if permission checking is to be performed on the
|
||||
* raw inode simply passs @nop_mnt_idmap.
|
||||
* raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
|
||||
struct inode *dir, struct dentry *new_dentry,
|
||||
|
@ -3026,6 +3026,7 @@ static inline bool path_overmounted(const struct path *path)
|
||||
* can_move_mount_beneath - check that we can mount beneath the top mount
|
||||
* @from: mount to mount beneath
|
||||
* @to: mount under which to mount
|
||||
* @mp: mountpoint of @to
|
||||
*
|
||||
* - Make sure that @to->dentry is actually the root of a mount under
|
||||
* which we can mount another mount.
|
||||
@ -4288,7 +4289,7 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
|
||||
* Creating an idmapped mount with the filesystem wide idmapping
|
||||
* doesn't make sense so block that. We don't allow mushy semantics.
|
||||
*/
|
||||
if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb))
|
||||
if (kattr->mnt_userns == m->mnt_sb->s_user_ns)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
|
@ -192,13 +192,13 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
|
||||
|
||||
if (!folio_test_private(folio))
|
||||
return NULL;
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
req = nfs_folio_private_request(folio);
|
||||
if (req) {
|
||||
WARN_ON_ONCE(req->wb_head != req);
|
||||
kref_get(&req->wb_kref);
|
||||
}
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
return req;
|
||||
}
|
||||
|
||||
@ -769,13 +769,13 @@ static void nfs_inode_add_request(struct nfs_page *req)
|
||||
* Swap-space should not get truncated. Hence no need to plug the race
|
||||
* with invalidate/truncate.
|
||||
*/
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
if (likely(!folio_test_swapcache(folio))) {
|
||||
set_bit(PG_MAPPED, &req->wb_flags);
|
||||
folio_set_private(folio);
|
||||
folio->private = req;
|
||||
}
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
atomic_long_inc(&nfsi->nrequests);
|
||||
/* this a head request for a page group - mark it as having an
|
||||
* extra reference so sub groups can follow suit.
|
||||
@ -796,13 +796,13 @@ static void nfs_inode_remove_request(struct nfs_page *req)
|
||||
struct folio *folio = nfs_page_to_folio(req->wb_head);
|
||||
struct address_space *mapping = folio_file_mapping(folio);
|
||||
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
if (likely(folio && !folio_test_swapcache(folio))) {
|
||||
folio->private = NULL;
|
||||
folio_clear_private(folio);
|
||||
clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
|
||||
}
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
|
||||
|
@ -214,7 +214,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping,
|
||||
/*
|
||||
* The page may not be locked, eg if called from try_to_unmap_one()
|
||||
*/
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
head = folio_buffers(folio);
|
||||
if (head) {
|
||||
struct buffer_head *bh = head;
|
||||
@ -230,7 +230,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping,
|
||||
} else if (ret) {
|
||||
nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
|
||||
}
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
|
||||
if (nr_dirty)
|
||||
nilfs_set_file_dirty(inode, nr_dirty);
|
||||
|
@ -1690,7 +1690,7 @@ const struct address_space_operations ntfs_mst_aops = {
|
||||
*
|
||||
* If the page does not have buffers, we create them and set them uptodate.
|
||||
* The page may not be locked which is why we need to handle the buffers under
|
||||
* the mapping->private_lock. Once the buffers are marked dirty we no longer
|
||||
* the mapping->i_private_lock. Once the buffers are marked dirty we no longer
|
||||
* need the lock since try_to_free_buffers() does not free dirty buffers.
|
||||
*/
|
||||
void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
|
||||
@ -1702,11 +1702,11 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
|
||||
BUG_ON(!PageUptodate(page));
|
||||
end = ofs + ni->itype.index.block_size;
|
||||
bh_size = VFS_I(ni)->i_sb->s_blocksize;
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
if (unlikely(!page_has_buffers(page))) {
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
bh = head = alloc_page_buffers(page, bh_size, true);
|
||||
spin_lock(&mapping->private_lock);
|
||||
spin_lock(&mapping->i_private_lock);
|
||||
if (likely(!page_has_buffers(page))) {
|
||||
struct buffer_head *tail;
|
||||
|
||||
@ -1730,7 +1730,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
|
||||
break;
|
||||
set_buffer_dirty(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
spin_unlock(&mapping->private_lock);
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
filemap_dirty_folio(mapping, page_folio(page));
|
||||
if (unlikely(buffers_to_free)) {
|
||||
do {
|
||||
|
@ -1462,7 +1462,8 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
|
||||
/**
|
||||
* ntfs_dir_fsync - sync a directory to disk
|
||||
* @filp: directory to be synced
|
||||
* @dentry: dentry describing the directory to sync
|
||||
* @start: offset in bytes of the beginning of data range to sync
|
||||
* @end: offset in bytes of the end of data range (inclusive)
|
||||
* @datasync: if non-zero only flush user data and not metadata
|
||||
*
|
||||
* Data integrity sync of a directory to disk. Used for fsync, fdatasync, and
|
||||
|
@ -442,7 +442,8 @@ static const struct cred *access_override_creds(void)
|
||||
* 'get_current_cred()' function), that will clear the
|
||||
* non_rcu field, because now that other user may be
|
||||
* expecting RCU freeing. But normal thread-synchronous
|
||||
* cred accesses will keep things non-RCY.
|
||||
* cred accesses will keep things non-racy to avoid RCU
|
||||
* freeing.
|
||||
*/
|
||||
override_cred->non_rcu = 1;
|
||||
|
||||
@ -1574,7 +1575,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
int retval;
|
||||
struct file *file;
|
||||
|
||||
file = close_fd_get_file(fd);
|
||||
file = file_close_fd(fd);
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
|
fs/pipe.c (24 lines changed)
@ -446,6 +446,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
bool was_empty = false;
|
||||
bool wake_next_writer = false;
|
||||
|
||||
/*
|
||||
* Reject writing to watch queue pipes before the point where we lock
|
||||
* the pipe.
|
||||
* Otherwise, lockdep would be unhappy if the caller already has another
|
||||
* pipe locked.
|
||||
* If we had to support locking a normal pipe and a notification pipe at
|
||||
* the same time, we could set up lockdep annotations for that, but
|
||||
* since we don't actually need that, it's simpler to just bail here.
|
||||
*/
|
||||
if (pipe_has_watch_queue(pipe))
|
||||
return -EXDEV;
|
||||
|
||||
/* Null write succeeds. */
|
||||
if (unlikely(total_len == 0))
|
||||
return 0;
|
||||
@ -458,11 +470,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (pipe_has_watch_queue(pipe)) {
|
||||
ret = -EXDEV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If it wasn't empty we try to merge new data into
|
||||
* the last buffer.
|
||||
@ -1317,6 +1324,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
|
||||
pipe->tail = tail;
|
||||
pipe->head = head;
|
||||
|
||||
if (!pipe_has_watch_queue(pipe)) {
|
||||
pipe->max_usage = nr_slots;
|
||||
pipe->nr_accounted = nr_slots;
|
||||
}
|
||||
|
||||
spin_unlock_irq(&pipe->rd_wait.lock);
|
||||
|
||||
/* This might have made more room for writers */
|
||||
@ -1368,8 +1380,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
|
||||
if (ret < 0)
|
||||
goto out_revert_acct;
|
||||
|
||||
pipe->max_usage = nr_slots;
|
||||
pipe->nr_accounted = nr_slots;
|
||||
return pipe->max_usage * PAGE_SIZE;
|
||||
|
||||
out_revert_acct:
|
||||
|
@ -600,7 +600,7 @@ EXPORT_SYMBOL(__posix_acl_chmod);
|
||||
* the vfsmount must be passed through @idmap. This function will then
|
||||
* take care to map the inode according to @idmap before checking
|
||||
* permissions. On non-idmapped mounts or if permission checking is to be
|
||||
* performed on the raw inode simply passs @nop_mnt_idmap.
|
||||
* performed on the raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
int
|
||||
posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
@ -700,7 +700,7 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
|
||||
* the vfsmount must be passed through @idmap. This function will then
|
||||
* take care to map the inode according to @idmap before checking
|
||||
* permissions. On non-idmapped mounts or if permission checking is to be
|
||||
* performed on the raw inode simply passs @nop_mnt_idmap.
|
||||
* performed on the raw inode simply pass @nop_mnt_idmap.
|
||||
*
|
||||
* Called from set_acl inode operations.
|
||||
*/
|
||||
|
@ -273,7 +273,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
||||
const char *name = NULL;
|
||||
|
||||
if (file) {
|
||||
struct inode *inode = file_inode(vma->vm_file);
|
||||
const struct inode *inode = file_user_inode(vma->vm_file);
|
||||
|
||||
dev = inode->i_sb->s_dev;
|
||||
ino = inode->i_ino;
|
||||
pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
|
||||
|
@ -1407,7 +1407,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
|
||||
INITIALIZE_PATH(path);
|
||||
int item_len = 0;
|
||||
int tb_init = 0;
|
||||
struct cpu_key cpu_key;
|
||||
struct cpu_key cpu_key = {};
|
||||
int retval;
|
||||
int quota_cut_bytes = 0;
|
||||
|
||||
|
@ -41,7 +41,7 @@
|
||||
* the vfsmount must be passed through @idmap. This function will then
|
||||
* take care to map the inode according to @idmap before filling in the
|
||||
* uid and gid filds. On non-idmapped mounts or if permission checking is to be
|
||||
* performed on the raw inode simply passs @nop_mnt_idmap.
|
||||
* performed on the raw inode simply pass @nop_mnt_idmap.
|
||||
*/
|
||||
void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
|
||||
struct inode *inode, struct kstat *stat)
|
||||
|
@@ -323,7 +323,7 @@ static void destroy_unused_super(struct super_block *s)
static struct super_block *alloc_super(struct file_system_type *type, int flags,
                                       struct user_namespace *user_ns)
{
-       struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
+       struct super_block *s = kzalloc(sizeof(struct super_block), GFP_KERNEL);
        static const struct super_operations default_op;
        int i;
@ -35,8 +35,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx);
|
||||
struct file *eventfd_fget(int fd);
|
||||
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
|
||||
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
|
||||
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
|
||||
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
|
||||
void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask);
|
||||
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
|
||||
__u64 *cnt);
|
||||
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
|
||||
@ -58,15 +57,8 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
|
||||
return ERR_PTR(-ENOSYS);
|
||||
}
|
||||
|
||||
static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
|
||||
static inline void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
|
||||
unsigned mask)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
|
||||
@ -92,5 +84,10 @@ static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
|
||||
|
||||
#endif
|
||||
|
||||
static inline void eventfd_signal(struct eventfd_ctx *ctx)
|
||||
{
|
||||
eventfd_signal_mask(ctx, 0);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_EVENTFD_H */
|
||||
|
||||
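On the header side the count argument disappears: eventfd_signal_mask() now always adds one to the counter, and eventfd_signal() becomes a static inline wrapper around it, available whether or not CONFIG_EVENTFD is set. A hedged sketch of converted callers, mirroring the memcg and io_uring hunks elsewhere in this series; example_notify is an invented name:

        #include <linux/eventfd.h>
        #include <linux/eventpoll.h>

        static void example_notify(struct eventfd_ctx *ctx)
        {
                /* Old form (removed): eventfd_signal(ctx, 1); */
                eventfd_signal(ctx);

                /* Old form (removed): eventfd_signal_mask(ctx, 1, EPOLL_URING_WAKE); */
                eventfd_signal_mask(ctx, EPOLL_URING_WAKE);
        }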
|
@ -83,12 +83,17 @@ struct dentry;
|
||||
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
|
||||
{
|
||||
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
|
||||
unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
|
||||
struct file *needs_masking;
|
||||
|
||||
if (fd < fdt->max_fds) {
|
||||
fd = array_index_nospec(fd, fdt->max_fds);
|
||||
return rcu_dereference_raw(fdt->fd[fd]);
|
||||
}
|
||||
return NULL;
|
||||
/*
|
||||
* 'mask' is zero for an out-of-bounds fd, all ones for ok.
|
||||
* 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
|
||||
*
|
||||
* Accessing fdt->fd[0] is ok, but needs masking of the result.
|
||||
*/
|
||||
needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);
|
||||
return (struct file *)(mask & (unsigned long)needs_masking);
|
||||
}
|
||||
|
||||
static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
|
||||
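The rewritten lookup replaces the conditional bounds check with a branchless mask: array_index_mask_nospec() yields an all-ones mask for a valid fd and zero otherwise, so the load always stays inside the table and a bogus result is masked off to NULL. The trick is generic; a stand-alone sketch of the pattern, with the table and function names invented for illustration:

        #include <linux/kernel.h>
        #include <linux/nospec.h>

        /* Hypothetical slot table; only the masking pattern matters here. */
        static void *example_slots[16];

        static void *example_lookup(unsigned int idx)
        {
                /* ~0UL when idx is in range, 0UL when it is not. */
                unsigned long mask = array_index_mask_nospec(idx, ARRAY_SIZE(example_slots));
                void *entry;

                /*
                 * idx & mask is idx when valid and 0 when out of range, so
                 * the load never leaves the array; slot 0 always exists.
                 */
                entry = example_slots[idx & mask];

                /* Mask the loaded value so an out-of-range idx yields NULL. */
                return (void *)(mask & (unsigned long)entry);
        }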
@ -114,7 +119,7 @@ int iterate_fd(struct files_struct *, unsigned,
|
||||
|
||||
extern int close_fd(unsigned int fd);
|
||||
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
|
||||
extern struct file *close_fd_get_file(unsigned int fd);
|
||||
extern struct file *file_close_fd(unsigned int fd);
|
||||
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
|
||||
struct files_struct **new_fdp);
|
||||
|
||||
|
@ -96,18 +96,8 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
|
||||
|
||||
extern void fd_install(unsigned int fd, struct file *file);
|
||||
|
||||
extern int __receive_fd(struct file *file, int __user *ufd,
|
||||
unsigned int o_flags);
|
||||
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
|
||||
|
||||
extern int receive_fd(struct file *file, unsigned int o_flags);
|
||||
|
||||
static inline int receive_fd_user(struct file *file, int __user *ufd,
|
||||
unsigned int o_flags)
|
||||
{
|
||||
if (ufd == NULL)
|
||||
return -EFAULT;
|
||||
return __receive_fd(file, ufd, o_flags);
|
||||
}
|
||||
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
|
||||
|
||||
extern void flush_delayed_fput(void);
|
||||
|
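With __receive_fd() gone, receive_fd() itself carries the @ufd parameter and receive_fd_user() is no longer exposed; callers that do not need the fd copied to userspace pass NULL. A hedged sketch of both call styles, modelled on the pidfd_getfd() and net/scm.h hunks below; the example_* names are invented:

        #include <linux/errno.h>
        #include <linux/fcntl.h>
        #include <linux/file.h>

        /* Sketch: install a received file, no copy-out of the fd number. */
        static int example_install(struct file *file)
        {
                /* Old spelling (removed): receive_fd(file, O_CLOEXEC); */
                return receive_fd(file, NULL, O_CLOEXEC);
        }

        /* Sketch: also write the new fd number to user memory. */
        static int example_install_user(struct file *file, int __user *ufd,
                                        unsigned int o_flags)
        {
                if (!ufd)
                        return -EFAULT; /* the check receive_fd_user() used to do */
                return receive_fd(file, ufd, o_flags);
        }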
@ -463,9 +463,9 @@ extern const struct address_space_operations empty_aops;
|
||||
* @a_ops: Methods.
|
||||
* @flags: Error bits and flags (AS_*).
|
||||
* @wb_err: The most recent error which has occurred.
|
||||
* @private_lock: For use by the owner of the address_space.
|
||||
* @private_list: For use by the owner of the address_space.
|
||||
* @private_data: For use by the owner of the address_space.
|
||||
* @i_private_lock: For use by the owner of the address_space.
|
||||
* @i_private_list: For use by the owner of the address_space.
|
||||
* @i_private_data: For use by the owner of the address_space.
|
||||
*/
|
||||
struct address_space {
|
||||
struct inode *host;
|
||||
@ -484,9 +484,9 @@ struct address_space {
|
||||
unsigned long flags;
|
||||
struct rw_semaphore i_mmap_rwsem;
|
||||
errseq_t wb_err;
|
||||
spinlock_t private_lock;
|
||||
struct list_head private_list;
|
||||
void *private_data;
|
||||
spinlock_t i_private_lock;
|
||||
struct list_head i_private_list;
|
||||
void * i_private_data;
|
||||
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
|
||||
/*
|
||||
* On most architectures that alignment is already the case; but
|
||||
@ -991,8 +991,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
|
||||
*/
|
||||
struct file {
|
||||
union {
|
||||
/* fput() uses task work when closing and freeing file (default). */
|
||||
struct callback_head f_task_work;
|
||||
/* fput() must use workqueue (most kernel threads). */
|
||||
struct llist_node f_llist;
|
||||
struct rcu_head f_rcuhead;
|
||||
unsigned int f_iocb_flags;
|
||||
};
|
||||
|
||||
@ -2523,20 +2525,28 @@ struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
struct path *backing_file_user_path(struct file *f);
|
||||
|
||||
/*
|
||||
* file_user_path - get the path to display for memory mapped file
|
||||
*
|
||||
* When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
|
||||
* stored in ->vm_file is a backing file whose f_inode is on the underlying
|
||||
* filesystem. When the mapped file path is displayed to user (e.g. via
|
||||
* /proc/<pid>/maps), this helper should be used to get the path to display
|
||||
* to the user, which is the path of the fd that user has requested to map.
|
||||
* filesystem. When the mapped file path and inode number are displayed to
|
||||
* user (e.g. via /proc/<pid>/maps), these helpers should be used to get the
|
||||
* path and inode number to display to the user, which is the path of the fd
|
||||
* that user has requested to map and the inode number that would be returned
|
||||
* by fstat() on that same fd.
|
||||
*/
|
||||
/* Get the path to display in /proc/<pid>/maps */
|
||||
static inline const struct path *file_user_path(struct file *f)
|
||||
{
|
||||
if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
return backing_file_user_path(f);
|
||||
return &f->f_path;
|
||||
}
|
||||
/* Get the inode whose inode number to display in /proc/<pid>/maps */
|
||||
static inline const struct inode *file_user_inode(struct file *f)
|
||||
{
|
||||
if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
return d_inode(backing_file_user_path(f)->dentry);
|
||||
return file_inode(f);
|
||||
}
|
||||
|
||||
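file_user_inode() complements file_user_path(): for a FMODE_BACKING file it reports the inode of the user-visible stacked path instead of the backing inode, which is what fstat() on the mapped fd would show. A condensed sketch of the /proc/<pid>/maps-style use, matching the show_map_vma() hunk earlier in this diff; example_map_ids is an invented name:

        #include <linux/fs.h>
        #include <linux/mm.h>

        static void example_map_ids(struct vm_area_struct *vma,
                                    dev_t *dev, unsigned long *ino)
        {
                if (vma->vm_file) {
                        /* Device and inode as seen through the fd that was mmapped. */
                        const struct inode *inode = file_user_inode(vma->vm_file);

                        *dev = inode->i_sb->s_dev;
                        *ino = inode->i_ino;
                }
        }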
static inline struct file *file_clone_open(struct file *file)
|
||||
{
|
||||
|
@ -244,7 +244,4 @@ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap,
|
||||
return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid()));
|
||||
}
|
||||
|
||||
bool check_fsmapping(const struct mnt_idmap *idmap,
|
||||
const struct super_block *sb);
|
||||
|
||||
#endif /* _LINUX_MNT_IDMAPPING_H */
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
struct user_namespace;
|
||||
extern struct user_namespace init_user_ns;
|
||||
struct uid_gid_map;
|
||||
|
||||
typedef struct {
|
||||
uid_t val;
|
||||
@ -138,6 +139,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
|
||||
return from_kgid(ns, gid) != (gid_t) -1;
|
||||
}
|
||||
|
||||
u32 map_id_down(struct uid_gid_map *map, u32 id);
|
||||
u32 map_id_up(struct uid_gid_map *map, u32 id);
|
||||
|
||||
#else
|
||||
|
||||
static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
|
||||
@ -186,6 +190,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
|
||||
return gid_valid(gid);
|
||||
}
|
||||
|
||||
static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
|
||||
{
|
||||
return id;
|
||||
}
|
||||
|
||||
static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
|
||||
{
|
||||
return id;
|
||||
}
|
||||
#endif /* CONFIG_USER_NS */
|
||||
|
||||
#endif /* _LINUX_UIDGID_H */
|
||||
|
@ -193,7 +193,6 @@ void inode_io_list_del(struct inode *inode);
|
||||
/* writeback.h requires fs.h; it, too, is not included from here. */
|
||||
static inline void wait_on_inode(struct inode *inode)
|
||||
{
|
||||
might_sleep();
|
||||
wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <linux/limits.h>
|
||||
#include <linux/net.h>
|
||||
#include <linux/cred.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/pid.h>
|
||||
#include <linux/nsproxy.h>
|
||||
@ -208,5 +209,13 @@ static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
|
||||
scm_destroy_cred(scm);
|
||||
}
|
||||
|
||||
static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
|
||||
unsigned int flags)
|
||||
{
|
||||
if (!ufd)
|
||||
return -EFAULT;
|
||||
return receive_fd(f, ufd, flags);
|
||||
}
|
||||
|
||||
#endif /* __LINUX_NET_SCM_H */
|
||||
|
||||
|
@ -561,7 +561,7 @@ static void io_eventfd_ops(struct rcu_head *rcu)
|
||||
int ops = atomic_xchg(&ev_fd->ops, 0);
|
||||
|
||||
if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
||||
|
||||
/* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
|
||||
* ordering in a race but if references are 0 we know we have to free
|
||||
@ -597,7 +597,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
|
||||
goto out;
|
||||
|
||||
if (likely(eventfd_signal_allowed())) {
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
||||
} else {
|
||||
atomic_inc(&ev_fd->refs);
|
||||
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
|
||||
|
@ -241,7 +241,7 @@ int io_close(struct io_kiocb *req, unsigned int issue_flags)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
file = __close_fd_get_file(close->fd);
|
||||
file = file_close_fd_locked(files, close->fd);
|
||||
spin_unlock(&files->file_lock);
|
||||
if (!file)
|
||||
goto err;
|
||||
|
@ -700,7 +700,7 @@ static int pidfd_getfd(struct pid *pid, int fd)
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ret = receive_fd(file, O_CLOEXEC);
|
||||
ret = receive_fd(file, NULL, O_CLOEXEC);
|
||||
fput(file);
|
||||
|
||||
return ret;
|
||||
|
@ -1072,7 +1072,7 @@ static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_kn
|
||||
*/
|
||||
list_del_init(&addfd->list);
|
||||
if (!addfd->setfd)
|
||||
fd = receive_fd(addfd->file, addfd->flags);
|
||||
fd = receive_fd(addfd->file, NULL, addfd->flags);
|
||||
else
|
||||
fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
|
||||
addfd->ret = fd;
|
||||
|
@ -231,7 +231,7 @@ void __put_user_ns(struct user_namespace *ns)
|
||||
}
|
||||
EXPORT_SYMBOL(__put_user_ns);
|
||||
|
||||
/**
|
||||
/*
|
||||
* struct idmap_key - holds the information necessary to find an idmapping in a
|
||||
* sorted idmap array. It is passed to cmp_map_id() as first argument.
|
||||
*/
|
||||
@ -241,7 +241,7 @@ struct idmap_key {
|
||||
u32 count; /* == 0 unless used with map_id_range_down() */
|
||||
};
|
||||
|
||||
/**
|
||||
/*
|
||||
* cmp_map_id - Function to be passed to bsearch() to find the requested
|
||||
* idmapping. Expects struct idmap_key to be passed via @k.
|
||||
*/
|
||||
@ -271,7 +271,7 @@ static int cmp_map_id(const void *k, const void *e)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* map_id_range_down_max - Find idmap via binary search in ordered idmap array.
|
||||
* Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
|
||||
*/
|
||||
@ -288,7 +288,7 @@ map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 cou
|
||||
sizeof(struct uid_gid_extent), cmp_map_id);
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* map_id_range_down_base - Find idmap via binary search in static extent array.
|
||||
* Can only be called if number of mappings is equal or less than
|
||||
* UID_GID_MAP_MAX_BASE_EXTENTS.
|
||||
@ -332,12 +332,12 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
|
||||
return id;
|
||||
}
|
||||
|
||||
static u32 map_id_down(struct uid_gid_map *map, u32 id)
|
||||
u32 map_id_down(struct uid_gid_map *map, u32 id)
|
||||
{
|
||||
return map_id_range_down(map, id, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* map_id_up_base - Find idmap via binary search in static extent array.
|
||||
* Can only be called if number of mappings is equal or less than
|
||||
* UID_GID_MAP_MAX_BASE_EXTENTS.
|
||||
@ -358,7 +358,7 @@ map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* map_id_up_max - Find idmap via binary search in ordered idmap array.
|
||||
* Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
|
||||
*/
|
||||
@ -375,7 +375,7 @@ map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
|
||||
sizeof(struct uid_gid_extent), cmp_map_id);
|
||||
}
|
||||
|
||||
static u32 map_id_up(struct uid_gid_map *map, u32 id)
|
||||
u32 map_id_up(struct uid_gid_map *map, u32 id)
|
||||
{
|
||||
struct uid_gid_extent *extent;
|
||||
unsigned extents = map->nr_extents;
|
||||
@ -770,7 +770,7 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
|
||||
* Takes care to allocate a 4K block of memory if the number of mappings exceeds
|
||||
* UID_GID_MAP_MAX_BASE_EXTENTS.
|
||||
@ -839,7 +839,7 @@ static int cmp_extents_reverse(const void *a, const void *b)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* sort_idmaps - Sorts an array of idmap entries.
|
||||
* Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
|
||||
*/
|
||||
|
@ -270,7 +270,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
|
||||
goto error;
|
||||
|
||||
ret = -ENOMEM;
|
||||
pages = kcalloc(sizeof(struct page *), nr_pages, GFP_KERNEL);
|
||||
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
|
||||
if (!pages)
|
||||
goto error;
|
||||
|
||||
|
@@ -1141,7 +1141,7 @@ static inline struct resv_map *inode_resv_map(struct inode *inode)
 	 * The VERY common case is inode->mapping == &inode->i_data but,
 	 * this may not be true for device special inodes.
 	 */
-	return (struct resv_map *)(&inode->i_data)->private_data;
+	return (struct resv_map *)(&inode->i_data)->i_private_data;
 }
 
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
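Note: this hunk, and the buffer-migration hunks further below, track the rename of the generic "private" members of struct address_space to i_-prefixed names. An abridged sketch of just the renamed members, using a stand-in type name; the real definition lives in include/linux/fs.h and has many more fields:

/* Sketch only: lists the renamed members, not the full struct address_space. */
struct address_space_private_members {
	spinlock_t		i_private_lock;		/* was: private_lock */
	struct list_head	i_private_list;		/* was: private_list */
	void			*i_private_data;	/* was: private_data */
};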
@@ -4379,7 +4379,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 	 * only one element of the array here.
 	 */
 	for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--)
-		eventfd_signal(t->entries[i].eventfd, 1);
+		eventfd_signal(t->entries[i].eventfd);
 
 	/* i = current_threshold + 1 */
 	i++;
@@ -4391,7 +4391,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 	 * only one element of the array here.
 	 */
 	for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++)
-		eventfd_signal(t->entries[i].eventfd, 1);
+		eventfd_signal(t->entries[i].eventfd);
 
 	/* Update current_threshold */
 	t->current_threshold = i - 1;
@@ -4431,7 +4431,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
 	spin_lock(&memcg_oom_lock);
 
 	list_for_each_entry(ev, &memcg->oom_notify, list)
-		eventfd_signal(ev->eventfd, 1);
+		eventfd_signal(ev->eventfd);
 
 	spin_unlock(&memcg_oom_lock);
 	return 0;
@@ -4650,7 +4650,7 @@ static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
 
 	/* already in OOM ? */
 	if (memcg->under_oom)
-		eventfd_signal(eventfd, 1);
+		eventfd_signal(eventfd);
 	spin_unlock(&memcg_oom_lock);
 
 	return 0;
@@ -4942,7 +4942,7 @@ static void memcg_event_remove(struct work_struct *work)
 	event->unregister_event(memcg, event->eventfd);
 
 	/* Notify userspace the event is going away. */
-	eventfd_signal(event->eventfd, 1);
+	eventfd_signal(event->eventfd);
 
 	eventfd_ctx_put(event->eventfd);
 	kfree(event);
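Note: every eventfd conversion in this series drops the trailing count of 1; the counter argument to eventfd_signal() is gone because callers only ever signalled a single event. A rough sketch of the simplified helper pair, with the signatures assumed from the converted call sites rather than copied from fs/eventfd.c:

void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask);

static inline void eventfd_signal(struct eventfd_ctx *ctx)
{
	/* Always adds 1 to the eventfd count; the old count argument is gone. */
	eventfd_signal_mask(ctx, 0);
}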
@@ -753,7 +753,7 @@ static int __buffer_migrate_folio(struct address_space *mapping,
 
 recheck_buffers:
 		busy = false;
-		spin_lock(&mapping->private_lock);
+		spin_lock(&mapping->i_private_lock);
 		bh = head;
 		do {
 			if (atomic_read(&bh->b_count)) {
@@ -767,7 +767,7 @@ recheck_buffers:
 				rc = -EAGAIN;
 				goto unlock_buffers;
 			}
-			spin_unlock(&mapping->private_lock);
+			spin_unlock(&mapping->i_private_lock);
 			invalidate_bh_lrus();
 			invalidated = true;
 			goto recheck_buffers;
@@ -794,7 +794,7 @@ recheck_buffers:
 	rc = MIGRATEPAGE_SUCCESS;
 unlock_buffers:
 	if (check_refs)
-		spin_unlock(&mapping->private_lock);
+		spin_unlock(&mapping->i_private_lock);
 	bh = head;
 	do {
 		unlock_buffer(bh);
@@ -169,7 +169,7 @@ static bool vmpressure_event(struct vmpressure *vmpr,
 			continue;
 		if (level < ev->level)
 			continue;
-		eventfd_signal(ev->efd, 1);
+		eventfd_signal(ev->efd);
 		ret = true;
 	}
 	mutex_unlock(&vmpr->events_lock);
@@ -297,7 +297,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
 	int err = 0, i;
 
 	for (i = 0; i < fdmax; i++) {
-		err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+		err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
 		if (err < 0)
 			break;
 	}
@@ -325,7 +325,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	}
 
 	for (i = 0; i < fdmax; i++) {
-		err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+		err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
 		if (err < 0)
 			break;
 	}
@@ -234,10 +234,10 @@ static void mtty_trigger_interrupt(struct mdev_state *mdev_state)
 
 	if (is_msi(mdev_state)) {
 		if (mdev_state->msi_evtfd)
-			eventfd_signal(mdev_state->msi_evtfd, 1);
+			eventfd_signal(mdev_state->msi_evtfd);
 	} else if (is_intx(mdev_state)) {
 		if (mdev_state->intx_evtfd && !mdev_state->intx_mask) {
-			eventfd_signal(mdev_state->intx_evtfd, 1);
+			eventfd_signal(mdev_state->intx_evtfd);
 			mdev_state->intx_mask = true;
 		}
 	}
@@ -26,6 +26,7 @@ TARGETS += filesystems
 TARGETS += filesystems/binderfs
 TARGETS += filesystems/epoll
 TARGETS += filesystems/fat
+TARGETS += filesystems/overlayfs
 TARGETS += firmware
 TARGETS += fpu
 TARGETS += ftrace
new file: tools/testing/selftests/filesystems/overlayfs/.gitignore (2 lines)
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dev_in_maps
new file: tools/testing/selftests/filesystems/overlayfs/Makefile (7 lines)
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := dev_in_maps
+
+CFLAGS := -Wall -Werror
+
+include ../../lib.mk
new file: tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c (182 lines)
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <fcntl.h>
+
+#include "../../kselftest.h"
+#include "log.h"
+
+static int sys_fsopen(const char *fsname, unsigned int flags)
+{
+	return syscall(__NR_fsopen, fsname, flags);
+}
+
+static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
+{
+	return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+}
+
+static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
+{
+	return syscall(__NR_fsmount, fd, flags, attr_flags);
+}
+
+static int sys_move_mount(int from_dfd, const char *from_pathname,
+			  int to_dfd, const char *to_pathname,
+			  unsigned int flags)
+{
+	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
+}
+
+static long get_file_dev_and_inode(void *addr, struct statx *stx)
+{
+	char buf[4096];
+	FILE *mapf;
+
+	mapf = fopen("/proc/self/maps", "r");
+	if (mapf == NULL)
+		return pr_perror("fopen(/proc/self/maps)");
+
+	while (fgets(buf, sizeof(buf), mapf)) {
+		unsigned long start, end;
+		uint32_t maj, min;
+		__u64 ino;
+
+		if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu",
+			   &start, &end, &maj, &min, &ino) != 5)
+			return pr_perror("unable to parse: %s", buf);
+		if (start == (unsigned long)addr) {
+			stx->stx_dev_major = maj;
+			stx->stx_dev_minor = min;
+			stx->stx_ino = ino;
+			return 0;
+		}
+	}
+
+	return pr_err("unable to find the mapping");
+}
+
+static int ovl_mount(void)
+{
+	int tmpfs, fsfd, ovl;
+
+	fsfd = sys_fsopen("tmpfs", 0);
+	if (fsfd == -1)
+		return pr_perror("fsopen(tmpfs)");
+
+	if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+		return pr_perror("FSCONFIG_CMD_CREATE");
+
+	tmpfs = sys_fsmount(fsfd, 0, 0);
+	if (tmpfs == -1)
+		return pr_perror("fsmount");
+
+	close(fsfd);
+
+	/* overlayfs can't be constructed on top of a detached mount. */
+	if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH))
+		return pr_perror("move_mount");
+	close(tmpfs);
+
+	if (mkdir("/tmp/w", 0755) == -1 ||
+	    mkdir("/tmp/u", 0755) == -1 ||
+	    mkdir("/tmp/l", 0755) == -1)
+		return pr_perror("mkdir");
+
+	fsfd = sys_fsopen("overlay", 0);
+	if (fsfd == -1)
+		return pr_perror("fsopen(overlay)");
+	if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 ||
+	    sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 ||
+	    sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 ||
+	    sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1)
+		return pr_perror("fsconfig");
+	if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+		return pr_perror("fsconfig");
+	ovl = sys_fsmount(fsfd, 0, 0);
+	if (ovl == -1)
+		return pr_perror("fsmount");
+
+	return ovl;
+}
+
+/*
+ * Check that the file device and inode shown in /proc/pid/maps match values
+ * returned by stat(2).
+ */
+static int test(void)
+{
+	struct statx stx, mstx;
+	int ovl, fd;
+	void *addr;
+
+	ovl = ovl_mount();
+	if (ovl == -1)
+		return -1;
+
+	fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644);
+	if (fd == -1)
+		return pr_perror("openat");
+
+	addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED)
+		return pr_perror("mmap");
+
+	if (get_file_dev_and_inode(addr, &mstx))
+		return -1;
+	if (statx(fd, "", AT_EMPTY_PATH | AT_STATX_SYNC_AS_STAT, STATX_INO, &stx))
+		return pr_perror("statx");
+
+	if (stx.stx_dev_major != mstx.stx_dev_major ||
+	    stx.stx_dev_minor != mstx.stx_dev_minor ||
+	    stx.stx_ino != mstx.stx_ino)
+		return pr_fail("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n",
+			       mstx.stx_dev_major, mstx.stx_dev_minor, mstx.stx_ino,
+			       stx.stx_dev_major, stx.stx_dev_minor, stx.stx_ino);
+
+	ksft_test_result_pass("devices are matched\n");
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int fsfd;
+
+	fsfd = sys_fsopen("overlay", 0);
+	if (fsfd == -1) {
+		ksft_test_result_skip("unable to create overlay mount\n");
+		return 1;
+	}
+	close(fsfd);
+
+	/* Create a new mount namespace to not care about cleaning test mounts. */
+	if (unshare(CLONE_NEWNS) == -1) {
+		ksft_test_result_skip("unable to create a new mount namespace\n");
+		return 1;
+	}
+
+	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
+		pr_perror("mount");
+		return 1;
+	}
+
+	ksft_set_plan(1);
+
+	if (test())
+		return 1;
+
+	ksft_exit_pass();
+	return 0;
+}
new file: tools/testing/selftests/filesystems/overlayfs/log.h (26 lines)
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTEST_TIMENS_LOG_H__
+#define __SELFTEST_TIMENS_LOG_H__
+
+#define pr_msg(fmt, lvl, ...)						\
+	ksft_print_msg("[%s] (%s:%d)\t" fmt "\n",			\
+			lvl, __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define pr_p(func, fmt, ...)	func(fmt ": %m", ##__VA_ARGS__)
+
+#define pr_err(fmt, ...)						\
+	({								\
+		ksft_test_result_error(fmt "\n", ##__VA_ARGS__);	\
+		-1;							\
+	})
+
+#define pr_fail(fmt, ...)					\
+	({							\
+		ksft_test_result_fail(fmt, ##__VA_ARGS__);	\
+		-1;						\
+	})
+
+#define pr_perror(fmt, ...)	pr_p(pr_err, fmt, ##__VA_ARGS__)
+
+#endif
@@ -61,7 +61,7 @@ static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
 
 	list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
 				 srcu_read_lock_held(&resampler->kvm->irq_srcu))
-		eventfd_signal(irqfd->resamplefd, 1);
+		eventfd_signal(irqfd->resamplefd);
 }
 
 /*
@@ -786,7 +786,7 @@ ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	if (!ioeventfd_in_range(p, addr, len, val))
 		return -EOPNOTSUPP;
 
-	eventfd_signal(p->eventfd, 1);
+	eventfd_signal(p->eventfd);
 	return 0;
 }
 