a33121e548
In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
device is removed, closing this file leads to a race. This reproduces
easily in a kvm virtual machine:

ts# cat openptp0.c
int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
ts# uname -r
5.5.0-rc3-46cf053e
ts# cat /proc/cmdline
... slub_debug=FZP
ts# modprobe ptp_kvm
ts# ./openptp0 &
[1] 670
opened /dev/ptp0, sleeping 10s...
ts# rmmod ptp_kvm
ts# ls /dev/ptp*
ls: cannot access '/dev/ptp*': No such file or directory
ts# ...woken up
[ 48.010809] general protection fault: 0000 [#1] SMP
[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80
[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
[ 48.019470] ... ^^^ a slub poison
[ 48.023854] Call Trace:
[ 48.024050] __fput+0x21f/0x240
[ 48.024288] task_work_run+0x79/0x90
[ 48.024555] do_exit+0x2af/0xab0
[ 48.024799] ? vfs_write+0x16a/0x190
[ 48.025082] do_group_exit+0x35/0x90
[ 48.025387] __x64_sys_exit_group+0xf/0x10
[ 48.025737] do_syscall_64+0x3d/0x130
[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 48.026479] RIP: 0033:0x7f53b12082f6
[ 48.026792] ...
[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
[ 48.045001] Fixing recursive fault but reboot is needed!

This happens in:

static void __fput(struct file *file)
{ ...
    if (file->f_op->release)
        file->f_op->release(inode, file); <<< cdev is kfree'd here
    if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
                 !(mode & FMODE_PATH))) {
        cdev_put(inode->i_cdev); <<< cdev fields are accessed here

Namely:

__fput()
  posix_clock_release()
    kref_put(&clk->kref, delete_clock) <<< the last reference
      delete_clock()
        delete_ptp_clock()
          kfree(ptp) <<< cdev is embedded in ptp
  cdev_put
    module_put(p->owner) <<< *p is kfree'd, bang!

Here cdev is embedded in posix_clock which is embedded in ptp_clock.
The race happens because ptp_clock's lifetime is controlled by two
refcounts: kref and cdev.kobj in posix_clock. This is wrong.

Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
created especially for such cases. This way the parent device with its
ptp_clock is not released until all references to the cdev are released.
This adds a requirement that an initialized but not exposed struct
device should be provided to posix_clock_register() by a caller instead
of a simple dev_t.

This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
the race between the release of watchdog_core_data and cdev"). See
details of the implementation in the commit 233ed09d7fda ("chardev: add
helper function to register char devs with a struct device").

Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
318 lines
5.6 KiB
C
318 lines
5.6 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
|
|
* Support for dynamic clock devices
|
|
*
|
|
* Copyright (C) 2010 OMICRON electronics GmbH
|
|
*/
|
|
#include <linux/device.h>
|
|
#include <linux/export.h>
|
|
#include <linux/file.h>
|
|
#include <linux/posix-clock.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include "posix-timers.h"
|
|
|
|
/*
|
|
* Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
|
|
*/
|
|
static struct posix_clock *get_posix_clock(struct file *fp)
|
|
{
|
|
struct posix_clock *clk = fp->private_data;
|
|
|
|
down_read(&clk->rwsem);
|
|
|
|
if (!clk->zombie)
|
|
return clk;
|
|
|
|
up_read(&clk->rwsem);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static void put_posix_clock(struct posix_clock *clk)
|
|
{
|
|
up_read(&clk->rwsem);
|
|
}
|
|
|
|
/*
 * read() handler: forwards to the clock's ops.read callback.
 *
 * Returns -ENODEV if the clock is a zombie, -EINVAL if the clock provides no
 * read callback, otherwise whatever the callback returns. @ppos is unused.
 */
static ssize_t posix_clock_read(struct file *fp, char __user *buf,
				size_t count, loff_t *ppos)
{
	struct posix_clock *pclk;
	int ret;

	pclk = get_posix_clock(fp);
	if (!pclk)
		return -ENODEV;

	if (pclk->ops.read)
		ret = pclk->ops.read(pclk, fp->f_flags, buf, count);
	else
		ret = -EINVAL;

	put_posix_clock(pclk);

	return ret;
}
|
|
|
|
/*
 * poll() handler: forwards to the clock's ops.poll callback.
 *
 * Returns EPOLLERR if the clock is a zombie, 0 if the clock provides no poll
 * callback, otherwise the mask returned by the callback.
 */
static __poll_t posix_clock_poll(struct file *fp, poll_table *wait)
{
	struct posix_clock *pclk;
	__poll_t mask;

	pclk = get_posix_clock(fp);
	if (!pclk)
		return EPOLLERR;

	mask = pclk->ops.poll ? pclk->ops.poll(pclk, fp, wait) : 0;

	put_posix_clock(pclk);

	return mask;
}
|
|
|
|
static long posix_clock_ioctl(struct file *fp,
|
|
unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct posix_clock *clk = get_posix_clock(fp);
|
|
int err = -ENOTTY;
|
|
|
|
if (!clk)
|
|
return -ENODEV;
|
|
|
|
if (clk->ops.ioctl)
|
|
err = clk->ops.ioctl(clk, cmd, arg);
|
|
|
|
put_posix_clock(clk);
|
|
|
|
return err;
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
/*
 * compat ioctl() handler, built only with CONFIG_COMPAT. It hands @cmd and
 * @arg unchanged to the same ops.ioctl callback as posix_clock_ioctl().
 *
 * Returns -ENODEV if the clock is a zombie, -ENOTTY if the clock provides no
 * ioctl callback, otherwise whatever the callback returns.
 */
static long posix_clock_compat_ioctl(struct file *fp,
				     unsigned int cmd, unsigned long arg)
{
	struct posix_clock *pclk;
	int ret;

	pclk = get_posix_clock(fp);
	if (!pclk)
		return -ENODEV;

	if (pclk->ops.ioctl)
		ret = pclk->ops.ioctl(pclk, cmd, arg);
	else
		ret = -ENOTTY;

	put_posix_clock(pclk);

	return ret;
}
#endif
|
|
|
|
static int posix_clock_open(struct inode *inode, struct file *fp)
|
|
{
|
|
int err;
|
|
struct posix_clock *clk =
|
|
container_of(inode->i_cdev, struct posix_clock, cdev);
|
|
|
|
down_read(&clk->rwsem);
|
|
|
|
if (clk->zombie) {
|
|
err = -ENODEV;
|
|
goto out;
|
|
}
|
|
if (clk->ops.open)
|
|
err = clk->ops.open(clk, fp->f_mode);
|
|
else
|
|
err = 0;
|
|
|
|
if (!err) {
|
|
get_device(clk->dev);
|
|
fp->private_data = clk;
|
|
}
|
|
out:
|
|
up_read(&clk->rwsem);
|
|
return err;
|
|
}
|
|
|
|
static int posix_clock_release(struct inode *inode, struct file *fp)
|
|
{
|
|
struct posix_clock *clk = fp->private_data;
|
|
int err = 0;
|
|
|
|
if (clk->ops.release)
|
|
err = clk->ops.release(clk);
|
|
|
|
put_device(clk->dev);
|
|
|
|
fp->private_data = NULL;
|
|
|
|
return err;
|
|
}
|
|
|
|
static const struct file_operations posix_clock_file_operations = {
|
|
.owner = THIS_MODULE,
|
|
.llseek = no_llseek,
|
|
.read = posix_clock_read,
|
|
.poll = posix_clock_poll,
|
|
.unlocked_ioctl = posix_clock_ioctl,
|
|
.open = posix_clock_open,
|
|
.release = posix_clock_release,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_ioctl = posix_clock_compat_ioctl,
|
|
#endif
|
|
};
|
|
|
|
int posix_clock_register(struct posix_clock *clk, struct device *dev)
|
|
{
|
|
int err;
|
|
|
|
init_rwsem(&clk->rwsem);
|
|
|
|
cdev_init(&clk->cdev, &posix_clock_file_operations);
|
|
err = cdev_device_add(&clk->cdev, dev);
|
|
if (err) {
|
|
pr_err("%s unable to add device %d:%d\n",
|
|
dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
|
|
return err;
|
|
}
|
|
clk->cdev.owner = clk->ops.owner;
|
|
clk->dev = dev;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(posix_clock_register);
|
|
|
|
void posix_clock_unregister(struct posix_clock *clk)
|
|
{
|
|
cdev_device_del(&clk->cdev, clk->dev);
|
|
|
|
down_write(&clk->rwsem);
|
|
clk->zombie = true;
|
|
up_write(&clk->rwsem);
|
|
|
|
put_device(clk->dev);
|
|
}
|
|
EXPORT_SYMBOL_GPL(posix_clock_unregister);
|
|
|
|
/*
 * Result of resolving a dynamic clock id: the file obtained with fget() and
 * the posix_clock attached to it. Filled in by get_clock_desc() and released
 * by put_clock_desc().
 */
struct posix_clock_desc {
	struct file *fp;		/* file reference from fget() */
	struct posix_clock *clk;	/* clock returned by get_posix_clock() */
};
|
|
|
|
static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
|
|
{
|
|
struct file *fp = fget(clockid_to_fd(id));
|
|
int err = -EINVAL;
|
|
|
|
if (!fp)
|
|
return err;
|
|
|
|
if (fp->f_op->open != posix_clock_open || !fp->private_data)
|
|
goto out;
|
|
|
|
cd->fp = fp;
|
|
cd->clk = get_posix_clock(fp);
|
|
|
|
err = cd->clk ? 0 : -ENODEV;
|
|
out:
|
|
if (err)
|
|
fput(fp);
|
|
return err;
|
|
}
|
|
|
|
static void put_clock_desc(struct posix_clock_desc *cd)
|
|
{
|
|
put_posix_clock(cd->clk);
|
|
fput(cd->fp);
|
|
}
|
|
|
|
/*
 * Adjust the dynamic clock identified by @id via its ops.clock_adjtime
 * callback.
 *
 * Returns the error from get_clock_desc() if the id cannot be resolved,
 * -EACCES if the file was not opened for writing, -EOPNOTSUPP if the clock
 * has no clock_adjtime callback, otherwise the callback's result.
 */
static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx)
{
	struct posix_clock_desc desc;
	int ret;

	ret = get_clock_desc(id, &desc);
	if (ret)
		return ret;

	if (!(desc.fp->f_mode & FMODE_WRITE))
		ret = -EACCES;
	else if (!desc.clk->ops.clock_adjtime)
		ret = -EOPNOTSUPP;
	else
		ret = desc.clk->ops.clock_adjtime(desc.clk, tx);

	put_clock_desc(&desc);

	return ret;
}
|
|
|
|
/*
 * Read the dynamic clock identified by @id into @ts via its
 * ops.clock_gettime callback.
 *
 * Returns the error from get_clock_desc() if the id cannot be resolved,
 * -EOPNOTSUPP if the clock has no clock_gettime callback, otherwise the
 * callback's result.
 */
static int pc_clock_gettime(clockid_t id, struct timespec64 *ts)
{
	struct posix_clock_desc desc;
	int ret;

	ret = get_clock_desc(id, &desc);
	if (ret)
		return ret;

	ret = desc.clk->ops.clock_gettime ?
		desc.clk->ops.clock_gettime(desc.clk, ts) : -EOPNOTSUPP;

	put_clock_desc(&desc);

	return ret;
}
|
|
|
|
/*
 * Query the resolution of the dynamic clock identified by @id into @ts via
 * its ops.clock_getres callback.
 *
 * Returns the error from get_clock_desc() if the id cannot be resolved,
 * -EOPNOTSUPP if the clock has no clock_getres callback, otherwise the
 * callback's result.
 */
static int pc_clock_getres(clockid_t id, struct timespec64 *ts)
{
	struct posix_clock_desc desc;
	int ret;

	ret = get_clock_desc(id, &desc);
	if (ret)
		return ret;

	ret = desc.clk->ops.clock_getres ?
		desc.clk->ops.clock_getres(desc.clk, ts) : -EOPNOTSUPP;

	put_clock_desc(&desc);

	return ret;
}
|
|
|
|
/*
 * Set the dynamic clock identified by @id to @ts via its ops.clock_settime
 * callback.
 *
 * Returns -EINVAL if @ts is not a valid time value, the error from
 * get_clock_desc() if the id cannot be resolved, -EACCES if the file was not
 * opened for writing, -EOPNOTSUPP if the clock has no clock_settime callback,
 * otherwise the callback's result.
 */
static int pc_clock_settime(clockid_t id, const struct timespec64 *ts)
{
	struct posix_clock_desc cd;
	int err;

	/*
	 * Reject a negative tv_sec or an out-of-range tv_nsec here, before
	 * any lock or reference is taken, rather than passing an unvalidated
	 * value on to the clock driver.
	 */
	if (!timespec64_valid_strict(ts))
		return -EINVAL;

	err = get_clock_desc(id, &cd);
	if (err)
		return err;

	if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
		err = -EACCES;
		goto out;
	}

	if (cd.clk->ops.clock_settime)
		err = cd.clk->ops.clock_settime(cd.clk, ts);
	else
		err = -EOPNOTSUPP;
out:
	put_clock_desc(&cd);

	return err;
}
|
|
|
|
const struct k_clock clock_posix_dynamic = {
|
|
.clock_getres = pc_clock_getres,
|
|
.clock_set = pc_clock_settime,
|
|
.clock_get = pc_clock_gettime,
|
|
.clock_adj = pc_clock_adjtime,
|
|
};
|