linux/drivers/ptp/ptp_private.h

143 lines
3.9 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PTP 1588 clock support - private declarations for the core module.
*
* Copyright (C) 2010 OMICRON electronics GmbH
*/
#ifndef _PTP_PRIVATE_H_
#define _PTP_PRIVATE_H_
#include <linux/cdev.h>
#include <linux/device.h>
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 01:30:02 +03:00
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/posix-clock.h>
#include <linux/ptp_clock.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/time.h>
#define PTP_MAX_TIMESTAMPS 128
#define PTP_BUF_TIMESTAMPS 30
#define PTP_DEFAULT_MAX_VCLOCKS 20
struct timestamp_event_queue {
struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS];
int head;
int tail;
spinlock_t lock;
};
struct ptp_clock {
struct posix_clock clock;
ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-27 05:26:27 +03:00
struct device dev;
struct ptp_clock_info *info;
dev_t devid;
int index; /* index into clocks.map */
struct pps_device *pps_source;
long dialed_frequency; /* remembers the frequency adjustment */
struct timestamp_event_queue tsevq; /* simple fifo for time stamps */
struct mutex tsevq_mux; /* one process at a time reading the fifo */
struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
wait_queue_head_t tsev_wq;
int defunct; /* tells readers to go away when clock is being removed */
struct device_attribute *pin_dev_attr;
struct attribute **pin_attr;
struct attribute_group pin_attr_group;
/* 1st entry is a pointer to the real group, 2nd is NULL terminator */
const struct attribute_group *pin_attr_groups[2];
ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-29 01:30:02 +03:00
struct kthread_worker *kworker;
struct kthread_delayed_work aux_work;
unsigned int max_vclocks;
unsigned int n_vclocks;
int *vclock_index;
struct mutex n_vclocks_mux; /* protect concurrent n_vclocks access */
bool is_virtual_clock;
bool has_cycles;
};
#define info_to_vclock(d) container_of((d), struct ptp_vclock, info)
#define cc_to_vclock(d) container_of((d), struct ptp_vclock, cc)
#define dw_to_vclock(d) container_of((d), struct ptp_vclock, refresh_work)
struct ptp_vclock {
struct ptp_clock *pclock;
struct ptp_clock_info info;
struct ptp_clock *clock;
struct hlist_node vclock_hash_node;
struct cyclecounter cc;
struct timecounter tc;
ptp: vclock: use mutex to fix "sleep on atomic" bug vclocks were using spinlocks to protect access to its timecounter and cyclecounter. Access to timecounter/cyclecounter is backed by the same driver callbacks that are used for non-virtual PHCs, but the usage of the spinlock imposes a new limitation that didn't exist previously: now they're called in atomic context so they mustn't sleep. Some drivers like sfc or ice may sleep on these callbacks, causing errors like "BUG: scheduling while atomic: ptp5/25223/0x00000002" Fix it replacing the vclock's spinlock by a mutex. It fix the mentioned bug and it doesn't introduce longer delays. I've tested synchronizing various different combinations of clocks: - vclock->sysclock - sysclock->vclock - vclock->vclock - hardware PHC in different NIC -> vclock - created 4 vclocks and launch 4 parallel phc2sys processes with lockdep enabled In all cases, comparing the delays reported by phc2sys, they are in the same range of values than before applying the patch. Link: https://lore.kernel.org/netdev/69d0ff33-bd32-6aa5-d36c-fbdc3c01337c@redhat.com/ Fixes: 5d43f951b1ac ("ptp: add ptp virtual clock driver framework") Reported-by: Yalin Li <yalli@redhat.com> Suggested-by: Richard Cochran <richardcochran@gmail.com> Tested-by: Miroslav Lichvar <mlichvar@redhat.com> Signed-off-by: Íñigo Huguet <ihuguet@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Link: https://lore.kernel.org/r/20230221130616.21837-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-02-21 16:06:16 +03:00
struct mutex lock; /* protects tc/cc */
};
/*
* The function queue_cnt() is safe for readers to call without
* holding q->lock. Readers use this function to verify that the queue
* is nonempty before proceeding with a dequeue operation. The fact
* that a writer might concurrently increment the tail does not
* matter, since the queue remains nonempty nonetheless.
*/
static inline int queue_cnt(struct timestamp_event_queue *q)
{
int cnt = q->tail - q->head;
return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
}
/* Check if ptp virtual clock is in use */
static inline bool ptp_vclock_in_use(struct ptp_clock *ptp)
{
bool in_use = false;
if (mutex_lock_interruptible(&ptp->n_vclocks_mux))
return true;
if (!ptp->is_virtual_clock && ptp->n_vclocks)
in_use = true;
mutex_unlock(&ptp->n_vclocks_mux);
return in_use;
}
/* Check if ptp clock shall be free running */
static inline bool ptp_clock_freerun(struct ptp_clock *ptp)
{
if (ptp->has_cycles)
return false;
return ptp_vclock_in_use(ptp);
}
extern struct class *ptp_class;
/*
* see ptp_chardev.c
*/
/* caller must hold pincfg_mux */
int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
enum ptp_pin_function func, unsigned int chan);
long ptp_ioctl(struct posix_clock *pc,
unsigned int cmd, unsigned long arg);
int ptp_open(struct posix_clock *pc, fmode_t fmode);
ssize_t ptp_read(struct posix_clock *pc,
uint flags, char __user *buf, size_t cnt);
__poll_t ptp_poll(struct posix_clock *pc,
struct file *fp, poll_table *wait);
/*
* see ptp_sysfs.c
*/
extern const struct attribute_group *ptp_groups[];
int ptp_populate_pin_groups(struct ptp_clock *ptp);
void ptp_cleanup_pin_groups(struct ptp_clock *ptp);
struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock);
void ptp_vclock_unregister(struct ptp_vclock *vclock);
#endif