From b7a7ce1bb77b19ff2859d365da96285340fbc145 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 May 2023 15:11:02 +0200 Subject: [PATCH 01/39] vdso/timens: Always provide arch_get_vdso_data() prototype for vdso The arch_get_vdso_data() function is defined separately on each architecture, but only called when CONFIG_TIME_NS is set. If the definition is a global function, this causes a W=1 warning without TIME_NS: arch/x86/entry/vdso/vma.c:35:19: error: no previous prototype for 'arch_get_vdso_data' [-Werror=missing-prototypes] Move the prototype out of the #ifdef block to reliably turn off that warning. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230517131102.934196-15-arnd@kernel.org --- include/linux/time_namespace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index bb9d3f5542f8..03d9c5ac01d1 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -44,7 +44,6 @@ struct time_namespace *copy_time_ns(unsigned long flags, struct time_namespace *old_ns); void free_time_ns(struct time_namespace *ns); void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); -struct vdso_data *arch_get_vdso_data(void *vvar_page); struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) @@ -163,4 +162,6 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #endif +struct vdso_data *arch_get_vdso_data(void *vvar_page); + #endif /* _LINUX_TIMENS_H */ From 9d9e522010eb5685d8b53e8a24320653d9d4cbbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 22:16:34 +0200 Subject: [PATCH 02/39] posix-timers: Prevent RT livelock in itimer_delete() itimer_delete() has a retry loop when the timer is concurrently expired. On non-RT kernels this just spin-waits until the timer callback has completed, except for posix CPU timers which have HAVE_POSIX_CPU_TIMERS_TASK_WORK enabled. In that case and on RT kernels the existing task could live lock when preempting the task which does the timer delivery. Replace spin_unlock() with an invocation of timer_wait_running() to handle it the same way as the other retry loops in the posix timer code. Fixes: ec8f954a40da ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT") Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/87v8g7c50d.ffs@tglx --- kernel/time/posix-timers.c | 43 +++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 808a247205a9..ed3c4a954398 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1037,27 +1037,52 @@ retry_delete: } /* - * return timer owned by the process, used by exit_itimers + * Delete a timer if it is armed, remove it from the hash and schedule it + * for RCU freeing. */ static void itimer_delete(struct k_itimer *timer) { -retry_delete: - spin_lock_irq(&timer->it_lock); + unsigned long flags; + /* + * irqsave is required to make timer_wait_running() work. + */ + spin_lock_irqsave(&timer->it_lock, flags); + +retry_delete: + /* + * Even if the timer is not longer accessible from other tasks + * it still might be armed and queued in the underlying timer + * mechanism. Worse, that timer mechanism might run the expiry + * function concurrently. + */ if (timer_delete_hook(timer) == TIMER_RETRY) { - spin_unlock_irq(&timer->it_lock); + /* + * Timer is expired concurrently, prevent livelocks + * and pointless spinning on RT. + * + * timer_wait_running() drops timer::it_lock, which opens + * the possibility for another task to delete the timer. + * + * That's not possible here because this is invoked from + * do_exit() only for the last thread of the thread group. + * So no other task can access and delete that timer. + */ + if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) + return; + goto retry_delete; } list_del(&timer->list); - spin_unlock_irq(&timer->it_lock); + spin_unlock_irqrestore(&timer->it_lock, flags); release_posix_timer(timer, IT_ID_SET); } /* - * This is called by do_exit or de_thread, only when nobody else can - * modify the signal->posix_timers list. Yet we need sighand->siglock - * to prevent the race with /proc/pid/timers. + * Invoked from do_exit() when the last thread of a thread group exits. + * At that point no other task can access the timers of the dying + * task anymore. */ void exit_itimers(struct task_struct *tsk) { @@ -1067,10 +1092,12 @@ void exit_itimers(struct task_struct *tsk) if (list_empty(&tsk->signal->posix_timers)) return; + /* Protect against concurrent read via /proc/$PID/timers */ spin_lock_irq(&tsk->sighand->siglock); list_replace_init(&tsk->signal->posix_timers, &timers); spin_unlock_irq(&tsk->sighand->siglock); + /* The timers are not longer accessible via tsk::signal */ while (!list_empty(&timers)) { tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); From 8ce8849dd1e78dadcee0ec9acbd259d239b7069f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 20:58:47 +0200 Subject: [PATCH 03/39] posix-timers: Ensure timer ID search-loop limit is valid posix_timer_add() tries to allocate a posix timer ID by starting from the cached ID which was stored by the last successful allocation. This is done in a loop searching the ID space for a free slot one by one. The loop has to terminate when the search wrapped around to the starting point. But that's racy vs. establishing the starting point. That is read out lockless, which leads to the following problem: CPU0 CPU1 posix_timer_add() start = sig->posix_timer_id; lock(hash_lock); ... posix_timer_add() if (++sig->posix_timer_id < 0) start = sig->posix_timer_id; sig->posix_timer_id = 0; So CPU1 can observe a negative start value, i.e. -1, and the loop break never happens because the condition can never be true: if (sig->posix_timer_id == start) break; While this is unlikely to ever turn into an endless loop as the ID space is huge (INT_MAX), the racy read of the start value caught the attention of KCSAN and Dmitry unearthed that incorrectness. Rewrite it so that all id operations are under the hash lock. Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx --- include/linux/sched/signal.h | 2 +- kernel/time/posix-timers.c | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 20099268fa25..669e8cff40c7 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -135,7 +135,7 @@ struct signal_struct { #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ - int posix_timer_id; + unsigned int next_posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ed3c4a954398..2d6cf93ca370 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -140,25 +140,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id) static int posix_timer_add(struct k_itimer *timer) { struct signal_struct *sig = current->signal; - int first_free_id = sig->posix_timer_id; struct hlist_head *head; - int ret = -ENOENT; + unsigned int cnt, id; - do { + /* + * FIXME: Replace this by a per signal struct xarray once there is + * a plan to handle the resulting CRIU regression gracefully. + */ + for (cnt = 0; cnt <= INT_MAX; cnt++) { spin_lock(&hash_lock); - head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; - if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { + id = sig->next_posix_timer_id; + + /* Write the next ID back. Clamp it to the positive space */ + sig->next_posix_timer_id = (id + 1) & INT_MAX; + + head = &posix_timers_hashtable[hash(sig, id)]; + if (!__posix_timers_find(head, sig, id)) { hlist_add_head_rcu(&timer->t_hash, head); - ret = sig->posix_timer_id; + spin_unlock(&hash_lock); + return id; } - if (++sig->posix_timer_id < 0) - sig->posix_timer_id = 0; - if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) - /* Loop over all possible ids completed */ - ret = -EAGAIN; spin_unlock(&hash_lock); - } while (ret == -ENOENT); - return ret; + } + /* POSIX return code when no timer ID could be allocated */ + return -EAGAIN; } static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) From 7d9909026645064cd31b20cee5939a0f72282261 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:00 +0200 Subject: [PATCH 04/39] posix-timers: Clarify timer_wait_running() comment Explain it better and add the CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y aspect for completeness. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183312.985681995@linutronix.de --- kernel/time/posix-timers.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2d6cf93ca370..0c61f29ef5d5 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -836,10 +836,18 @@ static void common_timer_wait_running(struct k_itimer *timer) } /* - * On PREEMPT_RT this prevent priority inversion against softirq kthread in - * case it gets preempted while executing a timer callback. See comments in - * hrtimer_cancel_wait_running. For PREEMPT_RT=n this just results in a - * cpu_relax(). + * On PREEMPT_RT this prevents priority inversion and a potential livelock + * against the ksoftirqd thread in case that ksoftirqd gets preempted while + * executing a hrtimer callback. + * + * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this + * just results in a cpu_relax(). + * + * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is + * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this + * prevents spinning on an eventually scheduled out task and a livelock + * when the task which tries to delete or disarm the timer has preempted + * the task which runs the expiry in task work context. */ static struct k_itimer *timer_wait_running(struct k_itimer *timer, unsigned long *flags) From 8d44b958a1a088195524c884f0437660e0522380 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:01 +0200 Subject: [PATCH 05/39] posix-timers: Cleanup comments about timer ID tracking Describe the hash table properly and remove the IDR leftover comments. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.038444551@linutronix.de --- kernel/time/posix-timers.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 0c61f29ef5d5..79909a2ac3e4 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -35,20 +35,17 @@ #include "timekeeping.h" #include "posix-timers.h" -/* - * Management arrays for POSIX timers. Timers are now kept in static hash table - * with 512 entries. - * Timer ids are allocated by local routine, which selects proper hash head by - * key, constructed from current->signal address and per signal struct counter. - * This keeps timer ids unique per process, but now they can intersect between - * processes. - */ - -/* - * Lets keep our timers in a slab cache :-) - */ static struct kmem_cache *posix_timers_cache; +/* + * Timers are managed in a hash table for lockless lookup. The hash key is + * constructed from current::signal and the timer ID and the timer is + * matched against current::signal and the timer ID when walking the hash + * bucket list. + * + * This allows checkpoint/restore to reconstruct the exact timer IDs for + * a process. + */ static DEFINE_HASHTABLE(posix_timers_hashtable, 9); static DEFINE_SPINLOCK(hash_lock); @@ -65,15 +62,6 @@ static const struct k_clock clock_realtime, clock_monotonic; #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif -/* - * The timer ID is turned into a timer address by idr_find(). - * Verifying a valid ID consists of: - * - * a) checking that idr_find() returns other than -1. - * b) checking that the timer id matches the one in the timer itself. - * c) that the timer owner is in the callers thread group. - */ - /* * CLOCKs: The POSIX standard calls for a couple of clocks and allows us * to implement others. This structure defines the various From ae88967d71f1b4ffb6e48043993d37a106da8109 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:03 +0200 Subject: [PATCH 06/39] posix-timers: Add comments about timer lookup Document how the timer ID validation in the hash table works. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.091081515@linutronix.de --- kernel/time/posix-timers.c | 39 +++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 79909a2ac3e4..d7890ac703ae 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -506,6 +506,12 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, return -EAGAIN; spin_lock_init(&new_timer->it_lock); + + /* + * Add the timer to the hash table. The timer is not yet valid + * because new_timer::it_signal is still NULL. The timer id is also + * not yet visible to user space. + */ new_timer_id = posix_timer_add(new_timer); if (new_timer_id < 0) { error = new_timer_id; @@ -551,6 +557,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, goto out; spin_lock_irq(¤t->sighand->siglock); + /* This makes the timer valid in the hash table */ new_timer->it_signal = current->signal; list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); @@ -597,13 +604,6 @@ COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, } #endif -/* - * Locking issues: We need to protect the result of the id look up until - * we get the timer locked down so it is not deleted under us. The - * removal is done under the idr spinlock so we use that here to bridge - * the find to the timer lock. To avoid a dead lock, the timer id MUST - * be release with out holding the timer lock. - */ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; @@ -615,10 +615,35 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) if ((unsigned long long)timer_id > INT_MAX) return NULL; + /* + * The hash lookup and the timers are RCU protected. + * + * Timers are added to the hash in invalid state where + * timr::it_signal == NULL. timer::it_signal is only set after the + * rest of the initialization succeeded. + * + * Timer destruction happens in steps: + * 1) Set timr::it_signal to NULL with timr::it_lock held + * 2) Release timr::it_lock + * 3) Remove from the hash under hash_lock + * 4) Call RCU for removal after the grace period + * + * Holding rcu_read_lock() accross the lookup ensures that + * the timer cannot be freed. + * + * The lookup validates locklessly that timr::it_signal == + * current::it_signal and timr::it_id == @timer_id. timr::it_id + * can't change, but timr::it_signal becomes NULL during + * destruction. + */ rcu_read_lock(); timr = posix_timer_by_id(timer_id); if (timr) { spin_lock_irqsave(&timr->it_lock, *flags); + /* + * Validate under timr::it_lock that timr::it_signal is + * still valid. Pairs with #1 above. + */ if (timr->it_signal == current->signal) { rcu_read_unlock(); return timr; From 028cf5eaa12846c4e32104132ff70ca1cd6f5943 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:05 +0200 Subject: [PATCH 07/39] posix-timers: Annotate concurrent access to k_itimer:: It_signal k_itimer::it_signal is read lockless in the RCU protected hash lookup, but it can be written concurrently in the timer_create() and timer_delete() path. Annotate these places with READ_ONCE() and WRITE_ONCE() Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.143596887@linutronix.de --- kernel/time/posix-timers.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d7890ac703ae..de3fca8dae55 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -109,9 +109,9 @@ static struct k_itimer *__posix_timers_find(struct hlist_head *head, { struct k_itimer *timer; - hlist_for_each_entry_rcu(timer, head, t_hash, - lockdep_is_held(&hash_lock)) { - if ((timer->it_signal == sig) && (timer->it_id == id)) + hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) { + /* timer->it_signal can be set concurrently */ + if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id)) return timer; } return NULL; @@ -558,7 +558,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, spin_lock_irq(¤t->sighand->siglock); /* This makes the timer valid in the hash table */ - new_timer->it_signal = current->signal; + WRITE_ONCE(new_timer->it_signal, current->signal); list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); @@ -1052,10 +1052,10 @@ retry_delete: list_del(&timer->list); spin_unlock(¤t->sighand->siglock); /* - * This keeps any tasks waiting on the spin lock from thinking - * they got something (see the lock code above). + * A concurrent lookup could check timer::it_signal lockless. It + * will reevaluate with timer::it_lock held and observe the NULL. */ - timer->it_signal = NULL; + WRITE_ONCE(timer->it_signal, NULL); unlock_timer(timer, flags); release_posix_timer(timer, IT_ID_SET); From 72786ff23d5acb7bf3e2535831b2f1dc55c7f44e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:06 +0200 Subject: [PATCH 08/39] posix-timers: Set k_itimer:: It_signal to NULL on exit() Technically it's not required to set k_itimer::it_signal to NULL on exit() because there is no other thread anymore which could lookup the timer concurrently. Set it to NULL for consistency sake and add a comment to that effect. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.196462644@linutronix.de --- kernel/time/posix-timers.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index de3fca8dae55..c1b77c597f5f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1101,6 +1101,14 @@ retry_delete: } list_del(&timer->list); + /* + * Setting timer::it_signal to NULL is technically not required + * here as nothing can access the timer anymore legitimately via + * the hash table. Set it to NULL nevertheless so that all deletion + * paths are consistent. + */ + WRITE_ONCE(timer->it_signal, NULL); + spin_unlock_irqrestore(&timer->it_lock, flags); release_posix_timer(timer, IT_ID_SET); } From 11fbe6cd41210c7b5173257158a22e11e225622d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:08 +0200 Subject: [PATCH 09/39] posix-timers: Remove pointless irqsafe from hash_lock All usage of hash_lock is in thread context. No point in using spin_lock_irqsave()/irqrestore() for a single usage site. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.249063953@linutronix.de --- kernel/time/posix-timers.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index c1b77c597f5f..ed7d260f51ef 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -471,10 +471,9 @@ static void k_itimer_rcu_free(struct rcu_head *head) static void release_posix_timer(struct k_itimer *tmr, int it_id_set) { if (it_id_set) { - unsigned long flags; - spin_lock_irqsave(&hash_lock, flags); + spin_lock(&hash_lock, flags); hlist_del_rcu(&tmr->t_hash); - spin_unlock_irqrestore(&hash_lock, flags); + spin_unlock(&hash_lock, flags); } put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); From 8cc96ca2c75f6da59de41321797c87562703c9e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:09 +0200 Subject: [PATCH 10/39] posix-timers: Split release_posix_timers() release_posix_timers() is called for cleaning up both hashed and unhashed timers. The cases are differentiated by an argument and the usage is hideous. Seperate the actual free path out and use it for unhashed timers. Provide a function for hashed timers. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.301432503@linutronix.de --- kernel/time/posix-timers.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ed7d260f51ef..6ac693331df9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -466,20 +466,21 @@ static void k_itimer_rcu_free(struct rcu_head *head) kmem_cache_free(posix_timers_cache, tmr); } -#define IT_ID_SET 1 -#define IT_ID_NOT_SET 0 -static void release_posix_timer(struct k_itimer *tmr, int it_id_set) +static void posix_timer_free(struct k_itimer *tmr) { - if (it_id_set) { - spin_lock(&hash_lock, flags); - hlist_del_rcu(&tmr->t_hash); - spin_unlock(&hash_lock, flags); - } put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); call_rcu(&tmr->rcu, k_itimer_rcu_free); } +static void posix_timer_unhash_and_free(struct k_itimer *tmr) +{ + spin_lock(&hash_lock); + hlist_del_rcu(&tmr->t_hash); + spin_unlock(&hash_lock); + posix_timer_free(tmr); +} + static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); @@ -493,7 +494,6 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, const struct k_clock *kc = clockid_to_kclock(which_clock); struct k_itimer *new_timer; int error, new_timer_id; - int it_id_set = IT_ID_NOT_SET; if (!kc) return -EINVAL; @@ -513,11 +513,10 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, */ new_timer_id = posix_timer_add(new_timer); if (new_timer_id < 0) { - error = new_timer_id; - goto out; + posix_timer_free(new_timer); + return new_timer_id; } - it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; new_timer->kclock = kc; @@ -569,7 +568,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, * new_timer after the unlock call. */ out: - release_posix_timer(new_timer, it_id_set); + posix_timer_unhash_and_free(new_timer); return error; } @@ -1057,7 +1056,7 @@ retry_delete: WRITE_ONCE(timer->it_signal, NULL); unlock_timer(timer, flags); - release_posix_timer(timer, IT_ID_SET); + posix_timer_unhash_and_free(timer); return 0; } @@ -1109,7 +1108,7 @@ retry_delete: WRITE_ONCE(timer->it_signal, NULL); spin_unlock_irqrestore(&timer->it_lock, flags); - release_posix_timer(timer, IT_ID_SET); + posix_timer_unhash_and_free(timer); } /* From 01679b5db7172b898be325ff272e10aebd412911 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:11 +0200 Subject: [PATCH 11/39] posix-timers: Document sys_clock_getres() correctly The decades old comment about Posix clock resolution is confusing at best. Remove it and add a proper explanation to sys_clock_getres(). Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.356427330@linutronix.de --- kernel/time/posix-timers.c | 81 ++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 6ac693331df9..1acdd04e5d26 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -67,14 +67,6 @@ static const struct k_clock clock_realtime, clock_monotonic; * to implement others. This structure defines the various * clocks. * - * RESOLUTION: Clock resolution is used to round up timer and interval - * times, NOT to report clock times, which are reported with as - * much resolution as the system can muster. In some cases this - * resolution may depend on the underlying clock hardware and - * may not be quantifiable until run time, and only then is the - * necessary code is written. The standard says we should say - * something about this issue in the documentation... - * * FUNCTIONS: The CLOCKs structure defines possible functions to * handle various clock functions. * @@ -1198,6 +1190,79 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, return err; } +/** + * sys_clock_getres - Get the resolution of a clock + * @which_clock: The clock to get the resolution for + * @tp: Pointer to a a user space timespec64 for storage + * + * POSIX defines: + * + * "The clock_getres() function shall return the resolution of any + * clock. Clock resolutions are implementation-defined and cannot be set by + * a process. If the argument res is not NULL, the resolution of the + * specified clock shall be stored in the location pointed to by res. If + * res is NULL, the clock resolution is not returned. If the time argument + * of clock_settime() is not a multiple of res, then the value is truncated + * to a multiple of res." + * + * Due to the various hardware constraints the real resolution can vary + * wildly and even change during runtime when the underlying devices are + * replaced. The kernel also can use hardware devices with different + * resolutions for reading the time and for arming timers. + * + * The kernel therefore deviates from the POSIX spec in various aspects: + * + * 1) The resolution returned to user space + * + * For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI, + * CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW + * the kernel differentiates only two cases: + * + * I) Low resolution mode: + * + * When high resolution timers are disabled at compile or runtime + * the resolution returned is nanoseconds per tick, which represents + * the precision at which timers expire. + * + * II) High resolution mode: + * + * When high resolution timers are enabled the resolution returned + * is always one nanosecond independent of the actual resolution of + * the underlying hardware devices. + * + * For CLOCK_*_ALARM the actual resolution depends on system + * state. When system is running the resolution is the same as the + * resolution of the other clocks. During suspend the actual + * resolution is the resolution of the underlying RTC device which + * might be way less precise than the clockevent device used during + * running state. + * + * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution + * returned is always nanoseconds per tick. + * + * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution + * returned is always one nanosecond under the assumption that the + * underlying scheduler clock has a better resolution than nanoseconds + * per tick. + * + * For dynamic POSIX clocks (PTP devices) the resolution returned is + * always one nanosecond. + * + * 2) Affect on sys_clock_settime() + * + * The kernel does not truncate the time which is handed in to + * sys_clock_settime(). The kernel internal timekeeping is always using + * nanoseconds precision independent of the clocksource device which is + * used to read the time from. The resolution of that device only + * affects the presicion of the time returned by sys_clock_gettime(). + * + * Returns: + * 0 Success. @tp contains the resolution + * -EINVAL @which_clock is not a valid clock ID + * -EFAULT Copying the resolution to @tp faulted + * -ENODEV Dynamic POSIX clock is not backed by a device + * -EOPNOTSUPP Dynamic POSIX clock does not support getres() + */ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { From a86e9284336729a8f79a65371eacd0c1c7fae142 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:12 +0200 Subject: [PATCH 12/39] posix-timers: Document common_clock_get() correctly Replace another confusing and inaccurate set of comments. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.409169321@linutronix.de --- kernel/time/posix-timers.c | 50 +++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1acdd04e5d26..e1af74ca80ab 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -660,20 +660,16 @@ static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) } /* - * Get the time remaining on a POSIX.1b interval timer. This function - * is ALWAYS called with spin_lock_irq on the timer, thus it must not - * mess with irq. + * Get the time remaining on a POSIX.1b interval timer. * - * We have a couple of messes to clean up here. First there is the case - * of a timer that has a requeue pending. These timers should appear to - * be in the timer list with an expiry as if we were to requeue them - * now. + * Two issues to handle here: * - * The second issue is the SIGEV_NONE timer which may be active but is - * not really ever put in the timer list (to save system resources). - * This timer may be expired, and if so, we will do it here. Otherwise - * it is the same as a requeue pending timer WRT to what we should - * report. + * 1) The timer has a requeue pending. The return value must appear as + * if the timer has been requeued right now. + * + * 2) The timer is a SIGEV_NONE timer. These timers are never enqueued + * into the hrtimer queue and therefore never expired. Emulate expiry + * here taking #1 into account. */ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { @@ -689,8 +685,12 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) cur_setting->it_interval = ktime_to_timespec64(iv); } else if (!timr->it_active) { /* - * SIGEV_NONE oneshot timers are never queued. Check them - * below. + * SIGEV_NONE oneshot timers are never queued and therefore + * timr->it_active is always false. The check below + * vs. remaining time will handle this case. + * + * For all other timers there is nothing to update here, so + * return. */ if (!sig_none) return; @@ -699,18 +699,29 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) now = kc->clock_get_ktime(timr->it_clock); /* - * When a requeue is pending or this is a SIGEV_NONE timer move the - * expiry time forward by intervals, so expiry is > now. + * If this is an interval timer and either has requeue pending or + * is a SIGEV_NONE timer move the expiry time forward by intervals, + * so expiry is > now. */ if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) timr->it_overrun += kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); - /* Return 0 only, when the timer is expired and not pending */ + /* + * As @now is retrieved before a possible timer_forward() and + * cannot be reevaluated by the compiler @remaining is based on the + * same @now value. Therefore @remaining is consistent vs. @now. + * + * Consequently all interval timers, i.e. @iv > 0, cannot have a + * remaining time <= 0 because timer_forward() guarantees to move + * them forward so that the next timer expiry is > @now. + */ if (remaining <= 0) { /* - * A single shot SIGEV_NONE timer must return 0, when - * it is expired ! + * A single shot SIGEV_NONE timer must return 0, when it is + * expired! Timers which have a real signal delivery mode + * must return a remaining time greater than 0 because the + * signal has not yet been delivered. */ if (!sig_none) cur_setting->it_value.tv_nsec = 1; @@ -719,7 +730,6 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) } } -/* Get the time remaining on a POSIX.1b interval timer. */ static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { struct k_itimer *timr; From 65cade468dee537885b51897ba41ba05a4dcf6ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:14 +0200 Subject: [PATCH 13/39] posix-timers: Document sys_clock_getoverrun() Document the syscall in detail and with coherent sentences. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.462051641@linutronix.de --- kernel/time/posix-timers.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index e1af74ca80ab..191ecf59cb98 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -783,14 +783,23 @@ SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, #endif -/* - * Get the number of overruns of a POSIX.1b interval timer. This is to - * be the overrun of the timer last delivered. At the same time we are - * accumulating overruns on the next timer. The overrun is frozen when - * the signal is delivered, either at the notify time (if the info block - * is not queued) or at the actual delivery time (as we are informed by - * the call back to posixtimer_rearm(). So all we need to do is - * to pick up the frozen overrun. +/** + * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer + * @timer_id: The timer ID which identifies the timer + * + * The "overrun count" of a timer is one plus the number of expiration + * intervals which have elapsed between the first expiry, which queues the + * signal and the actual signal delivery. On signal delivery the "overrun + * count" is calculated and cached, so it can be returned directly here. + * + * As this is relative to the last queued signal the returned overrun count + * is meaningless outside of the signal delivery path and even there it + * does not accurately reflect the current state when user space evaluates + * it. + * + * Returns: + * -EINVAL @timer_id is invalid + * 1..INT_MAX The number of overruns related to the last delivered signal */ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { From 3561fcb402b7ab7fdb4c1746dae4995889506605 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:16 +0200 Subject: [PATCH 14/39] posix-timers: Document sys_clock_settime() permissions in place The documentation of sys_clock_settime() permissions is at a random place and mostly word salad. Remove it and add a concise comment into sys_clock_settime(). Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.514700292@linutronix.de --- kernel/time/posix-timers.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 191ecf59cb98..03ef6af1633b 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -74,13 +74,6 @@ static const struct k_clock clock_realtime, clock_monotonic; * following: 1.) The k_itimer struct (sched.h) is used for * the timer. 2.) The list, it_lock, it_clock, it_id and * it_pid fields are not modified by timer code. - * - * Permissions: It is assumed that the clock_settime() function defined - * for each clock will take care of permission checks. Some - * clocks may be set able by any user (i.e. local process - * clocks) others not. Currently the only set able clock we - * have is CLOCK_REALTIME and its high res counter part, both of - * which we beg off on and pass to do_sys_settimeofday(). */ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); @@ -1159,6 +1152,10 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, if (get_timespec64(&new_tp, tp)) return -EFAULT; + /* + * Permission checks have to be done inside the clock specific + * setter callback. + */ return kc->clock_set(which_clock, &new_tp); } From 640fe745d7d4b6e47f3b455cb5de99a08c6b6d23 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:17 +0200 Subject: [PATCH 15/39] posix-timers: Document nanosleep() details The descriptions for common_nsleep() is wrong and common_nsleep_timens() lacks any form of comment. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.567072835@linutronix.de --- kernel/time/posix-timers.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 03ef6af1633b..54adb4cfd4f2 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1370,7 +1370,7 @@ SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, #endif /* - * nanosleep for monotonic and realtime clocks + * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI */ static int common_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) @@ -1382,8 +1382,13 @@ static int common_nsleep(const clockid_t which_clock, int flags, which_clock); } +/* + * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME + * + * Absolute nanosleeps for these clocks are time-namespace adjusted. + */ static int common_nsleep_timens(const clockid_t which_clock, int flags, - const struct timespec64 *rqtp) + const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); From 52f090b164b59c06a4da87c0599424809a6bba16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:19 +0200 Subject: [PATCH 16/39] posix-timers: Add proper comments in do_timer_create() The comment about timer lifetime at the end of the function is misplaced and uncomprehensible. Make it understandable and put it at the right place. Add a new comment about the visibility of the new timer ID to user space. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.619897296@linutronix.de --- kernel/time/posix-timers.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 54adb4cfd4f2..20d3b991ec49 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -529,12 +529,17 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, new_timer->sigq->info.si_tid = new_timer->it_id; new_timer->sigq->info.si_code = SI_TIMER; - if (copy_to_user(created_timer_id, - &new_timer_id, sizeof (new_timer_id))) { + if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { error = -EFAULT; goto out; } - + /* + * After succesful copy out, the timer ID is visible to user space + * now but not yet valid because new_timer::signal is still NULL. + * + * Complete the initialization with the clock specific create + * callback. + */ error = kc->timer_create(new_timer); if (error) goto out; @@ -544,14 +549,11 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, WRITE_ONCE(new_timer->it_signal, current->signal); list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); - - return 0; /* - * In the case of the timer belonging to another task, after - * the task is unlocked, the timer is owned by the other task - * and may cease to exist at any time. Don't use or modify - * new_timer after the unlock call. + * After unlocking sighand::siglock @new_timer is subject to + * concurrent removal and cannot be touched anymore */ + return 0; out: posix_timer_unhash_and_free(new_timer); return error; From c575689d66378611bb04137b96e67ebb3ac8482b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:20 +0200 Subject: [PATCH 17/39] posix-timers: Comment SIGEV_THREAD_ID properly Replace the word salad. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.672220780@linutronix.de --- kernel/time/posix-timers.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 20d3b991ec49..c8b0f52e5eb6 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -53,12 +53,9 @@ static const struct k_clock * const posix_clocks[]; static const struct k_clock *clockid_to_kclock(const clockid_t id); static const struct k_clock clock_realtime, clock_monotonic; -/* - * we assume that the new SIGEV_THREAD_ID shares no bits with the other - * SIGEV values. Here we put out an error if this assumption fails. - */ +/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ - ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) + ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif From 02972d7955341b711d4c392f14faa9f9cd69d551 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:22 +0200 Subject: [PATCH 18/39] posix-timers: Clarify posix_timer_rearm() comment Yet another incomprehensible piece of art. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.724863461@linutronix.de --- kernel/time/posix-timers.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index c8b0f52e5eb6..d8d2169408f4 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -275,15 +275,9 @@ static void common_hrtimer_rearm(struct k_itimer *timr) } /* - * This function is exported for use by the signal deliver code. It is - * called just prior to the info block being released and passes that - * block to us. It's function is to update the overrun entry AND to - * restart the timer. It should only be called if the timer is to be - * restarted (i.e. we have flagged this in the sys_private entry of the - * info block). - * - * To protect against the timer going away while the interrupt is queued, - * we require that the it_requeue_pending flag be set. + * This function is called from the signal delivery code if + * info->si_sys_private is not zero, which indicates that the timer has to + * be rearmed. Restart the timer and update info::si_overrun. */ void posixtimer_rearm(struct kernel_siginfo *info) { From 84999b8bdb4969816c7bb7c14c3a55ed42aa4b94 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 21:07:37 +0200 Subject: [PATCH 19/39] posix-timers: Clarify posix_timer_fn() comments Make the issues vs. SIG_IGN understandable and remove the 15 years old promise that a proper solution is already on the horizon. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/874jnrdmrq.ffs@tglx --- kernel/time/posix-timers.c | 62 ++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d8d2169408f4..ae8799e97fcd 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -326,11 +326,11 @@ int posix_timer_event(struct k_itimer *timr, int si_private) } /* - * This function gets called when a POSIX.1b interval timer expires. It - * is used as a callback from the kernel internal timer. The - * run_timer_list code ALWAYS calls with interrupts on. - - * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. + * This function gets called when a POSIX.1b interval timer expires from + * the HRTIMER interrupt (soft interrupt on RT kernels). + * + * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI + * based timers. */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { @@ -348,9 +348,10 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) if (posix_timer_event(timr, si_private)) { /* - * signal was not sent because of sig_ignor - * we will not get a call back to restart it AND - * it should be restarted. + * The signal was not queued due to SIG_IGN. As a + * consequence the timer is not going to be rearmed from + * the signal delivery path. But as a real signal handler + * can be installed later the timer must be rearmed here. */ if (timr->it_interval != 0) { ktime_t now = hrtimer_cb_get_time(timer); @@ -359,34 +360,35 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * FIXME: What we really want, is to stop this * timer completely and restart it in case the * SIG_IGN is removed. This is a non trivial - * change which involves sighand locking - * (sigh !), which we don't want to do late in - * the release cycle. + * change to the signal handling code. * - * For now we just let timers with an interval - * less than a jiffie expire every jiffie to - * avoid softirq starvation in case of SIG_IGN - * and a very small interval, which would put - * the timer right back on the softirq pending - * list. By moving now ahead of time we trick - * hrtimer_forward() to expire the timer - * later, while we still maintain the overrun - * accuracy, but have some inconsistency in - * the timer_gettime() case. This is at least - * better than a starved softirq. A more - * complex fix which solves also another related - * inconsistency is already in the pipeline. + * For now let timers with an interval less than a + * jiffie expire every jiffie and recheck for a + * valid signal handler. + * + * This avoids interrupt starvation in case of a + * very small interval, which would expire the + * timer immediately again. + * + * Moving now ahead of time by one jiffie tricks + * hrtimer_forward() to expire the timer later, + * while it still maintains the overrun accuracy + * for the price of a slight inconsistency in the + * timer_gettime() case. This is at least better + * than a timer storm. + * + * Only required when high resolution timers are + * enabled as the periodic tick based timers are + * automatically aligned to the next tick. */ -#ifdef CONFIG_HIGH_RES_TIMERS - { - ktime_t kj = NSEC_PER_SEC / HZ; + if (IS_ENABLED(CONFIG_HIGHRES_TIMERS)) { + ktime_t kj = TICK_NSEC; if (timr->it_interval < kj) now = ktime_add(now, kj); } -#endif - timr->it_overrun += hrtimer_forward(timer, now, - timr->it_interval); + + timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; timr->it_active = 1; From 200dbd6d14e6a3b22162d78b4f7a253426dba7ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:25 +0200 Subject: [PATCH 20/39] posix-timers: Remove pointless comments Documenting the obvious is just consuming space for no value. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.832240451@linutronix.de --- kernel/time/posix-timers.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ae8799e97fcd..d357728b7f00 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -59,19 +59,6 @@ static const struct k_clock clock_realtime, clock_monotonic; #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif -/* - * CLOCKs: The POSIX standard calls for a couple of clocks and allows us - * to implement others. This structure defines the various - * clocks. - * - * FUNCTIONS: The CLOCKs structure defines possible functions to - * handle various clock functions. - * - * The standard POSIX timer management code assumes the - * following: 1.) The k_itimer struct (sched.h) is used for - * the timer. 2.) The list, it_lock, it_clock, it_id and - * it_pid fields are not modified by timer code. - */ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); #define lock_timer(tid, flags) \ @@ -141,7 +128,6 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) spin_unlock_irqrestore(&timr->it_lock, flags); } -/* Get clock_realtime */ static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_real_ts64(tp); @@ -153,7 +139,6 @@ static ktime_t posix_get_realtime_ktime(clockid_t which_clock) return ktime_get_real(); } -/* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec64 *tp) { @@ -166,9 +151,6 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, return do_adjtimex(t); } -/* - * Get monotonic time for posix timers - */ static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); @@ -181,9 +163,6 @@ static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) return ktime_get(); } -/* - * Get monotonic-raw time for posix timers - */ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { ktime_get_raw_ts64(tp); @@ -191,7 +170,6 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) return 0; } - static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_real_ts64(tp); @@ -242,9 +220,6 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) return 0; } -/* - * Initialize everything, well, just everything in Posix clocks/timers ;) - */ static __init int init_posix_timers(void) { posix_timers_cache = kmem_cache_create("posix_timers_cache", From b96ce4931fcd13d73e32c62c2df3fa8f9f467e33 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:27 +0200 Subject: [PATCH 21/39] posix-timers: Polish coding style in a few places Make it consistent with the TIP tree documentation. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.888493625@linutronix.de --- kernel/time/posix-timers.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d357728b7f00..e3cddd5f3c7f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -309,10 +309,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private) */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { + enum hrtimer_restart ret = HRTIMER_NORESTART; struct k_itimer *timr; unsigned long flags; int si_private = 0; - enum hrtimer_restart ret = HRTIMER_NORESTART; timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); @@ -400,8 +400,8 @@ static struct pid *good_sigevent(sigevent_t * event) static struct k_itimer * alloc_posix_timer(void) { - struct k_itimer *tmr; - tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); + struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); + if (!tmr) return tmr; if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { @@ -695,8 +695,8 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { - struct k_itimer *timr; const struct k_clock *kc; + struct k_itimer *timr; unsigned long flags; int ret = 0; @@ -767,8 +767,8 @@ SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; - int overrun; unsigned long flags; + int overrun; timr = lock_timer(timer_id, &flags); if (!timr) @@ -941,8 +941,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct __kernel_itimerspec __user *, new_setting, struct __kernel_itimerspec __user *, old_setting) { - struct itimerspec64 new_spec, old_spec; - struct itimerspec64 *rtn = old_setting ? &old_spec : NULL; + struct itimerspec64 new_spec, old_spec, *rtn; int error = 0; if (!new_setting) @@ -951,6 +950,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, if (get_itimerspec64(&new_spec, new_setting)) return -EFAULT; + rtn = old_setting ? &old_spec : NULL; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old_setting) { if (put_itimerspec64(&old_spec, old_setting)) From b9a40f24d8ea86f89f1299a3f2dccd601febe3e5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 9 Jun 2023 11:46:43 +0200 Subject: [PATCH 22/39] posix-timers: Refer properly to CONFIG_HIGH_RES_TIMERS Commit c78f261e5dcb ("posix-timers: Clarify posix_timer_fn() comments") turns an ifdef CONFIG_HIGH_RES_TIMERS into an conditional on "IS_ENABLED(CONFIG_HIGHRES_TIMERS)"; note that the new conditional refers to "HIGHRES_TIMERS" not "HIGH_RES_TIMERS" as before. Fix this typo introduced in that refactoring. Fixes: c78f261e5dcb ("posix-timers: Clarify posix_timer_fn() comments") Signed-off-by: Lukas Bulwahn Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230609094643.26253-1-lukas.bulwahn@gmail.com --- kernel/time/posix-timers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index e3cddd5f3c7f..b924f0f096fa 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -356,7 +356,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * enabled as the periodic tick based timers are * automatically aligned to the next tick. */ - if (IS_ENABLED(CONFIG_HIGHRES_TIMERS)) { + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) { ktime_t kj = TICK_NSEC; if (timr->it_interval < kj) From 986af8dc5af834071a8a7afdf2c3b7a57d562df2 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sat, 10 Jun 2023 02:28:56 +0800 Subject: [PATCH 23/39] alarmtimer: Remove unnecessary initialization of variable 'ret' ret is assigned before checked, so it does not need to initialize the variable Signed-off-by: Li zeming Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230609182856.4660-1-zeming@nfschina.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 82b28ab0f328..09c9cde06f20 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -847,7 +847,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, struct restart_block *restart = ¤t->restart_block; struct alarm alarm; ktime_t exp; - int ret = 0; + int ret; if (!alarmtimer_get_rtcdev()) return -EOPNOTSUPP; From fc4b4d96f793c827a5abf23df3e793592dbd38ce Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sat, 10 Jun 2023 02:20:59 +0800 Subject: [PATCH 24/39] alarmtimer: Remove unnecessary (void *) cast Pointers of type void * do not require a type cast when they are assigned to a real pointer. Signed-off-by: Li zeming Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230609182059.4509-1-zeming@nfschina.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 09c9cde06f20..8d9f13d847f0 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -751,7 +751,7 @@ static int alarm_timer_create(struct k_itimer *new_timer) static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, ktime_t now) { - struct task_struct *task = (struct task_struct *)alarm->data; + struct task_struct *task = alarm->data; alarm->data = NULL; if (task) From a7e282c77785c7eabf98836431b1f029481085ad Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 5 May 2023 00:12:53 +0800 Subject: [PATCH 25/39] tick/rcu: Fix bogus ratelimit condition The ratelimit logic in report_idle_softirq() is broken because the exit condition is always true: static int ratelimit; if (ratelimit < 10) return false; ---> always returns here ratelimit++; ---> no chance to run Make it check for >= 10 instead. Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") Signed-off-by: Wen Yang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/tencent_5AAA3EEAB42095C9B7740BE62FBF9A67E007@qq.com --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 52254679ec48..89055050d1ac 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1030,7 +1030,7 @@ static bool report_idle_softirq(void) return false; } - if (ratelimit < 10) + if (ratelimit >= 10) return false; /* On RT, softirqs handling may be waiting on some lock */ From 892f439ea17cbf56a36e57c584d583649a64b404 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Jun 2023 16:28:45 +0200 Subject: [PATCH 26/39] posix-timers: Add sys_ni_posix_timers() prototype The sys_ni_posix_timers() definition causes a warning when the declaration is missing, so this needs to be added along with the normal syscalls, outside of the #ifdef. kernel/time/posix-stubs.c:26:17: error: no previous prototype for 'sys_ni_posix_timers' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230607142925.3126422-1-arnd@kernel.org --- arch/alpha/kernel/osf_sys.c | 2 -- include/linux/syscalls.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 2a9a877a0508..d98701ee36c6 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1014,8 +1014,6 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, return do_sys_settimeofday64(tv ? &kts : NULL, tz ? &ktz : NULL); } -asmlinkage long sys_ni_posix_timers(void); - SYSCALL_DEFINE2(osf_utimes, const char __user *, filename, struct timeval32 __user *, tvs) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 33a0ee3bcb2e..24871f8ec8bb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1280,6 +1280,7 @@ asmlinkage long sys_ni_syscall(void); #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ +asmlinkage long sys_ni_posix_timers(void); /* * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly. From 2951580ba6adb082bb6b7154a5ecb24e7c1f7569 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Apr 2023 16:38:54 +0200 Subject: [PATCH 27/39] tracing/timer: Add missing hrtimer modes to decode_hrtimer_mode(). The trace output for the HRTIMER_MODE_.*_HARD modes is seen as a number since these modes are not decoded. The author was not aware of the fancy decoding function which makes the life easier. Extend decode_hrtimer_mode() with the additional HRTIMER_MODE_.*_HARD modes. Fixes: ae6683d815895 ("hrtimer: Introduce HARD expiry mode") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230418143854.8vHWQKLM@linutronix.de --- include/trace/events/timer.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 3e8619c72f77..b4bc2828fa09 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -158,7 +158,11 @@ DEFINE_EVENT(timer_class, timer_cancel, { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ - { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) + { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ + { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ + { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ + { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ + { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) /** * hrtimer_init - called when the hrtimer is initialized From ea95051063f95bf612683f5697e44604d0cbd0f1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 18 Jun 2023 17:39:37 +0200 Subject: [PATCH 28/39] clocksource/drivers/ingenic-timer: Use pm_sleep_ptr() macro The use of the pm_sleep_ptr() macro allows the compiler to always see the dev_pm_ops structure and related functions, while still allowing the unused code to be removed, without the need for the __maybe_unused markings. Signed-off-by: Paul Cercueil Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230618153937.96649-1-paul@crapouillou.net --- drivers/clocksource/ingenic-timer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/ingenic-timer.c b/drivers/clocksource/ingenic-timer.c index 089ce64b1c3f..154ee5f7954a 100644 --- a/drivers/clocksource/ingenic-timer.c +++ b/drivers/clocksource/ingenic-timer.c @@ -369,7 +369,7 @@ static int __init ingenic_tcu_probe(struct platform_device *pdev) return 0; } -static int __maybe_unused ingenic_tcu_suspend(struct device *dev) +static int ingenic_tcu_suspend(struct device *dev) { struct ingenic_tcu *tcu = dev_get_drvdata(dev); unsigned int cpu; @@ -382,7 +382,7 @@ static int __maybe_unused ingenic_tcu_suspend(struct device *dev) return 0; } -static int __maybe_unused ingenic_tcu_resume(struct device *dev) +static int ingenic_tcu_resume(struct device *dev) { struct ingenic_tcu *tcu = dev_get_drvdata(dev); unsigned int cpu; @@ -406,7 +406,7 @@ err_timer_clk_disable: return ret; } -static const struct dev_pm_ops __maybe_unused ingenic_tcu_pm_ops = { +static const struct dev_pm_ops ingenic_tcu_pm_ops = { /* _noirq: We want the TCU clocks to be gated last / ungated first */ .suspend_noirq = ingenic_tcu_suspend, .resume_noirq = ingenic_tcu_resume, @@ -415,9 +415,7 @@ static const struct dev_pm_ops __maybe_unused ingenic_tcu_pm_ops = { static struct platform_driver ingenic_tcu_driver = { .driver = { .name = "ingenic-tcu-timer", -#ifdef CONFIG_PM_SLEEP - .pm = &ingenic_tcu_pm_ops, -#endif + .pm = pm_sleep_ptr(&ingenic_tcu_pm_ops), .of_match_table = ingenic_tcu_of_match, }, }; From 9e130e2be4e7ac54de1ecc3fa424ca7ddfb1a748 Mon Sep 17 00:00:00 2001 From: Keguang Zhang Date: Fri, 12 May 2023 18:37:22 +0800 Subject: [PATCH 29/39] MIPS: Loongson32: Remove deprecated PWM timer clocksource The Loongson1 PWM timer will be moved to clocksource framework. Then, the old driver is no longer needed. Remove the deprecated code and update the Kconfig. Signed-off-by: Keguang Zhang Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230512103724.587760-2-keguang.zhang@gmail.com --- .../include/asm/mach-loongson32/loongson1.h | 1 - .../include/asm/mach-loongson32/regs-pwm.h | 25 --- arch/mips/loongson32/Kconfig | 37 --- arch/mips/loongson32/common/time.c | 210 ------------------ 4 files changed, 273 deletions(-) delete mode 100644 arch/mips/include/asm/mach-loongson32/regs-pwm.h diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h index eb3ddbec1752..d8f9dec0ecc3 100644 --- a/arch/mips/include/asm/mach-loongson32/loongson1.h +++ b/arch/mips/include/asm/mach-loongson32/loongson1.h @@ -47,7 +47,6 @@ #include #include -#include #include #include diff --git a/arch/mips/include/asm/mach-loongson32/regs-pwm.h b/arch/mips/include/asm/mach-loongson32/regs-pwm.h deleted file mode 100644 index ec870c82d492..000000000000 --- a/arch/mips/include/asm/mach-loongson32/regs-pwm.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2014 Zhang, Keguang - * - * Loongson 1 PWM Register Definitions. - */ - -#ifndef __ASM_MACH_LOONGSON32_REGS_PWM_H -#define __ASM_MACH_LOONGSON32_REGS_PWM_H - -/* Loongson 1 PWM Timer Register Definitions */ -#define PWM_CNT 0x0 -#define PWM_HRC 0x4 -#define PWM_LRC 0x8 -#define PWM_CTRL 0xc - -/* PWM Control Register Bits */ -#define CNT_RST BIT(7) -#define INT_SR BIT(6) -#define INT_EN BIT(5) -#define PWM_SINGLE BIT(4) -#define PWM_OE BIT(3) -#define CNT_EN BIT(0) - -#endif /* __ASM_MACH_LOONGSON32_REGS_PWM_H */ diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig index 2ef9da0016df..a7c500959577 100644 --- a/arch/mips/loongson32/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -35,41 +35,4 @@ config LOONGSON1_LS1C select COMMON_CLK endchoice -menuconfig CEVT_CSRC_LS1X - bool "Use PWM Timer for clockevent/clocksource" - select MIPS_EXTERNAL_TIMER - depends on CPU_LOONGSON32 - help - This option changes the default clockevent/clocksource to PWM Timer, - and is required by Loongson1 CPUFreq support. - - If unsure, say N. - -choice - prompt "Select clockevent/clocksource" - depends on CEVT_CSRC_LS1X - default TIMER_USE_PWM0 - -config TIMER_USE_PWM0 - bool "Use PWM Timer 0" - help - Use PWM Timer 0 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM1 - bool "Use PWM Timer 1" - help - Use PWM Timer 1 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM2 - bool "Use PWM Timer 2" - help - Use PWM Timer 2 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM3 - bool "Use PWM Timer 3" - help - Use PWM Timer 3 as the default clockevent/clocksourcer. - -endchoice - endif # MACH_LOONGSON32 diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c index 965c04aa56fd..74ad2b17918d 100644 --- a/arch/mips/loongson32/common/time.c +++ b/arch/mips/loongson32/common/time.c @@ -5,208 +5,8 @@ #include #include -#include -#include #include -#include -#include - -#ifdef CONFIG_CEVT_CSRC_LS1X - -#if defined(CONFIG_TIMER_USE_PWM1) -#define LS1X_TIMER_BASE LS1X_PWM1_BASE -#define LS1X_TIMER_IRQ LS1X_PWM1_IRQ - -#elif defined(CONFIG_TIMER_USE_PWM2) -#define LS1X_TIMER_BASE LS1X_PWM2_BASE -#define LS1X_TIMER_IRQ LS1X_PWM2_IRQ - -#elif defined(CONFIG_TIMER_USE_PWM3) -#define LS1X_TIMER_BASE LS1X_PWM3_BASE -#define LS1X_TIMER_IRQ LS1X_PWM3_IRQ - -#else -#define LS1X_TIMER_BASE LS1X_PWM0_BASE -#define LS1X_TIMER_IRQ LS1X_PWM0_IRQ -#endif - -DEFINE_RAW_SPINLOCK(ls1x_timer_lock); - -static void __iomem *timer_reg_base; -static uint32_t ls1x_jiffies_per_tick; - -static inline void ls1x_pwmtimer_set_period(uint32_t period) -{ - __raw_writel(period, timer_reg_base + PWM_HRC); - __raw_writel(period, timer_reg_base + PWM_LRC); -} - -static inline void ls1x_pwmtimer_restart(void) -{ - __raw_writel(0x0, timer_reg_base + PWM_CNT); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); -} - -void __init ls1x_pwmtimer_init(void) -{ - timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16); - if (!timer_reg_base) - panic("Failed to remap timer registers"); - - ls1x_jiffies_per_tick = DIV_ROUND_CLOSEST(mips_hpt_frequency, HZ); - - ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick); - ls1x_pwmtimer_restart(); -} - -static u64 ls1x_clocksource_read(struct clocksource *cs) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - raw_spin_lock_irqsave(&ls1x_timer_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - /* read the count */ - count = __raw_readl(timer_reg_base + PWM_CNT); - - /* - * It's possible for count to appear to go the wrong way for this - * reason: - * - * The timer counter underflows, but we haven't handled the resulting - * interrupt and incremented jiffies yet. - * - * Previous attempts to handle these cases intelligently were buggy, so - * we just do the simple thing now. - */ - if (count < old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags); - - return (u64) (jifs * ls1x_jiffies_per_tick) + count; -} - -static struct clocksource ls1x_clocksource = { - .name = "ls1x-pwmtimer", - .read = ls1x_clocksource_read, - .mask = CLOCKSOURCE_MASK(24), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static irqreturn_t ls1x_clockevent_isr(int irq, void *devid) -{ - struct clock_event_device *cd = devid; - - ls1x_pwmtimer_restart(); - cd->event_handler(cd); - - return IRQ_HANDLED; -} - -static int ls1x_clockevent_set_state_periodic(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick); - ls1x_pwmtimer_restart(); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_tick_resume(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - __raw_writel(__raw_readl(timer_reg_base + PWM_CTRL) & ~CNT_EN, - timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_set_next(unsigned long evt, - struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - ls1x_pwmtimer_set_period(evt); - ls1x_pwmtimer_restart(); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static struct clock_event_device ls1x_clockevent = { - .name = "ls1x-pwmtimer", - .features = CLOCK_EVT_FEAT_PERIODIC, - .rating = 300, - .irq = LS1X_TIMER_IRQ, - .set_next_event = ls1x_clockevent_set_next, - .set_state_shutdown = ls1x_clockevent_set_state_shutdown, - .set_state_periodic = ls1x_clockevent_set_state_periodic, - .set_state_oneshot = ls1x_clockevent_set_state_shutdown, - .tick_resume = ls1x_clockevent_tick_resume, -}; - -static void __init ls1x_time_init(void) -{ - struct clock_event_device *cd = &ls1x_clockevent; - int ret; - - if (!mips_hpt_frequency) - panic("Invalid timer clock rate"); - - ls1x_pwmtimer_init(); - - clockevent_set_clock(cd, mips_hpt_frequency); - cd->max_delta_ns = clockevent_delta2ns(0xffffff, cd); - cd->max_delta_ticks = 0xffffff; - cd->min_delta_ns = clockevent_delta2ns(0x000300, cd); - cd->min_delta_ticks = 0x000300; - cd->cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(cd); - - ls1x_clocksource.rating = 200 + mips_hpt_frequency / 10000000; - ret = clocksource_register_hz(&ls1x_clocksource, mips_hpt_frequency); - if (ret) - panic(KERN_ERR "Failed to register clocksource: %d\n", ret); - - if (request_irq(LS1X_TIMER_IRQ, ls1x_clockevent_isr, - IRQF_PERCPU | IRQF_TIMER, "ls1x-pwmtimer", - &ls1x_clockevent)) - pr_err("Failed to register ls1x-pwmtimer interrupt\n"); -} -#endif /* CONFIG_CEVT_CSRC_LS1X */ - void __init plat_time_init(void) { struct clk *clk = NULL; @@ -214,20 +14,10 @@ void __init plat_time_init(void) /* initialize LS1X clocks */ of_clk_init(NULL); -#ifdef CONFIG_CEVT_CSRC_LS1X - /* setup LS1X PWM timer */ - clk = clk_get(NULL, "ls1x-pwmtimer"); - if (IS_ERR(clk)) - panic("unable to get timer clock, err=%ld", PTR_ERR(clk)); - - mips_hpt_frequency = clk_get_rate(clk); - ls1x_time_init(); -#else /* setup mips r4k timer */ clk = clk_get(NULL, "cpu_clk"); if (IS_ERR(clk)) panic("unable to get cpu clock, err=%ld", PTR_ERR(clk)); mips_hpt_frequency = clk_get_rate(clk) / 2; -#endif /* CONFIG_CEVT_CSRC_LS1X */ } From b25efff2a63f0d573f409f22e82904db9cc7d577 Mon Sep 17 00:00:00 2001 From: Keguang Zhang Date: Fri, 12 May 2023 18:37:23 +0800 Subject: [PATCH 30/39] dt-bindings: timer: Add Loongson-1 clocksource Add devicetree binding document for Loongson-1 clocksource. Signed-off-by: Keguang Zhang Reviewed-by: Krzysztof Kozlowski Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230512103724.587760-3-keguang.zhang@gmail.com --- .../timer/loongson,ls1x-pwmtimer.yaml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml diff --git a/Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml b/Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml new file mode 100644 index 000000000000..ad61ae55850b --- /dev/null +++ b/Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/loongson,ls1x-pwmtimer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson-1 PWM timer + +maintainers: + - Keguang Zhang + +description: + Loongson-1 PWM timer can be used for system clock source + and clock event timers. + +properties: + compatible: + const: loongson,ls1b-pwmtimer + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +additionalProperties: false + +examples: + - | + #include + #include + clocksource: timer@1fe5c030 { + compatible = "loongson,ls1b-pwmtimer"; + reg = <0x1fe5c030 0x10>; + + clocks = <&clkc LS1X_CLKID_APB>; + interrupt-parent = <&intc0>; + interrupts = <20 IRQ_TYPE_LEVEL_HIGH>; + }; From e738521a11f13e40af89f66527e59306c4169782 Mon Sep 17 00:00:00 2001 From: Keguang Zhang Date: Fri, 12 May 2023 18:37:24 +0800 Subject: [PATCH 31/39] clocksource/drivers/loongson1: Move PWM timer to clocksource framework This patch moves most part of arch/mips/loongson32/common/time.c into drivers/clocksource. Adapt the driver to clocksource framework with devicetree support and updates Kconfig/Makefile options. Signed-off-by: Keguang Zhang Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230512103724.587760-4-keguang.zhang@gmail.com --- drivers/clocksource/Kconfig | 9 + drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-loongson1-pwm.c | 236 ++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 drivers/clocksource/timer-loongson1-pwm.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 526382dc7482..c4d671a5a13d 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -612,6 +612,15 @@ config TIMER_IMX_SYS_CTR Enable this option to use i.MX system counter timer as a clockevent. +config CLKSRC_LOONGSON1_PWM + bool "Clocksource using Loongson1 PWM" + depends on MACH_LOONGSON32 || COMPILE_TEST + select MIPS_EXTERNAL_TIMER + select TIMER_OF + help + Enable this option to use Loongson1 PWM timer as clocksource + instead of the performance counter. + config CLKSRC_ST_LPC bool "Low power clocksource found in the LPC" if COMPILE_TEST select TIMER_OF if OF diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index f12d3987a960..5d93c9e3fc55 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -89,3 +89,4 @@ obj-$(CONFIG_MICROCHIP_PIT64B) += timer-microchip-pit64b.o obj-$(CONFIG_MSC313E_TIMER) += timer-msc313e.o obj-$(CONFIG_GOLDFISH_TIMER) += timer-goldfish.o obj-$(CONFIG_GXP_TIMER) += timer-gxp.o +obj-$(CONFIG_CLKSRC_LOONGSON1_PWM) += timer-loongson1-pwm.o diff --git a/drivers/clocksource/timer-loongson1-pwm.c b/drivers/clocksource/timer-loongson1-pwm.c new file mode 100644 index 000000000000..6335fee03017 --- /dev/null +++ b/drivers/clocksource/timer-loongson1-pwm.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Clocksource driver for Loongson-1 SoC + * + * Copyright (c) 2023 Keguang Zhang + */ + +#include +#include +#include +#include "timer-of.h" + +/* Loongson-1 PWM Timer Register Definitions */ +#define PWM_CNTR 0x0 +#define PWM_HRC 0x4 +#define PWM_LRC 0x8 +#define PWM_CTRL 0xc + +/* PWM Control Register Bits */ +#define INT_LRC_EN BIT(11) +#define INT_HRC_EN BIT(10) +#define CNTR_RST BIT(7) +#define INT_SR BIT(6) +#define INT_EN BIT(5) +#define PWM_SINGLE BIT(4) +#define PWM_OE BIT(3) +#define CNT_EN BIT(0) + +#define CNTR_WIDTH 24 + +DEFINE_RAW_SPINLOCK(ls1x_timer_lock); + +struct ls1x_clocksource { + void __iomem *reg_base; + unsigned long ticks_per_jiffy; + struct clocksource clksrc; +}; + +static inline struct ls1x_clocksource *to_ls1x_clksrc(struct clocksource *c) +{ + return container_of(c, struct ls1x_clocksource, clksrc); +} + +static inline void ls1x_pwmtimer_set_period(unsigned int period, + struct timer_of *to) +{ + writel(period, timer_of_base(to) + PWM_LRC); + writel(period, timer_of_base(to) + PWM_HRC); +} + +static inline void ls1x_pwmtimer_clear(struct timer_of *to) +{ + writel(0, timer_of_base(to) + PWM_CNTR); +} + +static inline void ls1x_pwmtimer_start(struct timer_of *to) +{ + writel((INT_EN | PWM_OE | CNT_EN), timer_of_base(to) + PWM_CTRL); +} + +static inline void ls1x_pwmtimer_stop(struct timer_of *to) +{ + writel(0, timer_of_base(to) + PWM_CTRL); +} + +static inline void ls1x_pwmtimer_irq_ack(struct timer_of *to) +{ + int val; + + val = readl(timer_of_base(to) + PWM_CTRL); + val |= INT_SR; + writel(val, timer_of_base(to) + PWM_CTRL); +} + +static irqreturn_t ls1x_clockevent_isr(int irq, void *dev_id) +{ + struct clock_event_device *clkevt = dev_id; + struct timer_of *to = to_timer_of(clkevt); + + ls1x_pwmtimer_irq_ack(to); + ls1x_pwmtimer_clear(to); + ls1x_pwmtimer_start(to); + + clkevt->event_handler(clkevt); + + return IRQ_HANDLED; +} + +static int ls1x_clockevent_set_state_periodic(struct clock_event_device *clkevt) +{ + struct timer_of *to = to_timer_of(clkevt); + + raw_spin_lock(&ls1x_timer_lock); + ls1x_pwmtimer_set_period(timer_of_period(to), to); + ls1x_pwmtimer_clear(to); + ls1x_pwmtimer_start(to); + raw_spin_unlock(&ls1x_timer_lock); + + return 0; +} + +static int ls1x_clockevent_tick_resume(struct clock_event_device *clkevt) +{ + raw_spin_lock(&ls1x_timer_lock); + ls1x_pwmtimer_start(to_timer_of(clkevt)); + raw_spin_unlock(&ls1x_timer_lock); + + return 0; +} + +static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *clkevt) +{ + raw_spin_lock(&ls1x_timer_lock); + ls1x_pwmtimer_stop(to_timer_of(clkevt)); + raw_spin_unlock(&ls1x_timer_lock); + + return 0; +} + +static int ls1x_clockevent_set_next(unsigned long evt, + struct clock_event_device *clkevt) +{ + struct timer_of *to = to_timer_of(clkevt); + + raw_spin_lock(&ls1x_timer_lock); + ls1x_pwmtimer_set_period(evt, to); + ls1x_pwmtimer_clear(to); + ls1x_pwmtimer_start(to); + raw_spin_unlock(&ls1x_timer_lock); + + return 0; +} + +static struct timer_of ls1x_to = { + .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, + .clkevt = { + .name = "ls1x-pwmtimer", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .rating = 300, + .set_next_event = ls1x_clockevent_set_next, + .set_state_periodic = ls1x_clockevent_set_state_periodic, + .set_state_oneshot = ls1x_clockevent_set_state_shutdown, + .set_state_shutdown = ls1x_clockevent_set_state_shutdown, + .tick_resume = ls1x_clockevent_tick_resume, + }, + .of_irq = { + .handler = ls1x_clockevent_isr, + .flags = IRQF_TIMER, + }, +}; + +/* + * Since the PWM timer overflows every two ticks, its not very useful + * to just read by itself. So use jiffies to emulate a free + * running counter: + */ +static u64 ls1x_clocksource_read(struct clocksource *cs) +{ + struct ls1x_clocksource *ls1x_cs = to_ls1x_clksrc(cs); + unsigned long flags; + int count; + u32 jifs; + static int old_count; + static u32 old_jifs; + + raw_spin_lock_irqsave(&ls1x_timer_lock, flags); + /* + * Although our caller may have the read side of xtime_lock, + * this is now a seqlock, and we are cheating in this routine + * by having side effects on state that we cannot undo if + * there is a collision on the seqlock and our caller has to + * retry. (Namely, old_jifs and old_count.) So we must treat + * jiffies as volatile despite the lock. We read jiffies + * before latching the timer count to guarantee that although + * the jiffies value might be older than the count (that is, + * the counter may underflow between the last point where + * jiffies was incremented and the point where we latch the + * count), it cannot be newer. + */ + jifs = jiffies; + /* read the count */ + count = readl(ls1x_cs->reg_base + PWM_CNTR); + + /* + * It's possible for count to appear to go the wrong way for this + * reason: + * + * The timer counter underflows, but we haven't handled the resulting + * interrupt and incremented jiffies yet. + * + * Previous attempts to handle these cases intelligently were buggy, so + * we just do the simple thing now. + */ + if (count < old_count && jifs == old_jifs) + count = old_count; + + old_count = count; + old_jifs = jifs; + + raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags); + + return (u64)(jifs * ls1x_cs->ticks_per_jiffy) + count; +} + +static struct ls1x_clocksource ls1x_clocksource = { + .clksrc = { + .name = "ls1x-pwmtimer", + .rating = 300, + .read = ls1x_clocksource_read, + .mask = CLOCKSOURCE_MASK(CNTR_WIDTH), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + }, +}; + +static int __init ls1x_pwm_clocksource_init(struct device_node *np) +{ + struct timer_of *to = &ls1x_to; + int ret; + + ret = timer_of_init(np, to); + if (ret) + return ret; + + clockevents_config_and_register(&to->clkevt, timer_of_rate(to), + 0x1, GENMASK(CNTR_WIDTH - 1, 0)); + + ls1x_clocksource.reg_base = timer_of_base(to); + ls1x_clocksource.ticks_per_jiffy = timer_of_period(to); + + return clocksource_register_hz(&ls1x_clocksource.clksrc, + timer_of_rate(to)); +} + +TIMER_OF_DECLARE(ls1x_pwm_clocksource, "loongson,ls1b-pwmtimer", + ls1x_pwm_clocksource_init); From 95aded1b1c409fb2e902c6bd455068700ac38878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 28 Mar 2023 11:15:14 +0200 Subject: [PATCH 32/39] clocksource/drivers/imx-gpt: Use only a single name for functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When looking at the data structs defining the different behaviours of the GPT blocks in different SoCs it's not helpful that the same functions are used with different names. So drop the cpp defines and use the original names. This commit was generated using: perl -i -e 'my %m; while (<>) { if (/^#define (imx[a-zA-Z0-6_]*)\s(imx[a-zA-Z0-6_]*)/) {$m{$1} = $2; } else { foreach my $f (keys %m) {s/$f/$m{$f}/; } print; } }' drivers/clocksource/timer-imx-gpt.c This patch has no effect on the generated code. Signed-off-by: Uwe Kleine-König Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230328091514.874724-1-u.kleine-koenig@pengutronix.de --- drivers/clocksource/timer-imx-gpt.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index ca3e4cbc80c6..83cefff2bec1 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -93,13 +93,11 @@ static void imx1_gpt_irq_disable(struct imx_timer *imxtm) tmp = readl_relaxed(imxtm->base + MXC_TCTL); writel_relaxed(tmp & ~MX1_2_TCTL_IRQEN, imxtm->base + MXC_TCTL); } -#define imx21_gpt_irq_disable imx1_gpt_irq_disable static void imx31_gpt_irq_disable(struct imx_timer *imxtm) { writel_relaxed(0, imxtm->base + V2_IR); } -#define imx6dl_gpt_irq_disable imx31_gpt_irq_disable static void imx1_gpt_irq_enable(struct imx_timer *imxtm) { @@ -108,13 +106,11 @@ static void imx1_gpt_irq_enable(struct imx_timer *imxtm) tmp = readl_relaxed(imxtm->base + MXC_TCTL); writel_relaxed(tmp | MX1_2_TCTL_IRQEN, imxtm->base + MXC_TCTL); } -#define imx21_gpt_irq_enable imx1_gpt_irq_enable static void imx31_gpt_irq_enable(struct imx_timer *imxtm) { writel_relaxed(1<<0, imxtm->base + V2_IR); } -#define imx6dl_gpt_irq_enable imx31_gpt_irq_enable static void imx1_gpt_irq_acknowledge(struct imx_timer *imxtm) { @@ -131,7 +127,6 @@ static void imx31_gpt_irq_acknowledge(struct imx_timer *imxtm) { writel_relaxed(V2_TSTAT_OF1, imxtm->base + V2_TSTAT); } -#define imx6dl_gpt_irq_acknowledge imx31_gpt_irq_acknowledge static void __iomem *sched_clock_reg; @@ -296,7 +291,6 @@ static void imx1_gpt_setup_tctl(struct imx_timer *imxtm) tctl_val = MX1_2_TCTL_FRR | MX1_2_TCTL_CLK_PCLK1 | MXC_TCTL_TEN; writel_relaxed(tctl_val, imxtm->base + MXC_TCTL); } -#define imx21_gpt_setup_tctl imx1_gpt_setup_tctl static void imx31_gpt_setup_tctl(struct imx_timer *imxtm) { @@ -343,10 +337,10 @@ static const struct imx_gpt_data imx21_gpt_data = { .reg_tstat = MX1_2_TSTAT, .reg_tcn = MX1_2_TCN, .reg_tcmp = MX1_2_TCMP, - .gpt_irq_enable = imx21_gpt_irq_enable, - .gpt_irq_disable = imx21_gpt_irq_disable, + .gpt_irq_enable = imx1_gpt_irq_enable, + .gpt_irq_disable = imx1_gpt_irq_disable, .gpt_irq_acknowledge = imx21_gpt_irq_acknowledge, - .gpt_setup_tctl = imx21_gpt_setup_tctl, + .gpt_setup_tctl = imx1_gpt_setup_tctl, .set_next_event = mx1_2_set_next_event, }; @@ -365,9 +359,9 @@ static const struct imx_gpt_data imx6dl_gpt_data = { .reg_tstat = V2_TSTAT, .reg_tcn = V2_TCN, .reg_tcmp = V2_TCMP, - .gpt_irq_enable = imx6dl_gpt_irq_enable, - .gpt_irq_disable = imx6dl_gpt_irq_disable, - .gpt_irq_acknowledge = imx6dl_gpt_irq_acknowledge, + .gpt_irq_enable = imx31_gpt_irq_enable, + .gpt_irq_disable = imx31_gpt_irq_disable, + .gpt_irq_acknowledge = imx31_gpt_irq_acknowledge, .gpt_setup_tctl = imx6dl_gpt_setup_tctl, .set_next_event = v2_set_next_event, }; From ccaa4926c2264ca2a2fcad4b3511fe435d7d4d15 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 21 Jun 2023 08:59:28 +0100 Subject: [PATCH 33/39] hrtimer: Add missing sparse annotations to hrtimer locking Sparse warns about lock imbalance vs. the hrtimer_base lock due to missing sparse annotations: kernel/time/hrtimer.c:175:33: warning: context imbalance in 'lock_hrtimer_base' - wrong count at exit kernel/time/hrtimer.c:1301:28: warning: context imbalance in 'hrtimer_start_range_ns' - unexpected unlock kernel/time/hrtimer.c:1336:28: warning: context imbalance in 'hrtimer_try_to_cancel' - unexpected unlock kernel/time/hrtimer.c:1457:9: warning: context imbalance in '__hrtimer_get_remaining' - unexpected unlock Add the annotations to the relevant functions. Signed-off-by: Ben Dooks Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230621075928.394481-1-ben.dooks@codethink.co.uk --- kernel/time/hrtimer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e8c08292defc..238262e4aba7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -164,6 +164,7 @@ static inline bool is_migration_base(struct hrtimer_clock_base *base) static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) + __acquires(&timer->base->lock) { struct hrtimer_clock_base *base; @@ -280,6 +281,7 @@ static inline bool is_migration_base(struct hrtimer_clock_base *base) static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) + __acquires(&timer->base->cpu_base->lock) { struct hrtimer_clock_base *base = timer->base; @@ -1013,6 +1015,7 @@ void hrtimers_resume_local(void) */ static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) + __releases(&timer->base->cpu_base->lock) { raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } From 011da162da2f915989a571b557867f7eea699000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 28 Mar 2023 12:05:30 +0200 Subject: [PATCH 34/39] clk: imx: Drop inclusion of unused header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clk drivers use none of the symbols defined in . Signed-off-by: Uwe Kleine-König Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230328100531.879485-2-u.kleine-koenig@pengutronix.de --- drivers/clk/imx/clk-imx1.c | 1 - drivers/clk/imx/clk-imx27.c | 1 - drivers/clk/imx/clk-imx31.c | 1 - drivers/clk/imx/clk-imx35.c | 1 - 4 files changed, 4 deletions(-) diff --git a/drivers/clk/imx/clk-imx1.c b/drivers/clk/imx/clk-imx1.c index 22fc7491ba00..f6ea7e5052d5 100644 --- a/drivers/clk/imx/clk-imx1.c +++ b/drivers/clk/imx/clk-imx1.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "clk.h" diff --git a/drivers/clk/imx/clk-imx27.c b/drivers/clk/imx/clk-imx27.c index 5d177125728d..99618ded0939 100644 --- a/drivers/clk/imx/clk-imx27.c +++ b/drivers/clk/imx/clk-imx27.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "clk.h" diff --git a/drivers/clk/imx/clk-imx31.c b/drivers/clk/imx/clk-imx31.c index c44e18c6f63f..4c8d9ff0b2ad 100644 --- a/drivers/clk/imx/clk-imx31.c +++ b/drivers/clk/imx/clk-imx31.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "clk.h" diff --git a/drivers/clk/imx/clk-imx35.c b/drivers/clk/imx/clk-imx35.c index 7dcbaea3fea3..3b6fdb4e0be7 100644 --- a/drivers/clk/imx/clk-imx35.c +++ b/drivers/clk/imx/clk-imx35.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "clk.h" From 281bf6b94aec092096d788b56c106a8c9c2a432a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 28 Mar 2023 12:05:31 +0200 Subject: [PATCH 35/39] clocksource/drivers/imx-gpt: Fold into its only user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the imx-gpt timer driver makes use of enum imx_gpt_type that is otherwise unused. Move its definition into the timer-imx-gpt driver. Signed-off-by: Uwe Kleine-König Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230328100531.879485-3-u.kleine-koenig@pengutronix.de --- drivers/clocksource/timer-imx-gpt.c | 7 ++++++- include/soc/imx/timer.h | 16 ---------------- 2 files changed, 6 insertions(+), 17 deletions(-) delete mode 100644 include/soc/imx/timer.h diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 83cefff2bec1..28ab4f1a7c71 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -16,7 +16,6 @@ #include #include #include -#include /* * There are 4 versions of the timer hardware on Freescale MXC hardware. @@ -25,6 +24,12 @@ * - MX25, MX31, MX35, MX37, MX51, MX6Q(rev1.0) * - MX6DL, MX6SX, MX6Q(rev1.1+) */ +enum imx_gpt_type { + GPT_TYPE_IMX1, /* i.MX1 */ + GPT_TYPE_IMX21, /* i.MX21/27 */ + GPT_TYPE_IMX31, /* i.MX31/35/25/37/51/6Q */ + GPT_TYPE_IMX6DL, /* i.MX6DL/SX/SL */ +}; /* defines common for all i.MX */ #define MXC_TCTL 0x00 diff --git a/include/soc/imx/timer.h b/include/soc/imx/timer.h deleted file mode 100644 index 25f29c6bbd0b..000000000000 --- a/include/soc/imx/timer.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2015 Linaro Ltd. - */ - -#ifndef __SOC_IMX_TIMER_H__ -#define __SOC_IMX_TIMER_H__ - -enum imx_gpt_type { - GPT_TYPE_IMX1, /* i.MX1 */ - GPT_TYPE_IMX21, /* i.MX21/27 */ - GPT_TYPE_IMX31, /* i.MX31/35/25/37/51/6Q */ - GPT_TYPE_IMX6DL, /* i.MX6DL/SX/SL */ -}; - -#endif /* __SOC_IMX_TIMER_H__ */ From 038d454ad996a5e275d46188d65d890d2a243f36 Mon Sep 17 00:00:00 2001 From: Stanislav Jakubek Date: Sun, 18 Jun 2023 16:46:35 +0200 Subject: [PATCH 36/39] dt-bindings: timer: brcm,kona-timer: convert to YAML Convert Broadcom Kona family timer bindings to DT schema. Changes during conversion: - drop deprecated compatible (it's been deprecated for ~10 years) Signed-off-by: Stanislav Jakubek Reviewed-by: Krzysztof Kozlowski Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230618144635.GA22166@standask-GA-A55M-S2HP --- .../bindings/timer/brcm,kona-timer.txt | 25 --------- .../bindings/timer/brcm,kona-timer.yaml | 52 +++++++++++++++++++ 2 files changed, 52 insertions(+), 25 deletions(-) delete mode 100644 Documentation/devicetree/bindings/timer/brcm,kona-timer.txt create mode 100644 Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml diff --git a/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt b/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt deleted file mode 100644 index 39adf54b4388..000000000000 --- a/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt +++ /dev/null @@ -1,25 +0,0 @@ -Broadcom Kona Family timer ------------------------------------------------------ -This timer is used in the following Broadcom SoCs: - BCM11130, BCM11140, BCM11351, BCM28145, BCM28155 - -Required properties: -- compatible : "brcm,kona-timer" -- DEPRECATED: compatible : "bcm,kona-timer" -- reg : Register range for the timer -- interrupts : interrupt for the timer -- clocks: phandle + clock specifier pair of the external clock -- clock-frequency: frequency that the clock operates - -Only one of clocks or clock-frequency should be specified. - -Refer to clocks/clock-bindings.txt for generic clock consumer properties. - -Example: - timer@35006000 { - compatible = "brcm,kona-timer"; - reg = <0x35006000 0x1000>; - interrupts = <0x0 7 0x4>; - clocks = <&hub_timer_clk>; - }; - diff --git a/Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml b/Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml new file mode 100644 index 000000000000..d6af8383d6fc --- /dev/null +++ b/Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/brcm,kona-timer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom Kona family timer + +maintainers: + - Florian Fainelli + +properties: + compatible: + const: brcm,kona-timer + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: true + +oneOf: + - required: + - clocks + - required: + - clock-frequency + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + #include + #include + #include + + timer@35006000 { + compatible = "brcm,kona-timer"; + reg = <0x35006000 0x1000>; + interrupts = ; + clocks = <&aon_ccu BCM281XX_AON_CCU_HUB_TIMER>; + }; +... From e5313f1c540434b18ea57927633b1584c534b14a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 19 Jun 2023 12:02:40 -0700 Subject: [PATCH 37/39] clocksource/drivers/hyper-v: Rework clocksource and sched clock setup Current code assigns either the Hyper-V TSC page or MSR-based ref counter as the sched clock. This may be sub-optimal in two cases. First, if there is hardware support to ensure consistent TSC frequency across live migrations and Hyper-V is using that support, the raw TSC is a faster source of time than the Hyper-V TSC page. Second, the MSR-based ref counter is relatively slow because reads require a trap to the hypervisor. As such, it should never be used as the sched clock. The native sched clock based on the raw TSC or jiffies is much better. Rework the sched clock setup so it is set to the TSC page only if Hyper-V indicates that the TSC may have inconsistent frequency across live migrations. Also, remove the code that sets the sched clock to the MSR-based ref counter. In the cases where it is not set, the sched clock will then be the native sched clock. As part of the rework, always enable both the TSC page clocksource and the MSR-based ref counter clocksource. Set the ratings so the TSC page clocksource is preferred. While the MSR-based ref counter clocksource is unlikely to ever be the default, having it available for manual selection is convenient for development purposes. Signed-off-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1687201360-16003-1-git-send-email-mikelley@microsoft.com --- drivers/clocksource/hyperv_timer.c | 54 +++++++++++++----------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index bcd9042a0c9f..9fc008c16636 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -475,15 +475,9 @@ static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) return read_hv_clock_msr(); } -static u64 notrace read_hv_sched_clock_msr(void) -{ - return (read_hv_clock_msr() - hv_sched_clock_offset) * - (NSEC_PER_SEC / HV_CLOCK_HZ); -} - static struct clocksource hyperv_cs_msr = { .name = "hyperv_clocksource_msr", - .rating = 500, + .rating = 495, .read = read_hv_clock_msr_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -513,7 +507,7 @@ static __always_inline void hv_setup_sched_clock(void *sched_clock) static __always_inline void hv_setup_sched_clock(void *sched_clock) {} #endif /* CONFIG_GENERIC_SCHED_CLOCK */ -static bool __init hv_init_tsc_clocksource(void) +static void __init hv_init_tsc_clocksource(void) { union hv_reference_tsc_msr tsc_msr; @@ -524,17 +518,14 @@ static bool __init hv_init_tsc_clocksource(void) * Hyper-V Reference TSC rating, causing the generic TSC to be used. * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference * TSC will be preferred over the virtualized ARM64 arch counter. - * While the Hyper-V MSR clocksource won't be used since the - * Reference TSC clocksource is present, change its rating as - * well for consistency. */ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { hyperv_cs_tsc.rating = 250; - hyperv_cs_msr.rating = 250; + hyperv_cs_msr.rating = 245; } if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) - return false; + return; hv_read_reference_counter = read_hv_clock_tsc; @@ -565,33 +556,34 @@ static bool __init hv_init_tsc_clocksource(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - hv_sched_clock_offset = hv_read_reference_counter(); - hv_setup_sched_clock(read_hv_sched_clock_tsc); - - return true; + /* + * If TSC is invariant, then let it stay as the sched clock since it + * will be faster than reading the TSC page. But if not invariant, use + * the TSC page so that live migrations across hosts with different + * frequencies is handled correctly. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) { + hv_sched_clock_offset = hv_read_reference_counter(); + hv_setup_sched_clock(read_hv_sched_clock_tsc); + } } void __init hv_init_clocksource(void) { /* - * Try to set up the TSC page clocksource. If it succeeds, we're - * done. Otherwise, set up the MSR clocksource. At least one of - * these will always be available except on very old versions of - * Hyper-V on x86. In that case we won't have a Hyper-V + * Try to set up the TSC page clocksource, then the MSR clocksource. + * At least one of these will always be available except on very old + * versions of Hyper-V on x86. In that case we won't have a Hyper-V * clocksource, but Linux will still run with a clocksource based * on the emulated PIT or LAPIC timer. + * + * Never use the MSR clocksource as sched clock. It's too slow. + * Better to use the native sched clock as the fallback. */ - if (hv_init_tsc_clocksource()) - return; + hv_init_tsc_clocksource(); - if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) - return; - - hv_read_reference_counter = read_hv_clock_msr; - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - - hv_sched_clock_offset = hv_read_reference_counter(); - hv_setup_sched_clock(read_hv_sched_clock_msr); + if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) + clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); } void __init hv_remap_tsc_clocksource(void) From 6d0d4df8e7e1fe22d961d667c2bfa40c3d5022e8 Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Tue, 20 Jun 2023 12:02:31 +0200 Subject: [PATCH 38/39] dt-bindings: timers: Add Ralink SoCs timer Add YAML documentation for the timer which is present on Ralink SoCs. Signed-off-by: Sergio Paracuellos Reviewed-by: Rob Herring Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230620100231.1412582-1-sergio.paracuellos@gmail.com --- .../bindings/timer/ralink,rt2880-timer.yaml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml diff --git a/Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml b/Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml new file mode 100644 index 000000000000..daa7832babe3 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/ralink,rt2880-timer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Timer present in Ralink family SoCs + +maintainers: + - Sergio Paracuellos + +properties: + compatible: + const: ralink,rt2880-timer + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +additionalProperties: false + +examples: + - | + timer@100 { + compatible = "ralink,rt2880-timer"; + reg = <0x100 0x20>; + + clocks = <&sysc 3>; + + interrupt-parent = <&intc>; + interrupts = <1>; + }; +... From 8b5bf64c89c7100c921bd807ba39b2eb003061ab Mon Sep 17 00:00:00 2001 From: Feng Mingxi Date: Tue, 25 Apr 2023 06:56:11 +0000 Subject: [PATCH 39/39] clocksource/drivers/cadence-ttc: Fix memory leak in ttc_timer_probe Smatch reports: drivers/clocksource/timer-cadence-ttc.c:529 ttc_timer_probe() warn: 'timer_baseaddr' from of_iomap() not released on lines: 498,508,516. timer_baseaddr may have the problem of not being released after use, I replaced it with the devm_of_iomap() function and added the clk_put() function to cleanup the "clk_ce" and "clk_cs". Fixes: e932900a3279 ("arm: zynq: Use standard timer binding") Fixes: 70504f311d4b ("clocksource/drivers/cadence_ttc: Convert init function to return error") Signed-off-by: Feng Mingxi Reviewed-by: Dongliang Mu Acked-by: Michal Simek Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230425065611.702917-1-m202271825@hust.edu.cn --- drivers/clocksource/timer-cadence-ttc.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c index 4efd0cf3b602..0d52e28fea4d 100644 --- a/drivers/clocksource/timer-cadence-ttc.c +++ b/drivers/clocksource/timer-cadence-ttc.c @@ -486,10 +486,10 @@ static int __init ttc_timer_probe(struct platform_device *pdev) * and use it. Note that the event timer uses the interrupt and it's the * 2nd TTC hence the irq_of_parse_and_map(,1) */ - timer_baseaddr = of_iomap(timer, 0); - if (!timer_baseaddr) { + timer_baseaddr = devm_of_iomap(&pdev->dev, timer, 0, NULL); + if (IS_ERR(timer_baseaddr)) { pr_err("ERROR: invalid timer base address\n"); - return -ENXIO; + return PTR_ERR(timer_baseaddr); } irq = irq_of_parse_and_map(timer, 1); @@ -513,20 +513,27 @@ static int __init ttc_timer_probe(struct platform_device *pdev) clk_ce = of_clk_get(timer, clksel); if (IS_ERR(clk_ce)) { pr_err("ERROR: timer input clock not found\n"); - return PTR_ERR(clk_ce); + ret = PTR_ERR(clk_ce); + goto put_clk_cs; } ret = ttc_setup_clocksource(clk_cs, timer_baseaddr, timer_width); if (ret) - return ret; + goto put_clk_ce; ret = ttc_setup_clockevent(clk_ce, timer_baseaddr + 4, irq); if (ret) - return ret; + goto put_clk_ce; pr_info("%pOFn #0 at %p, irq=%d\n", timer, timer_baseaddr, irq); return 0; + +put_clk_ce: + clk_put(clk_ce); +put_clk_cs: + clk_put(clk_cs); + return ret; } static const struct of_device_id ttc_timer_of_match[] = {