a0e928ed7c
Pull timer updates from Ingo Molnar: "This cycle had the following changes: - Timer tracing improvements (Anna-Maria Gleixner) - Continued tasklet reduction work: remove the hrtimer_tasklet (Thomas Gleixner) - Fix CPU hotplug remove race in the tick-broadcast mask handling code (Thomas Gleixner) - Force upper bound for setting CLOCK_REALTIME, to fix ABI inconsistencies with handling values that are close to the maximum supported and the vagueness of when uptime related wraparound might occur. Make the consistent maximum the year 2232 across all relevant ABIs and APIs. (Thomas Gleixner) - various cleanups and smaller fixes" * 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: tick: Fix typos in comments tick/broadcast: Fix warning about undefined tick_broadcast_oneshot_offline() timekeeping: Force upper bound for setting CLOCK_REALTIME timer/trace: Improve timer tracing timer/trace: Replace deprecated vsprintf pointer extension %pf by %ps timer: Move trace point to get proper index tick/sched: Update tick_sched struct documentation tick: Remove outgoing CPU from broadcast masks timekeeping: Consistently use unsigned int for seqcount snapshot softirq: Remove tasklet_hrtimer xfrm: Replace hrtimer tasklet with softirq hrtimer mac80211_hwsim: Replace hrtimer tasklet with softirq hrtimer
578 lines
14 KiB
C
578 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* This file contains the base functions to manage periodic tick
|
|
* related events.
|
|
*
|
|
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
|
|
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
|
|
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
|
|
*/
|
|
#include <linux/cpu.h>
|
|
#include <linux/err.h>
|
|
#include <linux/hrtimer.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/profile.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/module.h>
|
|
#include <trace/events/power.h>
|
|
|
|
#include <asm/irq_regs.h>
|
|
|
|
#include "tick-internal.h"
|
|
|
|
/*
|
|
* Tick devices
|
|
*/
|
|
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
|
|
/*
|
|
* Tick next event: keeps track of the tick time
|
|
*/
|
|
ktime_t tick_next_period;
|
|
ktime_t tick_period;
|
|
|
|
/*
|
|
* tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
|
|
* which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
|
|
* variable has two functions:
|
|
*
|
|
* 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
|
|
* timekeeping lock all at once. Only the CPU which is assigned to do the
|
|
* update is handling it.
|
|
*
|
|
* 2) Hand off the duty in the NOHZ idle case by setting the value to
|
|
* TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks
|
|
* at it will take over and keep the time keeping alive. The handover
|
|
* procedure also covers cpu hotplug.
|
|
*/
|
|
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
|
|
#ifdef CONFIG_NO_HZ_FULL
/*
 * tick_do_timer_boot_cpu is >= 0 while the boot CPU only temporarily owns
 * tick_do_timer_cpu; an eligible secondary CPU takes the duty over when it
 * comes online. -1 means no such handover is pending.
 */
static int tick_do_timer_boot_cpu __read_mostly = -1;
#endif
|
|
|
|
/*
|
|
* Debugging: see timer_list.c
|
|
*/
|
|
struct tick_device *tick_get_device(int cpu)
|
|
{
|
|
return &per_cpu(tick_cpu_device, cpu);
|
|
}
|
|
|
|
/**
|
|
* tick_is_oneshot_available - check for a oneshot capable event device
|
|
*/
|
|
int tick_is_oneshot_available(void)
|
|
{
|
|
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
|
|
|
|
if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
|
|
return 0;
|
|
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
|
|
return 1;
|
|
return tick_broadcast_oneshot_available();
|
|
}
|
|
|
|
/*
|
|
* Periodic tick
|
|
*/
|
|
static void tick_periodic(int cpu)
|
|
{
|
|
if (tick_do_timer_cpu == cpu) {
|
|
write_seqlock(&jiffies_lock);
|
|
|
|
/* Keep track of the next tick event */
|
|
tick_next_period = ktime_add(tick_next_period, tick_period);
|
|
|
|
do_timer(1);
|
|
write_sequnlock(&jiffies_lock);
|
|
update_wall_time();
|
|
}
|
|
|
|
update_process_times(user_mode(get_irq_regs()));
|
|
profile_tick(CPU_PROFILING);
|
|
}
|
|
|
|
/*
|
|
* Event handler for periodic ticks
|
|
*/
|
|
void tick_handle_periodic(struct clock_event_device *dev)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
ktime_t next = dev->next_event;
|
|
|
|
tick_periodic(cpu);
|
|
|
|
#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
|
|
/*
|
|
* The cpu might have transitioned to HIGHRES or NOHZ mode via
|
|
* update_process_times() -> run_local_timers() ->
|
|
* hrtimer_run_queues().
|
|
*/
|
|
if (dev->event_handler != tick_handle_periodic)
|
|
return;
|
|
#endif
|
|
|
|
if (!clockevent_state_oneshot(dev))
|
|
return;
|
|
for (;;) {
|
|
/*
|
|
* Setup the next period for devices, which do not have
|
|
* periodic mode:
|
|
*/
|
|
next = ktime_add(next, tick_period);
|
|
|
|
if (!clockevents_program_event(dev, next, false))
|
|
return;
|
|
/*
|
|
* Have to be careful here. If we're in oneshot mode,
|
|
* before we call tick_periodic() in a loop, we need
|
|
* to be sure we're using a real hardware clocksource.
|
|
* Otherwise we could get trapped in an infinite
|
|
* loop, as the tick_periodic() increments jiffies,
|
|
* which then will increment time, possibly causing
|
|
* the loop to trigger again and again.
|
|
*/
|
|
if (timekeeping_valid_for_hres())
|
|
tick_periodic(cpu);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Setup the device for a periodic tick
|
|
*/
|
|
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
|
|
{
|
|
tick_set_periodic_handler(dev, broadcast);
|
|
|
|
/* Broadcast setup ? */
|
|
if (!tick_device_is_functional(dev))
|
|
return;
|
|
|
|
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
|
|
!tick_broadcast_oneshot_active()) {
|
|
clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
|
|
} else {
|
|
unsigned int seq;
|
|
ktime_t next;
|
|
|
|
do {
|
|
seq = read_seqbegin(&jiffies_lock);
|
|
next = tick_next_period;
|
|
} while (read_seqretry(&jiffies_lock, seq));
|
|
|
|
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
|
|
|
|
for (;;) {
|
|
if (!clockevents_program_event(dev, next, false))
|
|
return;
|
|
next = ktime_add(next, tick_period);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_NO_HZ_FULL
|
|
static void giveup_do_timer(void *info)
|
|
{
|
|
int cpu = *(unsigned int *)info;
|
|
|
|
WARN_ON(tick_do_timer_cpu != smp_processor_id());
|
|
|
|
tick_do_timer_cpu = cpu;
|
|
}
|
|
|
|
static void tick_take_do_timer_from_boot(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
int from = tick_do_timer_boot_cpu;
|
|
|
|
if (from >= 0 && from != cpu)
|
|
smp_call_function_single(from, giveup_do_timer, &cpu, 1);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Setup the tick device
|
|
*/
|
|
static void tick_setup_device(struct tick_device *td,
|
|
struct clock_event_device *newdev, int cpu,
|
|
const struct cpumask *cpumask)
|
|
{
|
|
void (*handler)(struct clock_event_device *) = NULL;
|
|
ktime_t next_event = 0;
|
|
|
|
/*
|
|
* First device setup ?
|
|
*/
|
|
if (!td->evtdev) {
|
|
/*
|
|
* If no cpu took the do_timer update, assign it to
|
|
* this cpu:
|
|
*/
|
|
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
|
|
tick_do_timer_cpu = cpu;
|
|
|
|
tick_next_period = ktime_get();
|
|
tick_period = NSEC_PER_SEC / HZ;
|
|
#ifdef CONFIG_NO_HZ_FULL
|
|
/*
|
|
* The boot CPU may be nohz_full, in which case set
|
|
* tick_do_timer_boot_cpu so the first housekeeping
|
|
* secondary that comes up will take do_timer from
|
|
* us.
|
|
*/
|
|
if (tick_nohz_full_cpu(cpu))
|
|
tick_do_timer_boot_cpu = cpu;
|
|
|
|
} else if (tick_do_timer_boot_cpu != -1 &&
|
|
!tick_nohz_full_cpu(cpu)) {
|
|
tick_take_do_timer_from_boot();
|
|
tick_do_timer_boot_cpu = -1;
|
|
WARN_ON(tick_do_timer_cpu != cpu);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Startup in periodic mode first.
|
|
*/
|
|
td->mode = TICKDEV_MODE_PERIODIC;
|
|
} else {
|
|
handler = td->evtdev->event_handler;
|
|
next_event = td->evtdev->next_event;
|
|
td->evtdev->event_handler = clockevents_handle_noop;
|
|
}
|
|
|
|
td->evtdev = newdev;
|
|
|
|
/*
|
|
* When the device is not per cpu, pin the interrupt to the
|
|
* current cpu:
|
|
*/
|
|
if (!cpumask_equal(newdev->cpumask, cpumask))
|
|
irq_set_affinity(newdev->irq, cpumask);
|
|
|
|
/*
|
|
* When global broadcasting is active, check if the current
|
|
* device is registered as a placeholder for broadcast mode.
|
|
* This allows us to handle this x86 misfeature in a generic
|
|
* way. This function also returns !=0 when we keep the
|
|
* current active broadcast state for this CPU.
|
|
*/
|
|
if (tick_device_uses_broadcast(newdev, cpu))
|
|
return;
|
|
|
|
if (td->mode == TICKDEV_MODE_PERIODIC)
|
|
tick_setup_periodic(newdev, 0);
|
|
else
|
|
tick_setup_oneshot(newdev, handler, next_event);
|
|
}
|
|
|
|
void tick_install_replacement(struct clock_event_device *newdev)
|
|
{
|
|
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
|
|
int cpu = smp_processor_id();
|
|
|
|
clockevents_exchange_device(td->evtdev, newdev);
|
|
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
|
|
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
|
|
tick_oneshot_notify();
|
|
}
|
|
|
|
static bool tick_check_percpu(struct clock_event_device *curdev,
|
|
struct clock_event_device *newdev, int cpu)
|
|
{
|
|
if (!cpumask_test_cpu(cpu, newdev->cpumask))
|
|
return false;
|
|
if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
|
|
return true;
|
|
/* Check if irq affinity can be set */
|
|
if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
|
|
return false;
|
|
/* Prefer an existing cpu local device */
|
|
if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
static bool tick_check_preferred(struct clock_event_device *curdev,
|
|
struct clock_event_device *newdev)
|
|
{
|
|
/* Prefer oneshot capable device */
|
|
if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
|
|
if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
|
|
return false;
|
|
if (tick_oneshot_mode_active())
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Use the higher rated one, but prefer a CPU local device with a lower
|
|
* rating than a non-CPU local device
|
|
*/
|
|
return !curdev ||
|
|
newdev->rating > curdev->rating ||
|
|
!cpumask_equal(curdev->cpumask, newdev->cpumask);
|
|
}
|
|
|
|
/*
|
|
* Check whether the new device is a better fit than curdev. curdev
|
|
* can be NULL !
|
|
*/
|
|
bool tick_check_replacement(struct clock_event_device *curdev,
|
|
struct clock_event_device *newdev)
|
|
{
|
|
if (!tick_check_percpu(curdev, newdev, smp_processor_id()))
|
|
return false;
|
|
|
|
return tick_check_preferred(curdev, newdev);
|
|
}
|
|
|
|
/*
|
|
* Check, if the new registered device should be used. Called with
|
|
* clockevents_lock held and interrupts disabled.
|
|
*/
|
|
void tick_check_new_device(struct clock_event_device *newdev)
|
|
{
|
|
struct clock_event_device *curdev;
|
|
struct tick_device *td;
|
|
int cpu;
|
|
|
|
cpu = smp_processor_id();
|
|
td = &per_cpu(tick_cpu_device, cpu);
|
|
curdev = td->evtdev;
|
|
|
|
/* cpu local device ? */
|
|
if (!tick_check_percpu(curdev, newdev, cpu))
|
|
goto out_bc;
|
|
|
|
/* Preference decision */
|
|
if (!tick_check_preferred(curdev, newdev))
|
|
goto out_bc;
|
|
|
|
if (!try_module_get(newdev->owner))
|
|
return;
|
|
|
|
/*
|
|
* Replace the eventually existing device by the new
|
|
* device. If the current device is the broadcast device, do
|
|
* not give it back to the clockevents layer !
|
|
*/
|
|
if (tick_is_broadcast_device(curdev)) {
|
|
clockevents_shutdown(curdev);
|
|
curdev = NULL;
|
|
}
|
|
clockevents_exchange_device(curdev, newdev);
|
|
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
|
|
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
|
|
tick_oneshot_notify();
|
|
return;
|
|
|
|
out_bc:
|
|
/*
|
|
* Can the new device be used as a broadcast device ?
|
|
*/
|
|
tick_install_broadcast_device(newdev);
|
|
}
|
|
|
|
/**
|
|
* tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
|
|
* @state: The target state (enter/exit)
|
|
*
|
|
* The system enters/leaves a state, where affected devices might stop
|
|
* Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
|
|
*
|
|
* Called with interrupts disabled, so clockevents_lock is not
|
|
* required here because the local clock event device cannot go away
|
|
* under us.
|
|
*/
|
|
int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
|
|
{
|
|
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
|
|
|
|
if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP))
|
|
return 0;
|
|
|
|
return __tick_broadcast_oneshot_control(state);
|
|
}
|
|
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
/*
|
|
* Transfer the do_timer job away from a dying cpu.
|
|
*
|
|
* Called with interrupts disabled. Not locking required. If
|
|
* tick_do_timer_cpu is owned by this cpu, nothing can change it.
|
|
*/
|
|
void tick_handover_do_timer(void)
|
|
{
|
|
if (tick_do_timer_cpu == smp_processor_id()) {
|
|
int cpu = cpumask_first(cpu_online_mask);
|
|
|
|
tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
|
|
TICK_DO_TIMER_NONE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Shutdown an event device on a given cpu:
|
|
*
|
|
* This is called on a life CPU, when a CPU is dead. So we cannot
|
|
* access the hardware device itself.
|
|
* We just set the mode and remove it from the lists.
|
|
*/
|
|
void tick_shutdown(unsigned int cpu)
|
|
{
|
|
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
|
|
struct clock_event_device *dev = td->evtdev;
|
|
|
|
td->mode = TICKDEV_MODE_PERIODIC;
|
|
if (dev) {
|
|
/*
|
|
* Prevent that the clock events layer tries to call
|
|
* the set mode function!
|
|
*/
|
|
clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
|
|
clockevents_exchange_device(dev, NULL);
|
|
dev->event_handler = clockevents_handle_noop;
|
|
td->evtdev = NULL;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* tick_suspend_local - Suspend the local tick device
|
|
*
|
|
* Called from the local cpu for freeze with interrupts disabled.
|
|
*
|
|
* No locks required. Nothing can change the per cpu device.
|
|
*/
|
|
void tick_suspend_local(void)
|
|
{
|
|
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
|
|
|
|
clockevents_shutdown(td->evtdev);
|
|
}
|
|
|
|
/**
|
|
* tick_resume_local - Resume the local tick device
|
|
*
|
|
* Called from the local CPU for unfreeze or XEN resume magic.
|
|
*
|
|
* No locks required. Nothing can change the per cpu device.
|
|
*/
|
|
void tick_resume_local(void)
|
|
{
|
|
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
|
|
bool broadcast = tick_resume_check_broadcast();
|
|
|
|
clockevents_tick_resume(td->evtdev);
|
|
if (!broadcast) {
|
|
if (td->mode == TICKDEV_MODE_PERIODIC)
|
|
tick_setup_periodic(td->evtdev, 0);
|
|
else
|
|
tick_resume_oneshot();
|
|
}
|
|
}
|
|
|
|
/**
 * tick_suspend - Suspend the tick and the broadcast device
 *
 * Suspends the local tick device first, then the broadcast device.
 *
 * Called from syscore_suspend() via timekeeping_suspend with only one
 * CPU online and interrupts disabled or from tick_unfreeze() under
 * tick_freeze_lock.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend(void)
{
	tick_suspend_local();
	tick_suspend_broadcast();
}
|
|
|
|
/**
 * tick_resume - Resume the tick and the broadcast device
 *
 * Resumes the broadcast device first, then the local tick device.
 *
 * Called from syscore_resume() via timekeeping_resume with only one
 * CPU online and interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume(void)
{
	tick_resume_broadcast();
	tick_resume_local();
}
|
|
|
|
#ifdef CONFIG_SUSPEND
|
|
/*
 * Freeze state: tick_freeze_depth is incremented by tick_freeze() and
 * decremented by tick_unfreeze(); both modify it with tick_freeze_lock
 * held.
 */
static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
static unsigned int tick_freeze_depth;
|
|
|
|
/**
|
|
* tick_freeze - Suspend the local tick and (possibly) timekeeping.
|
|
*
|
|
* Check if this is the last online CPU executing the function and if so,
|
|
* suspend timekeeping. Otherwise suspend the local tick.
|
|
*
|
|
* Call with interrupts disabled. Must be balanced with %tick_unfreeze().
|
|
* Interrupts must not be enabled before the subsequent %tick_unfreeze().
|
|
*/
|
|
void tick_freeze(void)
|
|
{
|
|
raw_spin_lock(&tick_freeze_lock);
|
|
|
|
tick_freeze_depth++;
|
|
if (tick_freeze_depth == num_online_cpus()) {
|
|
trace_suspend_resume(TPS("timekeeping_freeze"),
|
|
smp_processor_id(), true);
|
|
system_state = SYSTEM_SUSPEND;
|
|
sched_clock_suspend();
|
|
timekeeping_suspend();
|
|
} else {
|
|
tick_suspend_local();
|
|
}
|
|
|
|
raw_spin_unlock(&tick_freeze_lock);
|
|
}
|
|
|
|
/**
|
|
* tick_unfreeze - Resume the local tick and (possibly) timekeeping.
|
|
*
|
|
* Check if this is the first CPU executing the function and if so, resume
|
|
* timekeeping. Otherwise resume the local tick.
|
|
*
|
|
* Call with interrupts disabled. Must be balanced with %tick_freeze().
|
|
* Interrupts must not be enabled after the preceding %tick_freeze().
|
|
*/
|
|
void tick_unfreeze(void)
|
|
{
|
|
raw_spin_lock(&tick_freeze_lock);
|
|
|
|
if (tick_freeze_depth == num_online_cpus()) {
|
|
timekeeping_resume();
|
|
sched_clock_resume();
|
|
system_state = SYSTEM_RUNNING;
|
|
trace_suspend_resume(TPS("timekeeping_freeze"),
|
|
smp_processor_id(), false);
|
|
} else {
|
|
tick_resume_local();
|
|
}
|
|
|
|
tick_freeze_depth--;
|
|
|
|
raw_spin_unlock(&tick_freeze_lock);
|
|
}
|
|
#endif /* CONFIG_SUSPEND */
|
|
|
|
/**
|
|
* tick_init - initialize the tick control
|
|
*/
|
|
void __init tick_init(void)
|
|
{
|
|
tick_broadcast_init();
|
|
tick_nohz_init();
|
|
}
|