// SPDX-License-Identifier: GPL-2.0
/*
 * trace_hwlat.c - A simple Hardware Latency detector.
 *
 * Use this tracer to detect large system latencies induced by the behavior of
 * certain underlying system hardware or firmware, independent of Linux itself.
 * The code was developed originally to detect the presence of SMIs on Intel
 * and AMD systems, although there is no dependency upon x86 herein.
 *
 * The classical example usage of this tracer is in detecting the presence of
 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
 * somewhat special form of hardware interrupt spawned from earlier CPU debug
 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
 * LPC (or other device) to generate a special interrupt under certain
 * circumstances, for example, upon expiration of a special SMI timer device,
 * due to certain external thermal readings, on certain I/O address accesses,
 * and other situations. An SMI hits a special CPU pin, triggers a special
 * SMI mode (complete with special memory map), and the OS is unaware.
 *
 * Although certain hardware-inducing latencies are necessary (for example,
 * a modern system often requires an SMI handler for correct thermal control
 * and remote management) they can wreak havoc upon any OS-level performance
 * guarantees toward low-latency, especially when the OS is not even made
 * aware of the presence of these interrupts. For this reason, we need a
 * somewhat brute force mechanism to detect these interrupts. In this case,
 * we do it by hogging all of the CPU(s) for configurable timer intervals,
 * sampling the built-in CPU timer, looking for discontiguous readings.
 *
 * WARNING: This implementation necessarily introduces latencies. Therefore,
 *          you should NEVER use this tracer while running in a production
 *          environment requiring any kind of low-latency performance
 *          guarantee(s).
 *
 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *
 * Includes useful feedback from Clark Williams <williams@redhat.com>
 *
 */
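
/*
 * Typical usage, as a sketch (assuming tracefs is mounted at
 * /sys/kernel/tracing, which is conventional but not enforced here):
 *
 *	# cd /sys/kernel/tracing
 *	# echo 1000000 > hwlat_detector/window	(total sampling window, in usecs)
 *	# echo 500000 > hwlat_detector/width	(busy-sampling portion, in usecs)
 *	# echo per-cpu > hwlat_detector/mode	(none, round-robin or per-cpu)
 *	# echo hwlat > current_tracer
 *	# cat trace
 *
 * The "window", "width" and "mode" files are created by init_tracefs() below;
 * samples exceeding tracing_thresh (DEFAULT_LAT_THRESHOLD usecs unless the
 * user changed it) are recorded as hwlat events in the trace buffer.
 */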
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include "trace.h"

static struct trace_array	*hwlat_trace;

#define U64STR_SIZE		22			/* 20 digits max */

#define BANNER			"hwlat_detector: "
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */

static struct dentry *hwlat_sample_width;	/* sample width us */
static struct dentry *hwlat_sample_window;	/* sample window us */
static struct dentry *hwlat_thread_mode;	/* hwlat thread mode */

enum {
	MODE_NONE = 0,
	MODE_ROUND_ROBIN,
	MODE_PER_CPU,
	MODE_MAX
};

static char *thread_mode_str[] = { "none", "round-robin", "per-cpu" };

/* Save the previous tracing_thresh value */
static unsigned long save_tracing_thresh;

/* runtime kthread data */
struct hwlat_kthread_data {
	struct task_struct	*kthread;
	/* NMI timestamp counters */
	u64			nmi_ts_start;
	u64			nmi_total_ts;
	int			nmi_count;
	int			nmi_cpu;
};

static struct hwlat_kthread_data hwlat_single_cpu_data;
static DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data);

/* Tells NMIs to call back to the hwlat tracer to record timestamps */
bool trace_hwlat_callback_enabled;

/* If the user changed threshold, remember it */
static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;

/* Individual latency samples are stored here when detected. */
struct hwlat_sample {
	u64			seqnum;		/* unique sequence */
	u64			duration;	/* delta */
	u64			outer_duration;	/* delta (outer loop) */
	u64			nmi_total_ts;	/* Total time spent in NMIs */
	struct timespec64	timestamp;	/* wall time */
	int			nmi_count;	/* # NMIs during this sample */
	int			count;		/* # of iterations over thresh */
};

/* keep the global state somewhere. */
static struct hwlat_data {

	struct mutex lock;		/* protect changes */

	u64	count;			/* total since reset */

	u64	sample_window;		/* total sampling window (on+off) */
	u64	sample_width;		/* active sampling portion of window */

	int	thread_mode;		/* thread mode */

} hwlat_data = {
	.sample_window		= DEFAULT_SAMPLE_WINDOW,
	.sample_width		= DEFAULT_SAMPLE_WIDTH,
	.thread_mode		= MODE_ROUND_ROBIN
};

static struct hwlat_kthread_data *get_cpu_data(void)
{
	if (hwlat_data.thread_mode == MODE_PER_CPU)
		return this_cpu_ptr(&hwlat_per_cpu_data);
	else
		return &hwlat_single_cpu_data;
}

static bool hwlat_busy;

static void trace_hwlat_sample(struct hwlat_sample *sample)
{
	struct trace_array *tr = hwlat_trace;
	struct trace_event_call *call = &event_hwlat;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct hwlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->seqnum			= sample->seqnum;
	entry->duration			= sample->duration;
	entry->outer_duration		= sample->outer_duration;
	entry->timestamp		= sample->timestamp;
	entry->nmi_total_ts		= sample->nmi_total_ts;
	entry->nmi_count		= sample->nmi_count;
	entry->count			= sample->count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

/* Macros to encapsulate the time capturing infrastructure */
#define time_type	u64
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
#define init_time(a, b)	(a = b)
#define time_u64(a)	a
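
/*
 * time_get() reads trace_clock_local(), a nanosecond-resolution local clock;
 * time_to_us() converts a nanosecond delta to microseconds so it can be
 * compared against the (microsecond) threshold derived from tracing_thresh.
 */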

void trace_hwlat_callback(bool enter)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();

	if (!kdata->kthread)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter)
			kdata->nmi_ts_start = time_get();
		else
			kdata->nmi_total_ts += time_get() - kdata->nmi_ts_start;
	}

	if (enter)
		kdata->nmi_count++;
}

/*
 * hwlat_err - report a hwlat error.
 */
#define hwlat_err(msg) ({							\
	struct trace_array *tr = hwlat_trace;					\
										\
	trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg);	\
})

/**
 * get_sample - sample the CPU TSC and look for likely hardware latencies
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
 * hardware-induced latency. Called with interrupts disabled and with
 * hwlat_data.lock held.
 */
static int get_sample(void)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct trace_array *tr = hwlat_trace;
	struct hwlat_sample s;
	time_type start, t1, t2, last_t2;
	s64 diff, outer_diff, total, last_total = 0;
	u64 sample = 0;
	u64 thresh = tracing_thresh;
	u64 outer_sample = 0;
	int ret = -1;
	unsigned int count = 0;

	do_div(thresh, NSEC_PER_USEC); /* modifies interval value */

	kdata->nmi_total_ts = 0;
	kdata->nmi_count = 0;
	/* Make sure NMIs see this first */
	barrier();

	trace_hwlat_callback_enabled = true;

	init_time(last_t2, 0);
	start = time_get(); /* start timestamp */
	outer_diff = 0;
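
	/*
	 * Sampling loop: take two back-to-back readings of the local clock.
	 * With interrupts disabled, any sizable gap between t1 and t2 (inner)
	 * or between the previous t2 and the next t1 (outer) is time stolen by
	 * something the kernel did not schedule: an NMI (accounted separately
	 * via nmi_total_ts) or firmware, such as an SMI handler.
	 */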
	do {

		t1 = time_get();	/* we'll look for a discontinuity */
		t2 = time_get();

		if (time_u64(last_t2)) {
			/* Check the delta from outer loop (t2 to next t1) */
			outer_diff = time_to_us(time_sub(t1, last_t2));
			/* This shouldn't happen */
			if (outer_diff < 0) {
				hwlat_err(BANNER "time running backwards\n");
				goto out;
			}
			if (outer_diff > outer_sample)
				outer_sample = outer_diff;
		}
		last_t2 = t2;

		total = time_to_us(time_sub(t2, start)); /* sample width */

		/* Check for possible overflows */
		if (total < last_total) {
			hwlat_err("Time total overflowed\n");
			break;
		}
		last_total = total;

		/* This checks the inner loop (t1 to t2) */
		diff = time_to_us(time_sub(t2, t1));     /* current diff */

		if (diff > thresh || outer_diff > thresh) {
			if (!count)
				ktime_get_real_ts64(&s.timestamp);
			count++;
		}

		/* This shouldn't happen */
		if (diff < 0) {
			hwlat_err(BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= hwlat_data.sample_width);

	barrier(); /* finish the above in the view for NMIs */
	trace_hwlat_callback_enabled = false;
	barrier(); /* Make sure nmi_total_ts is no longer updated */

	ret = 0;

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > thresh || outer_sample > thresh) {
		u64 latency;

		ret = 1;

		/* We read in microseconds */
		if (kdata->nmi_total_ts)
			do_div(kdata->nmi_total_ts, NSEC_PER_USEC);

		hwlat_data.count++;
		s.seqnum = hwlat_data.count;
		s.duration = sample;
		s.outer_duration = outer_sample;
		s.nmi_total_ts = kdata->nmi_total_ts;
		s.nmi_count = kdata->nmi_count;
		s.count = count;
		trace_hwlat_sample(&s);

		latency = max(sample, outer_sample);

		/* Keep a running maximum ever recorded hardware latency */
		if (latency > tr->max_latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}

out:
	return ret;
}

static struct cpumask save_cpumask;

static void move_to_next_cpu(void)
{
	struct cpumask *current_mask = &save_cpumask;
	struct trace_array *tr = hwlat_trace;
	int next_cpu;

	/*
	 * If for some reason the user modifies the CPU affinity
	 * of this thread, then stop migrating for the duration
	 * of the current test.
	 */
	if (!cpumask_equal(current_mask, current->cpus_ptr))
		goto change_mode;

	cpus_read_lock();
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
	next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
	cpus_read_unlock();

	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(current_mask);

	if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
		goto change_mode;

	cpumask_clear(current_mask);
	cpumask_set_cpu(next_cpu, current_mask);

	set_cpus_allowed_ptr(current, current_mask);
	return;

 change_mode:
	hwlat_data.thread_mode = MODE_NONE;
	pr_info(BANNER "cpumask changed while in round-robin mode, switching to mode none\n");
}

/*
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * disable interrupts, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus preempting).
 * Obviously this should never be used in production environments.
 *
 * Executes one loop iteration on each CPU in the tracing_cpumask sysfs file.
 */
static int kthread_fn(void *data)
{
	u64 interval;

	while (!kthread_should_stop()) {

		if (hwlat_data.thread_mode == MODE_ROUND_ROBIN)
			move_to_next_cpu();

		local_irq_disable();
		get_sample();
		local_irq_enable();

		mutex_lock(&hwlat_data.lock);
		interval = hwlat_data.sample_window - hwlat_data.sample_width;
		mutex_unlock(&hwlat_data.lock);
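
		/*
		 * The thread was busy sampling for "width" usecs; sleep for the
		 * remainder of the window so the busy/idle duty cycle stays at
		 * width/window.
		 */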
		do_div(interval, USEC_PER_MSEC); /* modifies interval value */

		/* Always sleep for at least 1ms */
		if (interval < 1)
			interval = 1;

		if (msleep_interruptible(interval))
			break;
	}

	return 0;
}

/*
 * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop
 *
 * This kicks the running hardware latency sampling/detector kernel thread and
 * tells it to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_single_kthread(void)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct task_struct *kthread;

	cpus_read_lock();
	kthread = kdata->kthread;

	if (!kthread)
		goto out_put_cpus;

	kthread_stop(kthread);
	kdata->kthread = NULL;

out_put_cpus:
	cpus_read_unlock();
}

/*
 * start_single_kthread - Kick off the hardware latency sampling/detector kthread
 *
 * This starts the kernel thread that will sit and sample the CPU timestamp
 * counter (TSC or similar) and look for potential hardware latencies.
 */
static int start_single_kthread(struct trace_array *tr)
{
	struct hwlat_kthread_data *kdata = get_cpu_data();
	struct cpumask *current_mask = &save_cpumask;
	struct task_struct *kthread;
	int next_cpu;

	cpus_read_lock();
	if (kdata->kthread)
		goto out_put_cpus;

	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		cpus_read_unlock();
		return -ENOMEM;
	}

	/* Just pick the first CPU on first iteration */
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
	if (hwlat_data.thread_mode == MODE_ROUND_ROBIN) {
		next_cpu = cpumask_first(current_mask);
		cpumask_clear(current_mask);
		cpumask_set_cpu(next_cpu, current_mask);
	}

	set_cpus_allowed_ptr(kthread, current_mask);

	kdata->kthread = kthread;
	wake_up_process(kthread);

out_put_cpus:
	cpus_read_unlock();
	return 0;
}

/*
 * stop_cpu_kthread - Stop a hwlat cpu kthread
 */
static void stop_cpu_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(hwlat_per_cpu_data, cpu).kthread;
	if (kthread)
		kthread_stop(kthread);
	per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
}

/*
 * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthreads to stop
 *
 * This kicks the running hardware latency sampling/detector kernel threads and
 * tells them to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_per_cpu_kthreads(void)
{
	unsigned int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu)
		stop_cpu_kthread(cpu);
	cpus_read_unlock();
}

/*
 * start_cpu_kthread - Start a hwlat cpu kthread
 */
static int start_cpu_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	/* Do not start a new hwlatd thread if it is already running */
	if (per_cpu(hwlat_per_cpu_data, cpu).kthread)
		return 0;

	kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		return -ENOMEM;
	}

	per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;

	return 0;
}
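
/*
 * CPU hotplug support: while hwlat is busy in per-cpu mode, a CPU from
 * tracing_cpumask that comes online gets its own hwlatd kthread (started from
 * a work item scheduled on that CPU), and a CPU going offline has its kthread
 * stopped.
 */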

#ifdef CONFIG_HOTPLUG_CPU
static void hwlat_hotplug_workfn(struct work_struct *dummy)
{
	struct trace_array *tr = hwlat_trace;
	unsigned int cpu = smp_processor_id();

	mutex_lock(&trace_types_lock);
	mutex_lock(&hwlat_data.lock);
	cpus_read_lock();

	if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
		goto out_unlock;

	if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
		goto out_unlock;

	start_cpu_kthread(cpu);

out_unlock:
	cpus_read_unlock();
	mutex_unlock(&hwlat_data.lock);
	mutex_unlock(&trace_types_lock);
}

static DECLARE_WORK(hwlat_hotplug_work, hwlat_hotplug_workfn);

/*
 * hwlat_cpu_init - CPU hotplug online callback function
 */
static int hwlat_cpu_init(unsigned int cpu)
{
	schedule_work_on(cpu, &hwlat_hotplug_work);
	return 0;
}

/*
 * hwlat_cpu_die - CPU hotplug offline callback function
 */
static int hwlat_cpu_die(unsigned int cpu)
{
	stop_cpu_kthread(cpu);
	return 0;
}

static void hwlat_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/hwlat:online",
				hwlat_cpu_init, hwlat_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Failed to init cpu hotplug support\n");

	return;
}
#else /* CONFIG_HOTPLUG_CPU */
static void hwlat_init_hotplug_support(void)
{
	return;
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads
 *
 * This starts the kernel threads that will sit on potentially all cpus and
 * sample the CPU timestamp counter (TSC or similar) and look for potential
 * hardware latencies.
 */
static int start_per_cpu_kthreads(struct trace_array *tr)
{
	struct cpumask *current_mask = &save_cpumask;
	unsigned int cpu;
	int retval;

	cpus_read_lock();
	/*
	 * Run only on CPUs in which hwlat is allowed to run.
	 */
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);

	for_each_cpu(cpu, current_mask) {
		retval = start_cpu_kthread(cpu);
		if (retval)
			goto out_error;
	}
	cpus_read_unlock();

	return 0;

out_error:
	cpus_read_unlock();
	stop_per_cpu_kthreads();
	return retval;
}
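
/*
 * seq_file iterator for the "mode" file: walks thread_mode_str[] and prints
 * every mode on one line, wrapping the currently active one in brackets,
 * e.g. "none [round-robin] per-cpu".
 */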
static void *s_mode_start(struct seq_file *s, loff_t *pos)
{
	int mode = *pos;

	mutex_lock(&hwlat_data.lock);

	if (mode >= MODE_MAX)
		return NULL;

	return pos;
}

static void *s_mode_next(struct seq_file *s, void *v, loff_t *pos)
{
	int mode = ++(*pos);

	if (mode >= MODE_MAX)
		return NULL;

	return pos;
}

static int s_mode_show(struct seq_file *s, void *v)
{
	loff_t *pos = v;
	int mode = *pos;

	if (mode == hwlat_data.thread_mode)
		seq_printf(s, "[%s]", thread_mode_str[mode]);
	else
		seq_printf(s, "%s", thread_mode_str[mode]);

	if (mode < MODE_MAX - 1) /* if mode is any but last */
		seq_puts(s, " ");

	return 0;
}

static void s_mode_stop(struct seq_file *s, void *v)
{
	seq_puts(s, "\n");
	mutex_unlock(&hwlat_data.lock);
}

static const struct seq_operations thread_mode_seq_ops = {
	.start		= s_mode_start,
	.next		= s_mode_next,
	.show		= s_mode_show,
	.stop		= s_mode_stop
};

static int hwlat_mode_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &thread_mode_seq_ops);
}

static void hwlat_tracer_start(struct trace_array *tr);
static void hwlat_tracer_stop(struct trace_array *tr);

/**
 * hwlat_mode_write - Write function for "mode" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @file
 *
 * This function provides a write implementation for the "mode" interface
 * to the hardware latency detector. hwlatd has different operation modes.
 * The "none" mode sets the allowed cpumask for a single hwlatd thread at the
 * startup and lets the scheduler handle the migration. The default mode is
 * the "round-robin" one, in which a single hwlatd thread runs, migrating
 * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode
 * creates one hwlatd thread per allowed CPU.
 */
static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = hwlat_trace;
	const char *mode;
	char buf[64];
	int ret, i;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	mode = strstrip(buf);

	ret = -EINVAL;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop
	 * and hwlat_busy.
	 */
	mutex_lock(&trace_types_lock);

	if (hwlat_busy)
		hwlat_tracer_stop(tr);

	mutex_lock(&hwlat_data.lock);

	for (i = 0; i < MODE_MAX; i++) {
		if (strcmp(mode, thread_mode_str[i]) == 0) {
			hwlat_data.thread_mode = i;
			ret = cnt;
		}
	}

	mutex_unlock(&hwlat_data.lock);

	if (hwlat_busy)
		hwlat_tracer_start(tr);

	mutex_unlock(&trace_types_lock);

	*ppos += cnt;

	return ret;
}

/*
 * The width parameter is read/write using the generic trace_min_max_param
 * method. The *val is protected by the hwlat_data lock and is upper
 * bounded by the window parameter.
 */
static struct trace_min_max_param hwlat_width = {
	.lock		= &hwlat_data.lock,
	.val		= &hwlat_data.sample_width,
	.max		= &hwlat_data.sample_window,
	.min		= NULL,
};

/*
 * The window parameter is read/write using the generic trace_min_max_param
 * method. The *val is protected by the hwlat_data lock and is lower
 * bounded by the width parameter.
 */
static struct trace_min_max_param hwlat_window = {
	.lock		= &hwlat_data.lock,
	.val		= &hwlat_data.sample_window,
	.max		= NULL,
	.min		= &hwlat_data.sample_width,
};

static const struct file_operations thread_mode_fops = {
	.open		= hwlat_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= hwlat_mode_write
};

/**
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "hwlat_detector".
 * It creates the hwlat_detector directory in the tracing directory,
 * and within that directory are the width, window and mode files to
 * change and view those values.
 */
static int init_tracefs(void)
{
	int ret;
	struct dentry *top_dir;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("hwlat_detector", NULL);
	if (!top_dir)
		return -ENOMEM;

	hwlat_sample_window = tracefs_create_file("window", TRACE_MODE_WRITE,
						  top_dir,
						  &hwlat_window,
						  &trace_min_max_fops);
	if (!hwlat_sample_window)
		goto err;

	hwlat_sample_width = tracefs_create_file("width", TRACE_MODE_WRITE,
						 top_dir,
						 &hwlat_width,
						 &trace_min_max_fops);
	if (!hwlat_sample_width)
		goto err;

	hwlat_thread_mode = trace_create_file("mode", TRACE_MODE_WRITE,
					      top_dir,
					      NULL,
					      &thread_mode_fops);
	if (!hwlat_thread_mode)
		goto err;

	return 0;

 err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}

static void hwlat_tracer_start(struct trace_array *tr)
{
	int err;

	if (hwlat_data.thread_mode == MODE_PER_CPU)
		err = start_per_cpu_kthreads(tr);
	else
		err = start_single_kthread(tr);
	if (err)
		pr_err(BANNER "Cannot start hwlat kthread\n");
}

static void hwlat_tracer_stop(struct trace_array *tr)
{
	if (hwlat_data.thread_mode == MODE_PER_CPU)
		stop_per_cpu_kthreads();
	else
		stop_single_kthread();
}

static int hwlat_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this */
	if (hwlat_busy)
		return -EBUSY;

	hwlat_trace = tr;

	hwlat_data.count = 0;
	tr->max_latency = 0;
	save_tracing_thresh = tracing_thresh;

	/* tracing_thresh is in nsecs, we speak in usecs */
	if (!tracing_thresh)
		tracing_thresh = last_tracing_thresh;

	if (tracer_tracing_is_on(tr))
		hwlat_tracer_start(tr);

	hwlat_busy = true;

	return 0;
}

static void hwlat_tracer_reset(struct trace_array *tr)
{
	hwlat_tracer_stop(tr);

	/* the tracing threshold is static between runs */
	last_tracing_thresh = tracing_thresh;

	tracing_thresh = save_tracing_thresh;
	hwlat_busy = false;
}

static struct tracer hwlat_tracer __read_mostly =
{
	.name		= "hwlat",
	.init		= hwlat_tracer_init,
	.reset		= hwlat_tracer_reset,
	.start		= hwlat_tracer_start,
	.stop		= hwlat_tracer_stop,
	.allow_instances = true,
};

__init static int init_hwlat_tracer(void)
{
	int ret;

	mutex_init(&hwlat_data.lock);

	ret = register_tracer(&hwlat_tracer);
	if (ret)
		return ret;

	hwlat_init_hotplug_support();

	init_tracefs();

	return 0;
}
late_initcall(init_hwlat_tracer);