#!/usr/bin/env drgn
#
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
# Help text shown by argparse. The parser uses RawTextHelpFormatter, so the
# layout below (indentation, alignment, blank lines) is printed verbatim.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMwake   The number of concurrency-management wake-ups while executing a
           work item of the workqueue.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
import sys
import signal
import os
import re
import time
import json
import drgn
from drgn . helpers . linux . list import list_for_each_entry , list_empty
from drgn . helpers . linux . cpumask import for_each_possible_cpu
import argparse
# Command-line interface. RawTextHelpFormatter keeps the hand-formatted
# column legend in `desc` intact instead of re-wrapping it.
parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
# Zero or more regexes; main() ORs them together to filter by workqueue name.
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
                    help='Target workqueue name patterns (all if empty)')
# Option strings must start with '-' for argparse to treat them as flags.
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
                    help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()
def err(s):
    """Report *s* on stderr (flushed immediately) and exit with status 1."""
    print(s, file=sys.stderr, flush=True)
    raise SystemExit(1)
# Kernel symbols and enum constants resolved through drgn's `prog`.
# Lookup keys must match the kernel symbol names exactly.
workqueues = prog['workqueues']

WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into pool_workqueue.stats[]
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']              # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']          # work items completed execution
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']            # total CPU time consumed
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']  # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']          # concurrency-management worker wakeups
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']                # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']              # linked work items executed by rescuer
PWQ_NR_STATS = prog['PWQ_NR_STATS']                      # number of entries in stats[]
class WqStats:
    """Snapshot of one workqueue's counters, summed over its pool_workqueues."""

    def __init__(self, wq):
        """Read name, flags and per-pwq statistics from *wq*.

        Args:
            wq: drgn object exposing ``name``, ``flags`` and the ``pwqs`` list
                (main() passes ``struct workqueue_struct`` entries).
        """
        self.name = wq.name.string_().decode()
        # In Python `&` binds tighter than `!=`; parenthesized for readability.
        self.unbound = (wq.flags & WQ_UNBOUND) != 0
        self.mem_reclaim = (wq.flags & WQ_MEM_RECLAIM) != 0

        # One running total per PWQ_STAT_* index.
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a plain dict.

        Args:
            now: value stored under the ``timestamp`` key.
        """
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()'s widths.

        Declared static because it takes no instance; this keeps both
        ``WqStats.table_header_str()`` and calls through an instance working.
        """
        return (f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '
                f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}')

    def table_row_str(self):
        """Return this workqueue's row for the table output.

        Columns this method does not report are shown as '-':
        CPUitsv/CMwake for unbound workqueues, mayday/rescued for
        workqueues without WQ_MEM_RECLAIM.
        """
        cpu_intensive = cm_wakeup = mayday = rescued = '-'

        if not self.unbound:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])

        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        started = self.stats[PWQ_STAT_STARTED]
        # Clamped at 0 so the in-flight column is never negative.
        in_flight = max(started - self.stats[PWQ_STAT_COMPLETED], 0)
        # Scaled by 1/1000000 for display; the help text documents the
        # CPUtime column as seconds.
        cpu_time = self.stats[PWQ_STAT_CPU_TIME] / 1000000

        # Only the last 24 characters of the name are shown to keep the
        # columns aligned.
        out = (f'{self.name[-24:]:24} '
               f'{started:8} '
               f'{in_flight:5} '
               f'{cpu_time:8.1f} '
               f'{cpu_intensive:>7} '
               f'{cm_wakeup:>7} '
               f'{mayday:>7} '
               f'{rescued:>7} ')
        return out.rstrip(':')
# Set by sigint_handler(); checked by the monitoring loop in main().
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler that flags an exit request; both arguments are unused."""
    global exit_req
    exit_req = True
def main():
    """Print workqueue statistics once, or repeatedly until SIGINT.

    Reads the parsed command line from the module-level ``args``:
    ``workqueue`` patterns filter by name, ``interval`` is the sleep between
    rounds (0 means a single round), and ``json`` selects one JSON object per
    workqueue per round instead of the table.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Several patterns are OR'ed into one regex; no pattern means no filter.
    filter_re = re.compile('|'.join(args.workqueue)) if args.workqueue else None

    # monitoring loop
    # SIGINT only sets exit_req, so the round in progress finishes printing.
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Emit real JSON; printing the dict would give Python repr
                # (single quotes, True/False) that JSON parsers reject.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)
# Run the monitor only when executed as a script. The comparison string must
# be exactly "__main__" or main() is never invoked.
if __name__ == "__main__":
    main()