c2f92e8b2d
The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing A comment in kvm_stat still refers to this older debugfs path, so let's update it to avoid confusion. Link: https://lkml.kernel.org/r/20230313211746.1541525-3-zwisler@kernel.org Cc: "Tobin C. Harding" <me@tobin.cc> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Shuah Khan <shuah@kernel.org> Cc: Tycho Andersen <tycho@tycho.pizza> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com> Signed-off-by: Ross Zwisler <zwisler@google.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1889 lines
63 KiB
Python
Executable File
1889 lines
63 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# top-like utility for displaying kvm statistics
|
|
#
|
|
# Copyright 2006-2008 Qumranet Technologies
|
|
# Copyright 2008-2011 Red Hat, Inc.
|
|
#
|
|
# Authors:
|
|
# Avi Kivity <avi@redhat.com>
|
|
#
|
|
"""The kvm_stat module outputs statistics about running KVM VMs
|
|
|
|
Three different ways of output formatting are available:
|
|
- as a top-like text ui
|
|
- in a key -> value format
|
|
- in an all keys, all values format
|
|
|
|
The data is sampled from the KVM's debugfs entries and its perf events.
|
|
"""
|
|
from __future__ import print_function
|
|
|
|
import curses
|
|
import sys
|
|
import locale
|
|
import os
|
|
import time
|
|
import argparse
|
|
import ctypes
|
|
import fcntl
|
|
import resource
|
|
import struct
|
|
import re
|
|
import subprocess
|
|
import signal
|
|
from collections import defaultdict, namedtuple
|
|
from functools import reduce
|
|
from datetime import datetime
|
|
|
|
# Intel VMX exit reasons: symbolic name -> numeric exit reason.
# Handed to ArchX86 when /proc/cpuinfo reports the 'vmx' flag.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'INIT_SIGNAL':          3,
    'SIPI_SIGNAL':          4,
    'INTERRUPT_WINDOW':     7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVD':                 13,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MSR_LOAD_FAIL':        34,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_TRAP_FLAG':    37,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EOI_INDUCED':          45,
    'GDTR_IDTR':            46,
    'LDTR_TR':              47,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'INVEPT':               50,
    'RDTSCP':               51,
    'PREEMPTION_TIMER':     52,
    'INVVPID':              53,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'RDRAND':               57,
    'INVPCID':              58,
    'VMFUNC':               59,
    'ENCLS':                60,
    'RDSEED':               61,
    'PML_FULL':             62,
    'XSAVES':               63,
    'XRSTORS':              64,
    'UMWAIT':               67,
    'TPAUSE':               68,
    'BUS_LOCK':             74,
    'NOTIFY':               75,
}
|
|
|
|
# AMD SVM exit codes: symbolic name -> numeric exit code.
# Handed to ArchX86 when /proc/cpuinfo reports the 'svm' flag.
SVM_EXIT_REASONS = {
    # control register reads/writes
    'READ_CR0':                   0x000,
    'READ_CR2':                   0x002,
    'READ_CR3':                   0x003,
    'READ_CR4':                   0x004,
    'READ_CR8':                   0x008,
    'WRITE_CR0':                  0x010,
    'WRITE_CR2':                  0x012,
    'WRITE_CR3':                  0x013,
    'WRITE_CR4':                  0x014,
    'WRITE_CR8':                  0x018,
    # debug register reads/writes
    'READ_DR0':                   0x020,
    'READ_DR1':                   0x021,
    'READ_DR2':                   0x022,
    'READ_DR3':                   0x023,
    'READ_DR4':                   0x024,
    'READ_DR5':                   0x025,
    'READ_DR6':                   0x026,
    'READ_DR7':                   0x027,
    'WRITE_DR0':                  0x030,
    'WRITE_DR1':                  0x031,
    'WRITE_DR2':                  0x032,
    'WRITE_DR3':                  0x033,
    'WRITE_DR4':                  0x034,
    'WRITE_DR5':                  0x035,
    'WRITE_DR6':                  0x036,
    'WRITE_DR7':                  0x037,
    'EXCP_BASE':                  0x040,
    'LAST_EXCP':                  0x05f,
    'INTR':                       0x060,
    'NMI':                        0x061,
    'SMI':                        0x062,
    'INIT':                       0x063,
    'VINTR':                      0x064,
    'CR0_SEL_WRITE':              0x065,
    'IDTR_READ':                  0x066,
    'GDTR_READ':                  0x067,
    'LDTR_READ':                  0x068,
    'TR_READ':                    0x069,
    'IDTR_WRITE':                 0x06a,
    'GDTR_WRITE':                 0x06b,
    'LDTR_WRITE':                 0x06c,
    'TR_WRITE':                   0x06d,
    'RDTSC':                      0x06e,
    'RDPMC':                      0x06f,
    'PUSHF':                      0x070,
    'POPF':                       0x071,
    'CPUID':                      0x072,
    'RSM':                        0x073,
    'IRET':                       0x074,
    'SWINT':                      0x075,
    'INVD':                       0x076,
    'PAUSE':                      0x077,
    'HLT':                        0x078,
    'INVLPG':                     0x079,
    'INVLPGA':                    0x07a,
    'IOIO':                       0x07b,
    'MSR':                        0x07c,
    'TASK_SWITCH':                0x07d,
    'FERR_FREEZE':                0x07e,
    'SHUTDOWN':                   0x07f,
    'VMRUN':                      0x080,
    'VMMCALL':                    0x081,
    'VMLOAD':                     0x082,
    'VMSAVE':                     0x083,
    'STGI':                       0x084,
    'CLGI':                       0x085,
    'SKINIT':                     0x086,
    'RDTSCP':                     0x087,
    'ICEBP':                      0x088,
    'WBINVD':                     0x089,
    'MONITOR':                    0x08a,
    'MWAIT':                      0x08b,
    'MWAIT_COND':                 0x08c,
    'XSETBV':                     0x08d,
    'RDPRU':                      0x08e,
    'EFER_WRITE_TRAP':            0x08f,
    'CR0_WRITE_TRAP':             0x090,
    'CR1_WRITE_TRAP':             0x091,
    'CR2_WRITE_TRAP':             0x092,
    'CR3_WRITE_TRAP':             0x093,
    'CR4_WRITE_TRAP':             0x094,
    'CR5_WRITE_TRAP':             0x095,
    'CR6_WRITE_TRAP':             0x096,
    'CR7_WRITE_TRAP':             0x097,
    'CR8_WRITE_TRAP':             0x098,
    'CR9_WRITE_TRAP':             0x099,
    'CR10_WRITE_TRAP':            0x09a,
    'CR11_WRITE_TRAP':            0x09b,
    'CR12_WRITE_TRAP':            0x09c,
    'CR13_WRITE_TRAP':            0x09d,
    'CR14_WRITE_TRAP':            0x09e,
    'CR15_WRITE_TRAP':            0x09f,
    'INVPCID':                    0x0a2,
    'NPF':                        0x400,
    'AVIC_INCOMPLETE_IPI':        0x401,
    'AVIC_UNACCELERATED_ACCESS':  0x402,
    'VMGEXIT':                    0x403,
}
|
|
|
|
# EC definition of HSR (from arch/arm64/include/asm/esr.h)
# Maps exception class name -> EC value; used by ArchA64 as its exit reasons.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFx':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'PAC':          0x09,
    'CP14_64':      0x0C,
    'BTI':          0x0D,
    'ILL':          0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'SVE':          0x19,
    'ERET':         0x1A,
    'FPAC':         0x1C,
    'SME':          0x1D,
    'IMP_DEF':      0x1F,
    'IABT_LOW':     0x20,
    'IABT_CUR':     0x21,
    'PC_ALIGN':     0x22,
    'DABT_LOW':     0x24,
    'DABT_CUR':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT_LOW':  0x30,
    'BREAKPT_CUR':  0x31,
    'SOFTSTP_LOW':  0x32,
    'SOFTSTP_CUR':  0x33,
    'WATCHPT_LOW':  0x34,
    'WATCHPT_CUR':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
|
|
|
|
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Maps exit name -> number; used as the filter values for the
# 'kvm_userspace_exit' tracepoint.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
    'S390_STSI':        25,
    'IOAPIC_EOI':       26,
    'HYPERV':           27,
    'ARM_NISV':         28,
    'X86_RDMSR':        29,
    'X86_WRMSR':        30,
    'DIRTY_RING_FULL':  31,
    'AP_RESET_HOLD':    32,
    'X86_BUS_LOCK':     33,
    'XEN':              34,
    'RISCV_SBI':        35,
    'RISCV_CSR':        36,
    'NOTIFY':           37,
}
|
|
|
|
# Default ioctl request numbers issued on perf event file descriptors.
# The Arch* classes expose them as 'ioctl_numbers'; ArchPPC overrides the
# values for its platform.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
|
|
|
|
# Module-wide flag, initially unset.
signal_received = False

# Encoding used to decode the output of external commands (see
# Tui.get_all_gnames()).
ENCODING = locale.getpreferredencoding(False)
# Matches field names that contain no '(' at all, i.e. plain tracepoint
# names without a '(filter)' suffix.
TRACE_FILTER = re.compile(r'^[^\(]*$')
|
|
|
|
|
|
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Return the Arch subclass instance matching the running machine.

        The machine name reported by os.uname() selects ArchPPC, ArchA64 or
        ArchS390. Everything else is treated as x86, where the CPU flags in
        /proc/cpuinfo decide between the VMX and SVM exit reason tables.

        Returns None if no 'flags' line advertises 'vmx' or 'svm'.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()
        else:
            # X86_64
            # 'with' guarantees the file is closed on every return path.
            with open('/proc/cpuinfo') as cpuinfo:
                for line in cpuinfo:
                    if not line.startswith('flags'):
                        continue

                    flags = line.split()
                    if 'vmx' in flags:
                        return ArchX86(VMX_EXIT_REASONS)
                    if 'svm' in flags:
                        return ArchX86(SVM_EXIT_REASONS)
            return None

    def tracepoint_is_child(self, field):
        """Return the parent tracepoint of 'field', or None.

        A child field has the format 'event name(filter name)'; its parent
        is the part in front of the first '('. Fields without '(' are not
        children and yield None.

        """
        if (TRACE_FILTER.match(field)):
            return None
        return field.split('(', 1)[0]
|
|
|
|
|
|
class ArchX86(Arch):
    """Architecture data for x86 hosts (Intel VMX or AMD SVM)."""

    def __init__(self, exit_reasons):
        """Store the x86 constants.

        'exit_reasons' is the name -> number table matching the host's
        virtualization extension (see Arch.get_arch()).

        """
        # Number passed to syscall() for perf_event_open.
        self.sc_perf_evt_open = 298
        self.ioctl_numbers = IOCTL_NUMBERS
        # Tracepoint field the exit reason filters are applied to.
        self.exit_reason_field = 'exit_reason'
        self.exit_reasons = exit_reasons

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        # No debugfs field has a parent on this architecture.
        return None
|
|
|
|
|
|
class ArchPPC(Arch):
    """Architecture data for PowerPC hosts."""

    def __init__(self):
        """Store the PowerPC constants.

        The ioctl numbers differ from the defaults, so they are overridden
        in a private copy of IOCTL_NUMBERS.

        """
        # Number passed to syscall() for perf_event_open.
        self.sc_perf_evt_open = 319
        # Copy instead of aliasing: the overrides below must not rewrite
        # the module-level default table.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        # Tracepoint field that would carry the exit reason; as the table
        # of reasons is empty, no per-reason kvm_exit filters are created.
        self.exit_reason_field = 'exit_nr'
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        # No debugfs field has a parent on this architecture.
        return None
|
|
|
|
|
|
class ArchA64(Arch):
    """Architecture data for 64 bit ARM hosts."""

    def __init__(self):
        """Store the arm64 constants."""
        # Number passed to syscall() for perf_event_open.
        self.sc_perf_evt_open = 241
        self.ioctl_numbers = IOCTL_NUMBERS
        # Exits are classified by exception class, see AARCH64_EXIT_REASONS.
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.exit_reason_field = 'esr_ec'

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        # No debugfs field has a parent on this architecture.
        return None
|
|
|
|
|
|
class ArchS390(Arch):
    """Architecture data for s390 hosts."""

    def __init__(self):
        """Store the s390 constants."""
        # Number passed to syscall() for perf_event_open.
        self.sc_perf_evt_open = 331
        self.ioctl_numbers = IOCTL_NUMBERS
        # Without field and table no kvm_exit filters are set up.
        self.exit_reasons = None
        self.exit_reason_field = None

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        # All 'instruction_*' counters are grouped below 'exit_instruction'.
        return 'exit_instruction' if field.startswith('instruction_') else None
|
|
|
|
|
|
# Architecture description of the running machine, resolved once at import
# time. Arch.get_arch() may yield None on x86 when /proc/cpuinfo shows
# neither the 'vmx' nor the 'svm' flag.
ARCH = Arch.get_arch()
|
|
|
|
|
|
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Preset type, size and read format for a grouped tracepoint."""
        # Name the class explicitly: super(self.__class__, self) would
        # recurse forever as soon as this struct is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
|
|
|
|
|
|
# Value stored in perf_event_attr.type by the constructor above.
PERF_TYPE_TRACEPOINT = 2
# Value stored in perf_event_attr.read_format (bit 3); Group.read() parses
# the resulting "count followed by one value per event" layout.
PERF_FORMAT_GROUP = 1 << 3
|
|
|
|
|
|
class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        # Events in insertion order; the first one is the group leader.
        self.events = []

    def add_event(self, event):
        """Append 'event' to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        Values are read by reading from the file descriptor of the
        event that is the group leader. See perf_event_open(2) for
        details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [event.name for event in self.events]
        # Eight pad bytes skip the leading 'nr', then one u64 per event.
        layout = 'xxxxxxxx' + 'Q' * count
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        return dict(zip(names, struct.unpack(layout, raw)))
|
|
|
|
|
|
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Open the perf event for 'trace_point' and optionally filter it.

        'group' is the Group this event is about to join; if it already
        holds events, the first of them becomes this event's group leader.
        'trace_cpu' / 'trace_pid' are passed on to perf_event_open,
        'trace_filter' (may be None) is installed via ioctl and
        'trace_set' names the tracing events subdirectory.

        Raises OSError if the perf_event_open syscall fails.

        """
        # Set before anything can raise, so that __del__ always finds it.
        self.fd = None
        self.name = name
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # getattr() copes with objects whose __init__ never ran.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            # Forget the descriptor first so it is never closed twice.
            self.fd = None
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def _setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct."""

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        # The 'id' file holds the tracepoint number expected in 'config'.
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
                     trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        """

        event_attr = self._setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self._perf_event_open(event_attr, trace_pid,
                                   trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        if trace_filter:
            try:
                fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                            trace_filter)
            except Exception:
                # self.fd is not assigned yet, so __del__ could not
                # release the descriptor; do it here before propagating.
                os.close(fd)
                raise

        self.fd = fd

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
|
|
|
|
|
|
class Provider(object):
    """Encapsulates functionalities used by all providers."""

    def __init__(self, pid):
        """Initialise the state shared by all providers.

        Subclasses implement 'pid' as a property, so the assignment below
        may trigger their setter.

        """
        self.child_events = False
        self.pid = pid

    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Indicate whether field is valid according to fields_filter."""
        # An empty/unset filter accepts everything; otherwise the regex
        # has to match at the start of the field name.
        return not fields_filter or re.match(fields_filter, field) is not None

    @staticmethod
    def walkdir(path):
        """Returns os.walk() data for specified directory.

        As it is only a wrapper it returns the same 3-tuple of (dirpath,
        dirnames, filenames).
        """
        # Only the first entry, i.e. the one for 'path' itself, is used.
        walker = os.walk(path)
        return next(walker)
|
|
|
|
|
|
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self, pid, fields_filter):
        """Determine the wanted fields, then set up the traces for 'pid'.

        The fields are computed before the base class constructor runs,
        because assigning the pid there creates the events and enables
        those listed in the fields.

        """
        self.group_leaders = []
        self.filters = self._get_filters()
        self.update_fields(fields_filter)
        super(TracepointProvider, self).__init__(pid)

    @staticmethod
    def _get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        Trace events can be filtered for special values by setting a
        filter string via an ioctl. The string normally has the format
        identifier==value. For each filter a new event will be created, to
        be able to distinguish the events.

        """
        filters = {}
        filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
        if ARCH.exit_reason_field and ARCH.exit_reasons:
            filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
        return filters

    def _get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_values = self.filters[field][1]
                for name in filter_values:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        wanted = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # Iterate over a snapshot, as the list grows inside the loop.
        for field in list(wanted):
            parent = ARCH.tracepoint_is_child(field)
            if (parent and parent not in wanted):
                wanted.append(parent)
        # Assign the complete list once: the setter resets and enables the
        # listed events, so the parents have to be included by now.
        self.fields = wanted

    @staticmethod
    def _get_online_cpus():
        """Returns a list of cpu id integers."""
        def parse_int_list(list_string):
            """Returns an int list from a string of comma separated integers and
            integer ranges."""
            integers = []
            members = list_string.split(',')

            for member in members:
                if '-' not in member:
                    integers.append(int(member))
                else:
                    int_range = member.split('-')
                    integers.extend(range(int(int_range[0]),
                                          int(int_range[1]) + 1))

            return integers

        with open('/sys/devices/system/cpu/online') as cpu_list:
            cpu_string = cpu_list.readline()
            return parse_int_list(cpu_string)

    def _setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self._get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(path)[1]
        else:
            groupids = self._get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except (ValueError, OSError):
            # setrlimit() reports a failing system call as OSError.
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                trace_cpu = groupid if self._pid == 0 else -1
                trace_pid = int(groupid) if self._pid != 0 else -1

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        """List of the field names that are currently reported."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """Pid whose threads are traced; 0 selects per-cpu tracing."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self._setup_traces()
        self.fields = self._fields

    def read(self, by_guest=0):
        """Returns 'event name: current value' for all enabled events."""
        ret = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name not in self._fields:
                    continue
                parent = ARCH.tracepoint_is_child(name)
                if parent:
                    # Children are reported as '<child> <parent>'.
                    name += ' ' + parent
                ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
|
|
|
|
|
|
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        """Select the wanted fields and start reading for 'pid'.

        If 'include_past' is set, the baselines are zeroed so that the
        values accumulated so far are reported as well.

        """
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        super(DebugfsProvider, self).__init__(pid)
        if include_past:
            self._restore()

    def _get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns']
        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
                  if field not in exempt_list]

        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        wanted = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # Iterate over a snapshot, as the list grows inside the loop.
        for field in list(wanted):
            parent = ARCH.debugfs_is_child(field)
            if (parent and parent not in wanted):
                wanted.append(parent)
        # Stored directly; going through the 'fields' setter would reset
        # the baselines, which update_fields() has never done.
        self._fields = wanted

    @property
    def fields(self):
        """List of the field names that are currently reported."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Replace the reported fields and reset their counters."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        """Pid whose VM directories are read; 0 selects all VMs."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Select the debugfs directories that belong to 'pid'."""
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            if len(vms) == 0:
                self.do_read = False

            # Match the '<pid>-' prefix of the directory name; a plain
            # substring test would let pid 12 pick up e.g. '3412-11'.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True

    def _verify_paths(self):
        """Remove invalid paths"""
        # Rebuild the list in place: removing entries from a list while
        # iterating over it skips the successor of each removed entry.
        self.paths[:] = [path for path in self.paths
                         if os.path.exists(os.path.join(PATH_DEBUGFS_KVM,
                                                        path))]

    def read(self, reset=0, by_guest=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results
        self._verify_paths()

        paths = self.paths
        if self._pid == 0:
            paths = []
            for entry in os.walk(PATH_DEBUGFS_KVM):
                for dirname in entry[1]:
                    paths.append(dirname)
        for path in paths:
            for field in self._fields:
                value = self._read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                # First time this key is seen: start counting from now.
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                parent = ARCH.debugfs_is_child(field)
                if parent:
                    field = field + ' ' + parent
                else:
                    if by_guest:
                        field = key.split('-')[0]    # set 'field' to 'pid'
                increment = value - self._baseline.get(key, 0)
                if field in results:
                    results[field] += increment
                else:
                    results[field] = increment

        return results

    def _read_field(self, field, path):
        """Returns the value of a single field from a specific VM."""
        try:
            # The context manager closes the file again right away; this
            # runs for every field of every VM on each refresh.
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def _restore(self):
        """Restore the original field counts"""
        self._baseline = {}
        self.read(2)
|
|
|
|
|
|
# Record kept per key in Stats.values: the latest value and its difference
# to the value of the previous Stats.get() call.
EventStat = namedtuple('EventStat', ['value', 'delta'])
|
|
|
|
|
|
class Stats(object):
    """Manages the data providers and the data they provide.

    It is used to set filters on the provider's data and collect all
    provider data.

    """
    def __init__(self, options):
        """Create the providers selected by 'options' and remember the
        initial pid and fields filters."""
        self.providers = self._get_providers(options)
        self._pid_filter = options.pid
        self._fields_filter = options.fields
        self.values = {}
        self._child_events = False

    def _get_providers(self, options):
        """Returns a list of data providers depending on the passed options."""
        selected = []

        if options.debugfs:
            selected.append(DebugfsProvider(options.pid, options.fields,
                                            options.debugfs_include_past))
        # Tracepoints are also the fallback if nothing else was chosen.
        if options.tracepoints or not selected:
            selected.append(TracepointProvider(options.pid, options.fields))

        return selected

    def _update_provider_filters(self):
        """Propagates fields filters to providers."""
        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            provider.update_fields(self._fields_filter)

    def reset(self):
        """Drop the cached values and reset every provider."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        """Regex that currently selects the reported fields."""
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        # Nothing to do unless the filter really changes.
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self._update_provider_filters()

    @property
    def pid_filter(self):
        """Pid the statistics are currently restricted to."""
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        # Nothing to do unless the pid really changes.
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def child_events(self):
        """Current child events setting, as handed to the providers."""
        return self._child_events

    @child_events.setter
    def child_events(self, val):
        self._child_events = val
        for provider in self.providers:
            provider.child_events = val

    def get(self, by_guest=0):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data.
        Key formats:
          * plain: 'key' is event name
          * child-parent: 'key' is in format '<child> <parent>'
          * pid: 'key' is the pid of the guest, and the record contains the
               aggregated event data
        These formats are generated by the providers, and handled in class TUI.
        """
        for provider in self.providers:
            for key, newval in provider.read(by_guest=by_guest).items():
                previous = self.values.get(key, EventStat(0, 0)).value
                self.values[key] = EventStat(newval, newval - previous)
        return self.values

    def toggle_display_guests(self, to_pid):
        """Toggle between collection of stats by individual event and by
        guest pid

        Events reported by DebugfsProvider change when switching to/from
        reading by guest values. Hence we have to remove the excess event
        names from self.values.

        """
        for provider in self.providers:
            if isinstance(provider, TracepointProvider):
                return 1
        if to_pid:
            # Switching to pids: forget the per-event records.
            for provider in self.providers:
                if not isinstance(provider, DebugfsProvider):
                    continue
                for key in provider.fields:
                    self.values.pop(key, None)
        else:
            # Switching to events: forget the per-pid records.
            for key in [name for name in self.values if name.isdigit()]:
                del self.values[key]
        # Update oldval (see get())
        self.get(to_pid)
        return 0
|
|
|
|
|
|
# Tunables of the text user interface. The names suggest refresh delays in
# seconds and display length limits; confirm against their users.
DELAY_DEFAULT = 3.0
MIN_DELAY = 0.1
MAX_DELAY = 25.5
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
SORT_DEFAULT = 0
|
|
|
|
|
|
class Tui(object):
|
|
"""Instruments curses to draw a nice text ui."""
|
|
def __init__(self, stats, opts):
    """Remember the stats object and the initial display settings.

    The regular delay is taken from 'opts.set_delay'. curses itself is
    only initialised later, in __enter__().

    """
    self.stats = stats
    # No curses window yet.
    self.screen = None
    self._delay_regular = opts.set_delay
    self._delay_initial = 0.25
    self._display_guests = 0
    self._sorting = SORT_DEFAULT
|
|
|
|
def __enter__(self):
    """Initialises curses for later use.  Based on curses.wrapper
       implementation from the Python standard library."""
    self.screen = curses.initscr()
    curses.noecho()
    curses.cbreak()

    # Both calls below are optional extras whose failure is ignorable:
    # - start_color(): the try/catch works around a minor bit of
    #   over-conscientiousness in the curses module.
    # - curs_set(0): hide the cursor in an extra statement, as some
    #   monochrome terminals might support hiding but not colors.
    optional_steps = ((curses.start_color, ()), (curses.curs_set, (0,)))
    for step, args in optional_steps:
        try:
            step(*args)
        except curses.error:
            pass

    curses.use_default_colors()
    return self
|
|
|
|
def __exit__(self, *exception):
|
|
"""Resets the terminal to its normal state. Based on curses.wrapper
|
|
implementation from the Python standard library."""
|
|
if self.screen:
|
|
self.screen.keypad(0)
|
|
curses.echo()
|
|
curses.nocbreak()
|
|
curses.endwin()
|
|
|
|
@staticmethod
def get_all_gnames():
    """Returns a list of (pid, gname) tuples of all running guests

    The list is derived from the output of 'ps'. Raises Exception if
    'ps' cannot be started.

    """
    res = []
    try:
        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                 stdout=subprocess.PIPE)
    except Exception:
        # Callers only expect a plain Exception. KeyboardInterrupt and
        # SystemExit are no longer converted.
        raise Exception('unable to run ps')
    try:
        for line in child.stdout:
            line = line.decode(ENCODING).lstrip().split(' ', 1)
            # perform a sanity check before calling the more expensive
            # function to possibly extract the guest name
            # (lines without an args column are skipped)
            if len(line) > 1 and ' -name ' in line[1]:
                res.append((line[0], Tui.get_gname_from_pid(line[0])))
    finally:
        child.stdout.close()
        # Reap the child, otherwise every call leaves a zombie behind.
        child.wait()

    return res
|
|
|
|
def _print_all_gnames(self, row):
|
|
"""Print a list of all running guests along with their pids."""
|
|
self.screen.addstr(row, 2, '%8s %-60s' %
|
|
('Pid', 'Guest Name (fuzzy list, might be '
|
|
'inaccurate!)'),
|
|
curses.A_UNDERLINE)
|
|
row += 1
|
|
try:
|
|
for line in self.get_all_gnames():
|
|
self.screen.addstr(row, 2, '%8s %-60s' % (line[0], line[1]))
|
|
row += 1
|
|
if row >= self.screen.getmaxyx()[0]:
|
|
break
|
|
except Exception:
|
|
self.screen.addstr(row + 1, 2, 'Not available')
|
|
|
|
@staticmethod
def get_pid_from_gname(gname):
    """Fuzzy lookup of the QEMU process pid(s) for a guest name.

    Returns the pids (as ints) of all guests whose name equals gname.
    The list is empty if nothing matches.
    Exceptions raised while gathering the guest list are passed on.

    """
    return [int(pid)
            for pid, name in Tui.get_all_gnames()
            if gname == name]
|
|
|
|
@staticmethod
def get_gname_from_pid(pid):
    """Returns the guest name for a QEMU process pid.

    Extracts the guest name from the QEMU command line by processing the
    '-name' option. Will also handle names specified out of sequence.

    Returns '' if the command line cannot be read or does not carry a
    usable '-name' option.

    """
    name = ''
    try:
        # close the file right away - this is called frequently, so do
        # not rely on the garbage collector to release the handle
        with open('/proc/{}/cmdline'.format(pid), 'r') as cmdline:
            line = cmdline.read().split('\0')
        parms = line[line.index('-name') + 1].split(',')
        while '' in parms:
            # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
            # in # ['foo', '', 'bar'], which we revert here
            idx = parms.index('')
            parms[idx - 1] += ',' + parms[idx + 1]
            del parms[idx:idx+2]
        # the '-name' switch allows for two ways to specify the guest name,
        # where the plain name overrides the name specified via 'guest='
        for arg in parms:
            if '=' not in arg:
                name = arg
                break
            if arg[:6] == 'guest=':
                name = arg[6:]
    except (ValueError, IOError, IndexError):
        # no such process, no '-name' option or a malformed option value
        pass

    return name
|
|
|
|
def _update_pid(self, pid):
    """Hands the selected pid over to the stats object.

    A progress note is put on screen before the filter is changed.
    """
    screen = self.screen
    screen.addstr(4, 1, 'Updating pid filter...')
    screen.refresh()
    self.stats.pid_filter = pid
|
|
|
|
def _refresh_header(self, pid=None):
    """Redraws headline, active regex filter and column titles.

    pid defaults to the pid filter currently set in the stats object.
    The guest name found for pid is remembered in self._gname.
    """
    if pid is None:
        pid = self.stats.pid_filter
    self.screen.erase()
    gname = self.get_gname_from_pid(pid)
    self._gname = gname
    if gname:
        # overlong guest names are cut off and marked with an ellipsis
        if len(gname) > MAX_GUEST_NAME_LEN:
            gname = gname[:MAX_GUEST_NAME_LEN] + '...'
        gname = '({})'.format(gname)
    if pid > 0:
        self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
    else:
        self._headline = 'kvm statistics - summary'
    self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
    if self.stats.fields_filter:
        regex = self.stats.fields_filter
        if len(regex) > MAX_REGEX_LEN:
            regex = regex[:MAX_REGEX_LEN] + '...'
        self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
    col_name = 'Guest Name' if self._display_guests else 'Event'
    titles = (col_name, 'Total', '%Total', 'CurAvg/s')
    self.screen.addstr(2, 1, '%-40s %10s%7s %8s' % titles,
                       curses.A_STANDOUT)
    self.screen.addstr(4, 1, 'Collecting data...')
    self.screen.refresh()
|
|
|
|
def _refresh_body(self, sleeptime):
    """Redraws the statistics rows below the header.

    sleeptime is the time span used to turn deltas into per-second
    rates. If the filtered pid is no longer running, the guest is looked
    up by its remembered name, and marked as dead if that does not yield
    exactly one pid.
    """
    def insert_child(sorted_items, child, values, parent):
        # only the items present on entry are examined
        num = len(sorted_items)
        for i in range(0, num):
            # only add child if parent is present
            if parent.startswith(sorted_items[i][0]):
                sorted_items.insert(i + 1, (' ' + child, values))

    def get_sorted_events(self, stats):
        """ separate parent and child events """
        if self._sorting == SORT_DEFAULT:
            def sortkey(pair):
                # sort by (delta value, overall value)
                v = pair[1]
                return (v.delta, v.value)
        else:
            def sortkey(pair):
                # sort by overall value
                v = pair[1]
                return v.value

        childs = []
        sorted_items = []
        # we can't rule out child events to appear prior to parents even
        # when sorted - separate out all children first, and add in later
        for key, values in sorted(stats.items(), key=sortkey,
                                  reverse=True):
            if values == (0, 0):
                continue
            if key.find(' ') != -1:
                if not self.stats.child_events:
                    continue
                childs.insert(0, (key, values))
            else:
                sorted_items.append((key, values))
        if self.stats.child_events:
            for key, values in childs:
                (child, parent) = key.split(' ')
                insert_child(sorted_items, child, values, parent)

        return sorted_items

    if not self._is_running_guest(self.stats.pid_filter):
        if self._gname:
            try:  # ...to identify the guest by name in case it's back
                pids = self.get_pid_from_gname(self._gname)
                if len(pids) == 1:
                    self._refresh_header(pids[0])
                    self._update_pid(pids[0])
                    return
            except Exception:
                # best effort only - but do not swallow KeyboardInterrupt
                # or SystemExit, so the user can still quit from here
                pass
        self._display_guest_dead()
        # leave final data on screen
        return
    row = 3
    self.screen.move(row, 0)
    self.screen.clrtobot()
    stats = self.stats.get(self._display_guests)
    total = 0.
    ctotal = 0.
    for key, values in stats.items():
        if self._display_guests:
            if self.get_gname_from_pid(key):
                total += values.value
            continue
        if not key.find(' ') != -1:
            total += values.value
        else:
            ctotal += values.value
    if total == 0.:
        # we don't have any fields, or all non-child events are filtered
        total = ctotal

    # print events
    tavg = 0
    tcur = 0
    guest_removed = False
    for key, values in get_sorted_events(self, stats):
        if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
            break
        if self._display_guests:
            key = self.get_gname_from_pid(key)
            if not key:
                continue
        cur = int(round(values.delta / sleeptime)) if values.delta else 0
        if cur < 0:
            guest_removed = True
            continue
        if key[0] != ' ':
            if values.delta:
                tcur += values.delta
            ptotal = values.value
            ltotal = total
        else:
            # indented (child) rows are shown relative to the value of
            # the most recent non-indented row
            ltotal = ptotal
        self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
                           values.value,
                           values.value * 100 / float(ltotal), cur))
        row += 1
    if row == 3:
        if guest_removed:
            self.screen.addstr(4, 1, 'Guest removed, updating...')
        else:
            self.screen.addstr(4, 1, 'No matching events reported yet')
    if row > 4:
        tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
        self.screen.addstr(row, 1, '%-40s %10d %8s' %
                           ('Total', total, tavg), curses.A_BOLD)
    self.screen.refresh()
|
|
|
|
def _display_guest_dead(self):
    """Flags the guest as dead with a blinking marker in the top row.

    The marker starts right after the headline, but no further right
    than column 80 minus the length of the marker.
    """
    marker = ' Guest is DEAD '
    column = min(len(self._headline), 80 - len(marker))
    attrs = curses.A_BLINK | curses.A_STANDOUT
    self.screen.addstr(0, column, marker, attrs)
|
|
|
|
def _show_msg(self, text):
    """Shows the lines in 'text' centered and waits for a key press."""
    hint = 'Press any key to continue'
    curses.cbreak()
    self.screen.erase()
    (_, term_width) = self.screen.getmaxyx()
    next_row = 2
    for line in text:
        column = (term_width - len(line)) // 2
        self.screen.addstr(next_row, column, line)
        next_row += 1
    # the hint goes below the message, separated by an empty row
    hint_column = (term_width - len(hint)) // 2
    self.screen.addstr(next_row + 1, hint_column, hint, curses.A_STANDOUT)
    self.screen.getkey()
|
|
|
|
def _show_help_interactive(self):
    """Shows the interactive commands reference until a key is pressed.

    The header is redrawn afterwards.
    """
    delay_line = (' s set delay between refreshs (value range: '
                  '%s-%s secs)' % (MIN_DELAY, MAX_DELAY))
    msg = (' b toggle events by guests (debugfs only, honors'
           ' filters)',
           ' c clear filter',
           ' f filter by regular expression',
           ' g filter by guest name/PID',
           ' h display interactive commands reference',
           ' o toggle sorting order (Total vs CurAvg/s)',
           ' p filter by guest name/PID',
           ' q quit',
           ' r reset stats',
           delay_line,
           ' x toggle reporting of stats for individual child trace'
           ' events',
           'Any other key refreshes statistics immediately')
    curses.cbreak()
    self.screen.erase()
    self.screen.addstr(0, 0, "Interactive commands reference",
                       curses.A_BOLD)
    self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
    # the command list starts in row 4, one command per row
    for row, line in enumerate(msg, 4):
        self.screen.addstr(row, 0, line)
    self.screen.getkey()
    self._refresh_header()
|
|
|
|
def _show_filter_selection(self):
    """Draws filter selection mask.

    Prompts until a valid regex or nothing at all is entered and sets
    the fields filter accordingly (empty input clears the filter).

    """
    title = "Show statistics for events matching a regex."
    msg = ''
    while True:
        self.screen.erase()
        self.screen.addstr(0, 0, title, curses.A_BOLD)
        current = "Current regex: {0}".format(self.stats.fields_filter)
        self.screen.addstr(2, 0, current)
        self.screen.addstr(5, 0, msg)
        self.screen.addstr(3, 0, "New regex: ")
        curses.echo()
        regex = self.screen.getstr().decode(ENCODING)
        curses.noecho()
        if not regex:
            self.stats.fields_filter = ''
            self._refresh_header()
            return
        try:
            re.compile(regex)
            self.stats.fields_filter = regex
            self._refresh_header()
        except re.error:
            # ask again, with the reason shown below the prompt
            msg = '"' + regex + '": Not a valid regular expression'
        else:
            return
|
|
|
|
def _show_set_update_interval(self):
    """Draws update interval selection mask.

    Prompts until a valid delay is entered. Empty input selects
    DELAY_DEFAULT. The header is redrawn afterwards.
    """
    msg = ''
    while True:
        self.screen.erase()
        title = 'Set update interval (defaults to %.1fs).' % DELAY_DEFAULT
        self.screen.addstr(0, 0, title, curses.A_BOLD)
        self.screen.addstr(4, 0, msg)
        prompt = 'Change delay from %.1fs to ' % self._delay_regular
        self.screen.addstr(2, 0, prompt)
        curses.echo()
        val = self.screen.getstr().decode(ENCODING)
        curses.noecho()

        if not val:
            self._delay_regular = DELAY_DEFAULT
            break
        try:
            delay = float(val)
        except ValueError:
            msg = '"' + str(val) + '": Invalid value'
            continue
        err = is_delay_valid(delay)
        if err is not None:
            msg = err
            continue
        self._delay_regular = delay
        break
    self._refresh_header()
|
|
|
|
def _is_running_guest(self, pid):
|
|
"""Check if pid is still a running process."""
|
|
if not pid:
|
|
return True
|
|
return os.path.isdir(os.path.join('/proc/', str(pid)))
|
|
|
|
def _show_vm_selection_by_guest(self):
    """Draws guest selection mask.

    Asks for a guest name or pid until a valid guest name or pid, '' or
    '0' is entered. '' and '0' select pid 0. The selection is then
    propagated to the header and the stats object.

    """
    msg = ''
    while True:
        self.screen.erase()
        self.screen.addstr(0, 0,
                           'Show statistics for specific guest or pid.',
                           curses.A_BOLD)
        self.screen.addstr(1, 0,
                           'This might limit the shown data to the trace '
                           'statistics.')
        self.screen.addstr(5, 0, msg)
        self._print_all_gnames(7)
        curses.echo()
        curses.curs_set(1)
        self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
        guest = self.screen.getstr().decode(ENCODING)
        curses.noecho()

        pid = 0
        if not guest or guest == '0':
            break
        if guest.isdigit():
            if not self._is_running_guest(guest):
                msg = '"' + guest + '": Not a running process'
                continue
            pid = int(guest)
            break
        pids = []
        try:
            pids = self.get_pid_from_gname(guest)
        except Exception:
            # report lookup failures to the user, but let
            # KeyboardInterrupt and SystemExit pass
            msg = '"' + guest + '": Internal error while searching, ' \
                  'use pid filter instead'
            continue
        if len(pids) == 0:
            msg = '"' + guest + '": Not an active guest'
            continue
        if len(pids) > 1:
            msg = '"' + guest + '": Multiple matches found, use pid ' \
                  'filter instead'
            continue
        pid = pids[0]
        break
    curses.curs_set(0)
    self._refresh_header(pid)
    self._update_pid(pid)
|
|
|
|
def show_stats(self):
    """Refreshes the screen and processes user input.

    Runs until 'q' is pressed or a KeyboardInterrupt is received. A
    curses.error (such as the key wait timing out) simply leads to the
    next refresh.
    """
    sleeptime = self._delay_initial
    self._refresh_header()
    start = 0.0  # result based on init value never appears on screen
    while True:
        self._refresh_body(time.time() - start)
        curses.halfdelay(int(sleeptime * 10))
        start = time.time()
        sleeptime = self._delay_regular
        try:
            char = self.screen.getkey()
            if char == 'b':
                self._display_guests = not self._display_guests
                if self.stats.toggle_display_guests(self._display_guests):
                    self._show_msg(['Command not available with '
                                    'tracepoints enabled', 'Restart with '
                                    'debugfs only (see option \'-d\') and '
                                    'try again!'])
                    # undo the toggle, as it could not be applied
                    self._display_guests = not self._display_guests
                self._refresh_header()
            elif char == 'c':
                self.stats.fields_filter = ''
                self._refresh_header(0)
                self._update_pid(0)
            elif char == 'f':
                curses.curs_set(1)
                self._show_filter_selection()
                curses.curs_set(0)
                sleeptime = self._delay_initial
            elif char in ('g', 'p'):
                self._show_vm_selection_by_guest()
                sleeptime = self._delay_initial
            elif char == 'h':
                self._show_help_interactive()
            elif char == 'o':
                self._sorting = not self._sorting
            elif char == 'q':
                break
            elif char == 'r':
                self.stats.reset()
            elif char == 's':
                curses.curs_set(1)
                self._show_set_update_interval()
                curses.curs_set(0)
                sleeptime = self._delay_initial
            elif char == 'x':
                self.stats.child_events = not self.stats.child_events
        except KeyboardInterrupt:
            break
        except curses.error:
            continue
|
|
|
|
|
|
def batch(stats):
    """Prints statistics in a key, value format.

    The stats are sampled twice, one second apart. Only the second
    sample is printed, sorted by key. Ctrl-C ends the function silently.
    """
    row_fmt = '%-42s%10d%10d'
    try:
        stats.get()
        time.sleep(1)
        samples = stats.get()
        for key, values in sorted(samples.items()):
            event = key.split(' ')[0]
            print(row_fmt % (event, values.value, values.delta))
    except KeyboardInterrupt:
        pass
|
|
|
|
|
|
class StdFormat(object):
    """Log format with blank-separated columns."""

    def __init__(self, keys):
        # the banner lists the first word of each key, each followed by
        # a blank
        self._banner = ''.join(key.split(' ')[0] + ' ' for key in keys)

    def get_banner(self):
        """Returns the banner built from the keys."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of 'keys' in 's' as right-aligned columns."""
        return ''.join(' %9d' % s[key].delta for key in keys)
|
|
|
|
|
|
class CSVFormat(object):
    """Log format with comma-separated columns, led by a timestamp."""

    def __init__(self, keys):
        columns = ''.join(',{!s}'.format(key.split(' ')[0]) for key in keys)
        self._banner = 'timestamp' + columns

    def get_banner(self):
        """Returns the header line built from the keys."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of 'keys' in 's', each preceded by a comma."""
        return ''.join(',{!s}'.format(s[key].delta) for key in keys)
|
|
|
|
|
|
def log(stats, opts, frmt, keys):
    """Prints statistics as reiterating key block, multiple value blocks.

    Output goes to the file named by opts.log_to_file if set, and to
    stdout otherwise. A line is written every opts.set_delay seconds
    until a KeyboardInterrupt is received. When signal_received is set,
    the log file is closed and opened anew, and the banner is repeated.
    """
    global signal_received
    line = 0
    banner_repeat = 20
    f = None

    def do_banner(opts):
        nonlocal f
        if opts.log_to_file:
            if not f:
                try:
                    f = open(opts.log_to_file, 'a')
                except (IOError, OSError):
                    sys.exit("Error: Could not open file: %s" %
                             opts.log_to_file)
                # no banner when appending to existing csv data
                if isinstance(frmt, CSVFormat) and f.tell() != 0:
                    return
        print(frmt.get_banner(), file=f or sys.stdout)

    def do_statline(opts, values):
        statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
                   frmt.get_statline(keys, values)
        print(statline, file=f or sys.stdout)

    do_banner(opts)
    banner_printed = True
    while True:
        try:
            time.sleep(opts.set_delay)
            if signal_received:
                banner_printed = True
                line = 0
                if f:
                    f.close()
                    # a closed file object is still true - drop it, or
                    # do_banner() would not reopen the file and all
                    # further output would fail on the closed file
                    f = None
                do_banner(opts)
                signal_received = False
            if (line % banner_repeat == 0 and not banner_printed and
                    not (opts.log_to_file and isinstance(frmt, CSVFormat))):
                do_banner(opts)
                banner_printed = True
            values = stats.get()
            if (not opts.skip_zero_records or
                    any(values[k].delta != 0 for k in keys)):
                do_statline(opts, values)
                line += 1
                banner_printed = False
        except KeyboardInterrupt:
            break

    if f:
        f.close()
|
|
|
|
|
|
def handle_signal(sig, frame):
    """Signal handler that merely sets the global signal_received flag."""
    global signal_received
    signal_received = True
|
|
|
|
|
|
def is_delay_valid(delay):
    """Verify delay is in valid value range.

    Returns None if delay lies within MIN_DELAY and MAX_DELAY, and an
    error message naming the violated limit otherwise.
    """
    # the upper limit is looked at first, as its message takes
    # precedence should both checks ever trigger
    if delay > MAX_DELAY:
        return '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY
    if delay < MIN_DELAY:
        return '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY
    return None
|
|
|
|
|
|
def get_options():
    """Returns processed program arguments.

    Exits with an error message if -c or -z is given without a logging
    option, if the fields filter is not a valid regular expression, or
    if the guest given with -g cannot be resolved to exactly one pid.
    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
%s
%s/events/*
/proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
the large number of files that are possibly opened.

Interactive Commands:
b toggle events by guests (debugfs only, honors filters)
c clear filter
f filter by regular expression
g filter by guest name
h display interactive commands reference
o toggle sorting order (Total vs CurAvg/s)
p filter by PID
q quit
r reset stats
s set update interval (value range: 0.1-25.5 secs)
x toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)

    class Guest_to_pid(argparse.Action):
        """Resolves the guest name given with -g and stores its pid."""

        def __call__(self, parser, namespace, values, option_string=None):
            try:
                pids = Tui.get_pid_from_gname(values)
            except Exception:
                # lookup failures only - KeyboardInterrupt and SystemExit
                # are not turned into this error message
                sys.exit('Error while searching for guest "{}". Use "-p" to '
                         'specify a pid instead?'.format(values))
            if len(pids) == 0:
                sys.exit('Error: No guest by the name "{}" found'
                         .format(values))
            if len(pids) > 1:
                sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
                         ' to specify the desired pid'
                         .format(" ".join(map(str, pids))))
            namespace.pid = pids[0]

    argparser = argparse.ArgumentParser(description=description_text,
                                        formatter_class=argparse
                                        .RawTextHelpFormatter)
    argparser.add_argument('-1', '--once', '--batch',
                           action='store_true',
                           default=False,
                           help='run in batch mode for one second',
                           )
    argparser.add_argument('-c', '--csv',
                           action='store_true',
                           default=False,
                           help='log in csv format - requires option -l/-L',
                           )
    argparser.add_argument('-d', '--debugfs',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from debugfs',
                           )
    argparser.add_argument('-f', '--fields',
                           default='',
                           help='''fields to display (regex)
"-f help" for a list of available events''',
                           )
    argparser.add_argument('-g', '--guest',
                           type=str,
                           help='restrict statistics to guest by name',
                           action=Guest_to_pid,
                           )
    argparser.add_argument('-i', '--debugfs-include-past',
                           action='store_true',
                           default=False,
                           help='include all available data on past events for'
                                ' debugfs',
                           )
    argparser.add_argument('-l', '--log',
                           action='store_true',
                           default=False,
                           help='run in logging mode (like vmstat)',
                           )
    argparser.add_argument('-L', '--log-to-file',
                           type=str,
                           metavar='FILE',
                           help="like '--log', but logging to a file"
                           )
    argparser.add_argument('-p', '--pid',
                           type=int,
                           default=0,
                           help='restrict statistics to pid',
                           )
    argparser.add_argument('-s', '--set-delay',
                           type=float,
                           default=DELAY_DEFAULT,
                           metavar='DELAY',
                           help='set delay between refreshs (value range: '
                                '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
                           )
    argparser.add_argument('-t', '--tracepoints',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from tracepoints',
                           )
    argparser.add_argument('-z', '--skip-zero-records',
                           action='store_true',
                           default=False,
                           help='omit records with all zeros in logging mode',
                           )
    options = argparser.parse_args()
    if options.csv and not (options.log or options.log_to_file):
        sys.exit('Error: Option -c/--csv requires -l/--log')
    if options.skip_zero_records and not (options.log or options.log_to_file):
        sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
    try:
        # verify that we were passed a valid regex up front
        re.compile(options.fields)
    except re.error:
        sys.exit('Error: "' + options.fields + '" is not a valid regular '
                 'expression')

    return options
|
|
|
|
|
|
def check_access(options):
    """Exits if the current user can't access all needed directories.

    If the tracing directory is missing although tracepoints would be
    used, a hint is written to stderr. The program exits if tracepoints
    were requested explicitly. Otherwise debugfs statistics are enabled
    after a pause of five seconds.

    Returns the (possibly modified) options.
    """
    if os.path.exists(PATH_DEBUGFS_TRACING):
        return options
    if not options.tracepoints and options.debugfs:
        # tracepoints are not going to be used, nothing to complain about
        return options

    sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                     "when using the option -t (default).\n"
                     "If it is enabled, make {0} readable by the "
                     "current user.\n"
                     .format(PATH_DEBUGFS_TRACING))
    if options.tracepoints:
        sys.exit(1)

    sys.stderr.write("Falling back to debugfs statistics!\n")
    options.debugfs = True
    time.sleep(5)
    return options
|
|
|
|
|
|
def assign_globals():
    """Sets PATH_DEBUGFS_KVM and PATH_DEBUGFS_TRACING.

    The paths are derived from the first debugfs mount point listed in
    /proc/mounts. Exits with a hint on stderr if no debugfs mount is
    found or if the kvm directory below it does not exist.
    """
    global PATH_DEBUGFS_KVM
    global PATH_DEBUGFS_TRACING

    debugfs = ''
    # make sure the file is closed again once the mount point is known
    with open('/proc/mounts') as mounts:
        for line in mounts:
            fields = line.split(' ')
            # fields: device, mount point, filesystem type, ...
            if len(fields) > 2 and fields[2] == 'debugfs':
                debugfs = fields[1]
                break
    if debugfs == '':
        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
                         "your kernel, mounted and\nreadable by the current "
                         "user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
        sys.exit(1)

    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
                         "your kernel and that the modules are loaded.\n")
        sys.exit(1)
|
|
|
|
|
|
def main():
    """Program entry point.

    Sets up paths and options, validates pid and delay, then either
    lists the available events ('-f help'), logs, runs batch mode or
    starts the text ui.
    """
    assign_globals()
    options = check_access(get_options())

    if options.pid > 0:
        proc_dir = os.path.join('/proc/', str(options.pid))
        if not os.path.isdir(proc_dir):
            sys.stderr.write('Did you use a (unsupported) tid instead of a '
                             'pid?\n')
            sys.exit('Specified pid does not exist.')

    err = is_delay_valid(options.set_delay)
    if err is not None:
        sys.exit('Error: ' + err)

    stats = Stats(options)

    if options.fields == 'help':
        stats.fields_filter = None
        # strip anything from the first '(' on and drop duplicates
        event_names = {key.split('(', 1)[0] for key in stats.get().keys()}
        sys.stdout.write(' ' + '\n '.join(sorted(event_names)) + '\n')
        sys.exit(0)

    if options.log or options.log_to_file:
        if options.log_to_file:
            signal.signal(signal.SIGHUP, handle_signal)
        keys = sorted(stats.get().keys())
        frmt = CSVFormat(keys) if options.csv else StdFormat(keys)
        log(stats, options, frmt, keys)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats, options) as tui:
            tui.show_stats()
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|