s390:
- optimization for the exitless interrupt support that was merged in 4.16-rc1 - improve the branch prediction blocking for nested KVM - replace some jump tables with switch statements to improve expoline performance - fixes for multiple epoch facility ARM: - fix the interaction of userspace irqchip VMs with in-kernel irqchip VMs - make sure we can build 32-bit KVM/ARM with gcc-8. x86: - fixes for AMD SEV - fixes for Intel nested VMX, emulated UMIP and a dump_stack() on VM startup - fixes for async page fault migration - small optimization to PV TLB flush (new in 4.16-rc1) - syzkaller fixes Generic: - compiler warning fixes - syzkaller fixes - more improvements to the kvm_stat tool Two more small Spectre fixes are going to reach you via Ingo. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEbBAABAgAGBQJakL/fAAoJEL/70l94x66Dzp4H9j6qMzgOTAQ0bYmupQp81tad V8lNabVSNi0UBYwk2D44oNigtNjQckE18KGnjuJ4tZW+GZ+D7zrrHrKXWtATXgxP SIfHj+raSd/lgJoy6HLu/N0oT6wS+PdZMYFgSu600Vi618lGKGX1SIAwBhjoxdMX 7QKKAuPcDZ1qgGddhWaLnof28nQQEWcCAVfFeVojmM0TyhvSbgSysh/Gq10ydybh NVUfgP3fzLtT9gVngX/ZtbogNkltPYmucpI+wT3nWfsgBic783klfWrfpnC/GM85 OeXLVhHwVLG6tXUGhb4ULO+F9HwRGX31+er6iIxmwH9PvqnQMRcQ0Xxf2gbNXg== =YmH6 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM fixes from Paolo Bonzini: "s390: - optimization for the exitless interrupt support that was merged in 4.16-rc1 - improve the branch prediction blocking for nested KVM - replace some jump tables with switch statements to improve expoline performance - fixes for multiple epoch facility ARM: - fix the interaction of userspace irqchip VMs with in-kernel irqchip VMs - make sure we can build 32-bit KVM/ARM with gcc-8. 
x86: - fixes for AMD SEV - fixes for Intel nested VMX, emulated UMIP and a dump_stack() on VM startup - fixes for async page fault migration - small optimization to PV TLB flush (new in 4.16-rc1) - syzkaller fixes Generic: - compiler warning fixes - syzkaller fixes - more improvements to the kvm_stat tool Two more small Spectre fixes are going to reach you via Ingo" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (40 commits) KVM: SVM: Fix SEV LAUNCH_SECRET command KVM: SVM: install RSM intercept KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command include: psp-sev: Capitalize invalid length enum crypto: ccp: Fix sparse, use plain integer as NULL pointer KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled x86/kvm: Make parse_no_xxx __init for kvm KVM: x86: fix backward migration with async_PF kvm: fix warning for non-x86 builds kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds tools/kvm_stat: print 'Total' line for multiple events only tools/kvm_stat: group child events indented after parent tools/kvm_stat: separate drilldown and fields filtering tools/kvm_stat: eliminate extra guest/pid selection dialog tools/kvm_stat: mark private methods as such tools/kvm_stat: fix debugfs handling tools/kvm_stat: print error on invalid regex tools/kvm_stat: fix crash when filtering out all non-child trace events tools/kvm_stat: avoid 'is' for equality checks tools/kvm_stat: use a more pythonic way to iterate over dictionaries ...
This commit is contained in:
commit
d4858aaf6b
@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
|
||||
|| || before enabling paravirtualized
|
||||
|| || tlb flush.
|
||||
------------------------------------------------------------------------------
|
||||
KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
|
||||
|| || can be enabled by setting bit 2
|
||||
|| || when writing to msr 0x4b564d02
|
||||
------------------------------------------------------------------------------
|
||||
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|
||||
|| || per-cpu warps are expected in
|
||||
|| || kvmclock.
|
||||
|
@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
||||
when asynchronous page faults are enabled on the vcpu 0 when
|
||||
disabled. Bit 1 is 1 if asynchronous page faults can be injected
|
||||
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
|
||||
are delivered to L1 as #PF vmexits.
|
||||
are delivered to L1 as #PF vmexits. Bit 2 can be set only if
|
||||
KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
|
||||
|
||||
The first 4 bytes of the 64-byte memory location will be written to by
|
||||
the hypervisor at the time of asynchronous page fault (APF)
|
||||
|
@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
|
||||
|
||||
KVM=../../../../virt/kvm
|
||||
|
||||
CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
|
||||
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
|
||||
@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
|
||||
CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE)
|
||||
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += switch.o
|
||||
CFLAGS_switch.o += $(CFLAGS_ARMV7VE)
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
|
||||
|
@ -20,6 +20,10 @@
|
||||
|
||||
#include <asm/kvm_hyp.h>
|
||||
|
||||
/*
|
||||
* gcc before 4.9 doesn't understand -march=armv7ve, so we have to
|
||||
* trick the assembler.
|
||||
*/
|
||||
__asm__(".arch_extension virt");
|
||||
|
||||
void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
|
||||
|
@ -22,22 +22,6 @@
|
||||
#include "trace.h"
|
||||
#include "trace-s390.h"
|
||||
|
||||
|
||||
static const intercept_handler_t instruction_handlers[256] = {
|
||||
[0x01] = kvm_s390_handle_01,
|
||||
[0x82] = kvm_s390_handle_lpsw,
|
||||
[0x83] = kvm_s390_handle_diag,
|
||||
[0xaa] = kvm_s390_handle_aa,
|
||||
[0xae] = kvm_s390_handle_sigp,
|
||||
[0xb2] = kvm_s390_handle_b2,
|
||||
[0xb6] = kvm_s390_handle_stctl,
|
||||
[0xb7] = kvm_s390_handle_lctl,
|
||||
[0xb9] = kvm_s390_handle_b9,
|
||||
[0xe3] = kvm_s390_handle_e3,
|
||||
[0xe5] = kvm_s390_handle_e5,
|
||||
[0xeb] = kvm_s390_handle_eb,
|
||||
};
|
||||
|
||||
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
|
||||
@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int handle_instruction(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
vcpu->stat.exit_instruction++;
|
||||
trace_kvm_s390_intercept_instruction(vcpu,
|
||||
vcpu->arch.sie_block->ipa,
|
||||
vcpu->arch.sie_block->ipb);
|
||||
handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (vcpu->arch.sie_block->ipa >> 8) {
|
||||
case 0x01:
|
||||
return kvm_s390_handle_01(vcpu);
|
||||
case 0x82:
|
||||
return kvm_s390_handle_lpsw(vcpu);
|
||||
case 0x83:
|
||||
return kvm_s390_handle_diag(vcpu);
|
||||
case 0xaa:
|
||||
return kvm_s390_handle_aa(vcpu);
|
||||
case 0xae:
|
||||
return kvm_s390_handle_sigp(vcpu);
|
||||
case 0xb2:
|
||||
return kvm_s390_handle_b2(vcpu);
|
||||
case 0xb6:
|
||||
return kvm_s390_handle_stctl(vcpu);
|
||||
case 0xb7:
|
||||
return kvm_s390_handle_lctl(vcpu);
|
||||
case 0xb9:
|
||||
return kvm_s390_handle_b9(vcpu);
|
||||
case 0xe3:
|
||||
return kvm_s390_handle_e3(vcpu);
|
||||
case 0xe5:
|
||||
return kvm_s390_handle_e5(vcpu);
|
||||
case 0xeb:
|
||||
return kvm_s390_handle_eb(vcpu);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
|
||||
|
@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int ckc_irq_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
|
||||
const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
|
||||
const u64 ckc = vcpu->arch.sie_block->ckc;
|
||||
|
||||
if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
|
||||
if ((s64)ckc >= (s64)now)
|
||||
return 0;
|
||||
} else if (ckc >= now) {
|
||||
return 0;
|
||||
}
|
||||
return ckc_interrupts_enabled(vcpu);
|
||||
}
|
||||
|
||||
@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
|
||||
return kvm_s390_get_cpu_timer(vcpu) >> 63;
|
||||
}
|
||||
|
||||
static inline int is_ioirq(unsigned long irq_type)
|
||||
{
|
||||
return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
|
||||
(irq_type <= IRQ_PEND_IO_ISC_0));
|
||||
}
|
||||
|
||||
static uint64_t isc_to_isc_bits(int isc)
|
||||
{
|
||||
return (0x80 >> isc) << 24;
|
||||
@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis
|
||||
return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
|
||||
}
|
||||
|
||||
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
|
||||
static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->kvm->arch.float_int.pending_irqs |
|
||||
vcpu->arch.local_int.pending_irqs |
|
||||
vcpu->arch.local_int.pending_irqs;
|
||||
}
|
||||
|
||||
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return pending_irqs_no_gisa(vcpu) |
|
||||
kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
|
||||
}
|
||||
|
||||
@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
|
||||
if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
|
||||
return;
|
||||
else if (psw_ioint_disabled(vcpu))
|
||||
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
|
||||
@ -1011,24 +1017,6 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
|
||||
|
||||
static const deliver_irq_t deliver_irq_funcs[] = {
|
||||
[IRQ_PEND_MCHK_EX] = __deliver_machine_check,
|
||||
[IRQ_PEND_MCHK_REP] = __deliver_machine_check,
|
||||
[IRQ_PEND_PROG] = __deliver_prog,
|
||||
[IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal,
|
||||
[IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call,
|
||||
[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
|
||||
[IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer,
|
||||
[IRQ_PEND_RESTART] = __deliver_restart,
|
||||
[IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
|
||||
[IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
|
||||
[IRQ_PEND_EXT_SERVICE] = __deliver_service,
|
||||
[IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done,
|
||||
[IRQ_PEND_VIRTIO] = __deliver_virtio,
|
||||
};
|
||||
|
||||
/* Check whether an external call is pending (deliverable or not) */
|
||||
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
|
||||
static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 now, cputm, sltime = 0;
|
||||
const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
|
||||
const u64 ckc = vcpu->arch.sie_block->ckc;
|
||||
u64 cputm, sltime = 0;
|
||||
|
||||
if (ckc_interrupts_enabled(vcpu)) {
|
||||
now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
|
||||
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
|
||||
/* already expired or overflow? */
|
||||
if (!sltime || vcpu->arch.sie_block->ckc <= now)
|
||||
if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
|
||||
if ((s64)now < (s64)ckc)
|
||||
sltime = tod_to_ns((s64)ckc - (s64)now);
|
||||
} else if (now < ckc) {
|
||||
sltime = tod_to_ns(ckc - now);
|
||||
}
|
||||
/* already expired */
|
||||
if (!sltime)
|
||||
return 0;
|
||||
if (cpu_timer_interrupts_enabled(vcpu)) {
|
||||
cputm = kvm_s390_get_cpu_timer(vcpu);
|
||||
@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
|
||||
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
|
||||
deliver_irq_t func;
|
||||
int rc = 0;
|
||||
unsigned long irq_type;
|
||||
unsigned long irqs;
|
||||
@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
|
||||
/* bits are in the reverse order of interrupt priority */
|
||||
irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
|
||||
if (is_ioirq(irq_type)) {
|
||||
switch (irq_type) {
|
||||
case IRQ_PEND_IO_ISC_0:
|
||||
case IRQ_PEND_IO_ISC_1:
|
||||
case IRQ_PEND_IO_ISC_2:
|
||||
case IRQ_PEND_IO_ISC_3:
|
||||
case IRQ_PEND_IO_ISC_4:
|
||||
case IRQ_PEND_IO_ISC_5:
|
||||
case IRQ_PEND_IO_ISC_6:
|
||||
case IRQ_PEND_IO_ISC_7:
|
||||
rc = __deliver_io(vcpu, irq_type);
|
||||
} else {
|
||||
func = deliver_irq_funcs[irq_type];
|
||||
if (!func) {
|
||||
WARN_ON_ONCE(func == NULL);
|
||||
clear_bit(irq_type, &li->pending_irqs);
|
||||
continue;
|
||||
}
|
||||
rc = func(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_MCHK_EX:
|
||||
case IRQ_PEND_MCHK_REP:
|
||||
rc = __deliver_machine_check(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_PROG:
|
||||
rc = __deliver_prog(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_EXT_EMERGENCY:
|
||||
rc = __deliver_emergency_signal(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_EXT_EXTERNAL:
|
||||
rc = __deliver_external_call(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_EXT_CLOCK_COMP:
|
||||
rc = __deliver_ckc(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_EXT_CPU_TIMER:
|
||||
rc = __deliver_cpu_timer(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_RESTART:
|
||||
rc = __deliver_restart(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_SET_PREFIX:
|
||||
rc = __deliver_set_prefix(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_PFAULT_INIT:
|
||||
rc = __deliver_pfault_init(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_EXT_SERVICE:
|
||||
rc = __deliver_service(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_PFAULT_DONE:
|
||||
rc = __deliver_pfault_done(vcpu);
|
||||
break;
|
||||
case IRQ_PEND_VIRTIO:
|
||||
rc = __deliver_virtio(vcpu);
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
|
||||
clear_bit(irq_type, &li->pending_irqs);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
|
||||
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
|
||||
break;
|
||||
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
|
||||
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
|
||||
if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
|
||||
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
|
||||
break;
|
||||
default:
|
||||
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
|
||||
|
@ -179,6 +179,28 @@ int kvm_arch_hardware_enable(void)
|
||||
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
|
||||
unsigned long end);
|
||||
|
||||
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
|
||||
{
|
||||
u8 delta_idx = 0;
|
||||
|
||||
/*
|
||||
* The TOD jumps by delta, we have to compensate this by adding
|
||||
* -delta to the epoch.
|
||||
*/
|
||||
delta = -delta;
|
||||
|
||||
/* sign-extension - we're adding to signed values below */
|
||||
if ((s64)delta < 0)
|
||||
delta_idx = -1;
|
||||
|
||||
scb->epoch += delta;
|
||||
if (scb->ecd & ECD_MEF) {
|
||||
scb->epdx += delta_idx;
|
||||
if (scb->epoch < delta)
|
||||
scb->epdx += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This callback is executed during stop_machine(). All CPUs are therefore
|
||||
* temporarily stopped. In order not to change guest behavior, we have to
|
||||
@ -194,13 +216,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
|
||||
unsigned long long *delta = v;
|
||||
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
kvm->arch.epoch -= *delta;
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu->arch.sie_block->epoch -= *delta;
|
||||
kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
|
||||
if (i == 0) {
|
||||
kvm->arch.epoch = vcpu->arch.sie_block->epoch;
|
||||
kvm->arch.epdx = vcpu->arch.sie_block->epdx;
|
||||
}
|
||||
if (vcpu->arch.cputm_enabled)
|
||||
vcpu->arch.cputm_start += *delta;
|
||||
if (vcpu->arch.vsie_block)
|
||||
vcpu->arch.vsie_block->epoch -= *delta;
|
||||
kvm_clock_sync_scb(vcpu->arch.vsie_block,
|
||||
*delta);
|
||||
}
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
@ -902,12 +928,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
|
||||
if (test_kvm_facility(kvm, 139))
|
||||
kvm_s390_set_tod_clock_ext(kvm, &gtod);
|
||||
else if (gtod.epoch_idx == 0)
|
||||
kvm_s390_set_tod_clock(kvm, gtod.tod);
|
||||
else
|
||||
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
|
||||
return -EINVAL;
|
||||
kvm_s390_set_tod_clock(kvm, &gtod);
|
||||
|
||||
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
|
||||
gtod.epoch_idx, gtod.tod);
|
||||
@ -932,13 +955,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
|
||||
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 gtod;
|
||||
struct kvm_s390_vm_tod_clock gtod = { 0 };
|
||||
|
||||
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
|
||||
if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
|
||||
sizeof(gtod.tod)))
|
||||
return -EFAULT;
|
||||
|
||||
kvm_s390_set_tod_clock(kvm, gtod);
|
||||
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
|
||||
kvm_s390_set_tod_clock(kvm, &gtod);
|
||||
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2389,6 +2413,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
mutex_lock(&vcpu->kvm->lock);
|
||||
preempt_disable();
|
||||
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
|
||||
vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
|
||||
preempt_enable();
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
||||
@ -3021,8 +3046,8 @@ retry:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
|
||||
const struct kvm_s390_vm_tod_clock *gtod)
|
||||
void kvm_s390_set_tod_clock(struct kvm *kvm,
|
||||
const struct kvm_s390_vm_tod_clock *gtod)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_s390_tod_clock_ext htod;
|
||||
@ -3034,10 +3059,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
|
||||
get_tod_clock_ext((char *)&htod);
|
||||
|
||||
kvm->arch.epoch = gtod->tod - htod.tod;
|
||||
kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
|
||||
|
||||
if (kvm->arch.epoch > gtod->tod)
|
||||
kvm->arch.epdx -= 1;
|
||||
kvm->arch.epdx = 0;
|
||||
if (test_kvm_facility(kvm, 139)) {
|
||||
kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
|
||||
if (kvm->arch.epoch > gtod->tod)
|
||||
kvm->arch.epdx -= 1;
|
||||
}
|
||||
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
@ -3050,22 +3077,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
preempt_disable();
|
||||
kvm->arch.epoch = tod - get_tod_clock();
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
preempt_enable();
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_arch_fault_in_page - fault-in guest page if necessary
|
||||
* @vcpu: The corresponding virtual cpu
|
||||
|
@ -19,8 +19,6 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/sclp.h>
|
||||
|
||||
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* Transactional Memory Execution related macros */
|
||||
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
|
||||
#define TDB_FORMAT1 1
|
||||
@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* implemented in kvm-s390.c */
|
||||
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
|
||||
const struct kvm_s390_vm_tod_clock *gtod);
|
||||
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
|
||||
void kvm_s390_set_tod_clock(struct kvm *kvm,
|
||||
const struct kvm_s390_vm_tod_clock *gtod);
|
||||
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
|
||||
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
|
@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
|
||||
/* Handle SCK (SET CLOCK) interception */
|
||||
static int handle_set_clock(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_s390_vm_tod_clock gtod = { 0 };
|
||||
int rc;
|
||||
u8 ar;
|
||||
u64 op2, val;
|
||||
u64 op2;
|
||||
|
||||
vcpu->stat.instruction_sck++;
|
||||
|
||||
@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
|
||||
op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
|
||||
if (op2 & 7) /* Operand must be on a doubleword boundary */
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
|
||||
rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
|
||||
if (rc)
|
||||
return kvm_s390_inject_prog_cond(vcpu, rc);
|
||||
|
||||
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
|
||||
kvm_s390_set_tod_clock(vcpu->kvm, val);
|
||||
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
|
||||
kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
|
||||
|
||||
kvm_s390_set_psw_cc(vcpu, 0);
|
||||
return 0;
|
||||
@ -795,55 +796,60 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static const intercept_handler_t b2_handlers[256] = {
|
||||
[0x02] = handle_stidp,
|
||||
[0x04] = handle_set_clock,
|
||||
[0x10] = handle_set_prefix,
|
||||
[0x11] = handle_store_prefix,
|
||||
[0x12] = handle_store_cpu_address,
|
||||
[0x14] = kvm_s390_handle_vsie,
|
||||
[0x21] = handle_ipte_interlock,
|
||||
[0x29] = handle_iske,
|
||||
[0x2a] = handle_rrbe,
|
||||
[0x2b] = handle_sske,
|
||||
[0x2c] = handle_test_block,
|
||||
[0x30] = handle_io_inst,
|
||||
[0x31] = handle_io_inst,
|
||||
[0x32] = handle_io_inst,
|
||||
[0x33] = handle_io_inst,
|
||||
[0x34] = handle_io_inst,
|
||||
[0x35] = handle_io_inst,
|
||||
[0x36] = handle_io_inst,
|
||||
[0x37] = handle_io_inst,
|
||||
[0x38] = handle_io_inst,
|
||||
[0x39] = handle_io_inst,
|
||||
[0x3a] = handle_io_inst,
|
||||
[0x3b] = handle_io_inst,
|
||||
[0x3c] = handle_io_inst,
|
||||
[0x50] = handle_ipte_interlock,
|
||||
[0x56] = handle_sthyi,
|
||||
[0x5f] = handle_io_inst,
|
||||
[0x74] = handle_io_inst,
|
||||
[0x76] = handle_io_inst,
|
||||
[0x7d] = handle_stsi,
|
||||
[0xb1] = handle_stfl,
|
||||
[0xb2] = handle_lpswe,
|
||||
};
|
||||
|
||||
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
/*
|
||||
* A lot of B2 instructions are privileged. Here we check for
|
||||
* the privileged ones, that we can handle in the kernel.
|
||||
* Anything else goes to userspace.
|
||||
*/
|
||||
handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
|
||||
case 0x02:
|
||||
return handle_stidp(vcpu);
|
||||
case 0x04:
|
||||
return handle_set_clock(vcpu);
|
||||
case 0x10:
|
||||
return handle_set_prefix(vcpu);
|
||||
case 0x11:
|
||||
return handle_store_prefix(vcpu);
|
||||
case 0x12:
|
||||
return handle_store_cpu_address(vcpu);
|
||||
case 0x14:
|
||||
return kvm_s390_handle_vsie(vcpu);
|
||||
case 0x21:
|
||||
case 0x50:
|
||||
return handle_ipte_interlock(vcpu);
|
||||
case 0x29:
|
||||
return handle_iske(vcpu);
|
||||
case 0x2a:
|
||||
return handle_rrbe(vcpu);
|
||||
case 0x2b:
|
||||
return handle_sske(vcpu);
|
||||
case 0x2c:
|
||||
return handle_test_block(vcpu);
|
||||
case 0x30:
|
||||
case 0x31:
|
||||
case 0x32:
|
||||
case 0x33:
|
||||
case 0x34:
|
||||
case 0x35:
|
||||
case 0x36:
|
||||
case 0x37:
|
||||
case 0x38:
|
||||
case 0x39:
|
||||
case 0x3a:
|
||||
case 0x3b:
|
||||
case 0x3c:
|
||||
case 0x5f:
|
||||
case 0x74:
|
||||
case 0x76:
|
||||
return handle_io_inst(vcpu);
|
||||
case 0x56:
|
||||
return handle_sthyi(vcpu);
|
||||
case 0x7d:
|
||||
return handle_stsi(vcpu);
|
||||
case 0xb1:
|
||||
return handle_stfl(vcpu);
|
||||
case 0xb2:
|
||||
return handle_lpswe(vcpu);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int handle_epsw(struct kvm_vcpu *vcpu)
|
||||
@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const intercept_handler_t b9_handlers[256] = {
|
||||
[0x8a] = handle_ipte_interlock,
|
||||
[0x8d] = handle_epsw,
|
||||
[0x8e] = handle_ipte_interlock,
|
||||
[0x8f] = handle_ipte_interlock,
|
||||
[0xab] = handle_essa,
|
||||
[0xaf] = handle_pfmf,
|
||||
};
|
||||
|
||||
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
/* This is handled just as for the B2 instructions. */
|
||||
handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
|
||||
case 0x8a:
|
||||
case 0x8e:
|
||||
case 0x8f:
|
||||
return handle_ipte_interlock(vcpu);
|
||||
case 0x8d:
|
||||
return handle_epsw(vcpu);
|
||||
case 0xab:
|
||||
return handle_essa(vcpu);
|
||||
case 0xaf:
|
||||
return handle_pfmf(vcpu);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
|
||||
@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
|
||||
return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
|
||||
}
|
||||
|
||||
static const intercept_handler_t eb_handlers[256] = {
|
||||
[0x2f] = handle_lctlg,
|
||||
[0x25] = handle_stctg,
|
||||
[0x60] = handle_ri,
|
||||
[0x61] = handle_ri,
|
||||
[0x62] = handle_ri,
|
||||
};
|
||||
|
||||
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
return -EOPNOTSUPP;
|
||||
switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
|
||||
case 0x25:
|
||||
return handle_stctg(vcpu);
|
||||
case 0x2f:
|
||||
return handle_lctlg(vcpu);
|
||||
case 0x60:
|
||||
case 0x61:
|
||||
case 0x62:
|
||||
return handle_ri(vcpu);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int handle_tprot(struct kvm_vcpu *vcpu)
|
||||
@ -1346,10 +1347,12 @@ out_unlock:
|
||||
|
||||
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* For e5xx... instructions we only handle TPROT */
|
||||
if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
|
||||
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
|
||||
case 0x01:
|
||||
return handle_tprot(vcpu);
|
||||
return -EOPNOTSUPP;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int handle_sckpf(struct kvm_vcpu *vcpu)
|
||||
@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const intercept_handler_t x01_handlers[256] = {
|
||||
[0x04] = handle_ptff,
|
||||
[0x07] = handle_sckpf,
|
||||
};
|
||||
|
||||
int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
return -EOPNOTSUPP;
|
||||
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
|
||||
case 0x04:
|
||||
return handle_ptff(vcpu);
|
||||
case 0x07:
|
||||
return handle_sckpf(vcpu);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
|
||||
int guest_bp_isolation;
|
||||
int rc;
|
||||
|
||||
handle_last_fault(vcpu, vsie_page);
|
||||
@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
s390_handle_mcck();
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
|
||||
/* save current guest state of bp isolation override */
|
||||
guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
|
||||
|
||||
/*
|
||||
* The guest is running with BPBC, so we have to force it on for our
|
||||
* nested guest. This is done by enabling BPBC globally, so the BPBC
|
||||
* control in the SCB (which the nested guest can modify) is simply
|
||||
* ignored.
|
||||
*/
|
||||
if (test_kvm_facility(vcpu->kvm, 82) &&
|
||||
vcpu->arch.sie_block->fpf & FPF_BPBC)
|
||||
set_thread_flag(TIF_ISOLATE_BP_GUEST);
|
||||
|
||||
local_irq_disable();
|
||||
guest_enter_irqoff();
|
||||
local_irq_enable();
|
||||
@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
local_irq_disable();
|
||||
guest_exit_irqoff();
|
||||
local_irq_enable();
|
||||
|
||||
/* restore guest state for bp isolation override */
|
||||
if (!guest_bp_isolation)
|
||||
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
if (rc == -EINTR) {
|
||||
|
@ -1464,7 +1464,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
|
||||
#define put_smstate(type, buf, offset, val) \
|
||||
*(type *)((buf) + (offset) - 0x7e00) = val
|
||||
|
||||
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
@ -26,6 +26,7 @@
|
||||
#define KVM_FEATURE_PV_EOI 6
|
||||
#define KVM_FEATURE_PV_UNHALT 7
|
||||
#define KVM_FEATURE_PV_TLB_FLUSH 9
|
||||
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
|
||||
|
||||
/* The last 8 bits are used to indicate how to interpret the flags field
|
||||
* in pvclock structure. If no bits are set, all flags are ignored.
|
||||
|
@ -49,7 +49,7 @@
|
||||
|
||||
static int kvmapf = 1;
|
||||
|
||||
static int parse_no_kvmapf(char *arg)
|
||||
static int __init parse_no_kvmapf(char *arg)
|
||||
{
|
||||
kvmapf = 0;
|
||||
return 0;
|
||||
@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
|
||||
early_param("no-kvmapf", parse_no_kvmapf);
|
||||
|
||||
static int steal_acc = 1;
|
||||
static int parse_no_stealacc(char *arg)
|
||||
static int __init parse_no_stealacc(char *arg)
|
||||
{
|
||||
steal_acc = 0;
|
||||
return 0;
|
||||
@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
|
||||
early_param("no-steal-acc", parse_no_stealacc);
|
||||
|
||||
static int kvmclock_vsyscall = 1;
|
||||
static int parse_no_kvmclock_vsyscall(char *arg)
|
||||
static int __init parse_no_kvmclock_vsyscall(char *arg)
|
||||
{
|
||||
kvmclock_vsyscall = 0;
|
||||
return 0;
|
||||
@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
|
||||
#endif
|
||||
pa |= KVM_ASYNC_PF_ENABLED;
|
||||
|
||||
/* Async page fault support for L1 hypervisor is optional */
|
||||
if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
|
||||
(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
|
||||
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
|
||||
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
|
||||
__this_cpu_write(apf_reason.enabled, 1);
|
||||
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
|
||||
smp_processor_id());
|
||||
@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
|
||||
pv_time_ops.steal_clock = kvm_steal_clock;
|
||||
}
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
|
||||
!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
|
||||
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
|
||||
@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
|
||||
!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
|
||||
GFP_KERNEL, cpu_to_node(cpu));
|
||||
|
@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
(1 << KVM_FEATURE_PV_EOI) |
|
||||
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
|
||||
(1 << KVM_FEATURE_PV_UNHALT) |
|
||||
(1 << KVM_FEATURE_PV_TLB_FLUSH);
|
||||
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
|
||||
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
|
||||
|
||||
if (sched_info_on())
|
||||
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
|
||||
|
@ -2165,7 +2165,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
|
||||
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
|
||||
kvm_lapic_reset(vcpu, false);
|
||||
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
|
||||
|
||||
return 0;
|
||||
|
@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
|
||||
return RET_PF_RETRY;
|
||||
}
|
||||
|
||||
return -EFAULT;
|
||||
return RET_PF_EMULATE;
|
||||
}
|
||||
|
||||
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
|
||||
|
@ -300,6 +300,8 @@ module_param(vgif, int, 0444);
|
||||
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
|
||||
module_param(sev, int, 0444);
|
||||
|
||||
static u8 rsm_ins_bytes[] = "\x0f\xaa";
|
||||
|
||||
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
|
||||
static void svm_complete_interrupts(struct vcpu_svm *svm);
|
||||
@ -1383,6 +1385,7 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
set_intercept(svm, INTERCEPT_SKINIT);
|
||||
set_intercept(svm, INTERCEPT_WBINVD);
|
||||
set_intercept(svm, INTERCEPT_XSETBV);
|
||||
set_intercept(svm, INTERCEPT_RSM);
|
||||
|
||||
if (!kvm_mwait_in_guest()) {
|
||||
set_intercept(svm, INTERCEPT_MONITOR);
|
||||
@ -3699,6 +3702,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
|
||||
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
|
||||
}
|
||||
|
||||
static int rsm_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
return x86_emulate_instruction(&svm->vcpu, 0, 0,
|
||||
rsm_ins_bytes, 2) == EMULATE_DONE;
|
||||
}
|
||||
|
||||
static int rdpmc_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
int err;
|
||||
@ -4541,7 +4550,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
|
||||
[SVM_EXIT_MWAIT] = mwait_interception,
|
||||
[SVM_EXIT_XSETBV] = xsetbv_interception,
|
||||
[SVM_EXIT_NPF] = npf_interception,
|
||||
[SVM_EXIT_RSM] = emulate_on_interception,
|
||||
[SVM_EXIT_RSM] = rsm_interception,
|
||||
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
|
||||
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
|
||||
};
|
||||
@ -6236,16 +6245,18 @@ e_free:
|
||||
|
||||
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
{
|
||||
void __user *measure = (void __user *)(uintptr_t)argp->data;
|
||||
struct kvm_sev_info *sev = &kvm->arch.sev_info;
|
||||
struct sev_data_launch_measure *data;
|
||||
struct kvm_sev_launch_measure params;
|
||||
void __user *p = NULL;
|
||||
void *blob = NULL;
|
||||
int ret;
|
||||
|
||||
if (!sev_guest(kvm))
|
||||
return -ENOTTY;
|
||||
|
||||
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
|
||||
if (copy_from_user(&params, measure, sizeof(params)))
|
||||
return -EFAULT;
|
||||
|
||||
data = kzalloc(sizeof(*data), GFP_KERNEL);
|
||||
@ -6256,17 +6267,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
if (!params.len)
|
||||
goto cmd;
|
||||
|
||||
if (params.uaddr) {
|
||||
p = (void __user *)(uintptr_t)params.uaddr;
|
||||
if (p) {
|
||||
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
|
||||
ret = -EINVAL;
|
||||
goto e_free;
|
||||
}
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
|
||||
ret = -EFAULT;
|
||||
goto e_free;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
blob = kmalloc(params.len, GFP_KERNEL);
|
||||
if (!blob)
|
||||
@ -6290,13 +6297,13 @@ cmd:
|
||||
goto e_free_blob;
|
||||
|
||||
if (blob) {
|
||||
if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
|
||||
if (copy_to_user(p, blob, params.len))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
done:
|
||||
params.len = data->len;
|
||||
if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
|
||||
if (copy_to_user(measure, &params, sizeof(params)))
|
||||
ret = -EFAULT;
|
||||
e_free_blob:
|
||||
kfree(blob);
|
||||
@ -6597,7 +6604,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
struct page **pages;
|
||||
void *blob, *hdr;
|
||||
unsigned long n;
|
||||
int ret;
|
||||
int ret, offset;
|
||||
|
||||
if (!sev_guest(kvm))
|
||||
return -ENOTTY;
|
||||
@ -6623,6 +6630,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
if (!data)
|
||||
goto e_unpin_memory;
|
||||
|
||||
offset = params.guest_uaddr & (PAGE_SIZE - 1);
|
||||
data->guest_address = __sme_page_pa(pages[0]) + offset;
|
||||
data->guest_len = params.guest_len;
|
||||
|
||||
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
|
||||
if (IS_ERR(blob)) {
|
||||
ret = PTR_ERR(blob);
|
||||
@ -6637,8 +6648,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
ret = PTR_ERR(hdr);
|
||||
goto e_free_blob;
|
||||
}
|
||||
data->trans_address = __psp_pa(blob);
|
||||
data->trans_len = params.trans_len;
|
||||
data->hdr_address = __psp_pa(hdr);
|
||||
data->hdr_len = params.hdr_len;
|
||||
|
||||
data->handle = sev->handle;
|
||||
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
|
||||
|
@ -4485,7 +4485,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
|
||||
SECONDARY_EXEC_DESC);
|
||||
hw_cr4 &= ~X86_CR4_UMIP;
|
||||
} else
|
||||
} else if (!is_guest_mode(vcpu) ||
|
||||
!nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
|
||||
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
|
||||
SECONDARY_EXEC_DESC);
|
||||
|
||||
@ -11199,7 +11200,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
|
||||
/*
|
||||
* If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
|
||||
* by event injection, halt vcpu.
|
||||
*/
|
||||
if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
|
||||
!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
|
||||
return kvm_vcpu_halt(vcpu);
|
||||
|
||||
vmx->nested.nested_run_pending = 1;
|
||||
|
@ -7975,6 +7975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
kvm_vcpu_mtrr_init(vcpu);
|
||||
vcpu_load(vcpu);
|
||||
kvm_vcpu_reset(vcpu, false);
|
||||
kvm_lapic_reset(vcpu, false);
|
||||
kvm_mmu_setup(vcpu);
|
||||
vcpu_put(vcpu);
|
||||
return 0;
|
||||
@ -8460,10 +8461,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
|
||||
WARN_ON(r < 0);
|
||||
}
|
||||
if (!size)
|
||||
vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error);
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
|
||||
return rc;
|
||||
}
|
||||
|
||||
return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error);
|
||||
return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
|
||||
}
|
||||
|
||||
static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
|
||||
@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
|
||||
return rc;
|
||||
}
|
||||
|
||||
return __sev_do_cmd_locked(cmd, 0, &argp->error);
|
||||
return __sev_do_cmd_locked(cmd, NULL, &argp->error);
|
||||
}
|
||||
|
||||
static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
|
||||
@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission);
|
||||
|
||||
int sev_guest_df_flush(int *error)
|
||||
{
|
||||
return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error);
|
||||
return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sev_guest_df_flush);
|
||||
|
||||
|
@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
void kvm_arch_irq_routing_update(struct kvm *kvm);
|
||||
|
||||
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
{
|
||||
@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
|
||||
#endif /* CONFIG_HAVE_KVM_EVENTFD */
|
||||
|
||||
void kvm_arch_irq_routing_update(struct kvm *kvm);
|
||||
|
||||
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
}
|
||||
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
|
||||
|
||||
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
|
||||
#endif
|
||||
|
@ -42,7 +42,7 @@ typedef enum {
|
||||
SEV_RET_INVALID_PLATFORM_STATE,
|
||||
SEV_RET_INVALID_GUEST_STATE,
|
||||
SEV_RET_INAVLID_CONFIG,
|
||||
SEV_RET_INVALID_len,
|
||||
SEV_RET_INVALID_LEN,
|
||||
SEV_RET_ALREADY_OWNED,
|
||||
SEV_RET_INVALID_CERTIFICATE,
|
||||
SEV_RET_POLICY_FAILURE,
|
||||
|
@ -33,7 +33,7 @@ import resource
|
||||
import struct
|
||||
import re
|
||||
import subprocess
|
||||
from collections import defaultdict
|
||||
from collections import defaultdict, namedtuple
|
||||
|
||||
VMX_EXIT_REASONS = {
|
||||
'EXCEPTION_NMI': 0,
|
||||
@ -228,6 +228,7 @@ IOCTL_NUMBERS = {
|
||||
}
|
||||
|
||||
ENCODING = locale.getpreferredencoding(False)
|
||||
TRACE_FILTER = re.compile(r'^[^\(]*$')
|
||||
|
||||
|
||||
class Arch(object):
|
||||
@ -260,6 +261,11 @@ class Arch(object):
|
||||
return ArchX86(SVM_EXIT_REASONS)
|
||||
return
|
||||
|
||||
def tracepoint_is_child(self, field):
|
||||
if (TRACE_FILTER.match(field)):
|
||||
return None
|
||||
return field.split('(', 1)[0]
|
||||
|
||||
|
||||
class ArchX86(Arch):
|
||||
def __init__(self, exit_reasons):
|
||||
@ -267,6 +273,10 @@ class ArchX86(Arch):
|
||||
self.ioctl_numbers = IOCTL_NUMBERS
|
||||
self.exit_reasons = exit_reasons
|
||||
|
||||
def debugfs_is_child(self, field):
|
||||
""" Returns name of parent if 'field' is a child, None otherwise """
|
||||
return None
|
||||
|
||||
|
||||
class ArchPPC(Arch):
|
||||
def __init__(self):
|
||||
@ -282,6 +292,10 @@ class ArchPPC(Arch):
|
||||
self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
|
||||
self.exit_reasons = {}
|
||||
|
||||
def debugfs_is_child(self, field):
|
||||
""" Returns name of parent if 'field' is a child, None otherwise """
|
||||
return None
|
||||
|
||||
|
||||
class ArchA64(Arch):
|
||||
def __init__(self):
|
||||
@ -289,6 +303,10 @@ class ArchA64(Arch):
|
||||
self.ioctl_numbers = IOCTL_NUMBERS
|
||||
self.exit_reasons = AARCH64_EXIT_REASONS
|
||||
|
||||
def debugfs_is_child(self, field):
|
||||
""" Returns name of parent if 'field' is a child, None otherwise """
|
||||
return None
|
||||
|
||||
|
||||
class ArchS390(Arch):
|
||||
def __init__(self):
|
||||
@ -296,6 +314,12 @@ class ArchS390(Arch):
|
||||
self.ioctl_numbers = IOCTL_NUMBERS
|
||||
self.exit_reasons = None
|
||||
|
||||
def debugfs_is_child(self, field):
|
||||
""" Returns name of parent if 'field' is a child, None otherwise """
|
||||
if field.startswith('instruction_'):
|
||||
return 'exit_instruction'
|
||||
|
||||
|
||||
ARCH = Arch.get_arch()
|
||||
|
||||
|
||||
@ -331,9 +355,6 @@ class perf_event_attr(ctypes.Structure):
|
||||
PERF_TYPE_TRACEPOINT = 2
|
||||
PERF_FORMAT_GROUP = 1 << 3
|
||||
|
||||
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
|
||||
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
|
||||
|
||||
|
||||
class Group(object):
|
||||
"""Represents a perf event group."""
|
||||
@ -376,8 +397,8 @@ class Event(object):
|
||||
self.syscall = self.libc.syscall
|
||||
self.name = name
|
||||
self.fd = None
|
||||
self.setup_event(group, trace_cpu, trace_pid, trace_point,
|
||||
trace_filter, trace_set)
|
||||
self._setup_event(group, trace_cpu, trace_pid, trace_point,
|
||||
trace_filter, trace_set)
|
||||
|
||||
def __del__(self):
|
||||
"""Closes the event's file descriptor.
|
||||
@ -390,7 +411,7 @@ class Event(object):
|
||||
if self.fd:
|
||||
os.close(self.fd)
|
||||
|
||||
def perf_event_open(self, attr, pid, cpu, group_fd, flags):
|
||||
def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
|
||||
"""Wrapper for the sys_perf_evt_open() syscall.
|
||||
|
||||
Used to set up performance events, returns a file descriptor or -1
|
||||
@ -409,7 +430,7 @@ class Event(object):
|
||||
ctypes.c_int(pid), ctypes.c_int(cpu),
|
||||
ctypes.c_int(group_fd), ctypes.c_long(flags))
|
||||
|
||||
def setup_event_attribute(self, trace_set, trace_point):
|
||||
def _setup_event_attribute(self, trace_set, trace_point):
|
||||
"""Returns an initialized ctype perf_event_attr struct."""
|
||||
|
||||
id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
|
||||
@ -419,8 +440,8 @@ class Event(object):
|
||||
event_attr.config = int(open(id_path).read())
|
||||
return event_attr
|
||||
|
||||
def setup_event(self, group, trace_cpu, trace_pid, trace_point,
|
||||
trace_filter, trace_set):
|
||||
def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
|
||||
trace_filter, trace_set):
|
||||
"""Sets up the perf event in Linux.
|
||||
|
||||
Issues the syscall to register the event in the kernel and
|
||||
@ -428,7 +449,7 @@ class Event(object):
|
||||
|
||||
"""
|
||||
|
||||
event_attr = self.setup_event_attribute(trace_set, trace_point)
|
||||
event_attr = self._setup_event_attribute(trace_set, trace_point)
|
||||
|
||||
# First event will be group leader.
|
||||
group_leader = -1
|
||||
@ -437,8 +458,8 @@ class Event(object):
|
||||
if group.events:
|
||||
group_leader = group.events[0].fd
|
||||
|
||||
fd = self.perf_event_open(event_attr, trace_pid,
|
||||
trace_cpu, group_leader, 0)
|
||||
fd = self._perf_event_open(event_attr, trace_pid,
|
||||
trace_cpu, group_leader, 0)
|
||||
if fd == -1:
|
||||
err = ctypes.get_errno()
|
||||
raise OSError(err, os.strerror(err),
|
||||
@ -475,6 +496,10 @@ class Event(object):
|
||||
|
||||
class Provider(object):
|
||||
"""Encapsulates functionalities used by all providers."""
|
||||
def __init__(self, pid):
|
||||
self.child_events = False
|
||||
self.pid = pid
|
||||
|
||||
@staticmethod
|
||||
def is_field_wanted(fields_filter, field):
|
||||
"""Indicate whether field is valid according to fields_filter."""
|
||||
@ -500,12 +525,12 @@ class TracepointProvider(Provider):
|
||||
"""
|
||||
def __init__(self, pid, fields_filter):
|
||||
self.group_leaders = []
|
||||
self.filters = self.get_filters()
|
||||
self.filters = self._get_filters()
|
||||
self.update_fields(fields_filter)
|
||||
self.pid = pid
|
||||
super(TracepointProvider, self).__init__(pid)
|
||||
|
||||
@staticmethod
|
||||
def get_filters():
|
||||
def _get_filters():
|
||||
"""Returns a dict of trace events, their filter ids and
|
||||
the values that can be filtered.
|
||||
|
||||
@ -521,8 +546,8 @@ class TracepointProvider(Provider):
|
||||
filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
|
||||
return filters
|
||||
|
||||
def get_available_fields(self):
|
||||
"""Returns a list of available event's of format 'event name(filter
|
||||
def _get_available_fields(self):
|
||||
"""Returns a list of available events of format 'event name(filter
|
||||
name)'.
|
||||
|
||||
All available events have directories under
|
||||
@ -549,11 +574,12 @@ class TracepointProvider(Provider):
|
||||
|
||||
def update_fields(self, fields_filter):
|
||||
"""Refresh fields, applying fields_filter"""
|
||||
self.fields = [field for field in self.get_available_fields()
|
||||
if self.is_field_wanted(fields_filter, field)]
|
||||
self.fields = [field for field in self._get_available_fields()
|
||||
if self.is_field_wanted(fields_filter, field) or
|
||||
ARCH.tracepoint_is_child(field)]
|
||||
|
||||
@staticmethod
|
||||
def get_online_cpus():
|
||||
def _get_online_cpus():
|
||||
"""Returns a list of cpu id integers."""
|
||||
def parse_int_list(list_string):
|
||||
"""Returns an int list from a string of comma separated integers and
|
||||
@ -575,17 +601,17 @@ class TracepointProvider(Provider):
|
||||
cpu_string = cpu_list.readline()
|
||||
return parse_int_list(cpu_string)
|
||||
|
||||
def setup_traces(self):
|
||||
def _setup_traces(self):
|
||||
"""Creates all event and group objects needed to be able to retrieve
|
||||
data."""
|
||||
fields = self.get_available_fields()
|
||||
fields = self._get_available_fields()
|
||||
if self._pid > 0:
|
||||
# Fetch list of all threads of the monitored pid, as qemu
|
||||
# starts a thread for each vcpu.
|
||||
path = os.path.join('/proc', str(self._pid), 'task')
|
||||
groupids = self.walkdir(path)[1]
|
||||
else:
|
||||
groupids = self.get_online_cpus()
|
||||
groupids = self._get_online_cpus()
|
||||
|
||||
# The constant is needed as a buffer for python libs, std
|
||||
# streams and other files that the script opens.
|
||||
@ -663,7 +689,7 @@ class TracepointProvider(Provider):
|
||||
# The garbage collector will get rid of all Event/Group
|
||||
# objects and open files after removing the references.
|
||||
self.group_leaders = []
|
||||
self.setup_traces()
|
||||
self._setup_traces()
|
||||
self.fields = self._fields
|
||||
|
||||
def read(self, by_guest=0):
|
||||
@ -671,8 +697,12 @@ class TracepointProvider(Provider):
|
||||
ret = defaultdict(int)
|
||||
for group in self.group_leaders:
|
||||
for name, val in group.read().items():
|
||||
if name in self._fields:
|
||||
ret[name] += val
|
||||
if name not in self._fields:
|
||||
continue
|
||||
parent = ARCH.tracepoint_is_child(name)
|
||||
if parent:
|
||||
name += ' ' + parent
|
||||
ret[name] += val
|
||||
return ret
|
||||
|
||||
def reset(self):
|
||||
@ -690,11 +720,11 @@ class DebugfsProvider(Provider):
|
||||
self._baseline = {}
|
||||
self.do_read = True
|
||||
self.paths = []
|
||||
self.pid = pid
|
||||
super(DebugfsProvider, self).__init__(pid)
|
||||
if include_past:
|
||||
self.restore()
|
||||
self._restore()
|
||||
|
||||
def get_available_fields(self):
|
||||
def _get_available_fields(self):
|
||||
""""Returns a list of available fields.
|
||||
|
||||
The fields are all available KVM debugfs files
|
||||
@ -704,8 +734,9 @@ class DebugfsProvider(Provider):
|
||||
|
||||
def update_fields(self, fields_filter):
|
||||
"""Refresh fields, applying fields_filter"""
|
||||
self._fields = [field for field in self.get_available_fields()
|
||||
if self.is_field_wanted(fields_filter, field)]
|
||||
self._fields = [field for field in self._get_available_fields()
|
||||
if self.is_field_wanted(fields_filter, field) or
|
||||
ARCH.debugfs_is_child(field)]
|
||||
|
||||
@property
|
||||
def fields(self):
|
||||
@ -758,7 +789,7 @@ class DebugfsProvider(Provider):
|
||||
paths.append(dir)
|
||||
for path in paths:
|
||||
for field in self._fields:
|
||||
value = self.read_field(field, path)
|
||||
value = self._read_field(field, path)
|
||||
key = path + field
|
||||
if reset == 1:
|
||||
self._baseline[key] = value
|
||||
@ -766,20 +797,21 @@ class DebugfsProvider(Provider):
|
||||
self._baseline[key] = 0
|
||||
if self._baseline.get(key, -1) == -1:
|
||||
self._baseline[key] = value
|
||||
increment = (results.get(field, 0) + value -
|
||||
self._baseline.get(key, 0))
|
||||
if by_guest:
|
||||
pid = key.split('-')[0]
|
||||
if pid in results:
|
||||
results[pid] += increment
|
||||
else:
|
||||
results[pid] = increment
|
||||
parent = ARCH.debugfs_is_child(field)
|
||||
if parent:
|
||||
field = field + ' ' + parent
|
||||
else:
|
||||
if by_guest:
|
||||
field = key.split('-')[0] # set 'field' to 'pid'
|
||||
increment = value - self._baseline.get(key, 0)
|
||||
if field in results:
|
||||
results[field] += increment
|
||||
else:
|
||||
results[field] = increment
|
||||
|
||||
return results
|
||||
|
||||
def read_field(self, field, path):
|
||||
def _read_field(self, field, path):
|
||||
"""Returns the value of a single field from a specific VM."""
|
||||
try:
|
||||
return int(open(os.path.join(PATH_DEBUGFS_KVM,
|
||||
@ -794,12 +826,15 @@ class DebugfsProvider(Provider):
|
||||
self._baseline = {}
|
||||
self.read(1)
|
||||
|
||||
def restore(self):
|
||||
def _restore(self):
|
||||
"""Reset field counters"""
|
||||
self._baseline = {}
|
||||
self.read(2)
|
||||
|
||||
|
||||
EventStat = namedtuple('EventStat', ['value', 'delta'])
|
||||
|
||||
|
||||
class Stats(object):
|
||||
"""Manages the data providers and the data they provide.
|
||||
|
||||
@ -808,13 +843,13 @@ class Stats(object):
|
||||
|
||||
"""
|
||||
def __init__(self, options):
|
||||
self.providers = self.get_providers(options)
|
||||
self.providers = self._get_providers(options)
|
||||
self._pid_filter = options.pid
|
||||
self._fields_filter = options.fields
|
||||
self.values = {}
|
||||
self._child_events = False
|
||||
|
||||
@staticmethod
|
||||
def get_providers(options):
|
||||
def _get_providers(self, options):
|
||||
"""Returns a list of data providers depending on the passed options."""
|
||||
providers = []
|
||||
|
||||
@ -826,7 +861,7 @@ class Stats(object):
|
||||
|
||||
return providers
|
||||
|
||||
def update_provider_filters(self):
|
||||
def _update_provider_filters(self):
|
||||
"""Propagates fields filters to providers."""
|
||||
# As we reset the counters when updating the fields we can
|
||||
# also clear the cache of old values.
|
||||
@ -847,7 +882,7 @@ class Stats(object):
|
||||
def fields_filter(self, fields_filter):
|
||||
if fields_filter != self._fields_filter:
|
||||
self._fields_filter = fields_filter
|
||||
self.update_provider_filters()
|
||||
self._update_provider_filters()
|
||||
|
||||
@property
|
||||
def pid_filter(self):
|
||||
@ -861,16 +896,33 @@ class Stats(object):
|
||||
for provider in self.providers:
|
||||
provider.pid = self._pid_filter
|
||||
|
||||
@property
|
||||
def child_events(self):
|
||||
return self._child_events
|
||||
|
||||
@child_events.setter
|
||||
def child_events(self, val):
|
||||
self._child_events = val
|
||||
for provider in self.providers:
|
||||
provider.child_events = val
|
||||
|
||||
def get(self, by_guest=0):
|
||||
"""Returns a dict with field -> (value, delta to last value) of all
|
||||
provider data."""
|
||||
provider data.
|
||||
Key formats:
|
||||
* plain: 'key' is event name
|
||||
* child-parent: 'key' is in format '<child> <parent>'
|
||||
* pid: 'key' is the pid of the guest, and the record contains the
|
||||
aggregated event data
|
||||
These formats are generated by the providers, and handled in class TUI.
|
||||
"""
|
||||
for provider in self.providers:
|
||||
new = provider.read(by_guest=by_guest)
|
||||
for key in new if by_guest else provider.fields:
|
||||
oldval = self.values.get(key, (0, 0))[0]
|
||||
for key in new:
|
||||
oldval = self.values.get(key, EventStat(0, 0)).value
|
||||
newval = new.get(key, 0)
|
||||
newdelta = newval - oldval
|
||||
self.values[key] = (newval, newdelta)
|
||||
self.values[key] = EventStat(newval, newdelta)
|
||||
return self.values
|
||||
|
||||
def toggle_display_guests(self, to_pid):
|
||||
@ -899,10 +951,10 @@ class Stats(object):
|
||||
self.get(to_pid)
|
||||
return 0
|
||||
|
||||
|
||||
DELAY_DEFAULT = 3.0
|
||||
MAX_GUEST_NAME_LEN = 48
|
||||
MAX_REGEX_LEN = 44
|
||||
DEFAULT_REGEX = r'^[^\(]*$'
|
||||
SORT_DEFAULT = 0
|
||||
|
||||
|
||||
@ -969,7 +1021,7 @@ class Tui(object):
|
||||
|
||||
return res
|
||||
|
||||
def print_all_gnames(self, row):
|
||||
def _print_all_gnames(self, row):
|
||||
"""Print a list of all running guests along with their pids."""
|
||||
self.screen.addstr(row, 2, '%8s %-60s' %
|
||||
('Pid', 'Guest Name (fuzzy list, might be '
|
||||
@ -1032,19 +1084,13 @@ class Tui(object):
|
||||
|
||||
return name
|
||||
|
||||
def update_drilldown(self):
|
||||
"""Sets or removes a filter that only allows fields without braces."""
|
||||
if not self.stats.fields_filter:
|
||||
self.stats.fields_filter = DEFAULT_REGEX
|
||||
|
||||
elif self.stats.fields_filter == DEFAULT_REGEX:
|
||||
self.stats.fields_filter = None
|
||||
|
||||
def update_pid(self, pid):
|
||||
def _update_pid(self, pid):
|
||||
"""Propagates pid selection to stats object."""
|
||||
self.screen.addstr(4, 1, 'Updating pid filter...')
|
||||
self.screen.refresh()
|
||||
self.stats.pid_filter = pid
|
||||
|
||||
def refresh_header(self, pid=None):
|
||||
def _refresh_header(self, pid=None):
|
||||
"""Refreshes the header."""
|
||||
if pid is None:
|
||||
pid = self.stats.pid_filter
|
||||
@ -1059,8 +1105,7 @@ class Tui(object):
|
||||
.format(pid, gname), curses.A_BOLD)
|
||||
else:
|
||||
self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
|
||||
if self.stats.fields_filter and self.stats.fields_filter \
|
||||
!= DEFAULT_REGEX:
|
||||
if self.stats.fields_filter:
|
||||
regex = self.stats.fields_filter
|
||||
if len(regex) > MAX_REGEX_LEN:
|
||||
regex = regex[:MAX_REGEX_LEN] + '...'
|
||||
@ -1075,56 +1120,99 @@ class Tui(object):
|
||||
self.screen.addstr(4, 1, 'Collecting data...')
|
||||
self.screen.refresh()
|
||||
|
||||
def refresh_body(self, sleeptime):
|
||||
def _refresh_body(self, sleeptime):
|
||||
def is_child_field(field):
|
||||
return field.find('(') != -1
|
||||
|
||||
def insert_child(sorted_items, child, values, parent):
|
||||
num = len(sorted_items)
|
||||
for i in range(0, num):
|
||||
# only add child if parent is present
|
||||
if parent.startswith(sorted_items[i][0]):
|
||||
sorted_items.insert(i + 1, (' ' + child, values))
|
||||
|
||||
def get_sorted_events(self, stats):
|
||||
""" separate parent and child events """
|
||||
if self._sorting == SORT_DEFAULT:
|
||||
def sortkey((_k, v)):
|
||||
# sort by (delta value, overall value)
|
||||
return (v.delta, v.value)
|
||||
else:
|
||||
def sortkey((_k, v)):
|
||||
# sort by overall value
|
||||
return v.value
|
||||
|
||||
childs = []
|
||||
sorted_items = []
|
||||
# we can't rule out child events to appear prior to parents even
|
||||
# when sorted - separate out all children first, and add in later
|
||||
for key, values in sorted(stats.items(), key=sortkey,
|
||||
reverse=True):
|
||||
if values == (0, 0):
|
||||
continue
|
||||
if key.find(' ') != -1:
|
||||
if not self.stats.child_events:
|
||||
continue
|
||||
childs.insert(0, (key, values))
|
||||
else:
|
||||
sorted_items.append((key, values))
|
||||
if self.stats.child_events:
|
||||
for key, values in childs:
|
||||
(child, parent) = key.split(' ')
|
||||
insert_child(sorted_items, child, values, parent)
|
||||
|
||||
return sorted_items
|
||||
|
||||
row = 3
|
||||
self.screen.move(row, 0)
|
||||
self.screen.clrtobot()
|
||||
stats = self.stats.get(self._display_guests)
|
||||
|
||||
def sortCurAvg(x):
|
||||
# sort by current events if available
|
||||
if stats[x][1]:
|
||||
return (-stats[x][1], -stats[x][0])
|
||||
else:
|
||||
return (0, -stats[x][0])
|
||||
|
||||
def sortTotal(x):
|
||||
# sort by totals
|
||||
return (0, -stats[x][0])
|
||||
total = 0.
|
||||
for key in stats.keys():
|
||||
if key.find('(') is -1:
|
||||
total += stats[key][0]
|
||||
if self._sorting == SORT_DEFAULT:
|
||||
sortkey = sortCurAvg
|
||||
else:
|
||||
sortkey = sortTotal
|
||||
ctotal = 0.
|
||||
for key, values in stats.items():
|
||||
if self._display_guests:
|
||||
if self.get_gname_from_pid(key):
|
||||
total += values.value
|
||||
continue
|
||||
if not key.find(' ') != -1:
|
||||
total += values.value
|
||||
else:
|
||||
ctotal += values.value
|
||||
if total == 0.:
|
||||
# we don't have any fields, or all non-child events are filtered
|
||||
total = ctotal
|
||||
|
||||
# print events
|
||||
tavg = 0
|
||||
for key in sorted(stats.keys(), key=sortkey):
|
||||
if row >= self.screen.getmaxyx()[0] - 1:
|
||||
tcur = 0
|
||||
for key, values in get_sorted_events(self, stats):
|
||||
if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
|
||||
break
|
||||
values = stats[key]
|
||||
if not values[0] and not values[1]:
|
||||
break
|
||||
if values[0] is not None:
|
||||
cur = int(round(values[1] / sleeptime)) if values[1] else ''
|
||||
if self._display_guests:
|
||||
key = self.get_gname_from_pid(key)
|
||||
self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
|
||||
(key, values[0], values[0] * 100 / total,
|
||||
cur))
|
||||
if cur is not '' and key.find('(') is -1:
|
||||
tavg += cur
|
||||
if self._display_guests:
|
||||
key = self.get_gname_from_pid(key)
|
||||
if not key:
|
||||
continue
|
||||
cur = int(round(values.delta / sleeptime)) if values.delta else ''
|
||||
if key[0] != ' ':
|
||||
if values.delta:
|
||||
tcur += values.delta
|
||||
ptotal = values.value
|
||||
ltotal = total
|
||||
else:
|
||||
ltotal = ptotal
|
||||
self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
|
||||
values.value,
|
||||
values.value * 100 / float(ltotal), cur))
|
||||
row += 1
|
||||
if row == 3:
|
||||
self.screen.addstr(4, 1, 'No matching events reported yet')
|
||||
else:
|
||||
if row > 4:
|
||||
tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
|
||||
self.screen.addstr(row, 1, '%-40s %10d %8s' %
|
||||
('Total', total, tavg if tavg else ''),
|
||||
curses.A_BOLD)
|
||||
('Total', total, tavg), curses.A_BOLD)
|
||||
self.screen.refresh()
|
||||
|
||||
def show_msg(self, text):
|
||||
def _show_msg(self, text):
|
||||
"""Display message centered text and exit on key press"""
|
||||
hint = 'Press any key to continue'
|
||||
curses.cbreak()
|
||||
@ -1139,16 +1227,16 @@ class Tui(object):
|
||||
curses.A_STANDOUT)
|
||||
self.screen.getkey()
|
||||
|
||||
def show_help_interactive(self):
|
||||
def _show_help_interactive(self):
|
||||
"""Display help with list of interactive commands"""
|
||||
msg = (' b toggle events by guests (debugfs only, honors'
|
||||
' filters)',
|
||||
' c clear filter',
|
||||
' f filter by regular expression',
|
||||
' g filter by guest name',
|
||||
' g filter by guest name/PID',
|
||||
' h display interactive commands reference',
|
||||
' o toggle sorting order (Total vs CurAvg/s)',
|
||||
' p filter by PID',
|
||||
' p filter by guest name/PID',
|
||||
' q quit',
|
||||
' r reset stats',
|
||||
' s set update interval',
|
||||
@ -1165,14 +1253,15 @@ class Tui(object):
|
||||
self.screen.addstr(row, 0, line)
|
||||
row += 1
|
||||
self.screen.getkey()
|
||||
self.refresh_header()
|
||||
self._refresh_header()
|
||||
|
||||
def show_filter_selection(self):
|
||||
def _show_filter_selection(self):
|
||||
"""Draws filter selection mask.
|
||||
|
||||
Asks for a valid regex and sets the fields filter accordingly.
|
||||
|
||||
"""
|
||||
msg = ''
|
||||
while True:
|
||||
self.screen.erase()
|
||||
self.screen.addstr(0, 0,
|
||||
@ -1181,61 +1270,25 @@ class Tui(object):
|
||||
self.screen.addstr(2, 0,
|
||||
"Current regex: {0}"
|
||||
.format(self.stats.fields_filter))
|
||||
self.screen.addstr(5, 0, msg)
|
||||
self.screen.addstr(3, 0, "New regex: ")
|
||||
curses.echo()
|
||||
regex = self.screen.getstr().decode(ENCODING)
|
||||
curses.noecho()
|
||||
if len(regex) == 0:
|
||||
self.stats.fields_filter = DEFAULT_REGEX
|
||||
self.refresh_header()
|
||||
self.stats.fields_filter = ''
|
||||
self._refresh_header()
|
||||
return
|
||||
try:
|
||||
re.compile(regex)
|
||||
self.stats.fields_filter = regex
|
||||
self.refresh_header()
|
||||
self._refresh_header()
|
||||
return
|
||||
except re.error:
|
||||
msg = '"' + regex + '": Not a valid regular expression'
|
||||
continue
|
||||
|
||||
def show_vm_selection_by_pid(self):
|
||||
"""Draws PID selection mask.
|
||||
|
||||
Asks for a pid until a valid pid or 0 has been entered.
|
||||
|
||||
"""
|
||||
msg = ''
|
||||
while True:
|
||||
self.screen.erase()
|
||||
self.screen.addstr(0, 0,
|
||||
'Show statistics for specific pid.',
|
||||
curses.A_BOLD)
|
||||
self.screen.addstr(1, 0,
|
||||
'This might limit the shown data to the trace '
|
||||
'statistics.')
|
||||
self.screen.addstr(5, 0, msg)
|
||||
self.print_all_gnames(7)
|
||||
|
||||
curses.echo()
|
||||
self.screen.addstr(3, 0, "Pid [0 or pid]: ")
|
||||
pid = self.screen.getstr().decode(ENCODING)
|
||||
curses.noecho()
|
||||
|
||||
try:
|
||||
if len(pid) > 0:
|
||||
pid = int(pid)
|
||||
if pid != 0 and not os.path.isdir(os.path.join('/proc/',
|
||||
str(pid))):
|
||||
msg = '"' + str(pid) + '": Not a running process'
|
||||
continue
|
||||
else:
|
||||
pid = 0
|
||||
self.refresh_header(pid)
|
||||
self.update_pid(pid)
|
||||
break
|
||||
except ValueError:
|
||||
msg = '"' + str(pid) + '": Not a valid pid'
|
||||
|
||||
def show_set_update_interval(self):
|
||||
def _show_set_update_interval(self):
|
||||
"""Draws update interval selection mask."""
|
||||
msg = ''
|
||||
while True:
|
||||
@ -1265,60 +1318,67 @@ class Tui(object):
|
||||
|
||||
except ValueError:
|
||||
msg = '"' + str(val) + '": Invalid value'
|
||||
self.refresh_header()
|
||||
self._refresh_header()
|
||||
|
||||
def show_vm_selection_by_guest_name(self):
|
||||
def _show_vm_selection_by_guest(self):
|
||||
"""Draws guest selection mask.
|
||||
|
||||
Asks for a guest name until a valid guest name or '' is entered.
|
||||
Asks for a guest name or pid until a valid guest name or '' is entered.
|
||||
|
||||
"""
|
||||
msg = ''
|
||||
while True:
|
||||
self.screen.erase()
|
||||
self.screen.addstr(0, 0,
|
||||
'Show statistics for specific guest.',
|
||||
'Show statistics for specific guest or pid.',
|
||||
curses.A_BOLD)
|
||||
self.screen.addstr(1, 0,
|
||||
'This might limit the shown data to the trace '
|
||||
'statistics.')
|
||||
self.screen.addstr(5, 0, msg)
|
||||
self.print_all_gnames(7)
|
||||
self._print_all_gnames(7)
|
||||
curses.echo()
|
||||
self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
|
||||
gname = self.screen.getstr().decode(ENCODING)
|
||||
curses.curs_set(1)
|
||||
self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
|
||||
guest = self.screen.getstr().decode(ENCODING)
|
||||
curses.noecho()
|
||||
|
||||
if not gname:
|
||||
self.refresh_header(0)
|
||||
self.update_pid(0)
|
||||
pid = 0
|
||||
if not guest or guest == '0':
|
||||
break
|
||||
else:
|
||||
pids = []
|
||||
try:
|
||||
pids = self.get_pid_from_gname(gname)
|
||||
except:
|
||||
msg = '"' + gname + '": Internal error while searching, ' \
|
||||
'use pid filter instead'
|
||||
if guest.isdigit():
|
||||
if not os.path.isdir(os.path.join('/proc/', guest)):
|
||||
msg = '"' + guest + '": Not a running process'
|
||||
continue
|
||||
if len(pids) == 0:
|
||||
msg = '"' + gname + '": Not an active guest'
|
||||
continue
|
||||
if len(pids) > 1:
|
||||
msg = '"' + gname + '": Multiple matches found, use pid ' \
|
||||
'filter instead'
|
||||
continue
|
||||
self.refresh_header(pids[0])
|
||||
self.update_pid(pids[0])
|
||||
pid = int(guest)
|
||||
break
|
||||
pids = []
|
||||
try:
|
||||
pids = self.get_pid_from_gname(guest)
|
||||
except:
|
||||
msg = '"' + guest + '": Internal error while searching, ' \
|
||||
'use pid filter instead'
|
||||
continue
|
||||
if len(pids) == 0:
|
||||
msg = '"' + guest + '": Not an active guest'
|
||||
continue
|
||||
if len(pids) > 1:
|
||||
msg = '"' + guest + '": Multiple matches found, use pid ' \
|
||||
'filter instead'
|
||||
continue
|
||||
pid = pids[0]
|
||||
break
|
||||
curses.curs_set(0)
|
||||
self._refresh_header(pid)
|
||||
self._update_pid(pid)
|
||||
|
||||
def show_stats(self):
|
||||
"""Refreshes the screen and processes user input."""
|
||||
sleeptime = self._delay_initial
|
||||
self.refresh_header()
|
||||
self._refresh_header()
|
||||
start = 0.0 # result based on init value never appears on screen
|
||||
while True:
|
||||
self.refresh_body(time.time() - start)
|
||||
self._refresh_body(time.time() - start)
|
||||
curses.halfdelay(int(sleeptime * 10))
|
||||
start = time.time()
|
||||
sleeptime = self._delay_regular
|
||||
@ -1327,47 +1387,39 @@ class Tui(object):
|
||||
if char == 'b':
|
||||
self._display_guests = not self._display_guests
|
||||
if self.stats.toggle_display_guests(self._display_guests):
|
||||
self.show_msg(['Command not available with tracepoints'
|
||||
' enabled', 'Restart with debugfs only '
|
||||
'(see option \'-d\') and try again!'])
|
||||
self._show_msg(['Command not available with '
|
||||
'tracepoints enabled', 'Restart with '
|
||||
'debugfs only (see option \'-d\') and '
|
||||
'try again!'])
|
||||
self._display_guests = not self._display_guests
|
||||
self.refresh_header()
|
||||
self._refresh_header()
|
||||
if char == 'c':
|
||||
self.stats.fields_filter = DEFAULT_REGEX
|
||||
self.refresh_header(0)
|
||||
self.update_pid(0)
|
||||
self.stats.fields_filter = ''
|
||||
self._refresh_header(0)
|
||||
self._update_pid(0)
|
||||
if char == 'f':
|
||||
curses.curs_set(1)
|
||||
self.show_filter_selection()
|
||||
self._show_filter_selection()
|
||||
curses.curs_set(0)
|
||||
sleeptime = self._delay_initial
|
||||
if char == 'g':
|
||||
curses.curs_set(1)
|
||||
self.show_vm_selection_by_guest_name()
|
||||
curses.curs_set(0)
|
||||
if char == 'g' or char == 'p':
|
||||
self._show_vm_selection_by_guest()
|
||||
sleeptime = self._delay_initial
|
||||
if char == 'h':
|
||||
self.show_help_interactive()
|
||||
self._show_help_interactive()
|
||||
if char == 'o':
|
||||
self._sorting = not self._sorting
|
||||
if char == 'p':
|
||||
curses.curs_set(1)
|
||||
self.show_vm_selection_by_pid()
|
||||
curses.curs_set(0)
|
||||
sleeptime = self._delay_initial
|
||||
if char == 'q':
|
||||
break
|
||||
if char == 'r':
|
||||
self.stats.reset()
|
||||
if char == 's':
|
||||
curses.curs_set(1)
|
||||
self.show_set_update_interval()
|
||||
self._show_set_update_interval()
|
||||
curses.curs_set(0)
|
||||
sleeptime = self._delay_initial
|
||||
if char == 'x':
|
||||
self.update_drilldown()
|
||||
# prevents display of current values on next refresh
|
||||
self.stats.get(self._display_guests)
|
||||
self.stats.child_events = not self.stats.child_events
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except curses.error:
|
||||
@ -1380,9 +1432,9 @@ def batch(stats):
|
||||
s = stats.get()
|
||||
time.sleep(1)
|
||||
s = stats.get()
|
||||
for key in sorted(s.keys()):
|
||||
values = s[key]
|
||||
print('%-42s%10d%10d' % (key, values[0], values[1]))
|
||||
for key, values in sorted(s.items()):
|
||||
print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
|
||||
values.delta))
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
||||
@ -1392,14 +1444,14 @@ def log(stats):
|
||||
keys = sorted(stats.get().keys())
|
||||
|
||||
def banner():
|
||||
for k in keys:
|
||||
print(k, end=' ')
|
||||
for key in keys:
|
||||
print(key.split(' ')[0], end=' ')
|
||||
print()
|
||||
|
||||
def statline():
|
||||
s = stats.get()
|
||||
for k in keys:
|
||||
print(' %9d' % s[k][1], end=' ')
|
||||
for key in keys:
|
||||
print(' %9d' % s[key].delta, end=' ')
|
||||
print()
|
||||
line = 0
|
||||
banner_repeat = 20
|
||||
@ -1504,7 +1556,7 @@ Press any other key to refresh statistics immediately.
|
||||
)
|
||||
optparser.add_option('-f', '--fields',
|
||||
action='store',
|
||||
default=DEFAULT_REGEX,
|
||||
default='',
|
||||
dest='fields',
|
||||
help='''fields to display (regex)
|
||||
"-f help" for a list of available events''',
|
||||
@ -1539,17 +1591,6 @@ Press any other key to refresh statistics immediately.
|
||||
|
||||
def check_access(options):
|
||||
"""Exits if the current user can't access all needed directories."""
|
||||
if not os.path.exists('/sys/kernel/debug'):
|
||||
sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
|
||||
sys.exit(1)
|
||||
|
||||
if not os.path.exists(PATH_DEBUGFS_KVM):
|
||||
sys.stderr.write("Please make sure, that debugfs is mounted and "
|
||||
"readable by the current user:\n"
|
||||
"('mount -t debugfs debugfs /sys/kernel/debug')\n"
|
||||
"Also ensure, that the kvm modules are loaded.\n")
|
||||
sys.exit(1)
|
||||
|
||||
if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
|
||||
not options.debugfs):
|
||||
sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
|
||||
@ -1567,7 +1608,33 @@ def check_access(options):
|
||||
return options
|
||||
|
||||
|
||||
def assign_globals():
|
||||
global PATH_DEBUGFS_KVM
|
||||
global PATH_DEBUGFS_TRACING
|
||||
|
||||
debugfs = ''
|
||||
for line in file('/proc/mounts'):
|
||||
if line.split(' ')[0] == 'debugfs':
|
||||
debugfs = line.split(' ')[1]
|
||||
break
|
||||
if debugfs == '':
|
||||
sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
|
||||
"your kernel, mounted and\nreadable by the current "
|
||||
"user:\n"
|
||||
"('mount -t debugfs debugfs /sys/kernel/debug')\n")
|
||||
sys.exit(1)
|
||||
|
||||
PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
|
||||
PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
|
||||
|
||||
if not os.path.exists(PATH_DEBUGFS_KVM):
|
||||
sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
|
||||
"your kernel and that the modules are loaded.\n")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def main():
|
||||
assign_globals()
|
||||
options = get_options()
|
||||
options = check_access(options)
|
||||
|
||||
|
@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
|
||||
|
||||
*f*:: filter by regular expression
|
||||
|
||||
*g*:: filter by guest name
|
||||
*g*:: filter by guest name/PID
|
||||
|
||||
*h*:: display interactive commands reference
|
||||
|
||||
*o*:: toggle sorting order (Total vs CurAvg/s)
|
||||
|
||||
*p*:: filter by PID
|
||||
*p*:: filter by guest name/PID
|
||||
|
||||
*q*:: quit
|
||||
|
||||
|
@ -36,6 +36,8 @@ static struct timecounter *timecounter;
|
||||
static unsigned int host_vtimer_irq;
|
||||
static u32 host_vtimer_irq_flags;
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
|
||||
|
||||
static const struct kvm_irq_level default_ptimer_irq = {
|
||||
.irq = 30,
|
||||
.level = 1,
|
||||
@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
|
||||
return timecounter->cc->read(timecounter->cc);
|
||||
}
|
||||
|
||||
static inline bool userspace_irqchip(struct kvm *kvm)
|
||||
{
|
||||
return static_branch_unlikely(&userspace_irqchip_in_use) &&
|
||||
unlikely(!irqchip_in_kernel(kvm));
|
||||
}
|
||||
|
||||
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
|
||||
{
|
||||
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
|
||||
@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
|
||||
cancel_work_sync(work);
|
||||
}
|
||||
|
||||
static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
|
||||
|
||||
/*
|
||||
* When using a userspace irqchip with the architected timers, we must
|
||||
* prevent continuously exiting from the guest, and therefore mask the
|
||||
* physical interrupt by disabling it on the host interrupt controller
|
||||
* when the virtual level is high, such that the guest can make
|
||||
* forward progress. Once we detect the output level being
|
||||
* de-asserted, we unmask the interrupt again so that we exit from the
|
||||
* guest when the timer fires.
|
||||
*/
|
||||
if (vtimer->irq.level)
|
||||
disable_percpu_irq(host_vtimer_irq);
|
||||
else
|
||||
enable_percpu_irq(host_vtimer_irq, 0);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
|
||||
@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
||||
if (kvm_timer_should_fire(vtimer))
|
||||
kvm_timer_update_irq(vcpu, true, vtimer);
|
||||
|
||||
if (static_branch_unlikely(&userspace_irqchip_in_use) &&
|
||||
unlikely(!irqchip_in_kernel(vcpu->kvm)))
|
||||
kvm_vtimer_update_mask_user(vcpu);
|
||||
if (userspace_irqchip(vcpu->kvm) &&
|
||||
!static_branch_unlikely(&has_gic_active_state))
|
||||
disable_percpu_irq(host_vtimer_irq);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
|
||||
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
|
||||
timer_ctx->irq.level);
|
||||
|
||||
if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
|
||||
likely(irqchip_in_kernel(vcpu->kvm))) {
|
||||
if (!userspace_irqchip(vcpu->kvm)) {
|
||||
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
|
||||
timer_ctx->irq.irq,
|
||||
timer_ctx->irq.level,
|
||||
@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
|
||||
phys_timer_emulate(vcpu);
|
||||
}
|
||||
|
||||
static void __timer_snapshot_state(struct arch_timer_context *timer)
|
||||
{
|
||||
timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
|
||||
timer->cnt_cval = read_sysreg_el0(cntv_cval);
|
||||
}
|
||||
|
||||
static void vtimer_save_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
|
||||
if (!vtimer->loaded)
|
||||
goto out;
|
||||
|
||||
if (timer->enabled)
|
||||
__timer_snapshot_state(vtimer);
|
||||
if (timer->enabled) {
|
||||
vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
|
||||
vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
|
||||
}
|
||||
|
||||
/* Disable the virtual timer */
|
||||
write_sysreg_el0(0, cntv_ctl);
|
||||
@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
|
||||
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
|
||||
}
|
||||
|
||||
static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
|
||||
static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
|
||||
{
|
||||
int r;
|
||||
r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
|
||||
WARN_ON(r);
|
||||
}
|
||||
|
||||
static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
|
||||
bool phys_active;
|
||||
int ret;
|
||||
|
||||
phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
|
||||
|
||||
ret = irq_set_irqchip_state(host_vtimer_irq,
|
||||
IRQCHIP_STATE_ACTIVE,
|
||||
phys_active);
|
||||
WARN_ON(ret);
|
||||
if (irqchip_in_kernel(vcpu->kvm))
|
||||
phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
|
||||
else
|
||||
phys_active = vtimer->irq.level;
|
||||
set_vtimer_irq_phys_active(vcpu, phys_active);
|
||||
}
|
||||
|
||||
static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
|
||||
static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_vtimer_update_mask_user(vcpu);
|
||||
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
|
||||
|
||||
/*
|
||||
* When using a userspace irqchip with the architected timers and a
|
||||
* host interrupt controller that doesn't support an active state, we
|
||||
* must still prevent continuously exiting from the guest, and
|
||||
* therefore mask the physical interrupt by disabling it on the host
|
||||
* interrupt controller when the virtual level is high, such that the
|
||||
* guest can make forward progress. Once we detect the output level
|
||||
* being de-asserted, we unmask the interrupt again so that we exit
|
||||
* from the guest when the timer fires.
|
||||
*/
|
||||
if (vtimer->irq.level)
|
||||
disable_percpu_irq(host_vtimer_irq);
|
||||
else
|
||||
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
|
||||
}
|
||||
|
||||
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
|
||||
@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
|
||||
if (unlikely(!timer->enabled))
|
||||
return;
|
||||
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
|
||||
kvm_timer_vcpu_load_user(vcpu);
|
||||
if (static_branch_likely(&has_gic_active_state))
|
||||
kvm_timer_vcpu_load_gic(vcpu);
|
||||
else
|
||||
kvm_timer_vcpu_load_vgic(vcpu);
|
||||
kvm_timer_vcpu_load_nogic(vcpu);
|
||||
|
||||
set_cntvoff(vtimer->cntvoff);
|
||||
|
||||
@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
|
||||
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
|
||||
__timer_snapshot_state(vtimer);
|
||||
if (!kvm_timer_should_fire(vtimer)) {
|
||||
kvm_timer_update_irq(vcpu, false, vtimer);
|
||||
kvm_vtimer_update_mask_user(vcpu);
|
||||
}
|
||||
if (!kvm_timer_should_fire(vtimer)) {
|
||||
kvm_timer_update_irq(vcpu, false, vtimer);
|
||||
if (static_branch_likely(&has_gic_active_state))
|
||||
set_vtimer_irq_phys_active(vcpu, false);
|
||||
else
|
||||
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unmask_vtimer_irq_user(vcpu);
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
|
||||
if (unlikely(!timer->enabled))
|
||||
return;
|
||||
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
|
||||
unmask_vtimer_irq_user(vcpu);
|
||||
}
|
||||
|
||||
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic)
|
||||
kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
|
||||
goto out_free_irq;
|
||||
}
|
||||
|
||||
static_branch_enable(&has_gic_active_state);
|
||||
}
|
||||
|
||||
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
|
||||
|
@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
/* Check for overlaps */
|
||||
r = -EEXIST;
|
||||
kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
|
||||
if ((slot->id >= KVM_USER_MEM_SLOTS) ||
|
||||
(slot->id == id))
|
||||
if (slot->id == id)
|
||||
continue;
|
||||
if (!((base_gfn + npages <= slot->base_gfn) ||
|
||||
(base_gfn >= slot->base_gfn + slot->npages)))
|
||||
|
Loading…
Reference in New Issue
Block a user