KVM: x86/xen: Compatibility fixes for shared runstate area
The guest runstate area can be arbitrarily byte-aligned. In fact, even when a sane 32-bit guest aligns the overall structure nicely, the 64-bit fields in the structure end up being unaligned due to the fact that the 32-bit ABI only aligns them to 32 bits.

So setting the ->state_entry_time field to something|XEN_RUNSTATE_UPDATE is buggy, because if it's unaligned then we can't update the whole field atomically; the low bytes might be observable before the _UPDATE bit is. Xen actually updates the *byte* containing that top bit, on its own. KVM should do the same.

In addition, we cannot assume that the runstate area fits within a single page. One option might be to make the gfn_to_pfn cache cope with regions that cross a page — but getting a contiguous virtual kernel mapping of a discontiguous set of IOMEM pages is a distinctly non-trivial exercise, and it seems this is the *only* current use case for the GPC which would benefit from it.

An earlier version of the runstate code did use a gfn_to_hva cache for this purpose, but it still had the single-page restriction because it used the uhva directly — because it needs to be able to do so atomically when the vCPU is being scheduled out, so it used pagefault_disable() around the accesses and didn't just use kvm_write_guest_cached() which has a fallback path.

So... use a pair of GPCs for the first and potential second page covering the runstate area. We can get away with locking both at once because nothing else takes more than one GPC lock at a time so we can invent a trivial ordering rule.

The common case where it's all in the same page is kept as a fast path, but in both cases, the actual guest structure (compat or not) is built up from the fields in @vx, following preset pointers to the state and times fields. The only difference is whether those pointers point to the kernel stack (in the split case) or to guest memory directly via the GPC. The fast path is also fixed to use a byte access for the XEN_RUNSTATE_UPDATE bit, then the only real difference is the dual memcpy.

Finally, Xen also does write the runstate area immediately when it's configured. Flip the kvm_xen_update_runstate() and …_guest() functions and call the latter directly when the runstate area is set. This means that other ioctls which modify the runstate also write it immediately to the guest when they do so, which is also intended.

Update the xen_shinfo_test to exercise the pathological case where the XEN_RUNSTATE_UPDATE flag in the top byte of the state_entry_time is actually in a different page to the rest of the 64-bit word.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
1e79a9e3ab
commit
5ec3289b31
@ -686,6 +686,7 @@ struct kvm_vcpu_xen {
|
|||||||
struct gfn_to_pfn_cache vcpu_info_cache;
|
struct gfn_to_pfn_cache vcpu_info_cache;
|
||||||
struct gfn_to_pfn_cache vcpu_time_info_cache;
|
struct gfn_to_pfn_cache vcpu_time_info_cache;
|
||||||
struct gfn_to_pfn_cache runstate_cache;
|
struct gfn_to_pfn_cache runstate_cache;
|
||||||
|
struct gfn_to_pfn_cache runstate2_cache;
|
||||||
u64 last_steal;
|
u64 last_steal;
|
||||||
u64 runstate_entry_time;
|
u64 runstate_entry_time;
|
||||||
u64 runstate_times[4];
|
u64 runstate_times[4];
|
||||||
|
@ -170,7 +170,246 @@ static void kvm_xen_init_timer(struct kvm_vcpu *vcpu)
|
|||||||
vcpu->arch.xen.timer.function = xen_timer_callback;
|
vcpu->arch.xen.timer.function = xen_timer_callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
|
||||||
|
{
|
||||||
|
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||||
|
struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
|
||||||
|
struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
|
||||||
|
size_t user_len, user_len1, user_len2;
|
||||||
|
struct vcpu_runstate_info rs;
|
||||||
|
unsigned long flags;
|
||||||
|
size_t times_ofs;
|
||||||
|
uint8_t *update_bit;
|
||||||
|
uint64_t *rs_times;
|
||||||
|
int *rs_state;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The only difference between 32-bit and 64-bit versions of the
|
||||||
|
* runstate struct is the alignment of uint64_t in 32-bit, which
|
||||||
|
* means that the 64-bit version has an additional 4 bytes of
|
||||||
|
* padding after the first field 'state'. Let's be really really
|
||||||
|
* paranoid about that, and matching it with our internal data
|
||||||
|
* structures that we memcpy into it...
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
|
||||||
|
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
|
||||||
|
BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
/*
|
||||||
|
* The 64-bit structure has 4 bytes of padding before 'state_entry_time'
|
||||||
|
* so each subsequent field is shifted by 4, and it's 4 bytes longer.
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||||
|
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
|
||||||
|
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
|
||||||
|
offsetof(struct compat_vcpu_runstate_info, time) + 4);
|
||||||
|
BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4);
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* The state field is in the same place at the start of both structs,
|
||||||
|
* and is the same size (int) as vx->current_runstate.
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
|
||||||
|
offsetof(struct compat_vcpu_runstate_info, state));
|
||||||
|
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
|
||||||
|
sizeof(vx->current_runstate));
|
||||||
|
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
|
||||||
|
sizeof(vx->current_runstate));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The state_entry_time field is 64 bits in both versions, and the
|
||||||
|
* XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86
|
||||||
|
* is little-endian means that it's in the last *byte* of the word.
|
||||||
|
* That detail is important later.
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
|
||||||
|
sizeof(uint64_t));
|
||||||
|
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||||
|
sizeof(uint64_t));
|
||||||
|
BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The time array is four 64-bit quantities in both versions, matching
|
||||||
|
* the vx->runstate_times and immediately following state_entry_time.
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||||
|
offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t));
|
||||||
|
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||||
|
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t));
|
||||||
|
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
||||||
|
sizeof_field(struct compat_vcpu_runstate_info, time));
|
||||||
|
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
||||||
|
sizeof(vx->runstate_times));
|
||||||
|
|
||||||
|
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
|
||||||
|
user_len = sizeof(struct vcpu_runstate_info);
|
||||||
|
times_ofs = offsetof(struct vcpu_runstate_info,
|
||||||
|
state_entry_time);
|
||||||
|
} else {
|
||||||
|
user_len = sizeof(struct compat_vcpu_runstate_info);
|
||||||
|
times_ofs = offsetof(struct compat_vcpu_runstate_info,
|
||||||
|
state_entry_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are basically no alignment constraints. The guest can set it
|
||||||
|
* up so it crosses from one page to the next, and at arbitrary byte
|
||||||
|
* alignment (and the 32-bit ABI doesn't align the 64-bit integers
|
||||||
|
* anyway, even if the overall struct had been 64-bit aligned).
|
||||||
|
*/
|
||||||
|
if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) {
|
||||||
|
user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
|
||||||
|
user_len2 = user_len - user_len1;
|
||||||
|
} else {
|
||||||
|
user_len1 = user_len;
|
||||||
|
user_len2 = 0;
|
||||||
|
}
|
||||||
|
BUG_ON(user_len1 + user_len2 != user_len);
|
||||||
|
|
||||||
|
retry:
|
||||||
|
/*
|
||||||
|
* Attempt to obtain the GPC lock on *both* (if there are two)
|
||||||
|
* gfn_to_pfn caches that cover the region.
|
||||||
|
*/
|
||||||
|
read_lock_irqsave(&gpc1->lock, flags);
|
||||||
|
while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa, user_len1)) {
|
||||||
|
read_unlock_irqrestore(&gpc1->lock, flags);
|
||||||
|
|
||||||
|
/* When invoked from kvm_sched_out() we cannot sleep */
|
||||||
|
if (atomic)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
|
||||||
|
return;
|
||||||
|
|
||||||
|
read_lock_irqsave(&gpc1->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(!user_len2)) {
|
||||||
|
/*
|
||||||
|
* Set up three pointers directly to the runstate_info
|
||||||
|
* struct in the guest (via the GPC).
|
||||||
|
*
|
||||||
|
* • @rs_state → state field
|
||||||
|
* • @rs_times → state_entry_time field.
|
||||||
|
* • @update_bit → last byte of state_entry_time, which
|
||||||
|
* contains the XEN_RUNSTATE_UPDATE bit.
|
||||||
|
*/
|
||||||
|
rs_state = gpc1->khva;
|
||||||
|
rs_times = gpc1->khva + times_ofs;
|
||||||
|
update_bit = ((void *)(&rs_times[1])) - 1;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* The guest's runstate_info is split across two pages and we
|
||||||
|
* need to hold and validate both GPCs simultaneously. We can
|
||||||
|
* declare a lock ordering GPC1 > GPC2 because nothing else
|
||||||
|
* takes them more than one at a time.
|
||||||
|
*/
|
||||||
|
read_lock(&gpc2->lock);
|
||||||
|
|
||||||
|
if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
|
||||||
|
read_unlock(&gpc2->lock);
|
||||||
|
read_unlock_irqrestore(&gpc1->lock, flags);
|
||||||
|
|
||||||
|
/* When invoked from kvm_sched_out() we cannot sleep */
|
||||||
|
if (atomic)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use kvm_gpc_activate() here because if the runstate
|
||||||
|
* area was configured in 32-bit mode and only extends
|
||||||
|
* to the second page now because the guest changed to
|
||||||
|
* 64-bit mode, the second GPC won't have been set up.
|
||||||
|
*/
|
||||||
|
if (kvm_gpc_activate(v->kvm, gpc2, NULL, KVM_HOST_USES_PFN,
|
||||||
|
gpc1->gpa + user_len1, user_len2))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We dropped the lock on GPC1 so we have to go all the
|
||||||
|
* way back and revalidate that too.
|
||||||
|
*/
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In this case, the runstate_info struct will be assembled on
|
||||||
|
* the kernel stack (compat or not as appropriate) and will
|
||||||
|
* be copied to GPC1/GPC2 with a dual memcpy. Set up the three
|
||||||
|
* rs pointers accordingly.
|
||||||
|
*/
|
||||||
|
rs_times = &rs.state_entry_time;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The rs_state pointer points to the start of what we'll
|
||||||
|
* copy to the guest, which in the case of a compat guest
|
||||||
|
* is the 32-bit field that the compiler thinks is padding.
|
||||||
|
*/
|
||||||
|
rs_state = ((void *)rs_times) - times_ofs;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The update_bit is still directly in the guest memory,
|
||||||
|
* via one GPC or the other.
|
||||||
|
*/
|
||||||
|
if (user_len1 >= times_ofs + sizeof(uint64_t))
|
||||||
|
update_bit = gpc1->khva + times_ofs +
|
||||||
|
sizeof(uint64_t) - 1;
|
||||||
|
else
|
||||||
|
update_bit = gpc2->khva + times_ofs +
|
||||||
|
sizeof(uint64_t) - 1 - user_len1;
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
/*
|
||||||
|
* Don't leak kernel memory through the padding in the 64-bit
|
||||||
|
* version of the struct.
|
||||||
|
*/
|
||||||
|
memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the
|
||||||
|
* state_entry_time field, directly in the guest. We need to set
|
||||||
|
* that (and write-barrier) before writing to the rest of the
|
||||||
|
* structure, and clear it last. Just as Xen does, we address the
|
||||||
|
* single *byte* in which it resides because it might be in a
|
||||||
|
* different cache line to the rest of the 64-bit word, due to
|
||||||
|
* the (lack of) alignment constraints.
|
||||||
|
*/
|
||||||
|
*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
|
||||||
|
smp_wmb();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now assemble the actual structure, either on our kernel stack
|
||||||
|
* or directly in the guest according to how the rs_state and
|
||||||
|
* rs_times pointers were set up above.
|
||||||
|
*/
|
||||||
|
*rs_state = vx->current_runstate;
|
||||||
|
rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
|
||||||
|
memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
|
||||||
|
|
||||||
|
/* For the split case, we have to then copy it to the guest. */
|
||||||
|
if (user_len2) {
|
||||||
|
memcpy(gpc1->khva, rs_state, user_len1);
|
||||||
|
memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2);
|
||||||
|
}
|
||||||
|
smp_wmb();
|
||||||
|
|
||||||
|
/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
|
||||||
|
*update_bit = vx->runstate_entry_time >> 56;
|
||||||
|
smp_wmb();
|
||||||
|
|
||||||
|
if (user_len2)
|
||||||
|
read_unlock(&gpc2->lock);
|
||||||
|
|
||||||
|
read_unlock_irqrestore(&gpc1->lock, flags);
|
||||||
|
|
||||||
|
mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
|
||||||
|
if (user_len2)
|
||||||
|
mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
|
||||||
|
}
|
||||||
|
|
||||||
|
void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
||||||
{
|
{
|
||||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||||
u64 now = get_kvmclock_ns(v->kvm);
|
u64 now = get_kvmclock_ns(v->kvm);
|
||||||
@ -196,122 +435,9 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
|||||||
vx->runstate_times[vx->current_runstate] += delta_ns;
|
vx->runstate_times[vx->current_runstate] += delta_ns;
|
||||||
vx->current_runstate = state;
|
vx->current_runstate = state;
|
||||||
vx->runstate_entry_time = now;
|
vx->runstate_entry_time = now;
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
|
if (vx->runstate_cache.active)
|
||||||
{
|
kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
|
||||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
|
||||||
struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
|
|
||||||
uint64_t *user_times;
|
|
||||||
unsigned long flags;
|
|
||||||
size_t user_len;
|
|
||||||
int *user_state;
|
|
||||||
|
|
||||||
kvm_xen_update_runstate(v, state);
|
|
||||||
|
|
||||||
if (!vx->runstate_cache.active)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
|
|
||||||
user_len = sizeof(struct vcpu_runstate_info);
|
|
||||||
else
|
|
||||||
user_len = sizeof(struct compat_vcpu_runstate_info);
|
|
||||||
|
|
||||||
read_lock_irqsave(&gpc->lock, flags);
|
|
||||||
while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
|
|
||||||
user_len)) {
|
|
||||||
read_unlock_irqrestore(&gpc->lock, flags);
|
|
||||||
|
|
||||||
/* When invoked from kvm_sched_out() we cannot sleep */
|
|
||||||
if (state == RUNSTATE_runnable)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
|
|
||||||
return;
|
|
||||||
|
|
||||||
read_lock_irqsave(&gpc->lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The only difference between 32-bit and 64-bit versions of the
|
|
||||||
* runstate struct us the alignment of uint64_t in 32-bit, which
|
|
||||||
* means that the 64-bit version has an additional 4 bytes of
|
|
||||||
* padding after the first field 'state'.
|
|
||||||
*
|
|
||||||
* So we use 'int __user *user_state' to point to the state field,
|
|
||||||
* and 'uint64_t __user *user_times' for runstate_entry_time. So
|
|
||||||
* the actual array of time[] in each state starts at user_times[1].
|
|
||||||
*/
|
|
||||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
|
|
||||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
|
|
||||||
BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
|
||||||
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
|
|
||||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
|
|
||||||
offsetof(struct compat_vcpu_runstate_info, time) + 4);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
user_state = gpc->khva;
|
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
|
|
||||||
user_times = gpc->khva + offsetof(struct vcpu_runstate_info,
|
|
||||||
state_entry_time);
|
|
||||||
else
|
|
||||||
user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info,
|
|
||||||
state_entry_time);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* First write the updated state_entry_time at the appropriate
|
|
||||||
* location determined by 'offset'.
|
|
||||||
*/
|
|
||||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
|
|
||||||
sizeof(user_times[0]));
|
|
||||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
|
|
||||||
sizeof(user_times[0]));
|
|
||||||
|
|
||||||
user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
|
|
||||||
smp_wmb();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Next, write the new runstate. This is in the *same* place
|
|
||||||
* for 32-bit and 64-bit guests, asserted here for paranoia.
|
|
||||||
*/
|
|
||||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
|
|
||||||
offsetof(struct compat_vcpu_runstate_info, state));
|
|
||||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
|
|
||||||
sizeof(vx->current_runstate));
|
|
||||||
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
|
|
||||||
sizeof(vx->current_runstate));
|
|
||||||
|
|
||||||
*user_state = vx->current_runstate;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Write the actual runstate times immediately after the
|
|
||||||
* runstate_entry_time.
|
|
||||||
*/
|
|
||||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
|
||||||
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
|
|
||||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
|
|
||||||
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
|
|
||||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
|
||||||
sizeof_field(struct compat_vcpu_runstate_info, time));
|
|
||||||
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
|
|
||||||
sizeof(vx->runstate_times));
|
|
||||||
|
|
||||||
memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
|
|
||||||
smp_wmb();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
|
|
||||||
* runstate_entry_time field.
|
|
||||||
*/
|
|
||||||
user_times[0] &= ~XEN_RUNSTATE_UPDATE;
|
|
||||||
smp_wmb();
|
|
||||||
|
|
||||||
read_unlock_irqrestore(&gpc->lock, flags);
|
|
||||||
|
|
||||||
mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
|
static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
|
||||||
@ -584,23 +710,57 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
|||||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
|
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
|
||||||
|
size_t sz, sz1, sz2;
|
||||||
|
|
||||||
if (!sched_info_on()) {
|
if (!sched_info_on()) {
|
||||||
r = -EOPNOTSUPP;
|
r = -EOPNOTSUPP;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (data->u.gpa == GPA_INVALID) {
|
if (data->u.gpa == GPA_INVALID) {
|
||||||
|
r = 0;
|
||||||
|
deactivate_out:
|
||||||
kvm_gpc_deactivate(vcpu->kvm,
|
kvm_gpc_deactivate(vcpu->kvm,
|
||||||
&vcpu->arch.xen.runstate_cache);
|
&vcpu->arch.xen.runstate_cache);
|
||||||
r = 0;
|
kvm_gpc_deactivate(vcpu->kvm,
|
||||||
|
&vcpu->arch.xen.runstate2_cache);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
|
/*
|
||||||
NULL, KVM_HOST_USES_PFN, data->u.gpa,
|
* If the guest switches to 64-bit mode after setting the runstate
|
||||||
sizeof(struct vcpu_runstate_info));
|
* address, that's actually OK. kvm_xen_update_runstate_guest()
|
||||||
break;
|
* will cope.
|
||||||
|
*/
|
||||||
|
if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
|
||||||
|
sz = sizeof(struct vcpu_runstate_info);
|
||||||
|
else
|
||||||
|
sz = sizeof(struct compat_vcpu_runstate_info);
|
||||||
|
|
||||||
|
/* How much fits in the (first) page? */
|
||||||
|
sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
|
||||||
|
r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
|
||||||
|
NULL, KVM_HOST_USES_PFN, data->u.gpa, sz1);
|
||||||
|
if (r)
|
||||||
|
goto deactivate_out;
|
||||||
|
|
||||||
|
/* Either map the second page, or deactivate the second GPC */
|
||||||
|
if (sz1 >= sz) {
|
||||||
|
kvm_gpc_deactivate(vcpu->kvm,
|
||||||
|
&vcpu->arch.xen.runstate2_cache);
|
||||||
|
} else {
|
||||||
|
sz2 = sz - sz1;
|
||||||
|
BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK);
|
||||||
|
r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
|
||||||
|
NULL, KVM_HOST_USES_PFN,
|
||||||
|
data->u.gpa + sz1, sz2);
|
||||||
|
if (r)
|
||||||
|
goto deactivate_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
kvm_xen_update_runstate_guest(vcpu, false);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
|
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
|
||||||
if (!sched_info_on()) {
|
if (!sched_info_on()) {
|
||||||
r = -EOPNOTSUPP;
|
r = -EOPNOTSUPP;
|
||||||
@ -1834,6 +1994,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
|
|||||||
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
|
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
|
||||||
|
|
||||||
kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
|
kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
|
||||||
|
kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
|
||||||
kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
|
kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
|
||||||
kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
|
kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
|
||||||
}
|
}
|
||||||
@ -1844,6 +2005,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
|
|||||||
kvm_xen_stop_timer(vcpu);
|
kvm_xen_stop_timer(vcpu);
|
||||||
|
|
||||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
|
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
|
||||||
|
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
|
||||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
|
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
|
||||||
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
|
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
|
||||||
|
|
||||||
|
@ -143,11 +143,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
|
|||||||
#include <asm/xen/interface.h>
|
#include <asm/xen/interface.h>
|
||||||
#include <xen/interface/vcpu.h>
|
#include <xen/interface/vcpu.h>
|
||||||
|
|
||||||
void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
|
void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state);
|
||||||
|
|
||||||
static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
|
static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
|
kvm_xen_update_runstate(vcpu, RUNSTATE_running);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
|
static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
|
||||||
@ -162,7 +162,7 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
|
|||||||
if (WARN_ON_ONCE(!vcpu->preempted))
|
if (WARN_ON_ONCE(!vcpu->preempted))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
|
kvm_xen_update_runstate(vcpu, RUNSTATE_runnable);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 32-bit compatibility definitions, also used natively in 32-bit build */
|
/* 32-bit compatibility definitions, also used natively in 32-bit build */
|
||||||
|
@ -26,17 +26,17 @@
|
|||||||
#define SHINFO_REGION_GPA 0xc0000000ULL
|
#define SHINFO_REGION_GPA 0xc0000000ULL
|
||||||
#define SHINFO_REGION_SLOT 10
|
#define SHINFO_REGION_SLOT 10
|
||||||
|
|
||||||
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
|
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
|
||||||
#define DUMMY_REGION_SLOT 11
|
#define DUMMY_REGION_SLOT 11
|
||||||
|
|
||||||
#define SHINFO_ADDR (SHINFO_REGION_GPA)
|
#define SHINFO_ADDR (SHINFO_REGION_GPA)
|
||||||
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
|
|
||||||
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
|
|
||||||
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
|
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
|
||||||
|
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
|
||||||
|
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
|
||||||
|
|
||||||
#define SHINFO_VADDR (SHINFO_REGION_GVA)
|
#define SHINFO_VADDR (SHINFO_REGION_GVA)
|
||||||
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
|
|
||||||
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
|
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
|
||||||
|
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
|
||||||
|
|
||||||
#define EVTCHN_VECTOR 0x10
|
#define EVTCHN_VECTOR 0x10
|
||||||
|
|
||||||
@ -449,8 +449,8 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
/* Map a region for the shared_info page */
|
/* Map a region for the shared_info page */
|
||||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||||
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
|
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
|
||||||
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
|
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
|
||||||
|
|
||||||
struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
|
struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user