KVM: arm64: Infer the PA offset from IPA in stage-2 map walker
Until now, the page table walker counted increments to the PA and IPA
of a walk in two separate places. While the PA is incremented as soon as
a leaf PTE is installed in stage2_map_walker_try_leaf(), the IPA is
actually bumped in the generic table walker context. Critically,
__kvm_pgtable_visit() rereads the PTE after the LEAF callback returns
to work out if a table or leaf was installed, and only bumps the IPA for
a leaf PTE.

This arrangement worked fine when we handled faults behind the write lock,
as the walker had exclusive access to the stage-2 page tables. However,
commit 1577cb5823 ("KVM: arm64: Handle stage-2 faults in parallel")
started handling all stage-2 faults behind the read lock, opening up a
race where a walker could increment the PA but not the IPA of a walk.
Nothing good ensues, as the walker starts mapping with the incorrect
IPA -> PA relationship.

For example, assume that two vCPUs took a data abort on the same IPA.
One observes that dirty logging is disabled, and the other observes that
it is enabled:

  vCPU attempting PMD mapping             vCPU attempting PTE mapping
  ======================================  =====================================
  /* install PMD */
  stage2_make_pte(ctx, leaf);
  data->phys += granule;
                                          /* replace PMD with a table */
                                          stage2_try_break_pte(ctx, data->mmu);
                                          stage2_make_pte(ctx, table);
  /* table is observed */
  ctx.old = READ_ONCE(*ptep);
  table = kvm_pte_table(ctx.old, level);

  /*
   * map walk continues w/o incrementing
   * IPA.
   */
  __kvm_pgtable_walk(..., level + 1);

Bring an end to the whole mess by using the IPA as the single source of
truth for how far along a walk has gotten. Work out the correct PA to
map by calculating the IPA offset from the beginning of the walk and add
that to the starting physical address.

Cc: stable@vger.kernel.org
Fixes: 1577cb5823 ("KVM: arm64: Handle stage-2 faults in parallel")
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230421071606.1603916-2-oliver.upton@linux.dev
This commit is contained in:
parent
197b6b60ae
commit
1f0f4a2ef7
@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
|
||||
kvm_pte_t old;
|
||||
void *arg;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
u64 start;
|
||||
u64 addr;
|
||||
u64 end;
|
||||
u32 level;
|
||||
|
@ -58,6 +58,7 @@
|
||||
/*
 * Per-walk state: the walker being driven plus the address range of the
 * walk. kvm_pgtable_walk() fills this in, and __kvm_pgtable_visit() copies
 * start/addr/end into each visit context.
 */
struct kvm_pgtable_walk_data {
|
||||
struct kvm_pgtable_walker *walker;
|
||||
|
||||
/* Page-aligned address at which the walk began. */
u64 start;
|
||||
/*
 * Current position of the walk; initialised to the same value as start.
 * NOTE(review): presumably advanced by the generic walker as entries are
 * visited -- the code doing so is not visible here.
 */
u64 addr;
|
||||
/* Page-aligned end of the range being walked. */
u64 end;
|
||||
};
|
||||
@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
||||
.old = READ_ONCE(*ptep),
|
||||
.arg = data->walker->arg,
|
||||
.mm_ops = mm_ops,
|
||||
.start = data->start,
|
||||
.addr = data->addr,
|
||||
.end = data->end,
|
||||
.level = level,
|
||||
@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
struct kvm_pgtable_walk_data walk_data = {
|
||||
.start = ALIGN_DOWN(addr, PAGE_SIZE),
|
||||
.addr = ALIGN_DOWN(addr, PAGE_SIZE),
|
||||
.end = PAGE_ALIGN(walk_data.addr + size),
|
||||
.walker = walker,
|
||||
@ -794,20 +797,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
|
||||
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
|
||||
}
|
||||
|
||||
/*
 * Work out the physical address the stage-2 map walker should use for the
 * entry currently being visited.
 *
 * @ctx:  visit context; ctx->addr and ctx->start give the walk's progress.
 * @data: map walk arguments; data->phys is the PA for the start of the walk.
 *
 * Returns data->phys unchanged when it is not a valid PA, otherwise
 * data->phys offset by (ctx->addr - ctx->start). data->phys itself is never
 * modified.
 */
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
const struct stage2_map_data *data)
|
||||
{
|
||||
u64 phys = data->phys;
|
||||
|
||||
/*
|
||||
* Stage-2 walks to update ownership data are communicated to the map
|
||||
* walker using an invalid PA. Avoid offsetting an already invalid PA,
|
||||
* which could overflow and make the address valid again.
|
||||
*/
|
||||
if (!kvm_phys_is_valid(phys))
|
||||
return phys;
|
||||
|
||||
/*
|
||||
* Otherwise, work out the correct PA based on how far the walk has
|
||||
* gotten.
|
||||
*/
|
||||
return phys + (ctx->addr - ctx->start);
|
||||
}
|
||||
|
||||
static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
u64 phys = stage2_map_walker_phys_addr(ctx, data);
|
||||
|
||||
if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
|
||||
return false;
|
||||
|
||||
return kvm_block_mapping_supported(ctx, data->phys);
|
||||
return kvm_block_mapping_supported(ctx, phys);
|
||||
}
|
||||
|
||||
static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
kvm_pte_t new;
|
||||
u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
|
||||
u64 phys = stage2_map_walker_phys_addr(ctx, data);
|
||||
u64 granule = kvm_granule_size(ctx->level);
|
||||
struct kvm_pgtable *pgt = data->mmu->pgt;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
|
||||
|
||||
@ -841,8 +867,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
|
||||
stage2_make_pte(ctx, new);
|
||||
|
||||
if (kvm_phys_is_valid(phys))
|
||||
data->phys += granule;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user