Merge tag 'kvm-riscv-6.10-1' of https://github.com/kvm-riscv/linux into HEAD

KVM/riscv changes for 6.10

- Support guest breakpoints using ebreak
- Introduce per-VCPU mp_state_lock and reset_cntx_lock
- Virtualize SBI PMU snapshot and counter overflow interrupts
- New selftests for SBI PMU and Guest ebreak
commit aa24865fb5
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Hurr durr I'ma ninja sloth

 # *DOCUMENTATION*
@@ -4,7 +4,6 @@

 #ifndef __ASSEMBLY__

-#include <asm/page.h>
 #include <asm/vdso/timebase.h>
 #include <asm/barrier.h>
 #include <asm/unistd.h>
@@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void);
 static __always_inline
 const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
-	return (void *)vd + PAGE_SIZE;
+	return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
 }
 #endif
@@ -168,7 +168,8 @@
 #define VSIP_TO_HVIP_SHIFT	(IRQ_VS_SOFT - IRQ_S_SOFT)
 #define VSIP_VALID_MASK		((_AC(1, UL) << IRQ_S_SOFT) | \
				 (_AC(1, UL) << IRQ_S_TIMER) | \
-				 (_AC(1, UL) << IRQ_S_EXT))
+				 (_AC(1, UL) << IRQ_S_EXT) | \
+				 (_AC(1, UL) << IRQ_PMU_OVF))

 /* AIA CSR bits */
 #define TOPI_IID_SHIFT		16
@@ -281,7 +282,7 @@
 #define CSR_HPMCOUNTER30H	0xc9e
 #define CSR_HPMCOUNTER31H	0xc9f

-#define CSR_SSCOUNTOVF		0xda0
+#define CSR_SCOUNTOVF		0xda0

 #define CSR_SSTATUS		0x100
 #define CSR_SIE			0x104
@@ -43,6 +43,17 @@
			KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_STEAL_UPDATE	KVM_ARCH_REQ(6)

+#define KVM_HEDELEG_DEFAULT	(BIT(EXC_INST_MISALIGNED) | \
+				 BIT(EXC_BREAKPOINT) | \
+				 BIT(EXC_SYSCALL) | \
+				 BIT(EXC_INST_PAGE_FAULT) | \
+				 BIT(EXC_LOAD_PAGE_FAULT) | \
+				 BIT(EXC_STORE_PAGE_FAULT))
+
+#define KVM_HIDELEG_DEFAULT	(BIT(IRQ_VS_SOFT) | \
+				 BIT(IRQ_VS_TIMER) | \
+				 BIT(IRQ_VS_EXT))
+
 enum kvm_riscv_hfence_type {
	KVM_RISCV_HFENCE_UNKNOWN = 0,
	KVM_RISCV_HFENCE_GVMA_VMID_GPA,
@@ -169,6 +180,7 @@ struct kvm_vcpu_csr {
 struct kvm_vcpu_config {
	u64 henvcfg;
	u64 hstateen0;
+	unsigned long hedeleg;
 };

 struct kvm_vcpu_smstateen_csr {
@@ -211,6 +223,7 @@ struct kvm_vcpu_arch {

	/* CPU context upon Guest VCPU reset */
	struct kvm_cpu_context guest_reset_context;
+	spinlock_t reset_cntx_lock;

	/* CPU CSR context upon Guest VCPU reset */
	struct kvm_vcpu_csr guest_reset_csr;
@@ -252,8 +265,9 @@ struct kvm_vcpu_arch {
	/* Cache pages needed to program page tables with spinlock held */
	struct kvm_mmu_memory_cache mmu_page_cache;

-	/* VCPU power-off state */
-	bool power_off;
+	/* VCPU power state */
+	struct kvm_mp_state mp_state;
+	spinlock_t mp_state_lock;

	/* Don't run the VCPU (blocked) */
	bool pause;
@@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu);

 void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
@@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64);

 struct kvm_fw_event {
	/* Current value of the event */
-	unsigned long value;
+	u64 value;

	/* Event monitoring status */
	bool started;
@@ -36,6 +36,7 @@ struct kvm_pmc {
	bool started;
	/* Monitoring event ID */
	unsigned long event_idx;
+	struct kvm_vcpu *vcpu;
 };

 /* PMU data structure per vcpu */
@@ -50,6 +51,12 @@ struct kvm_pmu {
	bool init_done;
	/* Bit map of all the virtual counter used */
	DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+	/* Bit map of all the virtual counter overflown */
+	DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS);
+	/* The address of the counter snapshot area (guest physical address) */
+	gpa_t snapshot_addr;
+	/* The actual data of the snapshot */
+	struct riscv_pmu_snapshot_data *sdata;
 };

 #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context)
@@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
				unsigned long ctr_mask, unsigned long flags,
				unsigned long eidx, u64 evtdata,
				struct kvm_vcpu_sbi_return *retdata);
-int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
+int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			struct kvm_vcpu_sbi_return *retdata);
+int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+			struct kvm_vcpu_sbi_return *retdata);
 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+				unsigned long saddr_high, unsigned long flags,
+				struct kvm_vcpu_sbi_return *retdata);
 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);

@@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid {
	SBI_EXT_PMU_COUNTER_START,
	SBI_EXT_PMU_COUNTER_STOP,
	SBI_EXT_PMU_COUNTER_FW_READ,
+	SBI_EXT_PMU_COUNTER_FW_READ_HI,
+	SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
 };

 union sbi_pmu_ctr_info {
@@ -147,6 +149,13 @@ union sbi_pmu_ctr_info {
	};
 };

+/* Data structure to contain the pmu snapshot data */
+struct riscv_pmu_snapshot_data {
+	u64 ctr_overflow_mask;
+	u64 ctr_values[64];
+	u64 reserved[447];
+};
+
 #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0)
 #define RISCV_PMU_RAW_EVENT_IDX 0x20000

@@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type {
 #define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF

 /* Flags defined for config matching function */
-#define SBI_PMU_CFG_FLAG_SKIP_MATCH	(1 << 0)
-#define SBI_PMU_CFG_FLAG_CLEAR_VALUE	(1 << 1)
-#define SBI_PMU_CFG_FLAG_AUTO_START	(1 << 2)
-#define SBI_PMU_CFG_FLAG_SET_VUINH	(1 << 3)
-#define SBI_PMU_CFG_FLAG_SET_VSINH	(1 << 4)
-#define SBI_PMU_CFG_FLAG_SET_UINH	(1 << 5)
-#define SBI_PMU_CFG_FLAG_SET_SINH	(1 << 6)
-#define SBI_PMU_CFG_FLAG_SET_MINH	(1 << 7)
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH	BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE	BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START	BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH	BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH	BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH	BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH	BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH	BIT(7)

 /* Flags defined for counter start function */
-#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)

 /* Flags defined for counter stop function */
-#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)

 enum sbi_ext_dbcn_fid {
	SBI_EXT_DBCN_CONSOLE_WRITE = 0,
@@ -266,7 +277,7 @@ struct sbi_sta_struct {
	u8 pad[47];
 } __packed;

-#define SBI_STA_SHMEM_DISABLE		-1
+#define SBI_SHMEM_DISABLE		-1

 /* SBI spec version fields */
 #define SBI_SPEC_VERSION_DEFAULT	0x1
@@ -284,6 +295,7 @@ struct sbi_sta_struct {
 #define SBI_ERR_ALREADY_AVAILABLE	-6
 #define SBI_ERR_ALREADY_STARTED		-7
 #define SBI_ERR_ALREADY_STOPPED		-8
+#define SBI_ERR_NO_SHMEM		-9

 extern unsigned long sbi_spec_version;
 struct sbiret {
@@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void)
 static inline unsigned long sbi_mk_version(unsigned long major,
					    unsigned long minor)
 {
-	return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
-		SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
+	return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+		| (minor & SBI_SPEC_VERSION_MINOR_MASK);
 }

 int sbi_err_map_linux_errno(int err);
@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
	KVM_RISCV_ISA_EXT_ZFA,
	KVM_RISCV_ISA_EXT_ZTSO,
	KVM_RISCV_ISA_EXT_ZACAS,
+	KVM_RISCV_ISA_EXT_SSCOFPMF,
	KVM_RISCV_ISA_EXT_MAX,
 };

@@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
			lo, hi, flags, 0, 0, 0);
	if (ret.error) {
-		if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
+		if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE)
			pr_warn("Failed to disable steal-time shmem");
		else
			pr_warn("Failed to set steal-time shmem");
@@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu)

 static int pv_time_cpu_down_prepare(unsigned int cpu)
 {
-	return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
-					    SBI_STA_SHMEM_DISABLE, 0);
+	return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE,
+					    SBI_SHMEM_DISABLE, 0);
 }

 static u64 pv_time_steal_clock(int cpu)
@@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void)
	enable_percpu_irq(hgei_parent_irq,
			  irq_get_trigger_type(hgei_parent_irq));
	csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+	/* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+		csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
 }

 void kvm_riscv_aia_disable(void)
@@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void)
		return;
	hgctrl = get_cpu_ptr(&aia_hgei);

+	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+		csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
	/* Disable per-CPU SGEI interrupt */
	csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
	disable_percpu_irq(hgei_parent_irq);
@@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp,

 int kvm_arch_hardware_enable(void)
 {
-	unsigned long hideleg, hedeleg;
-
-	hedeleg = 0;
-	hedeleg |= (1UL << EXC_INST_MISALIGNED);
-	hedeleg |= (1UL << EXC_BREAKPOINT);
-	hedeleg |= (1UL << EXC_SYSCALL);
-	hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
-	hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
-	hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
-	csr_write(CSR_HEDELEG, hedeleg);
-
-	hideleg = 0;
-	hideleg |= (1UL << IRQ_VS_SOFT);
-	hideleg |= (1UL << IRQ_VS_TIMER);
-	hideleg |= (1UL << IRQ_VS_EXT);
-	csr_write(CSR_HIDELEG, hideleg);
+	csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
+	csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);

	/* VS should access only the time counter directly. Everything else should trap */
	csr_write(CSR_HCOUNTEREN, 0x02);
@@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)

	memcpy(csr, reset_csr, sizeof(*csr));

+	spin_lock(&vcpu->arch.reset_cntx_lock);
	memcpy(cntx, reset_cntx, sizeof(*cntx));
+	spin_unlock(&vcpu->arch.reset_cntx_lock);

	kvm_riscv_vcpu_fp_reset(vcpu);

@@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

+	spin_lock_init(&vcpu->arch.mp_state_lock);
+
	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;
	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
	spin_lock_init(&vcpu->arch.hfence_lock);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+	spin_lock_init(&vcpu->arch.reset_cntx_lock);
+
+	spin_lock(&vcpu->arch.reset_cntx_lock);
	cntx = &vcpu->arch.guest_reset_context;
	cntx->sstatus = SR_SPP | SR_SPIE;
	cntx->hstatus = 0;
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;
+	spin_unlock(&vcpu->arch.reset_cntx_lock);

	if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
		return -ENOMEM;
@@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)

 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
-		!vcpu->arch.power_off && !vcpu->arch.pause);
+		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
 }

 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
		}
	}

+	/* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
+	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
+		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
+		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
+			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
+	}
+
	/* Sync-up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

@@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
-	    irq != IRQ_VS_EXT)
+	    irq != IRQ_VS_EXT &&
+	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
@@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
 {
	/*
-	 * We only allow VS-mode software, timer, and external
+	 * We only allow VS-mode software, timer, counter overflow and external
	 * interrupts when irq is one of the local interrupts
	 * defined by RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
-	    irq != IRQ_VS_EXT)
+	    irq != IRQ_VS_EXT &&
+	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	clear_bit(irq, vcpu->arch.irqs_pending);
@@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
 }

-void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.power_off = true;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
 }

+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->arch.mp_state_lock);
+	__kvm_riscv_vcpu_power_off(vcpu);
+	spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
+	kvm_vcpu_wake_up(vcpu);
+}
+
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.power_off = false;
-	kvm_vcpu_wake_up(vcpu);
+	spin_lock(&vcpu->arch.mp_state_lock);
+	__kvm_riscv_vcpu_power_on(vcpu);
+	spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
 }

 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
 {
-	if (vcpu->arch.power_off)
-		mp_state->mp_state = KVM_MP_STATE_STOPPED;
-	else
-		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
 }
@@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
	int ret = 0;

+	spin_lock(&vcpu->arch.mp_state_lock);
+
	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.power_off = false;
+		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
-		kvm_riscv_vcpu_power_off(vcpu);
+		__kvm_riscv_vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

+	spin_unlock(&vcpu->arch.mp_state_lock);
+
	return ret;
 }

 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
 {
-	/* TODO; To be implemented later. */
-	return -EINVAL;
+	if (dbg->control & KVM_GUESTDBG_ENABLE) {
+		vcpu->guest_debug = dbg->control;
+		vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
+	} else {
+		vcpu->guest_debug = 0;
+		vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
+	}
+
+	return 0;
 }

 static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
@@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
		if (riscv_isa_extension_available(isa, SMSTATEEN))
			cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
	}
+
+	cfg->hedeleg = KVM_HEDELEG_DEFAULT;
+	if (vcpu->guest_debug)
+		cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
 }

 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	csr_write(CSR_VSEPC, csr->vsepc);
	csr_write(CSR_VSCAUSE, csr->vscause);
	csr_write(CSR_VSTVAL, csr->vstval);
+	csr_write(CSR_HEDELEG, cfg->hedeleg);
	csr_write(CSR_HVIP, csr->hvip);
	csr_write(CSR_VSATP, csr->vsatp);
	csr_write(CSR_HENVCFG, cfg->henvcfg);
@@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
	if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		rcuwait_wait_event(wait,
-			(!vcpu->arch.power_off) && (!vcpu->arch.pause),
+			(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
			TASK_INTERRUPTIBLE);
		kvm_vcpu_srcu_read_lock(vcpu);

-		if (vcpu->arch.power_off || vcpu->arch.pause) {
+		if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
			/*
			 * Awaken to handle a signal, request to
			 * sleep again later.
@@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
		if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
			ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
		break;
+	case EXC_BREAKPOINT:
+		run->exit_reason = KVM_EXIT_DEBUG;
+		ret = 0;
+		break;
	default:
		break;
	}
@@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
	/* Multi letter extensions (alphabetically sorted) */
	KVM_ISA_EXT_ARR(SMSTATEEN),
	KVM_ISA_EXT_ARR(SSAIA),
+	KVM_ISA_EXT_ARR(SSCOFPMF),
	KVM_ISA_EXT_ARR(SSTC),
	KVM_ISA_EXT_ARR(SVINVAL),
	KVM_ISA_EXT_ARR(SVNAPOT),
@@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
	switch (ext) {
	case KVM_RISCV_ISA_EXT_H:
		return false;
+	case KVM_RISCV_ISA_EXT_SSCOFPMF:
+		/* Sscofpmf depends on interrupt filtering defined in ssaia */
+		return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
	case KVM_RISCV_ISA_EXT_V:
		return riscv_v_vstate_ctrl_user_allowed();
	default:
@@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
	case KVM_RISCV_ISA_EXT_C:
	case KVM_RISCV_ISA_EXT_I:
	case KVM_RISCV_ISA_EXT_M:
+	/* There is not architectural config bit to disable sscofpmf completely */
+	case KVM_RISCV_ISA_EXT_SSCOFPMF:
	case KVM_RISCV_ISA_EXT_SSTC:
	case KVM_RISCV_ISA_EXT_SVINVAL:
	case KVM_RISCV_ISA_EXT_SVNAPOT:
@ -14,6 +14,7 @@
|
||||
#include <asm/csr.h>
|
||||
#include <asm/kvm_vcpu_sbi.h>
|
||||
#include <asm/kvm_vcpu_pmu.h>
|
||||
#include <asm/sbi.h>
|
||||
#include <linux/bitops.h>
|
||||
|
||||
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
|
||||
@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
|
||||
u64 sample_period;
|
||||
|
||||
if (!pmc->counter_val)
|
||||
sample_period = counter_val_mask + 1;
|
||||
sample_period = counter_val_mask;
|
||||
else
|
||||
sample_period = (-pmc->counter_val) & counter_val_mask;
|
||||
|
||||
@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
|
||||
return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
|
||||
}
|
||||
|
||||
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
|
||||
unsigned long *out_val)
|
||||
{
|
||||
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *pmc;
|
||||
int fevent_code;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_32BIT)) {
|
||||
pr_warn("%s: should be invoked for only RV32\n", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
|
||||
pr_warn("Invalid counter id [%ld]during read\n", cidx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pmc = &kvpmu->pmc[cidx];
|
||||
|
||||
if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
|
||||
return -EINVAL;
|
||||
|
||||
fevent_code = get_event_code(pmc->event_idx);
|
||||
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
|
||||
|
||||
*out_val = pmc->counter_val >> 32;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
|
||||
unsigned long *out_val)
|
||||
{
|
||||
@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
|
||||
u64 enabled, running;
|
||||
int fevent_code;
|
||||
|
||||
if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
|
||||
pr_warn("Invalid counter id [%ld] during read\n", cidx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pmc = &kvpmu->pmc[cidx];
|
||||
|
||||
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
|
||||
@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
|
||||
unsigned long flags, unsigned long eidx, unsigned long evtdata)
|
||||
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
|
||||
struct perf_sample_data *data,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
|
||||
struct kvm_vcpu *vcpu = pmc->vcpu;
|
||||
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
|
||||
struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
|
||||
u64 period;
|
||||
|
||||
/*
|
||||
* Stop the event counting by directly accessing the perf_event.
|
||||
* Otherwise, this needs to deferred via a workqueue.
|
||||
* That will introduce skew in the counter value because the actual
|
||||
* physical counter would start after returning from this function.
|
||||
* It will be stopped again once the workqueue is scheduled
|
||||
*/
|
||||
rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
|
||||
|
||||
/*
|
||||
* The hw counter would start automatically when this function returns.
|
||||
* Thus, the host may continue to interrupt and inject it to the guest
|
||||
* even without the guest configuring the next event. Depending on the hardware
|
||||
* the host may have some sluggishness only if privilege mode filtering is not
|
||||
* available. In an ideal world, where qemu is not the only capable hardware,
|
||||
* this can be removed.
|
||||
* FYI: ARM64 does this way while x86 doesn't do anything as such.
|
||||
* TODO: Should we keep it for RISC-V ?
|
||||
*/
|
||||
period = -(local64_read(&perf_event->count));
|
||||
|
||||
local64_set(&perf_event->hw.period_left, 0);
|
||||
perf_event->attr.sample_period = period;
|
||||
perf_event->hw.sample_period = period;
|
||||
|
||||
set_bit(pmc->idx, kvpmu->pmc_overflown);
|
||||
kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
|
||||
|
||||
rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
|
||||
}
|
||||
|
||||
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
|
||||
unsigned long flags, unsigned long eidx,
|
||||
unsigned long evtdata)
|
||||
{
|
||||
struct perf_event *event;
|
||||
|
||||
@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr
|
||||
*/
|
||||
attr->sample_period = kvm_pmu_get_sample_period(pmc);
|
||||
|
||||
event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
|
||||
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
|
||||
if (IS_ERR(event)) {
|
||||
pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
|
||||
return PTR_ERR(event);
|
||||
@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
|
||||
int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
|
||||
|
||||
if (kvpmu->sdata) {
|
||||
if (kvpmu->snapshot_addr != INVALID_GPA) {
|
||||
memset(kvpmu->sdata, 0, snapshot_area_size);
|
||||
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
|
||||
kvpmu->sdata, snapshot_area_size);
|
||||
} else {
|
||||
pr_warn("snapshot address invalid\n");
|
||||
}
|
||||
kfree(kvpmu->sdata);
|
||||
kvpmu->sdata = NULL;
|
||||
}
|
||||
kvpmu->snapshot_addr = INVALID_GPA;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
|
||||
unsigned long saddr_high, unsigned long flags,
|
||||
struct kvm_vcpu_sbi_return *retdata)
|
||||
{
|
||||
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
|
||||
int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
|
||||
int sbiret = 0;
|
||||
gpa_t saddr;
|
||||
unsigned long hva;
|
||||
bool writable;
|
||||
|
||||
if (!kvpmu || flags) {
|
||||
sbiret = SBI_ERR_INVALID_PARAM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
|
||||
kvm_pmu_clear_snapshot_area(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
saddr = saddr_low;
|
||||
|
||||
if (saddr_high != 0) {
|
||||
if (IS_ENABLED(CONFIG_32BIT))
|
||||
saddr |= ((gpa_t)saddr_high << 32);
|
||||
else
|
||||
sbiret = SBI_ERR_INVALID_ADDRESS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
|
||||
if (kvm_is_error_hva(hva) || !writable) {
|
||||
sbiret = SBI_ERR_INVALID_ADDRESS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
|
||||
if (!kvpmu->sdata)
|
||||
return -ENOMEM;
|
||||
|
||||
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
|
||||
kfree(kvpmu->sdata);
|
||||
sbiret = SBI_ERR_FAILURE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvpmu->snapshot_addr = saddr;
|
||||
|
||||
out:
|
||||
retdata->err_val = sbiret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_sbi_return *retdata)
|
||||
{
|
||||
@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
|
||||
int i, pmc_index, sbiret = 0;
|
||||
struct kvm_pmc *pmc;
|
||||
int fevent_code;
|
||||
bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
|
||||
|
||||
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
|
||||
sbiret = SBI_ERR_INVALID_PARAM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (snap_flag_set) {
|
||||
if (kvpmu->snapshot_addr == INVALID_GPA) {
|
||||
sbiret = SBI_ERR_NO_SHMEM;
|
||||
goto out;
|
||||
}
|
||||
if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
|
||||
sizeof(struct riscv_pmu_snapshot_data))) {
|
||||
pr_warn("Unable to read snapshot shared memory while starting counters\n");
|
||||
sbiret = SBI_ERR_FAILURE;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
/* Start the counters that have been configured and requested by the guest */
|
||||
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
|
||||
pmc_index = i + ctr_base;
|
||||
if (!test_bit(pmc_index, kvpmu->pmc_in_use))
|
||||
continue;
|
||||
/* The guest started the counter again. Reset the overflow status */
|
||||
clear_bit(pmc_index, kvpmu->pmc_overflown);
|
||||
pmc = &kvpmu->pmc[pmc_index];
|
||||
if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
|
||||
if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
|
||||
pmc->counter_val = ival;
|
||||
} else if (snap_flag_set) {
|
||||
/* The counter index in the snapshot are relative to the counter base */
|
||||
pmc->counter_val = kvpmu->sdata->ctr_values[i];
|
||||
}
|
||||
|
||||
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
|
||||
fevent_code = get_event_code(pmc->event_idx);
|
||||
if (fevent_code >= SBI_PMU_FW_MAX) {
|
||||
@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
|
||||
u64 enabled, running;
|
||||
struct kvm_pmc *pmc;
|
||||
int fevent_code;
|
||||
bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
|
||||
bool shmem_needs_update = false;
|
||||
|
||||
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
|
||||
sbiret = SBI_ERR_INVALID_PARAM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
|
||||
sbiret = SBI_ERR_NO_SHMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Stop the counters that have been configured and requested by the guest */
|
||||
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
|
||||
pmc_index = i + ctr_base;
|
||||
@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
|
||||
sbiret = SBI_ERR_ALREADY_STOPPED;
|
||||
}
|
||||
|
||||
if (flags & SBI_PMU_STOP_FLAG_RESET) {
|
||||
/* Relase the counter if this is a reset request */
|
||||
pmc->counter_val += perf_event_read_value(pmc->perf_event,
|
||||
&enabled, &running);
|
||||
if (flags & SBI_PMU_STOP_FLAG_RESET)
|
||||
/* Release the counter if this is a reset request */
|
||||
kvm_pmu_release_perf_event(pmc);
|
||||
}
|
||||
} else {
|
||||
sbiret = SBI_ERR_INVALID_PARAM;
|
||||
}
|
||||
|
||||
if (snap_flag_set && !sbiret) {
|
||||
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
|
||||
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
|
||||
else if (pmc->perf_event)
|
||||
pmc->counter_val += perf_event_read_value(pmc->perf_event,
|
||||
&enabled, &running);
|
||||
/*
|
||||
* The counter and overflow indicies in the snapshot region are w.r.to
|
||||
* cbase. Modify the set bit in the counter mask instead of the pmc_index
|
||||
* which indicates the absolute counter index.
|
||||
*/
|
||||
if (test_bit(pmc_index, kvpmu->pmc_overflown))
|
||||
kvpmu->sdata->ctr_overflow_mask |= BIT(i);
|
||||
kvpmu->sdata->ctr_values[i] = pmc->counter_val;
|
||||
shmem_needs_update = true;
|
||||
}
|
||||
|
||||
if (flags & SBI_PMU_STOP_FLAG_RESET) {
|
||||
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
|
||||
clear_bit(pmc_index, kvpmu->pmc_in_use);
|
||||
clear_bit(pmc_index, kvpmu->pmc_overflown);
|
||||
if (snap_flag_set) {
|
||||
/*
|
||||
* Only clear the given counter as the caller is responsible to
|
||||
* validate both the overflow mask and configured counters.
|
||||
*/
|
||||
kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
|
||||
shmem_needs_update = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (shmem_needs_update)
|
||||
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
|
||||
sizeof(struct riscv_pmu_snapshot_data));
|
||||
|
||||
out:
|
||||
retdata->err_val = sbiret;
|
||||
|
||||
@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
|
||||
unsigned long eidx, u64 evtdata,
|
||||
struct kvm_vcpu_sbi_return *retdata)
|
||||
{
|
||||
int ctr_idx, ret, sbiret = 0;
|
||||
int ctr_idx, sbiret = 0;
|
||||
long ret;
|
||||
bool is_fevent;
|
||||
unsigned long event_code;
|
||||
u32 etype = kvm_pmu_get_perf_event_type(eidx);
|
||||
@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
|
||||
kvpmu->fw_event[event_code].started = true;
|
||||
} else {
|
||||
ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ret) {
|
||||
sbiret = SBI_ERR_NOT_SUPPORTED;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
set_bit(ctr_idx, kvpmu->pmc_in_use);
|
||||
@ -530,7 +740,19 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
|
||||
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
|
||||
struct kvm_vcpu_sbi_return *retdata)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
|
||||
if (ret == -EINVAL)
|
||||
retdata->err_val = SBI_ERR_INVALID_PARAM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
|
||||
struct kvm_vcpu_sbi_return *retdata)
|
||||
{
|
||||
int ret;
|
||||
@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
|
||||
kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
|
||||
kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
|
||||
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
|
||||
kvpmu->snapshot_addr = INVALID_GPA;
|
||||
|
||||
if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
|
||||
pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
|
||||
@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
|
||||
pmc = &kvpmu->pmc[i];
|
||||
pmc->idx = i;
|
||||
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
|
||||
pmc->vcpu = vcpu;
|
||||
if (i < kvpmu->num_hw_ctrs) {
|
||||
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
|
||||
if (i < 3)
|
||||
@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
|
||||
pmc->cinfo.csr = CSR_CYCLE + i;
|
||||
} else {
|
||||
pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
|
||||
pmc->cinfo.width = BITS_PER_LONG - 1;
|
||||
pmc->cinfo.width = 63;
|
||||
}
|
||||
}
|
||||
|
||||
@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
|
||||
if (!kvpmu)
|
||||
return;
|
||||
|
||||
for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
|
||||
for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
|
||||
pmc = &kvpmu->pmc[i];
|
||||
pmc->counter_val = 0;
|
||||
kvm_pmu_release_perf_event(pmc);
|
||||
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
|
||||
}
|
||||
bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
|
||||
bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
|
||||
bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
|
||||
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
|
||||
kvm_pmu_clear_snapshot_area(vcpu);
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
|
@@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
	unsigned long i;
	struct kvm_vcpu *tmp;

-	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
-		tmp->arch.power_off = true;
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		spin_lock(&vcpu->arch.mp_state_lock);
+		WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+		spin_unlock(&vcpu->arch.mp_state_lock);
+	}
	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);

	memset(&run->system_event, 0, sizeof(run->system_event));
@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
|
||||
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
|
||||
struct kvm_vcpu *target_vcpu;
|
||||
unsigned long target_vcpuid = cp->a0;
|
||||
int ret = 0;
|
||||
|
||||
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
|
||||
if (!target_vcpu)
|
||||
return SBI_ERR_INVALID_PARAM;
|
||||
if (!target_vcpu->arch.power_off)
|
||||
return SBI_ERR_ALREADY_AVAILABLE;
|
||||
|
||||
spin_lock(&target_vcpu->arch.mp_state_lock);
|
||||
|
||||
if (!kvm_riscv_vcpu_stopped(target_vcpu)) {
|
||||
ret = SBI_ERR_ALREADY_AVAILABLE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&target_vcpu->arch.reset_cntx_lock);
|
||||
reset_cntx = &target_vcpu->arch.guest_reset_context;
|
||||
/* start address */
|
||||
reset_cntx->sepc = cp->a1;
|
||||
@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
|
||||
reset_cntx->a0 = target_vcpuid;
|
||||
/* private data passed from kernel */
|
||||
reset_cntx->a1 = cp->a2;
|
||||
spin_unlock(&target_vcpu->arch.reset_cntx_lock);
|
||||
|
||||
kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
|
||||
|
||||
kvm_riscv_vcpu_power_on(target_vcpu);
|
||||
__kvm_riscv_vcpu_power_on(target_vcpu);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
spin_unlock(&target_vcpu->arch.mp_state_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.power_off)
|
||||
return SBI_ERR_FAILURE;
|
||||
int ret = 0;
|
||||
|
||||
kvm_riscv_vcpu_power_off(vcpu);
|
||||
spin_lock(&vcpu->arch.mp_state_lock);
|
||||
|
||||
return 0;
|
||||
if (kvm_riscv_vcpu_stopped(vcpu)) {
|
||||
ret = SBI_ERR_FAILURE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
__kvm_riscv_vcpu_power_off(vcpu);
|
||||
|
||||
out:
|
||||
spin_unlock(&vcpu->arch.mp_state_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
|
||||
@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
|
||||
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
|
||||
if (!target_vcpu)
|
||||
return SBI_ERR_INVALID_PARAM;
|
||||
if (!target_vcpu->arch.power_off)
|
||||
if (!kvm_riscv_vcpu_stopped(target_vcpu))
|
||||
return SBI_HSM_STATE_STARTED;
|
||||
else if (vcpu->stat.generic.blocking)
|
||||
return SBI_HSM_STATE_SUSPENDED;
|
||||
@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
{
|
||||
int ret = 0;
|
||||
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long funcid = cp->a6;
|
||||
|
||||
switch (funcid) {
|
||||
case SBI_EXT_HSM_HART_START:
|
||||
mutex_lock(&kvm->lock);
|
||||
ret = kvm_sbi_hsm_vcpu_start(vcpu);
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
case SBI_EXT_HSM_HART_STOP:
|
||||
ret = kvm_sbi_hsm_vcpu_stop(vcpu);
|
||||
|
@@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 #endif
		/*
		 * This can fail if perf core framework fails to create an event.
-		 * Forward the error to userspace because it's an error which
-		 * happened within the host kernel. The other option would be
-		 * to convert to an SBI error and forward to the guest.
+		 * No need to forward the error to userspace and exit the guest.
+		 * The operation can continue without profiling. Forward the
+		 * appropriate SBI error to the guest.
		 */
		ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1,
						       cp->a2, cp->a3, temp, retdata);
@@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
		ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata);
		break;
	case SBI_EXT_PMU_COUNTER_FW_READ:
-		ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata);
+		ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata);
		break;
+	case SBI_EXT_PMU_COUNTER_FW_READ_HI:
+		if (IS_ENABLED(CONFIG_32BIT))
+			ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata);
+		else
+			retdata->out_val = 0;
+		break;
+	case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
+		ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
+		break;
	default:
		retdata->err_val = SBI_ERR_NOT_SUPPORTED;
@@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
	if (flags != 0)
		return SBI_ERR_INVALID_PARAM;

-	if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
-	    shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
+	if (shmem_phys_lo == SBI_SHMEM_DISABLE &&
+	    shmem_phys_hi == SBI_SHMEM_DISABLE) {
		vcpu->arch.sta.shmem = INVALID_GPA;
		return 0;
	}
@@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
+	case KVM_CAP_SET_GUEST_DEBUG:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
@@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
	struct pmu *pmu = event->pmu;

	/*
-	 * Make sure we get updated with the first PEBS
-	 * event. It will trigger also during removal, but
-	 * that does not hurt:
+	 * Make sure we get updated with the first PEBS event.
+	 * During removal, ->pebs_data_cfg is still valid for
+	 * the last PEBS event. Don't clear it.
	 */
-	if (cpuc->n_pebs == 1)
+	if ((cpuc->n_pebs == 1) && add)
		cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;

	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
@@ -324,6 +324,7 @@ static void decode_ISR(unsigned int val)
	decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val);
 }

+#ifdef CONFIG_I2C_PXA_SLAVE
 static const struct bits icr_bits[] = {
	PXA_BIT(ICR_START, "START", NULL),
	PXA_BIT(ICR_STOP, "STOP", NULL),
@@ -342,7 +343,6 @@ static const struct bits icr_bits[] = {
	PXA_BIT(ICR_UR, "UR", "ur"),
 };

-#ifdef CONFIG_I2C_PXA_SLAVE
 static void decode_ICR(unsigned int val)
 {
	decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val);
@@ -191,8 +191,6 @@ void riscv_pmu_stop(struct perf_event *event, int flags)
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

-	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-
	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
@@ -408,6 +406,7 @@ struct riscv_pmu *riscv_pmu_alloc(void)
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
+		cpuc->snapshot_addr = NULL;
	}
	pmu->pmu = (struct pmu) {
		.event_init = riscv_pmu_event_init,
@ -27,7 +27,7 @@
|
||||
|
||||
#define ALT_SBI_PMU_OVERFLOW(__ovl) \
|
||||
asm volatile(ALTERNATIVE_2( \
|
||||
"csrr %0, " __stringify(CSR_SSCOUNTOVF), \
|
||||
"csrr %0, " __stringify(CSR_SCOUNTOVF), \
|
||||
"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
|
||||
THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
|
||||
CONFIG_ERRATA_THEAD_PMU, \
|
||||
@ -57,6 +57,11 @@ asm volatile(ALTERNATIVE( \
|
||||
PMU_FORMAT_ATTR(event, "config:0-47");
|
||||
PMU_FORMAT_ATTR(firmware, "config:63");
|
||||
|
||||
static bool sbi_v2_available;
|
||||
static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
|
||||
#define sbi_pmu_snapshot_available() \
|
||||
static_branch_unlikely(&sbi_pmu_snapshot_available)
|
||||
|
||||
static struct attribute *riscv_arch_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_firmware.attr,
|
||||
@ -384,7 +389,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
|
||||
cmask = 1;
|
||||
} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
|
||||
cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
|
||||
cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
|
||||
cmask = BIT(CSR_INSTRET - CSR_CYCLE);
|
||||
}
|
||||
}
|
||||
|
||||
@ -506,24 +511,126 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
|
||||
|
||||
if (!cpu_hw_evt->snapshot_addr)
|
||||
continue;
|
||||
|
||||
free_page((unsigned long)cpu_hw_evt->snapshot_addr);
|
||||
cpu_hw_evt->snapshot_addr = NULL;
|
||||
cpu_hw_evt->snapshot_addr_phys = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu)
|
||||
{
|
||||
int cpu;
|
||||
struct page *snapshot_page;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
|
||||
|
||||
snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
|
||||
if (!snapshot_page) {
|
||||
pmu_sbi_snapshot_free(pmu);
|
||||
return -ENOMEM;
|
||||
}
|
||||
cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page);
|
||||
cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pmu_sbi_snapshot_disable(void)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE,
|
||||
SBI_SHMEM_DISABLE, 0, 0, 0, 0);
|
||||
if (ret.error) {
|
||||
pr_warn("failed to disable snapshot shared memory\n");
|
||||
return sbi_err_map_linux_errno(ret.error);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpu_hw_evt;
|
||||
struct sbiret ret = {0};
|
||||
|
||||
cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
|
||||
if (!cpu_hw_evt->snapshot_addr_phys)
|
||||
return -EINVAL;
|
||||
|
||||
if (cpu_hw_evt->snapshot_set_done)
|
||||
return 0;
|
||||
|
||||
if (IS_ENABLED(CONFIG_32BIT))
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
|
||||
cpu_hw_evt->snapshot_addr_phys,
|
||||
(u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0);
|
||||
else
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
|
||||
cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
|
||||
|
||||
/* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */
|
||||
if (ret.error) {
|
||||
if (ret.error != SBI_ERR_NOT_SUPPORTED)
|
||||
pr_warn("pmu snapshot setup failed with error %ld\n", ret.error);
|
||||
return sbi_err_map_linux_errno(ret.error);
|
||||
}
|
||||
|
||||
memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
|
||||
cpu_hw_evt->snapshot_set_done = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 pmu_sbi_ctr_read(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
struct sbiret ret;
|
||||
union sbi_pmu_ctr_info info;
|
||||
u64 val = 0;
|
||||
struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
|
||||
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
|
||||
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
|
||||
union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
|
||||
|
||||
/* Read the value from the shared memory directly only if counter is stopped */
|
||||
if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
|
||||
val = sdata->ctr_values[idx];
|
||||
return val;
|
||||
}
|
||||
|
||||
if (pmu_sbi_is_fw_event(event)) {
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
|
||||
hwc->idx, 0, 0, 0, 0, 0);
|
||||
if (!ret.error)
|
||||
val = ret.value;
|
||||
if (ret.error)
|
||||
return 0;
|
||||
|
||||
val = ret.value;
|
||||
if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) {
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI,
|
||||
hwc->idx, 0, 0, 0, 0, 0);
|
||||
if (!ret.error)
|
||||
val |= ((u64)ret.value << 32);
|
||||
else
|
||||
WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n",
|
||||
ret.error);
|
||||
}
|
||||
} else {
|
||||
info = pmu_ctr_list[idx];
|
||||
val = riscv_pmu_ctr_read_csr(info.csr);
|
||||
if (IS_ENABLED(CONFIG_32BIT))
|
||||
val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
|
||||
val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32;
|
||||
}
|
||||
|
||||
return val;
|
||||
@ -553,6 +660,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
|
||||
|
||||
/* There is no benefit setting SNAPSHOT FLAG for a single counter */
|
||||
#if defined(CONFIG_32BIT)
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
|
||||
1, flag, ival, ival >> 32, 0);
|
||||
@ -573,16 +681,36 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
|
||||
{
|
||||
struct sbiret ret;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
|
||||
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
|
||||
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
|
||||
|
||||
if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
|
||||
(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
|
||||
pmu_sbi_reset_scounteren((void *)event);
|
||||
|
||||
if (sbi_pmu_snapshot_available())
|
||||
flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
|
||||
if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
|
||||
flag != SBI_PMU_STOP_FLAG_RESET)
|
||||
if (!ret.error && sbi_pmu_snapshot_available()) {
|
||||
/*
|
||||
* The counter snapshot is based on the index base specified by hwc->idx.
|
||||
* The actual counter value is updated in shared memory at index 0 when counter
|
||||
* mask is 0x01. To ensure accurate counter values, it's necessary to transfer
|
||||
* the counter value to shared memory. However, if hwc->idx is zero, the counter
|
||||
* value is already correctly updated in shared memory, requiring no further
|
||||
* adjustment.
|
||||
*/
|
||||
if (hwc->idx > 0) {
|
||||
sdata->ctr_values[hwc->idx] = sdata->ctr_values[0];
|
||||
sdata->ctr_values[0] = 0;
|
||||
}
|
||||
} else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
|
||||
flag != SBI_PMU_STOP_FLAG_RESET) {
|
||||
pr_err("Stopping counter idx %d failed with error %d\n",
|
||||
hwc->idx, sbi_err_map_linux_errno(ret.error));
|
||||
}
|
||||
}
|
||||
|
||||
static int pmu_sbi_find_num_ctrs(void)
|
||||
@ -640,10 +768,39 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
|
||||
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
|
||||
{
|
||||
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
|
||||
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
|
||||
unsigned long flag = 0;
|
||||
int i, idx;
|
||||
struct sbiret ret;
|
||||
u64 temp_ctr_overflow_mask = 0;
|
||||
|
||||
/* No need to check the error here as we can't do anything about the error */
|
||||
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
|
||||
cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
|
||||
if (sbi_pmu_snapshot_available())
|
||||
flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
|
||||
|
||||
/* Reset the shadow copy to avoid save/restore any value from previous overflow */
|
||||
memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
|
||||
|
||||
for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
|
||||
/* No need to check the error here as we can't do anything about the error */
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
|
||||
cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
|
||||
if (!ret.error && sbi_pmu_snapshot_available()) {
|
||||
/* Save the counter values to avoid clobbering */
|
||||
for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
|
||||
cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
|
||||
sdata->ctr_values[idx];
|
||||
/* Save the overflow mask to avoid clobbering */
|
||||
temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore the counter values to the shared memory for used hw counters */
|
||||
if (sbi_pmu_snapshot_available()) {
|
||||
for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
|
||||
sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
|
||||
if (temp_ctr_overflow_mask)
|
||||
sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
|
||||
}
|
||||
}
|
||||
/*
@ -652,11 +809,10 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
* while the overflowed counters need to be started with updated initialization
* value.
*/
static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
unsigned long ctr_ovf_mask)
static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
u64 ctr_ovf_mask)
{
int idx = 0;
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
int idx = 0, i;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
unsigned long ctr_start_mask = 0;
@ -664,11 +820,12 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
struct hw_perf_event *hwc;
u64 init_val = 0;

ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;

/* Start all the counters that did not overflow in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
0, 0, 0, 0);
for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
/* Start all the counters that did not overflow in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
0, 0, 0, 0);
}

/* Reinitialize and start all the counters that overflowed */
while (ctr_ovf_mask) {
@ -691,6 +848,52 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
}
}

static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
u64 ctr_ovf_mask)
{
int i, idx = 0;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
u64 max_period, init_val = 0;
struct hw_perf_event *hwc;
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;

for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
if (ctr_ovf_mask & BIT(idx)) {
event = cpu_hw_evt->events[idx];
hwc = &event->hw;
max_period = riscv_pmu_ctr_get_width_mask(event);
init_val = local64_read(&hwc->prev_count) & max_period;
cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
}
/*
* We do not need to update the non-overflow counters; the previous
* value should have been there already.
*/
}

for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
/* Restore the counter values to relative indices for used hw counters */
for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
sdata->ctr_values[idx] =
cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
/* Start all the counters in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
}
}

static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
u64 ctr_ovf_mask)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

if (sbi_pmu_snapshot_available())
pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask);
else
pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
}
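Both start paths program an initial value derived from the event's previous count masked to the counter width. As a rough worked example of the usual perf reload arithmetic (a generic sketch under an assumed 48-bit counter, not the driver's own helpers): to take an interrupt after a given number of events, an N-bit up-counter is started at the two's complement of the period, truncated to N bits.

#include <stdint.h>
#include <stdio.h>

/* Width mask for an N-bit counter, e.g. 48 bits -> 0x0000ffffffffffff. */
static uint64_t width_mask(unsigned int bits)
{
	return bits >= 64 ? ~0ULL : (1ULL << bits) - 1;
}

/* Start value so the counter overflows after exactly `period` more increments. */
static uint64_t reload_value(uint64_t period, unsigned int bits)
{
	return (0 - period) & width_mask(bits);
}

int main(void)
{
	uint64_t period = 100000;              /* sample every 100k events */
	uint64_t init = reload_value(period, 48);

	printf("init_val = 0x%016llx\n", (unsigned long long)init);
	printf("events until overflow: %llu\n",
	       (unsigned long long)(width_mask(48) - init + 1));
	return 0;
}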
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
struct perf_sample_data data;
@ -700,10 +903,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
int lidx, hidx, fidx;
struct riscv_pmu *pmu;
struct perf_event *event;
unsigned long overflow;
unsigned long overflowed_ctrs = 0;
u64 overflow;
u64 overflowed_ctrs = 0;
struct cpu_hw_events *cpu_hw_evt = dev;
u64 start_clock = sched_clock();
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;

if (WARN_ON_ONCE(!cpu_hw_evt))
return IRQ_NONE;
@ -725,7 +929,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
pmu_sbi_stop_hw_ctrs(pmu);

/* Overflow status register should only be read after counters are stopped */
ALT_SBI_PMU_OVERFLOW(overflow);
if (sbi_pmu_snapshot_available())
overflow = sdata->ctr_overflow_mask;
else
ALT_SBI_PMU_OVERFLOW(overflow);

/*
* Overflow interrupt pending bit should only be cleared after stopping
@ -751,9 +958,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
continue;

/* compute hardware counter index */
hidx = info->csr - CSR_CYCLE;
/* check if the corresponding bit is set in sscountovf */
if (sbi_pmu_snapshot_available())
/* SBI implementation already updated the logical indices */
hidx = lidx;
else
/* compute hardware counter index */
hidx = info->csr - CSR_CYCLE;

/* check if the corresponding bit is set in sscountovf or overflow mask in shmem */
if (!(overflow & BIT(hidx)))
continue;

@ -763,7 +975,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
overflowed_ctrs |= BIT(lidx);
hw_evt = &event->hw;
/* Update the event states here so that we know the state while reading */
hw_evt->state |= PERF_HES_STOPPED;
riscv_pmu_event_update(event);
hw_evt->state |= PERF_HES_UPTODATE;
perf_sample_data_init(&data, 0, hw_evt->last_period);
if (riscv_pmu_event_set_period(event)) {
/*
@ -776,6 +991,8 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
perf_event_overflow(event, &data, regs);
}
/* Reset the state as we are going to start the counter after the loop */
hw_evt->state = 0;
}

pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
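The handler above services every counter whose bit is set in the overflow mask. Below is a self-contained analogue of that bit-walking loop, using the GCC/Clang count-trailing-zeros builtin in place of the kernel's for_each_set_bit; the mask value is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Visit each set bit of a 64-bit overflow mask, lowest index first. */
static void for_each_overflowed(uint64_t mask, void (*service)(unsigned int))
{
	while (mask) {
		unsigned int idx = (unsigned int)__builtin_ctzll(mask);

		service(idx);
		mask &= mask - 1;	/* clear the bit just handled */
	}
}

static void service_counter(unsigned int idx)
{
	printf("counter %u overflowed\n", idx);
}

int main(void)
{
	for_each_overflowed(0x905, service_counter);	/* bits 0, 2, 8, 11 */
	return 0;
}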
@ -807,6 +1024,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}

if (sbi_pmu_snapshot_available())
return pmu_sbi_snapshot_setup(pmu, cpu);

return 0;
}

@ -819,6 +1039,9 @@ static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);

if (sbi_pmu_snapshot_available())
return pmu_sbi_snapshot_disable();

return 0;
}

@ -927,6 +1150,12 @@ static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }

static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
if (sbi_v2_available) {
if (sbi_pmu_snapshot_available()) {
pmu_sbi_snapshot_disable();
pmu_sbi_snapshot_free(pmu);
}
}
riscv_pm_pmu_unregister(pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
@ -1094,10 +1323,6 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->event_unmapped = pmu_sbi_event_unmapped;
pmu->csr_index = pmu_sbi_csr_index;

ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
return ret;

ret = riscv_pm_pmu_register(pmu);
if (ret)
goto out_unregister;
@ -1106,8 +1331,34 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;

/* SBI PMU Snapshot is only available in SBI v2.0 */
if (sbi_v2_available) {
ret = pmu_sbi_snapshot_alloc(pmu);
if (ret)
goto out_unregister;

ret = pmu_sbi_snapshot_setup(pmu, smp_processor_id());
if (ret) {
/* Snapshot is an optional feature. Continue if not available */
pmu_sbi_snapshot_free(pmu);
} else {
pr_info("SBI PMU snapshot detected\n");
/*
* We enable it once here for the boot cpu. If snapshot shmem setup
* fails during cpu hotplug process, it will fail to start the cpu
* as we cannot handle heterogeneous PMUs with different snapshot
* capability.
*/
static_branch_enable(&sbi_pmu_snapshot_available);
}
}

register_sysctl("kernel", sbi_pmu_sysctl_table);

ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
goto out_unregister;

return 0;

out_unregister:
@ -1135,6 +1386,9 @@ static int __init pmu_sbi_devinit(void)
return 0;
}

if (sbi_spec_version >= sbi_mk_version(2, 0))
sbi_v2_available = true;

ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
"perf/riscv/pmu:starting",
pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
|
@ -3042,12 +3042,9 @@ static void
|
||||
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
|
||||
{
|
||||
struct nfs4_client *clp = cb->cb_clp;
|
||||
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
|
||||
|
||||
spin_lock(&nn->client_lock);
|
||||
clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
|
||||
put_client_renew_locked(clp);
|
||||
spin_unlock(&nn->client_lock);
|
||||
drop_client(clp);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -6616,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn)
|
||||
list_add(&clp->cl_ra_cblist, &cblist);
|
||||
|
||||
/* release in nfsd4_cb_recall_any_release */
|
||||
atomic_inc(&clp->cl_rpc_users);
|
||||
kref_get(&clp->cl_nfsdfs.cl_ref);
|
||||
set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
|
||||
clp->cl_ra_time = ktime_get_boottime_seconds();
|
||||
}
|
||||
|
@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref)
|
||||
{
|
||||
struct cached_fid *cfid = container_of(ref, struct cached_fid,
|
||||
refcount);
|
||||
int rc;
|
||||
|
||||
spin_lock(&cfid->cfids->cfid_list_lock);
|
||||
if (cfid->on_list) {
|
||||
@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref)
|
||||
cfid->dentry = NULL;
|
||||
|
||||
if (cfid->is_open) {
|
||||
SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
|
||||
rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
|
||||
cfid->fid.volatile_fid);
|
||||
atomic_dec(&cfid->tcon->num_remote_opens);
|
||||
if (rc != -EBUSY && rc != -EAGAIN)
|
||||
atomic_dec(&cfid->tcon->num_remote_opens);
|
||||
}
|
||||
|
||||
free_cached_dir(cfid);
|
||||
|
@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
|
||||
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
spin_lock(&tcon->open_file_lock);
|
||||
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
|
||||
@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
|
||||
}
|
||||
#endif /* CONFIG_CIFS_STATS2 */
|
||||
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
atomic_set(&tcon->num_smbs_sent, 0);
|
||||
spin_lock(&tcon->stat_lock);
|
||||
@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
|
||||
}
|
||||
#endif /* STATS2 */
|
||||
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
i++;
|
||||
seq_printf(m, "\n%d) %s", i, tcon->tree_name);
|
||||
|
@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq;
|
||||
struct workqueue_struct *fileinfo_put_wq;
|
||||
struct workqueue_struct *cifsoplockd_wq;
|
||||
struct workqueue_struct *deferredclose_wq;
|
||||
struct workqueue_struct *serverclose_wq;
|
||||
__u32 cifs_lock_secret;
|
||||
|
||||
/*
|
||||
@ -1888,6 +1889,13 @@ init_cifs(void)
|
||||
goto out_destroy_cifsoplockd_wq;
|
||||
}
|
||||
|
||||
serverclose_wq = alloc_workqueue("serverclose",
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
|
||||
if (!serverclose_wq) {
|
||||
rc = -ENOMEM;
|
||||
goto out_destroy_serverclose_wq;
|
||||
}
|
||||
|
||||
rc = cifs_init_inodecache();
|
||||
if (rc)
|
||||
goto out_destroy_deferredclose_wq;
|
||||
@ -1962,6 +1970,8 @@ out_destroy_decrypt_wq:
|
||||
destroy_workqueue(decrypt_wq);
|
||||
out_destroy_cifsiod_wq:
|
||||
destroy_workqueue(cifsiod_wq);
|
||||
out_destroy_serverclose_wq:
|
||||
destroy_workqueue(serverclose_wq);
|
||||
out_clean_proc:
|
||||
cifs_proc_clean();
|
||||
return rc;
|
||||
@ -1991,6 +2001,7 @@ exit_cifs(void)
|
||||
destroy_workqueue(cifsoplockd_wq);
|
||||
destroy_workqueue(decrypt_wq);
|
||||
destroy_workqueue(fileinfo_put_wq);
|
||||
destroy_workqueue(serverclose_wq);
|
||||
destroy_workqueue(cifsiod_wq);
|
||||
cifs_proc_clean();
|
||||
}
|
||||
|
@ -442,10 +442,10 @@ struct smb_version_operations {
|
||||
/* set fid protocol-specific info */
|
||||
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
|
||||
/* close a file */
|
||||
void (*close)(const unsigned int, struct cifs_tcon *,
|
||||
int (*close)(const unsigned int, struct cifs_tcon *,
|
||||
struct cifs_fid *);
|
||||
/* close a file, returning file attributes and timestamps */
|
||||
void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
struct cifsFileInfo *pfile_info);
|
||||
/* send a flush request to the server */
|
||||
int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
|
||||
@ -1281,7 +1281,6 @@ struct cifs_tcon {
|
||||
struct cached_fids *cfids;
|
||||
/* BB add field for back pointer to sb struct(s)? */
|
||||
#ifdef CONFIG_CIFS_DFS_UPCALL
|
||||
struct list_head dfs_ses_list;
|
||||
struct delayed_work dfs_cache_work;
|
||||
#endif
|
||||
struct delayed_work query_interfaces; /* query interfaces workqueue job */
|
||||
@ -1440,6 +1439,7 @@ struct cifsFileInfo {
|
||||
bool swapfile:1;
|
||||
bool oplock_break_cancelled:1;
|
||||
bool status_file_deleted:1; /* file has been deleted */
|
||||
bool offload:1; /* offload final part of _put to a wq */
|
||||
unsigned int oplock_epoch; /* epoch from the lease break */
|
||||
__u32 oplock_level; /* oplock/lease level from the lease break */
|
||||
int count;
|
||||
@ -1448,6 +1448,7 @@ struct cifsFileInfo {
|
||||
struct cifs_search_info srch_inf;
|
||||
struct work_struct oplock_break; /* work for oplock breaks */
|
||||
struct work_struct put; /* work for the final part of _put */
|
||||
struct work_struct serverclose; /* work for serverclose */
|
||||
struct delayed_work deferred;
|
||||
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
|
||||
char *symlink_target;
|
||||
@ -1804,7 +1805,6 @@ struct cifs_mount_ctx {
|
||||
struct TCP_Server_Info *server;
|
||||
struct cifs_ses *ses;
|
||||
struct cifs_tcon *tcon;
|
||||
struct list_head dfs_ses_list;
|
||||
};
|
||||
|
||||
static inline void __free_dfs_info_param(struct dfs_info3_param *param)
|
||||
@ -2105,6 +2105,7 @@ extern struct workqueue_struct *decrypt_wq;
|
||||
extern struct workqueue_struct *fileinfo_put_wq;
|
||||
extern struct workqueue_struct *cifsoplockd_wq;
|
||||
extern struct workqueue_struct *deferredclose_wq;
|
||||
extern struct workqueue_struct *serverclose_wq;
|
||||
extern __u32 cifs_lock_secret;
|
||||
|
||||
extern mempool_t *cifs_sm_req_poolp;
|
||||
@ -2324,4 +2325,14 @@ struct smb2_compound_vars {
|
||||
struct kvec ea_iov;
|
||||
};
|
||||
|
||||
static inline bool cifs_ses_exiting(struct cifs_ses *ses)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
spin_lock(&ses->ses_lock);
|
||||
ret = ses->ses_status == SES_EXITING;
|
||||
spin_unlock(&ses->ses_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* _CIFS_GLOB_H */
|
||||
|
@ -725,31 +725,31 @@ struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
|
||||
void cifs_put_tcon_super(struct super_block *sb);
|
||||
int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
|
||||
|
||||
/* Put references of @ses and @ses->dfs_root_ses */
|
||||
/* Put references of @ses and its children */
|
||||
static inline void cifs_put_smb_ses(struct cifs_ses *ses)
|
||||
{
|
||||
struct cifs_ses *rses = ses->dfs_root_ses;
|
||||
struct cifs_ses *next;
|
||||
|
||||
__cifs_put_smb_ses(ses);
|
||||
if (rses)
|
||||
__cifs_put_smb_ses(rses);
|
||||
do {
|
||||
next = ses->dfs_root_ses;
|
||||
__cifs_put_smb_ses(ses);
|
||||
} while ((ses = next));
|
||||
}
|
||||
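The new put path reads the next pointer in the DFS root-session chain before releasing the current session, because the release may free it. Below is a generic userspace sketch of that walk over a refcounted parent chain; the node type and helpers are invented for illustration and are not the cifs structures.

#include <stdio.h>
#include <stdlib.h>

struct node {
	const char *name;
	int refcount;
	struct node *parent;	/* analogous to a "root" session pointer */
};

static void put_node(struct node *n)
{
	if (--n->refcount == 0) {
		printf("freeing %s\n", n->name);
		free(n);
	}
}

/* Drop one reference on every node in the chain without touching freed memory. */
static void put_chain(struct node *n)
{
	while (n) {
		struct node *next = n->parent;	/* read before n may go away */

		put_node(n);
		n = next;
	}
}

int main(void)
{
	struct node *root = malloc(sizeof(*root));
	struct node *leaf = malloc(sizeof(*leaf));

	*root = (struct node){ .name = "root", .refcount = 1, .parent = NULL };
	*leaf = (struct node){ .name = "leaf", .refcount = 1, .parent = root };

	put_chain(leaf);	/* releases leaf, then root */
	return 0;
}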
|
||||
/* Get an active reference of @ses and @ses->dfs_root_ses.
|
||||
/* Get an active reference of @ses and its children.
|
||||
*
|
||||
* NOTE: make sure to call this function when incrementing reference count of
|
||||
* @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses)
|
||||
* will also get its reference count incremented.
|
||||
*
|
||||
* cifs_put_smb_ses() will put both references, so call it when you're done.
|
||||
* cifs_put_smb_ses() will put all references, so call it when you're done.
|
||||
*/
|
||||
static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses)
|
||||
{
|
||||
lockdep_assert_held(&cifs_tcp_ses_lock);
|
||||
|
||||
ses->ses_count++;
|
||||
if (ses->dfs_root_ses)
|
||||
ses->dfs_root_ses->ses_count++;
|
||||
for (; ses; ses = ses->dfs_root_ses)
|
||||
ses->ses_count++;
|
||||
}
|
||||
|
||||
static inline bool dfs_src_pathname_equal(const char *s1, const char *s2)
|
||||
|
@ -5854,10 +5854,8 @@ SetEARetry:
|
||||
parm_data->list.EA_flags = 0;
|
||||
/* we checked above that name len is less than 255 */
|
||||
parm_data->list.name_len = (__u8)name_len;
|
||||
/* EA names are always ASCII */
|
||||
if (ea_name)
|
||||
strncpy(parm_data->list.name, ea_name, name_len);
|
||||
parm_data->list.name[name_len] = '\0';
|
||||
/* EA names are always ASCII and NUL-terminated */
|
||||
strscpy(parm_data->list.name, ea_name ?: "", name_len + 1);
|
||||
parm_data->list.value_len = cpu_to_le16(ea_value_len);
|
||||
/* caller ensures that ea_value_len is less than 64K but
|
||||
we need to ensure that it fits within the smb */
|
||||
|
@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
|
||||
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
spin_lock(&ses->chan_lock);
|
||||
for (i = 0; i < ses->chan_count; i++) {
|
||||
if (!ses->chans[i].server)
|
||||
@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
|
||||
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
|
||||
/* check if iface is still active */
|
||||
spin_lock(&ses->ses_lock);
|
||||
if (ses->ses_status == SES_EXITING) {
|
||||
spin_unlock(&ses->ses_lock);
|
||||
continue;
|
||||
}
|
||||
spin_unlock(&ses->ses_lock);
|
||||
|
||||
spin_lock(&ses->chan_lock);
|
||||
if (cifs_ses_get_chan_index(ses, server) ==
|
||||
CIFS_INVAL_CHAN_INDEX) {
|
||||
@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
|
||||
ctx->sectype != ses->sectype)
|
||||
return 0;
|
||||
|
||||
if (ctx->dfs_root_ses != ses->dfs_root_ses)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If an existing session is limited to less channels than
|
||||
* requested, it should not be reused
|
||||
@ -1963,31 +1974,6 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* cifs_free_ipc - helper to release the session IPC tcon
|
||||
* @ses: smb session to unmount the IPC from
|
||||
*
|
||||
* Needs to be called every time a session is destroyed.
|
||||
*
|
||||
* On session close, the IPC is closed and the server must release all tcons of the session.
|
||||
* No need to send a tree disconnect here.
|
||||
*
|
||||
* Besides, it will make the server not close durable and resilient files on session close, as
|
||||
* specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request.
|
||||
*/
|
||||
static int
|
||||
cifs_free_ipc(struct cifs_ses *ses)
|
||||
{
|
||||
struct cifs_tcon *tcon = ses->tcon_ipc;
|
||||
|
||||
if (tcon == NULL)
|
||||
return 0;
|
||||
|
||||
tconInfoFree(tcon);
|
||||
ses->tcon_ipc = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct cifs_ses *
|
||||
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
|
||||
{
|
||||
@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
|
||||
void __cifs_put_smb_ses(struct cifs_ses *ses)
|
||||
{
|
||||
struct TCP_Server_Info *server = ses->server;
|
||||
struct cifs_tcon *tcon;
|
||||
unsigned int xid;
|
||||
size_t i;
|
||||
bool do_logoff;
|
||||
int rc;
|
||||
|
||||
spin_lock(&ses->ses_lock);
|
||||
if (ses->ses_status == SES_EXITING) {
|
||||
spin_unlock(&ses->ses_lock);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&ses->ses_lock);
|
||||
|
||||
cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
|
||||
cifs_dbg(FYI,
|
||||
"%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE");
|
||||
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
if (--ses->ses_count > 0) {
|
||||
spin_lock(&ses->ses_lock);
|
||||
cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n",
|
||||
__func__, ses->Suid, ses->ses_count, ses->ses_status,
|
||||
ses->tcon_ipc ? ses->tcon_ipc->tree_name : "none");
|
||||
if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) {
|
||||
spin_unlock(&ses->ses_lock);
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
return;
|
||||
}
|
||||
spin_lock(&ses->ses_lock);
|
||||
if (ses->ses_status == SES_GOOD)
|
||||
ses->ses_status = SES_EXITING;
|
||||
spin_unlock(&ses->ses_lock);
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
|
||||
/* ses_count can never go negative */
|
||||
WARN_ON(ses->ses_count < 0);
|
||||
|
||||
spin_lock(&ses->ses_lock);
|
||||
if (ses->ses_status == SES_EXITING && server->ops->logoff) {
|
||||
spin_unlock(&ses->ses_lock);
|
||||
cifs_free_ipc(ses);
|
||||
spin_lock(&ses->chan_lock);
|
||||
cifs_chan_clear_need_reconnect(ses, server);
|
||||
spin_unlock(&ses->chan_lock);
|
||||
|
||||
do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff;
|
||||
ses->ses_status = SES_EXITING;
|
||||
tcon = ses->tcon_ipc;
|
||||
ses->tcon_ipc = NULL;
|
||||
spin_unlock(&ses->ses_lock);
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
|
||||
/*
|
||||
* On session close, the IPC is closed and the server must release all
|
||||
* tcons of the session. No need to send a tree disconnect here.
|
||||
*
|
||||
* Besides, it will make the server not close durable and resilient
|
||||
* files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
|
||||
* SMB2 LOGOFF Request.
|
||||
*/
|
||||
tconInfoFree(tcon);
|
||||
if (do_logoff) {
|
||||
xid = get_xid();
|
||||
rc = server->ops->logoff(xid, ses);
|
||||
if (rc)
|
||||
cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
|
||||
__func__, rc);
|
||||
_free_xid(xid);
|
||||
} else {
|
||||
spin_unlock(&ses->ses_lock);
|
||||
cifs_free_ipc(ses);
|
||||
}
|
||||
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
@ -2373,9 +2363,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
|
||||
* need to lock before changing something in the session.
|
||||
*/
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
if (ctx->dfs_root_ses)
|
||||
cifs_smb_ses_inc_refcount(ctx->dfs_root_ses);
|
||||
ses->dfs_root_ses = ctx->dfs_root_ses;
|
||||
if (ses->dfs_root_ses)
|
||||
ses->dfs_root_ses->ses_count++;
|
||||
list_add(&ses->smb_ses_list, &server->smb_ses_list);
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
|
||||
@ -3326,6 +3316,9 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
|
||||
cifs_put_smb_ses(mnt_ctx->ses);
|
||||
else if (mnt_ctx->server)
|
||||
cifs_put_tcp_session(mnt_ctx->server, 0);
|
||||
mnt_ctx->ses = NULL;
|
||||
mnt_ctx->tcon = NULL;
|
||||
mnt_ctx->server = NULL;
|
||||
mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
|
||||
free_xid(mnt_ctx->xid);
|
||||
}
|
||||
@ -3604,8 +3597,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
|
||||
bool isdfs;
|
||||
int rc;
|
||||
|
||||
INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
|
||||
|
||||
rc = dfs_mount_share(&mnt_ctx, &isdfs);
|
||||
if (rc)
|
||||
goto error;
|
||||
@ -3636,7 +3627,6 @@ out:
|
||||
return rc;
|
||||
|
||||
error:
|
||||
dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
|
||||
cifs_mount_put_conns(&mnt_ctx);
|
||||
return rc;
|
||||
}
|
||||
@ -3651,6 +3641,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
|
||||
goto error;
|
||||
|
||||
rc = cifs_mount_get_tcon(&mnt_ctx);
|
||||
if (!rc) {
|
||||
/*
|
||||
* Prevent superblock from being created with any missing
|
||||
* connections.
|
||||
*/
|
||||
if (WARN_ON(!mnt_ctx.server))
|
||||
rc = -EHOSTDOWN;
|
||||
else if (WARN_ON(!mnt_ctx.ses))
|
||||
rc = -EACCES;
|
||||
else if (WARN_ON(!mnt_ctx.tcon))
|
||||
rc = -ENOENT;
|
||||
}
|
||||
if (rc)
|
||||
goto error;
|
||||
|
||||
@ -3988,13 +3990,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
|
||||
}
|
||||
|
||||
static struct cifs_tcon *
|
||||
cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
|
||||
__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
|
||||
{
|
||||
int rc;
|
||||
struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
|
||||
struct cifs_ses *ses;
|
||||
struct cifs_tcon *tcon = NULL;
|
||||
struct smb3_fs_context *ctx;
|
||||
char *origin_fullpath = NULL;
|
||||
|
||||
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
|
||||
if (ctx == NULL)
|
||||
@ -4018,6 +4021,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
|
||||
ctx->sign = master_tcon->ses->sign;
|
||||
ctx->seal = master_tcon->seal;
|
||||
ctx->witness = master_tcon->use_witness;
|
||||
ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;
|
||||
|
||||
rc = cifs_set_vol_auth(ctx, master_tcon->ses);
|
||||
if (rc) {
|
||||
@ -4037,12 +4041,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CIFS_DFS_UPCALL
|
||||
spin_lock(&master_tcon->tc_lock);
|
||||
if (master_tcon->origin_fullpath) {
|
||||
spin_unlock(&master_tcon->tc_lock);
|
||||
origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
|
||||
if (IS_ERR(origin_fullpath)) {
|
||||
tcon = ERR_CAST(origin_fullpath);
|
||||
origin_fullpath = NULL;
|
||||
cifs_put_smb_ses(ses);
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
spin_unlock(&master_tcon->tc_lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
tcon = cifs_get_tcon(ses, ctx);
|
||||
if (IS_ERR(tcon)) {
|
||||
cifs_put_smb_ses(ses);
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CIFS_DFS_UPCALL
|
||||
if (origin_fullpath) {
|
||||
spin_lock(&tcon->tc_lock);
|
||||
tcon->origin_fullpath = origin_fullpath;
|
||||
spin_unlock(&tcon->tc_lock);
|
||||
origin_fullpath = NULL;
|
||||
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
|
||||
dfs_cache_get_ttl() * HZ);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
|
||||
if (cap_unix(ses))
|
||||
reset_cifs_unix_caps(0, tcon, NULL, ctx);
|
||||
@ -4051,11 +4082,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
|
||||
out:
|
||||
kfree(ctx->username);
|
||||
kfree_sensitive(ctx->password);
|
||||
kfree(origin_fullpath);
|
||||
kfree(ctx);
|
||||
|
||||
return tcon;
|
||||
}
|
||||
|
||||
static struct cifs_tcon *
|
||||
cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
|
||||
{
|
||||
struct cifs_tcon *ret;
|
||||
|
||||
cifs_mount_lock();
|
||||
ret = __cifs_construct_tcon(cifs_sb, fsuid);
|
||||
cifs_mount_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct cifs_tcon *
|
||||
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
|
||||
{
|
||||
|
@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
|
||||
}
|
||||
|
||||
/*
|
||||
* Track individual DFS referral servers used by new DFS mount.
|
||||
*
|
||||
* On success, their lifetime will be shared by final tcon (dfs_ses_list).
|
||||
* Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount().
|
||||
* Get an active reference of @ses so that next call to cifs_put_tcon() won't
|
||||
* release it as any new DFS referrals must go through its IPC tcon.
|
||||
*/
|
||||
static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
|
||||
static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
|
||||
{
|
||||
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
|
||||
struct dfs_root_ses *root_ses;
|
||||
struct cifs_ses *ses = mnt_ctx->ses;
|
||||
|
||||
if (ses) {
|
||||
root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
|
||||
if (!root_ses)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&root_ses->list);
|
||||
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
cifs_smb_ses_inc_refcount(ses);
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
root_ses->ses = ses;
|
||||
list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
|
||||
}
|
||||
/* Select new DFS referral server so that new referrals go through it */
|
||||
ctx->dfs_root_ses = ses;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int parse_dfs_target(struct smb3_fs_context *ctx,
|
||||
@ -185,11 +172,8 @@ again:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_refsrv) {
|
||||
rc = add_root_smb_session(mnt_ctx);
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
if (is_refsrv)
|
||||
add_root_smb_session(mnt_ctx);
|
||||
|
||||
rc = ref_walk_advance(rw);
|
||||
if (!rc) {
|
||||
@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
|
||||
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
|
||||
struct cifs_tcon *tcon;
|
||||
char *origin_fullpath;
|
||||
bool new_tcon = true;
|
||||
int rc;
|
||||
|
||||
origin_fullpath = dfs_get_path(cifs_sb, ctx->source);
|
||||
@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
|
||||
return PTR_ERR(origin_fullpath);
|
||||
|
||||
rc = dfs_referral_walk(mnt_ctx);
|
||||
if (!rc) {
|
||||
/*
|
||||
* Prevent superblock from being created with any missing
|
||||
* connections.
|
||||
*/
|
||||
if (WARN_ON(!mnt_ctx->server))
|
||||
rc = -EHOSTDOWN;
|
||||
else if (WARN_ON(!mnt_ctx->ses))
|
||||
rc = -EACCES;
|
||||
else if (WARN_ON(!mnt_ctx->tcon))
|
||||
rc = -ENOENT;
|
||||
}
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
|
||||
if (!tcon->origin_fullpath) {
|
||||
tcon->origin_fullpath = origin_fullpath;
|
||||
origin_fullpath = NULL;
|
||||
} else {
|
||||
new_tcon = false;
|
||||
}
|
||||
spin_unlock(&tcon->tc_lock);
|
||||
|
||||
if (list_empty(&tcon->dfs_ses_list)) {
|
||||
list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list);
|
||||
if (new_tcon) {
|
||||
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
|
||||
dfs_cache_get_ttl() * HZ);
|
||||
} else {
|
||||
dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
|
||||
}
|
||||
|
||||
out:
|
||||
@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
ctx->dfs_root_ses = mnt_ctx->ses;
|
||||
/*
|
||||
* If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
|
||||
* try to get a DFS referral (even cached) to determine whether it is a DFS mount.
|
||||
@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
|
||||
|
||||
*isdfs = true;
|
||||
add_root_smb_session(mnt_ctx);
|
||||
return __dfs_mount_share(mnt_ctx);
|
||||
rc = __dfs_mount_share(mnt_ctx);
|
||||
dfs_put_root_smb_sessions(mnt_ctx);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Update dfs referral path of superblock */
|
||||
|
@ -7,7 +7,9 @@
|
||||
#define _CIFS_DFS_H
|
||||
|
||||
#include "cifsglob.h"
|
||||
#include "cifsproto.h"
|
||||
#include "fs_context.h"
|
||||
#include "dfs_cache.h"
|
||||
#include "cifs_unicode.h"
|
||||
#include <linux/namei.h>
|
||||
|
||||
@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw)
|
||||
ref_walk_tit(rw));
|
||||
}
|
||||
|
||||
struct dfs_root_ses {
|
||||
struct list_head list;
|
||||
struct cifs_ses *ses;
|
||||
};
|
||||
|
||||
int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
|
||||
struct smb3_fs_context *ctx);
|
||||
int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
|
||||
@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p
|
||||
{
|
||||
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
|
||||
struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
|
||||
struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses;
|
||||
|
||||
return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
|
||||
return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls,
|
||||
cifs_remap(cifs_sb), path, ref, tl);
|
||||
}
|
||||
|
||||
static inline void dfs_put_root_smb_sessions(struct list_head *head)
|
||||
/*
|
||||
* cifs_get_smb_ses() already guarantees an active reference of
|
||||
* @ses->dfs_root_ses when a new session is created, so we need to put extra
|
||||
* references of all DFS root sessions that were used across the mount process
|
||||
* in dfs_mount_share().
|
||||
*/
|
||||
static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx)
|
||||
{
|
||||
struct dfs_root_ses *root, *tmp;
|
||||
const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
|
||||
struct cifs_ses *ses = ctx->dfs_root_ses;
|
||||
struct cifs_ses *cur;
|
||||
|
||||
list_for_each_entry_safe(root, tmp, head, list) {
|
||||
list_del_init(&root->list);
|
||||
cifs_put_smb_ses(root->ses);
|
||||
kfree(root);
|
||||
if (!ses)
|
||||
return;
|
||||
|
||||
for (cur = ses; cur; cur = cur->dfs_root_ses) {
|
||||
if (cur->dfs_root_ses)
|
||||
cifs_put_smb_ses(cur->dfs_root_ses);
|
||||
}
|
||||
cifs_put_smb_ses(ses);
|
||||
}
|
||||
|
||||
#endif /* _CIFS_DFS_H */
|
||||
|
@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
|
||||
static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh)
|
||||
/* Refresh dfs referral of @ses and mark it for reconnect if needed */
|
||||
static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh)
|
||||
{
|
||||
struct TCP_Server_Info *server = ses->server;
|
||||
DFS_CACHE_TGT_LIST(old_tl);
|
||||
@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref
|
||||
bool needs_refresh = false;
|
||||
struct cache_entry *ce;
|
||||
unsigned int xid;
|
||||
char *path = NULL;
|
||||
int rc = 0;
|
||||
|
||||
xid = get_xid();
|
||||
|
||||
mutex_lock(&server->refpath_lock);
|
||||
if (server->leaf_fullpath) {
|
||||
path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC);
|
||||
if (!path)
|
||||
rc = -ENOMEM;
|
||||
}
|
||||
mutex_unlock(&server->refpath_lock);
|
||||
if (!path)
|
||||
goto out;
|
||||
|
||||
down_read(&htable_rw_lock);
|
||||
ce = lookup_cache_entry(path);
|
||||
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
|
||||
@ -1218,19 +1229,17 @@ out:
|
||||
free_xid(xid);
|
||||
dfs_cache_free_tgts(&old_tl);
|
||||
dfs_cache_free_tgts(&new_tl);
|
||||
return rc;
|
||||
kfree(path);
|
||||
}
|
||||
|
||||
static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh)
|
||||
static inline void refresh_ses_referral(struct cifs_ses *ses)
|
||||
{
|
||||
struct TCP_Server_Info *server = tcon->ses->server;
|
||||
struct cifs_ses *ses = tcon->ses;
|
||||
__refresh_ses_referral(ses, false);
|
||||
}
|
||||
|
||||
mutex_lock(&server->refpath_lock);
|
||||
if (server->leaf_fullpath)
|
||||
__refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh);
|
||||
mutex_unlock(&server->refpath_lock);
|
||||
return 0;
|
||||
static inline void force_refresh_ses_referral(struct cifs_ses *ses)
|
||||
{
|
||||
__refresh_ses_referral(ses, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
|
||||
*/
|
||||
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
|
||||
|
||||
return refresh_tcon(tcon, true);
|
||||
force_refresh_ses_referral(tcon->ses);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Refresh all DFS referrals related to DFS tcon */
|
||||
void dfs_cache_refresh(struct work_struct *work)
|
||||
{
|
||||
struct TCP_Server_Info *server;
|
||||
struct dfs_root_ses *rses;
|
||||
struct cifs_tcon *tcon;
|
||||
struct cifs_ses *ses;
|
||||
|
||||
tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work);
|
||||
ses = tcon->ses;
|
||||
server = ses->server;
|
||||
|
||||
mutex_lock(&server->refpath_lock);
|
||||
if (server->leaf_fullpath)
|
||||
__refresh_tcon(server->leaf_fullpath + 1, ses, false);
|
||||
mutex_unlock(&server->refpath_lock);
|
||||
|
||||
list_for_each_entry(rses, &tcon->dfs_ses_list, list) {
|
||||
ses = rses->ses;
|
||||
server = ses->server;
|
||||
mutex_lock(&server->refpath_lock);
|
||||
if (server->leaf_fullpath)
|
||||
__refresh_tcon(server->leaf_fullpath + 1, ses, false);
|
||||
mutex_unlock(&server->refpath_lock);
|
||||
}
|
||||
for (ses = tcon->ses; ses; ses = ses->dfs_root_ses)
|
||||
refresh_ses_referral(ses);
|
||||
|
||||
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
|
||||
atomic_read(&dfs_cache_ttl) * HZ);
|
||||
|
@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
|
||||
int disposition;
|
||||
struct TCP_Server_Info *server = tcon->ses->server;
|
||||
struct cifs_open_parms oparms;
|
||||
int rdwr_for_fscache = 0;
|
||||
|
||||
*oplock = 0;
|
||||
if (tcon->ses->server->oplocks)
|
||||
@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
|
||||
return PTR_ERR(full_path);
|
||||
}
|
||||
|
||||
/* If we're caching, we need to be able to fill in around partial writes. */
|
||||
if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
|
||||
rdwr_for_fscache = 1;
|
||||
|
||||
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
|
||||
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
|
||||
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
|
||||
@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
|
||||
desired_access |= GENERIC_READ; /* is this too little? */
|
||||
if (OPEN_FMODE(oflags) & FMODE_WRITE)
|
||||
desired_access |= GENERIC_WRITE;
|
||||
if (rdwr_for_fscache == 1)
|
||||
desired_access |= GENERIC_READ;
|
||||
|
||||
disposition = FILE_OVERWRITE_IF;
|
||||
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
|
||||
@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
|
||||
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
|
||||
create_options |= CREATE_OPTION_READONLY;
|
||||
|
||||
retry_open:
|
||||
oparms = (struct cifs_open_parms) {
|
||||
.tcon = tcon,
|
||||
.cifs_sb = cifs_sb,
|
||||
@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
|
||||
rc = server->ops->open(xid, &oparms, oplock, buf);
|
||||
if (rc) {
|
||||
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
|
||||
if (rc == -EACCES && rdwr_for_fscache == 1) {
|
||||
desired_access &= ~GENERIC_READ;
|
||||
rdwr_for_fscache = 2;
|
||||
goto retry_open;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
if (rdwr_for_fscache == 2)
|
||||
cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
|
||||
|
||||
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
|
||||
/*
|
||||
|
@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
|
||||
*/
|
||||
}
|
||||
|
||||
static inline int cifs_convert_flags(unsigned int flags)
|
||||
static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
|
||||
{
|
||||
if ((flags & O_ACCMODE) == O_RDONLY)
|
||||
return GENERIC_READ;
|
||||
else if ((flags & O_ACCMODE) == O_WRONLY)
|
||||
return GENERIC_WRITE;
|
||||
return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
|
||||
else if ((flags & O_ACCMODE) == O_RDWR) {
|
||||
/* GENERIC_ALL is too much permission to request
|
||||
can cause unnecessary access denied on create */
|
||||
@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
|
||||
int create_options = CREATE_NOT_DIR;
|
||||
struct TCP_Server_Info *server = tcon->ses->server;
|
||||
struct cifs_open_parms oparms;
|
||||
int rdwr_for_fscache = 0;
|
||||
|
||||
if (!server->ops->open)
|
||||
return -ENOSYS;
|
||||
|
||||
desired_access = cifs_convert_flags(f_flags);
|
||||
/* If we're caching, we need to be able to fill in around partial writes. */
|
||||
if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
|
||||
rdwr_for_fscache = 1;
|
||||
|
||||
desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
|
||||
|
||||
/*********************************************************************
|
||||
* open flag mapping table:
|
||||
@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
|
||||
if (f_flags & O_DIRECT)
|
||||
create_options |= CREATE_NO_BUFFER;
|
||||
|
||||
retry_open:
|
||||
oparms = (struct cifs_open_parms) {
|
||||
.tcon = tcon,
|
||||
.cifs_sb = cifs_sb,
|
||||
@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
|
||||
};
|
||||
|
||||
rc = server->ops->open(xid, &oparms, oplock, buf);
|
||||
if (rc)
|
||||
if (rc) {
|
||||
if (rc == -EACCES && rdwr_for_fscache == 1) {
|
||||
desired_access = cifs_convert_flags(f_flags, 0);
|
||||
rdwr_for_fscache = 2;
|
||||
goto retry_open;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
if (rdwr_for_fscache == 2)
|
||||
cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
|
||||
|
||||
/* TODO: Add support for calling posix query info but with passing in fid */
|
||||
if (tcon->unix_ext)
|
||||
@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
|
||||
}
|
||||
|
||||
static void cifsFileInfo_put_work(struct work_struct *work);
|
||||
void serverclose_work(struct work_struct *work);
|
||||
|
||||
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
|
||||
struct tcon_link *tlink, __u32 oplock,
|
||||
@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
|
||||
cfile->tlink = cifs_get_tlink(tlink);
|
||||
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
|
||||
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
|
||||
INIT_WORK(&cfile->serverclose, serverclose_work);
|
||||
INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
|
||||
mutex_init(&cfile->fh_mutex);
|
||||
spin_lock_init(&cfile->file_info_lock);
|
||||
@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
|
||||
cifsFileInfo_put_final(cifs_file);
|
||||
}
|
||||
|
||||
void serverclose_work(struct work_struct *work)
|
||||
{
|
||||
struct cifsFileInfo *cifs_file = container_of(work,
|
||||
struct cifsFileInfo, serverclose);
|
||||
|
||||
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
|
||||
|
||||
struct TCP_Server_Info *server = tcon->ses->server;
|
||||
int rc = 0;
|
||||
int retries = 0;
|
||||
int MAX_RETRIES = 4;
|
||||
|
||||
do {
|
||||
if (server->ops->close_getattr)
|
||||
rc = server->ops->close_getattr(0, tcon, cifs_file);
|
||||
else if (server->ops->close)
|
||||
rc = server->ops->close(0, tcon, &cifs_file->fid);
|
||||
|
||||
if (rc == -EBUSY || rc == -EAGAIN) {
|
||||
retries++;
|
||||
msleep(250);
|
||||
}
|
||||
} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
|
||||
);
|
||||
|
||||
if (retries == MAX_RETRIES)
|
||||
pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
|
||||
|
||||
if (cifs_file->offload)
|
||||
queue_work(fileinfo_put_wq, &cifs_file->put);
|
||||
else
|
||||
cifsFileInfo_put_final(cifs_file);
|
||||
}
|
||||
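When the server answers busy, the close above is retried a bounded number of times with a short sleep in between before giving up. A self-contained sketch of that bounded-retry shape follows; the failing operation and the limits are placeholders, not the SMB2 close call.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_RETRIES 4

/* Placeholder for an operation that may transiently return -EBUSY/-EAGAIN. */
static int try_close(int attempt)
{
	return attempt < 2 ? -EBUSY : 0;	/* succeeds on the third try */
}

int main(void)
{
	int rc, retries = 0;

	do {
		rc = try_close(retries);
		if (rc == -EBUSY || rc == -EAGAIN) {
			retries++;
			usleep(250 * 1000);	/* back off for 250 ms */
		}
	} while ((rc == -EBUSY || rc == -EAGAIN) && retries < MAX_RETRIES);

	if (retries == MAX_RETRIES)
		fprintf(stderr, "close failed %d times, giving up\n", MAX_RETRIES);
	return rc ? 1 : 0;
}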
|
||||
/**
|
||||
* cifsFileInfo_put - release a reference of file priv data
|
||||
*
|
||||
@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
|
||||
struct cifs_fid fid = {};
|
||||
struct cifs_pending_open open;
|
||||
bool oplock_break_cancelled;
|
||||
bool serverclose_offloaded = false;
|
||||
|
||||
spin_lock(&tcon->open_file_lock);
|
||||
spin_lock(&cifsi->open_file_lock);
|
||||
spin_lock(&cifs_file->file_info_lock);
|
||||
|
||||
cifs_file->offload = offload;
|
||||
if (--cifs_file->count > 0) {
|
||||
spin_unlock(&cifs_file->file_info_lock);
|
||||
spin_unlock(&cifsi->open_file_lock);
|
||||
@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
|
||||
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
|
||||
struct TCP_Server_Info *server = tcon->ses->server;
|
||||
unsigned int xid;
|
||||
int rc = 0;
|
||||
|
||||
xid = get_xid();
|
||||
if (server->ops->close_getattr)
|
||||
server->ops->close_getattr(xid, tcon, cifs_file);
|
||||
rc = server->ops->close_getattr(xid, tcon, cifs_file);
|
||||
else if (server->ops->close)
|
||||
server->ops->close(xid, tcon, &cifs_file->fid);
|
||||
rc = server->ops->close(xid, tcon, &cifs_file->fid);
|
||||
_free_xid(xid);
|
||||
|
||||
if (rc == -EBUSY || rc == -EAGAIN) {
|
||||
// Server close failed, hence offloading it as an async op
|
||||
queue_work(serverclose_wq, &cifs_file->serverclose);
|
||||
serverclose_offloaded = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (oplock_break_cancelled)
|
||||
@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
|
||||
|
||||
cifs_del_pending_open(&open);
|
||||
|
||||
if (offload)
|
||||
queue_work(fileinfo_put_wq, &cifs_file->put);
|
||||
else
|
||||
cifsFileInfo_put_final(cifs_file);
|
||||
// if serverclose has been offloaded to wq (on failure), it will
|
||||
// handle offloading put as well. If serverclose not offloaded,
|
||||
// we need to handle offloading put here.
|
||||
if (!serverclose_offloaded) {
|
||||
if (offload)
|
||||
queue_work(fileinfo_put_wq, &cifs_file->put);
|
||||
else
|
||||
cifsFileInfo_put_final(cifs_file);
|
||||
}
|
||||
}
|
||||
|
||||
int cifs_open(struct inode *inode, struct file *file)
|
||||
@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
|
||||
use_cache:
|
||||
fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
|
||||
file->f_mode & FMODE_WRITE);
|
||||
if (file->f_flags & O_DIRECT &&
|
||||
(!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
|
||||
file->f_flags & O_APPEND))
|
||||
cifs_invalidate_cache(file_inode(file),
|
||||
FSCACHE_INVAL_DIO_WRITE);
|
||||
if (!(file->f_flags & O_DIRECT))
|
||||
goto out;
|
||||
if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
|
||||
goto out;
|
||||
cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
|
||||
|
||||
out:
|
||||
free_dentry_path(page);
|
||||
@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
|
||||
int disposition = FILE_OPEN;
|
||||
int create_options = CREATE_NOT_DIR;
|
||||
struct cifs_open_parms oparms;
|
||||
int rdwr_for_fscache = 0;
|
||||
|
||||
xid = get_xid();
|
||||
mutex_lock(&cfile->fh_mutex);
|
||||
@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
|
||||
}
|
||||
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
|
||||
|
||||
desired_access = cifs_convert_flags(cfile->f_flags);
|
||||
/* If we're caching, we need to be able to fill in around partial writes. */
|
||||
if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
|
||||
rdwr_for_fscache = 1;
|
||||
|
||||
desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
|
||||
|
||||
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
|
||||
if (cfile->f_flags & O_SYNC)
|
||||
@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
|
||||
if (server->ops->get_lease_key)
|
||||
server->ops->get_lease_key(inode, &cfile->fid);
|
||||
|
||||
retry_open:
|
||||
oparms = (struct cifs_open_parms) {
|
||||
.tcon = tcon,
|
||||
.cifs_sb = cifs_sb,
|
||||
@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
|
||||
/* indicate that we need to relock the file */
|
||||
oparms.reconnect = true;
|
||||
}
|
||||
if (rc == -EACCES && rdwr_for_fscache == 1) {
|
||||
desired_access = cifs_convert_flags(cfile->f_flags, 0);
|
||||
rdwr_for_fscache = 2;
|
||||
goto retry_open;
|
||||
}
|
||||
|
||||
if (rc) {
|
||||
mutex_unlock(&cfile->fh_mutex);
|
||||
@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
|
||||
goto reopen_error_exit;
|
||||
}
|
||||
|
||||
if (rdwr_for_fscache == 2)
|
||||
cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
|
||||
|
||||
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
|
||||
reopen_success:
|
||||
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
|
||||
|
@ -37,7 +37,7 @@
|
||||
#include "rfc1002pdu.h"
|
||||
#include "fs_context.h"
|
||||
|
||||
static DEFINE_MUTEX(cifs_mount_mutex);
|
||||
DEFINE_MUTEX(cifs_mount_mutex);
|
||||
|
||||
static const match_table_t cifs_smb_version_tokens = {
|
||||
{ Smb_1, SMB1_VERSION_STRING },
|
||||
@ -783,9 +783,9 @@ static int smb3_get_tree(struct fs_context *fc)
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
mutex_lock(&cifs_mount_mutex);
|
||||
cifs_mount_lock();
|
||||
ret = smb3_get_tree_common(fc);
|
||||
mutex_unlock(&cifs_mount_mutex);
|
||||
cifs_mount_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -304,4 +304,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
|
||||
#define MAX_CACHED_FIDS 16
|
||||
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
|
||||
|
||||
extern struct mutex cifs_mount_mutex;
|
||||
|
||||
static inline void cifs_mount_lock(void)
|
||||
{
|
||||
mutex_lock(&cifs_mount_mutex);
|
||||
}
|
||||
|
||||
static inline void cifs_mount_unlock(void)
|
||||
{
|
||||
mutex_unlock(&cifs_mount_mutex);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode,
|
||||
__cifs_readahead_to_fscache(inode, pos, len);
|
||||
}
|
||||
|
||||
static inline bool cifs_fscache_enabled(struct inode *inode)
|
||||
{
|
||||
return fscache_cookie_enabled(cifs_inode_cookie(inode));
|
||||
}
|
||||
|
||||
#else /* CONFIG_CIFS_FSCACHE */
|
||||
static inline
|
||||
void cifs_fscache_fill_coherency(struct inode *inode,
|
||||
@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
|
||||
static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
|
||||
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
|
||||
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
|
||||
static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }
|
||||
|
||||
static inline int cifs_fscache_query_occupancy(struct inode *inode,
|
||||
pgoff_t first, unsigned int nr_pages,
|
||||
|
@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
|
||||
list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
|
||||
if (ses_it->Suid == out.session_id) {
|
||||
spin_lock(&ses_it->ses_lock);
|
||||
if (ses_it->ses_status != SES_EXITING &&
|
||||
ses_it->Suid == out.session_id) {
|
||||
ses = ses_it;
|
||||
/*
|
||||
* since we are using the session outside the crit
|
||||
@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
|
||||
* so increment its refcount
|
||||
*/
|
||||
cifs_smb_ses_inc_refcount(ses);
|
||||
spin_unlock(&ses_it->ses_lock);
|
||||
found = true;
|
||||
goto search_end;
|
||||
}
|
||||
spin_unlock(&ses_it->ses_lock);
|
||||
}
|
||||
}
|
||||
search_end:
|
||||
|
@ -138,9 +138,6 @@ tcon_info_alloc(bool dir_leases_enabled)
|
||||
atomic_set(&ret_buf->num_local_opens, 0);
|
||||
atomic_set(&ret_buf->num_remote_opens, 0);
|
||||
ret_buf->stats_from_time = ktime_get_real_seconds();
|
||||
#ifdef CONFIG_CIFS_DFS_UPCALL
|
||||
INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
|
||||
#endif
|
||||
|
||||
return ret_buf;
|
||||
}
|
||||
@ -156,9 +153,6 @@ tconInfoFree(struct cifs_tcon *tcon)
|
||||
atomic_dec(&tconInfoAllocCount);
|
||||
kfree(tcon->nativeFileSystem);
|
||||
kfree_sensitive(tcon->password);
|
||||
#ifdef CONFIG_CIFS_DFS_UPCALL
|
||||
dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
|
||||
#endif
|
||||
kfree(tcon->origin_fullpath);
|
||||
kfree(tcon);
|
||||
}
|
||||
@ -487,6 +481,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
|
||||
/* look up tcon based on tid & uid */
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
if (tcon->tid != buf->Tid)
|
||||
continue;
|
||||
|
@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
|
||||
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
struct cifs_fid *fid)
|
||||
{
|
||||
CIFSSMBClose(xid, tcon, fid->netfid);
|
||||
return CIFSSMBClose(xid, tcon, fid->netfid);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
|
||||
/* look up tcon based on tid & uid */
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
spin_lock(&tcon->open_file_lock);
|
||||
cifs_stats_inc(
|
||||
@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
|
||||
/* look up tcon based on tid & uid */
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
|
||||
spin_lock(&tcon->open_file_lock);
|
||||
|
@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
|
||||
memcpy(cfile->fid.create_guid, fid->create_guid, 16);
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
struct cifs_fid *fid)
|
||||
{
|
||||
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
|
||||
return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
struct cifsFileInfo *cfile)
|
||||
{
|
||||
@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
|
||||
cfile->fid.volatile_fid, &file_inf);
|
||||
if (rc)
|
||||
return;
|
||||
return rc;
|
||||
|
||||
inode = d_inode(cfile->dentry);
|
||||
|
||||
@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
|
||||
/* End of file and Attributes should not have to be updated on close */
|
||||
spin_unlock(&inode->i_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
|
||||
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
|
||||
if (cifs_ses_exiting(ses))
|
||||
continue;
|
||||
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
|
||||
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
|
||||
spin_lock(&tcon->tc_lock);
|
||||
@ -3913,7 +3916,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
|
||||
strcat(message, "W");
|
||||
}
|
||||
if (!new_oplock)
|
||||
strncpy(message, "None", sizeof(message));
|
||||
strscpy(message, "None");
|
||||
|
||||
cinode->oplock = new_oplock;
|
||||
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
|
||||
|
@ -3628,9 +3628,9 @@ replay_again:
|
||||
memcpy(&pbuf->network_open_info,
|
||||
&rsp->network_open_info,
|
||||
sizeof(pbuf->network_open_info));
|
||||
atomic_dec(&tcon->num_remote_opens);
|
||||
}
|
||||
|
||||
atomic_dec(&tcon->num_remote_opens);
|
||||
close_exit:
|
||||
SMB2_close_free(&rqst);
|
||||
free_rsp_buf(resp_buftype, rsp);
|
||||
|
@ -659,7 +659,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
}
|
||||
spin_unlock(&server->srv_lock);
|
||||
if (!is_binding && !server->session_estab) {
|
||||
strncpy(shdr->Signature, "BSRSPYL", 8);
|
||||
strscpy(shdr->Signature, "BSRSPYL");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1301,8 +1301,19 @@ xfs_link(
|
||||
*/
|
||||
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
|
||||
tdp->i_projid != sip->i_projid)) {
|
||||
error = -EXDEV;
|
||||
goto error_return;
|
||||
/*
|
||||
* Project quota setup skips special files which can
|
||||
* leave inodes in a PROJINHERIT directory without a
|
||||
* project ID set. We need to allow links to be made
|
||||
* to these "project-less" inodes because userspace
|
||||
* expects them to succeed after project ID setup,
|
||||
* but everything else should be rejected.
|
||||
*/
|
||||
if (!special_file(VFS_I(sip)->i_mode) ||
|
||||
sip->i_projid != 0) {
|
||||
error = -EXDEV;
|
||||
goto error_return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!resblks) {
|
||||
|
@ -39,6 +39,14 @@ struct cpu_hw_events {
DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
/* currently enabled firmware counters */
DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
/* The virtual address of the shared memory where counter snapshot will be taken */
void *snapshot_addr;
/* The physical address of the shared memory where counter snapshot will be taken */
phys_addr_t snapshot_addr_phys;
/* Boolean flag to indicate setup is already done */
bool snapshot_set_done;
/* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
};
|
||||
|
||||
struct riscv_pmu {
|
||||
|
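For orientation only: a minimal, hypothetical sketch of how the new snapshot fields above could be populated per CPU — allocate a zeroed page, record its virtual and physical addresses, and hand the physical address to the SBI PMU snapshot-shmem call. This is an editor's illustration, not the driver code added by this series; the function name is made up, and SBI_EXT_PMU / SBI_EXT_PMU_SNAPSHOT_SET_SHMEM are assumed to be available from the kernel's SBI header (the same constants appear in the selftest header added later in this merge).

/* Illustrative sketch only -- not the code added by this series. */
static int pmu_sbi_snapshot_setup_cpu(struct cpu_hw_events *cpu_hw_evt)
{
	unsigned long vaddr = get_zeroed_page(GFP_KERNEL);	/* one 4K page fits riscv_pmu_snapshot_data */
	struct sbiret ret;

	if (!vaddr)
		return -ENOMEM;

	cpu_hw_evt->snapshot_addr = (void *)vaddr;
	cpu_hw_evt->snapshot_addr_phys = virt_to_phys((void *)vaddr);

	/* Tell the SBI implementation where this CPU's snapshot memory lives (RV64: high word is 0). */
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
			cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
	if (ret.error) {
		free_page(vaddr);
		return -EIO;
	}

	cpu_hw_evt->snapshot_set_done = true;
	return 0;
}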
@ -22,7 +22,7 @@
|
||||
*
|
||||
* @read: returns the current cycle value
|
||||
* @mask: bitmask for two's complement
|
||||
* subtraction of non 64 bit counters,
|
||||
* subtraction of non-64-bit counters,
|
||||
* see CYCLECOUNTER_MASK() helper macro
|
||||
* @mult: cycle to nanosecond multiplier
|
||||
* @shift: cycle to nanosecond divisor (power of two)
|
||||
@ -35,7 +35,7 @@ struct cyclecounter {
|
||||
};
|
||||
|
||||
/**
|
||||
* struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
|
||||
* struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
|
||||
* Contains the state needed by timecounter_read() to detect
|
||||
* cycle counter wrap around. Initialize with
|
||||
* timecounter_init(). Also used to convert cycle counts into the
|
||||
@ -66,6 +66,8 @@ struct timecounter {
|
||||
* @cycles: Cycles
|
||||
* @mask: bit mask for maintaining the 'frac' field
|
||||
* @frac: pointer to storage for the fractional nanoseconds.
|
||||
*
|
||||
* Returns: cycle counter cycles converted to nanoseconds
|
||||
*/
|
||||
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
|
||||
u64 cycles, u64 mask, u64 *frac)
|
||||
@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
|
||||
|
||||
/**
|
||||
* timecounter_adjtime - Shifts the time of the clock.
|
||||
* @tc: The &struct timecounter to adjust
|
||||
* @delta: Desired change in nanoseconds.
|
||||
*/
|
||||
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
|
||||
@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc,
|
||||
*
|
||||
* In other words, keeps track of time since the same epoch as
|
||||
* the function which generated the initial time stamp.
|
||||
*
|
||||
* Returns: nanoseconds since the initial time stamp
|
||||
*/
|
||||
extern u64 timecounter_read(struct timecounter *tc);
|
||||
|
||||
@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc);
|
||||
*
|
||||
* This allows conversion of cycle counter values which were generated
|
||||
* in the past.
|
||||
*
|
||||
* Returns: cycle counter converted to nanoseconds since the initial time stamp
|
||||
*/
|
||||
extern u64 timecounter_cyc2time(const struct timecounter *tc,
|
||||
u64 cycle_tstamp);
|
||||
|
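As a quick, editor-added illustration of the @mult/@shift pair documented in the cyclecounter comment above (not the kernel's implementation): a cycle delta is converted to nanoseconds by multiplying by the cycle-to-nanosecond multiplier and right-shifting by the divisor exponent.

/* Sketch: cycle delta -> nanoseconds using a mult/shift pair. */
static inline u64 example_cyc2ns(u64 cycles, u32 mult, u32 shift)
{
	return (cycles * mult) >> shift;
}
/* e.g. with mult == (1000000000ULL << shift) / freq_hz, a 1 GHz counter yields ~1 ns per cycle. */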
@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
|
||||
const struct timezone *tz);
|
||||
|
||||
/*
|
||||
* ktime_get() family: read the current time in a multitude of ways,
|
||||
* ktime_get() family - read the current time in a multitude of ways.
|
||||
*
|
||||
* The default time reference is CLOCK_MONOTONIC, starting at
|
||||
* boot time but not counting the time spent in suspend.
|
||||
* For other references, use the functions with "real", "clocktai",
|
||||
* "boottime" and "raw" suffixes.
|
||||
*
|
||||
* To get the time in a different format, use the ones wit
|
||||
* To get the time in a different format, use the ones with
|
||||
* "ns", "ts64" and "seconds" suffix.
|
||||
*
|
||||
* See Documentation/core-api/timekeeping.rst for more details.
|
||||
@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void);
|
||||
|
||||
/**
|
||||
* ktime_get_real - get the real (wall-) time in ktime_t format
|
||||
*
|
||||
* Returns: real (wall) time in ktime_t format
|
||||
*/
|
||||
static inline ktime_t ktime_get_real(void)
|
||||
{
|
||||
@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_boottime - Returns monotonic time since boot in ktime_t format
|
||||
* ktime_get_boottime - Get monotonic time since boot in ktime_t format
|
||||
*
|
||||
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
|
||||
* time spent in suspend.
|
||||
*
|
||||
* Returns: monotonic time since boot in ktime_t format
|
||||
*/
|
||||
static inline ktime_t ktime_get_boottime(void)
|
||||
{
|
||||
@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_clocktai - Returns the TAI time of day in ktime_t format
|
||||
* ktime_get_clocktai - Get the TAI time of day in ktime_t format
|
||||
*
|
||||
* Returns: the TAI time of day in ktime_t format
|
||||
*/
|
||||
static inline ktime_t ktime_get_clocktai(void)
|
||||
{
|
||||
@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void)
|
||||
|
||||
/**
|
||||
* ktime_mono_to_real - Convert monotonic time to clock realtime
|
||||
* @mono: monotonic time to convert
|
||||
*
|
||||
* Returns: time converted to realtime clock
|
||||
*/
|
||||
static inline ktime_t ktime_mono_to_real(ktime_t mono)
|
||||
{
|
||||
return ktime_mono_to_any(mono, TK_OFFS_REAL);
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_ns - Get the current time in nanoseconds
|
||||
*
|
||||
* Returns: current time converted to nanoseconds
|
||||
*/
|
||||
static inline u64 ktime_get_ns(void)
|
||||
{
|
||||
return ktime_to_ns(ktime_get());
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_real_ns - Get the current real/wall time in nanoseconds
|
||||
*
|
||||
* Returns: current real time converted to nanoseconds
|
||||
*/
|
||||
static inline u64 ktime_get_real_ns(void)
|
||||
{
|
||||
return ktime_to_ns(ktime_get_real());
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
|
||||
*
|
||||
* Returns: current boottime converted to nanoseconds
|
||||
*/
|
||||
static inline u64 ktime_get_boottime_ns(void)
|
||||
{
|
||||
return ktime_to_ns(ktime_get_boottime());
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
|
||||
*
|
||||
* Returns: current TAI time converted to nanoseconds
|
||||
*/
|
||||
static inline u64 ktime_get_clocktai_ns(void)
|
||||
{
|
||||
return ktime_to_ns(ktime_get_clocktai());
|
||||
}
|
||||
|
||||
/**
|
||||
* ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
|
||||
*
|
||||
* Returns: current raw monotonic time converted to nanoseconds
|
||||
*/
|
||||
static inline u64 ktime_get_raw_ns(void)
|
||||
{
|
||||
return ktime_to_ns(ktime_get_raw());
|
||||
@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void);
|
||||
|
||||
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
|
||||
|
||||
/*
|
||||
* struct ktime_timestanps - Simultaneous mono/boot/real timestamps
|
||||
/**
|
||||
* struct ktime_timestamps - Simultaneous mono/boot/real timestamps
|
||||
* @mono: Monotonic timestamp
|
||||
* @boot: Boottime timestamp
|
||||
* @real: Realtime timestamp
|
||||
@ -242,7 +276,8 @@ struct ktime_timestamps {
|
||||
* @cycles: Clocksource counter value to produce the system times
|
||||
* @real: Realtime system time
|
||||
* @raw: Monotonic raw system time
|
||||
* @clock_was_set_seq: The sequence number of clock was set events
|
||||
* @cs_id: Clocksource ID
|
||||
* @clock_was_set_seq: The sequence number of clock-was-set events
|
||||
* @cs_was_changed_seq: The sequence number of clocksource change events
|
||||
*/
|
||||
struct system_time_snapshot {
|
||||
|
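To make the suffix convention documented above concrete, here is a small editorial sketch (do_work() is a hypothetical placeholder) measuring an interval with the *_ns accessors:

/* Sketch: elapsed time on the monotonic clock, in nanoseconds. */
static u64 measure_do_work(void)
{
	u64 start = ktime_get_ns();	/* CLOCK_MONOTONIC, excludes suspend */

	do_work();			/* hypothetical workload */

	return ktime_get_ns() - start;
	/* ktime_get_boottime_ns() would additionally count time spent in suspend. */
}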
@ -22,7 +22,7 @@
|
||||
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
|
||||
#endif
|
||||
|
||||
/**
|
||||
/*
|
||||
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
|
||||
* system is busy, but will not cause a CPU to come out of idle just
|
||||
* to service it; instead, the timer will be serviced when the CPU
|
||||
@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { }
|
||||
* or not. Callers must ensure serialization wrt. other operations done
|
||||
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
|
||||
*
|
||||
* return value: 1 if the timer is pending, 0 if not.
|
||||
* Returns: 1 if the timer is pending, 0 if not.
|
||||
*/
|
||||
static inline int timer_pending(const struct timer_list * timer)
|
||||
{
|
||||
@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer);
|
||||
* See timer_delete_sync() for detailed explanation.
|
||||
*
|
||||
* Do not use in new code. Use timer_delete_sync() instead.
|
||||
*
|
||||
* Returns:
|
||||
* * %0 - The timer was not pending
|
||||
* * %1 - The timer was pending and deactivated
|
||||
*/
|
||||
static inline int del_timer_sync(struct timer_list *timer)
|
||||
{
|
||||
@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer)
|
||||
* See timer_delete() for detailed explanation.
|
||||
*
|
||||
* Do not use in new code. Use timer_delete() instead.
|
||||
*
|
||||
* Returns:
|
||||
* * %0 - The timer was not pending
|
||||
* * %1 - The timer was pending and deactivated
|
||||
*/
|
||||
static inline int del_timer(struct timer_list *timer)
|
||||
{
|
||||
|
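A before/after sketch of the deprecation note added above (illustrative; &foo->timer stands in for any struct timer_list):

/* Old style, kept only for existing callers: */
del_timer_sync(&foo->timer);

/* Preferred in new code: */
timer_delete_sync(&foo->timer);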
@ -19,12 +19,6 @@
|
||||
#include <vdso/time32.h>
|
||||
#include <vdso/time64.h>
|
||||
|
||||
#ifdef CONFIG_ARM64
|
||||
#include <asm/page-def.h>
|
||||
#else
|
||||
#include <asm/page.h>
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
|
||||
#include <asm/vdso/data.h>
|
||||
#else
|
||||
@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden
|
||||
*/
|
||||
union vdso_data_store {
|
||||
struct vdso_data data[CS_BASES];
|
||||
u8 page[PAGE_SIZE];
|
||||
u8 page[1U << CONFIG_PAGE_SHIFT];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -697,6 +697,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu)
|
||||
|
||||
/**
|
||||
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
|
||||
* @now: current ktime_t
|
||||
*
|
||||
* Called from interrupt entry when the CPU was idle
|
||||
*
|
||||
@ -794,7 +795,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
|
||||
* This time is measured via accounting rather than sampling,
|
||||
* and is as accurate as ktime_get() is.
|
||||
*
|
||||
* This function returns -1 if NOHZ is not enabled.
|
||||
* Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
|
||||
*/
|
||||
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
|
||||
{
|
||||
@ -820,7 +821,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
|
||||
* This time is measured via accounting rather than sampling,
|
||||
* and is as accurate as ktime_get() is.
|
||||
*
|
||||
* This function returns -1 if NOHZ is not enabled.
|
||||
* Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
|
||||
*/
|
||||
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
|
||||
{
|
||||
@ -1287,6 +1288,8 @@ void tick_nohz_irq_exit(void)
|
||||
|
||||
/**
|
||||
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
|
||||
*
|
||||
* Return: %true if the tick handler has run, otherwise %false
|
||||
*/
|
||||
bool tick_nohz_idle_got_tick(void)
|
||||
{
|
||||
@ -1305,6 +1308,8 @@ bool tick_nohz_idle_got_tick(void)
|
||||
* stopped, it returns the next hrtimer.
|
||||
*
|
||||
* Called from power state control code with interrupts disabled
|
||||
*
|
||||
* Return: the next expiration time
|
||||
*/
|
||||
ktime_t tick_nohz_get_next_hrtimer(void)
|
||||
{
|
||||
@ -1320,6 +1325,8 @@ ktime_t tick_nohz_get_next_hrtimer(void)
|
||||
* The return value of this function and/or the value returned by it through the
|
||||
* @delta_next pointer can be negative which must be taken into account by its
|
||||
* callers.
|
||||
*
|
||||
* Return: the expected length of the current sleep
|
||||
*/
|
||||
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
|
||||
{
|
||||
@ -1357,8 +1364,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
|
||||
/**
|
||||
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
|
||||
* for a particular CPU.
|
||||
* @cpu: target CPU number
|
||||
*
|
||||
* Called from the schedutil frequency scaling governor in scheduler context.
|
||||
*
|
||||
* Return: the current idle calls counter value for @cpu
|
||||
*/
|
||||
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
|
||||
{
|
||||
@ -1371,6 +1381,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
|
||||
* tick_nohz_get_idle_calls - return the current idle calls counter value
|
||||
*
|
||||
* Called from the schedutil frequency scaling governor in scheduler context.
|
||||
*
|
||||
* Return: the current idle calls counter value for the current CPU
|
||||
*/
|
||||
unsigned long tick_nohz_get_idle_calls(void)
|
||||
{
|
||||
@ -1559,7 +1571,7 @@ early_param("skew_tick", skew_tick);
|
||||
|
||||
/**
|
||||
* tick_setup_sched_timer - setup the tick emulation timer
|
||||
* @mode: tick_nohz_mode to setup for
|
||||
* @hrtimer: whether to use the hrtimer or not
|
||||
*/
|
||||
void tick_setup_sched_timer(bool hrtimer)
|
||||
{
|
||||
|
@ -46,8 +46,8 @@ struct tick_device {
|
||||
* @next_tick: Next tick to be fired when in dynticks mode.
|
||||
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
|
||||
* @idle_waketime: Time when the idle was interrupted
|
||||
* @idle_sleeptime_seq: sequence counter for data consistency
|
||||
* @idle_entrytime: Time when the idle call was entered
|
||||
* @nohz_mode: Mode - one state of tick_nohz_mode
|
||||
* @last_jiffies: Base jiffies snapshot when next event was last computed
|
||||
* @timer_expires_base: Base time clock monotonic for @timer_expires
|
||||
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
|
||||
|
@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64);
|
||||
|
||||
/*
|
||||
* The timer wheel has LVL_DEPTH array levels. Each level provides an array of
|
||||
* LVL_SIZE buckets. Each level is driven by its own clock and therefor each
|
||||
* LVL_SIZE buckets. Each level is driven by its own clock and therefore each
|
||||
* level has a different granularity.
|
||||
*
|
||||
* The level granularity is: LVL_CLK_DIV ^ lvl
|
||||
* The level granularity is: LVL_CLK_DIV ^ level
|
||||
* The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
|
||||
*
|
||||
* The array level of a newly armed timer depends on the relative expiry
|
||||
* time. The farther the expiry time is away the higher the array level and
|
||||
* therefor the granularity becomes.
|
||||
* therefore the granularity becomes.
|
||||
*
|
||||
* Contrary to the original timer wheel implementation, which aims for 'exact'
|
||||
* expiry of the timers, this implementation removes the need for recascading
|
||||
@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64);
|
||||
* struct timer_base - Per CPU timer base (number of base depends on config)
|
||||
* @lock: Lock protecting the timer_base
|
||||
* @running_timer: When expiring timers, the lock is dropped. To make
|
||||
* sure not to race agains deleting/modifying a
|
||||
* sure not to race against deleting/modifying a
|
||||
* currently running timer, the pointer is set to the
|
||||
* timer, which expires at the moment. If no timer is
|
||||
* running, the pointer is NULL.
|
||||
@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr)
|
||||
}
|
||||
|
||||
/*
|
||||
* fixup_init is called when:
|
||||
* timer_fixup_init is called when:
|
||||
* - an active object is initialized
|
||||
*/
|
||||
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
|
||||
@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused)
|
||||
}
|
||||
|
||||
/*
|
||||
* fixup_activate is called when:
|
||||
* timer_fixup_activate is called when:
|
||||
* - an active object is activated
|
||||
* - an unknown non-static object is activated
|
||||
*/
|
||||
@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
|
||||
}
|
||||
|
||||
/*
|
||||
* fixup_free is called when:
|
||||
* timer_fixup_free is called when:
|
||||
* - an active object is freed
|
||||
*/
|
||||
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
|
||||
@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
|
||||
}
|
||||
|
||||
/*
|
||||
* fixup_assert_init is called when:
|
||||
* timer_fixup_assert_init is called when:
|
||||
* - an untracked/uninit-ed object is found
|
||||
*/
|
||||
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
|
||||
@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer,
|
||||
* @key: lockdep class key of the fake lock used for tracking timer
|
||||
* sync lock dependencies
|
||||
*
|
||||
* init_timer_key() must be done to a timer prior calling *any* of the
|
||||
* init_timer_key() must be done to a timer prior to calling *any* of the
|
||||
* other timer functions.
|
||||
*/
|
||||
void init_timer_key(struct timer_list *timer,
|
||||
@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown)
|
||||
* If @shutdown is set then the lock has to be taken whether the
|
||||
* timer is pending or not to protect against a concurrent rearm
|
||||
* which might hit between the lockless pending check and the lock
|
||||
* aquisition. By taking the lock it is ensured that such a newly
|
||||
* acquisition. By taking the lock it is ensured that such a newly
|
||||
* enqueued timer is dequeued and cannot end up with
|
||||
* timer->function == NULL in the expiry code.
|
||||
*
|
||||
@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
|
||||
|
||||
/*
|
||||
* When timer base is not set idle, undo the effect of
|
||||
* tmigr_cpu_deactivate() to prevent inconsitent states - active
|
||||
* tmigr_cpu_deactivate() to prevent inconsistent states - active
|
||||
* timer base but inactive timer migration hierarchy.
|
||||
*
|
||||
* When timer base was already marked idle, nothing will be
|
||||
|
@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
|
||||
|
||||
first_childevt = evt = data->evt;
|
||||
|
||||
/*
|
||||
* Walking the hierarchy is required in any case when a
|
||||
* remote expiry was done before. This ensures to not lose
|
||||
* already queued events in non active groups (see section
|
||||
* "Required event and timerqueue update after a remote
|
||||
* expiry" in the documentation at the top).
|
||||
*
|
||||
* The two call sites which are executed without a remote expiry
|
||||
* before, are not prevented from propagating changes through
|
||||
* the hierarchy by the return:
|
||||
* - When entering this path by tmigr_new_timer(), @evt->ignore
|
||||
* is never set.
|
||||
* - tmigr_inactive_up() takes care of the propagation by
|
||||
* itself and ignores the return value. But an immediate
|
||||
* return is possible if there is a parent, sparing group
|
||||
* locking at this level, because the upper walking call to
|
||||
* the parent will take care about removing this event from
|
||||
* within the group and update next_expiry accordingly.
|
||||
*
|
||||
* However if there is no parent, ie: the hierarchy has only a
|
||||
* single level so @group is the top level group, make sure the
|
||||
* first event information of the group is updated properly and
|
||||
* also handled properly, so skip this fast return path.
|
||||
*/
|
||||
if (evt->ignore && !remote && group->parent)
|
||||
return true;
|
||||
|
||||
raw_spin_lock(&group->lock);
|
||||
|
||||
childstate.state = 0;
|
||||
@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
|
||||
* queue when the expiry time changed only or when it could be ignored.
|
||||
*/
|
||||
if (timerqueue_node_queued(&evt->nextevt)) {
|
||||
if ((evt->nextevt.expires == nextexp) && !evt->ignore)
|
||||
if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
|
||||
/* Make sure not to miss a new CPU event with the same expiry */
|
||||
evt->cpu = first_childevt->cpu;
|
||||
goto check_toplvl;
|
||||
}
|
||||
|
||||
if (!timerqueue_del(&group->events, &evt->nextevt))
|
||||
WRITE_ONCE(group->next_expiry, KTIME_MAX);
|
||||
|
@ -1206,15 +1206,6 @@ err_noclose:
|
||||
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
|
||||
* copy operations in this path. Therefore the caller must ensure
|
||||
* that the pages backing @xdr are unchanging.
|
||||
*
|
||||
* Note that the send is non-blocking. The caller has incremented
|
||||
* the reference count on each page backing the RPC message, and
|
||||
* the network layer will "put" these pages when transmission is
|
||||
* complete.
|
||||
*
|
||||
* This is safe for our RPC services because the memory backing
|
||||
* the head and tail components is never kmalloc'd. These always
|
||||
* come from pages in the svc_rqst::rq_pages array.
|
||||
*/
|
||||
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
|
||||
rpc_fraghdr marker, unsigned int *sentp)
|
||||
@ -1244,6 +1235,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
|
||||
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
|
||||
1 + count, sizeof(marker) + rqstp->rq_res.len);
|
||||
ret = sock_sendmsg(svsk->sk_sock, &msg);
|
||||
page_frag_free(buf);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
*sentp += ret;
|
||||
|
@ -190,6 +190,8 @@ TEST_GEN_PROGS_s390x += rseq_test
|
||||
TEST_GEN_PROGS_s390x += set_memory_region_test
|
||||
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
|
||||
|
||||
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
|
||||
TEST_GEN_PROGS_riscv += riscv/ebreak_test
|
||||
TEST_GEN_PROGS_riscv += arch_timer
|
||||
TEST_GEN_PROGS_riscv += demand_paging_test
|
||||
TEST_GEN_PROGS_riscv += dirty_log_test
|
||||
|
@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
|
||||
|
||||
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
|
||||
|
||||
static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
|
||||
{
|
||||
return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
|
||||
}
|
||||
|
||||
static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
|
||||
{
|
||||
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
|
||||
}
|
||||
|
||||
struct ex_regs {
|
||||
unsigned long ra;
|
||||
unsigned long sp;
|
||||
@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle
|
||||
#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
|
||||
#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
|
||||
|
||||
/* SBI return error codes */
|
||||
#define SBI_SUCCESS 0
|
||||
#define SBI_ERR_FAILURE -1
|
||||
#define SBI_ERR_NOT_SUPPORTED -2
|
||||
#define SBI_ERR_INVALID_PARAM -3
|
||||
#define SBI_ERR_DENIED -4
|
||||
#define SBI_ERR_INVALID_ADDRESS -5
|
||||
#define SBI_ERR_ALREADY_AVAILABLE -6
|
||||
#define SBI_ERR_ALREADY_STARTED -7
|
||||
#define SBI_ERR_ALREADY_STOPPED -8
|
||||
|
||||
#define SBI_EXT_EXPERIMENTAL_START 0x08000000
|
||||
#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
|
||||
|
||||
#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
|
||||
#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
|
||||
#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
|
||||
|
||||
enum sbi_ext_id {
|
||||
SBI_EXT_BASE = 0x10,
|
||||
SBI_EXT_STA = 0x535441,
|
||||
};
|
||||
|
||||
enum sbi_ext_base_fid {
|
||||
SBI_EXT_BASE_PROBE_EXT = 3,
|
||||
};
|
||||
|
||||
struct sbiret {
|
||||
long error;
|
||||
long value;
|
||||
};
|
||||
|
||||
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
|
||||
unsigned long arg1, unsigned long arg2,
|
||||
unsigned long arg3, unsigned long arg4,
|
||||
unsigned long arg5);
|
||||
|
||||
bool guest_sbi_probe_extension(int extid, long *out_val);
|
||||
|
||||
static inline void local_irq_enable(void)
|
||||
{
|
||||
csr_set(CSR_SSTATUS, SR_SIE);
|
||||
|
141	tools/testing/selftests/kvm/include/riscv/sbi.h	Normal file

@ -0,0 +1,141 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* RISC-V SBI specific definitions
|
||||
*
|
||||
* Copyright (C) 2024 Rivos Inc.
|
||||
*/
|
||||
|
||||
#ifndef SELFTEST_KVM_SBI_H
|
||||
#define SELFTEST_KVM_SBI_H
|
||||
|
||||
/* SBI spec version fields */
|
||||
#define SBI_SPEC_VERSION_DEFAULT 0x1
|
||||
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
|
||||
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
|
||||
#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff
|
||||
|
||||
/* SBI return error codes */
|
||||
#define SBI_SUCCESS 0
|
||||
#define SBI_ERR_FAILURE -1
|
||||
#define SBI_ERR_NOT_SUPPORTED -2
|
||||
#define SBI_ERR_INVALID_PARAM -3
|
||||
#define SBI_ERR_DENIED -4
|
||||
#define SBI_ERR_INVALID_ADDRESS -5
|
||||
#define SBI_ERR_ALREADY_AVAILABLE -6
|
||||
#define SBI_ERR_ALREADY_STARTED -7
|
||||
#define SBI_ERR_ALREADY_STOPPED -8
|
||||
|
||||
#define SBI_EXT_EXPERIMENTAL_START 0x08000000
|
||||
#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
|
||||
|
||||
#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
|
||||
#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
|
||||
#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
|
||||
|
||||
enum sbi_ext_id {
|
||||
SBI_EXT_BASE = 0x10,
|
||||
SBI_EXT_STA = 0x535441,
|
||||
SBI_EXT_PMU = 0x504D55,
|
||||
};
|
||||
|
||||
enum sbi_ext_base_fid {
|
||||
SBI_EXT_BASE_GET_SPEC_VERSION = 0,
|
||||
SBI_EXT_BASE_GET_IMP_ID,
|
||||
SBI_EXT_BASE_GET_IMP_VERSION,
|
||||
SBI_EXT_BASE_PROBE_EXT = 3,
|
||||
};
|
||||
enum sbi_ext_pmu_fid {
|
||||
SBI_EXT_PMU_NUM_COUNTERS = 0,
|
||||
SBI_EXT_PMU_COUNTER_GET_INFO,
|
||||
SBI_EXT_PMU_COUNTER_CFG_MATCH,
|
||||
SBI_EXT_PMU_COUNTER_START,
|
||||
SBI_EXT_PMU_COUNTER_STOP,
|
||||
SBI_EXT_PMU_COUNTER_FW_READ,
|
||||
SBI_EXT_PMU_COUNTER_FW_READ_HI,
|
||||
SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
|
||||
};
|
||||
|
||||
union sbi_pmu_ctr_info {
|
||||
unsigned long value;
|
||||
struct {
|
||||
unsigned long csr:12;
|
||||
unsigned long width:6;
|
||||
#if __riscv_xlen == 32
|
||||
unsigned long reserved:13;
|
||||
#else
|
||||
unsigned long reserved:45;
|
||||
#endif
|
||||
unsigned long type:1;
|
||||
};
|
||||
};
|
||||
|
||||
struct riscv_pmu_snapshot_data {
|
||||
u64 ctr_overflow_mask;
|
||||
u64 ctr_values[64];
|
||||
u64 reserved[447];
|
||||
};
|
||||
|
||||
struct sbiret {
|
||||
long error;
|
||||
long value;
|
||||
};
|
||||
|
||||
/* General PMU event codes specified in the SBI PMU extension */
|
||||
enum sbi_pmu_hw_generic_events_t {
|
||||
SBI_PMU_HW_NO_EVENT = 0,
|
||||
SBI_PMU_HW_CPU_CYCLES = 1,
|
||||
SBI_PMU_HW_INSTRUCTIONS = 2,
|
||||
SBI_PMU_HW_CACHE_REFERENCES = 3,
|
||||
SBI_PMU_HW_CACHE_MISSES = 4,
|
||||
SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
|
||||
SBI_PMU_HW_BRANCH_MISSES = 6,
|
||||
SBI_PMU_HW_BUS_CYCLES = 7,
|
||||
SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
|
||||
SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
|
||||
SBI_PMU_HW_REF_CPU_CYCLES = 10,
|
||||
|
||||
SBI_PMU_HW_GENERAL_MAX,
|
||||
};
|
||||
|
||||
/* SBI PMU counter types */
|
||||
enum sbi_pmu_ctr_type {
|
||||
SBI_PMU_CTR_TYPE_HW = 0x0,
|
||||
SBI_PMU_CTR_TYPE_FW,
|
||||
};
|
||||
|
||||
/* Flags defined for config matching function */
|
||||
#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
|
||||
#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
|
||||
#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
|
||||
#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
|
||||
#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
|
||||
#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
|
||||
#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
|
||||
#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
|
||||
|
||||
/* Flags defined for counter start function */
|
||||
#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
|
||||
#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
|
||||
|
||||
/* Flags defined for counter stop function */
|
||||
#define SBI_PMU_STOP_FLAG_RESET BIT(0)
|
||||
#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
|
||||
|
||||
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
|
||||
unsigned long arg1, unsigned long arg2,
|
||||
unsigned long arg3, unsigned long arg4,
|
||||
unsigned long arg5);
|
||||
|
||||
bool guest_sbi_probe_extension(int extid, long *out_val);
|
||||
|
||||
/* Make SBI version */
|
||||
static inline unsigned long sbi_mk_version(unsigned long major,
|
||||
unsigned long minor)
|
||||
{
|
||||
return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
|
||||
| (minor & SBI_SPEC_VERSION_MINOR_MASK);
|
||||
}
|
||||
|
||||
unsigned long get_host_sbi_spec_version(void);
|
||||
|
||||
#endif /* SELFTEST_KVM_SBI_H */
|
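A short guest-side sketch of how the declarations in this new header fit together (an assumed usage example mirroring the selftests added later in this series; the wrapper name is illustrative):

/* Sketch: probe the SBI PMU extension and query the number of counters. */
static unsigned long guest_count_pmu_counters(void)
{
	struct sbiret ret;
	long out_val = 0;

	GUEST_ASSERT(guest_sbi_probe_extension(SBI_EXT_PMU, &out_val) && out_val == 1);
	/* Only the snapshot/overflow tests additionally require an SBI v2.0+ implementation,
	 * checked via get_host_sbi_spec_version() >= sbi_mk_version(2, 0). */

	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
	GUEST_ASSERT(!ret.error);

	return ret.value;
}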
@ -3,6 +3,7 @@
|
||||
#define SELFTEST_KVM_UCALL_H
|
||||
|
||||
#include "processor.h"
|
||||
#include "sbi.h"
|
||||
|
||||
#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
|
||||
|
||||
|
@ -502,3 +502,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val)
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned long get_host_sbi_spec_version(void)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
|
||||
0, 0, 0, 0, 0);
|
||||
|
||||
GUEST_ASSERT(!ret.error);
|
||||
|
||||
return ret.value;
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ struct kvm_vm *test_vm_create(void)
|
||||
int nr_vcpus = test_args.nr_vcpus;
|
||||
|
||||
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
|
||||
__TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
|
||||
__TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
|
||||
"SSTC not available, skipping test\n");
|
||||
|
||||
vm_init_vector_tables(vm);
|
||||
|
82	tools/testing/selftests/kvm/riscv/ebreak_test.c	Normal file
@ -0,0 +1,82 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* RISC-V KVM ebreak test.
|
||||
*
|
||||
* Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
|
||||
*
|
||||
*/
|
||||
#include "kvm_util.h"
|
||||
|
||||
#define LABEL_ADDRESS(v) ((uint64_t)&(v))
|
||||
|
||||
extern unsigned char sw_bp_1, sw_bp_2;
|
||||
static uint64_t sw_bp_addr;
|
||||
|
||||
static void guest_code(void)
|
||||
{
|
||||
asm volatile(
|
||||
".option push\n"
|
||||
".option norvc\n"
|
||||
"sw_bp_1: ebreak\n"
|
||||
"sw_bp_2: ebreak\n"
|
||||
".option pop\n"
|
||||
);
|
||||
GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2));
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void guest_breakpoint_handler(struct ex_regs *regs)
|
||||
{
|
||||
WRITE_ONCE(sw_bp_addr, regs->epc);
|
||||
regs->epc += 4;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
uint64_t pc;
|
||||
struct kvm_guest_debug debug = {
|
||||
.control = KVM_GUESTDBG_ENABLE,
|
||||
};
|
||||
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
|
||||
vm_init_vector_tables(vm);
|
||||
vcpu_init_vector_tables(vcpu);
|
||||
vm_install_exception_handler(vm, EXC_BREAKPOINT,
|
||||
guest_breakpoint_handler);
|
||||
|
||||
/*
|
||||
* Enable guest debug.
|
||||
* ebreak should exit to the VMM with KVM_EXIT_DEBUG reason.
|
||||
*/
|
||||
vcpu_guest_debug_set(vcpu, &debug);
|
||||
vcpu_run(vcpu);
|
||||
|
||||
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
|
||||
|
||||
vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
|
||||
TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
|
||||
|
||||
/* skip sw_bp_1 */
|
||||
vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4);
|
||||
|
||||
/*
|
||||
* Disable all debug controls.
|
||||
* Guest should handle the ebreak without exiting to the VMM.
|
||||
*/
|
||||
memset(&debug, 0, sizeof(debug));
|
||||
vcpu_guest_debug_set(vcpu, &debug);
|
||||
|
||||
vcpu_run(vcpu);
|
||||
|
||||
TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
}
|
@ -43,6 +43,7 @@ bool filter_reg(__u64 reg)
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF:
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
|
||||
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
|
||||
@ -408,6 +409,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
|
||||
KVM_ISA_EXT_ARR(V),
|
||||
KVM_ISA_EXT_ARR(SMSTATEEN),
|
||||
KVM_ISA_EXT_ARR(SSAIA),
|
||||
KVM_ISA_EXT_ARR(SSCOFPMF),
|
||||
KVM_ISA_EXT_ARR(SSTC),
|
||||
KVM_ISA_EXT_ARR(SVINVAL),
|
||||
KVM_ISA_EXT_ARR(SVNAPOT),
|
||||
@ -931,6 +933,7 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
|
||||
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
|
||||
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
|
||||
@ -986,6 +989,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
|
||||
&config_fp_d,
|
||||
&config_h,
|
||||
&config_smstateen,
|
||||
&config_sscofpmf,
|
||||
&config_sstc,
|
||||
&config_svinval,
|
||||
&config_svnapot,
|
||||
|
681	tools/testing/selftests/kvm/riscv/sbi_pmu_test.c	Normal file
@ -0,0 +1,681 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality.
|
||||
*
|
||||
* Copyright (c) 2024, Rivos Inc.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include "kvm_util.h"
|
||||
#include "test_util.h"
|
||||
#include "processor.h"
|
||||
#include "sbi.h"
|
||||
#include "arch_timer.h"
|
||||
|
||||
/* Maximum counters(firmware + hardware) */
|
||||
#define RISCV_MAX_PMU_COUNTERS 64
|
||||
union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
|
||||
|
||||
/* Snapshot shared memory data */
|
||||
#define PMU_SNAPSHOT_GPA_BASE BIT(30)
|
||||
static void *snapshot_gva;
|
||||
static vm_paddr_t snapshot_gpa;
|
||||
|
||||
static int vcpu_shared_irq_count;
|
||||
static int counter_in_use;
|
||||
|
||||
/* Cache the available counters in a bitmask */
|
||||
static unsigned long counter_mask_available;
|
||||
|
||||
static bool illegal_handler_invoked;
|
||||
|
||||
#define SBI_PMU_TEST_BASIC BIT(0)
|
||||
#define SBI_PMU_TEST_EVENTS BIT(1)
|
||||
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
|
||||
#define SBI_PMU_TEST_OVERFLOW BIT(3)
|
||||
|
||||
static int disabled_tests;
|
||||
|
||||
unsigned long pmu_csr_read_num(int csr_num)
|
||||
{
|
||||
#define switchcase_csr_read(__csr_num, __val) {\
|
||||
case __csr_num: \
|
||||
__val = csr_read(__csr_num); \
|
||||
break; }
|
||||
#define switchcase_csr_read_2(__csr_num, __val) {\
|
||||
switchcase_csr_read(__csr_num + 0, __val) \
|
||||
switchcase_csr_read(__csr_num + 1, __val)}
|
||||
#define switchcase_csr_read_4(__csr_num, __val) {\
|
||||
switchcase_csr_read_2(__csr_num + 0, __val) \
|
||||
switchcase_csr_read_2(__csr_num + 2, __val)}
|
||||
#define switchcase_csr_read_8(__csr_num, __val) {\
|
||||
switchcase_csr_read_4(__csr_num + 0, __val) \
|
||||
switchcase_csr_read_4(__csr_num + 4, __val)}
|
||||
#define switchcase_csr_read_16(__csr_num, __val) {\
|
||||
switchcase_csr_read_8(__csr_num + 0, __val) \
|
||||
switchcase_csr_read_8(__csr_num + 8, __val)}
|
||||
#define switchcase_csr_read_32(__csr_num, __val) {\
|
||||
switchcase_csr_read_16(__csr_num + 0, __val) \
|
||||
switchcase_csr_read_16(__csr_num + 16, __val)}
|
||||
|
||||
unsigned long ret = 0;
|
||||
|
||||
switch (csr_num) {
|
||||
switchcase_csr_read_32(CSR_CYCLE, ret)
|
||||
switchcase_csr_read_32(CSR_CYCLEH, ret)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
#undef switchcase_csr_read_32
|
||||
#undef switchcase_csr_read_16
|
||||
#undef switchcase_csr_read_8
|
||||
#undef switchcase_csr_read_4
|
||||
#undef switchcase_csr_read_2
|
||||
#undef switchcase_csr_read
|
||||
}
|
||||
|
||||
static inline void dummy_func_loop(uint64_t iter)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
while (i < iter) {
|
||||
asm volatile("nop");
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
static void start_counter(unsigned long counter, unsigned long start_flags,
|
||||
unsigned long ival)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags,
|
||||
ival, 0, 0);
|
||||
__GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter);
|
||||
}
|
||||
|
||||
/* This should be invoked only for reset counter use case */
|
||||
static void stop_reset_counter(unsigned long counter, unsigned long stop_flags)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1,
|
||||
stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
|
||||
__GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED,
|
||||
"Unable to stop counter %ld\n", counter);
|
||||
}
|
||||
|
||||
static void stop_counter(unsigned long counter, unsigned long stop_flags)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
|
||||
0, 0, 0);
|
||||
__GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
|
||||
counter, ret.error);
|
||||
}
|
||||
|
||||
static void guest_illegal_exception_handler(struct ex_regs *regs)
|
||||
{
|
||||
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
|
||||
"Unexpected exception handler %lx\n", regs->cause);
|
||||
|
||||
illegal_handler_invoked = true;
|
||||
/* skip the trapping instruction */
|
||||
regs->epc += 4;
|
||||
}
|
||||
|
||||
static void guest_irq_handler(struct ex_regs *regs)
|
||||
{
|
||||
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
|
||||
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
|
||||
unsigned long overflown_mask;
|
||||
unsigned long counter_val = 0;
|
||||
|
||||
/* Validate that we are in the correct irq handler */
|
||||
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
|
||||
|
||||
/* Stop all counters first to avoid further interrupts */
|
||||
stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
|
||||
|
||||
csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
|
||||
|
||||
overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask);
|
||||
GUEST_ASSERT(overflown_mask & 0x01);
|
||||
|
||||
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
|
||||
|
||||
counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
|
||||
/* Now start the counter to mimic the real driver behavior */
|
||||
start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
|
||||
}
|
||||
|
||||
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
|
||||
unsigned long cflags,
|
||||
unsigned long event)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
|
||||
cflags, event, 0, 0);
|
||||
__GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error);
|
||||
GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS);
|
||||
GUEST_ASSERT(BIT(ret.value) & counter_mask_available);
|
||||
|
||||
return ret.value;
|
||||
}
|
||||
|
||||
static unsigned long get_num_counters(void)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
__GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU");
|
||||
__GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS,
|
||||
"Invalid number of counters %ld\n", ret.value);
|
||||
|
||||
return ret.value;
|
||||
}
|
||||
|
||||
static void update_counter_info(int num_counters)
|
||||
{
|
||||
int i = 0;
|
||||
struct sbiret ret;
|
||||
|
||||
for (i = 0; i < num_counters; i++) {
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
|
||||
|
||||
/* There can be gaps in logical counter indices */
|
||||
if (ret.error)
|
||||
continue;
|
||||
GUEST_ASSERT_NE(ret.value, 0);
|
||||
|
||||
ctrinfo_arr[i].value = ret.value;
|
||||
counter_mask_available |= BIT(i);
|
||||
}
|
||||
|
||||
GUEST_ASSERT(counter_mask_available > 0);
|
||||
}
|
||||
|
||||
static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
|
||||
{
|
||||
struct sbiret ret;
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0);
|
||||
GUEST_ASSERT(ret.error == 0);
|
||||
return ret.value;
|
||||
}
|
||||
|
||||
static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
|
||||
{
|
||||
unsigned long counter_val = 0;
|
||||
|
||||
__GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type);
|
||||
|
||||
if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW)
|
||||
counter_val = pmu_csr_read_num(ctrinfo.csr);
|
||||
else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW)
|
||||
counter_val = read_fw_counter(idx, ctrinfo);
|
||||
|
||||
return counter_val;
|
||||
}
|
||||
|
||||
static inline void verify_sbi_requirement_assert(void)
|
||||
{
|
||||
long out_val = 0;
|
||||
bool probe;
|
||||
|
||||
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
|
||||
GUEST_ASSERT(probe && out_val == 1);
|
||||
|
||||
if (get_host_sbi_spec_version() < sbi_mk_version(2, 0))
|
||||
__GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
|
||||
}
|
||||
|
||||
static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
|
||||
{
|
||||
unsigned long lo = (unsigned long)gpa;
|
||||
#if __riscv_xlen == 32
|
||||
unsigned long hi = (unsigned long)(gpa >> 32);
|
||||
#else
|
||||
unsigned long hi = gpa == -1 ? -1 : 0;
|
||||
#endif
|
||||
struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
|
||||
lo, hi, flags, 0, 0, 0);
|
||||
|
||||
GUEST_ASSERT(ret.value == 0 && ret.error == 0);
|
||||
}
|
||||
|
||||
static void test_pmu_event(unsigned long event)
|
||||
{
|
||||
unsigned long counter;
|
||||
unsigned long counter_value_pre, counter_value_post;
|
||||
unsigned long counter_init_value = 100;
|
||||
|
||||
counter = get_counter_index(0, counter_mask_available, 0, event);
|
||||
counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
|
||||
|
||||
/* Do not set the initial value */
|
||||
start_counter(counter, 0, 0);
|
||||
dummy_func_loop(10000);
|
||||
stop_counter(counter, 0);
|
||||
|
||||
counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
|
||||
__GUEST_ASSERT(counter_value_post > counter_value_pre,
|
||||
"Event update verification failed: post [%lx] pre [%lx]\n",
|
||||
counter_value_post, counter_value_pre);
|
||||
|
||||
/*
|
||||
* We can't just update the counter without starting it.
|
||||
* Do start/stop twice to simulate that by first initializing to a very
|
||||
* high value and a low value after that.
|
||||
*/
|
||||
start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2);
|
||||
stop_counter(counter, 0);
|
||||
counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
|
||||
|
||||
start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
|
||||
stop_counter(counter, 0);
|
||||
counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
|
||||
__GUEST_ASSERT(counter_value_pre > counter_value_post,
|
||||
"Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
|
||||
counter_value_post, counter_value_pre);
|
||||
|
||||
/* Now set the initial value and compare */
|
||||
start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
|
||||
dummy_func_loop(10000);
|
||||
stop_counter(counter, 0);
|
||||
|
||||
counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
|
||||
__GUEST_ASSERT(counter_value_post > counter_init_value,
|
||||
"Event update verification failed: post [%lx] pre [%lx]\n",
|
||||
counter_value_post, counter_init_value);
|
||||
|
||||
stop_reset_counter(counter, 0);
|
||||
}
|
||||
|
||||
static void test_pmu_event_snapshot(unsigned long event)
|
||||
{
|
||||
unsigned long counter;
|
||||
unsigned long counter_value_pre, counter_value_post;
|
||||
unsigned long counter_init_value = 100;
|
||||
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
|
||||
|
||||
counter = get_counter_index(0, counter_mask_available, 0, event);
|
||||
counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
|
||||
|
||||
/* Do not set the initial value */
|
||||
start_counter(counter, 0, 0);
|
||||
dummy_func_loop(10000);
|
||||
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
|
||||
|
||||
/* The counter value is updated w.r.t. the relative index of cbase */
|
||||
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
|
||||
__GUEST_ASSERT(counter_value_post > counter_value_pre,
|
||||
"Event update verification failed: post [%lx] pre [%lx]\n",
|
||||
counter_value_post, counter_value_pre);
|
||||
|
||||
/*
|
||||
* We can't just update the counter without starting it.
|
||||
* Do start/stop twice to simulate that by first initializing to a very
|
||||
* high value and a low value after that.
|
||||
*/
|
||||
WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2);
|
||||
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
|
||||
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
|
||||
counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]);
|
||||
|
||||
WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
|
||||
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
|
||||
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
|
||||
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
|
||||
__GUEST_ASSERT(counter_value_pre > counter_value_post,
|
||||
"Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
|
||||
counter_value_post, counter_value_pre);
|
||||
|
||||
/* Now set the initial value and compare */
|
||||
WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
|
||||
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
|
||||
dummy_func_loop(10000);
|
||||
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
|
||||
|
||||
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
|
||||
__GUEST_ASSERT(counter_value_post > counter_init_value,
|
||||
"Event update verification failed: post [%lx] pre [%lx]\n",
|
||||
counter_value_post, counter_init_value);
|
||||
|
||||
stop_reset_counter(counter, 0);
|
||||
}
|
||||
|
||||
static void test_pmu_event_overflow(unsigned long event)
|
||||
{
|
||||
unsigned long counter;
|
||||
unsigned long counter_value_post;
|
||||
unsigned long counter_init_value = ULONG_MAX - 10000;
|
||||
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
|
||||
|
||||
counter = get_counter_index(0, counter_mask_available, 0, event);
|
||||
counter_in_use = counter;
|
||||
|
||||
/* The counter value is updated w.r.t. the relative index of cbase passed to start/stop */
|
||||
WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
|
||||
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
|
||||
dummy_func_loop(10000);
|
||||
udelay(msecs_to_usecs(2000));
|
||||
/* irq handler should have stopped the counter */
|
||||
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
|
||||
|
||||
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
|
||||
/* The counter value after stopping should be less than the init value due to overflow */
|
||||
__GUEST_ASSERT(counter_value_post < counter_init_value,
|
||||
"counter_value_post %lx counter_init_value %lx for counter\n",
|
||||
counter_value_post, counter_init_value);
|
||||
|
||||
stop_reset_counter(counter, 0);
|
||||
}
|
||||
|
||||
static void test_invalid_event(void)
|
||||
{
|
||||
struct sbiret ret;
|
||||
unsigned long event = 0x1234; /* A random event */
|
||||
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0,
|
||||
counter_mask_available, 0, event, 0, 0);
|
||||
GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED);
|
||||
}
|
||||
|
||||
static void test_pmu_events(void)
|
||||
{
|
||||
int num_counters = 0;
|
||||
|
||||
/* Get the counter details */
|
||||
num_counters = get_num_counters();
|
||||
update_counter_info(num_counters);
|
||||
|
||||
/* Sanity testing for any random invalid event */
|
||||
test_invalid_event();
|
||||
|
||||
/* Only these two events are guaranteed to be present */
|
||||
test_pmu_event(SBI_PMU_HW_CPU_CYCLES);
|
||||
test_pmu_event(SBI_PMU_HW_INSTRUCTIONS);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_pmu_basic_sanity(void)
|
||||
{
|
||||
long out_val = 0;
|
||||
bool probe;
|
||||
struct sbiret ret;
|
||||
int num_counters = 0, i;
|
||||
union sbi_pmu_ctr_info ctrinfo;
|
||||
|
||||
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
|
||||
GUEST_ASSERT(probe && out_val == 1);
|
||||
|
||||
num_counters = get_num_counters();
|
||||
|
||||
for (i = 0; i < num_counters; i++) {
|
||||
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i,
|
||||
0, 0, 0, 0, 0);
|
||||
|
||||
/* There can be gaps in logical counter indices */
|
||||
if (ret.error)
|
||||
continue;
|
||||
GUEST_ASSERT_NE(ret.value, 0);
|
||||
|
||||
ctrinfo.value = ret.value;
|
||||
|
||||
/*
|
||||
* Accessibility check of hardware and read capability of firmware counters.
|
||||
* The spec doesn't mandate any initial value. No need to check any value.
|
||||
*/
|
||||
if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) {
|
||||
pmu_csr_read_num(ctrinfo.csr);
|
||||
GUEST_ASSERT(illegal_handler_invoked);
|
||||
} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
|
||||
read_fw_counter(i, ctrinfo);
|
||||
}
|
||||
}
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_pmu_events_snaphost(void)
|
||||
{
|
||||
int num_counters = 0;
|
||||
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
|
||||
int i;
|
||||
|
||||
/* Verify presence of SBI PMU and minimum required SBI version */
|
||||
verify_sbi_requirement_assert();
|
||||
|
||||
snapshot_set_shmem(snapshot_gpa, 0);
|
||||
|
||||
/* Get the counter details */
|
||||
num_counters = get_num_counters();
|
||||
update_counter_info(num_counters);
|
||||
|
||||
/* Validate shared memory access */
|
||||
GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0);
|
||||
for (i = 0; i < num_counters; i++) {
|
||||
if (counter_mask_available & (BIT(i)))
|
||||
GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0);
|
||||
}
|
||||
/* Only these two events are guaranteed to be present */
|
||||
test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES);
|
||||
test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_pmu_events_overflow(void)
|
||||
{
|
||||
int num_counters = 0;
|
||||
|
||||
/* Verify presence of SBI PMU and minimum required SBI version */
|
||||
verify_sbi_requirement_assert();
|
||||
|
||||
snapshot_set_shmem(snapshot_gpa, 0);
|
||||
csr_set(CSR_IE, BIT(IRQ_PMU_OVF));
|
||||
local_irq_enable();
|
||||
|
||||
/* Get the counter details */
|
||||
num_counters = get_num_counters();
|
||||
update_counter_info(num_counters);
|
||||
|
||||
/*
|
||||
* Qemu supports overflow for cycle/instruction.
|
||||
* This test may fail on any platform that does not support overflow for these two events.
|
||||
*/
|
||||
test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
|
||||
GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
|
||||
|
||||
test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
|
||||
GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void run_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct ucall uc;
|
||||
|
||||
vcpu_run(vcpu);
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT(uc);
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
case UCALL_SYNC:
|
||||
break;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall %lu", uc.cmd);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void test_vm_destroy(struct kvm_vm *vm)
|
||||
{
|
||||
memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS);
|
||||
counter_mask_available = 0;
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void test_vm_basic_test(void *guest_code)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
|
||||
"SBI PMU not available, skipping test");
|
||||
vm_init_vector_tables(vm);
|
||||
/* Illegal instruction handler is required to verify read access without configuration */
|
||||
vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler);
|
||||
|
||||
vcpu_init_vector_tables(vcpu);
|
||||
run_vcpu(vcpu);
|
||||
|
||||
test_vm_destroy(vm);
|
||||
}
|
||||
|
||||
static void test_vm_events_test(void *guest_code)
|
||||
{
|
||||
struct kvm_vm *vm = NULL;
|
||||
struct kvm_vcpu *vcpu = NULL;
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
|
||||
"SBI PMU not available, skipping test");
|
||||
run_vcpu(vcpu);
|
||||
|
||||
test_vm_destroy(vm);
|
||||
}
|
||||
|
||||
static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* PMU Snapshot requires single page only */
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0);
|
||||
/* PMU_SNAPSHOT_GPA_BASE is identity mapped */
|
||||
virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
|
||||
|
||||
snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
|
||||
snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
|
||||
sync_global_to_guest(vcpu->vm, snapshot_gva);
|
||||
sync_global_to_guest(vcpu->vm, snapshot_gpa);
|
||||
}
|
||||
|
||||
static void test_vm_events_snapshot_test(void *guest_code)
|
||||
{
|
||||
struct kvm_vm *vm = NULL;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
|
||||
"SBI PMU not available, skipping test");
|
||||
|
||||
test_vm_setup_snapshot_mem(vm, vcpu);
|
||||
|
||||
run_vcpu(vcpu);
|
||||
|
||||
test_vm_destroy(vm);
|
||||
}
|
||||
|
||||
static void test_vm_events_overflow(void *guest_code)
|
||||
{
|
||||
struct kvm_vm *vm = NULL;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
|
||||
"SBI PMU not available, skipping test");
|
||||
|
||||
__TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF),
|
||||
"Sscofpmf is not available, skipping overflow test");
|
||||
|
||||
test_vm_setup_snapshot_mem(vm, vcpu);
|
||||
vm_init_vector_tables(vm);
|
||||
vm_install_interrupt_handler(vm, guest_irq_handler);
|
||||
|
||||
vcpu_init_vector_tables(vcpu);
|
||||
/* Initialize guest timer frequency. */
|
||||
vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
|
||||
sync_global_to_guest(vm, timer_freq);
|
||||
|
||||
run_vcpu(vcpu);
|
||||
|
||||
test_vm_destroy(vm);
|
||||
}
|
||||
|
||||
static void test_print_help(char *name)
|
||||
{
|
||||
pr_info("Usage: %s [-h] [-d <test name>]\n", name);
|
||||
pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
|
||||
pr_info("\t-h: print this help screen\n");
|
||||
}
|
||||
|
||||
static bool parse_args(int argc, char *argv[])
|
||||
{
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt(argc, argv, "hd:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
if (!strncmp("basic", optarg, 5))
|
||||
disabled_tests |= SBI_PMU_TEST_BASIC;
|
||||
else if (!strncmp("events", optarg, 6))
|
||||
disabled_tests |= SBI_PMU_TEST_EVENTS;
|
||||
else if (!strncmp("snapshot", optarg, 8))
|
||||
disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
|
||||
else if (!strncmp("overflow", optarg, 8))
|
||||
disabled_tests |= SBI_PMU_TEST_OVERFLOW;
|
||||
else
|
||||
goto done;
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
done:
|
||||
test_print_help(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (!parse_args(argc, argv))
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
|
||||
test_vm_basic_test(test_pmu_basic_sanity);
|
||||
pr_info("SBI PMU basic test : PASS\n");
|
||||
}
|
||||
|
||||
if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
|
||||
test_vm_events_test(test_pmu_events);
|
||||
pr_info("SBI PMU event verification test : PASS\n");
|
||||
}
|
||||
|
||||
if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
|
||||
test_vm_events_snapshot_test(test_pmu_events_snaphost);
|
||||
pr_info("SBI PMU event verification with snapshot test : PASS\n");
|
||||
}
|
||||
|
||||
if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
|
||||
test_vm_events_overflow(test_pmu_events_overflow);
|
||||
pr_info("SBI PMU event verification with overflow test : PASS\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -11,7 +11,9 @@
|
||||
#include <pthread.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/kvm.h>
|
||||
#ifndef __riscv
|
||||
#ifdef __riscv
|
||||
#include "sbi.h"
|
||||
#else
|
||||
#include <asm/kvm_para.h>
|
||||
#endif
|
||||
|
||||
|