KVM/riscv changes for 6.10

- Support guest breakpoints using ebreak
 - Introduce per-VCPU mp_state_lock and reset_cntx_lock
 - Virtualize SBI PMU snapshot and counter overflow interrupts
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEZdn75s5e6LHDQ+f/rUjsVaLHLAcFAmYwgroACgkQrUjsVaLH
 LAfckxAAnCvW9Ahcy0GgM2EwTtYDoNkQp1A6Wkp/a3nXBvc3hXMnlyZQ4YkyJ1T3
 BfQABCWEXWiDyEVpN9KUKtzUJi7WJz0MFuph5kvyZwMl53zddUNFqXpN4Hbb58/d
 dqjTJg7AnHbvirfhlHay/Rp+EaYsDq1E5GviDBi46yFkH/vB8IPpWdFLh3pD/+7f
 bmG5jeLos8zsWEwe3pAIC2hLDj0vFRRe2YJuXTZ9fvPzGBsPN9OHrtq0JbB3lRGt
 WRiYKPJiFjt2P3TjPkjh4N1Xmy8pJaEetu0Qwa1TR6I+ULs2ZcFzx9cw2VuoRQ2C
 uNhVx0o5ulAzJwGgX4U49ZTK4M7a5q6xf6zpqNFHbyy5tZylKJuBEWucuSyF1kTU
 RpjNinZ1PShzjx7HU+2gKPu+bmKHgfwKlr2Dp9Cx92IV9It3Wt1VEXWsjatciMfj
 EGYx+E9VcEOfX6INwX/TiO4ti7chLH/sFc+LhLqvw/1elhi83yAWbszjUmJ1Vrx1
 k1eATN2Hehvw06Y72lc+PrD0sYUmJPcDMVk3MSh/cSC8OODmZ9vi32v8Ie2bjNS5
 gHRLc05av1aX8yX+GRpUSPkCRL/XQ2J3jLG4uc3FmBMcWEhAtnIPsvXnCvV8f2mw
 aYrN+VF/FuRfumuYX6jWN6dwEwDO96AN425Rqu9MXik5KqSASXQ=
 =mGfY
 -----END PGP SIGNATURE-----

Merge tag 'kvm-riscv-6.10-1' of https://github.com/kvm-riscv/linux into HEAD

 KVM/riscv changes for 6.10

- Support guest breakpoints using ebreak
- Introduce per-VCPU mp_state_lock and reset_cntx_lock
- Virtualize SBI PMU snapshot and counter overflow interrupts
- New selftests for SBI PMU and Guest ebreak
Paolo Bonzini 2024-05-07 13:03:03 -04:00
commit aa24865fb5
67 changed files with 2188 additions and 405 deletions

View File

@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 9
SUBLEVEL = 0
EXTRAVERSION = -rc2
EXTRAVERSION = -rc3
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*

View File

@ -4,7 +4,6 @@
#ifndef __ASSEMBLY__
#include <asm/page.h>
#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void);
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
return (void *)vd + PAGE_SIZE;
return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
}
#endif

View File

@ -168,7 +168,8 @@
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
(_AC(1, UL) << IRQ_S_TIMER) | \
(_AC(1, UL) << IRQ_S_EXT))
(_AC(1, UL) << IRQ_S_EXT) | \
(_AC(1, UL) << IRQ_PMU_OVF))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
@ -281,7 +282,7 @@
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
#define CSR_SSCOUNTOVF 0xda0
#define CSR_SCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104

View File

@ -43,6 +43,17 @@
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6)
#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
BIT(EXC_BREAKPOINT) | \
BIT(EXC_SYSCALL) | \
BIT(EXC_INST_PAGE_FAULT) | \
BIT(EXC_LOAD_PAGE_FAULT) | \
BIT(EXC_STORE_PAGE_FAULT))
#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
BIT(IRQ_VS_TIMER) | \
BIT(IRQ_VS_EXT))
enum kvm_riscv_hfence_type {
KVM_RISCV_HFENCE_UNKNOWN = 0,
KVM_RISCV_HFENCE_GVMA_VMID_GPA,
@ -169,6 +180,7 @@ struct kvm_vcpu_csr {
struct kvm_vcpu_config {
u64 henvcfg;
u64 hstateen0;
unsigned long hedeleg;
};
struct kvm_vcpu_smstateen_csr {
@ -211,6 +223,7 @@ struct kvm_vcpu_arch {
/* CPU context upon Guest VCPU reset */
struct kvm_cpu_context guest_reset_context;
spinlock_t reset_cntx_lock;
/* CPU CSR context upon Guest VCPU reset */
struct kvm_vcpu_csr guest_reset_csr;
@ -252,8 +265,9 @@ struct kvm_vcpu_arch {
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_memory_cache mmu_page_cache;
/* VCPU power-off state */
bool power_off;
/* VCPU power state */
struct kvm_mp_state mp_state;
spinlock_t mp_state_lock;
/* Don't run the VCPU (blocked) */
bool pause;
@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
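
For orientation, the two new macros above simply fold the old per-bit CSR setup (removed from main.c later in this diff) into single masks, and the guest-debug support toggles the EXC_BREAKPOINT bit per vCPU. A standalone sketch of that bit arithmetic follows; it is illustrative only, and the cause numbers are the standard RISC-V privileged-spec values rather than anything defined in this diff.

/* Illustrative only: compose the default exception-delegation mask the same
 * way as KVM_HEDELEG_DEFAULT. The cause numbers below are the standard
 * RISC-V privileged-spec values, assumed here rather than taken from the diff. */
#include <stdio.h>
#include <stdint.h>

#define BIT(n)               (1ULL << (n))
#define EXC_INST_MISALIGNED  0
#define EXC_BREAKPOINT       3
#define EXC_SYSCALL          8
#define EXC_INST_PAGE_FAULT  12
#define EXC_LOAD_PAGE_FAULT  13
#define EXC_STORE_PAGE_FAULT 15

int main(void)
{
	uint64_t hedeleg = BIT(EXC_INST_MISALIGNED) | BIT(EXC_BREAKPOINT) |
			   BIT(EXC_SYSCALL) | BIT(EXC_INST_PAGE_FAULT) |
			   BIT(EXC_LOAD_PAGE_FAULT) | BIT(EXC_STORE_PAGE_FAULT);

	/* With guest debug enabled, the breakpoint bit is cleared so ebreak
	 * traps to the host instead of being delegated to the guest. */
	uint64_t hedeleg_dbg = hedeleg & ~BIT(EXC_BREAKPOINT);

	printf("hedeleg default:     0x%llx\n", (unsigned long long)hedeleg);
	printf("hedeleg guest-debug: 0x%llx\n", (unsigned long long)hedeleg_dbg);
	return 0;
}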

View File

@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64);
struct kvm_fw_event {
/* Current value of the event */
unsigned long value;
u64 value;
/* Event monitoring status */
bool started;
@ -36,6 +36,7 @@ struct kvm_pmc {
bool started;
/* Monitoring event ID */
unsigned long event_idx;
struct kvm_vcpu *vcpu;
};
/* PMU data structure per vcpu */
@ -50,6 +51,12 @@ struct kvm_pmu {
bool init_done;
/* Bit map of all the virtual counter used */
DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
/* Bit map of all the virtual counter overflown */
DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS);
/* The address of the counter snapshot area (guest physical address) */
gpa_t snapshot_addr;
/* The actual data of the snapshot */
struct riscv_pmu_snapshot_data *sdata;
};
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context)
@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long ctr_mask, unsigned long flags,
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata);
int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata);
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
unsigned long saddr_high, unsigned long flags,
struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);

View File

@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid {
SBI_EXT_PMU_COUNTER_START,
SBI_EXT_PMU_COUNTER_STOP,
SBI_EXT_PMU_COUNTER_FW_READ,
SBI_EXT_PMU_COUNTER_FW_READ_HI,
SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
};
union sbi_pmu_ctr_info {
@ -147,6 +149,13 @@ union sbi_pmu_ctr_info {
};
};
/* Data structure to contain the pmu snapshot data */
struct riscv_pmu_snapshot_data {
u64 ctr_overflow_mask;
u64 ctr_values[64];
u64 reserved[447];
};
#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0)
#define RISCV_PMU_RAW_EVENT_IDX 0x20000
@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type {
#define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF
/* Flags defined for config matching function */
#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4)
#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
/* Flags defined for counter start function */
#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
/* Flags defined for counter stop function */
#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
#define SBI_PMU_STOP_FLAG_RESET BIT(0)
#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
enum sbi_ext_dbcn_fid {
SBI_EXT_DBCN_CONSOLE_WRITE = 0,
@ -266,7 +277,7 @@ struct sbi_sta_struct {
u8 pad[47];
} __packed;
#define SBI_STA_SHMEM_DISABLE -1
#define SBI_SHMEM_DISABLE -1
/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
@ -284,6 +295,7 @@ struct sbi_sta_struct {
#define SBI_ERR_ALREADY_AVAILABLE -6
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
#define SBI_ERR_NO_SHMEM -9
extern unsigned long sbi_spec_version;
struct sbiret {
@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void)
static inline unsigned long sbi_mk_version(unsigned long major,
unsigned long minor)
{
return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
| (minor & SBI_SPEC_VERSION_MINOR_MASK);
}
int sbi_err_map_linux_errno(int err);
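
The snapshot layout introduced above is sized so that the shared-memory area fills exactly one page, which is why both the KVM side (a kzalloc of the structure size) and the perf driver (an alloc_page) later in this diff can back it with a single 4 KiB allocation. A minimal userspace sketch, mirroring the struct purely to check the arithmetic:

/* Illustrative userspace mirror of the snapshot layout; checks that the
 * shared-memory area is exactly one 4 KiB page. */
#include <stdio.h>
#include <stdint.h>

struct riscv_pmu_snapshot_data {
	uint64_t ctr_overflow_mask;
	uint64_t ctr_values[64];
	uint64_t reserved[447];
};

int main(void)
{
	/* 1 + 64 + 447 = 512 u64 entries, i.e. 4096 bytes. */
	_Static_assert(sizeof(struct riscv_pmu_snapshot_data) == 4096,
		       "snapshot area must fill exactly one 4 KiB page");
	printf("snapshot area: %zu bytes\n", sizeof(struct riscv_pmu_snapshot_data));
	return 0;
}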

View File

@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_ZTSO,
KVM_RISCV_ISA_EXT_ZACAS,
KVM_RISCV_ISA_EXT_SSCOFPMF,
KVM_RISCV_ISA_EXT_MAX,
};

View File

@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
lo, hi, flags, 0, 0, 0);
if (ret.error) {
if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE)
pr_warn("Failed to disable steal-time shmem");
else
pr_warn("Failed to set steal-time shmem");
@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu)
static int pv_time_cpu_down_prepare(unsigned int cpu)
{
return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
SBI_STA_SHMEM_DISABLE, 0);
return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE,
SBI_SHMEM_DISABLE, 0);
}
static u64 pv_time_steal_clock(int cpu)

View File

@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void)
enable_percpu_irq(hgei_parent_irq,
irq_get_trigger_type(hgei_parent_irq));
csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
/* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
}
void kvm_riscv_aia_disable(void)
@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void)
return;
hgctrl = get_cpu_ptr(&aia_hgei);
if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
/* Disable per-CPU SGEI interrupt */
csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
disable_percpu_irq(hgei_parent_irq);

View File

@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_arch_hardware_enable(void)
{
unsigned long hideleg, hedeleg;
hedeleg = 0;
hedeleg |= (1UL << EXC_INST_MISALIGNED);
hedeleg |= (1UL << EXC_BREAKPOINT);
hedeleg |= (1UL << EXC_SYSCALL);
hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
csr_write(CSR_HEDELEG, hedeleg);
hideleg = 0;
hideleg |= (1UL << IRQ_VS_SOFT);
hideleg |= (1UL << IRQ_VS_TIMER);
hideleg |= (1UL << IRQ_VS_EXT);
csr_write(CSR_HIDELEG, hideleg);
csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
/* VS should access only the time counter directly. Everything else should trap */
csr_write(CSR_HCOUNTEREN, 0x02);

View File

@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
memcpy(csr, reset_csr, sizeof(*csr));
spin_lock(&vcpu->arch.reset_cntx_lock);
memcpy(cntx, reset_cntx, sizeof(*cntx));
spin_unlock(&vcpu->arch.reset_cntx_lock);
kvm_riscv_vcpu_fp_reset(vcpu);
@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cntx;
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
spin_lock_init(&vcpu->arch.mp_state_lock);
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
spin_lock_init(&vcpu->arch.hfence_lock);
/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
spin_lock_init(&vcpu->arch.reset_cntx_lock);
spin_lock(&vcpu->arch.reset_cntx_lock);
cntx = &vcpu->arch.guest_reset_context;
cntx->sstatus = SR_SPP | SR_SPIE;
cntx->hstatus = 0;
cntx->hstatus |= HSTATUS_VTW;
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
spin_unlock(&vcpu->arch.reset_cntx_lock);
if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
return -ENOMEM;
@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
!vcpu->arch.power_off && !vcpu->arch.pause);
!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
}
}
/* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
!test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
clear_bit(IRQ_PMU_OVF, v->irqs_pending);
}
/* Sync-up AIA high interrupts */
kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
irq != IRQ_VS_EXT)
irq != IRQ_VS_EXT &&
irq != IRQ_PMU_OVF)
return -EINVAL;
set_bit(irq, vcpu->arch.irqs_pending);
@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
/*
* We only allow VS-mode software, timer, and external
* We only allow VS-mode software, timer, counter overflow and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
irq != IRQ_VS_EXT)
irq != IRQ_VS_EXT &&
irq != IRQ_PMU_OVF)
return -EINVAL;
clear_bit(irq, vcpu->arch.irqs_pending);
@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.power_off = true;
WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->arch.mp_state_lock);
__kvm_riscv_vcpu_power_off(vcpu);
spin_unlock(&vcpu->arch.mp_state_lock);
}
void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
}
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
vcpu->arch.power_off = false;
kvm_vcpu_wake_up(vcpu);
spin_lock(&vcpu->arch.mp_state_lock);
__kvm_riscv_vcpu_power_on(vcpu);
spin_unlock(&vcpu->arch.mp_state_lock);
}
bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
{
return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
*mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
spin_lock(&vcpu->arch.mp_state_lock);
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
kvm_riscv_vcpu_power_off(vcpu);
__kvm_riscv_vcpu_power_off(vcpu);
break;
default:
ret = -EINVAL;
}
spin_unlock(&vcpu->arch.mp_state_lock);
return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
/* TODO; To be implemented later. */
return -EINVAL;
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
} else {
vcpu->guest_debug = 0;
vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
}
return 0;
}
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
if (riscv_isa_extension_available(isa, SMSTATEEN))
cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
}
cfg->hedeleg = KVM_HEDELEG_DEFAULT;
if (vcpu->guest_debug)
cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_VSEPC, csr->vsepc);
csr_write(CSR_VSCAUSE, csr->vscause);
csr_write(CSR_VSTVAL, csr->vstval);
csr_write(CSR_HEDELEG, cfg->hedeleg);
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
csr_write(CSR_HENVCFG, cfg->henvcfg);
@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
(!vcpu->arch.power_off) && (!vcpu->arch.pause),
(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
kvm_vcpu_srcu_read_lock(vcpu);
if (vcpu->arch.power_off || vcpu->arch.pause) {
if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/*
* Awaken to handle a signal, request to
* sleep again later.
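
With power_off replaced by a locked kvm_mp_state, userspace continues to drive vCPU power state through the existing mp_state ioctls. A hedged VMM-side sketch, assuming vcpu_fd is a vCPU file descriptor from an already created VM (error handling trimmed):

/* Hedged sketch, not taken from the diff: stop a vCPU and read its state
 * back through the generic mp_state ioctls. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int vcpu_stop_and_report(int vcpu_fd)
{
	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };

	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp) < 0) {
		perror("KVM_SET_MP_STATE");
		return -1;
	}
	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0) {
		perror("KVM_GET_MP_STATE");
		return -1;
	}
	printf("vCPU mp_state: %s\n",
	       mp.mp_state == KVM_MP_STATE_STOPPED ? "stopped" : "runnable");
	return 0;
}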

View File

@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
break;
case EXC_BREAKPOINT:
run->exit_reason = KVM_EXIT_DEBUG;
ret = 0;
break;
default:
break;
}
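
The new EXC_BREAKPOINT exit pairs with the KVM_SET_GUEST_DEBUG handling added in vcpu.c: once debugging is enabled, ebreak is no longer delegated to the guest and surfaces to userspace as KVM_EXIT_DEBUG. A hedged VMM-side sketch, assuming vcpu_fd and the mmap'ed kvm_run area already exist:

/* Hedged sketch, not taken from the diff: enable ebreak interception and
 * react to the resulting debug exit. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int run_with_breakpoints(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_guest_debug dbg = { .control = KVM_GUESTDBG_ENABLE };

	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0) {
		perror("KVM_SET_GUEST_DEBUG");
		return -1;
	}
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
		perror("KVM_RUN");
		return -1;
	}
	if (run->exit_reason == KVM_EXIT_DEBUG)
		printf("guest executed ebreak; inspect or single-step here\n");
	return 0;
}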

View File

@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
/* Multi letter extensions (alphabetically sorted) */
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
switch (ext) {
case KVM_RISCV_ISA_EXT_H:
return false;
case KVM_RISCV_ISA_EXT_SSCOFPMF:
/* Sscofpmf depends on interrupt filtering defined in ssaia */
return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
case KVM_RISCV_ISA_EXT_V:
return riscv_v_vstate_ctrl_user_allowed();
default:
@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
/* There is no architectural config bit to disable sscofpmf completely */
case KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:

View File

@ -14,6 +14,7 @@
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
u64 sample_period;
if (!pmc->counter_val)
sample_period = counter_val_mask + 1;
sample_period = counter_val_mask;
else
sample_period = (-pmc->counter_val) & counter_val_mask;
@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
unsigned long *out_val)
{
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
int fevent_code;
if (!IS_ENABLED(CONFIG_32BIT)) {
pr_warn("%s: should be invoked for only RV32\n", __func__);
return -EINVAL;
}
if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
pr_warn("Invalid counter id [%ld] during read\n", cidx);
return -EINVAL;
}
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
return -EINVAL;
fevent_code = get_event_code(pmc->event_idx);
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
*out_val = pmc->counter_val >> 32;
return 0;
}
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
unsigned long *out_val)
{
@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
u64 enabled, running;
int fevent_code;
if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
pr_warn("Invalid counter id [%ld] during read\n", cidx);
return -EINVAL;
}
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
return 0;
}
static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
unsigned long flags, unsigned long eidx, unsigned long evtdata)
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_vcpu *vcpu = pmc->vcpu;
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
u64 period;
/*
* Stop the event counting by directly accessing the perf_event.
* Otherwise, this needs to be deferred via a workqueue.
* That will introduce skew in the counter value because the actual
* physical counter would start after returning from this function.
* It will be stopped again once the workqueue is scheduled.
*/
rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
/*
* The hw counter would start automatically when this function returns.
* Thus, the host may continue to interrupt and inject it into the guest
* even without the guest configuring the next event. Depending on the hardware,
* the host may see some sluggishness only if privilege mode filtering is not
* available. In an ideal world, where QEMU is not the only capable hardware,
* this can be removed.
* FYI: ARM64 does it this way while x86 doesn't do anything as such.
* TODO: Should we keep it for RISC-V?
*/
period = -(local64_read(&perf_event->count));
local64_set(&perf_event->hw.period_left, 0);
perf_event->attr.sample_period = period;
perf_event->hw.sample_period = period;
set_bit(pmc->idx, kvpmu->pmc_overflown);
kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
unsigned long flags, unsigned long eidx,
unsigned long evtdata)
{
struct perf_event *event;
@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr
*/
attr->sample_period = kvm_pmu_get_sample_period(pmc);
event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
return ret;
}
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
if (kvpmu->sdata) {
if (kvpmu->snapshot_addr != INVALID_GPA) {
memset(kvpmu->sdata, 0, snapshot_area_size);
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
kvpmu->sdata, snapshot_area_size);
} else {
pr_warn("snapshot address invalid\n");
}
kfree(kvpmu->sdata);
kvpmu->sdata = NULL;
}
kvpmu->snapshot_addr = INVALID_GPA;
}
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
unsigned long saddr_high, unsigned long flags,
struct kvm_vcpu_sbi_return *retdata)
{
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
int sbiret = 0;
gpa_t saddr;
unsigned long hva;
bool writable;
if (!kvpmu || flags) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
kvm_pmu_clear_snapshot_area(vcpu);
return 0;
}
saddr = saddr_low;
if (saddr_high != 0) {
if (IS_ENABLED(CONFIG_32BIT))
saddr |= ((gpa_t)saddr_high << 32);
else
sbiret = SBI_ERR_INVALID_ADDRESS;
goto out;
}
hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
if (kvm_is_error_hva(hva) || !writable) {
sbiret = SBI_ERR_INVALID_ADDRESS;
goto out;
}
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
if (!kvpmu->sdata)
return -ENOMEM;
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
kfree(kvpmu->sdata);
sbiret = SBI_ERR_FAILURE;
goto out;
}
kvpmu->snapshot_addr = saddr;
out:
retdata->err_val = sbiret;
return 0;
}
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
struct kvm_vcpu_sbi_return *retdata)
{
@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
int i, pmc_index, sbiret = 0;
struct kvm_pmc *pmc;
int fevent_code;
bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
if (snap_flag_set) {
if (kvpmu->snapshot_addr == INVALID_GPA) {
sbiret = SBI_ERR_NO_SHMEM;
goto out;
}
if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
sizeof(struct riscv_pmu_snapshot_data))) {
pr_warn("Unable to read snapshot shared memory while starting counters\n");
sbiret = SBI_ERR_FAILURE;
goto out;
}
}
/* Start the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
if (!test_bit(pmc_index, kvpmu->pmc_in_use))
continue;
/* The guest started the counter again. Reset the overflow status */
clear_bit(pmc_index, kvpmu->pmc_overflown);
pmc = &kvpmu->pmc[pmc_index];
if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
pmc->counter_val = ival;
} else if (snap_flag_set) {
/* The counter indices in the snapshot are relative to the counter base */
pmc->counter_val = kvpmu->sdata->ctr_values[i];
}
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
fevent_code = get_event_code(pmc->event_idx);
if (fevent_code >= SBI_PMU_FW_MAX) {
@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
u64 enabled, running;
struct kvm_pmc *pmc;
int fevent_code;
bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
bool shmem_needs_update = false;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
sbiret = SBI_ERR_NO_SHMEM;
goto out;
}
/* Stop the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
sbiret = SBI_ERR_ALREADY_STOPPED;
}
if (flags & SBI_PMU_STOP_FLAG_RESET) {
/* Relase the counter if this is a reset request */
pmc->counter_val += perf_event_read_value(pmc->perf_event,
&enabled, &running);
if (flags & SBI_PMU_STOP_FLAG_RESET)
/* Release the counter if this is a reset request */
kvm_pmu_release_perf_event(pmc);
}
} else {
sbiret = SBI_ERR_INVALID_PARAM;
}
if (snap_flag_set && !sbiret) {
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
else if (pmc->perf_event)
pmc->counter_val += perf_event_read_value(pmc->perf_event,
&enabled, &running);
/*
* The counter and overflow indices in the snapshot region are relative to
* cbase. Modify the set bit in the counter mask instead of the pmc_index,
* which indicates the absolute counter index.
*/
if (test_bit(pmc_index, kvpmu->pmc_overflown))
kvpmu->sdata->ctr_overflow_mask |= BIT(i);
kvpmu->sdata->ctr_values[i] = pmc->counter_val;
shmem_needs_update = true;
}
if (flags & SBI_PMU_STOP_FLAG_RESET) {
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
clear_bit(pmc_index, kvpmu->pmc_in_use);
clear_bit(pmc_index, kvpmu->pmc_overflown);
if (snap_flag_set) {
/*
* Only clear the given counter as the caller is responsible to
* validate both the overflow mask and configured counters.
*/
kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
shmem_needs_update = true;
}
}
}
if (shmem_needs_update)
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
sizeof(struct riscv_pmu_snapshot_data));
out:
retdata->err_val = sbiret;
@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata)
{
int ctr_idx, ret, sbiret = 0;
int ctr_idx, sbiret = 0;
long ret;
bool is_fevent;
unsigned long event_code;
u32 etype = kvm_pmu_get_perf_event_type(eidx);
@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
kvpmu->fw_event[event_code].started = true;
} else {
ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
if (ret)
return ret;
if (ret) {
sbiret = SBI_ERR_NOT_SUPPORTED;
goto out;
}
}
set_bit(ctr_idx, kvpmu->pmc_in_use);
@ -530,7 +740,19 @@ out:
return 0;
}
int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata)
{
int ret;
ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
if (ret == -EINVAL)
retdata->err_val = SBI_ERR_INVALID_PARAM;
return 0;
}
int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata)
{
int ret;
@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
kvpmu->snapshot_addr = INVALID_GPA;
if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc = &kvpmu->pmc[i];
pmc->idx = i;
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
pmc->vcpu = vcpu;
if (i < kvpmu->num_hw_ctrs) {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
if (i < 3)
@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc->cinfo.csr = CSR_CYCLE + i;
} else {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
pmc->cinfo.width = BITS_PER_LONG - 1;
pmc->cinfo.width = 63;
}
}
@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
if (!kvpmu)
return;
for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
pmc = &kvpmu->pmc[i];
pmc->counter_val = 0;
kvm_pmu_release_perf_event(pmc);
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
}
bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
kvm_pmu_clear_snapshot_area(vcpu);
}
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
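
One detail worth spelling out from kvm_pmu_get_sample_period() above: perf counts up and fires on overflow, so the programmed period is the distance from the guest-written value to wrap-around, and a zero counter value now maps to the full mask (mask + 1 could wrap to zero for a full-width counter). A standalone sketch of that arithmetic, not kernel code:

/* Standalone illustration of the sample-period math; width_bits and the
 * test values are made up for the example. */
#include <stdio.h>
#include <stdint.h>

static uint64_t sample_period(uint64_t counter_val, unsigned int width_bits)
{
	uint64_t mask = (width_bits >= 64) ? ~0ULL : (1ULL << width_bits) - 1;

	if (!counter_val)		/* full period, as in the hunk above */
		return mask;
	return (-counter_val) & mask;	/* distance to counter wrap-around */
}

int main(void)
{
	printf("period(0, 48 bits)          = 0x%llx\n",
	       (unsigned long long)sample_period(0, 48));
	printf("period(0xfffff000, 48 bits) = 0x%llx\n",
	       (unsigned long long)sample_period(0xfffff000ULL, 48));
	return 0;
}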

View File

@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
unsigned long i;
struct kvm_vcpu *tmp;
kvm_for_each_vcpu(i, tmp, vcpu->kvm)
tmp->arch.power_off = true;
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
spin_lock(&vcpu->arch.mp_state_lock);
WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
spin_unlock(&vcpu->arch.mp_state_lock);
}
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&run->system_event, 0, sizeof(run->system_event));

View File

@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
struct kvm_vcpu *target_vcpu;
unsigned long target_vcpuid = cp->a0;
int ret = 0;
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
if (!target_vcpu->arch.power_off)
return SBI_ERR_ALREADY_AVAILABLE;
spin_lock(&target_vcpu->arch.mp_state_lock);
if (!kvm_riscv_vcpu_stopped(target_vcpu)) {
ret = SBI_ERR_ALREADY_AVAILABLE;
goto out;
}
spin_lock(&target_vcpu->arch.reset_cntx_lock);
reset_cntx = &target_vcpu->arch.guest_reset_context;
/* start address */
reset_cntx->sepc = cp->a1;
@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
reset_cntx->a0 = target_vcpuid;
/* private data passed from kernel */
reset_cntx->a1 = cp->a2;
spin_unlock(&target_vcpu->arch.reset_cntx_lock);
kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
kvm_riscv_vcpu_power_on(target_vcpu);
__kvm_riscv_vcpu_power_on(target_vcpu);
return 0;
out:
spin_unlock(&target_vcpu->arch.mp_state_lock);
return ret;
}
static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.power_off)
return SBI_ERR_FAILURE;
int ret = 0;
kvm_riscv_vcpu_power_off(vcpu);
spin_lock(&vcpu->arch.mp_state_lock);
return 0;
if (kvm_riscv_vcpu_stopped(vcpu)) {
ret = SBI_ERR_FAILURE;
goto out;
}
__kvm_riscv_vcpu_power_off(vcpu);
out:
spin_unlock(&vcpu->arch.mp_state_lock);
return ret;
}
static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
if (!target_vcpu->arch.power_off)
if (!kvm_riscv_vcpu_stopped(target_vcpu))
return SBI_HSM_STATE_STARTED;
else if (vcpu->stat.generic.blocking)
return SBI_HSM_STATE_SUSPENDED;
@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
{
int ret = 0;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
struct kvm *kvm = vcpu->kvm;
unsigned long funcid = cp->a6;
switch (funcid) {
case SBI_EXT_HSM_HART_START:
mutex_lock(&kvm->lock);
ret = kvm_sbi_hsm_vcpu_start(vcpu);
mutex_unlock(&kvm->lock);
break;
case SBI_EXT_HSM_HART_STOP:
ret = kvm_sbi_hsm_vcpu_stop(vcpu);

View File

@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
#endif
/*
* This can fail if perf core framework fails to create an event.
* Forward the error to userspace because it's an error which
* happened within the host kernel. The other option would be
* to convert to an SBI error and forward to the guest.
* No need to forward the error to userspace and exit the guest.
* The operation can continue without profiling. Forward the
* appropriate SBI error to the guest.
*/
ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1,
cp->a2, cp->a3, temp, retdata);
@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
case SBI_EXT_PMU_COUNTER_FW_READ:
ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata);
ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata);
break;
case SBI_EXT_PMU_COUNTER_FW_READ_HI:
if (IS_ENABLED(CONFIG_32BIT))
ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata);
else
retdata->out_val = 0;
break;
case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;

View File

@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
if (flags != 0)
return SBI_ERR_INVALID_PARAM;
if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
if (shmem_phys_lo == SBI_SHMEM_DISABLE &&
shmem_phys_hi == SBI_SHMEM_DISABLE) {
vcpu->arch.sta.shmem = INVALID_GPA;
return 0;
}

View File

@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_NR_VCPUS:

View File

@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
* that does not hurt:
* Make sure we get updated with the first PEBS event.
* During removal, ->pebs_data_cfg is still valid for
* the last PEBS event. Don't clear it.
*/
if (cpuc->n_pebs == 1)
if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {

View File

@ -324,6 +324,7 @@ static void decode_ISR(unsigned int val)
decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val);
}
#ifdef CONFIG_I2C_PXA_SLAVE
static const struct bits icr_bits[] = {
PXA_BIT(ICR_START, "START", NULL),
PXA_BIT(ICR_STOP, "STOP", NULL),
@ -342,7 +343,6 @@ static const struct bits icr_bits[] = {
PXA_BIT(ICR_UR, "UR", "ur"),
};
#ifdef CONFIG_I2C_PXA_SLAVE
static void decode_ICR(unsigned int val)
{
decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val);

View File

@ -191,8 +191,6 @@ void riscv_pmu_stop(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
if (!(hwc->state & PERF_HES_STOPPED)) {
if (rvpmu->ctr_stop) {
rvpmu->ctr_stop(event, 0);
@ -408,6 +406,7 @@ struct riscv_pmu *riscv_pmu_alloc(void)
cpuc->n_events = 0;
for (i = 0; i < RISCV_MAX_COUNTERS; i++)
cpuc->events[i] = NULL;
cpuc->snapshot_addr = NULL;
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,

View File

@ -27,7 +27,7 @@
#define ALT_SBI_PMU_OVERFLOW(__ovl) \
asm volatile(ALTERNATIVE_2( \
"csrr %0, " __stringify(CSR_SSCOUNTOVF), \
"csrr %0, " __stringify(CSR_SCOUNTOVF), \
"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
CONFIG_ERRATA_THEAD_PMU, \
@ -57,6 +57,11 @@ asm volatile(ALTERNATIVE( \
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");
static bool sbi_v2_available;
static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
#define sbi_pmu_snapshot_available() \
static_branch_unlikely(&sbi_pmu_snapshot_available)
static struct attribute *riscv_arch_formats_attr[] = {
&format_attr_event.attr,
&format_attr_firmware.attr,
@ -384,7 +389,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
cmask = 1;
} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
cmask = BIT(CSR_INSTRET - CSR_CYCLE);
}
}
@ -506,24 +511,126 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
return ret;
}
static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
{
int cpu;
for_each_possible_cpu(cpu) {
struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
if (!cpu_hw_evt->snapshot_addr)
continue;
free_page((unsigned long)cpu_hw_evt->snapshot_addr);
cpu_hw_evt->snapshot_addr = NULL;
cpu_hw_evt->snapshot_addr_phys = 0;
}
}
static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu)
{
int cpu;
struct page *snapshot_page;
for_each_possible_cpu(cpu) {
struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
if (!snapshot_page) {
pmu_sbi_snapshot_free(pmu);
return -ENOMEM;
}
cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page);
cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page);
}
return 0;
}
static int pmu_sbi_snapshot_disable(void)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE,
SBI_SHMEM_DISABLE, 0, 0, 0, 0);
if (ret.error) {
pr_warn("failed to disable snapshot shared memory\n");
return sbi_err_map_linux_errno(ret.error);
}
return 0;
}
static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
{
struct cpu_hw_events *cpu_hw_evt;
struct sbiret ret = {0};
cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
if (!cpu_hw_evt->snapshot_addr_phys)
return -EINVAL;
if (cpu_hw_evt->snapshot_set_done)
return 0;
if (IS_ENABLED(CONFIG_32BIT))
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
cpu_hw_evt->snapshot_addr_phys,
(u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0);
else
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
/* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */
if (ret.error) {
if (ret.error != SBI_ERR_NOT_SUPPORTED)
pr_warn("pmu snapshot setup failed with error %ld\n", ret.error);
return sbi_err_map_linux_errno(ret.error);
}
memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
cpu_hw_evt->snapshot_set_done = true;
return 0;
}
static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
struct sbiret ret;
union sbi_pmu_ctr_info info;
u64 val = 0;
struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
/* Read the value from the shared memory directly only if counter is stopped */
if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
val = sdata->ctr_values[idx];
return val;
}
if (pmu_sbi_is_fw_event(event)) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
hwc->idx, 0, 0, 0, 0, 0);
if (!ret.error)
val = ret.value;
if (ret.error)
return 0;
val = ret.value;
if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI,
hwc->idx, 0, 0, 0, 0, 0);
if (!ret.error)
val |= ((u64)ret.value << 32);
else
WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n",
ret.error);
}
} else {
info = pmu_ctr_list[idx];
val = riscv_pmu_ctr_read_csr(info.csr);
if (IS_ENABLED(CONFIG_32BIT))
val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32;
}
return val;
@ -553,6 +660,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
struct hw_perf_event *hwc = &event->hw;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
/* There is no benefit setting SNAPSHOT FLAG for a single counter */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
1, flag, ival, ival >> 32, 0);
@ -573,16 +681,36 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
pmu_sbi_reset_scounteren((void *)event);
if (sbi_pmu_snapshot_available())
flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
flag != SBI_PMU_STOP_FLAG_RESET)
if (!ret.error && sbi_pmu_snapshot_available()) {
/*
* The counter snapshot is based on the index base specified by hwc->idx.
* The actual counter value is updated in shared memory at index 0 when counter
* mask is 0x01. To ensure accurate counter values, it's necessary to transfer
* the counter value to shared memory. However, if hwc->idx is zero, the counter
* value is already correctly updated in shared memory, requiring no further
* adjustment.
*/
if (hwc->idx > 0) {
sdata->ctr_values[hwc->idx] = sdata->ctr_values[0];
sdata->ctr_values[0] = 0;
}
} else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
flag != SBI_PMU_STOP_FLAG_RESET) {
pr_err("Stopping counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
}
}
static int pmu_sbi_find_num_ctrs(void)
@ -640,10 +768,39 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
unsigned long flag = 0;
int i, idx;
struct sbiret ret;
u64 temp_ctr_overflow_mask = 0;
/* No need to check the error here as we can't do anything about the error */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
if (sbi_pmu_snapshot_available())
flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
/* Reset the shadow copy to avoid save/restore any value from previous overflow */
memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
/* No need to check the error here as we can't do anything about the error */
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
if (!ret.error && sbi_pmu_snapshot_available()) {
/* Save the counter values to avoid clobbering */
for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
sdata->ctr_values[idx];
/* Save the overflow mask to avoid clobbering */
temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);
}
}
/* Restore the counter values to the shared memory for used hw counters */
if (sbi_pmu_snapshot_available()) {
for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
if (temp_ctr_overflow_mask)
sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
}
}
/*
@ -652,11 +809,10 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
* while the overflowed counters need to be started with updated initialization
* value.
*/
static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
unsigned long ctr_ovf_mask)
static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
u64 ctr_ovf_mask)
{
int idx = 0;
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
int idx = 0, i;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
unsigned long ctr_start_mask = 0;
@ -664,11 +820,12 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
struct hw_perf_event *hwc;
u64 init_val = 0;
ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
/* Start all the counters that did not overflow in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
0, 0, 0, 0);
for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
/* Start all the counters that did not overflow in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
0, 0, 0, 0);
}
/* Reinitialize and start all the counter that overflowed */
while (ctr_ovf_mask) {
@ -691,6 +848,52 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
}
}
static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
u64 ctr_ovf_mask)
{
int i, idx = 0;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
u64 max_period, init_val = 0;
struct hw_perf_event *hwc;
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
if (ctr_ovf_mask & BIT(idx)) {
event = cpu_hw_evt->events[idx];
hwc = &event->hw;
max_period = riscv_pmu_ctr_get_width_mask(event);
init_val = local64_read(&hwc->prev_count) & max_period;
cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
}
/*
* We do not need to update the non-overflow counters; the previous
* value should already be there.
*/
}
for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
/* Restore the counter values to relative indices for used hw counters */
for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
sdata->ctr_values[idx] =
cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
/* Start all the counters in a single shot */
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
}
}
static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
u64 ctr_ovf_mask)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
if (sbi_pmu_snapshot_available())
pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask);
else
pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
}
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
struct perf_sample_data data;
@ -700,10 +903,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
int lidx, hidx, fidx;
struct riscv_pmu *pmu;
struct perf_event *event;
unsigned long overflow;
unsigned long overflowed_ctrs = 0;
u64 overflow;
u64 overflowed_ctrs = 0;
struct cpu_hw_events *cpu_hw_evt = dev;
u64 start_clock = sched_clock();
struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if (WARN_ON_ONCE(!cpu_hw_evt))
return IRQ_NONE;
@ -725,7 +929,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
pmu_sbi_stop_hw_ctrs(pmu);
/* Overflow status register should only be read after counters are stopped */
ALT_SBI_PMU_OVERFLOW(overflow);
if (sbi_pmu_snapshot_available())
overflow = sdata->ctr_overflow_mask;
else
ALT_SBI_PMU_OVERFLOW(overflow);
/*
* Overflow interrupt pending bit should only be cleared after stopping
@ -751,9 +958,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
continue;
/* compute hardware counter index */
hidx = info->csr - CSR_CYCLE;
/* check if the corresponding bit is set in sscountovf */
if (sbi_pmu_snapshot_available())
/* SBI implementation already updated the logical indices */
hidx = lidx;
else
/* compute hardware counter index */
hidx = info->csr - CSR_CYCLE;
/* check if the corresponding bit is set in sscountovf or overflow mask in shmem */
if (!(overflow & BIT(hidx)))
continue;
@ -763,7 +975,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
overflowed_ctrs |= BIT(lidx);
hw_evt = &event->hw;
/* Update the event states here so that we know the state while reading */
hw_evt->state |= PERF_HES_STOPPED;
riscv_pmu_event_update(event);
hw_evt->state |= PERF_HES_UPTODATE;
perf_sample_data_init(&data, 0, hw_evt->last_period);
if (riscv_pmu_event_set_period(event)) {
/*
@ -776,6 +991,8 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
perf_event_overflow(event, &data, regs);
}
/* Reset the state as we are going to start the counter after the loop */
hw_evt->state = 0;
}
pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
@ -807,6 +1024,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}
if (sbi_pmu_snapshot_available())
return pmu_sbi_snapshot_setup(pmu, cpu);
return 0;
}
@ -819,6 +1039,9 @@ static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
if (sbi_pmu_snapshot_available())
return pmu_sbi_snapshot_disable();
return 0;
}
@ -927,6 +1150,12 @@ static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
if (sbi_v2_available) {
if (sbi_pmu_snapshot_available()) {
pmu_sbi_snapshot_disable();
pmu_sbi_snapshot_free(pmu);
}
}
riscv_pm_pmu_unregister(pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
@ -1094,10 +1323,6 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->event_unmapped = pmu_sbi_event_unmapped;
pmu->csr_index = pmu_sbi_csr_index;
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
return ret;
ret = riscv_pm_pmu_register(pmu);
if (ret)
goto out_unregister;
@ -1106,8 +1331,34 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
/* SBI PMU Snapshot is only available in SBI v2.0 */
if (sbi_v2_available) {
ret = pmu_sbi_snapshot_alloc(pmu);
if (ret)
goto out_unregister;
ret = pmu_sbi_snapshot_setup(pmu, smp_processor_id());
if (ret) {
/* Snapshot is an optional feature. Continue if not available */
pmu_sbi_snapshot_free(pmu);
} else {
pr_info("SBI PMU snapshot detected\n");
/*
* We enable it once here for the boot CPU. If snapshot shmem setup
* fails during the CPU hotplug process, it will fail to start the CPU,
* as we cannot handle heterogeneous PMUs with different snapshot
* capability.
*/
static_branch_enable(&sbi_pmu_snapshot_available);
}
}
register_sysctl("kernel", sbi_pmu_sysctl_table);
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
goto out_unregister;
return 0;
out_unregister:
@ -1135,6 +1386,9 @@ static int __init pmu_sbi_devinit(void)
return 0;
}
if (sbi_spec_version >= sbi_mk_version(2, 0))
sbi_v2_available = true;
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
"perf/riscv/pmu:starting",
pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
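
Two easy-to-miss details in the RV32 read path above: a firmware counter is reassembled from the FW_READ / FW_READ_HI pair, and the CSR pair must be combined with a shift of 32 (the previous '<< 31' placed the upper word one bit too low). A standalone sketch of the reassembly:

/* Standalone illustration: reassemble a 64-bit counter from two 32-bit
 * halves, as the RV32 path above does for CSR and firmware counters. */
#include <stdio.h>
#include <stdint.h>

static uint64_t combine_counter(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)hi << 32) | lo;	/* << 31 would misplace the upper word */
}

int main(void)
{
	printf("combined: 0x%llx\n",
	       (unsigned long long)combine_counter(0x89abcdefU, 0x01234567U));
	return 0;
}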

View File

@ -3042,12 +3042,9 @@ static void
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
spin_lock(&nn->client_lock);
clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
put_client_renew_locked(clp);
spin_unlock(&nn->client_lock);
drop_client(clp);
}
static int
@ -6616,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn)
list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
atomic_inc(&clp->cl_rpc_users);
kref_get(&clp->cl_nfsdfs.cl_ref);
set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
clp->cl_ra_time = ktime_get_boottime_seconds();
}

View File

@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref)
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
int rc;
spin_lock(&cfid->cfids->cfid_list_lock);
if (cfid->on_list) {
@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref)
cfid->dentry = NULL;
if (cfid->is_open) {
SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
atomic_dec(&cfid->tcon->num_remote_opens);
if (rc != -EBUSY && rc != -EAGAIN)
atomic_dec(&cfid->tcon->num_remote_opens);
}
free_cached_dir(cfid);

View File

@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
}
#endif /* CONFIG_CIFS_STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
atomic_set(&tcon->num_smbs_sent, 0);
spin_lock(&tcon->stat_lock);
@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
}
#endif /* STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
i++;
seq_printf(m, "\n%d) %s", i, tcon->tree_name);

View File

@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
struct workqueue_struct *deferredclose_wq;
struct workqueue_struct *serverclose_wq;
__u32 cifs_lock_secret;
/*
@ -1888,6 +1889,13 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}
serverclose_wq = alloc_workqueue("serverclose",
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
if (!serverclose_wq) {
rc = -ENOMEM;
goto out_destroy_serverclose_wq;
}
rc = cifs_init_inodecache();
if (rc)
goto out_destroy_deferredclose_wq;
@ -1962,6 +1970,8 @@ out_destroy_decrypt_wq:
destroy_workqueue(decrypt_wq);
out_destroy_cifsiod_wq:
destroy_workqueue(cifsiod_wq);
out_destroy_serverclose_wq:
destroy_workqueue(serverclose_wq);
out_clean_proc:
cifs_proc_clean();
return rc;
@ -1991,6 +2001,7 @@ exit_cifs(void)
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
destroy_workqueue(serverclose_wq);
destroy_workqueue(cifsiod_wq);
cifs_proc_clean();
}

View File

@ -442,10 +442,10 @@ struct smb_version_operations {
/* set fid protocol-specific info */
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
/* close a file */
void (*close)(const unsigned int, struct cifs_tcon *,
int (*close)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* close a file, returning file attributes and timestamps */
void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *pfile_info);
/* send a flush request to the server */
int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
@ -1281,7 +1281,6 @@ struct cifs_tcon {
struct cached_fids *cfids;
/* BB add field for back pointer to sb struct(s)? */
#ifdef CONFIG_CIFS_DFS_UPCALL
struct list_head dfs_ses_list;
struct delayed_work dfs_cache_work;
#endif
struct delayed_work query_interfaces; /* query interfaces workqueue job */
@ -1440,6 +1439,7 @@ struct cifsFileInfo {
bool swapfile:1;
bool oplock_break_cancelled:1;
bool status_file_deleted:1; /* file has been deleted */
bool offload:1; /* offload final part of _put to a wq */
unsigned int oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
@ -1448,6 +1448,7 @@ struct cifsFileInfo {
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
struct work_struct serverclose; /* work for serverclose */
struct delayed_work deferred;
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
char *symlink_target;
@ -1804,7 +1805,6 @@ struct cifs_mount_ctx {
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct list_head dfs_ses_list;
};
static inline void __free_dfs_info_param(struct dfs_info3_param *param)
@ -2105,6 +2105,7 @@ extern struct workqueue_struct *decrypt_wq;
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
extern struct workqueue_struct *deferredclose_wq;
extern struct workqueue_struct *serverclose_wq;
extern __u32 cifs_lock_secret;
extern mempool_t *cifs_sm_req_poolp;
@ -2324,4 +2325,14 @@ struct smb2_compound_vars {
struct kvec ea_iov;
};
static inline bool cifs_ses_exiting(struct cifs_ses *ses)
{
bool ret;
spin_lock(&ses->ses_lock);
ret = ses->ses_status == SES_EXITING;
spin_unlock(&ses->ses_lock);
return ret;
}
#endif /* _CIFS_GLOB_H */

View File

@ -725,31 +725,31 @@ struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
void cifs_put_tcon_super(struct super_block *sb);
int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
/* Put references of @ses and @ses->dfs_root_ses */
/* Put references of @ses and its children */
static inline void cifs_put_smb_ses(struct cifs_ses *ses)
{
struct cifs_ses *rses = ses->dfs_root_ses;
struct cifs_ses *next;
__cifs_put_smb_ses(ses);
if (rses)
__cifs_put_smb_ses(rses);
do {
next = ses->dfs_root_ses;
__cifs_put_smb_ses(ses);
} while ((ses = next));
}
/* Get an active reference of @ses and @ses->dfs_root_ses.
/* Get an active reference of @ses and its children.
*
* NOTE: make sure to call this function when incrementing reference count of
* @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses)
* will also get its reference count incremented.
*
* cifs_put_smb_ses() will put both references, so call it when you're done.
* cifs_put_smb_ses() will put all references, so call it when you're done.
*/
static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses)
{
lockdep_assert_held(&cifs_tcp_ses_lock);
ses->ses_count++;
if (ses->dfs_root_ses)
ses->dfs_root_ses->ses_count++;
for (; ses; ses = ses->dfs_root_ses)
ses->ses_count++;
}
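A minimal usage sketch of the contract spelled out in the comment above (the surrounding call site is hypothetical, not taken from the patch): one increment under cifs_tcp_ses_lock now covers the whole dfs_root_ses chain, and a single put later walks the same chain back down.

	spin_lock(&cifs_tcp_ses_lock);
	cifs_smb_ses_inc_refcount(ses);	/* bumps @ses and every dfs_root_ses above it */
	spin_unlock(&cifs_tcp_ses_lock);

	/* ... use the session ... */

	cifs_put_smb_ses(ses);		/* drops @ses and the same chain of root sessions */
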
static inline bool dfs_src_pathname_equal(const char *s1, const char *s2)

View File

@ -5854,10 +5854,8 @@ SetEARetry:
parm_data->list.EA_flags = 0;
/* we checked above that name len is less than 255 */
parm_data->list.name_len = (__u8)name_len;
/* EA names are always ASCII */
if (ea_name)
strncpy(parm_data->list.name, ea_name, name_len);
parm_data->list.name[name_len] = '\0';
/* EA names are always ASCII and NUL-terminated */
strscpy(parm_data->list.name, ea_name ?: "", name_len + 1);
parm_data->list.value_len = cpu_to_le16(ea_value_len);
/* caller ensures that ea_value_len is less than 64K but
we need to ensure that it fits within the smb */

View File

@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (!ses->chans[i].server)
@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
/* check if iface is still active */
spin_lock(&ses->ses_lock);
if (ses->ses_status == SES_EXITING) {
spin_unlock(&ses->ses_lock);
continue;
}
spin_unlock(&ses->ses_lock);
spin_lock(&ses->chan_lock);
if (cifs_ses_get_chan_index(ses, server) ==
CIFS_INVAL_CHAN_INDEX) {
@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
ctx->sectype != ses->sectype)
return 0;
if (ctx->dfs_root_ses != ses->dfs_root_ses)
return 0;
/*
* If an existing session is limited to less channels than
* requested, it should not be reused
@ -1963,31 +1974,6 @@ out:
return rc;
}
/**
* cifs_free_ipc - helper to release the session IPC tcon
* @ses: smb session to unmount the IPC from
*
* Needs to be called everytime a session is destroyed.
*
* On session close, the IPC is closed and the server must release all tcons of the session.
* No need to send a tree disconnect here.
*
* Besides, it will make the server to not close durable and resilient files on session close, as
* specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request.
*/
static int
cifs_free_ipc(struct cifs_ses *ses)
{
struct cifs_tcon *tcon = ses->tcon_ipc;
if (tcon == NULL)
return 0;
tconInfoFree(tcon);
ses->tcon_ipc = NULL;
return 0;
}
static struct cifs_ses *
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
{
@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
void __cifs_put_smb_ses(struct cifs_ses *ses)
{
struct TCP_Server_Info *server = ses->server;
struct cifs_tcon *tcon;
unsigned int xid;
size_t i;
bool do_logoff;
int rc;
spin_lock(&ses->ses_lock);
if (ses->ses_status == SES_EXITING) {
spin_unlock(&ses->ses_lock);
return;
}
spin_unlock(&ses->ses_lock);
cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
cifs_dbg(FYI,
"%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE");
spin_lock(&cifs_tcp_ses_lock);
if (--ses->ses_count > 0) {
spin_lock(&ses->ses_lock);
cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n",
__func__, ses->Suid, ses->ses_count, ses->ses_status,
ses->tcon_ipc ? ses->tcon_ipc->tree_name : "none");
if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) {
spin_unlock(&ses->ses_lock);
spin_unlock(&cifs_tcp_ses_lock);
return;
}
spin_lock(&ses->ses_lock);
if (ses->ses_status == SES_GOOD)
ses->ses_status = SES_EXITING;
spin_unlock(&ses->ses_lock);
spin_unlock(&cifs_tcp_ses_lock);
/* ses_count can never go negative */
WARN_ON(ses->ses_count < 0);
spin_lock(&ses->ses_lock);
if (ses->ses_status == SES_EXITING && server->ops->logoff) {
spin_unlock(&ses->ses_lock);
cifs_free_ipc(ses);
spin_lock(&ses->chan_lock);
cifs_chan_clear_need_reconnect(ses, server);
spin_unlock(&ses->chan_lock);
do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff;
ses->ses_status = SES_EXITING;
tcon = ses->tcon_ipc;
ses->tcon_ipc = NULL;
spin_unlock(&ses->ses_lock);
spin_unlock(&cifs_tcp_ses_lock);
/*
* On session close, the IPC is closed and the server must release all
* tcons of the session. No need to send a tree disconnect here.
*
* Besides, it will make the server to not close durable and resilient
* files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
* SMB2 LOGOFF Request.
*/
tconInfoFree(tcon);
if (do_logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
if (rc)
cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
__func__, rc);
_free_xid(xid);
} else {
spin_unlock(&ses->ses_lock);
cifs_free_ipc(ses);
}
spin_lock(&cifs_tcp_ses_lock);
@ -2373,9 +2363,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
* need to lock before changing something in the session.
*/
spin_lock(&cifs_tcp_ses_lock);
if (ctx->dfs_root_ses)
cifs_smb_ses_inc_refcount(ctx->dfs_root_ses);
ses->dfs_root_ses = ctx->dfs_root_ses;
if (ses->dfs_root_ses)
ses->dfs_root_ses->ses_count++;
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@ -3326,6 +3316,9 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
cifs_put_smb_ses(mnt_ctx->ses);
else if (mnt_ctx->server)
cifs_put_tcp_session(mnt_ctx->server, 0);
mnt_ctx->ses = NULL;
mnt_ctx->tcon = NULL;
mnt_ctx->server = NULL;
mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
free_xid(mnt_ctx->xid);
}
@ -3604,8 +3597,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
bool isdfs;
int rc;
INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
rc = dfs_mount_share(&mnt_ctx, &isdfs);
if (rc)
goto error;
@ -3636,7 +3627,6 @@ out:
return rc;
error:
dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
cifs_mount_put_conns(&mnt_ctx);
return rc;
}
@ -3651,6 +3641,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
rc = cifs_mount_get_tcon(&mnt_ctx);
if (!rc) {
/*
* Prevent superblock from being created with any missing
* connections.
*/
if (WARN_ON(!mnt_ctx.server))
rc = -EHOSTDOWN;
else if (WARN_ON(!mnt_ctx.ses))
rc = -EACCES;
else if (WARN_ON(!mnt_ctx.tcon))
rc = -ENOENT;
}
if (rc)
goto error;
@ -3988,13 +3990,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
static struct cifs_tcon *
cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
{
int rc;
struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
struct cifs_ses *ses;
struct cifs_tcon *tcon = NULL;
struct smb3_fs_context *ctx;
char *origin_fullpath = NULL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
@ -4018,6 +4021,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ctx->sign = master_tcon->ses->sign;
ctx->seal = master_tcon->seal;
ctx->witness = master_tcon->use_witness;
ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;
rc = cifs_set_vol_auth(ctx, master_tcon->ses);
if (rc) {
@ -4037,12 +4041,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
#ifdef CONFIG_CIFS_DFS_UPCALL
spin_lock(&master_tcon->tc_lock);
if (master_tcon->origin_fullpath) {
spin_unlock(&master_tcon->tc_lock);
origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
if (IS_ERR(origin_fullpath)) {
tcon = ERR_CAST(origin_fullpath);
origin_fullpath = NULL;
cifs_put_smb_ses(ses);
goto out;
}
} else {
spin_unlock(&master_tcon->tc_lock);
}
#endif
tcon = cifs_get_tcon(ses, ctx);
if (IS_ERR(tcon)) {
cifs_put_smb_ses(ses);
goto out;
}
#ifdef CONFIG_CIFS_DFS_UPCALL
if (origin_fullpath) {
spin_lock(&tcon->tc_lock);
tcon->origin_fullpath = origin_fullpath;
spin_unlock(&tcon->tc_lock);
origin_fullpath = NULL;
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
dfs_cache_get_ttl() * HZ);
}
#endif
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, ctx);
@ -4051,11 +4082,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
out:
kfree(ctx->username);
kfree_sensitive(ctx->password);
kfree(origin_fullpath);
kfree(ctx);
return tcon;
}
static struct cifs_tcon *
cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
{
struct cifs_tcon *ret;
cifs_mount_lock();
ret = __cifs_construct_tcon(cifs_sb, fsuid);
cifs_mount_unlock();
return ret;
}
struct cifs_tcon *
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
{

View File

@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
}
/*
* Track individual DFS referral servers used by new DFS mount.
*
* On success, their lifetime will be shared by final tcon (dfs_ses_list).
* Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount().
* Get an active reference of @ses so that next call to cifs_put_tcon() won't
* release it as any new DFS referrals must go through its IPC tcon.
*/
static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct dfs_root_ses *root_ses;
struct cifs_ses *ses = mnt_ctx->ses;
if (ses) {
root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
if (!root_ses)
return -ENOMEM;
INIT_LIST_HEAD(&root_ses->list);
spin_lock(&cifs_tcp_ses_lock);
cifs_smb_ses_inc_refcount(ses);
spin_unlock(&cifs_tcp_ses_lock);
root_ses->ses = ses;
list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
}
/* Select new DFS referral server so that new referrals go through it */
ctx->dfs_root_ses = ses;
return 0;
}
static inline int parse_dfs_target(struct smb3_fs_context *ctx,
@ -185,11 +172,8 @@ again:
continue;
}
if (is_refsrv) {
rc = add_root_smb_session(mnt_ctx);
if (rc)
goto out;
}
if (is_refsrv)
add_root_smb_session(mnt_ctx);
rc = ref_walk_advance(rw);
if (!rc) {
@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_tcon *tcon;
char *origin_fullpath;
bool new_tcon = true;
int rc;
origin_fullpath = dfs_get_path(cifs_sb, ctx->source);
@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
return PTR_ERR(origin_fullpath);
rc = dfs_referral_walk(mnt_ctx);
if (!rc) {
/*
* Prevent superblock from being created with any missing
* connections.
*/
if (WARN_ON(!mnt_ctx->server))
rc = -EHOSTDOWN;
else if (WARN_ON(!mnt_ctx->ses))
rc = -EACCES;
else if (WARN_ON(!mnt_ctx->tcon))
rc = -ENOENT;
}
if (rc)
goto out;
@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
if (!tcon->origin_fullpath) {
tcon->origin_fullpath = origin_fullpath;
origin_fullpath = NULL;
} else {
new_tcon = false;
}
spin_unlock(&tcon->tc_lock);
if (list_empty(&tcon->dfs_ses_list)) {
list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list);
if (new_tcon) {
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
dfs_cache_get_ttl() * HZ);
} else {
dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
}
out:
@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
if (rc)
return rc;
ctx->dfs_root_ses = mnt_ctx->ses;
/*
* If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
* try to get an DFS referral (even cached) to determine whether it is an DFS mount.
@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
*isdfs = true;
add_root_smb_session(mnt_ctx);
return __dfs_mount_share(mnt_ctx);
rc = __dfs_mount_share(mnt_ctx);
dfs_put_root_smb_sessions(mnt_ctx);
return rc;
}
/* Update dfs referral path of superblock */

View File

@ -7,7 +7,9 @@
#define _CIFS_DFS_H
#include "cifsglob.h"
#include "cifsproto.h"
#include "fs_context.h"
#include "dfs_cache.h"
#include "cifs_unicode.h"
#include <linux/namei.h>
@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw)
ref_walk_tit(rw));
}
struct dfs_root_ses {
struct list_head list;
struct cifs_ses *ses;
};
int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
struct smb3_fs_context *ctx);
int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses;
return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls,
cifs_remap(cifs_sb), path, ref, tl);
}
static inline void dfs_put_root_smb_sessions(struct list_head *head)
/*
* cifs_get_smb_ses() already guarantees an active reference of
* @ses->dfs_root_ses when a new session is created, so we need to put extra
* references of all DFS root sessions that were used across the mount process
* in dfs_mount_share().
*/
static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx)
{
struct dfs_root_ses *root, *tmp;
const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_ses *ses = ctx->dfs_root_ses;
struct cifs_ses *cur;
list_for_each_entry_safe(root, tmp, head, list) {
list_del_init(&root->list);
cifs_put_smb_ses(root->ses);
kfree(root);
if (!ses)
return;
for (cur = ses; cur; cur = cur->dfs_root_ses) {
if (cur->dfs_root_ses)
cifs_put_smb_ses(cur->dfs_root_ses);
}
cifs_put_smb_ses(ses);
}
#endif /* _CIFS_DFS_H */

View File

@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses)
return ret;
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh)
/* Refresh dfs referral of @ses and mark it for reconnect if needed */
static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh)
{
struct TCP_Server_Info *server = ses->server;
DFS_CACHE_TGT_LIST(old_tl);
@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref
bool needs_refresh = false;
struct cache_entry *ce;
unsigned int xid;
char *path = NULL;
int rc = 0;
xid = get_xid();
mutex_lock(&server->refpath_lock);
if (server->leaf_fullpath) {
path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC);
if (!path)
rc = -ENOMEM;
}
mutex_unlock(&server->refpath_lock);
if (!path)
goto out;
down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
@ -1218,19 +1229,17 @@ out:
free_xid(xid);
dfs_cache_free_tgts(&old_tl);
dfs_cache_free_tgts(&new_tl);
return rc;
kfree(path);
}
static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh)
static inline void refresh_ses_referral(struct cifs_ses *ses)
{
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_ses *ses = tcon->ses;
__refresh_ses_referral(ses, false);
}
mutex_lock(&server->refpath_lock);
if (server->leaf_fullpath)
__refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh);
mutex_unlock(&server->refpath_lock);
return 0;
static inline void force_refresh_ses_referral(struct cifs_ses *ses)
{
__refresh_ses_referral(ses, true);
}
/**
@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
*/
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
return refresh_tcon(tcon, true);
force_refresh_ses_referral(tcon->ses);
return 0;
}
/* Refresh all DFS referrals related to DFS tcon */
void dfs_cache_refresh(struct work_struct *work)
{
struct TCP_Server_Info *server;
struct dfs_root_ses *rses;
struct cifs_tcon *tcon;
struct cifs_ses *ses;
tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work);
ses = tcon->ses;
server = ses->server;
mutex_lock(&server->refpath_lock);
if (server->leaf_fullpath)
__refresh_tcon(server->leaf_fullpath + 1, ses, false);
mutex_unlock(&server->refpath_lock);
list_for_each_entry(rses, &tcon->dfs_ses_list, list) {
ses = rses->ses;
server = ses->server;
mutex_lock(&server->refpath_lock);
if (server->leaf_fullpath)
__refresh_tcon(server->leaf_fullpath + 1, ses, false);
mutex_unlock(&server->refpath_lock);
}
for (ses = tcon->ses; ses; ses = ses->dfs_root_ses)
refresh_ses_referral(ses);
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
atomic_read(&dfs_cache_ttl) * HZ);

View File

@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
int rdwr_for_fscache = 0;
*oplock = 0;
if (tcon->ses->server->oplocks)
@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
return PTR_ERR(full_path);
}
/* If we're caching, we need to be able to fill in around partial writes. */
if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
rdwr_for_fscache = 1;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
desired_access |= GENERIC_READ; /* is this too little? */
if (OPEN_FMODE(oflags) & FMODE_WRITE)
desired_access |= GENERIC_WRITE;
if (rdwr_for_fscache == 1)
desired_access |= GENERIC_READ;
disposition = FILE_OVERWRITE_IF;
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;
retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
if (rc == -EACCES && rdwr_for_fscache == 1) {
desired_access &= ~GENERIC_READ;
rdwr_for_fscache = 2;
goto retry_open;
}
goto out;
}
if (rdwr_for_fscache == 2)
cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*

View File

@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
*/
}
static inline int cifs_convert_flags(unsigned int flags)
static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
if ((flags & O_ACCMODE) == O_RDONLY)
return GENERIC_READ;
else if ((flags & O_ACCMODE) == O_WRONLY)
return GENERIC_WRITE;
return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
else if ((flags & O_ACCMODE) == O_RDWR) {
/* GENERIC_ALL is too much permission to request
can cause unnecessary access denied on create */
@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
int create_options = CREATE_NOT_DIR;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
int rdwr_for_fscache = 0;
if (!server->ops->open)
return -ENOSYS;
desired_access = cifs_convert_flags(f_flags);
/* If we're caching, we need to be able to fill in around partial writes. */
if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
rdwr_for_fscache = 1;
desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
/*********************************************************************
* open flag mapping table:
@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
if (f_flags & O_DIRECT)
create_options |= CREATE_NO_BUFFER;
retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
};
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc)
if (rc) {
if (rc == -EACCES && rdwr_for_fscache == 1) {
desired_access = cifs_convert_flags(f_flags, 0);
rdwr_for_fscache = 2;
goto retry_open;
}
return rc;
}
if (rdwr_for_fscache == 2)
cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
/* TODO: Add support for calling posix query info but with passing in fid */
if (tcon->unix_ext)
@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
}
static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock,
@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
INIT_WORK(&cfile->serverclose, serverclose_work);
INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
cifsFileInfo_put_final(cifs_file);
}
void serverclose_work(struct work_struct *work)
{
struct cifsFileInfo *cifs_file = container_of(work,
struct cifsFileInfo, serverclose);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
int rc = 0;
int retries = 0;
int MAX_RETRIES = 4;
do {
if (server->ops->close_getattr)
rc = server->ops->close_getattr(0, tcon, cifs_file);
else if (server->ops->close)
rc = server->ops->close(0, tcon, &cifs_file->fid);
if (rc == -EBUSY || rc == -EAGAIN) {
retries++;
msleep(250);
}
} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));
if (retries == MAX_RETRIES)
pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
if (cifs_file->offload)
queue_work(fileinfo_put_wq, &cifs_file->put);
else
cifsFileInfo_put_final(cifs_file);
}
/**
* cifsFileInfo_put - release a reference of file priv data
*
@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
struct cifs_fid fid = {};
struct cifs_pending_open open;
bool oplock_break_cancelled;
bool serverclose_offloaded = false;
spin_lock(&tcon->open_file_lock);
spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
cifs_file->offload = offload;
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
spin_unlock(&cifsi->open_file_lock);
@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
unsigned int xid;
int rc = 0;
xid = get_xid();
if (server->ops->close_getattr)
server->ops->close_getattr(xid, tcon, cifs_file);
rc = server->ops->close_getattr(xid, tcon, cifs_file);
else if (server->ops->close)
server->ops->close(xid, tcon, &cifs_file->fid);
rc = server->ops->close(xid, tcon, &cifs_file->fid);
_free_xid(xid);
if (rc == -EBUSY || rc == -EAGAIN) {
// Server close failed, hence offloading it as an async op
queue_work(serverclose_wq, &cifs_file->serverclose);
serverclose_offloaded = true;
}
}
if (oplock_break_cancelled)
@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
cifs_del_pending_open(&open);
if (offload)
queue_work(fileinfo_put_wq, &cifs_file->put);
else
cifsFileInfo_put_final(cifs_file);
// if serverclose has been offloaded to wq (on failure), it will
// handle offloading put as well. If serverclose not offloaded,
// we need to handle offloading put here.
if (!serverclose_offloaded) {
if (offload)
queue_work(fileinfo_put_wq, &cifs_file->put);
else
cifsFileInfo_put_final(cifs_file);
}
}
int cifs_open(struct inode *inode, struct file *file)
@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
use_cache:
fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
file->f_mode & FMODE_WRITE);
if (file->f_flags & O_DIRECT &&
(!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
file->f_flags & O_APPEND))
cifs_invalidate_cache(file_inode(file),
FSCACHE_INVAL_DIO_WRITE);
if (!(file->f_flags & O_DIRECT))
goto out;
if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
goto out;
cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
out:
free_dentry_path(page);
@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
int disposition = FILE_OPEN;
int create_options = CREATE_NOT_DIR;
struct cifs_open_parms oparms;
int rdwr_for_fscache = 0;
xid = get_xid();
mutex_lock(&cfile->fh_mutex);
@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
desired_access = cifs_convert_flags(cfile->f_flags);
/* If we're caching, we need to be able to fill in around partial writes. */
if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
rdwr_for_fscache = 1;
desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (cfile->f_flags & O_SYNC)
@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);
retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
/* indicate that we need to relock the file */
oparms.reconnect = true;
}
if (rc == -EACCES && rdwr_for_fscache == 1) {
desired_access = cifs_convert_flags(cfile->f_flags, 0);
rdwr_for_fscache = 2;
goto retry_open;
}
if (rc) {
mutex_unlock(&cfile->fh_mutex);
@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
goto reopen_error_exit;
}
if (rdwr_for_fscache == 2)
cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

View File

@ -37,7 +37,7 @@
#include "rfc1002pdu.h"
#include "fs_context.h"
static DEFINE_MUTEX(cifs_mount_mutex);
DEFINE_MUTEX(cifs_mount_mutex);
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
@ -783,9 +783,9 @@ static int smb3_get_tree(struct fs_context *fc)
if (err)
return err;
mutex_lock(&cifs_mount_mutex);
cifs_mount_lock();
ret = smb3_get_tree_common(fc);
mutex_unlock(&cifs_mount_mutex);
cifs_mount_unlock();
return ret;
}

View File

@ -304,4 +304,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
#define MAX_CACHED_FIDS 16
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
extern struct mutex cifs_mount_mutex;
static inline void cifs_mount_lock(void)
{
mutex_lock(&cifs_mount_mutex);
}
static inline void cifs_mount_unlock(void)
{
mutex_unlock(&cifs_mount_mutex);
}
#endif

View File

@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode,
__cifs_readahead_to_fscache(inode, pos, len);
}
static inline bool cifs_fscache_enabled(struct inode *inode)
{
return fscache_cookie_enabled(cifs_inode_cookie(inode));
}
#else /* CONFIG_CIFS_FSCACHE */
static inline
void cifs_fscache_fill_coherency(struct inode *inode,
@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }
static inline int cifs_fscache_query_occupancy(struct inode *inode,
pgoff_t first, unsigned int nr_pages,

View File

@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
if (ses_it->Suid == out.session_id) {
spin_lock(&ses_it->ses_lock);
if (ses_it->ses_status != SES_EXITING &&
ses_it->Suid == out.session_id) {
ses = ses_it;
/*
* since we are using the session outside the crit
@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
* so increment its refcount
*/
cifs_smb_ses_inc_refcount(ses);
spin_unlock(&ses_it->ses_lock);
found = true;
goto search_end;
}
spin_unlock(&ses_it->ses_lock);
}
}
search_end:

View File

@ -138,9 +138,6 @@ tcon_info_alloc(bool dir_leases_enabled)
atomic_set(&ret_buf->num_local_opens, 0);
atomic_set(&ret_buf->num_remote_opens, 0);
ret_buf->stats_from_time = ktime_get_real_seconds();
#ifdef CONFIG_CIFS_DFS_UPCALL
INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
#endif
return ret_buf;
}
@ -156,9 +153,6 @@ tconInfoFree(struct cifs_tcon *tcon)
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
kfree_sensitive(tcon->password);
#ifdef CONFIG_CIFS_DFS_UPCALL
dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
#endif
kfree(tcon->origin_fullpath);
kfree(tcon);
}
@ -487,6 +481,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;

View File

@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
}
static void
static int
cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
CIFSSMBClose(xid, tcon, fid->netfid);
return CIFSSMBClose(xid, tcon, fid->netfid);
}
static int

View File

@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);

View File

@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}
static void
static int
smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
static void
static int
smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, &file_inf);
if (rc)
return;
return rc;
inode = d_inode(cfile->dentry);
@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
/* End of file and Attributes should not have to be updated on close */
spin_unlock(&inode->i_lock);
return rc;
}
static int
@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
@ -3913,7 +3916,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
strcat(message, "W");
}
if (!new_oplock)
strncpy(message, "None", sizeof(message));
strscpy(message, "None");
cinode->oplock = new_oplock;
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,

View File

@ -3628,9 +3628,9 @@ replay_again:
memcpy(&pbuf->network_open_info,
&rsp->network_open_info,
sizeof(pbuf->network_open_info));
atomic_dec(&tcon->num_remote_opens);
}
atomic_dec(&tcon->num_remote_opens);
close_exit:
SMB2_close_free(&rqst);
free_rsp_buf(resp_buftype, rsp);

View File

@ -659,7 +659,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
}
spin_unlock(&server->srv_lock);
if (!is_binding && !server->session_estab) {
strncpy(shdr->Signature, "BSRSPYL", 8);
strscpy(shdr->Signature, "BSRSPYL");
return 0;
}

View File

@ -1301,8 +1301,19 @@ xfs_link(
*/
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
tdp->i_projid != sip->i_projid)) {
error = -EXDEV;
goto error_return;
/*
* Project quota setup skips special files which can
* leave inodes in a PROJINHERIT directory without a
* project ID set. We need to allow links to be made
* to these "project-less" inodes because userspace
* expects them to succeed after project ID setup,
* but everything else should be rejected.
*/
if (!special_file(VFS_I(sip)->i_mode) ||
sip->i_projid != 0) {
error = -EXDEV;
goto error_return;
}
}
if (!resblks) {

View File

@ -39,6 +39,14 @@ struct cpu_hw_events {
DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
/* currently enabled firmware counters */
DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
/* The virtual address of the shared memory where counter snapshot will be taken */
void *snapshot_addr;
/* The physical address of the shared memory where counter snapshot will be taken */
phys_addr_t snapshot_addr_phys;
/* Boolean flag to indicate setup is already done */
bool snapshot_set_done;
/* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
};
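The snapshot fields above back a per-CPU shared-memory area that the SBI implementation fills in. A rough sketch of how a driver could wire them up, assuming the kernel's sbi_ecall() helper and the SBI_EXT_PMU / SBI_EXT_PMU_SNAPSHOT_SET_SHMEM IDs from <asm/sbi.h>; the function name and error handling are illustrative rather than the actual driver code, and an rv32 build would additionally need to split the physical address across the lo/hi arguments.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/sbi.h>

static int pmu_snapshot_setup_this_cpu(struct cpu_hw_events *cpu_hw_evt)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	struct sbiret ret;

	if (!page)
		return -ENOMEM;

	cpu_hw_evt->snapshot_addr = page_address(page);
	cpu_hw_evt->snapshot_addr_phys = page_to_phys(page);

	/* arg0/arg1 are shmem_phys_lo/hi, arg2 carries the flags (none defined). */
	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
			cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
	if (ret.error) {
		cpu_hw_evt->snapshot_addr = NULL;
		cpu_hw_evt->snapshot_addr_phys = 0;
		__free_page(page);
		return sbi_err_map_linux_errno(ret.error);
	}

	cpu_hw_evt->snapshot_set_done = true;
	return 0;
}
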
struct riscv_pmu {

View File

@ -22,7 +22,7 @@
*
* @read: returns the current cycle value
* @mask: bitmask for two's complement
* subtraction of non 64 bit counters,
* subtraction of non-64-bit counters,
* see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
@ -35,7 +35,7 @@ struct cyclecounter {
};
/**
* struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
* struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
* Contains the state needed by timecounter_read() to detect
* cycle counter wrap around. Initialize with
* timecounter_init(). Also used to convert cycle counts into the
@ -66,6 +66,8 @@ struct timecounter {
* @cycles: Cycles
* @mask: bit mask for maintaining the 'frac' field
* @frac: pointer to storage for the fractional nanoseconds.
*
* Returns: cycle counter cycles converted to nanoseconds
*/
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
u64 cycles, u64 mask, u64 *frac)
@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
/**
* timecounter_adjtime - Shifts the time of the clock.
* @tc: The &struct timecounter to adjust
* @delta: Desired change in nanoseconds.
*/
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc,
*
* In other words, keeps track of time since the same epoch as
* the function which generated the initial time stamp.
*
* Returns: nanoseconds since the initial time stamp
*/
extern u64 timecounter_read(struct timecounter *tc);
@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc);
*
* This allows conversion of cycle counter values which were generated
* in the past.
*
* Returns: cycle counter converted to nanoseconds since the initial time stamp
*/
extern u64 timecounter_cyc2time(const struct timecounter *tc,
u64 cycle_tstamp);
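A worked example of the mult/shift conversion documented for cyclecounter_cyc2ns() above; the counter frequency and the read callback are invented for illustration. With mult = 4 and shift = 1 each cycle is worth 2 ns, so 1000 cycles convert to (1000 * 4 + 0) >> 1 = 2000 ns, and any sub-nanosecond remainder is carried across calls in *frac, whose mask is (1 << shift) - 1.

#include <linux/timecounter.h>

/* Hypothetical hardware read hook, only for the example. */
static u64 demo_read_cycles(const struct cyclecounter *cc)
{
	return 1000;
}

static u64 demo_cycles_to_ns(void)
{
	static const struct cyclecounter cc = {
		.read  = demo_read_cycles,
		.mask  = CYCLECOUNTER_MASK(64),
		.mult  = 4,	/* 2 ns per cycle, expressed as 4 >> 1 */
		.shift = 1,
	};
	u64 frac = 0;

	/* Returns 2000 for the 1000 cycles read above. */
	return cyclecounter_cyc2ns(&cc, cc.read(&cc),
				   (1ULL << cc.shift) - 1, &frac);
}
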

View File

@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
const struct timezone *tz);
/*
* ktime_get() family: read the current time in a multitude of ways,
* ktime_get() family - read the current time in a multitude of ways.
*
* The default time reference is CLOCK_MONOTONIC, starting at
* boot time but not counting the time spent in suspend.
* For other references, use the functions with "real", "clocktai",
* "boottime" and "raw" suffixes.
*
* To get the time in a different format, use the ones wit
* To get the time in a different format, use the ones with
* "ns", "ts64" and "seconds" suffix.
*
* See Documentation/core-api/timekeeping.rst for more details.
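A minimal usage sketch of the family described above (do_work() is a stand-in, not from this patch): ktime_get() for CLOCK_MONOTONIC deltas, and the _ns/boottime variants when a plain u64 or a suspend-inclusive clock is wanted.

#include <linux/ktime.h>
#include <linux/printk.h>
#include <linux/timekeeping.h>

static void demo_time_some_work(void (*do_work)(void))
{
	ktime_t t0 = ktime_get();	/* CLOCK_MONOTONIC, excludes suspend */

	do_work();

	pr_info("work took %lld ns; %llu ns since boot including suspend\n",
		ktime_to_ns(ktime_sub(ktime_get(), t0)),
		ktime_get_boottime_ns());
}
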
@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
*
* Returns: real (wall) time in ktime_t format
*/
static inline ktime_t ktime_get_real(void)
{
@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void)
}
/**
* ktime_get_boottime - Returns monotonic time since boot in ktime_t format
* ktime_get_boottime - Get monotonic time since boot in ktime_t format
*
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
* time spent in suspend.
*
* Returns: monotonic time since boot in ktime_t format
*/
static inline ktime_t ktime_get_boottime(void)
{
@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void)
}
/**
* ktime_get_clocktai - Returns the TAI time of day in ktime_t format
* ktime_get_clocktai - Get the TAI time of day in ktime_t format
*
* Returns: the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void)
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
* @mono: monotonic time to convert
*
* Returns: time converted to realtime clock
*/
static inline ktime_t ktime_mono_to_real(ktime_t mono)
{
return ktime_mono_to_any(mono, TK_OFFS_REAL);
}
/**
* ktime_get_ns - Get the current time in nanoseconds
*
* Returns: current time converted to nanoseconds
*/
static inline u64 ktime_get_ns(void)
{
return ktime_to_ns(ktime_get());
}
/**
* ktime_get_real_ns - Get the current real/wall time in nanoseconds
*
* Returns: current real time converted to nanoseconds
*/
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
/**
* ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
*
* Returns: current boottime converted to nanoseconds
*/
static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
/**
* ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
*
* Returns: current TAI time converted to nanoseconds
*/
static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
/**
* ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
*
* Returns: current raw monotonic time converted to nanoseconds
*/
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
/*
* struct ktime_timestanps - Simultaneous mono/boot/real timestamps
/**
* struct ktime_timestamps - Simultaneous mono/boot/real timestamps
* @mono: Monotonic timestamp
* @boot: Boottime timestamp
* @real: Realtime timestamp
@ -242,7 +276,8 @@ struct ktime_timestamps {
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
* @raw: Monotonic raw system time
* @clock_was_set_seq: The sequence number of clock was set events
* @cs_id: Clocksource ID
* @clock_was_set_seq: The sequence number of clock-was-set events
* @cs_was_changed_seq: The sequence number of clocksource change events
*/
struct system_time_snapshot {

View File

@ -22,7 +22,7 @@
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
/**
/*
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
* system is busy, but will not cause a CPU to come out of idle just
* to service it; instead, the timer will be serviced when the CPU
@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { }
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
* return value: 1 if the timer is pending, 0 if not.
* Returns: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer);
* See timer_delete_sync() for detailed explanation.
*
* Do not use in new code. Use timer_delete_sync() instead.
*
* Returns:
* * %0 - The timer was not pending
* * %1 - The timer was pending and deactivated
*/
static inline int del_timer_sync(struct timer_list *timer)
{
@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer)
* See timer_delete() for detailed explanation.
*
* Do not use in new code. Use timer_delete() instead.
*
* Returns:
* * %0 - The timer was not pending
* * %1 - The timer was pending and deactivated
*/
static inline int del_timer(struct timer_list *timer)
{
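A minimal sketch of the replacement API recommended in the notes above, using timer_setup()/mod_timer()/timer_delete_sync() instead of the deprecated del_timer()/del_timer_sync(); the demo names are invented.

#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/timer.h>

static struct timer_list demo_timer;

static void demo_timer_fn(struct timer_list *t)
{
	pr_info("demo timer fired\n");
}

static void demo_timer_usage(void)
{
	timer_setup(&demo_timer, demo_timer_fn, 0);
	mod_timer(&demo_timer, jiffies + msecs_to_jiffies(100));

	/* ... on teardown, the non-deprecated spelling: */
	timer_delete_sync(&demo_timer);
}
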

View File

@ -19,12 +19,6 @@
#include <vdso/time32.h>
#include <vdso/time64.h>
#ifdef CONFIG_ARM64
#include <asm/page-def.h>
#else
#include <asm/page.h>
#endif
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden
*/
union vdso_data_store {
struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
u8 page[1U << CONFIG_PAGE_SHIFT];
};
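With the asm/page.h includes removed in the hunk above, PAGE_SIZE is no longer guaranteed to be visible here, hence the (1U << CONFIG_PAGE_SHIFT) spelling. A build-time check along these lines (illustrative only, not part of the patch) captures the intent that the store still spans exactly one page:

	static_assert(sizeof(union vdso_data_store) == (1U << CONFIG_PAGE_SHIFT));
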
/*

View File

@ -697,6 +697,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu)
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
* @now: current ktime_t
*
* Called from interrupt entry when the CPU was idle
*
@ -794,7 +795,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
* Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
@ -820,7 +821,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
* Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
@ -1287,6 +1288,8 @@ void tick_nohz_irq_exit(void)
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
*
* Return: %true if the tick handler has run, otherwise %false
*/
bool tick_nohz_idle_got_tick(void)
{
@ -1305,6 +1308,8 @@ bool tick_nohz_idle_got_tick(void)
* stopped, it returns the next hrtimer.
*
* Called from power state control code with interrupts disabled
*
* Return: the next expiration time
*/
ktime_t tick_nohz_get_next_hrtimer(void)
{
@ -1320,6 +1325,8 @@ ktime_t tick_nohz_get_next_hrtimer(void)
* The return value of this function and/or the value returned by it through the
* @delta_next pointer can be negative which must be taken into account by its
* callers.
*
* Return: the expected length of the current sleep
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
@ -1357,8 +1364,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
* @cpu: target CPU number
*
* Called from the schedutil frequency scaling governor in scheduler context.
*
* Return: the current idle calls counter value for @cpu
*/
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
@ -1371,6 +1381,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
* tick_nohz_get_idle_calls - return the current idle calls counter value
*
* Called from the schedutil frequency scaling governor in scheduler context.
*
* Return: the current idle calls counter value for the current CPU
*/
unsigned long tick_nohz_get_idle_calls(void)
{
@ -1559,7 +1571,7 @@ early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
* @mode: tick_nohz_mode to setup for
* @hrtimer: whether to use the hrtimer or not
*/
void tick_setup_sched_timer(bool hrtimer)
{

View File

@ -46,8 +46,8 @@ struct tick_device {
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_waketime: Time when the idle was interrupted
* @idle_sleeptime_seq: sequence counter for data consistency
* @idle_entrytime: Time when the idle call was entered
* @nohz_mode: Mode - one state of tick_nohz_mode
* @last_jiffies: Base jiffies snapshot when next event was last computed
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)

View File

@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64);
/*
* The timer wheel has LVL_DEPTH array levels. Each level provides an array of
* LVL_SIZE buckets. Each level is driven by its own clock and therefor each
* LVL_SIZE buckets. Each level is driven by its own clock and therefore each
* level has a different granularity.
*
* The level granularity is: LVL_CLK_DIV ^ lvl
* The level granularity is: LVL_CLK_DIV ^ level
* The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
*
* The array level of a newly armed timer depends on the relative expiry
* time. The farther the expiry time is away the higher the array level and
* therefor the granularity becomes.
* therefore the granularity becomes.
*
* Contrary to the original timer wheel implementation, which aims for 'exact'
* expiry of the timers, this implementation removes the need for recascading
@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64);
* struct timer_base - Per CPU timer base (number of base depends on config)
* @lock: Lock protecting the timer_base
* @running_timer: When expiring timers, the lock is dropped. To make
* sure not to race agains deleting/modifying a
* sure not to race against deleting/modifying a
* currently running timer, the pointer is set to the
* timer, which expires at the moment. If no timer is
* running, the pointer is NULL.
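As a quick worked example of the level-granularity formulas in the hunk above (LVL_CLK_DIV is 8 in the current code; the HZ value is just an illustrative config choice):

	level 0: granularity 8^0 = 1 jiffy   ->  1 ms at HZ=1000, wheel clock 1000 Hz
	level 1: granularity 8^1 = 8 jiffies ->  8 ms at HZ=1000, wheel clock  125 Hz
	level 2: granularity 8^2 = 64 jiffies -> 64 ms at HZ=1000, wheel clock ~15.6 Hz

So the farther out a timer is armed, the coarser the bucket it lands in, which is the trade-off the comment describes.
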
@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr)
}
/*
* fixup_init is called when:
* timer_fixup_init is called when:
* - an active object is initialized
*/
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused)
}
/*
* fixup_activate is called when:
* timer_fixup_activate is called when:
* - an active object is activated
* - an unknown non-static object is activated
*/
@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
}
/*
* fixup_free is called when:
* timer_fixup_free is called when:
* - an active object is freed
*/
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
}
/*
* fixup_assert_init is called when:
* timer_fixup_assert_init is called when:
* - an untracked/uninit-ed object is found
*/
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer,
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
*
* init_timer_key() must be done to a timer prior calling *any* of the
* init_timer_key() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
void init_timer_key(struct timer_list *timer,
@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown)
* If @shutdown is set then the lock has to be taken whether the
* timer is pending or not to protect against a concurrent rearm
* which might hit between the lockless pending check and the lock
* aquisition. By taking the lock it is ensured that such a newly
* acquisition. By taking the lock it is ensured that such a newly
* enqueued timer is dequeued and cannot end up with
* timer->function == NULL in the expiry code.
*
@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
/*
* When timer base is not set idle, undo the effect of
* tmigr_cpu_deactivate() to prevent inconsitent states - active
* tmigr_cpu_deactivate() to prevent inconsistent states - active
* timer base but inactive timer migration hierarchy.
*
* When timer base was already marked idle, nothing will be

View File

@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
first_childevt = evt = data->evt;
/*
* Walking the hierarchy is required in any case when a
* remote expiry was done before. This ensures to not lose
* already queued events in non active groups (see section
* "Required event and timerqueue update after a remote
* expiry" in the documentation at the top).
*
* The two call sites which are executed without a remote expiry
* before, are not prevented from propagating changes through
* the hierarchy by the return:
* - When entering this path by tmigr_new_timer(), @evt->ignore
* is never set.
* - tmigr_inactive_up() takes care of the propagation by
* itself and ignores the return value. But an immediate
* return is possible if there is a parent, sparing group
* locking at this level, because the upper walking call to
* the parent will take care about removing this event from
* within the group and update next_expiry accordingly.
*
* However if there is no parent, ie: the hierarchy has only a
* single level so @group is the top level group, make sure the
* first event information of the group is updated properly and
* also handled properly, so skip this fast return path.
*/
if (evt->ignore && !remote && group->parent)
return true;
raw_spin_lock(&group->lock);
childstate.state = 0;
@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
* queue when the expiry time changed only or when it could be ignored.
*/
if (timerqueue_node_queued(&evt->nextevt)) {
if ((evt->nextevt.expires == nextexp) && !evt->ignore)
if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
/* Make sure not to miss a new CPU event with the same expiry */
evt->cpu = first_childevt->cpu;
goto check_toplvl;
}
if (!timerqueue_del(&group->events, &evt->nextevt))
WRITE_ONCE(group->next_expiry, KTIME_MAX);

View File

@ -1206,15 +1206,6 @@ err_noclose:
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
*
* Note that the send is non-blocking. The caller has incremented
* the reference count on each page backing the RPC message, and
* the network layer will "put" these pages when transmission is
* complete.
*
* This is safe for our RPC services because the memory backing
* the head and tail components is never kmalloc'd. These always
* come from pages in the svc_rqst::rq_pages array.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
@ -1244,6 +1235,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
page_frag_free(buf);
if (ret < 0)
return ret;
*sentp += ret;

View File

@ -190,6 +190,8 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += demand_paging_test
TEST_GEN_PROGS_riscv += dirty_log_test

View File

@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
{
return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
}
static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
{
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
}
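A plausible call site for the new helper (the vcpu variable is assumed to exist in the test; KVM_RISCV_SBI_EXT_PMU is the uapi SBI-extension ID a PMU selftest would gate on):

	__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
		       "SBI PMU extension not virtualized by KVM, skipping");
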
struct ex_regs {
unsigned long ra;
unsigned long sp;
@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle
#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
/* SBI return error codes */
#define SBI_SUCCESS 0
#define SBI_ERR_FAILURE -1
#define SBI_ERR_NOT_SUPPORTED -2
#define SBI_ERR_INVALID_PARAM -3
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
#define SBI_ERR_ALREADY_AVAILABLE -6
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
#define SBI_EXT_EXPERIMENTAL_START 0x08000000
#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
enum sbi_ext_id {
SBI_EXT_BASE = 0x10,
SBI_EXT_STA = 0x535441,
};
enum sbi_ext_base_fid {
SBI_EXT_BASE_PROBE_EXT = 3,
};
struct sbiret {
long error;
long value;
};
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
bool guest_sbi_probe_extension(int extid, long *out_val);
static inline void local_irq_enable(void)
{
csr_set(CSR_SSTATUS, SR_SIE);

View File

@ -0,0 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* RISC-V SBI specific definitions
*
* Copyright (C) 2024 Rivos Inc.
*/
#ifndef SELFTEST_KVM_SBI_H
#define SELFTEST_KVM_SBI_H
/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff
/* SBI return error codes */
#define SBI_SUCCESS 0
#define SBI_ERR_FAILURE -1
#define SBI_ERR_NOT_SUPPORTED -2
#define SBI_ERR_INVALID_PARAM -3
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
#define SBI_ERR_ALREADY_AVAILABLE -6
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
#define SBI_EXT_EXPERIMENTAL_START 0x08000000
#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
enum sbi_ext_id {
SBI_EXT_BASE = 0x10,
SBI_EXT_STA = 0x535441,
SBI_EXT_PMU = 0x504D55,
};
enum sbi_ext_base_fid {
SBI_EXT_BASE_GET_SPEC_VERSION = 0,
SBI_EXT_BASE_GET_IMP_ID,
SBI_EXT_BASE_GET_IMP_VERSION,
SBI_EXT_BASE_PROBE_EXT = 3,
};
enum sbi_ext_pmu_fid {
SBI_EXT_PMU_NUM_COUNTERS = 0,
SBI_EXT_PMU_COUNTER_GET_INFO,
SBI_EXT_PMU_COUNTER_CFG_MATCH,
SBI_EXT_PMU_COUNTER_START,
SBI_EXT_PMU_COUNTER_STOP,
SBI_EXT_PMU_COUNTER_FW_READ,
SBI_EXT_PMU_COUNTER_FW_READ_HI,
SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
};
union sbi_pmu_ctr_info {
unsigned long value;
struct {
unsigned long csr:12;
unsigned long width:6;
#if __riscv_xlen == 32
unsigned long reserved:13;
#else
unsigned long reserved:45;
#endif
unsigned long type:1;
};
};
struct riscv_pmu_snapshot_data {
u64 ctr_overflow_mask;
u64 ctr_values[64];
u64 reserved[447];
};
struct sbiret {
long error;
long value;
};
/* General PMU event codes specified in the SBI PMU extension */
enum sbi_pmu_hw_generic_events_t {
SBI_PMU_HW_NO_EVENT = 0,
SBI_PMU_HW_CPU_CYCLES = 1,
SBI_PMU_HW_INSTRUCTIONS = 2,
SBI_PMU_HW_CACHE_REFERENCES = 3,
SBI_PMU_HW_CACHE_MISSES = 4,
SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
SBI_PMU_HW_BRANCH_MISSES = 6,
SBI_PMU_HW_BUS_CYCLES = 7,
SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
SBI_PMU_HW_REF_CPU_CYCLES = 10,
SBI_PMU_HW_GENERAL_MAX,
};
/* SBI PMU counter types */
enum sbi_pmu_ctr_type {
SBI_PMU_CTR_TYPE_HW = 0x0,
SBI_PMU_CTR_TYPE_FW,
};
/* Flags defined for config matching function */
#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
/* Flags defined for counter start function */
#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
/* Flags defined for counter stop function */
#define SBI_PMU_STOP_FLAG_RESET BIT(0)
#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
bool guest_sbi_probe_extension(int extid, long *out_val);
/* Make SBI version */
static inline unsigned long sbi_mk_version(unsigned long major,
unsigned long minor)
{
return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
| (minor & SBI_SPEC_VERSION_MINOR_MASK);
}
unsigned long get_host_sbi_spec_version(void);
#endif /* SELFTEST_KVM_SBI_H */
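/*
 * A minimal compile-time sanity sketch (not required by the tests): the
 * snapshot layout above is sized to fill exactly one 4 KiB shared page,
 * i.e. 8 + 64 * 8 + 447 * 8 bytes, matching the single page handed over
 * via SBI_EXT_PMU_SNAPSHOT_SET_SHMEM.
 */
_Static_assert(sizeof(struct riscv_pmu_snapshot_data) == 4096,
	       "riscv_pmu_snapshot_data must occupy exactly one 4 KiB page");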

View File

@ -3,6 +3,7 @@
#define SELFTEST_KVM_UCALL_H
#include "processor.h"
#include "sbi.h"
#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI

View File

@ -502,3 +502,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val)
return true;
}
unsigned long get_host_sbi_spec_version(void)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
0, 0, 0, 0, 0);
GUEST_ASSERT(!ret.error);
return ret.value;
}
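/*
 * A minimal usage sketch, assuming sbi.h is in scope here; the PMU test's
 * verify_sbi_requirement_assert() performs the equivalent check. Packed
 * version values compare directly because the major number sits in the
 * high bits. The helper name below is hypothetical, not used by the tests.
 */
static inline bool host_sbi_spec_is_at_least(unsigned long major, unsigned long minor)
{
	return get_host_sbi_spec_version() >= sbi_mk_version(major, minor);
}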

View File

@ -85,7 +85,7 @@ struct kvm_vm *test_vm_create(void)
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
__TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
__TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
"SSTC not available, skipping test\n");
vm_init_vector_tables(vm);

View File

@ -0,0 +1,82 @@
// SPDX-License-Identifier: GPL-2.0
/*
* RISC-V KVM ebreak test.
*
* Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
*
*/
#include "kvm_util.h"
#define LABEL_ADDRESS(v) ((uint64_t)&(v))
extern unsigned char sw_bp_1, sw_bp_2;
static uint64_t sw_bp_addr;
static void guest_code(void)
{
asm volatile(
".option push\n"
".option norvc\n"
"sw_bp_1: ebreak\n"
"sw_bp_2: ebreak\n"
".option pop\n"
);
GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2));
GUEST_DONE();
}
static void guest_breakpoint_handler(struct ex_regs *regs)
{
WRITE_ONCE(sw_bp_addr, regs->epc);
regs->epc += 4;
}
int main(void)
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
uint64_t pc;
struct kvm_guest_debug debug = {
.control = KVM_GUESTDBG_ENABLE,
};
TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_init_vector_tables(vm);
vcpu_init_vector_tables(vcpu);
vm_install_exception_handler(vm, EXC_BREAKPOINT,
guest_breakpoint_handler);
/*
* Enable guest debugging.
* ebreak should exit to the VMM with KVM_EXIT_DEBUG reason.
*/
vcpu_guest_debug_set(vcpu, &debug);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
/* skip sw_bp_1 */
vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4);
/*
* Disable all debug controls.
* Guest should handle the ebreak without exiting to the VMM.
*/
memset(&debug, 0, sizeof(debug));
vcpu_guest_debug_set(vcpu, &debug);
vcpu_run(vcpu);
TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
kvm_vm_free(vm);
return 0;
}
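/*
 * A minimal sketch, not used by this test: guest_breakpoint_handler() can
 * hard-code a 4-byte step only because ".option norvc" guarantees an
 * uncompressed ebreak. A generic skip would check the instruction length
 * first (RISC-V compressed instructions have the two low opcode bits != 0b11):
 */
static inline unsigned long insn_len(uint32_t insn)
{
	return (insn & 0x3) == 0x3 ? 4 : 2;
}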

View File

@ -43,6 +43,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
@ -408,6 +409,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(V),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@ -931,6 +933,7 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF);
KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
@ -986,6 +989,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_fp_d,
&config_h,
&config_smstateen,
&config_sscofpmf,
&config_sstc,
&config_svinval,
&config_svnapot,

View File

@ -0,0 +1,681 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality.
*
* Copyright (c) 2024, Rivos Inc.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"
#include "sbi.h"
#include "arch_timer.h"
/* Maximum counters (firmware + hardware) */
#define RISCV_MAX_PMU_COUNTERS 64
union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
/* Snapshot shared memory data */
#define PMU_SNAPSHOT_GPA_BASE BIT(30)
static void *snapshot_gva;
static vm_paddr_t snapshot_gpa;
static int vcpu_shared_irq_count;
static int counter_in_use;
/* Cache the available counters in a bitmask */
static unsigned long counter_mask_available;
static bool illegal_handler_invoked;
#define SBI_PMU_TEST_BASIC BIT(0)
#define SBI_PMU_TEST_EVENTS BIT(1)
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
#define SBI_PMU_TEST_OVERFLOW BIT(3)
static int disabled_tests;
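/*
 * csr_read() needs a compile-time constant CSR number, so the helper below
 * expands a switch ladder over CSR_CYCLE..CSR_HPMCOUNTER31 (and the *H
 * upper halves) and selects the requested counter at run time.
 */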
unsigned long pmu_csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val) {\
case __csr_num: \
__val = csr_read(__csr_num); \
break; }
#define switchcase_csr_read_2(__csr_num, __val) {\
switchcase_csr_read(__csr_num + 0, __val) \
switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val) {\
switchcase_csr_read_2(__csr_num + 0, __val) \
switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val) {\
switchcase_csr_read_4(__csr_num + 0, __val) \
switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val) {\
switchcase_csr_read_8(__csr_num + 0, __val) \
switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val) {\
switchcase_csr_read_16(__csr_num + 0, __val) \
switchcase_csr_read_16(__csr_num + 16, __val)}
unsigned long ret = 0;
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
switchcase_csr_read_32(CSR_CYCLEH, ret)
default:
break;
}
return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}
static inline void dummy_func_loop(uint64_t iter)
{
int i = 0;
while (i < iter) {
asm volatile("nop");
i++;
}
}
static void start_counter(unsigned long counter, unsigned long start_flags,
unsigned long ival)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags,
ival, 0, 0);
__GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter);
}
/* This should be invoked only for the reset-counter use case */
static void stop_reset_counter(unsigned long counter, unsigned long stop_flags)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1,
stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
__GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED,
"Unable to stop counter %ld\n", counter);
}
static void stop_counter(unsigned long counter, unsigned long stop_flags)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
0, 0, 0);
__GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
counter, ret.error);
}
static void guest_illegal_exception_handler(struct ex_regs *regs)
{
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
"Unexpected exception handler %lx\n", regs->cause);
illegal_handler_invoked = true;
/* skip the trapping instruction */
regs->epc += 4;
}
static void guest_irq_handler(struct ex_regs *regs)
{
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
unsigned long overflown_mask;
unsigned long counter_val = 0;
/* Validate that we are in the correct irq handler */
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
/* Stop all counters first to avoid further interrupts */
stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask);
GUEST_ASSERT(overflown_mask & 0x01);
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count + 1);
counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
/* Now start the counter to mimic the real driver behavior */
start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
}
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
unsigned long cflags,
unsigned long event)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
cflags, event, 0, 0);
__GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error);
GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS);
GUEST_ASSERT(BIT(ret.value) & counter_mask_available);
return ret.value;
}
static unsigned long get_num_counters(void)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
__GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU");
__GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS,
"Invalid number of counters %ld\n", ret.value);
return ret.value;
}
static void update_counter_info(int num_counters)
{
int i = 0;
struct sbiret ret;
for (i = 0; i < num_counters; i++) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
/* There can be gaps in logical counter indices */
if (ret.error)
continue;
GUEST_ASSERT_NE(ret.value, 0);
ctrinfo_arr[i].value = ret.value;
counter_mask_available |= BIT(i);
}
GUEST_ASSERT(counter_mask_available > 0);
}
static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0);
GUEST_ASSERT(ret.error == 0);
return ret.value;
}
static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
{
unsigned long counter_val = 0;
__GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type);
if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW)
counter_val = pmu_csr_read_num(ctrinfo.csr);
else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW)
counter_val = read_fw_counter(idx, ctrinfo);
return counter_val;
}
static inline void verify_sbi_requirement_assert(void)
{
long out_val = 0;
bool probe;
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
GUEST_ASSERT(probe && out_val == 1);
if (get_host_sbi_spec_version() < sbi_mk_version(2, 0))
__GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
}
static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
{
unsigned long lo = (unsigned long)gpa;
#if __riscv_xlen == 32
unsigned long hi = (unsigned long)(gpa >> 32);
#else
unsigned long hi = gpa == -1 ? -1 : 0;
#endif
struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
lo, hi, flags, 0, 0, 0);
GUEST_ASSERT(ret.value == 0 && ret.error == 0);
}
static void test_pmu_event(unsigned long event)
{
unsigned long counter;
unsigned long counter_value_pre, counter_value_post;
unsigned long counter_init_value = 100;
counter = get_counter_index(0, counter_mask_available, 0, event);
counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
/* Do not set the initial value */
start_counter(counter, 0, 0);
dummy_func_loop(10000);
stop_counter(counter, 0);
counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
__GUEST_ASSERT(counter_value_post > counter_value_pre,
"Event update verification failed: post [%lx] pre [%lx]\n",
counter_value_post, counter_value_pre);
/*
* We can't update a counter without starting it, so do two start/stop
* rounds: first initialize it to a very high value, then to a low value.
*/
start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2);
stop_counter(counter, 0);
counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
stop_counter(counter, 0);
counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
__GUEST_ASSERT(counter_value_pre > counter_value_post,
"Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
counter_value_post, counter_value_pre);
/* Now set the initial value and compare */
start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
dummy_func_loop(10000);
stop_counter(counter, 0);
counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
__GUEST_ASSERT(counter_value_post > counter_init_value,
"Event update verification failed: post [%lx] pre [%lx]\n",
counter_value_post, counter_init_value);
stop_reset_counter(counter, 0);
}
static void test_pmu_event_snapshot(unsigned long event)
{
unsigned long counter;
unsigned long counter_value_pre, counter_value_post;
unsigned long counter_init_value = 100;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
counter = get_counter_index(0, counter_mask_available, 0, event);
counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
/* Do not set the initial value */
start_counter(counter, 0, 0);
dummy_func_loop(10000);
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
/* The counter value is updated w.r.t. the relative index of cbase */
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
__GUEST_ASSERT(counter_value_post > counter_value_pre,
"Event update verification failed: post [%lx] pre [%lx]\n",
counter_value_post, counter_value_pre);
/*
* We can't update a counter without starting it, so do two start/stop
* rounds: first initialize it to a very high value, then to a low value.
*/
WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2);
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]);
WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
__GUEST_ASSERT(counter_value_pre > counter_value_post,
"Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
counter_value_post, counter_value_pre);
/* Now set the initial value and compare */
WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
dummy_func_loop(10000);
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
__GUEST_ASSERT(counter_value_post > counter_init_value,
"Event update verification failed: post [%lx] pre [%lx]\n",
counter_value_post, counter_init_value);
stop_reset_counter(counter, 0);
}
static void test_pmu_event_overflow(unsigned long event)
{
unsigned long counter;
unsigned long counter_value_post;
unsigned long counter_init_value = ULONG_MAX - 10000;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
counter = get_counter_index(0, counter_mask_available, 0, event);
counter_in_use = counter;
/* The counter value is updated w.r.t. the relative index of cbase passed to start/stop */
WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
dummy_func_loop(10000);
udelay(msecs_to_usecs(2000));
/* The IRQ handler should have stopped the counter */
stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
/* The counter value after stopping should be less than the init value due to overflow */
__GUEST_ASSERT(counter_value_post < counter_init_value,
"counter_value_post %lx counter_init_value %lx for counter\n",
counter_value_post, counter_init_value);
stop_reset_counter(counter, 0);
}
static void test_invalid_event(void)
{
struct sbiret ret;
unsigned long event = 0x1234; /* A random event */
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0,
counter_mask_available, 0, event, 0, 0);
GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED);
}
static void test_pmu_events(void)
{
int num_counters = 0;
/* Get the counter details */
num_counters = get_num_counters();
update_counter_info(num_counters);
/* Sanity testing for any random invalid event */
test_invalid_event();
/* Only these two events are guaranteed to be present */
test_pmu_event(SBI_PMU_HW_CPU_CYCLES);
test_pmu_event(SBI_PMU_HW_INSTRUCTIONS);
GUEST_DONE();
}
static void test_pmu_basic_sanity(void)
{
long out_val = 0;
bool probe;
struct sbiret ret;
int num_counters = 0, i;
union sbi_pmu_ctr_info ctrinfo;
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
GUEST_ASSERT(probe && out_val == 1);
num_counters = get_num_counters();
for (i = 0; i < num_counters; i++) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i,
0, 0, 0, 0, 0);
/* There can be gaps in logical counter indices */
if (ret.error)
continue;
GUEST_ASSERT_NE(ret.value, 0);
ctrinfo.value = ret.value;
/*
* Check that hardware counters are accessible and that firmware counters are
* readable. The spec doesn't mandate any initial value, so no particular
* value is checked.
*/
if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) {
pmu_csr_read_num(ctrinfo.csr);
GUEST_ASSERT(illegal_handler_invoked);
} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
read_fw_counter(i, ctrinfo);
}
}
GUEST_DONE();
}
static void test_pmu_events_snaphost(void)
{
int num_counters = 0;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
int i;
/* Verify presence of SBI PMU and minimum required SBI version */
verify_sbi_requirement_assert();
snapshot_set_shmem(snapshot_gpa, 0);
/* Get the counter details */
num_counters = get_num_counters();
update_counter_info(num_counters);
/* Validate shared memory access */
GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0);
for (i = 0; i < num_counters; i++) {
if (counter_mask_available & (BIT(i)))
GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0);
}
/* Only these two events are guaranteed to be present */
test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES);
test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS);
GUEST_DONE();
}
static void test_pmu_events_overflow(void)
{
int num_counters = 0;
/* Verify presence of SBI PMU and minimum required SBI version */
verify_sbi_requirement_assert();
snapshot_set_shmem(snapshot_gpa, 0);
csr_set(CSR_IE, BIT(IRQ_PMU_OVF));
local_irq_enable();
/* Get the counter details */
num_counters = get_num_counters();
update_counter_info(num_counters);
/*
* QEMU supports overflow for the cycle/instruction counters.
* This test may fail on any platform that does not support overflow for these two events.
*/
test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
GUEST_DONE();
}
static void run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
case UCALL_SYNC:
break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
break;
}
}
void test_vm_destroy(struct kvm_vm *vm)
{
memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS);
counter_mask_available = 0;
kvm_vm_free(vm);
}
static void test_vm_basic_test(void *guest_code)
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
"SBI PMU not available, skipping test");
vm_init_vector_tables(vm);
/* Illegal instruction handler is required to verify read access without configuration */
vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler);
vcpu_init_vector_tables(vcpu);
run_vcpu(vcpu);
test_vm_destroy(vm);
}
static void test_vm_events_test(void *guest_code)
{
struct kvm_vm *vm = NULL;
struct kvm_vcpu *vcpu = NULL;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
"SBI PMU not available, skipping test");
run_vcpu(vcpu);
test_vm_destroy(vm);
}
static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
/* The PMU snapshot requires only a single page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0);
/* PMU_SNAPSHOT_GPA_BASE is identity mapped */
virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
sync_global_to_guest(vcpu->vm, snapshot_gva);
sync_global_to_guest(vcpu->vm, snapshot_gpa);
}
static void test_vm_events_snapshot_test(void *guest_code)
{
struct kvm_vm *vm = NULL;
struct kvm_vcpu *vcpu;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
"SBI PMU not available, skipping test");
test_vm_setup_snapshot_mem(vm, vcpu);
run_vcpu(vcpu);
test_vm_destroy(vm);
}
static void test_vm_events_overflow(void *guest_code)
{
struct kvm_vm *vm = NULL;
struct kvm_vcpu *vcpu;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
__TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
"SBI PMU not available, skipping test");
__TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF),
"Sscofpmf is not available, skipping overflow test");
test_vm_setup_snapshot_mem(vm, vcpu);
vm_init_vector_tables(vm);
vm_install_interrupt_handler(vm, guest_irq_handler);
vcpu_init_vector_tables(vcpu);
/* Initialize guest timer frequency. */
vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
sync_global_to_guest(vm, timer_freq);
run_vcpu(vcpu);
test_vm_destroy(vm);
}
static void test_print_help(char *name)
{
pr_info("Usage: %s [-h] [-d <test name>]\n", name);
pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
pr_info("\t-h: print this help screen\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
while ((opt = getopt(argc, argv, "hd:")) != -1) {
switch (opt) {
case 'd':
if (!strncmp("basic", optarg, 5))
disabled_tests |= SBI_PMU_TEST_BASIC;
else if (!strncmp("events", optarg, 6))
disabled_tests |= SBI_PMU_TEST_EVENTS;
else if (!strncmp("snapshot", optarg, 8))
disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
else if (!strncmp("overflow", optarg, 8))
disabled_tests |= SBI_PMU_TEST_OVERFLOW;
else
goto done;
break;
case 'h':
default:
goto done;
}
}
return true;
done:
test_print_help(argv[0]);
return false;
}
int main(int argc, char *argv[])
{
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
test_vm_basic_test(test_pmu_basic_sanity);
pr_info("SBI PMU basic test : PASS\n");
}
if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
test_vm_events_test(test_pmu_events);
pr_info("SBI PMU event verification test : PASS\n");
}
if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
test_vm_events_snapshot_test(test_pmu_events_snaphost);
pr_info("SBI PMU event verification with snapshot test : PASS\n");
}
if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
test_vm_events_overflow(test_pmu_events_overflow);
pr_info("SBI PMU event verification with overflow test : PASS\n");
}
return 0;
}
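/*
 * Example invocation (binary name as added to the selftests Makefile above),
 * shown for illustration:
 *
 *   ./sbi_pmu_test                 run every test
 *   ./sbi_pmu_test -d overflow     skip the overflow test, e.g. on hosts
 *                                  without Sscofpmf
 */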

View File

@ -11,7 +11,9 @@
#include <pthread.h>
#include <linux/kernel.h>
#include <asm/kvm.h>
#ifndef __riscv
#ifdef __riscv
#include "sbi.h"
#else
#include <asm/kvm_para.h>
#endif