KVM: s390: fixes and features for 4.13
- initial machine check forwarding - migration support for the CMMA page hinting information - cleanups - fixes -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (GNU/Linux) iQIcBAABAgAGBQJZU5JdAAoJEBF7vIC1phx8gCwP/RTl1DzLsyuSbX/AhneQVb/X gXRnrtVEMsya4vL5lZxbp8JD5J4nBu8vNlgDmQwXM1KiFVDW5IFyQLUHv5PP899z 357mQC61pbkuDA8BhM71FuQav2V0ZMes+FYsza4Zx+Iev4uQtVfTos/nuMPnRVaD hSfWKbQ9dH/Yluxn8ClXkUOrLH7luiU7HZoQLTxYPFmyM9BIgSbUH2rSXUbQ/i5I PLpcky6M52/A/IFeEAt5qASsCwWJhPSLGsLKghDKvHDcBWVSb/M94ypXKInZ0pTf l97TOwCHVODje0Nn4R7wuoeY1ahOwgfhbI3R8m9Cnck3t7mbWtzYVn3DvSXl/Juk 3dfMkbi/GG9lrHoOwnGVGUsaNw5U11sDZEV+rVDT5847HEnGclNWfIBzr4Lcchdr 7f3qap9AGLWu79e32mOP2yO2zFKXpDdVuFfW/c/ms4wq3v03a6HxcUkIn98m6Q1O EEKzwknA1tSCdtWKOW9THENmywd1o4pMisC+FHnBxFwllOl5ORpbPegOrPCe7qQW +MZClAJl0s23NpbEMzwrilHzC1P9RxYTFnhGmVamcAg9PVOcFIOGllum26IXzaFM SyJ8HxS10SiAIVzv18yw3uxy6BUzzuKulIPu+W7JeOTOAAWiwTNL8wEx1ol93Ioi 531QgI7kPfDnudS14WaM =L7Ia -----END PGP SIGNATURE----- Merge tag 'kvm-s390-next-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD KVM: s390: fixes and features for 4.13 - initial machine check forwarding - migration support for the CMMA page hinting information - cleanups - fixes
This commit is contained in:
commit
3195a35b41
@ -3255,6 +3255,141 @@ Otherwise, if the MCE is a corrected error, KVM will just
|
||||
store it in the corresponding bank (provided this bank is
|
||||
not holding a previously reported uncorrected error).
|
||||
|
||||
4.107 KVM_S390_GET_CMMA_BITS
|
||||
|
||||
Capability: KVM_CAP_S390_CMMA_MIGRATION
|
||||
Architectures: s390
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_s390_cmma_log (in, out)
|
||||
Returns: 0 on success, a negative value on error
|
||||
|
||||
This ioctl is used to get the values of the CMMA bits on the s390
|
||||
architecture. It is meant to be used in two scenarios:
|
||||
- During live migration to save the CMMA values. Live migration needs
|
||||
to be enabled via the KVM_REQ_START_MIGRATION VM property.
|
||||
- To non-destructively peek at the CMMA values, with the flag
|
||||
KVM_S390_CMMA_PEEK set.
|
||||
|
||||
The ioctl takes parameters via the kvm_s390_cmma_log struct. The desired
|
||||
values are written to a buffer whose location is indicated via the "values"
|
||||
member in the kvm_s390_cmma_log struct. The values in the input struct are
|
||||
also updated as needed.
|
||||
Each CMMA value takes up one byte.
|
||||
|
||||
struct kvm_s390_cmma_log {
|
||||
__u64 start_gfn;
|
||||
__u32 count;
|
||||
__u32 flags;
|
||||
union {
|
||||
__u64 remaining;
|
||||
__u64 mask;
|
||||
};
|
||||
__u64 values;
|
||||
};
|
||||
|
||||
start_gfn is the number of the first guest frame whose CMMA values are
|
||||
to be retrieved,
|
||||
|
||||
count is the length of the buffer in bytes,
|
||||
|
||||
values points to the buffer where the result will be written to.
|
||||
|
||||
If count is greater than KVM_S390_SKEYS_MAX, then it is considered to be
|
||||
KVM_S390_SKEYS_MAX. KVM_S390_SKEYS_MAX is re-used for consistency with
|
||||
other ioctls.
|
||||
|
||||
The result is written in the buffer pointed to by the field values, and
|
||||
the values of the input parameter are updated as follows.
|
||||
|
||||
Depending on the flags, different actions are performed. The only
|
||||
supported flag so far is KVM_S390_CMMA_PEEK.
|
||||
|
||||
The default behaviour if KVM_S390_CMMA_PEEK is not set is:
|
||||
start_gfn will indicate the first page frame whose CMMA bits were dirty.
|
||||
It is not necessarily the same as the one passed as input, as clean pages
|
||||
are skipped.
|
||||
|
||||
count will indicate the number of bytes actually written in the buffer.
|
||||
It can (and very often will) be smaller than the input value, since the
|
||||
buffer is only filled until 16 bytes of clean values are found (which
|
||||
are then not copied in the buffer). Since a CMMA migration block needs
|
||||
the base address and the length, for a total of 16 bytes, we will send
|
||||
back some clean data if there is some dirty data afterwards, as long as
|
||||
the size of the clean data does not exceed the size of the header. This
|
||||
allows to minimize the amount of data to be saved or transferred over
|
||||
the network at the expense of more roundtrips to userspace. The next
|
||||
invocation of the ioctl will skip over all the clean values, saving
|
||||
potentially more than just the 16 bytes we found.
|
||||
|
||||
If KVM_S390_CMMA_PEEK is set:
|
||||
the existing storage attributes are read even when not in migration
|
||||
mode, and no other action is performed;
|
||||
|
||||
the output start_gfn will be equal to the input start_gfn,
|
||||
|
||||
the output count will be equal to the input count, except if the end of
|
||||
memory has been reached.
|
||||
|
||||
In both cases:
|
||||
the field "remaining" will indicate the total number of dirty CMMA values
|
||||
still remaining, or 0 if KVM_S390_CMMA_PEEK is set and migration mode is
|
||||
not enabled.
|
||||
|
||||
mask is unused.
|
||||
|
||||
values points to the userspace buffer where the result will be stored.
|
||||
|
||||
This ioctl can fail with -ENOMEM if not enough memory can be allocated to
|
||||
complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
|
||||
KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
|
||||
-EFAULT if the userspace address is invalid or if no page table is
|
||||
present for the addresses (e.g. when using hugepages).
|
||||
|
||||
4.108 KVM_S390_SET_CMMA_BITS
|
||||
|
||||
Capability: KVM_CAP_S390_CMMA_MIGRATION
|
||||
Architectures: s390
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_s390_cmma_log (in)
|
||||
Returns: 0 on success, a negative value on error
|
||||
|
||||
This ioctl is used to set the values of the CMMA bits on the s390
|
||||
architecture. It is meant to be used during live migration to restore
|
||||
the CMMA values, but there are no restrictions on its use.
|
||||
The ioctl takes parameters via the kvm_s390_cmma_values struct.
|
||||
Each CMMA value takes up one byte.
|
||||
|
||||
struct kvm_s390_cmma_log {
|
||||
__u64 start_gfn;
|
||||
__u32 count;
|
||||
__u32 flags;
|
||||
union {
|
||||
__u64 remaining;
|
||||
__u64 mask;
|
||||
};
|
||||
__u64 values;
|
||||
};
|
||||
|
||||
start_gfn indicates the starting guest frame number,
|
||||
|
||||
count indicates how many values are to be considered in the buffer,
|
||||
|
||||
flags is not used and must be 0.
|
||||
|
||||
mask indicates which PGSTE bits are to be considered.
|
||||
|
||||
remaining is not used.
|
||||
|
||||
values points to the buffer in userspace where to store the values.
|
||||
|
||||
This ioctl can fail with -ENOMEM if not enough memory can be allocated to
|
||||
complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
|
||||
the count field is too large (e.g. more than KVM_S390_CMMA_SIZE_MAX) or
|
||||
if the flags field was not 0, with -EFAULT if the userspace address is
|
||||
invalid, if invalid pages are written to (e.g. after the end of memory)
|
||||
or if no page table is present for the addresses (e.g. when using
|
||||
hugepages).
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
||||
|
@ -16,6 +16,7 @@ FLIC provides support to
|
||||
- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
|
||||
- modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM)
|
||||
- inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT)
|
||||
- get/set all AIS mode states (KVM_DEV_FLIC_AISM_ALL)
|
||||
|
||||
Groups:
|
||||
KVM_DEV_FLIC_ENQUEUE
|
||||
@ -136,6 +137,20 @@ struct kvm_s390_ais_req {
|
||||
an isc according to the adapter-interruption-suppression mode on condition
|
||||
that the AIS capability is enabled.
|
||||
|
||||
KVM_DEV_FLIC_AISM_ALL
|
||||
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
|
||||
a kvm_s390_ais_all describing:
|
||||
|
||||
struct kvm_s390_ais_all {
|
||||
__u8 simm; /* Single-Interruption-Mode mask */
|
||||
__u8 nimm; /* No-Interruption-Mode mask *
|
||||
};
|
||||
|
||||
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
|
||||
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
|
||||
to an ISC (MSB0 bit 0 to ISC 0 and so on). The combination of simm bit and
|
||||
nimm bit presents AIS mode for a ISC.
|
||||
|
||||
Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
|
||||
FLIC with an unknown group or attribute gives the error code EINVAL (instead of
|
||||
ENXIO, as specified in the API documentation). It is not possible to conclude
|
||||
|
@ -222,3 +222,36 @@ Allows user space to disable dea key wrapping, clearing the wrapping key.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
|
||||
5. GROUP: KVM_S390_VM_MIGRATION
|
||||
Architectures: s390
|
||||
|
||||
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
|
||||
|
||||
Allows userspace to stop migration mode, needed for PGSTE migration.
|
||||
Setting this attribute when migration mode is not active will have no
|
||||
effects.
|
||||
|
||||
Parameters: none
|
||||
Returns: 0
|
||||
|
||||
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
|
||||
|
||||
Allows userspace to start migration mode, needed for PGSTE migration.
|
||||
Setting this attribute when migration mode is already active will have
|
||||
no effects.
|
||||
|
||||
Parameters: none
|
||||
Returns: -ENOMEM if there is not enough free memory to start migration mode
|
||||
-EINVAL if the state of the VM is invalid (e.g. no memory defined)
|
||||
0 in case of success.
|
||||
|
||||
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
|
||||
|
||||
Allows userspace to query the status of migration mode.
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u64) to;
|
||||
the data itself is either 0 if migration mode is disabled or 1
|
||||
if it is enabled
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
0 in case of success.
|
||||
|
@ -59,7 +59,9 @@ union ctlreg0 {
|
||||
unsigned long lap : 1; /* Low-address-protection control */
|
||||
unsigned long : 4;
|
||||
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
|
||||
unsigned long : 4;
|
||||
unsigned long : 2;
|
||||
unsigned long iep : 1; /* Instruction-Execution-Protection */
|
||||
unsigned long : 1;
|
||||
unsigned long afp : 1; /* AFP-register control */
|
||||
unsigned long vx : 1; /* Vector enablement control */
|
||||
unsigned long : 7;
|
||||
|
@ -45,6 +45,8 @@
|
||||
#define KVM_REQ_ENABLE_IBS 8
|
||||
#define KVM_REQ_DISABLE_IBS 9
|
||||
#define KVM_REQ_ICPT_OPEREXC 10
|
||||
#define KVM_REQ_START_MIGRATION 11
|
||||
#define KVM_REQ_STOP_MIGRATION 12
|
||||
|
||||
#define SIGP_CTRL_C 0x80
|
||||
#define SIGP_CTRL_SCN_MASK 0x3f
|
||||
@ -56,7 +58,7 @@ union bsca_sigp_ctrl {
|
||||
__u8 r : 1;
|
||||
__u8 scn : 6;
|
||||
};
|
||||
} __packed;
|
||||
};
|
||||
|
||||
union esca_sigp_ctrl {
|
||||
__u16 value;
|
||||
@ -65,14 +67,14 @@ union esca_sigp_ctrl {
|
||||
__u8 reserved: 7;
|
||||
__u8 scn;
|
||||
};
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct esca_entry {
|
||||
union esca_sigp_ctrl sigp_ctrl;
|
||||
__u16 reserved1[3];
|
||||
__u64 sda;
|
||||
__u64 reserved2[6];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct bsca_entry {
|
||||
__u8 reserved0;
|
||||
@ -80,7 +82,7 @@ struct bsca_entry {
|
||||
__u16 reserved[3];
|
||||
__u64 sda;
|
||||
__u64 reserved2[2];
|
||||
} __attribute__((packed));
|
||||
};
|
||||
|
||||
union ipte_control {
|
||||
unsigned long val;
|
||||
@ -97,7 +99,7 @@ struct bsca_block {
|
||||
__u64 mcn;
|
||||
__u64 reserved2;
|
||||
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
|
||||
} __attribute__((packed));
|
||||
};
|
||||
|
||||
struct esca_block {
|
||||
union ipte_control ipte_control;
|
||||
@ -105,7 +107,21 @@ struct esca_block {
|
||||
__u64 mcn[4];
|
||||
__u64 reserved2[20];
|
||||
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/*
|
||||
* This struct is used to store some machine check info from lowcore
|
||||
* for machine checks that happen while the guest is running.
|
||||
* This info in host's lowcore might be overwritten by a second machine
|
||||
* check from host when host is in the machine check's high-level handling.
|
||||
* The size is 24 bytes.
|
||||
*/
|
||||
struct mcck_volatile_info {
|
||||
__u64 mcic;
|
||||
__u64 failing_storage_address;
|
||||
__u32 ext_damage_code;
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
#define CPUSTAT_STOPPED 0x80000000
|
||||
#define CPUSTAT_WAIT 0x10000000
|
||||
@ -260,14 +276,15 @@ struct kvm_s390_sie_block {
|
||||
|
||||
struct kvm_s390_itdb {
|
||||
__u8 data[256];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct sie_page {
|
||||
struct kvm_s390_sie_block sie_block;
|
||||
__u8 reserved200[1024]; /* 0x0200 */
|
||||
struct mcck_volatile_info mcck_info; /* 0x0200 */
|
||||
__u8 reserved218[1000]; /* 0x0218 */
|
||||
struct kvm_s390_itdb itdb; /* 0x0600 */
|
||||
__u8 reserved700[2304]; /* 0x0700 */
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_stat {
|
||||
u64 exit_userspace;
|
||||
@ -681,7 +698,7 @@ struct sie_page2 {
|
||||
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
|
||||
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
|
||||
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct kvm_s390_vsie {
|
||||
struct mutex mutex;
|
||||
@ -691,6 +708,12 @@ struct kvm_s390_vsie {
|
||||
struct page *pages[KVM_MAX_VCPUS];
|
||||
};
|
||||
|
||||
struct kvm_s390_migration_state {
|
||||
unsigned long bitmap_size; /* in bits (number of guest pages) */
|
||||
atomic64_t dirty_pages; /* number of dirty pages */
|
||||
unsigned long *pgste_bitmap;
|
||||
};
|
||||
|
||||
struct kvm_arch{
|
||||
void *sca;
|
||||
int use_esca;
|
||||
@ -718,6 +741,7 @@ struct kvm_arch{
|
||||
struct kvm_s390_crypto crypto;
|
||||
struct kvm_s390_vsie vsie;
|
||||
u64 epoch;
|
||||
struct kvm_s390_migration_state *migration_state;
|
||||
/* subset of available cpu features enabled by user space */
|
||||
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
|
||||
};
|
||||
|
@ -14,11 +14,24 @@
|
||||
#include <linux/const.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
|
||||
1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
|
||||
1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
|
||||
1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
|
||||
1ULL<<45 | 1ULL<<44)
|
||||
#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
|
||||
#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5)
|
||||
#define MCCK_CODE_CP _BITUL(63 - 9)
|
||||
#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
|
||||
#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
|
||||
#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
|
||||
|
||||
#define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28)
|
||||
#define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27)
|
||||
#define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26)
|
||||
#define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25)
|
||||
#define MCCK_CR14_WARN_SUB_MASK (1 << 24)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
union mci {
|
||||
|
@ -20,6 +20,7 @@
|
||||
#define CIF_FPU 4 /* restore FPU registers */
|
||||
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
|
||||
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
|
||||
#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
|
||||
|
||||
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
|
||||
#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
|
||||
@ -28,6 +29,7 @@
|
||||
#define _CIF_FPU _BITUL(CIF_FPU)
|
||||
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
|
||||
#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
|
||||
#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
|
||||
#define KVM_DEV_FLIC_AISM 9
|
||||
#define KVM_DEV_FLIC_AIRQ_INJECT 10
|
||||
#define KVM_DEV_FLIC_AISM_ALL 11
|
||||
/*
|
||||
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
|
||||
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
|
||||
@ -53,6 +54,11 @@ struct kvm_s390_ais_req {
|
||||
__u16 mode;
|
||||
};
|
||||
|
||||
struct kvm_s390_ais_all {
|
||||
__u8 simm;
|
||||
__u8 nimm;
|
||||
};
|
||||
|
||||
#define KVM_S390_IO_ADAPTER_MASK 1
|
||||
#define KVM_S390_IO_ADAPTER_MAP 2
|
||||
#define KVM_S390_IO_ADAPTER_UNMAP 3
|
||||
@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req {
|
||||
#define KVM_S390_VM_TOD 1
|
||||
#define KVM_S390_VM_CRYPTO 2
|
||||
#define KVM_S390_VM_CPU_MODEL 3
|
||||
#define KVM_S390_VM_MIGRATION 4
|
||||
|
||||
/* kvm attributes for mem_ctrl */
|
||||
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
|
||||
@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc {
|
||||
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
|
||||
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
|
||||
|
||||
/* kvm attributes for migration mode */
|
||||
#define KVM_S390_VM_MIGRATION_STOP 0
|
||||
#define KVM_S390_VM_MIGRATION_START 1
|
||||
#define KVM_S390_VM_MIGRATION_STATUS 2
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
/* general purpose regs for s390 */
|
||||
|
@ -58,6 +58,9 @@ int main(void)
|
||||
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
|
||||
OFFSET(__SF_GPRS, stack_frame, gprs);
|
||||
OFFSET(__SF_EMPTY, stack_frame, empty1);
|
||||
OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
|
||||
OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
|
||||
OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
|
||||
BLANK();
|
||||
/* timeval/timezone offsets for use by vdso */
|
||||
OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
|
||||
|
@ -225,6 +225,7 @@ ENTRY(sie64a)
|
||||
jnz .Lsie_skip
|
||||
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
|
||||
jo .Lsie_skip # exit if fp/vx regs changed
|
||||
.Lsie_entry:
|
||||
sie 0(%r14)
|
||||
.Lsie_skip:
|
||||
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
|
||||
@ -1104,7 +1105,13 @@ cleanup_critical:
|
||||
.quad .Lsie_done
|
||||
|
||||
.Lcleanup_sie:
|
||||
lg %r9,__SF_EMPTY(%r15) # get control block pointer
|
||||
cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
|
||||
je 1f
|
||||
slg %r9,BASED(.Lsie_crit_mcck_start)
|
||||
clg %r9,BASED(.Lsie_crit_mcck_length)
|
||||
jh 1f
|
||||
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
|
||||
1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
|
||||
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
|
||||
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
|
||||
larl %r9,sie_exit # skip forward to sie_exit
|
||||
@ -1289,6 +1296,10 @@ cleanup_critical:
|
||||
.quad .Lsie_gmap
|
||||
.Lsie_critical_length:
|
||||
.quad .Lsie_done - .Lsie_gmap
|
||||
.Lsie_crit_mcck_start:
|
||||
.quad .Lsie_entry
|
||||
.Lsie_crit_mcck_length:
|
||||
.quad .Lsie_skip - .Lsie_entry
|
||||
#endif
|
||||
|
||||
.section .rodata, "a"
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include <asm/crw.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/ctl_reg.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
struct mcck_struct {
|
||||
unsigned int kill_task : 1;
|
||||
@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode)
|
||||
return kill_task;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backup the guest's machine check info to its description block
|
||||
*/
|
||||
static void notrace s390_backup_mcck_info(struct pt_regs *regs)
|
||||
{
|
||||
struct mcck_volatile_info *mcck_backup;
|
||||
struct sie_page *sie_page;
|
||||
|
||||
/* r14 contains the sie block, which was set in sie64a */
|
||||
struct kvm_s390_sie_block *sie_block =
|
||||
(struct kvm_s390_sie_block *) regs->gprs[14];
|
||||
|
||||
if (sie_block == NULL)
|
||||
/* Something's seriously wrong, stop system. */
|
||||
s390_handle_damage();
|
||||
|
||||
sie_page = container_of(sie_block, struct sie_page, sie_block);
|
||||
mcck_backup = &sie_page->mcck_info;
|
||||
mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
|
||||
~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
|
||||
mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
|
||||
mcck_backup->failing_storage_address
|
||||
= S390_lowcore.failing_storage_address;
|
||||
}
|
||||
|
||||
#define MAX_IPD_COUNT 29
|
||||
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
|
||||
|
||||
#define ED_STP_ISLAND 6 /* External damage STP island check */
|
||||
#define ED_STP_SYNC 7 /* External damage STP sync check */
|
||||
|
||||
#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
|
||||
|
||||
/*
|
||||
* machine check handler.
|
||||
*/
|
||||
@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
|
||||
struct mcck_struct *mcck;
|
||||
unsigned long long tmp;
|
||||
union mci mci;
|
||||
unsigned long mcck_dam_code;
|
||||
|
||||
nmi_enter();
|
||||
inc_irq_stat(NMI_NMI);
|
||||
@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
|
||||
/* System damage -> stopping machine */
|
||||
s390_handle_damage();
|
||||
}
|
||||
if (mci.pd) {
|
||||
|
||||
/*
|
||||
* Reinject the instruction processing damages' machine checks
|
||||
* including Delayed Access Exception into the guest
|
||||
* instead of damaging the host if they happen in the guest.
|
||||
*/
|
||||
if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
|
||||
if (mci.b) {
|
||||
/* Processing backup -> verify if we can survive this */
|
||||
u64 z_mcic, o_mcic, t_mcic;
|
||||
@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
|
||||
mcck->mcck_code = mci.val;
|
||||
set_cpu_flag(CIF_MCCK_PENDING);
|
||||
}
|
||||
|
||||
/*
|
||||
* Backup the machine check's info if it happens when the guest
|
||||
* is running.
|
||||
*/
|
||||
if (test_cpu_flag(CIF_MCCK_GUEST))
|
||||
s390_backup_mcck_info(regs);
|
||||
|
||||
if (mci.cd) {
|
||||
/* Timing facility damage */
|
||||
s390_handle_damage();
|
||||
@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
|
||||
if (mcck->stp_queue)
|
||||
set_cpu_flag(CIF_MCCK_PENDING);
|
||||
}
|
||||
if (mci.se)
|
||||
/* Storage error uncorrected */
|
||||
s390_handle_damage();
|
||||
if (mci.ke)
|
||||
/* Storage key-error uncorrected */
|
||||
s390_handle_damage();
|
||||
if (mci.ds && mci.fa)
|
||||
/* Storage degradation */
|
||||
s390_handle_damage();
|
||||
|
||||
/*
|
||||
* Reinject storage related machine checks into the guest if they
|
||||
* happen when the guest is running.
|
||||
*/
|
||||
if (!test_cpu_flag(CIF_MCCK_GUEST)) {
|
||||
if (mci.se)
|
||||
/* Storage error uncorrected */
|
||||
s390_handle_damage();
|
||||
if (mci.ke)
|
||||
/* Storage key-error uncorrected */
|
||||
s390_handle_damage();
|
||||
if (mci.ds && mci.fa)
|
||||
/* Storage degradation */
|
||||
s390_handle_damage();
|
||||
}
|
||||
if (mci.cp) {
|
||||
/* Channel report word pending */
|
||||
mcck->channel_report = 1;
|
||||
@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
|
||||
mcck->warning = 1;
|
||||
set_cpu_flag(CIF_MCCK_PENDING);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are only Channel Report Pending and External Damage
|
||||
* machine checks, they will not be reinjected into the guest
|
||||
* because they refer to host conditions only.
|
||||
*/
|
||||
mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
|
||||
if (test_cpu_flag(CIF_MCCK_GUEST) &&
|
||||
(mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
|
||||
/* Set exit reason code for host's later handling */
|
||||
*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
|
||||
}
|
||||
clear_cpu_flag(CIF_MCCK_GUEST);
|
||||
nmi_exit();
|
||||
}
|
||||
|
||||
|
@ -89,7 +89,7 @@ struct region3_table_entry_fc1 {
|
||||
unsigned long f : 1; /* Fetch-Protection Bit */
|
||||
unsigned long fc : 1; /* Format-Control */
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long co : 1; /* Change-Recording Override */
|
||||
unsigned long iep: 1; /* Instruction-Execution-Protection */
|
||||
unsigned long : 2;
|
||||
unsigned long i : 1; /* Region-Invalid Bit */
|
||||
unsigned long cr : 1; /* Common-Region Bit */
|
||||
@ -131,7 +131,7 @@ struct segment_entry_fc1 {
|
||||
unsigned long f : 1; /* Fetch-Protection Bit */
|
||||
unsigned long fc : 1; /* Format-Control */
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long co : 1; /* Change-Recording Override */
|
||||
unsigned long iep: 1; /* Instruction-Execution-Protection */
|
||||
unsigned long : 2;
|
||||
unsigned long i : 1; /* Segment-Invalid Bit */
|
||||
unsigned long cs : 1; /* Common-Segment Bit */
|
||||
@ -168,7 +168,8 @@ union page_table_entry {
|
||||
unsigned long z : 1; /* Zero Bit */
|
||||
unsigned long i : 1; /* Page-Invalid Bit */
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long : 9;
|
||||
unsigned long iep: 1; /* Instruction-Execution-Protection */
|
||||
unsigned long : 8;
|
||||
};
|
||||
};
|
||||
|
||||
@ -241,7 +242,7 @@ struct ale {
|
||||
unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
|
||||
unsigned long : 6;
|
||||
unsigned long astesn : 32; /* ASTE Sequence Number */
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct aste {
|
||||
unsigned long i : 1; /* ASX-Invalid Bit */
|
||||
@ -257,7 +258,7 @@ struct aste {
|
||||
unsigned long ald : 32;
|
||||
unsigned long astesn : 32;
|
||||
/* .. more fields there */
|
||||
} __packed;
|
||||
};
|
||||
|
||||
int ipte_lock_held(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -485,6 +486,7 @@ enum prot_type {
|
||||
PROT_TYPE_KEYC = 1,
|
||||
PROT_TYPE_ALC = 2,
|
||||
PROT_TYPE_DAT = 3,
|
||||
PROT_TYPE_IEP = 4,
|
||||
};
|
||||
|
||||
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
|
||||
@ -500,6 +502,9 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
|
||||
switch (code) {
|
||||
case PGM_PROTECTION:
|
||||
switch (prot) {
|
||||
case PROT_TYPE_IEP:
|
||||
tec->b61 = 1;
|
||||
/* FALL THROUGH */
|
||||
case PROT_TYPE_LA:
|
||||
tec->b56 = 1;
|
||||
break;
|
||||
@ -591,6 +596,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
|
||||
* @gpa: points to where guest physical (absolute) address should be stored
|
||||
* @asce: effective asce
|
||||
* @mode: indicates the access mode to be used
|
||||
* @prot: returns the type for protection exceptions
|
||||
*
|
||||
* Translate a guest virtual address into a guest absolute address by means
|
||||
* of dynamic address translation as specified by the architecture.
|
||||
@ -606,19 +612,21 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
|
||||
*/
|
||||
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
unsigned long *gpa, const union asce asce,
|
||||
enum gacc_mode mode)
|
||||
enum gacc_mode mode, enum prot_type *prot)
|
||||
{
|
||||
union vaddress vaddr = {.addr = gva};
|
||||
union raddress raddr = {.addr = gva};
|
||||
union page_table_entry pte;
|
||||
int dat_protection = 0;
|
||||
int iep_protection = 0;
|
||||
union ctlreg0 ctlreg0;
|
||||
unsigned long ptr;
|
||||
int edat1, edat2;
|
||||
int edat1, edat2, iep;
|
||||
|
||||
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
|
||||
edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
|
||||
edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
|
||||
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
|
||||
if (asce.r)
|
||||
goto real_address;
|
||||
ptr = asce.origin * 4096;
|
||||
@ -702,6 +710,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
return PGM_TRANSLATION_SPEC;
|
||||
if (rtte.fc && edat2) {
|
||||
dat_protection |= rtte.fc1.p;
|
||||
iep_protection = rtte.fc1.iep;
|
||||
raddr.rfaa = rtte.fc1.rfaa;
|
||||
goto absolute_address;
|
||||
}
|
||||
@ -729,6 +738,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
return PGM_TRANSLATION_SPEC;
|
||||
if (ste.fc && edat1) {
|
||||
dat_protection |= ste.fc1.p;
|
||||
iep_protection = ste.fc1.iep;
|
||||
raddr.sfaa = ste.fc1.sfaa;
|
||||
goto absolute_address;
|
||||
}
|
||||
@ -745,12 +755,19 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
if (pte.z)
|
||||
return PGM_TRANSLATION_SPEC;
|
||||
dat_protection |= pte.p;
|
||||
iep_protection = pte.iep;
|
||||
raddr.pfra = pte.pfra;
|
||||
real_address:
|
||||
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
|
||||
absolute_address:
|
||||
if (mode == GACC_STORE && dat_protection)
|
||||
if (mode == GACC_STORE && dat_protection) {
|
||||
*prot = PROT_TYPE_DAT;
|
||||
return PGM_PROTECTION;
|
||||
}
|
||||
if (mode == GACC_IFETCH && iep_protection && iep) {
|
||||
*prot = PROT_TYPE_IEP;
|
||||
return PGM_PROTECTION;
|
||||
}
|
||||
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
|
||||
return PGM_ADDRESSING;
|
||||
*gpa = raddr.addr;
|
||||
@ -782,6 +799,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
||||
{
|
||||
psw_t *psw = &vcpu->arch.sie_block->gpsw;
|
||||
int lap_enabled, rc = 0;
|
||||
enum prot_type prot;
|
||||
|
||||
lap_enabled = low_address_protection_enabled(vcpu, asce);
|
||||
while (nr_pages) {
|
||||
@ -791,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
||||
PROT_TYPE_LA);
|
||||
ga &= PAGE_MASK;
|
||||
if (psw_bits(*psw).t) {
|
||||
rc = guest_translate(vcpu, ga, pages, asce, mode);
|
||||
rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
} else {
|
||||
@ -800,7 +818,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
||||
rc = PGM_ADDRESSING;
|
||||
}
|
||||
if (rc)
|
||||
return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
|
||||
return trans_exc(vcpu, rc, ga, ar, mode, prot);
|
||||
ga += PAGE_SIZE;
|
||||
pages++;
|
||||
nr_pages--;
|
||||
@ -886,6 +904,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
|
||||
unsigned long *gpa, enum gacc_mode mode)
|
||||
{
|
||||
psw_t *psw = &vcpu->arch.sie_block->gpsw;
|
||||
enum prot_type prot;
|
||||
union asce asce;
|
||||
int rc;
|
||||
|
||||
@ -900,9 +919,9 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
|
||||
}
|
||||
|
||||
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
|
||||
rc = guest_translate(vcpu, gva, gpa, asce, mode);
|
||||
rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
|
||||
if (rc > 0)
|
||||
return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
|
||||
return trans_exc(vcpu, rc, gva, 0, mode, prot);
|
||||
} else {
|
||||
*gpa = kvm_s390_real_to_abs(vcpu, gva);
|
||||
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
|
||||
|
@ -251,8 +251,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
|
||||
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
|
||||
if (psw_mchk_disabled(vcpu))
|
||||
active_mask &= ~IRQ_PEND_MCHK_MASK;
|
||||
/*
|
||||
* Check both floating and local interrupt's cr14 because
|
||||
* bit IRQ_PEND_MCHK_REP could be set in both cases.
|
||||
*/
|
||||
if (!(vcpu->arch.sie_block->gcr[14] &
|
||||
vcpu->kvm->arch.float_int.mchk.cr14))
|
||||
(vcpu->kvm->arch.float_int.mchk.cr14 |
|
||||
vcpu->arch.local_int.irq.mchk.cr14)))
|
||||
__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
|
||||
|
||||
/*
|
||||
@ -1876,6 +1881,28 @@ out:
|
||||
return ret < 0 ? ret : n;
|
||||
}
|
||||
|
||||
static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
|
||||
struct kvm_s390_ais_all ais;
|
||||
|
||||
if (attr->attr < sizeof(ais))
|
||||
return -EINVAL;
|
||||
|
||||
if (!test_kvm_facility(kvm, 72))
|
||||
return -ENOTSUPP;
|
||||
|
||||
mutex_lock(&fi->ais_lock);
|
||||
ais.simm = fi->simm;
|
||||
ais.nimm = fi->nimm;
|
||||
mutex_unlock(&fi->ais_lock);
|
||||
|
||||
if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
|
||||
{
|
||||
int r;
|
||||
@ -1885,6 +1912,9 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
|
||||
r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
|
||||
attr->attr);
|
||||
break;
|
||||
case KVM_DEV_FLIC_AISM_ALL:
|
||||
r = flic_ais_mode_get_all(dev->kvm, attr);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
@ -2235,6 +2265,25 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return kvm_s390_inject_airq(kvm, adapter);
|
||||
}
|
||||
|
||||
static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
|
||||
struct kvm_s390_ais_all ais;
|
||||
|
||||
if (!test_kvm_facility(kvm, 72))
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
|
||||
return -EFAULT;
|
||||
|
||||
mutex_lock(&fi->ais_lock);
|
||||
fi->simm = ais.simm;
|
||||
fi->nimm = ais.nimm;
|
||||
mutex_unlock(&fi->ais_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
|
||||
{
|
||||
int r = 0;
|
||||
@ -2277,6 +2326,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
|
||||
case KVM_DEV_FLIC_AIRQ_INJECT:
|
||||
r = flic_inject_airq(dev->kvm, attr);
|
||||
break;
|
||||
case KVM_DEV_FLIC_AISM_ALL:
|
||||
r = flic_ais_mode_set_all(dev->kvm, attr);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
@ -2298,6 +2350,7 @@ static int flic_has_attr(struct kvm_device *dev,
|
||||
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
|
||||
case KVM_DEV_FLIC_AISM:
|
||||
case KVM_DEV_FLIC_AIRQ_INJECT:
|
||||
case KVM_DEV_FLIC_AISM_ALL:
|
||||
return 0;
|
||||
}
|
||||
return -ENXIO;
|
||||
@ -2415,6 +2468,42 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inject the machine check to the guest.
|
||||
*/
|
||||
void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
|
||||
struct mcck_volatile_info *mcck_info)
|
||||
{
|
||||
struct kvm_s390_interrupt_info inti;
|
||||
struct kvm_s390_irq irq;
|
||||
struct kvm_s390_mchk_info *mchk;
|
||||
union mci mci;
|
||||
__u64 cr14 = 0; /* upper bits are not used */
|
||||
|
||||
mci.val = mcck_info->mcic;
|
||||
if (mci.sr)
|
||||
cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
|
||||
if (mci.dg)
|
||||
cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
|
||||
if (mci.w)
|
||||
cr14 |= MCCK_CR14_WARN_SUB_MASK;
|
||||
|
||||
mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
|
||||
mchk->cr14 = cr14;
|
||||
mchk->mcic = mcck_info->mcic;
|
||||
mchk->ext_damage_code = mcck_info->ext_damage_code;
|
||||
mchk->failing_storage_address = mcck_info->failing_storage_address;
|
||||
if (mci.ck) {
|
||||
/* Inject the floating machine check */
|
||||
inti.type = KVM_S390_MCHK;
|
||||
WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
|
||||
} else {
|
||||
/* Inject the machine check to specified vcpu */
|
||||
irq.type = KVM_S390_MCHK;
|
||||
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_set_routing_entry(struct kvm *kvm,
|
||||
struct kvm_kernel_irq_routing_entry *e,
|
||||
const struct kvm_irq_routing_entry *ue)
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/lowcore.h>
|
||||
@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_S390_SKEYS:
|
||||
case KVM_CAP_S390_IRQ_STATE:
|
||||
case KVM_CAP_S390_USER_INSTR0:
|
||||
case KVM_CAP_S390_CMMA_MIGRATION:
|
||||
case KVM_CAP_S390_AIS:
|
||||
r = 1;
|
||||
break;
|
||||
@ -750,6 +752,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
|
||||
{
|
||||
int cx;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(cx, vcpu, kvm)
|
||||
kvm_s390_sync_request(req, vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called with kvm->srcu held to avoid races on memslots, and with
|
||||
* kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
|
||||
*/
|
||||
static int kvm_s390_vm_start_migration(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_s390_migration_state *mgs;
|
||||
struct kvm_memory_slot *ms;
|
||||
/* should be the only one */
|
||||
struct kvm_memslots *slots;
|
||||
unsigned long ram_pages;
|
||||
int slotnr;
|
||||
|
||||
/* migration mode already enabled */
|
||||
if (kvm->arch.migration_state)
|
||||
return 0;
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
if (!slots || !slots->used_slots)
|
||||
return -EINVAL;
|
||||
|
||||
mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
|
||||
if (!mgs)
|
||||
return -ENOMEM;
|
||||
kvm->arch.migration_state = mgs;
|
||||
|
||||
if (kvm->arch.use_cmma) {
|
||||
/*
|
||||
* Get the last slot. They should be sorted by base_gfn, so the
|
||||
* last slot is also the one at the end of the address space.
|
||||
* We have verified above that at least one slot is present.
|
||||
*/
|
||||
ms = slots->memslots + slots->used_slots - 1;
|
||||
/* round up so we only use full longs */
|
||||
ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
|
||||
/* allocate enough bytes to store all the bits */
|
||||
mgs->pgste_bitmap = vmalloc(ram_pages / 8);
|
||||
if (!mgs->pgste_bitmap) {
|
||||
kfree(mgs);
|
||||
kvm->arch.migration_state = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mgs->bitmap_size = ram_pages;
|
||||
atomic64_set(&mgs->dirty_pages, ram_pages);
|
||||
/* mark all the pages in active slots as dirty */
|
||||
for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
|
||||
ms = slots->memslots + slotnr;
|
||||
bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
|
||||
}
|
||||
|
||||
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called with kvm->lock to avoid races with ourselves and
|
||||
* kvm_s390_vm_start_migration.
|
||||
*/
|
||||
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_s390_migration_state *mgs;
|
||||
|
||||
/* migration mode already disabled */
|
||||
if (!kvm->arch.migration_state)
|
||||
return 0;
|
||||
mgs = kvm->arch.migration_state;
|
||||
kvm->arch.migration_state = NULL;
|
||||
|
||||
if (kvm->arch.use_cmma) {
|
||||
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
|
||||
vfree(mgs->pgste_bitmap);
|
||||
}
|
||||
kfree(mgs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_vm_set_migration(struct kvm *kvm,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int idx, res = -ENXIO;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_MIGRATION_START:
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
res = kvm_s390_vm_start_migration(kvm);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
break;
|
||||
case KVM_S390_VM_MIGRATION_STOP:
|
||||
res = kvm_s390_vm_stop_migration(kvm);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static int kvm_s390_vm_get_migration(struct kvm *kvm,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 mig = (kvm->arch.migration_state != NULL);
|
||||
|
||||
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
|
||||
return -ENXIO;
|
||||
|
||||
if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u8 gtod_high;
|
||||
@ -1090,6 +1215,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_CRYPTO:
|
||||
ret = kvm_s390_vm_set_crypto(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_MIGRATION:
|
||||
ret = kvm_s390_vm_set_migration(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
@ -1112,6 +1240,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_CPU_MODEL:
|
||||
ret = kvm_s390_get_cpu_model(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_MIGRATION:
|
||||
ret = kvm_s390_vm_get_migration(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
@ -1179,6 +1310,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case KVM_S390_VM_MIGRATION:
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
@ -1286,6 +1420,182 @@ out:
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Base address and length must be sent at the start of each block, therefore
|
||||
* it's cheaper to send some clean data, as long as it's less than the size of
|
||||
* two longs.
|
||||
*/
|
||||
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
|
||||
/* for consistency */
|
||||
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
|
||||
|
||||
/*
|
||||
* This function searches for the next page with dirty CMMA attributes, and
|
||||
* saves the attributes in the buffer up to either the end of the buffer or
|
||||
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
|
||||
* no trailing clean bytes are saved.
|
||||
* In case no dirty bits were found, or if CMMA was not enabled or used, the
|
||||
* output buffer will indicate 0 as length.
|
||||
*/
|
||||
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
|
||||
struct kvm_s390_cmma_log *args)
|
||||
{
|
||||
struct kvm_s390_migration_state *s = kvm->arch.migration_state;
|
||||
unsigned long bufsize, hva, pgstev, i, next, cur;
|
||||
int srcu_idx, peek, r = 0, rr;
|
||||
u8 *res;
|
||||
|
||||
cur = args->start_gfn;
|
||||
i = next = pgstev = 0;
|
||||
|
||||
if (unlikely(!kvm->arch.use_cmma))
|
||||
return -ENXIO;
|
||||
/* Invalid/unsupported flags were specified */
|
||||
if (args->flags & ~KVM_S390_CMMA_PEEK)
|
||||
return -EINVAL;
|
||||
/* Migration mode query, and we are not doing a migration */
|
||||
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
|
||||
if (!peek && !s)
|
||||
return -EINVAL;
|
||||
/* CMMA is disabled or was not used, or the buffer has length zero */
|
||||
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
|
||||
if (!bufsize || !kvm->mm->context.use_cmma) {
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!peek) {
|
||||
/* We are not peeking, and there are no dirty pages */
|
||||
if (!atomic64_read(&s->dirty_pages)) {
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
|
||||
args->start_gfn);
|
||||
if (cur >= s->bitmap_size) /* nothing found, loop back */
|
||||
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
|
||||
if (cur >= s->bitmap_size) { /* again! (very unlikely) */
|
||||
memset(args, 0, sizeof(*args));
|
||||
return 0;
|
||||
}
|
||||
next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
|
||||
}
|
||||
|
||||
res = vmalloc(bufsize);
|
||||
if (!res)
|
||||
return -ENOMEM;
|
||||
|
||||
args->start_gfn = cur;
|
||||
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
while (i < bufsize) {
|
||||
hva = gfn_to_hva(kvm, cur);
|
||||
if (kvm_is_error_hva(hva)) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
/* decrement only if we actually flipped the bit to 0 */
|
||||
if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
|
||||
atomic64_dec(&s->dirty_pages);
|
||||
r = get_pgste(kvm->mm, hva, &pgstev);
|
||||
if (r < 0)
|
||||
pgstev = 0;
|
||||
/* save the value */
|
||||
res[i++] = (pgstev >> 24) & 0x3;
|
||||
/*
|
||||
* if the next bit is too far away, stop.
|
||||
* if we reached the previous "next", find the next one
|
||||
*/
|
||||
if (!peek) {
|
||||
if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
|
||||
break;
|
||||
if (cur == next)
|
||||
next = find_next_bit(s->pgste_bitmap,
|
||||
s->bitmap_size, cur + 1);
|
||||
/* reached the end of the bitmap or of the buffer, stop */
|
||||
if ((next >= s->bitmap_size) ||
|
||||
(next >= args->start_gfn + bufsize))
|
||||
break;
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
args->count = i;
|
||||
args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
|
||||
|
||||
rr = copy_to_user((void __user *)args->values, res, args->count);
|
||||
if (rr)
|
||||
r = -EFAULT;
|
||||
|
||||
vfree(res);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function sets the CMMA attributes for the given pages. If the input
|
||||
* buffer has zero length, no action is taken, otherwise the attributes are
|
||||
* set and the mm->context.use_cmma flag is set.
|
||||
*/
|
||||
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
|
||||
const struct kvm_s390_cmma_log *args)
|
||||
{
|
||||
unsigned long hva, mask, pgstev, i;
|
||||
uint8_t *bits;
|
||||
int srcu_idx, r = 0;
|
||||
|
||||
mask = args->mask;
|
||||
|
||||
if (!kvm->arch.use_cmma)
|
||||
return -ENXIO;
|
||||
/* invalid/unsupported flags */
|
||||
if (args->flags != 0)
|
||||
return -EINVAL;
|
||||
/* Enforce sane limit on memory allocation */
|
||||
if (args->count > KVM_S390_CMMA_SIZE_MAX)
|
||||
return -EINVAL;
|
||||
/* Nothing to do */
|
||||
if (args->count == 0)
|
||||
return 0;
|
||||
|
||||
bits = vmalloc(sizeof(*bits) * args->count);
|
||||
if (!bits)
|
||||
return -ENOMEM;
|
||||
|
||||
r = copy_from_user(bits, (void __user *)args->values, args->count);
|
||||
if (r) {
|
||||
r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
for (i = 0; i < args->count; i++) {
|
||||
hva = gfn_to_hva(kvm, args->start_gfn + i);
|
||||
if (kvm_is_error_hva(hva)) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
pgstev = bits[i];
|
||||
pgstev = pgstev << 24;
|
||||
mask &= _PGSTE_GPS_USAGE_MASK;
|
||||
set_pgste_bits(kvm->mm, hva, mask, pgstev);
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
|
||||
if (!kvm->mm->context.use_cmma) {
|
||||
down_write(&kvm->mm->mmap_sem);
|
||||
kvm->mm->context.use_cmma = 1;
|
||||
up_write(&kvm->mm->mmap_sem);
|
||||
}
|
||||
out:
|
||||
vfree(bits);
|
||||
return r;
|
||||
}
|
||||
|
||||
long kvm_arch_vm_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -1364,6 +1674,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = kvm_s390_set_skeys(kvm, &args);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_GET_CMMA_BITS: {
|
||||
struct kvm_s390_cmma_log args;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
break;
|
||||
r = kvm_s390_get_cmma_bits(kvm, &args);
|
||||
if (!r) {
|
||||
r = copy_to_user(argp, &args, sizeof(args));
|
||||
if (r)
|
||||
r = -EFAULT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_S390_SET_CMMA_BITS: {
|
||||
struct kvm_s390_cmma_log args;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
break;
|
||||
r = kvm_s390_set_cmma_bits(kvm, &args);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
@ -1633,6 +1966,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
kvm_s390_destroy_adapters(kvm);
|
||||
kvm_s390_clear_float_irqs(kvm);
|
||||
kvm_s390_vsie_destroy(kvm);
|
||||
if (kvm->arch.migration_state) {
|
||||
vfree(kvm->arch.migration_state->pgste_bitmap);
|
||||
kfree(kvm->arch.migration_state);
|
||||
}
|
||||
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
|
||||
}
|
||||
|
||||
@ -1977,7 +2314,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
|
||||
if (!vcpu->arch.sie_block->cbrlo)
|
||||
return -ENOMEM;
|
||||
|
||||
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
|
||||
vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
|
||||
return 0;
|
||||
}
|
||||
@ -2069,6 +2405,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
if (!vcpu)
|
||||
goto out;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
|
||||
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
|
||||
if (!sie_page)
|
||||
goto out_free_cpu;
|
||||
@ -2489,6 +2826,27 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
|
||||
/*
|
||||
* Disable CMMA virtualization; we will emulate the ESSA
|
||||
* instruction manually, in order to provide additional
|
||||
* functionalities needed for live migration.
|
||||
*/
|
||||
vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
|
||||
/*
|
||||
* Re-enable CMMA virtualization if CMMA is available and
|
||||
* was used.
|
||||
*/
|
||||
if ((vcpu->kvm->arch.use_cmma) &&
|
||||
(vcpu->kvm->mm->context.use_cmma))
|
||||
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* nothing to do, just clear the request */
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
|
||||
@ -2683,6 +3041,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
|
||||
{
|
||||
struct mcck_volatile_info *mcck_info;
|
||||
struct sie_page *sie_page;
|
||||
|
||||
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
|
||||
vcpu->arch.sie_block->icptcode);
|
||||
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
|
||||
@ -2693,6 +3054,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
|
||||
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
|
||||
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
|
||||
|
||||
if (exit_reason == -EINTR) {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "machine check");
|
||||
sie_page = container_of(vcpu->arch.sie_block,
|
||||
struct sie_page, sie_block);
|
||||
mcck_info = &sie_page->mcck_info;
|
||||
kvm_s390_reinject_machine_check(vcpu, mcck_info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vcpu->arch.sie_block->icptcode > 0) {
|
||||
int rc = kvm_handle_sie_intercept(vcpu);
|
||||
|
||||
|
@ -397,4 +397,6 @@ static inline int kvm_s390_use_sca_entries(void)
|
||||
*/
|
||||
return sclp.has_sigpif;
|
||||
}
|
||||
void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
|
||||
struct mcck_volatile_info *mcck_info);
|
||||
#endif
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <asm/ebcdic.h>
|
||||
#include <asm/sysinfo.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page-states.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/io.h>
|
||||
@ -949,13 +950,72 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
|
||||
{
|
||||
struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
|
||||
int r1, r2, nappended, entries;
|
||||
unsigned long gfn, hva, res, pgstev, ptev;
|
||||
unsigned long *cbrlo;
|
||||
|
||||
/*
|
||||
* We don't need to set SD.FPF.SK to 1 here, because if we have a
|
||||
* machine check here we either handle it or crash
|
||||
*/
|
||||
|
||||
kvm_s390_get_regs_rre(vcpu, &r1, &r2);
|
||||
gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
|
||||
hva = gfn_to_hva(vcpu->kvm, gfn);
|
||||
entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
|
||||
|
||||
if (kvm_is_error_hva(hva))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
|
||||
if (nappended < 0) {
|
||||
res = orc ? 0x10 : 0;
|
||||
vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
|
||||
return 0;
|
||||
}
|
||||
res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
|
||||
/*
|
||||
* Set the block-content state part of the result. 0 means resident, so
|
||||
* nothing to do if the page is valid. 2 is for preserved pages
|
||||
* (non-present and non-zero), and 3 for zero pages (non-present and
|
||||
* zero).
|
||||
*/
|
||||
if (ptev & _PAGE_INVALID) {
|
||||
res |= 2;
|
||||
if (pgstev & _PGSTE_GPS_ZERO)
|
||||
res |= 1;
|
||||
}
|
||||
vcpu->run->s.regs.gprs[r1] = res;
|
||||
/*
|
||||
* It is possible that all the normal 511 slots were full, in which case
|
||||
* we will now write in the 512th slot, which is reserved for host use.
|
||||
* In both cases we let the normal essa handling code process all the
|
||||
* slots, including the reserved one, if needed.
|
||||
*/
|
||||
if (nappended > 0) {
|
||||
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
|
||||
cbrlo[entries] = gfn << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
if (orc) {
|
||||
/* increment only if we are really flipping the bit to 1 */
|
||||
if (!test_and_set_bit(gfn, ms->pgste_bitmap))
|
||||
atomic64_inc(&ms->dirty_pages);
|
||||
}
|
||||
|
||||
return nappended;
|
||||
}
|
||||
|
||||
static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* entries expected to be 1FF */
|
||||
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
|
||||
unsigned long *cbrlo;
|
||||
struct gmap *gmap;
|
||||
int i;
|
||||
int i, orc;
|
||||
|
||||
VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
|
||||
gmap = vcpu->arch.gmap;
|
||||
@ -965,12 +1025,45 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
|
||||
/* Check for invalid operation request code */
|
||||
orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
|
||||
if (orc > ESSA_MAX)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
/* Retry the ESSA instruction */
|
||||
kvm_s390_retry_instr(vcpu);
|
||||
if (likely(!vcpu->kvm->arch.migration_state)) {
|
||||
/*
|
||||
* CMMA is enabled in the KVM settings, but is disabled in
|
||||
* the SIE block and in the mm_context, and we are not doing
|
||||
* a migration. Enable CMMA in the mm_context.
|
||||
* Since we need to take a write lock to write to the context
|
||||
* to avoid races with storage keys handling, we check if the
|
||||
* value really needs to be written to; if the value is
|
||||
* already correct, we do nothing and avoid the lock.
|
||||
*/
|
||||
if (vcpu->kvm->mm->context.use_cmma == 0) {
|
||||
down_write(&vcpu->kvm->mm->mmap_sem);
|
||||
vcpu->kvm->mm->context.use_cmma = 1;
|
||||
up_write(&vcpu->kvm->mm->mmap_sem);
|
||||
}
|
||||
/*
|
||||
* If we are here, we are supposed to have CMMA enabled in
|
||||
* the SIE block. Enabling CMMA works on a per-CPU basis,
|
||||
* while the context use_cmma flag is per process.
|
||||
* It's possible that the context flag is enabled and the
|
||||
* SIE flag is not, so we set the flag always; if it was
|
||||
* already set, nothing changes, otherwise we enable it
|
||||
* on this CPU too.
|
||||
*/
|
||||
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
|
||||
/* Retry the ESSA instruction */
|
||||
kvm_s390_retry_instr(vcpu);
|
||||
} else {
|
||||
/* Account for the possible extra cbrl entry */
|
||||
i = do_essa(vcpu, orc);
|
||||
if (i < 0)
|
||||
return i;
|
||||
entries += i;
|
||||
}
|
||||
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
|
||||
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
|
||||
down_read(&gmap->mm->mmap_sem);
|
||||
|
@ -26,16 +26,21 @@
|
||||
|
||||
struct vsie_page {
|
||||
struct kvm_s390_sie_block scb_s; /* 0x0000 */
|
||||
/*
|
||||
* the backup info for machine check. ensure it's at
|
||||
* the same offset as that in struct sie_page!
|
||||
*/
|
||||
struct mcck_volatile_info mcck_info; /* 0x0200 */
|
||||
/* the pinned originial scb */
|
||||
struct kvm_s390_sie_block *scb_o; /* 0x0200 */
|
||||
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
|
||||
/* the shadow gmap in use by the vsie_page */
|
||||
struct gmap *gmap; /* 0x0208 */
|
||||
struct gmap *gmap; /* 0x0220 */
|
||||
/* address of the last reported fault to guest2 */
|
||||
unsigned long fault_addr; /* 0x0210 */
|
||||
__u8 reserved[0x0700 - 0x0218]; /* 0x0218 */
|
||||
unsigned long fault_addr; /* 0x0228 */
|
||||
__u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
|
||||
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
|
||||
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/* trigger a validity icpt for the given scb */
|
||||
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
|
||||
@ -801,6 +806,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
|
||||
struct mcck_volatile_info *mcck_info;
|
||||
struct sie_page *sie_page;
|
||||
int rc;
|
||||
|
||||
handle_last_fault(vcpu, vsie_page);
|
||||
@ -822,6 +829,14 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
local_irq_enable();
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
if (rc == -EINTR) {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "machine check");
|
||||
sie_page = container_of(scb_s, struct sie_page, sie_block);
|
||||
mcck_info = &sie_page->mcck_info;
|
||||
kvm_s390_reinject_machine_check(vcpu, mcck_info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (rc > 0)
|
||||
rc = 0; /* we could still have an icpt */
|
||||
else if (rc == -EFAULT)
|
||||
|
@ -155,6 +155,35 @@ struct kvm_s390_skeys {
|
||||
__u32 reserved[9];
|
||||
};
|
||||
|
||||
#define KVM_S390_CMMA_PEEK (1 << 0)
|
||||
|
||||
/**
|
||||
* kvm_s390_cmma_log - Used for CMMA migration.
|
||||
*
|
||||
* Used both for input and output.
|
||||
*
|
||||
* @start_gfn: Guest page number to start from.
|
||||
* @count: Size of the result buffer.
|
||||
* @flags: Control operation mode via KVM_S390_CMMA_* flags
|
||||
* @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
|
||||
* pages are still remaining.
|
||||
* @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
|
||||
* in the PGSTE.
|
||||
* @values: Pointer to the values buffer.
|
||||
*
|
||||
* Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
|
||||
*/
|
||||
struct kvm_s390_cmma_log {
|
||||
__u64 start_gfn;
|
||||
__u32 count;
|
||||
__u32 flags;
|
||||
union {
|
||||
__u64 remaining;
|
||||
__u64 mask;
|
||||
};
|
||||
__u64 values;
|
||||
};
|
||||
|
||||
struct kvm_hyperv_exit {
|
||||
#define KVM_EXIT_HYPERV_SYNIC 1
|
||||
#define KVM_EXIT_HYPERV_HCALL 2
|
||||
@ -895,6 +924,7 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_SPAPR_TCE_VFIO 142
|
||||
#define KVM_CAP_X86_GUEST_MWAIT 143
|
||||
#define KVM_CAP_ARM_USER_IRQ 144
|
||||
#define KVM_CAP_S390_CMMA_MIGRATION 145
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -1318,6 +1348,9 @@ struct kvm_s390_ucas_mapping {
|
||||
#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
|
||||
/* Available with KVM_CAP_X86_SMM */
|
||||
#define KVM_SMI _IO(KVMIO, 0xb7)
|
||||
/* Available with KVM_CAP_S390_CMMA_MIGRATION */
|
||||
#define KVM_S390_GET_CMMA_BITS _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log)
|
||||
#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
|
||||
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
|
||||
|
Loading…
x
Reference in New Issue
Block a user