hyperv-next for 5.8
-----BEGIN PGP SIGNATURE----- iQFHBAABCAAxFiEEIbPD0id6easf0xsudhRwX5BBoF4FAl7WhbkTHHdlaS5saXVA a2VybmVsLm9yZwAKCRB2FHBfkEGgXlUnB/0R8dBVSeRfNmyJaadBWKFc/LffwKLD CQ8PVv22ffkCaEYV2tpnhS6NmkERLNdson4Uo02tVUsjOJ4CrWHTn7aKqYWZyA+O qv/PiD9TBXJVYMVP2kkyaJlK5KoqeAWBr2kM16tT0cxQmlhE7g0Xo2wU9vhRbU+4 i4F0jffe4lWps65TK392CsPr6UEv1HSel191Py5zLzYqChT+L8WfahmBt3chhsV5 TIUJYQvBwxecFRla7yo+4sUn37ZfcTqD1hCWSr0zs4psW0ge7d80kuaNZS+EqxND fGm3Bp1BlUuDKsJ/D+AaHLCR47PUZ9t9iMDjZS/ovYglLFwi+h3tAV+W =LwVR -----END PGP SIGNATURE----- Merge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux Pull hyper-v updates from Wei Liu: - a series from Andrea to support channel reassignment - a series from Vitaly to clean up Vmbus message handling - a series from Michael to clean up and augment hyperv-tlfs.h - patches from Andy to clean up GUID usage in Hyper-V code - a few other misc patches * tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (29 commits) Drivers: hv: vmbus: Resolve more races involving init_vp_index() Drivers: hv: vmbus: Resolve race between init_vp_index() and CPU hotplug vmbus: Replace zero-length array with flexible-array Driver: hv: vmbus: drop a no long applicable comment hyper-v: Switch to use UUID types directly hyper-v: Replace open-coded variant of %*phN specifier hyper-v: Supply GUID pointer to printf() like functions hyper-v: Use UUID API for exporting the GUID (part 2) asm-generic/hyperv: Add definitions for Get/SetVpRegister hypercalls x86/hyperv: Split hyperv-tlfs.h into arch dependent and independent files x86/hyperv: Remove HV_PROCESSOR_POWER_STATE #defines KVM: x86: hyperv: Remove duplicate definitions of Reference TSC Page drivers: hv: remove redundant assignment to pointer primary_channel scsi: storvsc: Re-init stor_chns when a channel interrupt is re-assigned Drivers: hv: vmbus: Introduce the CHANNELMSG_MODIFYCHANNEL message type Drivers: hv: vmbus: Synchronize init_vp_index() vs. 
CPU hotplug Drivers: hv: vmbus: Remove the unused HV_LOCALIZED channel affinity logic PCI: hv: Prepare hv_compose_msi_msg() for the VMBus-channel-interrupt-to-vCPU reassignment functionality Drivers: hv: vmbus: Use a spin lock for synchronizing channel scheduling vs. channel removal hv_utils: Always execute the fcopy and vss callbacks in a tasklet ...
This commit is contained in:
commit
6b2591c212
@ -7924,6 +7924,7 @@ F: drivers/pci/controller/pci-hyperv.c
|
|||||||
F: drivers/scsi/storvsc_drv.c
|
F: drivers/scsi/storvsc_drv.c
|
||||||
F: drivers/uio/uio_hv_generic.c
|
F: drivers/uio/uio_hv_generic.c
|
||||||
F: drivers/video/fbdev/hyperv_fb.c
|
F: drivers/video/fbdev/hyperv_fb.c
|
||||||
|
F: include/asm-generic/hyperv-tlfs.h
|
||||||
F: include/asm-generic/mshyperv.h
|
F: include/asm-generic/mshyperv.h
|
||||||
F: include/clocksource/hyperv_timer.h
|
F: include/clocksource/hyperv_timer.h
|
||||||
F: include/linux/hyperv.h
|
F: include/linux/hyperv.h
|
||||||
|
@ -11,17 +11,6 @@
|
|||||||
|
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* While not explicitly listed in the TLFS, Hyper-V always runs with a page size
|
|
||||||
* of 4096. These definitions are used when communicating with Hyper-V using
|
|
||||||
* guest physical pages and guest physical page addresses, since the guest page
|
|
||||||
* size may not be 4096 on all architectures.
|
|
||||||
*/
|
|
||||||
#define HV_HYP_PAGE_SHIFT 12
|
|
||||||
#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
|
|
||||||
#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
|
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
|
||||||
* is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
|
* is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
|
||||||
@ -39,78 +28,41 @@
|
|||||||
#define HYPERV_CPUID_MAX 0x4000ffff
|
#define HYPERV_CPUID_MAX 0x4000ffff
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Feature identification. EAX indicates which features are available
|
* Aliases for Group A features that have X64 in the name.
|
||||||
* to the partition based upon the current partition privileges.
|
* On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits.
|
||||||
* These are HYPERV_CPUID_FEATURES.EAX bits.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
|
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \
|
||||||
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0)
|
HV_MSR_VP_RUNTIME_AVAILABLE
|
||||||
/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
|
#define HV_X64_MSR_SYNIC_AVAILABLE \
|
||||||
#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
|
HV_MSR_SYNIC_AVAILABLE
|
||||||
/*
|
#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \
|
||||||
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
|
HV_MSR_APIC_ACCESS_AVAILABLE
|
||||||
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
|
#define HV_X64_MSR_HYPERCALL_AVAILABLE \
|
||||||
*/
|
HV_MSR_HYPERCALL_AVAILABLE
|
||||||
#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2)
|
#define HV_X64_MSR_VP_INDEX_AVAILABLE \
|
||||||
/*
|
HV_MSR_VP_INDEX_AVAILABLE
|
||||||
* Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
|
#define HV_X64_MSR_RESET_AVAILABLE \
|
||||||
* HV_X64_MSR_STIMER3_COUNT) available
|
HV_MSR_RESET_AVAILABLE
|
||||||
*/
|
#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \
|
||||||
#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
|
HV_MSR_GUEST_IDLE_AVAILABLE
|
||||||
/*
|
#define HV_X64_ACCESS_FREQUENCY_MSRS \
|
||||||
* APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
|
HV_ACCESS_FREQUENCY_MSRS
|
||||||
* are available
|
#define HV_X64_ACCESS_REENLIGHTENMENT \
|
||||||
*/
|
HV_ACCESS_REENLIGHTENMENT
|
||||||
#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4)
|
#define HV_X64_ACCESS_TSC_INVARIANT \
|
||||||
/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
|
HV_ACCESS_TSC_INVARIANT
|
||||||
#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5)
|
|
||||||
/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
|
|
||||||
#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6)
|
|
||||||
/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
|
|
||||||
#define HV_X64_MSR_RESET_AVAILABLE BIT(7)
|
|
||||||
/*
|
|
||||||
* Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
|
|
||||||
* HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
|
|
||||||
* HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
|
|
||||||
*/
|
|
||||||
#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8)
|
|
||||||
/* Partition reference TSC MSR is available */
|
|
||||||
#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
|
|
||||||
/* Partition Guest IDLE MSR is available */
|
|
||||||
#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10)
|
|
||||||
/*
|
|
||||||
* There is a single feature flag that signifies if the partition has access
|
|
||||||
* to MSRs with local APIC and TSC frequencies.
|
|
||||||
*/
|
|
||||||
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
|
|
||||||
/* AccessReenlightenmentControls privilege */
|
|
||||||
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
|
|
||||||
/* AccessTscInvariantControls privilege */
|
|
||||||
#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Feature identification: indicates which flags were specified at partition
|
* Aliases for Group B features that have X64 in the name.
|
||||||
* creation. The format is the same as the partition creation flag structure
|
* On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits.
|
||||||
* defined in section Partition Creation Flags.
|
|
||||||
* These are HYPERV_CPUID_FEATURES.EBX bits.
|
|
||||||
*/
|
*/
|
||||||
#define HV_X64_CREATE_PARTITIONS BIT(0)
|
#define HV_X64_POST_MESSAGES HV_POST_MESSAGES
|
||||||
#define HV_X64_ACCESS_PARTITION_ID BIT(1)
|
#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS
|
||||||
#define HV_X64_ACCESS_MEMORY_POOL BIT(2)
|
|
||||||
#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3)
|
|
||||||
#define HV_X64_POST_MESSAGES BIT(4)
|
|
||||||
#define HV_X64_SIGNAL_EVENTS BIT(5)
|
|
||||||
#define HV_X64_CREATE_PORT BIT(6)
|
|
||||||
#define HV_X64_CONNECT_PORT BIT(7)
|
|
||||||
#define HV_X64_ACCESS_STATS BIT(8)
|
|
||||||
#define HV_X64_DEBUGGING BIT(11)
|
|
||||||
#define HV_X64_CPU_POWER_MANAGEMENT BIT(12)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Feature identification. EDX indicates which miscellaneous features
|
* Group D Features. The bit assignments are custom to each architecture.
|
||||||
* are available to the partition.
|
* On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits.
|
||||||
* These are HYPERV_CPUID_FEATURES.EDX bits.
|
|
||||||
*/
|
*/
|
||||||
/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
|
/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
|
||||||
#define HV_X64_MWAIT_AVAILABLE BIT(0)
|
#define HV_X64_MWAIT_AVAILABLE BIT(0)
|
||||||
@ -187,7 +139,7 @@
|
|||||||
* processor, except for virtual processors that are reported as sibling SMT
|
* processor, except for virtual processors that are reported as sibling SMT
|
||||||
* threads.
|
* threads.
|
||||||
*/
|
*/
|
||||||
#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
|
#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
|
||||||
|
|
||||||
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
|
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
|
||||||
#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
|
#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
|
||||||
@ -295,43 +247,6 @@ union hv_x64_msr_hypercall_contents {
|
|||||||
} __packed;
|
} __packed;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* TSC page layout.
|
|
||||||
*/
|
|
||||||
struct ms_hyperv_tsc_page {
|
|
||||||
volatile u32 tsc_sequence;
|
|
||||||
u32 reserved1;
|
|
||||||
volatile u64 tsc_scale;
|
|
||||||
volatile s64 tsc_offset;
|
|
||||||
u64 reserved2[509];
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The guest OS needs to register the guest ID with the hypervisor.
|
|
||||||
* The guest ID is a 64 bit entity and the structure of this ID is
|
|
||||||
* specified in the Hyper-V specification:
|
|
||||||
*
|
|
||||||
* msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
|
|
||||||
*
|
|
||||||
* While the current guideline does not specify how Linux guest ID(s)
|
|
||||||
* need to be generated, our plan is to publish the guidelines for
|
|
||||||
* Linux and other guest operating systems that currently are hosted
|
|
||||||
* on Hyper-V. The implementation here conforms to this yet
|
|
||||||
* unpublished guidelines.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Bit(s)
|
|
||||||
* 63 - Indicates if the OS is Open Source or not; 1 is Open Source
|
|
||||||
* 62:56 - OS Type; Linux is 0x1
|
|
||||||
* 55:48 - Distro specific identification
|
|
||||||
* 47:16 - Linux kernel version number
|
|
||||||
* 15:0 - Distro specific identification
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define HV_LINUX_VENDOR_ID 0x8100
|
|
||||||
|
|
||||||
struct hv_reenlightenment_control {
|
struct hv_reenlightenment_control {
|
||||||
__u64 vector:8;
|
__u64 vector:8;
|
||||||
__u64 reserved1:8;
|
__u64 reserved1:8;
|
||||||
@ -355,31 +270,12 @@ struct hv_tsc_emulation_status {
|
|||||||
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
|
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
|
||||||
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
|
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
|
||||||
|
|
||||||
/*
|
|
||||||
* Crash notification (HV_X64_MSR_CRASH_CTL) flags.
|
|
||||||
*/
|
|
||||||
#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
|
|
||||||
#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
|
|
||||||
#define HV_X64_MSR_CRASH_PARAMS \
|
#define HV_X64_MSR_CRASH_PARAMS \
|
||||||
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
|
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
|
||||||
|
|
||||||
#define HV_IPI_LOW_VECTOR 0x10
|
#define HV_IPI_LOW_VECTOR 0x10
|
||||||
#define HV_IPI_HIGH_VECTOR 0xff
|
#define HV_IPI_HIGH_VECTOR 0xff
|
||||||
|
|
||||||
/* Declare the various hypercall operations. */
|
|
||||||
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
|
|
||||||
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
|
|
||||||
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
|
|
||||||
#define HVCALL_SEND_IPI 0x000b
|
|
||||||
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
|
|
||||||
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
|
|
||||||
#define HVCALL_SEND_IPI_EX 0x0015
|
|
||||||
#define HVCALL_POST_MESSAGE 0x005c
|
|
||||||
#define HVCALL_SIGNAL_EVENT 0x005d
|
|
||||||
#define HVCALL_RETARGET_INTERRUPT 0x007e
|
|
||||||
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
|
|
||||||
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
|
|
||||||
|
|
||||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
|
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
|
||||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
|
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
|
||||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
|
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
|
||||||
@ -391,75 +287,6 @@ struct hv_tsc_emulation_status {
|
|||||||
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
|
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
|
||||||
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
|
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
|
||||||
|
|
||||||
#define HV_PROCESSOR_POWER_STATE_C0 0
|
|
||||||
#define HV_PROCESSOR_POWER_STATE_C1 1
|
|
||||||
#define HV_PROCESSOR_POWER_STATE_C2 2
|
|
||||||
#define HV_PROCESSOR_POWER_STATE_C3 3
|
|
||||||
|
|
||||||
#define HV_FLUSH_ALL_PROCESSORS BIT(0)
|
|
||||||
#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
|
|
||||||
#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
|
|
||||||
#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
|
|
||||||
|
|
||||||
enum HV_GENERIC_SET_FORMAT {
|
|
||||||
HV_GENERIC_SET_SPARSE_4K,
|
|
||||||
HV_GENERIC_SET_ALL,
|
|
||||||
};
|
|
||||||
|
|
||||||
#define HV_PARTITION_ID_SELF ((u64)-1)
|
|
||||||
|
|
||||||
#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
|
|
||||||
#define HV_HYPERCALL_FAST_BIT BIT(16)
|
|
||||||
#define HV_HYPERCALL_VARHEAD_OFFSET 17
|
|
||||||
#define HV_HYPERCALL_REP_COMP_OFFSET 32
|
|
||||||
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
|
|
||||||
#define HV_HYPERCALL_REP_START_OFFSET 48
|
|
||||||
#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
|
|
||||||
|
|
||||||
/* hypercall status code */
|
|
||||||
#define HV_STATUS_SUCCESS 0
|
|
||||||
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
|
|
||||||
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
|
|
||||||
#define HV_STATUS_INVALID_ALIGNMENT 4
|
|
||||||
#define HV_STATUS_INVALID_PARAMETER 5
|
|
||||||
#define HV_STATUS_INSUFFICIENT_MEMORY 11
|
|
||||||
#define HV_STATUS_INVALID_PORT_ID 17
|
|
||||||
#define HV_STATUS_INVALID_CONNECTION_ID 18
|
|
||||||
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The Hyper-V TimeRefCount register and the TSC
|
|
||||||
* page provide a guest VM clock with 100ns tick rate
|
|
||||||
*/
|
|
||||||
#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
|
|
||||||
|
|
||||||
typedef struct _HV_REFERENCE_TSC_PAGE {
|
|
||||||
__u32 tsc_sequence;
|
|
||||||
__u32 res1;
|
|
||||||
__u64 tsc_scale;
|
|
||||||
__s64 tsc_offset;
|
|
||||||
} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
|
|
||||||
|
|
||||||
/* Define the number of synthetic interrupt sources. */
|
|
||||||
#define HV_SYNIC_SINT_COUNT (16)
|
|
||||||
/* Define the expected SynIC version. */
|
|
||||||
#define HV_SYNIC_VERSION_1 (0x1)
|
|
||||||
/* Valid SynIC vectors are 16-255. */
|
|
||||||
#define HV_SYNIC_FIRST_VALID_VECTOR (16)
|
|
||||||
|
|
||||||
#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
|
|
||||||
#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
|
|
||||||
#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
|
|
||||||
#define HV_SYNIC_SINT_MASKED (1ULL << 16)
|
|
||||||
#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
|
|
||||||
#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
|
|
||||||
|
|
||||||
#define HV_SYNIC_STIMER_COUNT (4)
|
|
||||||
|
|
||||||
/* Define synthetic interrupt controller message constants. */
|
|
||||||
#define HV_MESSAGE_SIZE (256)
|
|
||||||
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
|
|
||||||
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
|
|
||||||
|
|
||||||
/* Define hypervisor message types. */
|
/* Define hypervisor message types. */
|
||||||
enum hv_message_type {
|
enum hv_message_type {
|
||||||
@ -470,76 +297,25 @@ enum hv_message_type {
|
|||||||
HVMSG_GPA_INTERCEPT = 0x80000001,
|
HVMSG_GPA_INTERCEPT = 0x80000001,
|
||||||
|
|
||||||
/* Timer notification messages. */
|
/* Timer notification messages. */
|
||||||
HVMSG_TIMER_EXPIRED = 0x80000010,
|
HVMSG_TIMER_EXPIRED = 0x80000010,
|
||||||
|
|
||||||
/* Error messages. */
|
/* Error messages. */
|
||||||
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
|
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
|
||||||
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
|
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
|
||||||
HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
|
HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
|
||||||
|
|
||||||
/* Trace buffer complete messages. */
|
/* Trace buffer complete messages. */
|
||||||
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
|
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
|
||||||
|
|
||||||
/* Platform-specific processor intercept messages. */
|
/* Platform-specific processor intercept messages. */
|
||||||
HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
|
HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
|
||||||
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
|
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
|
||||||
HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
|
HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
|
||||||
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
|
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
|
||||||
HVMSG_X64_APIC_EOI = 0x80010004,
|
HVMSG_X64_APIC_EOI = 0x80010004,
|
||||||
HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
|
HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Define synthetic interrupt controller message flags. */
|
|
||||||
union hv_message_flags {
|
|
||||||
__u8 asu8;
|
|
||||||
struct {
|
|
||||||
__u8 msg_pending:1;
|
|
||||||
__u8 reserved:7;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Define port identifier type. */
|
|
||||||
union hv_port_id {
|
|
||||||
__u32 asu32;
|
|
||||||
struct {
|
|
||||||
__u32 id:24;
|
|
||||||
__u32 reserved:8;
|
|
||||||
} __packed u;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Define synthetic interrupt controller message header. */
|
|
||||||
struct hv_message_header {
|
|
||||||
__u32 message_type;
|
|
||||||
__u8 payload_size;
|
|
||||||
union hv_message_flags message_flags;
|
|
||||||
__u8 reserved[2];
|
|
||||||
union {
|
|
||||||
__u64 sender;
|
|
||||||
union hv_port_id port;
|
|
||||||
};
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* Define synthetic interrupt controller message format. */
|
|
||||||
struct hv_message {
|
|
||||||
struct hv_message_header header;
|
|
||||||
union {
|
|
||||||
__u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
|
|
||||||
} u;
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* Define the synthetic interrupt message page layout. */
|
|
||||||
struct hv_message_page {
|
|
||||||
struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* Define timer message payload structure. */
|
|
||||||
struct hv_timer_message_payload {
|
|
||||||
__u32 timer_index;
|
|
||||||
__u32 reserved;
|
|
||||||
__u64 expiration_time; /* When the timer expired */
|
|
||||||
__u64 delivery_time; /* When the message was delivered */
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
struct hv_nested_enlightenments_control {
|
struct hv_nested_enlightenments_control {
|
||||||
struct {
|
struct {
|
||||||
__u32 directhypercall:1;
|
__u32 directhypercall:1;
|
||||||
@ -767,187 +543,11 @@ struct hv_enlightened_vmcs {
|
|||||||
|
|
||||||
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
|
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
|
||||||
|
|
||||||
/* Define synthetic interrupt controller flag constants. */
|
|
||||||
#define HV_EVENT_FLAGS_COUNT (256 * 8)
|
|
||||||
#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Synthetic timer configuration.
|
|
||||||
*/
|
|
||||||
union hv_stimer_config {
|
|
||||||
u64 as_uint64;
|
|
||||||
struct {
|
|
||||||
u64 enable:1;
|
|
||||||
u64 periodic:1;
|
|
||||||
u64 lazy:1;
|
|
||||||
u64 auto_enable:1;
|
|
||||||
u64 apic_vector:8;
|
|
||||||
u64 direct_mode:1;
|
|
||||||
u64 reserved_z0:3;
|
|
||||||
u64 sintx:4;
|
|
||||||
u64 reserved_z1:44;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* Define the synthetic interrupt controller event flags format. */
|
|
||||||
union hv_synic_event_flags {
|
|
||||||
unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Define SynIC control register. */
|
|
||||||
union hv_synic_scontrol {
|
|
||||||
u64 as_uint64;
|
|
||||||
struct {
|
|
||||||
u64 enable:1;
|
|
||||||
u64 reserved:63;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Define synthetic interrupt source. */
|
|
||||||
union hv_synic_sint {
|
|
||||||
u64 as_uint64;
|
|
||||||
struct {
|
|
||||||
u64 vector:8;
|
|
||||||
u64 reserved1:8;
|
|
||||||
u64 masked:1;
|
|
||||||
u64 auto_eoi:1;
|
|
||||||
u64 polling:1;
|
|
||||||
u64 reserved2:45;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Define the format of the SIMP register */
|
|
||||||
union hv_synic_simp {
|
|
||||||
u64 as_uint64;
|
|
||||||
struct {
|
|
||||||
u64 simp_enabled:1;
|
|
||||||
u64 preserved:11;
|
|
||||||
u64 base_simp_gpa:52;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Define the format of the SIEFP register */
|
|
||||||
union hv_synic_siefp {
|
|
||||||
u64 as_uint64;
|
|
||||||
struct {
|
|
||||||
u64 siefp_enabled:1;
|
|
||||||
u64 preserved:11;
|
|
||||||
u64 base_siefp_gpa:52;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct hv_vpset {
|
|
||||||
u64 format;
|
|
||||||
u64 valid_bank_mask;
|
|
||||||
u64 bank_contents[];
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* HvCallSendSyntheticClusterIpi hypercall */
|
|
||||||
struct hv_send_ipi {
|
|
||||||
u32 vector;
|
|
||||||
u32 reserved;
|
|
||||||
u64 cpu_mask;
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* HvCallSendSyntheticClusterIpiEx hypercall */
|
|
||||||
struct hv_send_ipi_ex {
|
|
||||||
u32 vector;
|
|
||||||
u32 reserved;
|
|
||||||
struct hv_vpset vp_set;
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* HvFlushGuestPhysicalAddressSpace hypercalls */
|
|
||||||
struct hv_guest_mapping_flush {
|
|
||||||
u64 address_space;
|
|
||||||
u64 flags;
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
|
|
||||||
* by the bitwidth of "additional_pages" in union hv_gpa_page_range.
|
|
||||||
*/
|
|
||||||
#define HV_MAX_FLUSH_PAGES (2048)
|
|
||||||
|
|
||||||
/* HvFlushGuestPhysicalAddressList hypercall */
|
|
||||||
union hv_gpa_page_range {
|
|
||||||
u64 address_space;
|
|
||||||
struct {
|
|
||||||
u64 additional_pages:11;
|
|
||||||
u64 largepage:1;
|
|
||||||
u64 basepfn:52;
|
|
||||||
} page;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* All input flush parameters should be in single page. The max flush
|
|
||||||
* count is equal with how many entries of union hv_gpa_page_range can
|
|
||||||
* be populated into the input parameter page.
|
|
||||||
*/
|
|
||||||
#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
|
|
||||||
sizeof(union hv_gpa_page_range))
|
|
||||||
|
|
||||||
struct hv_guest_mapping_flush_list {
|
|
||||||
u64 address_space;
|
|
||||||
u64 flags;
|
|
||||||
union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
|
|
||||||
};
|
|
||||||
|
|
||||||
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
|
|
||||||
struct hv_tlb_flush {
|
|
||||||
u64 address_space;
|
|
||||||
u64 flags;
|
|
||||||
u64 processor_mask;
|
|
||||||
u64 gva_list[];
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
|
|
||||||
struct hv_tlb_flush_ex {
|
|
||||||
u64 address_space;
|
|
||||||
u64 flags;
|
|
||||||
struct hv_vpset hv_vp_set;
|
|
||||||
u64 gva_list[];
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
struct hv_partition_assist_pg {
|
struct hv_partition_assist_pg {
|
||||||
u32 tlb_lock_count;
|
u32 tlb_lock_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
union hv_msi_entry {
|
|
||||||
u64 as_uint64;
|
|
||||||
struct {
|
|
||||||
u32 address;
|
|
||||||
u32 data;
|
|
||||||
} __packed;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct hv_interrupt_entry {
|
#include <asm-generic/hyperv-tlfs.h>
|
||||||
u32 source; /* 1 for MSI(-X) */
|
|
||||||
u32 reserved1;
|
|
||||||
union hv_msi_entry msi_entry;
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* flags for hv_device_interrupt_target.flags
|
|
||||||
*/
|
|
||||||
#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
|
|
||||||
#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
|
|
||||||
|
|
||||||
struct hv_device_interrupt_target {
|
|
||||||
u32 vector;
|
|
||||||
u32 flags;
|
|
||||||
union {
|
|
||||||
u64 vp_mask;
|
|
||||||
struct hv_vpset vp_set;
|
|
||||||
};
|
|
||||||
} __packed;
|
|
||||||
|
|
||||||
/* HvRetargetDeviceInterrupt hypercall */
|
|
||||||
struct hv_retarget_device_interrupt {
|
|
||||||
u64 partition_id; /* use "self" */
|
|
||||||
u64 device_id;
|
|
||||||
struct hv_interrupt_entry int_entry;
|
|
||||||
u64 reserved2;
|
|
||||||
struct hv_device_interrupt_target int_target;
|
|
||||||
} __packed __aligned(8);
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -866,7 +866,7 @@ struct kvm_hv {
|
|||||||
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
|
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
|
||||||
u64 hv_crash_ctl;
|
u64 hv_crash_ctl;
|
||||||
|
|
||||||
HV_REFERENCE_TSC_PAGE tsc_ref;
|
struct ms_hyperv_tsc_page tsc_ref;
|
||||||
|
|
||||||
struct idr conn_to_evt;
|
struct idr conn_to_evt;
|
||||||
|
|
||||||
|
@ -900,7 +900,7 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
|
|||||||
* These two equivalencies are implemented in this function.
|
* These two equivalencies are implemented in this function.
|
||||||
*/
|
*/
|
||||||
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
|
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
|
||||||
HV_REFERENCE_TSC_PAGE *tsc_ref)
|
struct ms_hyperv_tsc_page *tsc_ref)
|
||||||
{
|
{
|
||||||
u64 max_mul;
|
u64 max_mul;
|
||||||
|
|
||||||
@ -941,7 +941,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
|||||||
u64 gfn;
|
u64 gfn;
|
||||||
|
|
||||||
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
|
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
|
||||||
BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
|
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
|
||||||
|
|
||||||
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
||||||
return;
|
return;
|
||||||
|
@ -289,6 +289,34 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
|
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
|
||||||
|
*
|
||||||
|
* CHANNELMSG_MODIFYCHANNEL messages are asynchronous. Also, Hyper-V does not
|
||||||
|
* ACK such messages. IOW we can't know when the host will stop interrupting
|
||||||
|
* the "old" vCPU and start interrupting the "new" vCPU for the given channel.
|
||||||
|
*
|
||||||
|
* The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
|
||||||
|
* VERSION_WIN10_V4_1.
|
||||||
|
*/
|
||||||
|
int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
|
||||||
|
{
|
||||||
|
struct vmbus_channel_modifychannel conn_msg;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
memset(&conn_msg, 0, sizeof(conn_msg));
|
||||||
|
conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
|
||||||
|
conn_msg.child_relid = child_relid;
|
||||||
|
conn_msg.target_vp = target_vp;
|
||||||
|
|
||||||
|
ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
|
||||||
|
|
||||||
|
trace_vmbus_send_modifychannel(&conn_msg, ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* create_gpadl_header - Creates a gpadl for the specified buffer
|
* create_gpadl_header - Creates a gpadl for the specified buffer
|
||||||
*/
|
*/
|
||||||
@ -594,35 +622,31 @@ post_msg_err:
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
|
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
|
||||||
|
|
||||||
static void reset_channel_cb(void *arg)
|
|
||||||
{
|
|
||||||
struct vmbus_channel *channel = arg;
|
|
||||||
|
|
||||||
channel->onchannel_callback = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void vmbus_reset_channel_cb(struct vmbus_channel *channel)
|
void vmbus_reset_channel_cb(struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* vmbus_on_event(), running in the per-channel tasklet, can race
|
* vmbus_on_event(), running in the per-channel tasklet, can race
|
||||||
* with vmbus_close_internal() in the case of SMP guest, e.g., when
|
* with vmbus_close_internal() in the case of SMP guest, e.g., when
|
||||||
* the former is accessing channel->inbound.ring_buffer, the latter
|
* the former is accessing channel->inbound.ring_buffer, the latter
|
||||||
* could be freeing the ring_buffer pages, so here we must stop it
|
* could be freeing the ring_buffer pages, so here we must stop it
|
||||||
* first.
|
* first.
|
||||||
|
*
|
||||||
|
* vmbus_chan_sched() might call the netvsc driver callback function
|
||||||
|
* that ends up scheduling NAPI work that accesses the ring buffer.
|
||||||
|
* At this point, we have to ensure that any such work is completed
|
||||||
|
* and that the channel ring buffer is no longer being accessed, cf.
|
||||||
|
* the calls to napi_disable() in netvsc_device_remove().
|
||||||
*/
|
*/
|
||||||
tasklet_disable(&channel->callback_event);
|
tasklet_disable(&channel->callback_event);
|
||||||
|
|
||||||
channel->sc_creation_callback = NULL;
|
/* See the inline comments in vmbus_chan_sched(). */
|
||||||
|
spin_lock_irqsave(&channel->sched_lock, flags);
|
||||||
|
channel->onchannel_callback = NULL;
|
||||||
|
spin_unlock_irqrestore(&channel->sched_lock, flags);
|
||||||
|
|
||||||
/* Stop the callback asap */
|
channel->sc_creation_callback = NULL;
|
||||||
if (channel->target_cpu != get_cpu()) {
|
|
||||||
put_cpu();
|
|
||||||
smp_call_function_single(channel->target_cpu, reset_channel_cb,
|
|
||||||
channel, true);
|
|
||||||
} else {
|
|
||||||
reset_channel_cb(channel);
|
|
||||||
put_cpu();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Re-enable tasklet for use on re-open */
|
/* Re-enable tasklet for use on re-open */
|
||||||
tasklet_enable(&channel->callback_event);
|
tasklet_enable(&channel->callback_event);
|
||||||
|
@ -18,14 +18,15 @@
|
|||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/completion.h>
|
#include <linux/completion.h>
|
||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
|
#include <linux/cpu.h>
|
||||||
#include <linux/hyperv.h>
|
#include <linux/hyperv.h>
|
||||||
#include <asm/mshyperv.h>
|
#include <asm/mshyperv.h>
|
||||||
|
|
||||||
#include "hyperv_vmbus.h"
|
#include "hyperv_vmbus.h"
|
||||||
|
|
||||||
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
|
static void init_vp_index(struct vmbus_channel *channel);
|
||||||
|
|
||||||
static const struct vmbus_device vmbus_devs[] = {
|
const struct vmbus_device vmbus_devs[] = {
|
||||||
/* IDE */
|
/* IDE */
|
||||||
{ .dev_type = HV_IDE,
|
{ .dev_type = HV_IDE,
|
||||||
HV_IDE_GUID,
|
HV_IDE_GUID,
|
||||||
@ -315,11 +316,11 @@ static struct vmbus_channel *alloc_channel(void)
|
|||||||
if (!channel)
|
if (!channel)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
spin_lock_init(&channel->sched_lock);
|
||||||
spin_lock_init(&channel->lock);
|
spin_lock_init(&channel->lock);
|
||||||
init_completion(&channel->rescind_event);
|
init_completion(&channel->rescind_event);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&channel->sc_list);
|
INIT_LIST_HEAD(&channel->sc_list);
|
||||||
INIT_LIST_HEAD(&channel->percpu_list);
|
|
||||||
|
|
||||||
tasklet_init(&channel->callback_event,
|
tasklet_init(&channel->callback_event,
|
||||||
vmbus_on_event, (unsigned long)channel);
|
vmbus_on_event, (unsigned long)channel);
|
||||||
@ -340,23 +341,49 @@ static void free_channel(struct vmbus_channel *channel)
|
|||||||
kobject_put(&channel->kobj);
|
kobject_put(&channel->kobj);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void percpu_channel_enq(void *arg)
|
void vmbus_channel_map_relid(struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
struct vmbus_channel *channel = arg;
|
if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
|
||||||
struct hv_per_cpu_context *hv_cpu
|
return;
|
||||||
= this_cpu_ptr(hv_context.cpu_context);
|
/*
|
||||||
|
* The mapping of the channel's relid is visible from the CPUs that
|
||||||
list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
|
* execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
|
||||||
|
* execute:
|
||||||
|
*
|
||||||
|
* (a) In the "normal (i.e., not resuming from hibernation)" path,
|
||||||
|
* the full barrier in smp_store_mb() guarantees that the store
|
||||||
|
* is propagated to all CPUs before the add_channel_work work
|
||||||
|
* is queued. In turn, add_channel_work is queued before the
|
||||||
|
* channel's ring buffer is allocated/initialized and the
|
||||||
|
* OPENCHANNEL message for the channel is sent in vmbus_open().
|
||||||
|
* Hyper-V won't start sending the interrupts for the channel
|
||||||
|
* before the OPENCHANNEL message is acked. The memory barrier
|
||||||
|
* in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
|
||||||
|
* that vmbus_chan_sched() must find the channel's relid in
|
||||||
|
* recv_int_page before retrieving the channel pointer from the
|
||||||
|
* array of channels.
|
||||||
|
*
|
||||||
|
* (b) In the "resuming from hibernation" path, the smp_store_mb()
|
||||||
|
* guarantees that the store is propagated to all CPUs before
|
||||||
|
* the VMBus connection is marked as ready for the resume event
|
||||||
|
* (cf. check_ready_for_resume_event()). The interrupt handler
|
||||||
|
* of the VMBus driver and vmbus_chan_sched() can not run before
|
||||||
|
* vmbus_bus_resume() has completed execution (cf. resume_noirq).
|
||||||
|
*/
|
||||||
|
smp_store_mb(
|
||||||
|
vmbus_connection.channels[channel->offermsg.child_relid],
|
||||||
|
channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void percpu_channel_deq(void *arg)
|
void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
struct vmbus_channel *channel = arg;
|
if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
|
||||||
|
return;
|
||||||
list_del_rcu(&channel->percpu_list);
|
WRITE_ONCE(
|
||||||
|
vmbus_connection.channels[channel->offermsg.child_relid],
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void vmbus_release_relid(u32 relid)
|
static void vmbus_release_relid(u32 relid)
|
||||||
{
|
{
|
||||||
struct vmbus_channel_relid_released msg;
|
struct vmbus_channel_relid_released msg;
|
||||||
@ -373,39 +400,43 @@ static void vmbus_release_relid(u32 relid)
|
|||||||
|
|
||||||
void hv_process_channel_removal(struct vmbus_channel *channel)
|
void hv_process_channel_removal(struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
struct vmbus_channel *primary_channel;
|
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
|
lockdep_assert_held(&vmbus_connection.channel_mutex);
|
||||||
BUG_ON(!channel->rescind);
|
BUG_ON(!channel->rescind);
|
||||||
|
|
||||||
if (channel->target_cpu != get_cpu()) {
|
/*
|
||||||
put_cpu();
|
* hv_process_channel_removal() could find INVALID_RELID only for
|
||||||
smp_call_function_single(channel->target_cpu,
|
* hv_sock channels. See the inline comments in vmbus_onoffer().
|
||||||
percpu_channel_deq, channel, true);
|
*/
|
||||||
} else {
|
WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
|
||||||
percpu_channel_deq(channel);
|
!is_hvsock_channel(channel));
|
||||||
put_cpu();
|
|
||||||
}
|
/*
|
||||||
|
* Upon suspend, an in-use hv_sock channel is removed from the array of
|
||||||
|
* channels and the relid is invalidated. After hibernation, when the
|
||||||
|
* user-space application destroys the channel, it's unnecessary and
|
||||||
|
* unsafe to remove the channel from the array of channels. See also
|
||||||
|
* the inline comments before the call of vmbus_release_relid() below.
|
||||||
|
*/
|
||||||
|
if (channel->offermsg.child_relid != INVALID_RELID)
|
||||||
|
vmbus_channel_unmap_relid(channel);
|
||||||
|
|
||||||
if (channel->primary_channel == NULL) {
|
if (channel->primary_channel == NULL) {
|
||||||
list_del(&channel->listentry);
|
list_del(&channel->listentry);
|
||||||
|
|
||||||
primary_channel = channel;
|
|
||||||
} else {
|
} else {
|
||||||
primary_channel = channel->primary_channel;
|
struct vmbus_channel *primary_channel = channel->primary_channel;
|
||||||
spin_lock_irqsave(&primary_channel->lock, flags);
|
spin_lock_irqsave(&primary_channel->lock, flags);
|
||||||
list_del(&channel->sc_list);
|
list_del(&channel->sc_list);
|
||||||
spin_unlock_irqrestore(&primary_channel->lock, flags);
|
spin_unlock_irqrestore(&primary_channel->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to free the bit for init_vp_index() to work in the case
|
* If this is a "perf" channel, updates the hv_numa_map[] masks so that
|
||||||
* of sub-channel, when we reload drivers like hv_netvsc.
|
* init_vp_index() can (re-)use the CPU.
|
||||||
*/
|
*/
|
||||||
if (channel->affinity_policy == HV_LOCALIZED)
|
if (hv_is_perf_channel(channel))
|
||||||
cpumask_clear_cpu(channel->target_cpu,
|
hv_clear_alloced_cpu(channel->target_cpu);
|
||||||
&primary_channel->alloced_cpus_in_node);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
|
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
|
||||||
@ -440,23 +471,8 @@ static void vmbus_add_channel_work(struct work_struct *work)
|
|||||||
container_of(work, struct vmbus_channel, add_channel_work);
|
container_of(work, struct vmbus_channel, add_channel_work);
|
||||||
struct vmbus_channel *primary_channel = newchannel->primary_channel;
|
struct vmbus_channel *primary_channel = newchannel->primary_channel;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
u16 dev_type;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
dev_type = hv_get_dev_type(newchannel);
|
|
||||||
|
|
||||||
init_vp_index(newchannel, dev_type);
|
|
||||||
|
|
||||||
if (newchannel->target_cpu != get_cpu()) {
|
|
||||||
put_cpu();
|
|
||||||
smp_call_function_single(newchannel->target_cpu,
|
|
||||||
percpu_channel_enq,
|
|
||||||
newchannel, true);
|
|
||||||
} else {
|
|
||||||
percpu_channel_enq(newchannel);
|
|
||||||
put_cpu();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This state is used to indicate a successful open
|
* This state is used to indicate a successful open
|
||||||
* so that when we do close the channel normally, we
|
* so that when we do close the channel normally, we
|
||||||
@ -488,7 +504,7 @@ static void vmbus_add_channel_work(struct work_struct *work)
|
|||||||
if (!newchannel->device_obj)
|
if (!newchannel->device_obj)
|
||||||
goto err_deq_chan;
|
goto err_deq_chan;
|
||||||
|
|
||||||
newchannel->device_obj->device_id = dev_type;
|
newchannel->device_obj->device_id = newchannel->device_id;
|
||||||
/*
|
/*
|
||||||
* Add the new device to the bus. This will kick off device-driver
|
* Add the new device to the bus. This will kick off device-driver
|
||||||
* binding which eventually invokes the device driver's AddDevice()
|
* binding which eventually invokes the device driver's AddDevice()
|
||||||
@ -523,17 +539,10 @@ err_deq_chan:
|
|||||||
spin_unlock_irqrestore(&primary_channel->lock, flags);
|
spin_unlock_irqrestore(&primary_channel->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_unlock(&vmbus_connection.channel_mutex);
|
/* vmbus_process_offer() has mapped the channel. */
|
||||||
|
vmbus_channel_unmap_relid(newchannel);
|
||||||
|
|
||||||
if (newchannel->target_cpu != get_cpu()) {
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
||||||
put_cpu();
|
|
||||||
smp_call_function_single(newchannel->target_cpu,
|
|
||||||
percpu_channel_deq,
|
|
||||||
newchannel, true);
|
|
||||||
} else {
|
|
||||||
percpu_channel_deq(newchannel);
|
|
||||||
put_cpu();
|
|
||||||
}
|
|
||||||
|
|
||||||
vmbus_release_relid(newchannel->offermsg.child_relid);
|
vmbus_release_relid(newchannel->offermsg.child_relid);
|
||||||
|
|
||||||
@ -551,8 +560,35 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
|
|||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
bool fnew = true;
|
bool fnew = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Synchronize vmbus_process_offer() and CPU hotplugging:
|
||||||
|
*
|
||||||
|
* CPU1 CPU2
|
||||||
|
*
|
||||||
|
* [vmbus_process_offer()] [Hot removal of the CPU]
|
||||||
|
*
|
||||||
|
* CPUS_READ_LOCK CPUS_WRITE_LOCK
|
||||||
|
* LOAD cpu_online_mask SEARCH chn_list
|
||||||
|
* STORE target_cpu LOAD target_cpu
|
||||||
|
* INSERT chn_list STORE cpu_online_mask
|
||||||
|
* CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK
|
||||||
|
*
|
||||||
|
* Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
|
||||||
|
* CPU2's SEARCH from *not* seeing CPU1's INSERT
|
||||||
|
*
|
||||||
|
* Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
|
||||||
|
* CPU2's LOAD from *not* seeing CPU1's STORE
|
||||||
|
*/
|
||||||
|
cpus_read_lock();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Serializes the modifications of the chn_list list as well as
|
||||||
|
* the accesses to next_numa_node_id in init_vp_index().
|
||||||
|
*/
|
||||||
mutex_lock(&vmbus_connection.channel_mutex);
|
mutex_lock(&vmbus_connection.channel_mutex);
|
||||||
|
|
||||||
|
init_vp_index(newchannel);
|
||||||
|
|
||||||
/* Remember the channels that should be cleaned up upon suspend. */
|
/* Remember the channels that should be cleaned up upon suspend. */
|
||||||
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
|
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
|
||||||
atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
|
atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
|
||||||
@ -599,7 +635,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
|
|||||||
spin_unlock_irqrestore(&channel->lock, flags);
|
spin_unlock_irqrestore(&channel->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vmbus_channel_map_relid(newchannel);
|
||||||
|
|
||||||
mutex_unlock(&vmbus_connection.channel_mutex);
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
||||||
|
cpus_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* vmbus_process_offer() mustn't call channel->sc_creation_callback()
|
* vmbus_process_offer() mustn't call channel->sc_creation_callback()
|
||||||
@ -632,73 +671,61 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
|
|||||||
* We use this state to statically distribute the channel interrupt load.
|
* We use this state to statically distribute the channel interrupt load.
|
||||||
*/
|
*/
|
||||||
static int next_numa_node_id;
|
static int next_numa_node_id;
|
||||||
/*
|
|
||||||
* init_vp_index() accesses global variables like next_numa_node_id, and
|
|
||||||
* it can run concurrently for primary channels and sub-channels: see
|
|
||||||
* vmbus_process_offer(), so we need the lock to protect the global
|
|
||||||
* variables.
|
|
||||||
*/
|
|
||||||
static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Starting with Win8, we can statically distribute the incoming
|
* Starting with Win8, we can statically distribute the incoming
|
||||||
* channel interrupt load by binding a channel to VCPU.
|
* channel interrupt load by binding a channel to VCPU.
|
||||||
* We distribute the interrupt loads to one or more NUMA nodes based on
|
|
||||||
* the channel's affinity_policy.
|
|
||||||
*
|
*
|
||||||
* For pre-win8 hosts or non-performance critical channels we assign the
|
* For pre-win8 hosts or non-performance critical channels we assign the
|
||||||
* first CPU in the first NUMA node.
|
* VMBUS_CONNECT_CPU.
|
||||||
|
*
|
||||||
|
* Starting with win8, performance critical channels will be distributed
|
||||||
|
* evenly among all the available NUMA nodes. Once the node is assigned,
|
||||||
|
* we will assign the CPU based on a simple round robin scheme.
|
||||||
*/
|
*/
|
||||||
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
|
static void init_vp_index(struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
u32 cur_cpu;
|
bool perf_chn = hv_is_perf_channel(channel);
|
||||||
bool perf_chn = vmbus_devs[dev_type].perf_device;
|
|
||||||
struct vmbus_channel *primary = channel->primary_channel;
|
|
||||||
int next_node;
|
|
||||||
cpumask_var_t available_mask;
|
cpumask_var_t available_mask;
|
||||||
struct cpumask *alloced_mask;
|
struct cpumask *alloced_mask;
|
||||||
|
u32 target_cpu;
|
||||||
|
int numa_node;
|
||||||
|
|
||||||
if ((vmbus_proto_version == VERSION_WS2008) ||
|
if ((vmbus_proto_version == VERSION_WS2008) ||
|
||||||
(vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
|
(vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
|
||||||
!alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
|
!alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
|
||||||
/*
|
/*
|
||||||
* Prior to win8, all channel interrupts are
|
* Prior to win8, all channel interrupts are
|
||||||
* delivered on cpu 0.
|
* delivered on VMBUS_CONNECT_CPU.
|
||||||
* Also if the channel is not a performance critical
|
* Also if the channel is not a performance critical
|
||||||
* channel, bind it to cpu 0.
|
* channel, bind it to VMBUS_CONNECT_CPU.
|
||||||
* In case alloc_cpumask_var() fails, bind it to cpu 0.
|
* In case alloc_cpumask_var() fails, bind it to
|
||||||
|
* VMBUS_CONNECT_CPU.
|
||||||
*/
|
*/
|
||||||
channel->numa_node = 0;
|
channel->numa_node = cpu_to_node(VMBUS_CONNECT_CPU);
|
||||||
channel->target_cpu = 0;
|
channel->target_cpu = VMBUS_CONNECT_CPU;
|
||||||
channel->target_vp = hv_cpu_number_to_vp_number(0);
|
channel->target_vp =
|
||||||
|
hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
|
||||||
|
if (perf_chn)
|
||||||
|
hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock(&bind_channel_to_cpu_lock);
|
while (true) {
|
||||||
|
numa_node = next_numa_node_id++;
|
||||||
/*
|
if (numa_node == nr_node_ids) {
|
||||||
* Based on the channel affinity policy, we will assign the NUMA
|
next_numa_node_id = 0;
|
||||||
* nodes.
|
continue;
|
||||||
*/
|
|
||||||
|
|
||||||
if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
|
|
||||||
while (true) {
|
|
||||||
next_node = next_numa_node_id++;
|
|
||||||
if (next_node == nr_node_ids) {
|
|
||||||
next_node = next_numa_node_id = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (cpumask_empty(cpumask_of_node(next_node)))
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
channel->numa_node = next_node;
|
if (cpumask_empty(cpumask_of_node(numa_node)))
|
||||||
primary = channel;
|
continue;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
|
channel->numa_node = numa_node;
|
||||||
|
alloced_mask = &hv_context.hv_numa_map[numa_node];
|
||||||
|
|
||||||
if (cpumask_weight(alloced_mask) ==
|
if (cpumask_weight(alloced_mask) ==
|
||||||
cpumask_weight(cpumask_of_node(primary->numa_node))) {
|
cpumask_weight(cpumask_of_node(numa_node))) {
|
||||||
/*
|
/*
|
||||||
* We have cycled through all the CPUs in the node;
|
* We have cycled through all the CPUs in the node;
|
||||||
* reset the alloced map.
|
* reset the alloced map.
|
||||||
@ -706,59 +733,13 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
|
|||||||
cpumask_clear(alloced_mask);
|
cpumask_clear(alloced_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
cpumask_xor(available_mask, alloced_mask,
|
cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
|
||||||
cpumask_of_node(primary->numa_node));
|
|
||||||
|
|
||||||
cur_cpu = -1;
|
target_cpu = cpumask_first(available_mask);
|
||||||
|
cpumask_set_cpu(target_cpu, alloced_mask);
|
||||||
|
|
||||||
if (primary->affinity_policy == HV_LOCALIZED) {
|
channel->target_cpu = target_cpu;
|
||||||
/*
|
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
|
||||||
* Normally Hyper-V host doesn't create more subchannels
|
|
||||||
* than there are VCPUs on the node but it is possible when not
|
|
||||||
* all present VCPUs on the node are initialized by guest.
|
|
||||||
* Clear the alloced_cpus_in_node to start over.
|
|
||||||
*/
|
|
||||||
if (cpumask_equal(&primary->alloced_cpus_in_node,
|
|
||||||
cpumask_of_node(primary->numa_node)))
|
|
||||||
cpumask_clear(&primary->alloced_cpus_in_node);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cur_cpu = cpumask_next(cur_cpu, available_mask);
|
|
||||||
if (cur_cpu >= nr_cpu_ids) {
|
|
||||||
cur_cpu = -1;
|
|
||||||
cpumask_copy(available_mask,
|
|
||||||
cpumask_of_node(primary->numa_node));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (primary->affinity_policy == HV_LOCALIZED) {
|
|
||||||
/*
|
|
||||||
* NOTE: in the case of sub-channel, we clear the
|
|
||||||
* sub-channel related bit(s) in
|
|
||||||
* primary->alloced_cpus_in_node in
|
|
||||||
* hv_process_channel_removal(), so when we
|
|
||||||
* reload drivers like hv_netvsc in SMP guest, here
|
|
||||||
* we're able to re-allocate
|
|
||||||
* bit from primary->alloced_cpus_in_node.
|
|
||||||
*/
|
|
||||||
if (!cpumask_test_cpu(cur_cpu,
|
|
||||||
&primary->alloced_cpus_in_node)) {
|
|
||||||
cpumask_set_cpu(cur_cpu,
|
|
||||||
&primary->alloced_cpus_in_node);
|
|
||||||
cpumask_set_cpu(cur_cpu, alloced_mask);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cpumask_set_cpu(cur_cpu, alloced_mask);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
channel->target_cpu = cur_cpu;
|
|
||||||
channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
|
|
||||||
|
|
||||||
spin_unlock(&bind_channel_to_cpu_lock);
|
|
||||||
|
|
||||||
free_cpumask_var(available_mask);
|
free_cpumask_var(available_mask);
|
||||||
}
|
}
|
||||||
@ -890,6 +871,7 @@ static void vmbus_setup_channel_state(struct vmbus_channel *channel,
|
|||||||
sizeof(struct vmbus_channel_offer_channel));
|
sizeof(struct vmbus_channel_offer_channel));
|
||||||
channel->monitor_grp = (u8)offer->monitorid / 32;
|
channel->monitor_grp = (u8)offer->monitorid / 32;
|
||||||
channel->monitor_bit = (u8)offer->monitorid % 32;
|
channel->monitor_bit = (u8)offer->monitorid % 32;
|
||||||
|
channel->device_id = hv_get_dev_type(channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -940,8 +922,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
|
|||||||
oldchannel = find_primary_channel_by_offer(offer);
|
oldchannel = find_primary_channel_by_offer(offer);
|
||||||
|
|
||||||
if (oldchannel != NULL) {
|
if (oldchannel != NULL) {
|
||||||
atomic_dec(&vmbus_connection.offer_in_progress);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We're resuming from hibernation: all the sub-channel and
|
* We're resuming from hibernation: all the sub-channel and
|
||||||
* hv_sock channels we had before the hibernation should have
|
* hv_sock channels we had before the hibernation should have
|
||||||
@ -949,36 +929,65 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
|
|||||||
* primary channel that we had before the hibernation.
|
* primary channel that we had before the hibernation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* { Initially: channel relid = INVALID_RELID,
|
||||||
|
* channels[valid_relid] = NULL }
|
||||||
|
*
|
||||||
|
* CPU1 CPU2
|
||||||
|
*
|
||||||
|
* [vmbus_onoffer()] [vmbus_device_release()]
|
||||||
|
*
|
||||||
|
* LOCK channel_mutex LOCK channel_mutex
|
||||||
|
* STORE channel relid = valid_relid LOAD r1 = channel relid
|
||||||
|
* MAP_RELID channel if (r1 != INVALID_RELID)
|
||||||
|
* UNLOCK channel_mutex UNMAP_RELID channel
|
||||||
|
* UNLOCK channel_mutex
|
||||||
|
*
|
||||||
|
* Forbids: r1 == valid_relid &&
|
||||||
|
* channels[valid_relid] == channel
|
||||||
|
*
|
||||||
|
* Note. r1 can be INVALID_RELID only for an hv_sock channel.
|
||||||
|
* None of the hv_sock channels which were present before the
|
||||||
|
* suspend are re-offered upon the resume. See the WARN_ON()
|
||||||
|
* in hv_process_channel_removal().
|
||||||
|
*/
|
||||||
|
mutex_lock(&vmbus_connection.channel_mutex);
|
||||||
|
|
||||||
|
atomic_dec(&vmbus_connection.offer_in_progress);
|
||||||
|
|
||||||
WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
|
WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
|
||||||
/* Fix up the relid. */
|
/* Fix up the relid. */
|
||||||
oldchannel->offermsg.child_relid = offer->child_relid;
|
oldchannel->offermsg.child_relid = offer->child_relid;
|
||||||
|
|
||||||
offer_sz = sizeof(*offer);
|
offer_sz = sizeof(*offer);
|
||||||
if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
|
if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
|
||||||
check_ready_for_resume_event();
|
/*
|
||||||
return;
|
* This is not an error, since the host can also change
|
||||||
|
* the other field(s) of the offer, e.g. on WS RS5
|
||||||
|
* (Build 17763), the offer->connection_id of the
|
||||||
|
* Mellanox VF vmbus device can change when the host
|
||||||
|
* reoffers the device upon resume.
|
||||||
|
*/
|
||||||
|
pr_debug("vmbus offer changed: relid=%d\n",
|
||||||
|
offer->child_relid);
|
||||||
|
|
||||||
|
print_hex_dump_debug("Old vmbus offer: ",
|
||||||
|
DUMP_PREFIX_OFFSET, 16, 4,
|
||||||
|
&oldchannel->offermsg, offer_sz,
|
||||||
|
false);
|
||||||
|
print_hex_dump_debug("New vmbus offer: ",
|
||||||
|
DUMP_PREFIX_OFFSET, 16, 4,
|
||||||
|
offer, offer_sz, false);
|
||||||
|
|
||||||
|
/* Fix up the old channel. */
|
||||||
|
vmbus_setup_channel_state(oldchannel, offer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Add the channel back to the array of channels. */
|
||||||
* This is not an error, since the host can also change the
|
vmbus_channel_map_relid(oldchannel);
|
||||||
* other field(s) of the offer, e.g. on WS RS5 (Build 17763),
|
|
||||||
* the offer->connection_id of the Mellanox VF vmbus device
|
|
||||||
* can change when the host reoffers the device upon resume.
|
|
||||||
*/
|
|
||||||
pr_debug("vmbus offer changed: relid=%d\n",
|
|
||||||
offer->child_relid);
|
|
||||||
|
|
||||||
print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
|
|
||||||
16, 4, &oldchannel->offermsg, offer_sz,
|
|
||||||
false);
|
|
||||||
print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
|
|
||||||
16, 4, offer, offer_sz, false);
|
|
||||||
|
|
||||||
/* Fix up the old channel. */
|
|
||||||
vmbus_setup_channel_state(oldchannel, offer);
|
|
||||||
|
|
||||||
check_ready_for_resume_event();
|
check_ready_for_resume_event();
|
||||||
|
|
||||||
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1028,11 +1037,22 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
|
|||||||
* offer comes in first and then the rescind.
|
* offer comes in first and then the rescind.
|
||||||
* Since we process these events in work elements,
|
* Since we process these events in work elements,
|
||||||
* and with preemption, we may end up processing
|
* and with preemption, we may end up processing
|
||||||
* the events out of order. Given that we handle these
|
* the events out of order. We rely on the synchronization
|
||||||
* work elements on the same CPU, this is possible only
|
* provided by offer_in_progress and by channel_mutex for
|
||||||
* in the case of preemption. In any case wait here
|
* ordering these events:
|
||||||
* until the offer processing has moved beyond the
|
*
|
||||||
* point where the channel is discoverable.
|
* { Initially: offer_in_progress = 1 }
|
||||||
|
*
|
||||||
|
* CPU1 CPU2
|
||||||
|
*
|
||||||
|
* [vmbus_onoffer()] [vmbus_onoffer_rescind()]
|
||||||
|
*
|
||||||
|
* LOCK channel_mutex WAIT_ON offer_in_progress == 0
|
||||||
|
* DECREMENT offer_in_progress LOCK channel_mutex
|
||||||
|
* STORE channels[] LOAD channels[]
|
||||||
|
* UNLOCK channel_mutex UNLOCK channel_mutex
|
||||||
|
*
|
||||||
|
* Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
|
while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
|
||||||
@ -1332,30 +1352,36 @@ static void vmbus_onversion_response(
|
|||||||
/* Channel message dispatch table */
|
/* Channel message dispatch table */
|
||||||
const struct vmbus_channel_message_table_entry
|
const struct vmbus_channel_message_table_entry
|
||||||
channel_message_table[CHANNELMSG_COUNT] = {
|
channel_message_table[CHANNELMSG_COUNT] = {
|
||||||
{ CHANNELMSG_INVALID, 0, NULL },
|
{ CHANNELMSG_INVALID, 0, NULL, 0},
|
||||||
{ CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer },
|
{ CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer,
|
||||||
{ CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind },
|
sizeof(struct vmbus_channel_offer_channel)},
|
||||||
{ CHANNELMSG_REQUESTOFFERS, 0, NULL },
|
{ CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind,
|
||||||
{ CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered },
|
sizeof(struct vmbus_channel_rescind_offer) },
|
||||||
{ CHANNELMSG_OPENCHANNEL, 0, NULL },
|
{ CHANNELMSG_REQUESTOFFERS, 0, NULL, 0},
|
||||||
{ CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result },
|
{ CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0},
|
||||||
{ CHANNELMSG_CLOSECHANNEL, 0, NULL },
|
{ CHANNELMSG_OPENCHANNEL, 0, NULL, 0},
|
||||||
{ CHANNELMSG_GPADL_HEADER, 0, NULL },
|
{ CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result,
|
||||||
{ CHANNELMSG_GPADL_BODY, 0, NULL },
|
sizeof(struct vmbus_channel_open_result)},
|
||||||
{ CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created },
|
{ CHANNELMSG_CLOSECHANNEL, 0, NULL, 0},
|
||||||
{ CHANNELMSG_GPADL_TEARDOWN, 0, NULL },
|
{ CHANNELMSG_GPADL_HEADER, 0, NULL, 0},
|
||||||
{ CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown },
|
{ CHANNELMSG_GPADL_BODY, 0, NULL, 0},
|
||||||
{ CHANNELMSG_RELID_RELEASED, 0, NULL },
|
{ CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created,
|
||||||
{ CHANNELMSG_INITIATE_CONTACT, 0, NULL },
|
sizeof(struct vmbus_channel_gpadl_created)},
|
||||||
{ CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response },
|
{ CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0},
|
||||||
{ CHANNELMSG_UNLOAD, 0, NULL },
|
{ CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown,
|
||||||
{ CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response },
|
sizeof(struct vmbus_channel_gpadl_torndown) },
|
||||||
{ CHANNELMSG_18, 0, NULL },
|
{ CHANNELMSG_RELID_RELEASED, 0, NULL, 0},
|
||||||
{ CHANNELMSG_19, 0, NULL },
|
{ CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0},
|
||||||
{ CHANNELMSG_20, 0, NULL },
|
{ CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response,
|
||||||
{ CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL },
|
sizeof(struct vmbus_channel_version_response)},
|
||||||
{ CHANNELMSG_22, 0, NULL },
|
{ CHANNELMSG_UNLOAD, 0, NULL, 0},
|
||||||
{ CHANNELMSG_TL_CONNECT_RESULT, 0, NULL },
|
{ CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0},
|
||||||
|
{ CHANNELMSG_18, 0, NULL, 0},
|
||||||
|
{ CHANNELMSG_19, 0, NULL, 0},
|
||||||
|
{ CHANNELMSG_20, 0, NULL, 0},
|
||||||
|
{ CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
|
||||||
|
{ CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
|
||||||
|
{ CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1363,13 +1389,8 @@ channel_message_table[CHANNELMSG_COUNT] = {
|
|||||||
*
|
*
|
||||||
* This is invoked in the vmbus worker thread context.
|
* This is invoked in the vmbus worker thread context.
|
||||||
*/
|
*/
|
||||||
void vmbus_onmessage(void *context)
|
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
|
||||||
{
|
{
|
||||||
struct hv_message *msg = context;
|
|
||||||
struct vmbus_channel_message_header *hdr;
|
|
||||||
|
|
||||||
hdr = (struct vmbus_channel_message_header *)msg->u.payload;
|
|
||||||
|
|
||||||
trace_vmbus_on_message(hdr);
|
trace_vmbus_on_message(hdr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -69,7 +69,6 @@ MODULE_PARM_DESC(max_version,
|
|||||||
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
|
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
unsigned int cur_cpu;
|
|
||||||
struct vmbus_channel_initiate_contact *msg;
|
struct vmbus_channel_initiate_contact *msg;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
@ -102,24 +101,7 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
|
|||||||
|
|
||||||
msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
|
msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
|
||||||
msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
|
msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
|
||||||
/*
|
msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
|
||||||
* We want all channel messages to be delivered on CPU 0.
|
|
||||||
* This has been the behavior pre-win8. This is not
|
|
||||||
* perf issue and having all channel messages delivered on CPU 0
|
|
||||||
* would be ok.
|
|
||||||
* For post win8 hosts, we support receiving channel messages on
|
|
||||||
* all the CPUs. This is needed for kexec to work correctly where
|
|
||||||
* the CPU attempting to connect may not be CPU 0.
|
|
||||||
*/
|
|
||||||
if (version >= VERSION_WIN8_1) {
|
|
||||||
cur_cpu = get_cpu();
|
|
||||||
msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu);
|
|
||||||
vmbus_connection.connect_cpu = cur_cpu;
|
|
||||||
put_cpu();
|
|
||||||
} else {
|
|
||||||
msg->target_vcpu = 0;
|
|
||||||
vmbus_connection.connect_cpu = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add to list before we send the request since we may
|
* Add to list before we send the request since we may
|
||||||
@ -266,6 +248,14 @@ int vmbus_connect(void)
|
|||||||
pr_info("Vmbus version:%d.%d\n",
|
pr_info("Vmbus version:%d.%d\n",
|
||||||
version >> 16, version & 0xFFFF);
|
version >> 16, version & 0xFFFF);
|
||||||
|
|
||||||
|
vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS,
|
||||||
|
sizeof(struct vmbus_channel *),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (vmbus_connection.channels == NULL) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
kfree(msginfo);
|
kfree(msginfo);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -313,33 +303,9 @@ void vmbus_disconnect(void)
|
|||||||
*/
|
*/
|
||||||
struct vmbus_channel *relid2channel(u32 relid)
|
struct vmbus_channel *relid2channel(u32 relid)
|
||||||
{
|
{
|
||||||
struct vmbus_channel *channel;
|
if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
|
||||||
struct vmbus_channel *found_channel = NULL;
|
return NULL;
|
||||||
struct list_head *cur, *tmp;
|
return READ_ONCE(vmbus_connection.channels[relid]);
|
||||||
struct vmbus_channel *cur_sc;
|
|
||||||
|
|
||||||
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
|
|
||||||
|
|
||||||
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
|
||||||
if (channel->offermsg.child_relid == relid) {
|
|
||||||
found_channel = channel;
|
|
||||||
break;
|
|
||||||
} else if (!list_empty(&channel->sc_list)) {
|
|
||||||
/*
|
|
||||||
* Deal with sub-channels.
|
|
||||||
*/
|
|
||||||
list_for_each_safe(cur, tmp, &channel->sc_list) {
|
|
||||||
cur_sc = list_entry(cur, struct vmbus_channel,
|
|
||||||
sc_list);
|
|
||||||
if (cur_sc->offermsg.child_relid == relid) {
|
|
||||||
found_channel = cur_sc;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return found_channel;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -117,8 +117,6 @@ int hv_synic_alloc(void)
|
|||||||
pr_err("Unable to allocate post msg page\n");
|
pr_err("Unable to allocate post msg page\n");
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
INIT_LIST_HEAD(&hv_cpu->chan_list);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -245,11 +243,19 @@ int hv_synic_cleanup(unsigned int cpu)
|
|||||||
bool channel_found = false;
|
bool channel_found = false;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hyper-V does not provide a way to change the connect CPU once
|
||||||
|
* it is set; we must prevent the connect CPU from going offline.
|
||||||
|
*/
|
||||||
|
if (cpu == VMBUS_CONNECT_CPU)
|
||||||
|
return -EBUSY;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Search for channels which are bound to the CPU we're about to
|
* Search for channels which are bound to the CPU we're about to
|
||||||
* cleanup. In case we find one and vmbus is still connected we need to
|
* cleanup. In case we find one and vmbus is still connected, we
|
||||||
* fail, this will effectively prevent CPU offlining. There is no way
|
* fail; this will effectively prevent CPU offlining.
|
||||||
* we can re-bind channels to different CPUs for now.
|
*
|
||||||
|
* TODO: Re-bind the channels to different CPUs.
|
||||||
*/
|
*/
|
||||||
mutex_lock(&vmbus_connection.channel_mutex);
|
mutex_lock(&vmbus_connection.channel_mutex);
|
||||||
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
||||||
|
@ -71,7 +71,7 @@ static void fcopy_poll_wrapper(void *channel)
|
|||||||
{
|
{
|
||||||
/* Transaction is finished, reset the state here to avoid races. */
|
/* Transaction is finished, reset the state here to avoid races. */
|
||||||
fcopy_transaction.state = HVUTIL_READY;
|
fcopy_transaction.state = HVUTIL_READY;
|
||||||
hv_fcopy_onchannelcallback(channel);
|
tasklet_schedule(&((struct vmbus_channel *)channel)->callback_event);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fcopy_timeout_func(struct work_struct *dummy)
|
static void fcopy_timeout_func(struct work_struct *dummy)
|
||||||
|
@ -80,7 +80,7 @@ static void vss_poll_wrapper(void *channel)
|
|||||||
{
|
{
|
||||||
/* Transaction is finished, reset the state here to avoid races. */
|
/* Transaction is finished, reset the state here to avoid races. */
|
||||||
vss_transaction.state = HVUTIL_READY;
|
vss_transaction.state = HVUTIL_READY;
|
||||||
hv_vss_onchannelcallback(channel);
|
tasklet_schedule(&((struct vmbus_channel *)channel)->callback_event);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -44,10 +44,8 @@ TRACE_EVENT(vmbus_onoffer,
|
|||||||
__entry->monitorid = offer->monitorid;
|
__entry->monitorid = offer->monitorid;
|
||||||
__entry->is_ddc_int = offer->is_dedicated_interrupt;
|
__entry->is_ddc_int = offer->is_dedicated_interrupt;
|
||||||
__entry->connection_id = offer->connection_id;
|
__entry->connection_id = offer->connection_id;
|
||||||
memcpy(__entry->if_type,
|
export_guid(__entry->if_type, &offer->offer.if_type);
|
||||||
&offer->offer.if_type.b, 16);
|
export_guid(__entry->if_instance, &offer->offer.if_instance);
|
||||||
memcpy(__entry->if_instance,
|
|
||||||
&offer->offer.if_instance.b, 16);
|
|
||||||
__entry->chn_flags = offer->offer.chn_flags;
|
__entry->chn_flags = offer->offer.chn_flags;
|
||||||
__entry->mmio_mb = offer->offer.mmio_megabytes;
|
__entry->mmio_mb = offer->offer.mmio_megabytes;
|
||||||
__entry->sub_idx = offer->offer.sub_channel_index;
|
__entry->sub_idx = offer->offer.sub_channel_index;
|
||||||
@ -296,6 +294,25 @@ TRACE_EVENT(vmbus_send_tl_connect_request,
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(vmbus_send_modifychannel,
|
||||||
|
TP_PROTO(const struct vmbus_channel_modifychannel *msg,
|
||||||
|
int ret),
|
||||||
|
TP_ARGS(msg, ret),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(u32, child_relid)
|
||||||
|
__field(u32, target_vp)
|
||||||
|
__field(int, ret)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->child_relid = msg->child_relid;
|
||||||
|
__entry->target_vp = msg->target_vp;
|
||||||
|
__entry->ret = ret;
|
||||||
|
),
|
||||||
|
TP_printk("binding child_relid 0x%x to target_vp 0x%x, ret %d",
|
||||||
|
__entry->child_relid, __entry->target_vp, __entry->ret
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(vmbus_channel,
|
DECLARE_EVENT_CLASS(vmbus_channel,
|
||||||
TP_PROTO(const struct vmbus_channel *channel),
|
TP_PROTO(const struct vmbus_channel *channel),
|
||||||
TP_ARGS(channel),
|
TP_ARGS(channel),
|
||||||
|
@ -132,12 +132,6 @@ struct hv_per_cpu_context {
|
|||||||
* basis.
|
* basis.
|
||||||
*/
|
*/
|
||||||
struct tasklet_struct msg_dpc;
|
struct tasklet_struct msg_dpc;
|
||||||
|
|
||||||
/*
|
|
||||||
* To optimize the mapping of relid to channel, maintain
|
|
||||||
* per-cpu list of the channels based on their CPU affinity.
|
|
||||||
*/
|
|
||||||
struct list_head chan_list;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct hv_context {
|
struct hv_context {
|
||||||
@ -202,6 +196,8 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
|
|||||||
/* TODO: Need to make this configurable */
|
/* TODO: Need to make this configurable */
|
||||||
#define MAX_NUM_CHANNELS_SUPPORTED 256
|
#define MAX_NUM_CHANNELS_SUPPORTED 256
|
||||||
|
|
||||||
|
#define MAX_CHANNEL_RELIDS \
|
||||||
|
max(MAX_NUM_CHANNELS_SUPPORTED, HV_EVENT_FLAGS_COUNT)
|
||||||
|
|
||||||
enum vmbus_connect_state {
|
enum vmbus_connect_state {
|
||||||
DISCONNECTED,
|
DISCONNECTED,
|
||||||
@ -212,12 +208,13 @@ enum vmbus_connect_state {
|
|||||||
|
|
||||||
#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
|
#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
|
||||||
|
|
||||||
struct vmbus_connection {
|
/*
|
||||||
/*
|
* The CPU that Hyper-V will interrupt for VMBUS messages, such as
|
||||||
* CPU on which the initial host contact was made.
|
* CHANNELMSG_OFFERCHANNEL and CHANNELMSG_RESCIND_CHANNELOFFER.
|
||||||
*/
|
*/
|
||||||
int connect_cpu;
|
#define VMBUS_CONNECT_CPU 0
|
||||||
|
|
||||||
|
struct vmbus_connection {
|
||||||
u32 msg_conn_id;
|
u32 msg_conn_id;
|
||||||
|
|
||||||
atomic_t offer_in_progress;
|
atomic_t offer_in_progress;
|
||||||
@ -250,6 +247,9 @@ struct vmbus_connection {
|
|||||||
struct list_head chn_list;
|
struct list_head chn_list;
|
||||||
struct mutex channel_mutex;
|
struct mutex channel_mutex;
|
||||||
|
|
||||||
|
/* Array of channels */
|
||||||
|
struct vmbus_channel **channels;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* An offer message is handled first on the work_queue, and then
|
* An offer message is handled first on the work_queue, and then
|
||||||
* is further handled on handle_primary_chan_wq or
|
* is further handled on handle_primary_chan_wq or
|
||||||
@ -317,6 +317,7 @@ struct vmbus_channel_message_table_entry {
|
|||||||
enum vmbus_channel_message_type message_type;
|
enum vmbus_channel_message_type message_type;
|
||||||
enum vmbus_message_handler_type handler_type;
|
enum vmbus_message_handler_type handler_type;
|
||||||
void (*message_handler)(struct vmbus_channel_message_header *msg);
|
void (*message_handler)(struct vmbus_channel_message_header *msg);
|
||||||
|
u32 min_payload_len;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern const struct vmbus_channel_message_table_entry
|
extern const struct vmbus_channel_message_table_entry
|
||||||
@ -336,6 +337,9 @@ int vmbus_add_channel_kobj(struct hv_device *device_obj,
|
|||||||
|
|
||||||
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel);
|
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel);
|
||||||
|
|
||||||
|
void vmbus_channel_map_relid(struct vmbus_channel *channel);
|
||||||
|
void vmbus_channel_unmap_relid(struct vmbus_channel *channel);
|
||||||
|
|
||||||
struct vmbus_channel *relid2channel(u32 relid);
|
struct vmbus_channel *relid2channel(u32 relid);
|
||||||
|
|
||||||
void vmbus_free_channels(void);
|
void vmbus_free_channels(void);
|
||||||
@ -374,12 +378,7 @@ static inline void hv_poll_channel(struct vmbus_channel *channel,
|
|||||||
{
|
{
|
||||||
if (!channel)
|
if (!channel)
|
||||||
return;
|
return;
|
||||||
|
cb(channel);
|
||||||
if (in_interrupt() && (channel->target_cpu == smp_processor_id())) {
|
|
||||||
cb(channel);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
smp_call_function_single(channel->target_cpu, cb, channel, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
enum hvutil_device_state {
|
enum hvutil_device_state {
|
||||||
@ -396,6 +395,54 @@ enum delay {
|
|||||||
MESSAGE_DELAY = 1,
|
MESSAGE_DELAY = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern const struct vmbus_device vmbus_devs[];
|
||||||
|
|
||||||
|
static inline bool hv_is_perf_channel(struct vmbus_channel *channel)
|
||||||
|
{
|
||||||
|
return vmbus_devs[channel->device_id].perf_device;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool hv_is_alloced_cpu(unsigned int cpu)
|
||||||
|
{
|
||||||
|
struct vmbus_channel *channel, *sc;
|
||||||
|
|
||||||
|
lockdep_assert_held(&vmbus_connection.channel_mutex);
|
||||||
|
/*
|
||||||
|
* List additions/deletions as well as updates of the target CPUs are
|
||||||
|
* protected by channel_mutex.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
||||||
|
if (!hv_is_perf_channel(channel))
|
||||||
|
continue;
|
||||||
|
if (channel->target_cpu == cpu)
|
||||||
|
return true;
|
||||||
|
list_for_each_entry(sc, &channel->sc_list, sc_list) {
|
||||||
|
if (sc->target_cpu == cpu)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void hv_set_alloced_cpu(unsigned int cpu)
|
||||||
|
{
|
||||||
|
cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void hv_clear_alloced_cpu(unsigned int cpu)
|
||||||
|
{
|
||||||
|
if (hv_is_alloced_cpu(cpu))
|
||||||
|
return;
|
||||||
|
cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void hv_update_alloced_cpus(unsigned int old_cpu,
|
||||||
|
unsigned int new_cpu)
|
||||||
|
{
|
||||||
|
hv_set_alloced_cpu(new_cpu);
|
||||||
|
hv_clear_alloced_cpu(old_cpu);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_HYPERV_TESTING
|
#ifdef CONFIG_HYPERV_TESTING
|
||||||
|
|
||||||
int hv_debug_add_dev_dir(struct hv_device *dev);
|
int hv_debug_add_dev_dir(struct hv_device *dev);
|
||||||
|
@ -117,14 +117,6 @@ static int vmbus_exists(void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
|
|
||||||
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
|
|
||||||
sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
static u8 channel_monitor_group(const struct vmbus_channel *channel)
|
static u8 channel_monitor_group(const struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
return (u8)channel->offermsg.monitorid / 32;
|
return (u8)channel->offermsg.monitorid / 32;
|
||||||
@ -201,7 +193,7 @@ static ssize_t class_id_show(struct device *dev,
|
|||||||
if (!hv_dev->channel)
|
if (!hv_dev->channel)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
return sprintf(buf, "{%pUl}\n",
|
return sprintf(buf, "{%pUl}\n",
|
||||||
hv_dev->channel->offermsg.offer.if_type.b);
|
&hv_dev->channel->offermsg.offer.if_type);
|
||||||
}
|
}
|
||||||
static DEVICE_ATTR_RO(class_id);
|
static DEVICE_ATTR_RO(class_id);
|
||||||
|
|
||||||
@ -213,7 +205,7 @@ static ssize_t device_id_show(struct device *dev,
|
|||||||
if (!hv_dev->channel)
|
if (!hv_dev->channel)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
return sprintf(buf, "{%pUl}\n",
|
return sprintf(buf, "{%pUl}\n",
|
||||||
hv_dev->channel->offermsg.offer.if_instance.b);
|
&hv_dev->channel->offermsg.offer.if_instance);
|
||||||
}
|
}
|
||||||
static DEVICE_ATTR_RO(device_id);
|
static DEVICE_ATTR_RO(device_id);
|
||||||
|
|
||||||
@ -221,10 +213,8 @@ static ssize_t modalias_show(struct device *dev,
|
|||||||
struct device_attribute *dev_attr, char *buf)
|
struct device_attribute *dev_attr, char *buf)
|
||||||
{
|
{
|
||||||
struct hv_device *hv_dev = device_to_hv_device(dev);
|
struct hv_device *hv_dev = device_to_hv_device(dev);
|
||||||
char alias_name[VMBUS_ALIAS_LEN + 1];
|
|
||||||
|
|
||||||
print_alias_name(hv_dev, alias_name);
|
return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
|
||||||
return sprintf(buf, "vmbus:%s\n", alias_name);
|
|
||||||
}
|
}
|
||||||
static DEVICE_ATTR_RO(modalias);
|
static DEVICE_ATTR_RO(modalias);
|
||||||
|
|
||||||
@ -693,12 +683,9 @@ __ATTRIBUTE_GROUPS(vmbus_dev);
|
|||||||
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
|
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
|
||||||
{
|
{
|
||||||
struct hv_device *dev = device_to_hv_device(device);
|
struct hv_device *dev = device_to_hv_device(device);
|
||||||
int ret;
|
const char *format = "MODALIAS=vmbus:%*phN";
|
||||||
char alias_name[VMBUS_ALIAS_LEN + 1];
|
|
||||||
|
|
||||||
print_alias_name(dev, alias_name);
|
return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
|
||||||
ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct hv_vmbus_device_id *
|
static const struct hv_vmbus_device_id *
|
||||||
@ -1033,7 +1020,10 @@ static struct bus_type hv_bus = {
|
|||||||
|
|
||||||
struct onmessage_work_context {
|
struct onmessage_work_context {
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
struct hv_message msg;
|
struct {
|
||||||
|
struct hv_message_header header;
|
||||||
|
u8 payload[];
|
||||||
|
} msg;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void vmbus_onmessage_work(struct work_struct *work)
|
static void vmbus_onmessage_work(struct work_struct *work)
|
||||||
@ -1046,7 +1036,8 @@ static void vmbus_onmessage_work(struct work_struct *work)
|
|||||||
|
|
||||||
ctx = container_of(work, struct onmessage_work_context,
|
ctx = container_of(work, struct onmessage_work_context,
|
||||||
work);
|
work);
|
||||||
vmbus_onmessage(&ctx->msg);
|
vmbus_onmessage((struct vmbus_channel_message_header *)
|
||||||
|
&ctx->msg.payload);
|
||||||
kfree(ctx);
|
kfree(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1061,6 +1052,13 @@ void vmbus_on_msg_dpc(unsigned long data)
|
|||||||
struct onmessage_work_context *ctx;
|
struct onmessage_work_context *ctx;
|
||||||
u32 message_type = msg->header.message_type;
|
u32 message_type = msg->header.message_type;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
|
||||||
|
* it is being used in 'struct vmbus_channel_message_header' definition
|
||||||
|
* which is supposed to match hypervisor ABI.
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));
|
||||||
|
|
||||||
if (message_type == HVMSG_NONE)
|
if (message_type == HVMSG_NONE)
|
||||||
/* no msg */
|
/* no msg */
|
||||||
return;
|
return;
|
||||||
@ -1074,41 +1072,88 @@ void vmbus_on_msg_dpc(unsigned long data)
|
|||||||
goto msg_handled;
|
goto msg_handled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
|
||||||
|
WARN_ONCE(1, "payload size is too large (%d)\n",
|
||||||
|
msg->header.payload_size);
|
||||||
|
goto msg_handled;
|
||||||
|
}
|
||||||
|
|
||||||
entry = &channel_message_table[hdr->msgtype];
|
entry = &channel_message_table[hdr->msgtype];
|
||||||
|
|
||||||
if (!entry->message_handler)
|
if (!entry->message_handler)
|
||||||
goto msg_handled;
|
goto msg_handled;
|
||||||
|
|
||||||
|
if (msg->header.payload_size < entry->min_payload_len) {
|
||||||
|
WARN_ONCE(1, "message too short: msgtype=%d len=%d\n",
|
||||||
|
hdr->msgtype, msg->header.payload_size);
|
||||||
|
goto msg_handled;
|
||||||
|
}
|
||||||
|
|
||||||
if (entry->handler_type == VMHT_BLOCKING) {
|
if (entry->handler_type == VMHT_BLOCKING) {
|
||||||
ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
|
ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size,
|
||||||
|
GFP_ATOMIC);
|
||||||
if (ctx == NULL)
|
if (ctx == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
INIT_WORK(&ctx->work, vmbus_onmessage_work);
|
INIT_WORK(&ctx->work, vmbus_onmessage_work);
|
||||||
memcpy(&ctx->msg, msg, sizeof(*msg));
|
memcpy(&ctx->msg, msg, sizeof(msg->header) +
|
||||||
|
msg->header.payload_size);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The host can generate a rescind message while we
|
* The host can generate a rescind message while we
|
||||||
* may still be handling the original offer. We deal with
|
* may still be handling the original offer. We deal with
|
||||||
* this condition by ensuring the processing is done on the
|
* this condition by relying on the synchronization provided
|
||||||
* same CPU.
|
* by offer_in_progress and by channel_mutex. See also the
|
||||||
|
* inline comments in vmbus_onoffer_rescind().
|
||||||
*/
|
*/
|
||||||
switch (hdr->msgtype) {
|
switch (hdr->msgtype) {
|
||||||
case CHANNELMSG_RESCIND_CHANNELOFFER:
|
case CHANNELMSG_RESCIND_CHANNELOFFER:
|
||||||
/*
|
/*
|
||||||
* If we are handling the rescind message;
|
* If we are handling the rescind message;
|
||||||
* schedule the work on the global work queue.
|
* schedule the work on the global work queue.
|
||||||
|
*
|
||||||
|
* The OFFER message and the RESCIND message should
|
||||||
|
* not be handled by the same serialized work queue,
|
||||||
|
* because the OFFER handler may call vmbus_open(),
|
||||||
|
* which tries to open the channel by sending an
|
||||||
|
* OPEN_CHANNEL message to the host and waits for
|
||||||
|
* the host's response; however, if the host has
|
||||||
|
* rescinded the channel before it receives the
|
||||||
|
* OPEN_CHANNEL message, the host just silently
|
||||||
|
* ignores the OPEN_CHANNEL message; as a result,
|
||||||
|
* the guest's OFFER handler hangs for ever, if we
|
||||||
|
* handle the RESCIND message in the same serialized
|
||||||
|
* work queue: the RESCIND handler can not start to
|
||||||
|
* run before the OFFER handler finishes.
|
||||||
*/
|
*/
|
||||||
schedule_work_on(vmbus_connection.connect_cpu,
|
schedule_work(&ctx->work);
|
||||||
&ctx->work);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CHANNELMSG_OFFERCHANNEL:
|
case CHANNELMSG_OFFERCHANNEL:
|
||||||
|
/*
|
||||||
|
* The host sends the offer message of a given channel
|
||||||
|
* before sending the rescind message of the same
|
||||||
|
* channel. These messages are sent to the guest's
|
||||||
|
* connect CPU; the guest then starts processing them
|
||||||
|
* in the tasklet handler on this CPU:
|
||||||
|
*
|
||||||
|
* VMBUS_CONNECT_CPU
|
||||||
|
*
|
||||||
|
* [vmbus_on_msg_dpc()]
|
||||||
|
* atomic_inc() // CHANNELMSG_OFFERCHANNEL
|
||||||
|
* queue_work()
|
||||||
|
* ...
|
||||||
|
* [vmbus_on_msg_dpc()]
|
||||||
|
* schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER
|
||||||
|
*
|
||||||
|
* We rely on the memory-ordering properties of the
|
||||||
|
* queue_work() and schedule_work() primitives, which
|
||||||
|
* guarantee that the atomic increment will be visible
|
||||||
|
* to the CPUs which will execute the offer & rescind
|
||||||
|
* works by the time these works will start execution.
|
||||||
|
*/
|
||||||
atomic_inc(&vmbus_connection.offer_in_progress);
|
atomic_inc(&vmbus_connection.offer_in_progress);
|
||||||
queue_work_on(vmbus_connection.connect_cpu,
|
fallthrough;
|
||||||
vmbus_connection.work_queue,
|
|
||||||
&ctx->work);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
queue_work(vmbus_connection.work_queue, &ctx->work);
|
queue_work(vmbus_connection.work_queue, &ctx->work);
|
||||||
@ -1133,10 +1178,11 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
|
|||||||
WARN_ON(!is_hvsock_channel(channel));
|
WARN_ON(!is_hvsock_channel(channel));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* sizeof(*ctx) is small and the allocation should really not fail,
|
* Allocation size is small and the allocation should really not fail,
|
||||||
* otherwise the state of the hv_sock connections ends up in limbo.
|
* otherwise the state of the hv_sock connections ends up in limbo.
|
||||||
*/
|
*/
|
||||||
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
|
ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
|
||||||
|
GFP_KERNEL | __GFP_NOFAIL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* So far, these are not really used by Linux. Just set them to the
|
* So far, these are not really used by Linux. Just set them to the
|
||||||
@ -1146,30 +1192,16 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
|
|||||||
ctx->msg.header.payload_size = sizeof(*rescind);
|
ctx->msg.header.payload_size = sizeof(*rescind);
|
||||||
|
|
||||||
/* These values are actually used by Linux. */
|
/* These values are actually used by Linux. */
|
||||||
rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
|
rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
|
||||||
rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
|
rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
|
||||||
rescind->child_relid = channel->offermsg.child_relid;
|
rescind->child_relid = channel->offermsg.child_relid;
|
||||||
|
|
||||||
INIT_WORK(&ctx->work, vmbus_onmessage_work);
|
INIT_WORK(&ctx->work, vmbus_onmessage_work);
|
||||||
|
|
||||||
queue_work_on(vmbus_connection.connect_cpu,
|
queue_work(vmbus_connection.work_queue, &ctx->work);
|
||||||
vmbus_connection.work_queue,
|
|
||||||
&ctx->work);
|
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_PM_SLEEP */
|
#endif /* CONFIG_PM_SLEEP */
|
||||||
|
|
||||||
/*
|
|
||||||
* Direct callback for channels using other deferred processing
|
|
||||||
*/
|
|
||||||
static void vmbus_channel_isr(struct vmbus_channel *channel)
|
|
||||||
{
|
|
||||||
void (*callback_fn)(void *);
|
|
||||||
|
|
||||||
callback_fn = READ_ONCE(channel->onchannel_callback);
|
|
||||||
if (likely(callback_fn != NULL))
|
|
||||||
(*callback_fn)(channel->channel_callback_context);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Schedule all channels with events pending
|
* Schedule all channels with events pending
|
||||||
*/
|
*/
|
||||||
@ -1200,6 +1232,7 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
for_each_set_bit(relid, recv_int_page, maxbits) {
|
for_each_set_bit(relid, recv_int_page, maxbits) {
|
||||||
|
void (*callback_fn)(void *context);
|
||||||
struct vmbus_channel *channel;
|
struct vmbus_channel *channel;
|
||||||
|
|
||||||
if (!sync_test_and_clear_bit(relid, recv_int_page))
|
if (!sync_test_and_clear_bit(relid, recv_int_page))
|
||||||
@ -1209,33 +1242,54 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
|
|||||||
if (relid == 0)
|
if (relid == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pairs with the kfree_rcu() in vmbus_chan_release().
|
||||||
|
* Guarantees that the channel data structure doesn't
|
||||||
|
* get freed while the channel pointer below is being
|
||||||
|
* dereferenced.
|
||||||
|
*/
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
/* Find channel based on relid */
|
/* Find channel based on relid */
|
||||||
list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
|
channel = relid2channel(relid);
|
||||||
if (channel->offermsg.child_relid != relid)
|
if (channel == NULL)
|
||||||
continue;
|
goto sched_unlock_rcu;
|
||||||
|
|
||||||
if (channel->rescind)
|
if (channel->rescind)
|
||||||
continue;
|
goto sched_unlock_rcu;
|
||||||
|
|
||||||
trace_vmbus_chan_sched(channel);
|
/*
|
||||||
|
* Make sure that the ring buffer data structure doesn't get
|
||||||
|
* freed while we dereference the ring buffer pointer. Test
|
||||||
|
* for the channel's onchannel_callback being NULL within a
|
||||||
|
* sched_lock critical section. See also the inline comments
|
||||||
|
* in vmbus_reset_channel_cb().
|
||||||
|
*/
|
||||||
|
spin_lock(&channel->sched_lock);
|
||||||
|
|
||||||
++channel->interrupts;
|
callback_fn = channel->onchannel_callback;
|
||||||
|
if (unlikely(callback_fn == NULL))
|
||||||
|
goto sched_unlock;
|
||||||
|
|
||||||
switch (channel->callback_mode) {
|
trace_vmbus_chan_sched(channel);
|
||||||
case HV_CALL_ISR:
|
|
||||||
vmbus_channel_isr(channel);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case HV_CALL_BATCHED:
|
++channel->interrupts;
|
||||||
hv_begin_read(&channel->inbound);
|
|
||||||
/* fallthrough */
|
switch (channel->callback_mode) {
|
||||||
case HV_CALL_DIRECT:
|
case HV_CALL_ISR:
|
||||||
tasklet_schedule(&channel->callback_event);
|
(*callback_fn)(channel->channel_callback_context);
|
||||||
}
|
break;
|
||||||
|
|
||||||
|
case HV_CALL_BATCHED:
|
||||||
|
hv_begin_read(&channel->inbound);
|
||||||
|
fallthrough;
|
||||||
|
case HV_CALL_DIRECT:
|
||||||
|
tasklet_schedule(&channel->callback_event);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sched_unlock:
|
||||||
|
spin_unlock(&channel->sched_lock);
|
||||||
|
sched_unlock_rcu:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1364,7 +1418,6 @@ static int vmbus_bus_init(void)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* Hypervisor initialization...setup hypercall page..etc */
|
|
||||||
ret = hv_init();
|
ret = hv_init();
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
|
pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
|
||||||
@ -1553,8 +1606,24 @@ static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
|
|||||||
return attribute->show(chan, buf);
|
return attribute->show(chan, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
|
||||||
|
struct attribute *attr, const char *buf,
|
||||||
|
size_t count)
|
||||||
|
{
|
||||||
|
const struct vmbus_chan_attribute *attribute
|
||||||
|
= container_of(attr, struct vmbus_chan_attribute, attr);
|
||||||
|
struct vmbus_channel *chan
|
||||||
|
= container_of(kobj, struct vmbus_channel, kobj);
|
||||||
|
|
||||||
|
if (!attribute->store)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
return attribute->store(chan, buf, count);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct sysfs_ops vmbus_chan_sysfs_ops = {
|
static const struct sysfs_ops vmbus_chan_sysfs_ops = {
|
||||||
.show = vmbus_chan_attr_show,
|
.show = vmbus_chan_attr_show,
|
||||||
|
.store = vmbus_chan_attr_store,
|
||||||
};
|
};
|
||||||
|
|
||||||
static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
|
static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
|
||||||
@ -1625,11 +1694,110 @@ static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
|
|||||||
}
|
}
|
||||||
static VMBUS_CHAN_ATTR_RO(write_avail);
|
static VMBUS_CHAN_ATTR_RO(write_avail);
|
||||||
|
|
||||||
static ssize_t show_target_cpu(struct vmbus_channel *channel, char *buf)
|
static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
|
||||||
{
|
{
|
||||||
return sprintf(buf, "%u\n", channel->target_cpu);
|
return sprintf(buf, "%u\n", channel->target_cpu);
|
||||||
}
|
}
|
||||||
static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);
|
static ssize_t target_cpu_store(struct vmbus_channel *channel,
|
||||||
|
const char *buf, size_t count)
|
||||||
|
{
|
||||||
|
u32 target_cpu, origin_cpu;
|
||||||
|
ssize_t ret = count;
|
||||||
|
|
||||||
|
if (vmbus_proto_version < VERSION_WIN10_V4_1)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
if (sscanf(buf, "%uu", &target_cpu) != 1)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
/* Validate target_cpu for the cpumask_test_cpu() operation below. */
|
||||||
|
if (target_cpu >= nr_cpumask_bits)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/* No CPUs should come up or down during this. */
|
||||||
|
cpus_read_lock();
|
||||||
|
|
||||||
|
if (!cpumask_test_cpu(target_cpu, cpu_online_mask)) {
|
||||||
|
cpus_read_unlock();
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Synchronizes target_cpu_store() and channel closure:
|
||||||
|
*
|
||||||
|
* { Initially: state = CHANNEL_OPENED }
|
||||||
|
*
|
||||||
|
* CPU1 CPU2
|
||||||
|
*
|
||||||
|
* [target_cpu_store()] [vmbus_disconnect_ring()]
|
||||||
|
*
|
||||||
|
* LOCK channel_mutex LOCK channel_mutex
|
||||||
|
* LOAD r1 = state LOAD r2 = state
|
||||||
|
* IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED)
|
||||||
|
* SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN
|
||||||
|
* [...] SEND CLOSECHANNEL
|
||||||
|
* UNLOCK channel_mutex UNLOCK channel_mutex
|
||||||
|
*
|
||||||
|
* Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
|
||||||
|
* CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
|
||||||
|
*
|
||||||
|
* Note. The host processes the channel messages "sequentially", in
|
||||||
|
* the order in which they are received on a per-partition basis.
|
||||||
|
*/
|
||||||
|
mutex_lock(&vmbus_connection.channel_mutex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
|
||||||
|
* avoid sending the message and fail here for such channels.
|
||||||
|
*/
|
||||||
|
if (channel->state != CHANNEL_OPENED_STATE) {
|
||||||
|
ret = -EIO;
|
||||||
|
goto cpu_store_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
origin_cpu = channel->target_cpu;
|
||||||
|
if (target_cpu == origin_cpu)
|
||||||
|
goto cpu_store_unlock;
|
||||||
|
|
||||||
|
if (vmbus_send_modifychannel(channel->offermsg.child_relid,
|
||||||
|
hv_cpu_number_to_vp_number(target_cpu))) {
|
||||||
|
ret = -EIO;
|
||||||
|
goto cpu_store_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Warning. At this point, there is *no* guarantee that the host will
|
||||||
|
* have successfully processed the vmbus_send_modifychannel() request.
|
||||||
|
* See the header comment of vmbus_send_modifychannel() for more info.
|
||||||
|
*
|
||||||
|
* Lags in the processing of the above vmbus_send_modifychannel() can
|
||||||
|
* result in missed interrupts if the "old" target CPU is taken offline
|
||||||
|
* before Hyper-V starts sending interrupts to the "new" target CPU.
|
||||||
|
* But apart from this offlining scenario, the code tolerates such
|
||||||
|
* lags. It will function correctly even if a channel interrupt comes
|
||||||
|
* in on a CPU that is different from the channel target_cpu value.
|
||||||
|
*/
|
||||||
|
|
||||||
|
channel->target_cpu = target_cpu;
|
||||||
|
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
|
||||||
|
channel->numa_node = cpu_to_node(target_cpu);
|
||||||
|
|
||||||
|
/* See init_vp_index(). */
|
||||||
|
if (hv_is_perf_channel(channel))
|
||||||
|
hv_update_alloced_cpus(origin_cpu, target_cpu);
|
||||||
|
|
||||||
|
/* Currently set only for storvsc channels. */
|
||||||
|
if (channel->change_target_cpu_callback) {
|
||||||
|
(*channel->change_target_cpu_callback)(channel,
|
||||||
|
origin_cpu, target_cpu);
|
||||||
|
}
|
||||||
|
|
||||||
|
cpu_store_unlock:
|
||||||
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
||||||
|
cpus_read_unlock();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
|
||||||
|
|
||||||
static ssize_t channel_pending_show(struct vmbus_channel *channel,
|
static ssize_t channel_pending_show(struct vmbus_channel *channel,
|
||||||
char *buf)
|
char *buf)
|
||||||
@ -1830,7 +1998,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
dev_set_name(&child_device_obj->device, "%pUl",
|
dev_set_name(&child_device_obj->device, "%pUl",
|
||||||
child_device_obj->channel->offermsg.offer.if_instance.b);
|
&child_device_obj->channel->offermsg.offer.if_instance);
|
||||||
|
|
||||||
child_device_obj->device.bus = &hv_bus;
|
child_device_obj->device.bus = &hv_bus;
|
||||||
child_device_obj->device.parent = &hv_acpi_dev->dev;
|
child_device_obj->device.parent = &hv_acpi_dev->dev;
|
||||||
@ -2221,9 +2389,12 @@ static int vmbus_bus_suspend(struct device *dev)
|
|||||||
|
|
||||||
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
||||||
/*
|
/*
|
||||||
* Invalidate the field. Upon resume, vmbus_onoffer() will fix
|
* Remove the channel from the array of channels and invalidate
|
||||||
* up the field, and the other fields (if necessary).
|
* the channel's relid. Upon resume, vmbus_onoffer() will fix
|
||||||
|
* up the relid (and other fields, if necessary) and add the
|
||||||
|
* channel back to the array.
|
||||||
*/
|
*/
|
||||||
|
vmbus_channel_unmap_relid(channel);
|
||||||
channel->offermsg.child_relid = INVALID_RELID;
|
channel->offermsg.child_relid = INVALID_RELID;
|
||||||
|
|
||||||
if (is_hvsock_channel(channel)) {
|
if (is_hvsock_channel(channel)) {
|
||||||
@ -2470,6 +2641,7 @@ static void __exit vmbus_exit(void)
|
|||||||
hv_debug_rm_all_dir();
|
hv_debug_rm_all_dir();
|
||||||
|
|
||||||
vmbus_free_channels();
|
vmbus_free_channels();
|
||||||
|
kfree(vmbus_connection.channels);
|
||||||
|
|
||||||
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
|
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
|
||||||
kmsg_dump_unregister(&hv_kmsg_dumper);
|
kmsg_dump_unregister(&hv_kmsg_dumper);
|
||||||
|
@ -636,9 +636,12 @@ void netvsc_device_remove(struct hv_device *device)
|
|||||||
|
|
||||||
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
|
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
|
||||||
|
|
||||||
/* And disassociate NAPI context from device */
|
/* Disable NAPI and disassociate its context from the device. */
|
||||||
for (i = 0; i < net_device->num_chn; i++)
|
for (i = 0; i < net_device->num_chn; i++) {
|
||||||
|
/* See also vmbus_reset_channel_cb(). */
|
||||||
|
napi_disable(&net_device->chan_table[i].napi);
|
||||||
netif_napi_del(&net_device->chan_table[i].napi);
|
netif_napi_del(&net_device->chan_table[i].napi);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At this point, no one should be accessing net_device
|
* At this point, no one should be accessing net_device
|
||||||
|
@ -1356,11 +1356,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||||||
{
|
{
|
||||||
struct irq_cfg *cfg = irqd_cfg(data);
|
struct irq_cfg *cfg = irqd_cfg(data);
|
||||||
struct hv_pcibus_device *hbus;
|
struct hv_pcibus_device *hbus;
|
||||||
|
struct vmbus_channel *channel;
|
||||||
struct hv_pci_dev *hpdev;
|
struct hv_pci_dev *hpdev;
|
||||||
struct pci_bus *pbus;
|
struct pci_bus *pbus;
|
||||||
struct pci_dev *pdev;
|
struct pci_dev *pdev;
|
||||||
struct cpumask *dest;
|
struct cpumask *dest;
|
||||||
unsigned long flags;
|
|
||||||
struct compose_comp_ctxt comp;
|
struct compose_comp_ctxt comp;
|
||||||
struct tran_int_desc *int_desc;
|
struct tran_int_desc *int_desc;
|
||||||
struct {
|
struct {
|
||||||
@ -1378,6 +1378,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||||||
dest = irq_data_get_effective_affinity_mask(data);
|
dest = irq_data_get_effective_affinity_mask(data);
|
||||||
pbus = pdev->bus;
|
pbus = pdev->bus;
|
||||||
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
|
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
|
||||||
|
channel = hbus->hdev->channel;
|
||||||
hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
|
hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
|
||||||
if (!hpdev)
|
if (!hpdev)
|
||||||
goto return_null_message;
|
goto return_null_message;
|
||||||
@ -1435,43 +1436,52 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||||||
goto free_int_desc;
|
goto free_int_desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevents hv_pci_onchannelcallback() from running concurrently
|
||||||
|
* in the tasklet.
|
||||||
|
*/
|
||||||
|
tasklet_disable(&channel->callback_event);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since this function is called with IRQ locks held, can't
|
* Since this function is called with IRQ locks held, can't
|
||||||
* do normal wait for completion; instead poll.
|
* do normal wait for completion; instead poll.
|
||||||
*/
|
*/
|
||||||
while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
|
while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
/* 0xFFFF means an invalid PCI VENDOR ID. */
|
/* 0xFFFF means an invalid PCI VENDOR ID. */
|
||||||
if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
|
if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
|
||||||
dev_err_once(&hbus->hdev->device,
|
dev_err_once(&hbus->hdev->device,
|
||||||
"the device has gone\n");
|
"the device has gone\n");
|
||||||
goto free_int_desc;
|
goto enable_tasklet;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When the higher level interrupt code calls us with
|
* Make sure that the ring buffer data structure doesn't get
|
||||||
* interrupt disabled, we must poll the channel by calling
|
* freed while we dereference the ring buffer pointer. Test
|
||||||
* the channel callback directly when channel->target_cpu is
|
* for the channel's onchannel_callback being NULL within a
|
||||||
* the current CPU. When the higher level interrupt code
|
* sched_lock critical section. See also the inline comments
|
||||||
* calls us with interrupt enabled, let's add the
|
* in vmbus_reset_channel_cb().
|
||||||
* local_irq_save()/restore() to avoid race:
|
|
||||||
* hv_pci_onchannelcallback() can also run in tasklet.
|
|
||||||
*/
|
*/
|
||||||
local_irq_save(flags);
|
spin_lock_irqsave(&channel->sched_lock, flags);
|
||||||
|
if (unlikely(channel->onchannel_callback == NULL)) {
|
||||||
if (hbus->hdev->channel->target_cpu == smp_processor_id())
|
spin_unlock_irqrestore(&channel->sched_lock, flags);
|
||||||
hv_pci_onchannelcallback(hbus);
|
goto enable_tasklet;
|
||||||
|
}
|
||||||
local_irq_restore(flags);
|
hv_pci_onchannelcallback(hbus);
|
||||||
|
spin_unlock_irqrestore(&channel->sched_lock, flags);
|
||||||
|
|
||||||
if (hpdev->state == hv_pcichild_ejecting) {
|
if (hpdev->state == hv_pcichild_ejecting) {
|
||||||
dev_err_once(&hbus->hdev->device,
|
dev_err_once(&hbus->hdev->device,
|
||||||
"the device is being ejected\n");
|
"the device is being ejected\n");
|
||||||
goto free_int_desc;
|
goto enable_tasklet;
|
||||||
}
|
}
|
||||||
|
|
||||||
udelay(100);
|
udelay(100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tasklet_enable(&channel->callback_event);
|
||||||
|
|
||||||
if (comp.comp_pkt.completion_status < 0) {
|
if (comp.comp_pkt.completion_status < 0) {
|
||||||
dev_err(&hbus->hdev->device,
|
dev_err(&hbus->hdev->device,
|
||||||
"Request for interrupt failed: 0x%x",
|
"Request for interrupt failed: 0x%x",
|
||||||
@ -1495,6 +1505,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|||||||
put_pcichild(hpdev);
|
put_pcichild(hpdev);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
enable_tasklet:
|
||||||
|
tasklet_enable(&channel->callback_event);
|
||||||
free_int_desc:
|
free_int_desc:
|
||||||
kfree(int_desc);
|
kfree(int_desc);
|
||||||
drop_reference:
|
drop_reference:
|
||||||
|
@ -621,6 +621,64 @@ get_in_err:
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
|
||||||
|
u32 new)
|
||||||
|
{
|
||||||
|
struct storvsc_device *stor_device;
|
||||||
|
struct vmbus_channel *cur_chn;
|
||||||
|
bool old_is_alloced = false;
|
||||||
|
struct hv_device *device;
|
||||||
|
unsigned long flags;
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
device = channel->primary_channel ?
|
||||||
|
channel->primary_channel->device_obj
|
||||||
|
: channel->device_obj;
|
||||||
|
stor_device = get_out_stor_device(device);
|
||||||
|
if (!stor_device)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* See storvsc_do_io() -> get_og_chn(). */
|
||||||
|
spin_lock_irqsave(&device->channel->lock, flags);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determines if the storvsc device has other channels assigned to
|
||||||
|
* the "old" CPU to update the alloced_cpus mask and the stor_chns
|
||||||
|
* array.
|
||||||
|
*/
|
||||||
|
if (device->channel != channel && device->channel->target_cpu == old) {
|
||||||
|
cur_chn = device->channel;
|
||||||
|
old_is_alloced = true;
|
||||||
|
goto old_is_alloced;
|
||||||
|
}
|
||||||
|
list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
|
||||||
|
if (cur_chn == channel)
|
||||||
|
continue;
|
||||||
|
if (cur_chn->target_cpu == old) {
|
||||||
|
old_is_alloced = true;
|
||||||
|
goto old_is_alloced;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
old_is_alloced:
|
||||||
|
if (old_is_alloced)
|
||||||
|
WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
|
||||||
|
else
|
||||||
|
cpumask_clear_cpu(old, &stor_device->alloced_cpus);
|
||||||
|
|
||||||
|
/* "Flush" the stor_chns array. */
|
||||||
|
for_each_possible_cpu(cpu) {
|
||||||
|
if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
|
||||||
|
cpu, &stor_device->alloced_cpus))
|
||||||
|
WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
WRITE_ONCE(stor_device->stor_chns[new], channel);
|
||||||
|
cpumask_set_cpu(new, &stor_device->alloced_cpus);
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&device->channel->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
static void handle_sc_creation(struct vmbus_channel *new_sc)
|
static void handle_sc_creation(struct vmbus_channel *new_sc)
|
||||||
{
|
{
|
||||||
struct hv_device *device = new_sc->primary_channel->device_obj;
|
struct hv_device *device = new_sc->primary_channel->device_obj;
|
||||||
@ -648,6 +706,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
|
||||||
|
|
||||||
/* Add the sub-channel to the array of available channels. */
|
/* Add the sub-channel to the array of available channels. */
|
||||||
stor_device->stor_chns[new_sc->target_cpu] = new_sc;
|
stor_device->stor_chns[new_sc->target_cpu] = new_sc;
|
||||||
cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
|
cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
|
||||||
@ -876,6 +936,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
|
|||||||
if (stor_device->stor_chns == NULL)
|
if (stor_device->stor_chns == NULL)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
|
||||||
|
|
||||||
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
|
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
|
||||||
cpumask_set_cpu(device->channel->target_cpu,
|
cpumask_set_cpu(device->channel->target_cpu,
|
||||||
&stor_device->alloced_cpus);
|
&stor_device->alloced_cpus);
|
||||||
@ -1248,8 +1310,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
|
|||||||
const struct cpumask *node_mask;
|
const struct cpumask *node_mask;
|
||||||
int num_channels, tgt_cpu;
|
int num_channels, tgt_cpu;
|
||||||
|
|
||||||
if (stor_device->num_sc == 0)
|
if (stor_device->num_sc == 0) {
|
||||||
|
stor_device->stor_chns[q_num] = stor_device->device->channel;
|
||||||
return stor_device->device->channel;
|
return stor_device->device->channel;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Our channel array is sparsely populated and we
|
* Our channel array is sparsely populated and we
|
||||||
@ -1258,7 +1322,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
|
|||||||
* The strategy is simple:
|
* The strategy is simple:
|
||||||
* I. Ensure NUMA locality
|
* I. Ensure NUMA locality
|
||||||
* II. Distribute evenly (best effort)
|
* II. Distribute evenly (best effort)
|
||||||
* III. Mapping is persistent.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
node_mask = cpumask_of_node(cpu_to_node(q_num));
|
node_mask = cpumask_of_node(cpu_to_node(q_num));
|
||||||
@ -1268,8 +1331,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
|
|||||||
if (cpumask_test_cpu(tgt_cpu, node_mask))
|
if (cpumask_test_cpu(tgt_cpu, node_mask))
|
||||||
num_channels++;
|
num_channels++;
|
||||||
}
|
}
|
||||||
if (num_channels == 0)
|
if (num_channels == 0) {
|
||||||
|
stor_device->stor_chns[q_num] = stor_device->device->channel;
|
||||||
return stor_device->device->channel;
|
return stor_device->device->channel;
|
||||||
|
}
|
||||||
|
|
||||||
hash_qnum = q_num;
|
hash_qnum = q_num;
|
||||||
while (hash_qnum >= num_channels)
|
while (hash_qnum >= num_channels)
|
||||||
@ -1295,6 +1360,7 @@ static int storvsc_do_io(struct hv_device *device,
|
|||||||
struct storvsc_device *stor_device;
|
struct storvsc_device *stor_device;
|
||||||
struct vstor_packet *vstor_packet;
|
struct vstor_packet *vstor_packet;
|
||||||
struct vmbus_channel *outgoing_channel, *channel;
|
struct vmbus_channel *outgoing_channel, *channel;
|
||||||
|
unsigned long flags;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
const struct cpumask *node_mask;
|
const struct cpumask *node_mask;
|
||||||
int tgt_cpu;
|
int tgt_cpu;
|
||||||
@ -1308,10 +1374,11 @@ static int storvsc_do_io(struct hv_device *device,
|
|||||||
|
|
||||||
request->device = device;
|
request->device = device;
|
||||||
/*
|
/*
|
||||||
* Select an an appropriate channel to send the request out.
|
* Select an appropriate channel to send the request out.
|
||||||
*/
|
*/
|
||||||
if (stor_device->stor_chns[q_num] != NULL) {
|
/* See storvsc_change_target_cpu(). */
|
||||||
outgoing_channel = stor_device->stor_chns[q_num];
|
outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
|
||||||
|
if (outgoing_channel != NULL) {
|
||||||
if (outgoing_channel->target_cpu == q_num) {
|
if (outgoing_channel->target_cpu == q_num) {
|
||||||
/*
|
/*
|
||||||
* Ideally, we want to pick a different channel if
|
* Ideally, we want to pick a different channel if
|
||||||
@ -1324,7 +1391,10 @@ static int storvsc_do_io(struct hv_device *device,
|
|||||||
continue;
|
continue;
|
||||||
if (tgt_cpu == q_num)
|
if (tgt_cpu == q_num)
|
||||||
continue;
|
continue;
|
||||||
channel = stor_device->stor_chns[tgt_cpu];
|
channel = READ_ONCE(
|
||||||
|
stor_device->stor_chns[tgt_cpu]);
|
||||||
|
if (channel == NULL)
|
||||||
|
continue;
|
||||||
if (hv_get_avail_to_write_percent(
|
if (hv_get_avail_to_write_percent(
|
||||||
&channel->outbound)
|
&channel->outbound)
|
||||||
> ring_avail_percent_lowater) {
|
> ring_avail_percent_lowater) {
|
||||||
@ -1350,7 +1420,10 @@ static int storvsc_do_io(struct hv_device *device,
|
|||||||
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
|
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
|
||||||
if (cpumask_test_cpu(tgt_cpu, node_mask))
|
if (cpumask_test_cpu(tgt_cpu, node_mask))
|
||||||
continue;
|
continue;
|
||||||
channel = stor_device->stor_chns[tgt_cpu];
|
channel = READ_ONCE(
|
||||||
|
stor_device->stor_chns[tgt_cpu]);
|
||||||
|
if (channel == NULL)
|
||||||
|
continue;
|
||||||
if (hv_get_avail_to_write_percent(
|
if (hv_get_avail_to_write_percent(
|
||||||
&channel->outbound)
|
&channel->outbound)
|
||||||
> ring_avail_percent_lowater) {
|
> ring_avail_percent_lowater) {
|
||||||
@ -1360,7 +1433,14 @@ static int storvsc_do_io(struct hv_device *device,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
spin_lock_irqsave(&device->channel->lock, flags);
|
||||||
|
outgoing_channel = stor_device->stor_chns[q_num];
|
||||||
|
if (outgoing_channel != NULL) {
|
||||||
|
spin_unlock_irqrestore(&device->channel->lock, flags);
|
||||||
|
goto found_channel;
|
||||||
|
}
|
||||||
outgoing_channel = get_og_chn(stor_device, q_num);
|
outgoing_channel = get_og_chn(stor_device, q_num);
|
||||||
|
spin_unlock_irqrestore(&device->channel->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
found_channel:
|
found_channel:
|
||||||
|
493
include/asm-generic/hyperv-tlfs.h
Normal file
493
include/asm-generic/hyperv-tlfs.h
Normal file
@ -0,0 +1,493 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file contains definitions from Hyper-V Hypervisor Top-Level Functional
|
||||||
|
* Specification (TLFS):
|
||||||
|
* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ASM_GENERIC_HYPERV_TLFS_H
|
||||||
|
#define _ASM_GENERIC_HYPERV_TLFS_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/bits.h>
|
||||||
|
#include <linux/time64.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* While not explicitly listed in the TLFS, Hyper-V always runs with a page size
|
||||||
|
* of 4096. These definitions are used when communicating with Hyper-V using
|
||||||
|
* guest physical pages and guest physical page addresses, since the guest page
|
||||||
|
* size may not be 4096 on all architectures.
|
||||||
|
*/
|
||||||
|
#define HV_HYP_PAGE_SHIFT 12
|
||||||
|
#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
|
||||||
|
#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hyper-V provides two categories of flags relevant to guest VMs. The
|
||||||
|
* "Features" category indicates specific functionality that is available
|
||||||
|
* to guests on this particular instance of Hyper-V. The "Features"
|
||||||
|
* are presented in four groups, each of which is 32 bits. The group A
|
||||||
|
* and B definitions are common across architectures and are listed here.
|
||||||
|
* However, not all flags are relevant on all architectures.
|
||||||
|
*
|
||||||
|
* Groups C and D vary across architectures and are listed in the
|
||||||
|
* architecture specific portion of hyperv-tlfs.h. Some of these flags exist
|
||||||
|
* on multiple architectures, but the bit positions are different so they
|
||||||
|
* cannot appear in the generic portion of hyperv-tlfs.h.
|
||||||
|
*
|
||||||
|
* The "Enlightenments" category provides recommendations on whether to use
|
||||||
|
* specific enlightenments that are available. The Enlightenments are a single
|
||||||
|
* group of 32 bits, but they vary across architectures and are listed in
|
||||||
|
* the architecture specific portion of hyperv-tlfs.h.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Group A Features.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* VP Runtime register available */
|
||||||
|
#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
|
||||||
|
/* Partition Reference Counter available*/
|
||||||
|
#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
|
||||||
|
/* Basic SynIC register available */
|
||||||
|
#define HV_MSR_SYNIC_AVAILABLE BIT(2)
|
||||||
|
/* Synthetic Timer registers available */
|
||||||
|
#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
|
||||||
|
/* Virtual APIC assist and VP assist page registers available */
|
||||||
|
#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
|
||||||
|
/* Hypercall and Guest OS ID registers available*/
|
||||||
|
#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
|
||||||
|
/* Access virtual processor index register available*/
|
||||||
|
#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
|
||||||
|
/* Virtual system reset register available*/
|
||||||
|
#define HV_MSR_RESET_AVAILABLE BIT(7)
|
||||||
|
/* Access statistics page registers available */
|
||||||
|
#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
|
||||||
|
/* Partition reference TSC register is available */
|
||||||
|
#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
|
||||||
|
/* Partition Guest IDLE register is available */
|
||||||
|
#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
|
||||||
|
/* Partition local APIC and TSC frequency registers available */
|
||||||
|
#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
|
||||||
|
/* AccessReenlightenmentControls privilege */
|
||||||
|
#define HV_ACCESS_REENLIGHTENMENT BIT(13)
|
||||||
|
/* AccessTscInvariantControls privilege */
|
||||||
|
#define HV_ACCESS_TSC_INVARIANT BIT(15)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Group B features.
|
||||||
|
*/
|
||||||
|
#define HV_CREATE_PARTITIONS BIT(0)
|
||||||
|
#define HV_ACCESS_PARTITION_ID BIT(1)
|
||||||
|
#define HV_ACCESS_MEMORY_POOL BIT(2)
|
||||||
|
#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
|
||||||
|
#define HV_POST_MESSAGES BIT(4)
|
||||||
|
#define HV_SIGNAL_EVENTS BIT(5)
|
||||||
|
#define HV_CREATE_PORT BIT(6)
|
||||||
|
#define HV_CONNECT_PORT BIT(7)
|
||||||
|
#define HV_ACCESS_STATS BIT(8)
|
||||||
|
#define HV_DEBUGGING BIT(11)
|
||||||
|
#define HV_CPU_POWER_MANAGEMENT BIT(12)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TSC page layout.
|
||||||
|
*/
|
||||||
|
struct ms_hyperv_tsc_page {
|
||||||
|
volatile u32 tsc_sequence;
|
||||||
|
u32 reserved1;
|
||||||
|
volatile u64 tsc_scale;
|
||||||
|
volatile s64 tsc_offset;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The guest OS needs to register the guest ID with the hypervisor.
|
||||||
|
* The guest ID is a 64 bit entity and the structure of this ID is
|
||||||
|
* specified in the Hyper-V specification:
|
||||||
|
*
|
||||||
|
* msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
|
||||||
|
*
|
||||||
|
* While the current guideline does not specify how Linux guest ID(s)
|
||||||
|
* need to be generated, our plan is to publish the guidelines for
|
||||||
|
* Linux and other guest operating systems that currently are hosted
|
||||||
|
* on Hyper-V. The implementation here conforms to these yet
|
||||||
|
* unpublished guidelines.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Bit(s)
|
||||||
|
* 63 - Indicates if the OS is Open Source or not; 1 is Open Source
|
||||||
|
* 62:56 - Os Type; Linux is 0x100
|
||||||
|
* 55:48 - Distro specific identification
|
||||||
|
* 47:16 - Linux kernel version number
|
||||||
|
* 15:0 - Distro specific identification
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define HV_LINUX_VENDOR_ID 0x8100
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Crash notification flags.
|
||||||
|
*/
|
||||||
|
#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
|
||||||
|
#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
|
||||||
|
|
||||||
|
/* Declare the various hypercall operations. */
|
||||||
|
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
|
||||||
|
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
|
||||||
|
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
|
||||||
|
#define HVCALL_SEND_IPI 0x000b
|
||||||
|
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
|
||||||
|
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
|
||||||
|
#define HVCALL_SEND_IPI_EX 0x0015
|
||||||
|
#define HVCALL_GET_VP_REGISTERS 0x0050
|
||||||
|
#define HVCALL_SET_VP_REGISTERS 0x0051
|
||||||
|
#define HVCALL_POST_MESSAGE 0x005c
|
||||||
|
#define HVCALL_SIGNAL_EVENT 0x005d
|
||||||
|
#define HVCALL_RETARGET_INTERRUPT 0x007e
|
||||||
|
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
|
||||||
|
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
|
||||||
|
|
||||||
|
#define HV_FLUSH_ALL_PROCESSORS BIT(0)
|
||||||
|
#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
|
||||||
|
#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
|
||||||
|
#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
|
||||||
|
|
||||||
|
enum HV_GENERIC_SET_FORMAT {
|
||||||
|
HV_GENERIC_SET_SPARSE_4K,
|
||||||
|
HV_GENERIC_SET_ALL,
|
||||||
|
};
|
||||||
|
|
||||||
|
#define HV_PARTITION_ID_SELF ((u64)-1)
|
||||||
|
#define HV_VP_INDEX_SELF ((u32)-2)
|
||||||
|
|
||||||
|
#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
|
||||||
|
#define HV_HYPERCALL_FAST_BIT BIT(16)
|
||||||
|
#define HV_HYPERCALL_VARHEAD_OFFSET 17
|
||||||
|
#define HV_HYPERCALL_REP_COMP_OFFSET 32
|
||||||
|
#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
|
||||||
|
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
|
||||||
|
#define HV_HYPERCALL_REP_START_OFFSET 48
|
||||||
|
#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
|
||||||
|
|
||||||
|
/* hypercall status code */
|
||||||
|
#define HV_STATUS_SUCCESS 0
|
||||||
|
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
|
||||||
|
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
|
||||||
|
#define HV_STATUS_INVALID_ALIGNMENT 4
|
||||||
|
#define HV_STATUS_INVALID_PARAMETER 5
|
||||||
|
#define HV_STATUS_INSUFFICIENT_MEMORY 11
|
||||||
|
#define HV_STATUS_INVALID_PORT_ID 17
|
||||||
|
#define HV_STATUS_INVALID_CONNECTION_ID 18
|
||||||
|
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The Hyper-V TimeRefCount register and the TSC
|
||||||
|
* page provide a guest VM clock with 100ns tick rate
|
||||||
|
*/
|
||||||
|
#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
|
||||||
|
|
||||||
|
/* Define the number of synthetic interrupt sources. */
|
||||||
|
#define HV_SYNIC_SINT_COUNT (16)
|
||||||
|
/* Define the expected SynIC version. */
|
||||||
|
#define HV_SYNIC_VERSION_1 (0x1)
|
||||||
|
/* Valid SynIC vectors are 16-255. */
|
||||||
|
#define HV_SYNIC_FIRST_VALID_VECTOR (16)
|
||||||
|
|
||||||
|
#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
|
||||||
|
#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
|
||||||
|
#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
|
||||||
|
#define HV_SYNIC_SINT_MASKED (1ULL << 16)
|
||||||
|
#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
|
||||||
|
#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
|
||||||
|
|
||||||
|
#define HV_SYNIC_STIMER_COUNT (4)
|
||||||
|
|
||||||
|
/* Define synthetic interrupt controller message constants. */
|
||||||
|
#define HV_MESSAGE_SIZE (256)
|
||||||
|
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
|
||||||
|
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
|
||||||
|
|
||||||
|
/* Define synthetic interrupt controller message flags. */
|
||||||
|
union hv_message_flags {
|
||||||
|
__u8 asu8;
|
||||||
|
struct {
|
||||||
|
__u8 msg_pending:1;
|
||||||
|
__u8 reserved:7;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Define port identifier type. */
|
||||||
|
union hv_port_id {
|
||||||
|
__u32 asu32;
|
||||||
|
struct {
|
||||||
|
__u32 id:24;
|
||||||
|
__u32 reserved:8;
|
||||||
|
} __packed u;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Define synthetic interrupt controller message header. */
|
||||||
|
struct hv_message_header {
|
||||||
|
__u32 message_type;
|
||||||
|
__u8 payload_size;
|
||||||
|
union hv_message_flags message_flags;
|
||||||
|
__u8 reserved[2];
|
||||||
|
union {
|
||||||
|
__u64 sender;
|
||||||
|
union hv_port_id port;
|
||||||
|
};
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* Define synthetic interrupt controller message format. */
|
||||||
|
struct hv_message {
|
||||||
|
struct hv_message_header header;
|
||||||
|
union {
|
||||||
|
__u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
|
||||||
|
} u;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* Define the synthetic interrupt message page layout. */
|
||||||
|
struct hv_message_page {
|
||||||
|
struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* Define timer message payload structure. */
|
||||||
|
struct hv_timer_message_payload {
|
||||||
|
__u32 timer_index;
|
||||||
|
__u32 reserved;
|
||||||
|
__u64 expiration_time; /* When the timer expired */
|
||||||
|
__u64 delivery_time; /* When the message was delivered */
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
|
||||||
|
/* Define synthetic interrupt controller flag constants. */
|
||||||
|
#define HV_EVENT_FLAGS_COUNT (256 * 8)
|
||||||
|
#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Synthetic timer configuration.
|
||||||
|
*/
|
||||||
|
union hv_stimer_config {
|
||||||
|
u64 as_uint64;
|
||||||
|
struct {
|
||||||
|
u64 enable:1;
|
||||||
|
u64 periodic:1;
|
||||||
|
u64 lazy:1;
|
||||||
|
u64 auto_enable:1;
|
||||||
|
u64 apic_vector:8;
|
||||||
|
u64 direct_mode:1;
|
||||||
|
u64 reserved_z0:3;
|
||||||
|
u64 sintx:4;
|
||||||
|
u64 reserved_z1:44;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Define the synthetic interrupt controller event flags format. */
|
||||||
|
union hv_synic_event_flags {
|
||||||
|
unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Define SynIC control register. */
|
||||||
|
union hv_synic_scontrol {
|
||||||
|
u64 as_uint64;
|
||||||
|
struct {
|
||||||
|
u64 enable:1;
|
||||||
|
u64 reserved:63;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Define synthetic interrupt source. */
|
||||||
|
union hv_synic_sint {
|
||||||
|
u64 as_uint64;
|
||||||
|
struct {
|
||||||
|
u64 vector:8;
|
||||||
|
u64 reserved1:8;
|
||||||
|
u64 masked:1;
|
||||||
|
u64 auto_eoi:1;
|
||||||
|
u64 polling:1;
|
||||||
|
u64 reserved2:45;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Define the format of the SIMP register */
|
||||||
|
union hv_synic_simp {
|
||||||
|
u64 as_uint64;
|
||||||
|
struct {
|
||||||
|
u64 simp_enabled:1;
|
||||||
|
u64 preserved:11;
|
||||||
|
u64 base_simp_gpa:52;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Define the format of the SIEFP register */
|
||||||
|
union hv_synic_siefp {
|
||||||
|
u64 as_uint64;
|
||||||
|
struct {
|
||||||
|
u64 siefp_enabled:1;
|
||||||
|
u64 preserved:11;
|
||||||
|
u64 base_siefp_gpa:52;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hv_vpset {
|
||||||
|
u64 format;
|
||||||
|
u64 valid_bank_mask;
|
||||||
|
u64 bank_contents[];
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* HvCallSendSyntheticClusterIpi hypercall */
|
||||||
|
struct hv_send_ipi {
|
||||||
|
u32 vector;
|
||||||
|
u32 reserved;
|
||||||
|
u64 cpu_mask;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* HvCallSendSyntheticClusterIpiEx hypercall */
|
||||||
|
struct hv_send_ipi_ex {
|
||||||
|
u32 vector;
|
||||||
|
u32 reserved;
|
||||||
|
struct hv_vpset vp_set;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* HvFlushGuestPhysicalAddressSpace hypercalls */
|
||||||
|
struct hv_guest_mapping_flush {
|
||||||
|
u64 address_space;
|
||||||
|
u64 flags;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
|
||||||
|
* by the bitwidth of "additional_pages" in union hv_gpa_page_range.
|
||||||
|
*/
|
||||||
|
#define HV_MAX_FLUSH_PAGES (2048)
|
||||||
|
|
||||||
|
/* HvFlushGuestPhysicalAddressList hypercall */
|
||||||
|
union hv_gpa_page_range {
|
||||||
|
u64 address_space;
|
||||||
|
struct {
|
||||||
|
u64 additional_pages:11;
|
||||||
|
u64 largepage:1;
|
||||||
|
u64 basepfn:52;
|
||||||
|
} page;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All input flush parameters should be in a single page. The max flush
|
||||||
|
* count is equal to how many entries of union hv_gpa_page_range can
|
||||||
|
* be populated into the input parameter page.
|
||||||
|
*/
|
||||||
|
#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
|
||||||
|
sizeof(union hv_gpa_page_range))
|
||||||
|
|
||||||
|
struct hv_guest_mapping_flush_list {
|
||||||
|
u64 address_space;
|
||||||
|
u64 flags;
|
||||||
|
union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
|
||||||
|
struct hv_tlb_flush {
|
||||||
|
u64 address_space;
|
||||||
|
u64 flags;
|
||||||
|
u64 processor_mask;
|
||||||
|
u64 gva_list[];
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
|
||||||
|
struct hv_tlb_flush_ex {
|
||||||
|
u64 address_space;
|
||||||
|
u64 flags;
|
||||||
|
struct hv_vpset hv_vp_set;
|
||||||
|
u64 gva_list[];
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/* HvRetargetDeviceInterrupt hypercall */
|
||||||
|
union hv_msi_entry {
|
||||||
|
u64 as_uint64;
|
||||||
|
struct {
|
||||||
|
u32 address;
|
||||||
|
u32 data;
|
||||||
|
} __packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hv_interrupt_entry {
|
||||||
|
u32 source; /* 1 for MSI(-X) */
|
||||||
|
u32 reserved1;
|
||||||
|
union hv_msi_entry msi_entry;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* flags for hv_device_interrupt_target.flags
|
||||||
|
*/
|
||||||
|
#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
|
||||||
|
#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
|
||||||
|
|
||||||
|
struct hv_device_interrupt_target {
|
||||||
|
u32 vector;
|
||||||
|
u32 flags;
|
||||||
|
union {
|
||||||
|
u64 vp_mask;
|
||||||
|
struct hv_vpset vp_set;
|
||||||
|
};
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
struct hv_retarget_device_interrupt {
|
||||||
|
u64 partition_id; /* use "self" */
|
||||||
|
u64 device_id;
|
||||||
|
struct hv_interrupt_entry int_entry;
|
||||||
|
u64 reserved2;
|
||||||
|
struct hv_device_interrupt_target int_target;
|
||||||
|
} __packed __aligned(8);
|
||||||
|
|
||||||
|
|
||||||
|
/* HvGetVpRegisters hypercall input with variable size reg name list */
|
||||||
|
struct hv_get_vp_registers_input {
|
||||||
|
struct {
|
||||||
|
u64 partitionid;
|
||||||
|
u32 vpindex;
|
||||||
|
u8 inputvtl;
|
||||||
|
u8 padding[3];
|
||||||
|
} header;
|
||||||
|
struct input {
|
||||||
|
u32 name0;
|
||||||
|
u32 name1;
|
||||||
|
} element[];
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
|
||||||
|
/* HvGetVpRegisters returns an array of these output elements */
|
||||||
|
struct hv_get_vp_registers_output {
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
u32 a;
|
||||||
|
u32 b;
|
||||||
|
u32 c;
|
||||||
|
u32 d;
|
||||||
|
} as32 __packed;
|
||||||
|
struct {
|
||||||
|
u64 low;
|
||||||
|
u64 high;
|
||||||
|
} as64 __packed;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/* HvSetVpRegisters hypercall with variable size reg name/value list */
|
||||||
|
struct hv_set_vp_registers_input {
|
||||||
|
struct {
|
||||||
|
u64 partitionid;
|
||||||
|
u32 vpindex;
|
||||||
|
u8 inputvtl;
|
||||||
|
u8 padding[3];
|
||||||
|
} header;
|
||||||
|
struct {
|
||||||
|
u32 name;
|
||||||
|
u32 padding1;
|
||||||
|
u64 padding2;
|
||||||
|
u64 valuelow;
|
||||||
|
u64 valuehigh;
|
||||||
|
} element[];
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
#endif
|
@ -117,7 +117,7 @@ struct hv_ring_buffer {
|
|||||||
* Ring data starts here + RingDataStartOffset
|
* Ring data starts here + RingDataStartOffset
|
||||||
* !!! DO NOT place any fields below this !!!
|
* !!! DO NOT place any fields below this !!!
|
||||||
*/
|
*/
|
||||||
u8 buffer[0];
|
u8 buffer[];
|
||||||
} __packed;
|
} __packed;
|
||||||
|
|
||||||
struct hv_ring_buffer_info {
|
struct hv_ring_buffer_info {
|
||||||
@ -313,7 +313,7 @@ struct vmadd_remove_transfer_page_set {
|
|||||||
struct gpa_range {
|
struct gpa_range {
|
||||||
u32 byte_count;
|
u32 byte_count;
|
||||||
u32 byte_offset;
|
u32 byte_offset;
|
||||||
u64 pfn_array[0];
|
u64 pfn_array[];
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -425,7 +425,7 @@ enum vmbus_channel_message_type {
|
|||||||
CHANNELMSG_19 = 19,
|
CHANNELMSG_19 = 19,
|
||||||
CHANNELMSG_20 = 20,
|
CHANNELMSG_20 = 20,
|
||||||
CHANNELMSG_TL_CONNECT_REQUEST = 21,
|
CHANNELMSG_TL_CONNECT_REQUEST = 21,
|
||||||
CHANNELMSG_22 = 22,
|
CHANNELMSG_MODIFYCHANNEL = 22,
|
||||||
CHANNELMSG_TL_CONNECT_RESULT = 23,
|
CHANNELMSG_TL_CONNECT_RESULT = 23,
|
||||||
CHANNELMSG_COUNT
|
CHANNELMSG_COUNT
|
||||||
};
|
};
|
||||||
@ -563,7 +563,7 @@ struct vmbus_channel_gpadl_header {
|
|||||||
u32 gpadl;
|
u32 gpadl;
|
||||||
u16 range_buflen;
|
u16 range_buflen;
|
||||||
u16 rangecount;
|
u16 rangecount;
|
||||||
struct gpa_range range[0];
|
struct gpa_range range[];
|
||||||
} __packed;
|
} __packed;
|
||||||
|
|
||||||
/* This is the followup packet that contains more PFNs. */
|
/* This is the followup packet that contains more PFNs. */
|
||||||
@ -571,7 +571,7 @@ struct vmbus_channel_gpadl_body {
|
|||||||
struct vmbus_channel_message_header header;
|
struct vmbus_channel_message_header header;
|
||||||
u32 msgnumber;
|
u32 msgnumber;
|
||||||
u32 gpadl;
|
u32 gpadl;
|
||||||
u64 pfn[0];
|
u64 pfn[];
|
||||||
} __packed;
|
} __packed;
|
||||||
|
|
||||||
struct vmbus_channel_gpadl_created {
|
struct vmbus_channel_gpadl_created {
|
||||||
@ -620,6 +620,13 @@ struct vmbus_channel_tl_connect_request {
|
|||||||
guid_t host_service_id;
|
guid_t host_service_id;
|
||||||
} __packed;
|
} __packed;
|
||||||
|
|
||||||
|
/* Modify Channel parameters, cf. vmbus_send_modifychannel() */
|
||||||
|
struct vmbus_channel_modifychannel {
|
||||||
|
struct vmbus_channel_message_header header;
|
||||||
|
u32 child_relid;
|
||||||
|
u32 target_vp;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
struct vmbus_channel_version_response {
|
struct vmbus_channel_version_response {
|
||||||
struct vmbus_channel_message_header header;
|
struct vmbus_channel_message_header header;
|
||||||
u8 version_supported;
|
u8 version_supported;
|
||||||
@ -672,7 +679,7 @@ struct vmbus_channel_msginfo {
|
|||||||
* The channel message that goes out on the "wire".
|
* The channel message that goes out on the "wire".
|
||||||
* It will contain at minimum the VMBUS_CHANNEL_MESSAGE_HEADER header
|
* It will contain at minimum the VMBUS_CHANNEL_MESSAGE_HEADER header
|
||||||
*/
|
*/
|
||||||
unsigned char msg[0];
|
unsigned char msg[];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vmbus_close_msg {
|
struct vmbus_close_msg {
|
||||||
@ -689,11 +696,6 @@ union hv_connection_id {
|
|||||||
} u;
|
} u;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum hv_numa_policy {
|
|
||||||
HV_BALANCED = 0,
|
|
||||||
HV_LOCALIZED,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum vmbus_device_type {
|
enum vmbus_device_type {
|
||||||
HV_IDE = 0,
|
HV_IDE = 0,
|
||||||
HV_SCSI,
|
HV_SCSI,
|
||||||
@ -771,6 +773,15 @@ struct vmbus_channel {
|
|||||||
void (*onchannel_callback)(void *context);
|
void (*onchannel_callback)(void *context);
|
||||||
void *channel_callback_context;
|
void *channel_callback_context;
|
||||||
|
|
||||||
|
void (*change_target_cpu_callback)(struct vmbus_channel *channel,
|
||||||
|
u32 old, u32 new);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Synchronize channel scheduling and channel removal; see the inline
|
||||||
|
* comments in vmbus_chan_sched() and vmbus_reset_channel_cb().
|
||||||
|
*/
|
||||||
|
spinlock_t sched_lock;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A channel can be marked for one of three modes of reading:
|
* A channel can be marked for one of three modes of reading:
|
||||||
* BATCHED - callback called from tasklet and should read
|
* BATCHED - callback called from tasklet and should read
|
||||||
@ -802,10 +813,6 @@ struct vmbus_channel {
|
|||||||
u32 target_vp;
|
u32 target_vp;
|
||||||
/* The corresponding CPUID in the guest */
|
/* The corresponding CPUID in the guest */
|
||||||
u32 target_cpu;
|
u32 target_cpu;
|
||||||
/*
|
|
||||||
* State to manage the CPU affiliation of channels.
|
|
||||||
*/
|
|
||||||
struct cpumask alloced_cpus_in_node;
|
|
||||||
int numa_node;
|
int numa_node;
|
||||||
/*
|
/*
|
||||||
* Support for sub-channels. For high performance devices,
|
* Support for sub-channels. For high performance devices,
|
||||||
@ -854,11 +861,6 @@ struct vmbus_channel {
|
|||||||
* Support per-channel state for use by vmbus drivers.
|
* Support per-channel state for use by vmbus drivers.
|
||||||
*/
|
*/
|
||||||
void *per_channel_state;
|
void *per_channel_state;
|
||||||
/*
|
|
||||||
* To support per-cpu lookup mapping of relid to channel,
|
|
||||||
* link up channels based on their CPU affinity.
|
|
||||||
*/
|
|
||||||
struct list_head percpu_list;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Defer freeing channel until after all cpu's have
|
* Defer freeing channel until after all cpu's have
|
||||||
@ -897,20 +899,15 @@ struct vmbus_channel {
|
|||||||
*/
|
*/
|
||||||
bool low_latency;
|
bool low_latency;
|
||||||
|
|
||||||
/*
|
|
||||||
* NUMA distribution policy:
|
|
||||||
* We support two policies:
|
|
||||||
* 1) Balanced: Here all performance critical channels are
|
|
||||||
* distributed evenly amongst all the NUMA nodes.
|
|
||||||
* This policy will be the default policy.
|
|
||||||
* 2) Localized: All channels of a given instance of a
|
|
||||||
* performance critical service will be assigned CPUs
|
|
||||||
* within a selected NUMA node.
|
|
||||||
*/
|
|
||||||
enum hv_numa_policy affinity_policy;
|
|
||||||
|
|
||||||
bool probe_done;
|
bool probe_done;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cache the device ID here for easy access; this is useful, in
|
||||||
|
* particular, in situations where the channel's device_obj has
|
||||||
|
* not been allocated/initialized yet.
|
||||||
|
*/
|
||||||
|
u16 device_id;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must offload the handling of the primary/sub channels
|
* We must offload the handling of the primary/sub channels
|
||||||
* from the single-threaded vmbus_connection.work_queue to
|
* from the single-threaded vmbus_connection.work_queue to
|
||||||
@ -964,12 +961,6 @@ static inline bool is_sub_channel(const struct vmbus_channel *c)
|
|||||||
return c->offermsg.offer.sub_channel_index != 0;
|
return c->offermsg.offer.sub_channel_index != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void set_channel_affinity_state(struct vmbus_channel *c,
|
|
||||||
enum hv_numa_policy policy)
|
|
||||||
{
|
|
||||||
c->affinity_policy = policy;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void set_channel_read_mode(struct vmbus_channel *c,
|
static inline void set_channel_read_mode(struct vmbus_channel *c,
|
||||||
enum hv_callback_mode mode)
|
enum hv_callback_mode mode)
|
||||||
{
|
{
|
||||||
@ -1017,7 +1008,7 @@ static inline void clear_low_latency_mode(struct vmbus_channel *c)
|
|||||||
c->low_latency = false;
|
c->low_latency = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void vmbus_onmessage(void *context);
|
void vmbus_onmessage(struct vmbus_channel_message_header *hdr);
|
||||||
|
|
||||||
int vmbus_request_offers(void);
|
int vmbus_request_offers(void);
|
||||||
|
|
||||||
@ -1531,6 +1522,7 @@ extern __u32 vmbus_proto_version;
|
|||||||
|
|
||||||
int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
|
int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
|
||||||
const guid_t *shv_host_servie_id);
|
const guid_t *shv_host_servie_id);
|
||||||
|
int vmbus_send_modifychannel(u32 child_relid, u32 target_vp);
|
||||||
void vmbus_set_event(struct vmbus_channel *channel);
|
void vmbus_set_event(struct vmbus_channel *channel);
|
||||||
|
|
||||||
/* Get the start of the ring buffer. */
|
/* Get the start of the ring buffer. */
|
||||||
|
@ -434,7 +434,7 @@ struct virtio_device_id {
|
|||||||
* For Hyper-V devices we use the device guid as the id.
|
* For Hyper-V devices we use the device guid as the id.
|
||||||
*/
|
*/
|
||||||
struct hv_vmbus_device_id {
|
struct hv_vmbus_device_id {
|
||||||
uuid_le guid;
|
guid_t guid;
|
||||||
kernel_ulong_t driver_data; /* Data private to the driver */
|
kernel_ulong_t driver_data; /* Data private to the driver */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user