Merge tag 'hyperv-next-signed-20220528' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv updates from Wei Liu:

 - Harden hv_sock driver (Andrea Parri)

 - Harden Hyper-V PCI driver (Andrea Parri)

 - Fix multi-MSI for Hyper-V PCI driver (Jeffrey Hugo)

 - Fix Hyper-V PCI to reduce boot time (Dexuan Cui)

 - Remove code for long EOL'ed Hyper-V versions (Michael Kelley, Saurabh
   Sengar)

 - Fix balloon driver error handling (Shradha Gupta)

 - Fix a typo in vmbus driver (Julia Lawall)

 - Ignore vmbus IMC device (Michael Kelley)

 - Add a new error message to Hyper-V DRM driver (Saurabh Sengar)

* tag 'hyperv-next-signed-20220528' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (28 commits)
  hv_balloon: Fix balloon_probe() and balloon_remove() error handling
  scsi: storvsc: Removing Pre Win8 related logic
  Drivers: hv: vmbus: fix typo in comment
  PCI: hv: Fix synchronization between channel callback and hv_pci_bus_exit()
  PCI: hv: Add validation for untrusted Hyper-V values
  PCI: hv: Fix interrupt mapping for multi-MSI
  PCI: hv: Reuse existing IRTE allocation in compose_msi_msg()
  drm/hyperv: Remove support for Hyper-V 2008 and 2008R2/Win7
  video: hyperv_fb: Remove support for Hyper-V 2008 and 2008R2/Win7
  scsi: storvsc: Remove support for Hyper-V 2008 and 2008R2/Win7
  Drivers: hv: vmbus: Remove support for Hyper-V 2008 and Hyper-V 2008R2/Win7
  x86/hyperv: Disable hardlockup detector by default in Hyper-V guests
  drm/hyperv: Add error message for fb size greater than allocated
  PCI: hv: Do not set PCI_COMMAND_MEMORY to reduce VM boot time
  PCI: hv: Fix hv_arch_irq_unmask() for multi-MSI
  Drivers: hv: vmbus: Refactor the ring-buffer iterator functions
  Drivers: hv: vmbus: Accept hv_sock offers in isolated guests
  hv_sock: Add validation for untrusted Hyper-V values
  hv_sock: Copy packets sent by Hyper-V out of the ring buffer
  hv_sock: Check hv_pkt_iter_first_raw()'s return value
  ...
This commit is contained in:
Linus Torvalds
2022-05-28 11:39:01 -07:00
15 changed files with 491 additions and 433 deletions

View File

@ -91,6 +91,13 @@ static enum pci_protocol_version_t pci_protocol_versions[] = {
/* space for 32bit serial number as string */
#define SLOT_NAME_SIZE 11
/*
* Size of requestor for VMbus; the value is based on the observation
* that having more than one request outstanding is 'rare', and so 64
* should be generous in ensuring that we don't ever run out.
*/
#define HV_PCI_RQSTOR_SIZE 64
/*
* Message Types
*/
@ -604,17 +611,19 @@ static unsigned int hv_msi_get_int_vector(struct irq_data *data)
return cfg->vector;
}
static void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
struct msi_desc *msi_desc)
{
msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
msi_entry->data.as_uint32 = msi_desc->msg.data;
}
static int hv_msi_prepare(struct irq_domain *domain, struct device *dev,
int nvec, msi_alloc_info_t *info)
{
return pci_msi_prepare(domain, dev, nvec, info);
int ret = pci_msi_prepare(domain, dev, nvec, info);
/*
* By using the interrupt remapper in the hypervisor IOMMU, contiguous
* CPU vectors is not needed for multi-MSI
*/
if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
return ret;
}
/**
@ -631,6 +640,7 @@ static void hv_arch_irq_unmask(struct irq_data *data)
{
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
struct hv_retarget_device_interrupt *params;
struct tran_int_desc *int_desc;
struct hv_pcibus_device *hbus;
struct cpumask *dest;
cpumask_var_t tmp;
@ -645,6 +655,7 @@ static void hv_arch_irq_unmask(struct irq_data *data)
pdev = msi_desc_to_pci_dev(msi_desc);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
int_desc = data->chip_data;
spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
@ -652,7 +663,8 @@ static void hv_arch_irq_unmask(struct irq_data *data)
memset(params, 0, sizeof(*params));
params->partition_id = HV_PARTITION_ID_SELF;
params->int_entry.source = HV_INTERRUPT_SOURCE_MSI;
hv_set_msi_entry_from_desc(&params->int_entry.msi_entry, msi_desc);
params->int_entry.msi_entry.address.as_uint32 = int_desc->address & 0xffffffff;
params->int_entry.msi_entry.data.as_uint32 = int_desc->data;
params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
(hbus->hdev->dev_instance.b[4] << 16) |
(hbus->hdev->dev_instance.b[7] << 8) |
@ -969,11 +981,7 @@ static void hv_pci_generic_compl(void *context, struct pci_response *resp,
{
struct hv_pci_compl *comp_pkt = context;
if (resp_packet_size >= offsetofend(struct pci_response, status))
comp_pkt->completion_status = resp->status;
else
comp_pkt->completion_status = -1;
comp_pkt->completion_status = resp->status;
complete(&comp_pkt->host_event);
}
@ -1513,6 +1521,10 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev,
u8 buffer[sizeof(struct pci_delete_interrupt)];
} ctxt;
if (!int_desc->vector_count) {
kfree(int_desc);
return;
}
memset(&ctxt, 0, sizeof(ctxt));
int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
int_pkt->message_type.type =
@ -1520,7 +1532,7 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev,
int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
int_pkt->int_desc = *int_desc;
vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, sizeof(*int_pkt),
(unsigned long)&ctxt.pkt, VM_PKT_DATA_INBAND, 0);
0, VM_PKT_DATA_INBAND, 0);
kfree(int_desc);
}
@ -1590,19 +1602,24 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
struct pci_create_int_response *int_resp =
(struct pci_create_int_response *)resp;
if (resp_packet_size < sizeof(*int_resp)) {
comp_pkt->comp_pkt.completion_status = -1;
goto out;
}
comp_pkt->comp_pkt.completion_status = resp->status;
comp_pkt->int_desc = int_resp->int_desc;
out:
complete(&comp_pkt->comp_pkt.host_event);
}
static u32 hv_compose_msi_req_v1(
struct pci_create_interrupt *int_pkt, struct cpumask *affinity,
u32 slot, u8 vector)
u32 slot, u8 vector, u8 vector_count)
{
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
int_pkt->int_desc.vector_count = vector_count;
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
/*
@ -1625,14 +1642,14 @@ static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
static u32 hv_compose_msi_req_v2(
struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
u32 slot, u8 vector)
u32 slot, u8 vector, u8 vector_count)
{
int cpu;
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
int_pkt->int_desc.vector_count = vector_count;
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
cpu = hv_compose_msi_req_get_cpu(affinity);
int_pkt->int_desc.processor_array[0] =
@ -1644,7 +1661,7 @@ static u32 hv_compose_msi_req_v2(
static u32 hv_compose_msi_req_v3(
struct pci_create_interrupt3 *int_pkt, struct cpumask *affinity,
u32 slot, u32 vector)
u32 slot, u32 vector, u8 vector_count)
{
int cpu;
@ -1652,7 +1669,7 @@ static u32 hv_compose_msi_req_v3(
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.reserved = 0;
int_pkt->int_desc.vector_count = 1;
int_pkt->int_desc.vector_count = vector_count;
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
cpu = hv_compose_msi_req_get_cpu(affinity);
int_pkt->int_desc.processor_array[0] =
@ -1683,6 +1700,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct cpumask *dest;
struct compose_comp_ctxt comp;
struct tran_int_desc *int_desc;
struct msi_desc *msi_desc;
u8 vector, vector_count;
struct {
struct pci_packet pci_pkt;
union {
@ -1691,11 +1710,21 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct pci_create_interrupt3 v3;
} int_pkts;
} __packed ctxt;
u64 trans_id;
u32 size;
int ret;
pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
/* Reuse the previous allocation */
if (data->chip_data) {
int_desc = data->chip_data;
msg->address_hi = int_desc->address >> 32;
msg->address_lo = int_desc->address & 0xffffffff;
msg->data = int_desc->data;
return;
}
msi_desc = irq_data_get_msi_desc(data);
pdev = msi_desc_to_pci_dev(msi_desc);
dest = irq_data_get_effective_affinity_mask(data);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
@ -1704,17 +1733,40 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
if (!hpdev)
goto return_null_message;
/* Free any previous message that might have already been composed. */
if (data->chip_data) {
int_desc = data->chip_data;
data->chip_data = NULL;
hv_int_desc_free(hpdev, int_desc);
}
int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
if (!int_desc)
goto drop_reference;
if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) {
/*
* If this is not the first MSI of Multi MSI, we already have
* a mapping. Can exit early.
*/
if (msi_desc->irq != data->irq) {
data->chip_data = int_desc;
int_desc->address = msi_desc->msg.address_lo |
(u64)msi_desc->msg.address_hi << 32;
int_desc->data = msi_desc->msg.data +
(data->irq - msi_desc->irq);
msg->address_hi = msi_desc->msg.address_hi;
msg->address_lo = msi_desc->msg.address_lo;
msg->data = int_desc->data;
put_pcichild(hpdev);
return;
}
/*
* The vector we select here is a dummy value. The correct
* value gets sent to the hypervisor in unmask(). This needs
* to be aligned with the count, and also not zero. Multi-msi
* is powers of 2 up to 32, so 32 will always work here.
*/
vector = 32;
vector_count = msi_desc->nvec_used;
} else {
vector = hv_msi_get_int_vector(data);
vector_count = 1;
}
memset(&ctxt, 0, sizeof(ctxt));
init_completion(&comp.comp_pkt.host_event);
ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
@ -1725,7 +1777,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
dest,
hpdev->desc.win_slot.slot,
hv_msi_get_int_vector(data));
vector,
vector_count);
break;
case PCI_PROTOCOL_VERSION_1_2:
@ -1733,14 +1786,16 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
dest,
hpdev->desc.win_slot.slot,
hv_msi_get_int_vector(data));
vector,
vector_count);
break;
case PCI_PROTOCOL_VERSION_1_4:
size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
dest,
hpdev->desc.win_slot.slot,
hv_msi_get_int_vector(data));
vector,
vector_count);
break;
default:
@ -1753,10 +1808,10 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
goto free_int_desc;
}
ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
size, (unsigned long)&ctxt.pci_pkt,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
ret = vmbus_sendpacket_getid(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
size, (unsigned long)&ctxt.pci_pkt,
&trans_id, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret) {
dev_err(&hbus->hdev->device,
"Sending request for interrupt failed: 0x%x",
@ -1835,6 +1890,15 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
enable_tasklet:
tasklet_enable(&channel->callback_event);
/*
* The completion packet on the stack becomes invalid after 'return';
* remove the ID from the VMbus requestor if the identifier is still
* mapped to/associated with the packet. (The identifier could have
* been 're-used', i.e., already removed and (re-)mapped.)
*
* Cf. hv_pci_onchannelcallback().
*/
vmbus_request_addr_match(channel, trans_id, (unsigned long)&ctxt.pci_pkt);
free_int_desc:
kfree(int_desc);
drop_reference:
@ -2082,12 +2146,17 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
}
}
if (high_size <= 1 && low_size <= 1) {
/* Set the memory enable bit. */
_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2,
&command);
command |= PCI_COMMAND_MEMORY;
_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2,
command);
/*
* No need to set the PCI_COMMAND_MEMORY bit as
* the core PCI driver doesn't require the bit
* to be pre-set. Actually here we intentionally
* keep the bit off so that the PCI BAR probing
* in the core PCI driver doesn't cause Hyper-V
* to unnecessarily unmap/map the virtual BARs
* from/to the physical BARs multiple times.
* This reduces the VM boot time significantly
* if the BAR sizes are huge.
*/
break;
}
}
@ -2223,12 +2292,14 @@ static void q_resource_requirements(void *context, struct pci_response *resp,
struct q_res_req_compl *completion = context;
struct pci_q_res_req_response *q_res_req =
(struct pci_q_res_req_response *)resp;
s32 status;
int i;
if (resp->status < 0) {
status = (resp_packet_size < sizeof(*q_res_req)) ? -1 : resp->status;
if (status < 0) {
dev_err(&completion->hpdev->hbus->hdev->device,
"query resource requirements failed: %x\n",
resp->status);
status);
} else {
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
completion->hpdev->probed_bar[i] =
@ -2652,7 +2723,7 @@ static void hv_eject_device_work(struct work_struct *work)
ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot;
vmbus_sendpacket(hbus->hdev->channel, ejct_pkt,
sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt,
sizeof(*ejct_pkt), 0,
VM_PKT_DATA_INBAND, 0);
/* For the get_pcichild() in hv_pci_eject_device() */
@ -2699,8 +2770,9 @@ static void hv_pci_onchannelcallback(void *context)
const int packet_size = 0x100;
int ret;
struct hv_pcibus_device *hbus = context;
struct vmbus_channel *chan = hbus->hdev->channel;
u32 bytes_recvd;
u64 req_id;
u64 req_id, req_addr;
struct vmpacket_descriptor *desc;
unsigned char *buffer;
int bufferlen = packet_size;
@ -2712,14 +2784,15 @@ static void hv_pci_onchannelcallback(void *context)
struct pci_dev_inval_block *inval;
struct pci_dev_incoming *dev_message;
struct hv_pci_dev *hpdev;
unsigned long flags;
buffer = kmalloc(bufferlen, GFP_ATOMIC);
if (!buffer)
return;
while (1) {
ret = vmbus_recvpacket_raw(hbus->hdev->channel, buffer,
bufferlen, &bytes_recvd, &req_id);
ret = vmbus_recvpacket_raw(chan, buffer, bufferlen,
&bytes_recvd, &req_id);
if (ret == -ENOBUFS) {
kfree(buffer);
@ -2746,15 +2819,29 @@ static void hv_pci_onchannelcallback(void *context)
switch (desc->type) {
case VM_PKT_COMP:
/*
* The host is trusted, and thus it's safe to interpret
* this transaction ID as a pointer.
*/
comp_packet = (struct pci_packet *)req_id;
lock_requestor(chan, flags);
req_addr = __vmbus_request_addr_match(chan, req_id,
VMBUS_RQST_ADDR_ANY);
if (req_addr == VMBUS_RQST_ERROR) {
unlock_requestor(chan, flags);
dev_err(&hbus->hdev->device,
"Invalid transaction ID %llx\n",
req_id);
break;
}
comp_packet = (struct pci_packet *)req_addr;
response = (struct pci_response *)buffer;
/*
* Call ->completion_func() within the critical section to make
* sure that the packet pointer is still valid during the call:
* here 'valid' means that there's a task still waiting for the
* completion, and that the packet data is still on the waiting
* task's stack. Cf. hv_compose_msi_msg().
*/
comp_packet->completion_func(comp_packet->compl_ctxt,
response,
bytes_recvd);
unlock_requestor(chan, flags);
break;
case VM_PKT_DATA_INBAND:
@ -2764,7 +2851,8 @@ static void hv_pci_onchannelcallback(void *context)
case PCI_BUS_RELATIONS:
bus_rel = (struct pci_bus_relations *)buffer;
if (bytes_recvd <
if (bytes_recvd < sizeof(*bus_rel) ||
bytes_recvd <
struct_size(bus_rel, func,
bus_rel->device_count)) {
dev_err(&hbus->hdev->device,
@ -2778,7 +2866,8 @@ static void hv_pci_onchannelcallback(void *context)
case PCI_BUS_RELATIONS2:
bus_rel2 = (struct pci_bus_relations2 *)buffer;
if (bytes_recvd <
if (bytes_recvd < sizeof(*bus_rel2) ||
bytes_recvd <
struct_size(bus_rel2, func,
bus_rel2->device_count)) {
dev_err(&hbus->hdev->device,
@ -2792,6 +2881,11 @@ static void hv_pci_onchannelcallback(void *context)
case PCI_EJECT:
dev_message = (struct pci_dev_incoming *)buffer;
if (bytes_recvd < sizeof(*dev_message)) {
dev_err(&hbus->hdev->device,
"eject message too small\n");
break;
}
hpdev = get_pcichild_wslot(hbus,
dev_message->wslot.slot);
if (hpdev) {
@ -2803,6 +2897,11 @@ static void hv_pci_onchannelcallback(void *context)
case PCI_INVALIDATE_BLOCK:
inval = (struct pci_dev_inval_block *)buffer;
if (bytes_recvd < sizeof(*inval)) {
dev_err(&hbus->hdev->device,
"invalidate message too small\n");
break;
}
hpdev = get_pcichild_wslot(hbus,
inval->wslot.slot);
if (hpdev) {
@ -3431,6 +3530,10 @@ static int hv_pci_probe(struct hv_device *hdev,
goto free_dom;
}
hdev->channel->next_request_id_callback = vmbus_next_request_id;
hdev->channel->request_addr_callback = vmbus_request_addr;
hdev->channel->rqstor_size = HV_PCI_RQSTOR_SIZE;
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
hv_pci_onchannelcallback, hbus);
if (ret)
@ -3561,6 +3664,7 @@ free_bus:
static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
{
struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
struct vmbus_channel *chan = hdev->channel;
struct {
struct pci_packet teardown_packet;
u8 buffer[sizeof(struct pci_message)];
@ -3568,13 +3672,14 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
struct hv_pci_compl comp_pkt;
struct hv_pci_dev *hpdev, *tmp;
unsigned long flags;
u64 trans_id;
int ret;
/*
* After the host sends the RESCIND_CHANNEL message, it doesn't
* access the per-channel ringbuffer any longer.
*/
if (hdev->channel->rescind)
if (chan->rescind)
return 0;
if (!keep_devs) {
@ -3611,16 +3716,26 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
pkt.teardown_packet.compl_ctxt = &comp_pkt;
pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT;
ret = vmbus_sendpacket(hdev->channel, &pkt.teardown_packet.message,
sizeof(struct pci_message),
(unsigned long)&pkt.teardown_packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
ret = vmbus_sendpacket_getid(chan, &pkt.teardown_packet.message,
sizeof(struct pci_message),
(unsigned long)&pkt.teardown_packet,
&trans_id, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
return ret;
if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0)
if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0) {
/*
* The completion packet on the stack becomes invalid after
* 'return'; remove the ID from the VMbus requestor if the
* identifier is still mapped to/associated with the packet.
*
* Cf. hv_pci_onchannelcallback().
*/
vmbus_request_addr_match(chan, trans_id,
(unsigned long)&pkt.teardown_packet);
return -ETIMEDOUT;
}
return 0;
}
@ -3761,6 +3876,10 @@ static int hv_pci_resume(struct hv_device *hdev)
hbus->state = hv_pcibus_init;
hdev->channel->next_request_id_callback = vmbus_next_request_id;
hdev->channel->request_addr_callback = vmbus_request_addr;
hdev->channel->rqstor_size = HV_PCI_RQSTOR_SIZE;
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
hv_pci_onchannelcallback, hbus);
if (ret)