diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 736ac76d2a6a..e9bf0bb87ac4 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -630,9 +630,13 @@ void vmbus_close(struct vmbus_channel *channel) */ list_for_each_safe(cur, tmp, &channel->sc_list) { cur_channel = list_entry(cur, struct vmbus_channel, sc_list); - if (cur_channel->state != CHANNEL_OPENED_STATE) - continue; vmbus_close_internal(cur_channel); + if (cur_channel->rescind) { + mutex_lock(&vmbus_connection.channel_mutex); + hv_process_channel_removal(cur_channel, + cur_channel->offermsg.child_relid); + mutex_unlock(&vmbus_connection.channel_mutex); + } } /* * Now close the primary. diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 735f9363f2e4..0fabd410efd9 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -428,7 +428,6 @@ void vmbus_free_channels(void) { struct vmbus_channel *channel, *tmp; - mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list, listentry) { /* hv_process_channel_removal() needs this */ @@ -436,7 +435,6 @@ void vmbus_free_channels(void) vmbus_device_unregister(channel->device_obj); } - mutex_unlock(&vmbus_connection.channel_mutex); } /* @@ -483,8 +481,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) list_add_tail(&newchannel->sc_list, &channel->sc_list); channel->num_sc++; spin_unlock_irqrestore(&channel->lock, flags); - } else + } else { + atomic_dec(&vmbus_connection.offer_in_progress); goto err_free_chan; + } } dev_type = hv_get_dev_type(newchannel); @@ -511,6 +511,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) if (!fnew) { if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); + atomic_dec(&vmbus_connection.offer_in_progress); return; } @@ -532,9 +533,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) * binding which eventually invokes the device driver's AddDevice() * method. */ - mutex_lock(&vmbus_connection.channel_mutex); ret = vmbus_device_register(newchannel->device_obj); - mutex_unlock(&vmbus_connection.channel_mutex); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", @@ -542,6 +541,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) kfree(newchannel->device_obj); goto err_deq_chan; } + + atomic_dec(&vmbus_connection.offer_in_progress); return; err_deq_chan: @@ -797,6 +798,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) newchannel = alloc_channel(); if (!newchannel) { vmbus_release_relid(offer->child_relid); + atomic_dec(&vmbus_connection.offer_in_progress); pr_err("Unable to allocate channel object\n"); return; } @@ -843,16 +845,38 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) rescind = (struct vmbus_channel_rescind_offer *)hdr; + /* + * The offer msg and the corresponding rescind msg + * from the host are guranteed to be ordered - + * offer comes in first and then the rescind. + * Since we process these events in work elements, + * and with preemption, we may end up processing + * the events out of order. Given that we handle these + * work elements on the same CPU, this is possible only + * in the case of preemption. In any case wait here + * until the offer processing has moved beyond the + * point where the channel is discoverable. + */ + + while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { + /* + * We wait here until any channel offer is currently + * being processed. + */ + msleep(1); + } + mutex_lock(&vmbus_connection.channel_mutex); channel = relid2channel(rescind->child_relid); + mutex_unlock(&vmbus_connection.channel_mutex); if (channel == NULL) { /* - * This is very impossible, because in - * vmbus_process_offer(), we have already invoked - * vmbus_release_relid() on error. + * We failed in processing the offer message; + * we would have cleaned up the relid in that + * failure path. */ - goto out; + return; } spin_lock_irqsave(&channel->lock, flags); @@ -864,7 +888,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); - goto out; + return; } /* * We will have to unregister this device from the @@ -875,13 +899,26 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) vmbus_device_unregister(channel->device_obj); put_device(dev); } - } else { - hv_process_channel_removal(channel, - channel->offermsg.child_relid); } - -out: - mutex_unlock(&vmbus_connection.channel_mutex); + if (channel->primary_channel != NULL) { + /* + * Sub-channel is being rescinded. Following is the channel + * close sequence when initiated from the driveri (refer to + * vmbus_close() for details): + * 1. Close all sub-channels first + * 2. Then close the primary channel. + */ + if (channel->state == CHANNEL_OPEN_STATE) { + /* + * The channel is currently not open; + * it is safe for us to cleanup the channel. + */ + mutex_lock(&vmbus_connection.channel_mutex); + hv_process_channel_removal(channel, + channel->offermsg.child_relid); + mutex_unlock(&vmbus_connection.channel_mutex); + } + } } void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index a938fcfe9bc9..c2d74ee95f60 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -93,10 +93,13 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, * all the CPUs. This is needed for kexec to work correctly where * the CPU attempting to connect may not be CPU 0. */ - if (version >= VERSION_WIN8_1) + if (version >= VERSION_WIN8_1) { msg->target_vcpu = hv_context.vp_index[smp_processor_id()]; - else + vmbus_connection.connect_cpu = smp_processor_id(); + } else { msg->target_vcpu = 0; + vmbus_connection.connect_cpu = 0; + } /* * Add to list before we send the request since we may diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index fa514be7679c..1b6a5e0dfa75 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -303,6 +303,13 @@ enum vmbus_connect_state { #define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT struct vmbus_connection { + /* + * CPU on which the initial host contact was made. + */ + int connect_cpu; + + atomic_t offer_in_progress; + enum vmbus_connect_state conn_state; atomic_t next_gpadl_handle; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0087b49095eb..59bb3efa6e10 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -798,8 +798,10 @@ static void vmbus_device_release(struct device *device) struct hv_device *hv_dev = device_to_hv_device(device); struct vmbus_channel *channel = hv_dev->channel; + mutex_lock(&vmbus_connection.channel_mutex); hv_process_channel_removal(channel, channel->offermsg.child_relid); + mutex_unlock(&vmbus_connection.channel_mutex); kfree(hv_dev); } @@ -877,7 +879,32 @@ void vmbus_on_msg_dpc(unsigned long data) INIT_WORK(&ctx->work, vmbus_onmessage_work); memcpy(&ctx->msg, msg, sizeof(*msg)); - queue_work(vmbus_connection.work_queue, &ctx->work); + /* + * The host can generate a rescind message while we + * may still be handling the original offer. We deal with + * this condition by ensuring the processing is done on the + * same CPU. + */ + switch (hdr->msgtype) { + case CHANNELMSG_RESCIND_CHANNELOFFER: + /* + * If we are handling the rescind message; + * schedule the work on the global work queue. + */ + schedule_work_on(vmbus_connection.connect_cpu, + &ctx->work); + break; + + case CHANNELMSG_OFFERCHANNEL: + atomic_inc(&vmbus_connection.offer_in_progress); + queue_work_on(vmbus_connection.connect_cpu, + vmbus_connection.work_queue, + &ctx->work); + break; + + default: + queue_work(vmbus_connection.work_queue, &ctx->work); + } } else entry->message_handler(hdr);