scsi: storvsc: Select channel based on available percentage of ring buffer to write

This is a best effort for estimating on how busy the ring buffer is for
that channel, based on available buffer to write in percentage. It is
still possible that at the time of actual ring buffer write, the space
may not be available due to other processes may be writing at the time.

Selecting a channel based on how full it is can reduce the possibility
that a ring buffer write will fail, and avoid the situation a channel is
over busy.

Now it's possible that storvsc can use a smaller ring buffer size
(e.g. 40k bytes) to take advantage of cache locality.

Signed-off-by: Long Li <longli@microsoft.com>
Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Long Li 2018-04-19 14:54:24 -07:00 committed by Martin K. Petersen
parent 69589c9bb9
commit 2217a47de4

View File

@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO); module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels"); MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
static int ring_avail_percent_lowater = 10;
module_param(ring_avail_percent_lowater, int, S_IRUGO);
MODULE_PARM_DESC(ring_avail_percent_lowater,
"Select a channel if available ring size > this in percent");
/* /*
* Timeout in seconds for all devices managed by this driver. * Timeout in seconds for all devices managed by this driver.
*/ */
@ -468,6 +474,13 @@ struct storvsc_device {
* Mask of CPUs bound to subchannels. * Mask of CPUs bound to subchannels.
*/ */
struct cpumask alloced_cpus; struct cpumask alloced_cpus;
/*
* Pre-allocated struct cpumask for each hardware queue.
* struct cpumask is used by selecting out-going channels. It is a
* big structure, default to 1024k bytes when CONFIG_MAXSMP=y.
* Pre-allocate it to avoid allocation on the kernel stack.
*/
struct cpumask *cpumask_chns;
/* Used for vsc/vsp channel reset process */ /* Used for vsc/vsp channel reset process */
struct storvsc_cmd_request init_request; struct storvsc_cmd_request init_request;
struct storvsc_cmd_request reset_request; struct storvsc_cmd_request reset_request;
@ -872,6 +885,13 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
if (stor_device->stor_chns == NULL) if (stor_device->stor_chns == NULL)
return -ENOMEM; return -ENOMEM;
stor_device->cpumask_chns = kcalloc(num_possible_cpus(),
sizeof(struct cpumask), GFP_KERNEL);
if (stor_device->cpumask_chns == NULL) {
kfree(stor_device->stor_chns);
return -ENOMEM;
}
stor_device->stor_chns[device->channel->target_cpu] = device->channel; stor_device->stor_chns[device->channel->target_cpu] = device->channel;
cpumask_set_cpu(device->channel->target_cpu, cpumask_set_cpu(device->channel->target_cpu,
&stor_device->alloced_cpus); &stor_device->alloced_cpus);
@ -1232,6 +1252,7 @@ static int storvsc_dev_remove(struct hv_device *device)
vmbus_close(device->channel); vmbus_close(device->channel);
kfree(stor_device->stor_chns); kfree(stor_device->stor_chns);
kfree(stor_device->cpumask_chns);
kfree(stor_device); kfree(stor_device);
return 0; return 0;
} }
@ -1241,7 +1262,7 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
{ {
u16 slot = 0; u16 slot = 0;
u16 hash_qnum; u16 hash_qnum;
struct cpumask alloced_mask; struct cpumask *alloced_mask = &stor_device->cpumask_chns[q_num];
int num_channels, tgt_cpu; int num_channels, tgt_cpu;
if (stor_device->num_sc == 0) if (stor_device->num_sc == 0)
@ -1257,10 +1278,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
* III. Mapping is persistent. * III. Mapping is persistent.
*/ */
cpumask_and(&alloced_mask, &stor_device->alloced_cpus, cpumask_and(alloced_mask, &stor_device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num))); cpumask_of_node(cpu_to_node(q_num)));
num_channels = cpumask_weight(&alloced_mask); num_channels = cpumask_weight(alloced_mask);
if (num_channels == 0) if (num_channels == 0)
return stor_device->device->channel; return stor_device->device->channel;
@ -1268,7 +1289,7 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
while (hash_qnum >= num_channels) while (hash_qnum >= num_channels)
hash_qnum -= num_channels; hash_qnum -= num_channels;
for_each_cpu(tgt_cpu, &alloced_mask) { for_each_cpu(tgt_cpu, alloced_mask) {
if (slot == hash_qnum) if (slot == hash_qnum)
break; break;
slot++; slot++;
@ -1285,9 +1306,9 @@ static int storvsc_do_io(struct hv_device *device,
{ {
struct storvsc_device *stor_device; struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet; struct vstor_packet *vstor_packet;
struct vmbus_channel *outgoing_channel; struct vmbus_channel *outgoing_channel, *channel;
int ret = 0; int ret = 0;
struct cpumask alloced_mask; struct cpumask *alloced_mask;
int tgt_cpu; int tgt_cpu;
vstor_packet = &request->vstor_packet; vstor_packet = &request->vstor_packet;
@ -1301,22 +1322,53 @@ static int storvsc_do_io(struct hv_device *device,
/* /*
* Select an an appropriate channel to send the request out. * Select an an appropriate channel to send the request out.
*/ */
if (stor_device->stor_chns[q_num] != NULL) { if (stor_device->stor_chns[q_num] != NULL) {
outgoing_channel = stor_device->stor_chns[q_num]; outgoing_channel = stor_device->stor_chns[q_num];
if (outgoing_channel->target_cpu == smp_processor_id()) { if (outgoing_channel->target_cpu == q_num) {
/* /*
* Ideally, we want to pick a different channel if * Ideally, we want to pick a different channel if
* available on the same NUMA node. * available on the same NUMA node.
*/ */
cpumask_and(&alloced_mask, &stor_device->alloced_cpus, alloced_mask = &stor_device->cpumask_chns[q_num];
cpumask_and(alloced_mask, &stor_device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num))); cpumask_of_node(cpu_to_node(q_num)));
for_each_cpu_wrap(tgt_cpu, &alloced_mask,
outgoing_channel->target_cpu + 1) { for_each_cpu_wrap(tgt_cpu, alloced_mask, q_num + 1) {
if (tgt_cpu != outgoing_channel->target_cpu) { if (tgt_cpu == q_num)
outgoing_channel = continue;
stor_device->stor_chns[tgt_cpu]; channel = stor_device->stor_chns[tgt_cpu];
break; if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
outgoing_channel = channel;
goto found_channel;
}
}
/*
* All the other channels on the same NUMA node are
* busy. Try to use the channel on the current CPU
*/
if (hv_get_avail_to_write_percent(
&outgoing_channel->outbound)
> ring_avail_percent_lowater)
goto found_channel;
/*
* If we reach here, all the channels on the current
* NUMA node are busy. Try to find a channel in
* other NUMA nodes
*/
cpumask_andnot(alloced_mask, &stor_device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
for_each_cpu(tgt_cpu, alloced_mask) {
channel = stor_device->stor_chns[tgt_cpu];
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
outgoing_channel = channel;
goto found_channel;
} }
} }
} }
@ -1324,7 +1376,7 @@ static int storvsc_do_io(struct hv_device *device,
outgoing_channel = get_og_chn(stor_device, q_num); outgoing_channel = get_og_chn(stor_device, q_num);
} }
found_channel:
vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) - vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) -
@ -1726,8 +1778,9 @@ static int storvsc_probe(struct hv_device *device,
max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel); max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
} }
scsi_driver.can_queue = (max_outstanding_req_per_channel * scsi_driver.can_queue = max_outstanding_req_per_channel *
(max_sub_channels + 1)); (max_sub_channels + 1) *
(100 - ring_avail_percent_lowater) / 100;
host = scsi_host_alloc(&scsi_driver, host = scsi_host_alloc(&scsi_driver,
sizeof(struct hv_host_device)); sizeof(struct hv_host_device));
@ -1858,6 +1911,7 @@ err_out2:
err_out1: err_out1:
kfree(stor_device->stor_chns); kfree(stor_device->stor_chns);
kfree(stor_device->cpumask_chns);
kfree(stor_device); kfree(stor_device);
err_out0: err_out0: