From 6123c66854c174e4982f98195100c1d990f9e5e6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:03 -0700 Subject: [PATCH 01/10] netvsc: delay setup of VF device When VF device is discovered, delay bring it automatically up in order to allow userspace to some simple changes (like renaming). Reported-by: Vitaly Kuznetsov Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc_drv.c | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 98b25f6900c8..a57e37641dc5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -723,7 +723,7 @@ struct net_device_context { /* State to manage the associated VF interface. */ struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; - struct work_struct vf_takeover; + struct delayed_work vf_takeover; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index eb0023f55fe1..e059375a6d8c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -47,6 +47,7 @@ #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) +#define VF_TAKEOVER_INT (HZ / 10) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); @@ -1559,7 +1560,9 @@ static int netvsc_vf_join(struct net_device *vf_netdev, /* set slave flag before open to prevent IPv6 addrconf */ vf_netdev->flags |= IFF_SLAVE; - schedule_work(&ndev_ctx->vf_takeover); + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + + call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); netdev_info(vf_netdev, "joined to %s\n", ndev->name); return 0; @@ -1575,8 +1578,6 @@ static void __netvsc_vf_setup(struct net_device *ndev, { int ret; - call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); - /* Align MTU of VF with master */ ret = dev_set_mtu(vf_netdev, ndev->mtu); if (ret) @@ -1597,12 +1598,12 @@ static void __netvsc_vf_setup(struct net_device *ndev, static void netvsc_vf_setup(struct work_struct *w) { struct net_device_context *ndev_ctx - = container_of(w, struct net_device_context, vf_takeover); + = container_of(w, struct net_device_context, vf_takeover.work); struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx); struct net_device *vf_netdev; if (!rtnl_trylock()) { - schedule_work(w); + schedule_delayed_work(&ndev_ctx->vf_takeover, 0); return; } @@ -1706,7 +1707,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); - cancel_work_sync(&net_device_ctx->vf_takeover); + cancel_delayed_work_sync(&net_device_ctx->vf_takeover); netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); @@ -1748,7 +1749,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); - INIT_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); From 5e20d55a23a76a876396ba1235bdf019e74d0c6f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:04 -0700 Subject: [PATCH 02/10] netvsc: don't signal host twice if empty When hv_pkt_iter_next() returns NULL, it has already called hv_pkt_iter_close(). Calling it twice can lead to extra host signal. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index bffaf93d3cb0..b5fc8fb56b88 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1191,10 +1191,6 @@ int netvsc_poll(struct napi_struct *napi, int budget) nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } - /* if ring is empty, signal host */ - if (!nvchan->desc) - hv_pkt_iter_close(channel); - /* If send of pending receive completions suceeded * and did not exhaust NAPI budget this time * and not doing busy poll From 16ba3266006be10dc7ec25dd1442f74cef89cb95 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:05 -0700 Subject: [PATCH 03/10] netvsc: propagate MAC address change to VF slave If VF is slaved to synthetic device, then any change to netvsc MAC address should be propagated to the slave device. If slave device doesn't support MAC address change then it should also be an error to attempt to change synthetic NIC MAC address. It also fixes the error unwind in the original code. If give a bad address, the old code would change the device MAC address anyway. Reviewed-by: Haiyang Zhang Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e059375a6d8c..07015b1c42c6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1053,27 +1053,31 @@ static void netvsc_get_stats64(struct net_device *net, static int netvsc_set_mac_addr(struct net_device *ndev, void *p) { struct net_device_context *ndc = netdev_priv(ndev); + struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); struct sockaddr *addr = p; - char save_adr[ETH_ALEN]; - unsigned char save_aatype; int err; - memcpy(save_adr, ndev->dev_addr, ETH_ALEN); - save_aatype = ndev->addr_assign_type; - - err = eth_mac_addr(ndev, p); - if (err != 0) + err = eth_prepare_mac_addr_change(ndev, p); + if (err) return err; if (!nvdev) return -ENODEV; + if (vf_netdev) { + err = dev_set_mac_address(vf_netdev, addr); + if (err) + return err; + } + err = rndis_filter_set_device_mac(nvdev, addr->sa_data); - if (err != 0) { - /* roll back to saved MAC */ - memcpy(ndev->dev_addr, save_adr, ETH_ALEN); - ndev->addr_assign_type = save_aatype; + if (!err) { + eth_commit_mac_addr_change(ndev, p); + } else if (vf_netdev) { + /* rollback change on VF */ + memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN); + dev_set_mac_address(vf_netdev, addr); } return err; From 68d715f68541d58033199eea80991394a6886eb7 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:06 -0700 Subject: [PATCH 04/10] netvsc: check error return when restoring channels and mtu If setting new values fails, and the attempt to restore original settings fails. Then log an error and leave device down. This should never happen, but if it does don't go down in flames. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 07015b1c42c6..c7391889938b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -845,7 +845,13 @@ static int netvsc_set_channels(struct net_device *net, } else { ret = PTR_ERR(nvdev); device_info.num_chn = orig; - rndis_filter_device_add(dev, &device_info); + nvdev = rndis_filter_device_add(dev, &device_info); + + if (IS_ERR(nvdev)) { + netdev_err(net, "restoring channel setting failed: %ld\n", + PTR_ERR(nvdev)); + return ret; + } } if (was_opened) @@ -953,10 +959,16 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) /* Attempt rollback to original MTU */ ndev->mtu = orig_mtu; - rndis_filter_device_add(hdev, &device_info); + nvdev = rndis_filter_device_add(hdev, &device_info); if (vf_netdev) dev_set_mtu(vf_netdev, orig_mtu); + + if (IS_ERR(nvdev)) { + netdev_err(ndev, "restoring mtu failed: %ld\n", + PTR_ERR(nvdev)); + return ret; + } } if (was_opened) From 958333708f2877d3855e3bc31dad428e2f2c8096 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:07 -0700 Subject: [PATCH 05/10] netvsc: no need to allocate send/receive on numa node The send and receive buffers are both per-device (not per-channel). The associated NUMA node is a property of the CPU which is per-channel therefore it makes no sense to force the receive/send buffer to be allocated on a particular node (since it is a shared resource). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b5fc8fb56b88..7407006f4e22 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -246,20 +246,13 @@ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) static int netvsc_init_buf(struct hv_device *device, struct netvsc_device *net_device) { - int ret = 0; - struct nvsp_message *init_packet; struct nvsp_1_message_send_receive_buffer_complete *resp; - struct net_device *ndev; + struct net_device *ndev = hv_get_drvdata(device); + struct nvsp_message *init_packet; size_t map_words; - int node; - - ndev = hv_get_drvdata(device); - - node = cpu_to_node(device->channel->target_cpu); - net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node); - if (!net_device->recv_buf) - net_device->recv_buf = vzalloc(net_device->recv_buf_size); + int ret = 0; + net_device->recv_buf = vzalloc(net_device->recv_buf_size); if (!net_device->recv_buf) { netdev_err(ndev, "unable to allocate receive " "buffer of size %d\n", net_device->recv_buf_size); @@ -340,9 +333,7 @@ static int netvsc_init_buf(struct hv_device *device, goto cleanup; /* Now setup the send buffer. */ - net_device->send_buf = vzalloc_node(net_device->send_buf_size, node); - if (!net_device->send_buf) - net_device->send_buf = vzalloc(net_device->send_buf_size); + net_device->send_buf = vzalloc(net_device->send_buf_size); if (!net_device->send_buf) { netdev_err(ndev, "unable to allocate send " "buffer of size %d\n", net_device->send_buf_size); From 89bb42b11370c2daf19d8820398f7255f8499ab7 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:08 -0700 Subject: [PATCH 06/10] netvsc: whitespace cleanup Fix some minor indentation issues. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c7391889938b..3219d2e8918f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -165,7 +165,7 @@ static int netvsc_close(struct net_device *net) } static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, - int pkt_type) + int pkt_type) { struct rndis_packet *rndis_pkt; struct rndis_per_packet_info *ppi; @@ -286,7 +286,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, } static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, - struct hv_page_buffer *pb) + struct hv_page_buffer *pb) { int j = 0; @@ -626,6 +626,7 @@ no_memory: ++net_device_ctx->eth_stats.tx_no_memory; goto drop; } + /* * netvsc_linkstatus_callback - Link up/down notification */ @@ -649,8 +650,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; - speed = *(u32 *)((void *)indicate + indicate-> - status_buf_offset) / 10000; + speed = *(u32 *)((void *)indicate + + indicate->status_buf_offset) / 10000; ndev_ctx->speed = speed; return; } @@ -1018,7 +1019,7 @@ static void netvsc_get_stats64(struct net_device *net, struct net_device_context *ndev_ctx = netdev_priv(net); struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); struct netvsc_vf_pcpu_stats vf_tot; - int i; + int i; if (!nvdev) return; From 00f5024e821e60c0d1d7df44f2577a2c277b5cc9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:09 -0700 Subject: [PATCH 07/10] netvsc: remove unnecessary cast of void pointer Assignment to a typed pointer is sufficient in C. No cast is needed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3219d2e8918f..9f89de17b5fa 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -523,9 +523,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg_size += NDIS_VLAN_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, - IEEE_8021Q_INFO); - vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + - ppi->ppi_offset); + IEEE_8021Q_INFO); + + vlan = (void *)ppi + ppi->ppi_offset; vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; @@ -538,8 +538,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, TCP_LARGESEND_PKTINFO); - lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + - ppi->ppi_offset); + lso_info = (void *)ppi + ppi->ppi_offset; lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; if (skb->protocol == htons(ETH_P_IP)) { From ea5a32c00bcacce1d8ac834a70a82f95a1c79425 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:10 -0700 Subject: [PATCH 08/10] netvsc: remove unnecessary check for NULL hdr The function init_page_array is always called with a valid pointer to RNDIS header. No check for NULL is needed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9f89de17b5fa..7b465e40869b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -333,10 +333,9 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. skb fragment data */ - if (hdr != NULL) - slots_used += fill_pg_buf(virt_to_page(hdr), - offset_in_page(hdr), - len, &pb[slots_used]); + slots_used += fill_pg_buf(virt_to_page(hdr), + offset_in_page(hdr), + len, &pb[slots_used]); packet->rmsg_size = len; packet->rmsg_pgcnt = slots_used; From 8b5327975ae171ca54dfd93e6c042d1292945867 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:11 -0700 Subject: [PATCH 09/10] netvsc: allow controlling send/recv buffer size Control the size of the buffer areas via ethtool ring settings. They aren't really traditional hardware rings, but host API breaks receive and send buffer into chunks. The final size of the chunks are controlled by the host. The default value of send and receive buffer area for host DMA is much larger than it needs to be. Experimentation shows that 4M receive and 1M send is sufficient. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 9 +-- drivers/net/hyperv/netvsc.c | 70 ++++++++++--------- drivers/net/hyperv/netvsc_drv.c | 117 +++++++++++++++++++++++++++++++- 3 files changed, 157 insertions(+), 39 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a57e37641dc5..30326373e46f 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -148,6 +148,8 @@ struct netvsc_device_info { unsigned char mac_adr[ETH_ALEN]; int ring_size; u32 num_chn; + u32 send_sections; + u32 recv_sections; }; enum rndis_device_state { @@ -634,12 +636,12 @@ struct nvsp_message { #define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */ #define NETVSC_INVALID_INDEX -1 +#define NETVSC_SEND_SECTION_SIZE 6144 +#define NETVSC_RECV_SECTION_SIZE 1728 #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_SEND_BUFFER_ID 0 -#define NETVSC_PACKET_SIZE 4096 - #define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */ #define VRSS_CHANNEL_MAX 64 #define VRSS_CHANNEL_DEFAULT 8 @@ -754,14 +756,13 @@ struct netvsc_device { /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; - u32 recv_buf_size; u32 recv_buf_gpadl_handle; u32 recv_section_cnt; + u32 recv_section_size; u32 recv_completion_cnt; /* Send buffer allocated by us */ void *send_buf; - u32 send_buf_size; u32 send_buf_gpadl_handle; u32 send_section_cnt; u32 send_section_size; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 7407006f4e22..d9d7555148eb 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -75,6 +75,10 @@ static struct netvsc_device *alloc_net_device(void) atomic_set(&net_device->open_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; + + net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE; + net_device->send_section_size = NETVSC_SEND_SECTION_SIZE; + init_completion(&net_device->channel_init_wait); init_waitqueue_head(&net_device->subchan_open); @@ -143,6 +147,7 @@ static void netvsc_destroy_buf(struct hv_device *device) "revoke receive buffer to netvsp\n"); return; } + net_device->recv_section_cnt = 0; } /* Teardown the gpadl on the vsp end */ @@ -173,7 +178,7 @@ static void netvsc_destroy_buf(struct hv_device *device) * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need * to send a revoke msg here */ - if (net_device->send_section_size) { + if (net_device->send_section_cnt) { /* Send the revoke receive buffer */ revoke_packet = &net_device->revoke_packet; memset(revoke_packet, 0, sizeof(struct nvsp_message)); @@ -205,6 +210,7 @@ static void netvsc_destroy_buf(struct hv_device *device) "revoke send buffer to netvsp\n"); return; } + net_device->send_section_cnt = 0; } /* Teardown the gpadl on the vsp end */ if (net_device->send_buf_gpadl_handle) { @@ -244,18 +250,25 @@ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) } static int netvsc_init_buf(struct hv_device *device, - struct netvsc_device *net_device) + struct netvsc_device *net_device, + const struct netvsc_device_info *device_info) { struct nvsp_1_message_send_receive_buffer_complete *resp; struct net_device *ndev = hv_get_drvdata(device); struct nvsp_message *init_packet; + unsigned int buf_size; size_t map_words; int ret = 0; - net_device->recv_buf = vzalloc(net_device->recv_buf_size); + /* Get receive buffer area. */ + buf_size = device_info->recv_sections * net_device->recv_section_size; + buf_size = roundup(buf_size, PAGE_SIZE); + + net_device->recv_buf = vzalloc(buf_size); if (!net_device->recv_buf) { - netdev_err(ndev, "unable to allocate receive " - "buffer of size %d\n", net_device->recv_buf_size); + netdev_err(ndev, + "unable to allocate receive buffer of size %u\n", + buf_size); ret = -ENOMEM; goto cleanup; } @@ -266,7 +279,7 @@ static int netvsc_init_buf(struct hv_device *device, * than the channel to establish the gpadl handle. */ ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf, - net_device->recv_buf_size, + buf_size, &net_device->recv_buf_gpadl_handle); if (ret != 0) { netdev_err(ndev, @@ -312,31 +325,31 @@ static int netvsc_init_buf(struct hv_device *device, resp->num_sections, resp->sections[0].sub_alloc_size, resp->sections[0].num_sub_allocs); - net_device->recv_section_cnt = resp->num_sections; - - /* - * For 1st release, there should only be 1 section that represents the - * entire receive buffer - */ - if (net_device->recv_section_cnt != 1 || - resp->sections[0].offset != 0) { + /* There should only be one section for the entire receive buffer */ + if (resp->num_sections != 1 || resp->sections[0].offset != 0) { ret = -EINVAL; goto cleanup; } + net_device->recv_section_size = resp->sections[0].sub_alloc_size; + net_device->recv_section_cnt = resp->sections[0].num_sub_allocs; + /* Setup receive completion ring */ net_device->recv_completion_cnt - = round_up(resp->sections[0].num_sub_allocs + 1, + = round_up(net_device->recv_section_cnt + 1, PAGE_SIZE / sizeof(u64)); ret = netvsc_alloc_recv_comp_ring(net_device, 0); if (ret) goto cleanup; /* Now setup the send buffer. */ - net_device->send_buf = vzalloc(net_device->send_buf_size); + buf_size = device_info->send_sections * net_device->send_section_size; + buf_size = round_up(buf_size, PAGE_SIZE); + + net_device->send_buf = vzalloc(buf_size); if (!net_device->send_buf) { - netdev_err(ndev, "unable to allocate send " - "buffer of size %d\n", net_device->send_buf_size); + netdev_err(ndev, "unable to allocate send buffer of size %u\n", + buf_size); ret = -ENOMEM; goto cleanup; } @@ -346,7 +359,7 @@ static int netvsc_init_buf(struct hv_device *device, * than the channel to establish the gpadl handle. */ ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, - net_device->send_buf_size, + buf_size, &net_device->send_buf_gpadl_handle); if (ret != 0) { netdev_err(ndev, @@ -391,10 +404,8 @@ static int netvsc_init_buf(struct hv_device *device, net_device->send_section_size = init_packet->msg. v1_msg.send_send_buf_complete.section_size; - /* Section count is simply the size divided by the section size. - */ - net_device->send_section_cnt = - net_device->send_buf_size / net_device->send_section_size; + /* Section count is simply the size divided by the section size. */ + net_device->send_section_cnt = buf_size / net_device->send_section_size; netdev_dbg(ndev, "Send section size: %d, Section count:%d\n", net_device->send_section_size, net_device->send_section_cnt); @@ -472,7 +483,8 @@ static int negotiate_nvsp_ver(struct hv_device *device, } static int netvsc_connect_vsp(struct hv_device *device, - struct netvsc_device *net_device) + struct netvsc_device *net_device, + const struct netvsc_device_info *device_info) { const u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, @@ -522,14 +534,8 @@ static int netvsc_connect_vsp(struct hv_device *device, if (ret != 0) goto cleanup; - /* Post the big receive buffer to NetVSP */ - if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2) - net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; - else - net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; - net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; - ret = netvsc_init_buf(device, net_device); + ret = netvsc_init_buf(device, net_device, device_info); cleanup: return ret; @@ -1287,7 +1293,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, rcu_assign_pointer(net_device_ctx->nvdev, net_device); /* Connect with the NetVsp */ - ret = netvsc_connect_vsp(device, net_device); + ret = netvsc_connect_vsp(device, net_device, device_info); if (ret != 0) { netdev_err(ndev, "unable to connect to NetVSP - %d\n", ret); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7b465e40869b..873c83a66cc2 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -45,7 +45,12 @@ #include "hyperv_net.h" -#define RING_SIZE_MIN 64 +#define RING_SIZE_MIN 64 +#define NETVSC_MIN_TX_SECTIONS 10 +#define NETVSC_DEFAULT_TX 192 /* ~1M */ +#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ +#define NETVSC_DEFAULT_RX 2048 /* ~4M */ + #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) @@ -831,11 +836,13 @@ static int netvsc_set_channels(struct net_device *net, if (was_opened) rndis_filter_close(nvdev); - rndis_filter_device_remove(dev, nvdev); - memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = count; device_info.ring_size = ring_size; + device_info.send_sections = nvdev->send_section_cnt; + device_info.recv_sections = nvdev->recv_section_cnt; + + rndis_filter_device_remove(dev, nvdev); nvdev = rndis_filter_device_add(dev, &device_info); if (!IS_ERR(nvdev)) { @@ -947,6 +954,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = nvdev->num_chn; + device_info.send_sections = nvdev->send_section_cnt; + device_info.recv_sections = nvdev->recv_section_cnt; rndis_filter_device_remove(hdev, nvdev); @@ -1351,6 +1360,104 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn); } +/* Hyper-V RNDIS protocol does not have ring in the HW sense. + * It does have pre-allocated receive area which is divided into sections. + */ +static void __netvsc_get_ringparam(struct netvsc_device *nvdev, + struct ethtool_ringparam *ring) +{ + u32 max_buf_size; + + ring->rx_pending = nvdev->recv_section_cnt; + ring->tx_pending = nvdev->send_section_cnt; + + if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + + ring->rx_max_pending = max_buf_size / nvdev->recv_section_size; + ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE + / nvdev->send_section_size; +} + +static void netvsc_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + + if (!nvdev) + return; + + __netvsc_get_ringparam(nvdev, ring); +} + +static int netvsc_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + struct hv_device *hdev = ndevctx->device_ctx; + struct netvsc_device_info device_info; + struct ethtool_ringparam orig; + u32 new_tx, new_rx; + bool was_opened; + int ret = 0; + + if (!nvdev || nvdev->destroy) + return -ENODEV; + + memset(&orig, 0, sizeof(orig)); + __netvsc_get_ringparam(nvdev, &orig); + + new_tx = clamp_t(u32, ring->tx_pending, + NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending); + new_rx = clamp_t(u32, ring->rx_pending, + NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending); + + if (new_tx == orig.tx_pending && + new_rx == orig.rx_pending) + return 0; /* no change */ + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = nvdev->num_chn; + device_info.ring_size = ring_size; + device_info.send_sections = new_tx; + device_info.recv_sections = new_rx; + + netif_device_detach(ndev); + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); + + rndis_filter_device_remove(hdev, nvdev); + + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + + device_info.send_sections = orig.tx_pending; + device_info.recv_sections = orig.rx_pending; + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + netdev_err(ndev, "restoring ringparam failed: %ld\n", + PTR_ERR(nvdev)); + return ret; + } + } + + if (was_opened) + rndis_filter_open(nvdev); + netif_device_attach(ndev); + + /* We may have missed link change notifications */ + ndevctx->last_reconfig = 0; + schedule_delayed_work(&ndevctx->dwork, 0); + + return ret; +} + static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1367,6 +1474,8 @@ static const struct ethtool_ops ethtool_ops = { .set_rxfh = netvsc_set_rxfh, .get_link_ksettings = netvsc_get_link_ksettings, .set_link_ksettings = netvsc_set_link_ksettings, + .get_ringparam = netvsc_get_ringparam, + .set_ringparam = netvsc_set_ringparam, }; static const struct net_device_ops device_ops = { @@ -1782,6 +1891,8 @@ static int netvsc_probe(struct hv_device *dev, memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = VRSS_CHANNEL_DEFAULT; + device_info.send_sections = NETVSC_DEFAULT_TX; + device_info.recv_sections = NETVSC_DEFAULT_RX; nvdev = rndis_filter_device_add(dev, &device_info); if (IS_ERR(nvdev)) { From cad5c197704d82faf33ffdbef414f15db08d9ef9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 9 Aug 2017 17:46:12 -0700 Subject: [PATCH 10/10] netvsc: keep track of some non-fatal overload conditions Add ethtool statistics for case where send chimmeny buffer is exhausted and driver has to fall back to doing scatter/gather send. Also, add statistic for case where ring buffer is full and receive completions are delayed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc.c | 19 +++++++++++++------ drivers/net/hyperv/netvsc_drv.c | 2 ++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 30326373e46f..9198dd1240ed 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -680,6 +680,8 @@ struct netvsc_ethtool_stats { unsigned long tx_no_space; unsigned long tx_too_big; unsigned long tx_busy; + unsigned long tx_send_full; + unsigned long rx_comp_busy; }; struct netvsc_vf_pcpu_stats { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d9d7555148eb..0530e7d729e1 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -883,7 +883,9 @@ int netvsc_send(struct net_device_context *ndev_ctx, } else if (pktlen + net_device->pkt_align < net_device->send_section_size) { section_index = netvsc_get_next_send_section(net_device); - if (section_index != NETVSC_INVALID_INDEX) { + if (unlikely(section_index == NETVSC_INVALID_INDEX)) { + ++ndev_ctx->eth_stats.tx_send_full; + } else { move_pkt_msd(&msd_send, &msd_skb, msdp); msd_len = 0; } @@ -949,9 +951,10 @@ send_now: } /* Send pending recv completions */ -static int send_recv_completions(struct netvsc_channel *nvchan) +static int send_recv_completions(struct net_device *ndev, + struct netvsc_device *nvdev, + struct netvsc_channel *nvchan) { - struct netvsc_device *nvdev = nvchan->net_device; struct multi_recv_comp *mrc = &nvchan->mrc; struct recv_comp_msg { struct nvsp_message_header hdr; @@ -969,8 +972,12 @@ static int send_recv_completions(struct netvsc_channel *nvchan) msg.status = rcd->status; ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg), rcd->tid, VM_PKT_COMP, 0); - if (unlikely(ret)) + if (unlikely(ret)) { + struct net_device_context *ndev_ctx = netdev_priv(ndev); + + ++ndev_ctx->eth_stats.rx_comp_busy; return ret; + } if (++mrc->first == nvdev->recv_completion_cnt) mrc->first = 0; @@ -1011,7 +1018,7 @@ static void enq_receive_complete(struct net_device *ndev, recv_comp_slot_avail(nvdev, mrc, &filled, &avail); if (unlikely(filled > NAPI_POLL_WEIGHT)) { - send_recv_completions(nvchan); + send_recv_completions(ndev, nvdev, nvchan); recv_comp_slot_avail(nvdev, mrc, &filled, &avail); } @@ -1194,7 +1201,7 @@ int netvsc_poll(struct napi_struct *napi, int budget) * then re-enable host interrupts * and reschedule if ring is not empty. */ - if (send_recv_completions(nvchan) == 0 && + if (send_recv_completions(ndev, net_device, nvchan) == 0 && work_done < budget && napi_complete_done(napi, work_done) && hv_end_read(&channel->inbound)) { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 873c83a66cc2..b33f0507c373 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1112,6 +1112,8 @@ static const struct { { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) }, { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) }, { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, + { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) }, + { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) }, }, vf_stats[] = { { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) }, { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },