From c20cd5d753a452807b080bbf390e2f844d7991b3 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 23 Jul 2012 21:16:06 +0000 Subject: [PATCH 01/23] bnx2x: Correct EEE statistics gathering In boards with 4-ports, Tx LPI statistics were gathered incorrectly. This patch guarantees that each pmf will only query its own port for these statistics. Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 4 ++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index ec62a5c8bd37..28a0bcfe61ff 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -1603,6 +1603,10 @@ * of counts that the SM entered the EEE LPI state. Clock 25MHz. Read only * register. Reset on hard reset. */ #define MISC_REG_CPMU_LP_SM_ENT_CNT_P0 0xa8b8 +/* [RW 16] EEE LPI Entry Events Counter. A statistic counter with the number + * of counts that the SM entered the EEE LPI state. Clock 25MHz. Read only + * register. Reset on hard reset. */ +#define MISC_REG_CPMU_LP_SM_ENT_CNT_P1 0xa8bc /* [RW 32] The following driver registers(1...16) represent 16 drivers and 32 clients. Each client can be controlled by one driver only. One in each bit represent that this driver control the appropriate client (Ex: bit 5 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index 667d89042d35..332db64dd5be 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -785,9 +785,11 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp) pstats->host_port_stats_counter++; - if (CHIP_IS_E3(bp)) - estats->eee_tx_lpi += REG_RD(bp, - MISC_REG_CPMU_LP_SM_ENT_CNT_P0); + if (CHIP_IS_E3(bp)) { + u32 lpi_reg = BP_PORT(bp) ? MISC_REG_CPMU_LP_SM_ENT_CNT_P1 + : MISC_REG_CPMU_LP_SM_ENT_CNT_P0; + estats->eee_tx_lpi += REG_RD(bp, lpi_reg); + } if (!BP_NOMCP(bp)) { u32 nig_timer_max = From 9cb429d692b341e972b12e6cd097364050ebbb26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Jul 2012 01:19:31 +0000 Subject: [PATCH 02/23] tcp: early_demux fixes 1) Remove a non needed pskb_may_pull() in tcp_v4_early_demux() and fix a potential bug if skb->head was reallocated (iph & th pointers were not reloaded) TCP stack will pull/check headers anyway. 2) must reload iph in ip_rcv_finish() after early_demux() call since skb->head might have changed. 3) skb->dev->ifindex can be now replaced by skb->skb_iif Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 5 ++++- net/ipv4/tcp_ipv4.c | 9 ++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4ebc6feee250..93134b0eab0c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -326,8 +326,11 @@ static int ip_rcv_finish(struct sk_buff *skb) rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) + if (ipprot && ipprot->early_demux) { ipprot->early_demux(skb); + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } rcu_read_unlock(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3e30548ac32a..b6b07c93924c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1686,7 +1686,6 @@ void tcp_v4_early_demux(struct sk_buff *skb) struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; - struct net_device *dev; struct sock *sk; if (skb->pkt_type != PACKET_HOST) @@ -1701,14 +1700,10 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (th->doff < sizeof(struct tcphdr) / 4) return; - if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) - return; - - dev = skb->dev; sk = __inet_lookup_established(net, &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), - dev->ifindex); + skb->skb_iif); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; @@ -1718,7 +1713,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, 0); if (dst && - icsk->rx_dst_ifindex == dev->ifindex) + icsk->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } From 3324d024ba4bebb74a78d84b925b59df73a92fbd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 24 Jul 2012 01:20:48 +0000 Subject: [PATCH 03/23] team: init error value to 0 in team_netpoll_setup() This will ensure correct value is returned in case the port list is empty. Signed-off-by: Jiri Pirko Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/team/team.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b104c05225f7..87707ab39430 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1447,7 +1447,7 @@ static int team_netpoll_setup(struct net_device *dev, { struct team *team = netdev_priv(dev); struct team_port *port; - int err; + int err = 0; mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { From c66b9b7d365444b433307ebb18734757cb668a02 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 24 Jul 2012 02:42:14 +0000 Subject: [PATCH 04/23] caif: fix NULL pointer check Reported-by: Resolves-bug: http://bugzilla.kernel.org/show_bug?44441 Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- drivers/net/caif/caif_serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 8a3054b84812..5de74e762021 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -325,6 +325,9 @@ static int ldisc_open(struct tty_struct *tty) sprintf(name, "cf%s", tty->name); dev = alloc_netdev(sizeof(*ser), name, caifdev_setup); + if (!dev) + return -ENOMEM; + ser = netdev_priv(dev); ser->tty = tty_kref_get(tty); ser->dev = dev; From 8b72ff6484fe303e01498b58621810a114f3cf09 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 24 Jul 2012 08:16:25 +0000 Subject: [PATCH 05/23] wanmain: comparing array with NULL gcc really should warn about these ! Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/wanrouter/wanmain.c | 47 ++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 788a12c1eb5d..2ab785064b7e 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -602,36 +602,31 @@ static int wanrouter_device_new_if(struct wan_device *wandev, * successfully, add it to the interface list. */ - if (dev->name == NULL) { - err = -EINVAL; - } else { +#ifdef WANDEBUG + printk(KERN_INFO "%s: registering interface %s...\n", + wanrouter_modname, dev->name); +#endif - #ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); - #endif + err = register_netdev(dev); + if (!err) { + struct net_device *slave = NULL; + unsigned long smp_flags=0; - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; + lock_adapter_irq(&wandev->lock, &smp_flags); - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; + if (wandev->dev == NULL) { + wandev->dev = dev; + } else { + for (slave=wandev->dev; + DEV_TO_SLAVE(slave); + slave = DEV_TO_SLAVE(slave)) + DEV_TO_SLAVE(slave) = dev; } + ++wandev->ndev; + + unlock_adapter_irq(&wandev->lock, &smp_flags); + err = 0; /* done !!! */ + goto out; } if (wandev->del_if) wandev->del_if(wandev, dev); From f94898ea6682977f15c5a8f9ffb293a14f95455a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 24 Jul 2012 08:43:22 +0000 Subject: [PATCH 06/23] cdc-ncm: tag Ericsson WWAN devices (eg F5521gw) with FLAG_WWAN Tag Ericsson NCM devices as WWAN modems, since they almost certainly all are. This way userspace clients know that the device requires further setup on the AT-capable serial ports before connectivity is available. Signed-off-by: Dan Williams --- drivers/net/usb/cdc_ncm.c | 68 ++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 4b9513fcf275..f4ce5957df32 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -138,20 +138,7 @@ struct cdc_ncm_ctx { static void cdc_ncm_txpath_bh(unsigned long param); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); -static const struct driver_info cdc_ncm_info; static struct usb_driver cdc_ncm_driver; -static const struct ethtool_ops cdc_ncm_ethtool_ops; - -static const struct usb_device_id cdc_devs[] = { - { USB_INTERFACE_INFO(USB_CLASS_COMM, - USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long)&cdc_ncm_info, - }, - { - }, -}; - -MODULE_DEVICE_TABLE(usb, cdc_devs); static void cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) @@ -454,6 +441,16 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) kfree(ctx); } +static const struct ethtool_ops cdc_ncm_ethtool_ops = { + .get_drvinfo = cdc_ncm_get_drvinfo, + .get_link = usbnet_get_link, + .get_msglevel = usbnet_get_msglevel, + .set_msglevel = usbnet_set_msglevel, + .get_settings = usbnet_get_settings, + .set_settings = usbnet_set_settings, + .nway_reset = usbnet_nway_reset, +}; + static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_ncm_ctx *ctx; @@ -1203,6 +1200,41 @@ static const struct driver_info cdc_ncm_info = { .tx_fixup = cdc_ncm_tx_fixup, }; +/* Same as cdc_ncm_info, but with FLAG_WWAN */ +static const struct driver_info wwan_info = { + .description = "Mobile Broadband Network Device", + .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET + | FLAG_WWAN, + .bind = cdc_ncm_bind, + .unbind = cdc_ncm_unbind, + .check_connect = cdc_ncm_check_connect, + .manage_power = cdc_ncm_manage_power, + .status = cdc_ncm_status, + .rx_fixup = cdc_ncm_rx_fixup, + .tx_fixup = cdc_ncm_tx_fixup, +}; + +static const struct usb_device_id cdc_devs[] = { + /* Ericsson MBM devices like F5521gw */ + { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_VENDOR, + .idVendor = 0x0bdb, + .bInterfaceClass = USB_CLASS_COMM, + .bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, + .bInterfaceProtocol = USB_CDC_PROTO_NONE, + .driver_info = (unsigned long) &wwan_info, + }, + + /* Generic CDC-NCM devices */ + { USB_INTERFACE_INFO(USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_ncm_info, + }, + { + }, +}; +MODULE_DEVICE_TABLE(usb, cdc_devs); + static struct usb_driver cdc_ncm_driver = { .name = "cdc_ncm", .id_table = cdc_devs, @@ -1215,16 +1247,6 @@ static struct usb_driver cdc_ncm_driver = { .disable_hub_initiated_lpm = 1, }; -static const struct ethtool_ops cdc_ncm_ethtool_ops = { - .get_drvinfo = cdc_ncm_get_drvinfo, - .get_link = usbnet_get_link, - .get_msglevel = usbnet_get_msglevel, - .set_msglevel = usbnet_set_msglevel, - .get_settings = usbnet_get_settings, - .set_settings = usbnet_set_settings, - .nway_reset = usbnet_nway_reset, -}; - module_usb_driver(cdc_ncm_driver); MODULE_AUTHOR("Hans Petter Selasky"); From 57dbf29a54bda5773f9ed1d00e3cc633294259da Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Fri, 20 Jul 2012 09:55:43 +0000 Subject: [PATCH 07/23] mlx4: Add support for EEH error recovery Currently the mlx4 drivers don't have the necessary callbacks to implement EEH errors detection and recovery, so the PCI layer uses the probe and remove callbacks to try to recover the device after an error on the bus. However, these callbacks have race conditions with the internal catastrophic error recovery functions, which will also detect the error and this can cause the system to crash if both EEH and catas functions try to reset the device. This patch adds the necessary error recovery callbacks and makes sure that the internal catastrophic error functions will not try to reset the device in such scenarios. It also adds some calls to pci_channel_offline() to suppress reads/writes on the bus when the slot cannot accept I/O operations so we prevent unnecessary accesses to the bus and speed up the device removal. Signed-off-by: Kleber Sacilotto de Souza Acked-by: Shlomo Pongratz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 25 +++++++---- drivers/net/ethernet/mellanox/mlx4/cmd.c | 49 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/main.c | 30 ++++++++++++- 3 files changed, 93 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 915e947b422d..9c656fe4983d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -69,16 +69,21 @@ static void poll_catas(unsigned long dev_ptr) struct mlx4_priv *priv = mlx4_priv(dev); if (readl(priv->catas_err.map)) { - dump_err_buf(dev); + /* If the device is off-line, we cannot try to recover it */ + if (pci_channel_offline(dev->pdev)) + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + else { + dump_err_buf(dev); + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + if (internal_err_reset) { + spin_lock(&catas_lock); + list_add(&priv->catas_err.list, &catas_list); + spin_unlock(&catas_lock); - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); + queue_work(mlx4_wq, &catas_work); + } } } else mod_timer(&priv->catas_err.timer, @@ -100,6 +105,10 @@ static void catas_reset(struct work_struct *work) list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { struct pci_dev *pdev = priv->dev.pdev; + /* If the device is off-line, we cannot reset it */ + if (pci_channel_offline(pdev)) + continue; + ret = mlx4_restart_one(priv->dev.pdev); /* 'priv' now is not valid */ if (ret) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 7e94987d030c..c8fef4353021 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -296,7 +296,12 @@ int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, static int cmd_pending(struct mlx4_dev *dev) { - u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); + u32 status; + + if (pci_channel_offline(dev->pdev)) + return -EIO; + + status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); return (status & swab32(1 << HCR_GO_BIT)) || (mlx4_priv(dev)->cmd.toggle == @@ -314,11 +319,29 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, mutex_lock(&cmd->hcr_mutex); + if (pci_channel_offline(dev->pdev)) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + ret = -EIO; + goto out; + } + end = jiffies; if (event) end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { + if (pci_channel_offline(dev->pdev)) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + ret = -EIO; + goto out; + } + if (time_after_eq(jiffies, end)) { mlx4_err(dev, "%s:cmd_pending failed\n", __func__); goto out; @@ -431,14 +454,33 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); + if (pci_channel_offline(dev->pdev)) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + err = -EIO; + goto out; + } + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) goto out; end = msecs_to_jiffies(timeout) + jiffies; - while (cmd_pending(dev) && time_before(jiffies, end)) + while (cmd_pending(dev) && time_before(jiffies, end)) { + if (pci_channel_offline(dev->pdev)) { + /* + * Device is going through error recovery + * and cannot accept commands. + */ + err = -EIO; + goto out; + } + cond_resched(); + } if (cmd_pending(dev)) { err = -ETIMEDOUT; @@ -532,6 +574,9 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { + if (pci_channel_offline(dev->pdev)) + return -EIO; + if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 42645166bae2..e717091734d0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1775,6 +1775,9 @@ static int mlx4_get_ownership(struct mlx4_dev *dev) void __iomem *owner; u32 ret; + if (pci_channel_offline(dev->pdev)) + return -EIO; + owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { @@ -1791,6 +1794,9 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; + if (pci_channel_offline(dev->pdev)) + return; + owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { @@ -2237,11 +2243,33 @@ static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { MODULE_DEVICE_TABLE(pci, mlx4_pci_table); +static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + mlx4_remove_one(pdev); + + return state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) +{ + int ret = __mlx4_init_one(pdev, NULL); + + return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +} + +static struct pci_error_handlers mlx4_err_handler = { + .error_detected = mlx4_pci_err_detected, + .slot_reset = mlx4_pci_slot_reset, +}; + static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, - .remove = __devexit_p(mlx4_remove_one) + .remove = __devexit_p(mlx4_remove_one), + .err_handler = &mlx4_err_handler, }; static int __init mlx4_verify_params(void) From ac46a4623815113a0305b2a491c125f473a88c53 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 24 Jul 2012 15:05:25 +0000 Subject: [PATCH 08/23] be2net: Missing byteswap in be_get_fw_log_level causes oops on PowerPC We are seeing an oops in be_get_fw_log_level on ppc64 where we walk off the end of memory. commit 941a77d582c8 (be2net: Fix to allow get/set of debug levels in the firmware.) requires byteswapping of num_modes and num_modules. Cc: stable@vger.kernel.org # 3.5+ Signed-off-by: Anton Blanchard Acked-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_ethtool.c | 5 +++-- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index e34be1c7ae8a..c0e700653f96 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -910,8 +910,9 @@ static void be_set_fw_log_level(struct be_adapter *adapter, u32 level) if (!status) { cfgs = (struct be_fat_conf_params *)(extfat_cmd.va + sizeof(struct be_cmd_resp_hdr)); - for (i = 0; i < cfgs->num_modules; i++) { - for (j = 0; j < cfgs->module[i].num_modes; j++) { + for (i = 0; i < le32_to_cpu(cfgs->num_modules); i++) { + u32 num_modes = le32_to_cpu(cfgs->module[i].num_modes); + for (j = 0; j < num_modes; j++) { if (cfgs->module[i].trace_lvl[j].mode == MODE_UART) cfgs->module[i].trace_lvl[j].dbg_lvl = diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4d9677174490..4e81401259f9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3579,7 +3579,7 @@ u32 be_get_fw_log_level(struct be_adapter *adapter) if (!status) { cfgs = (struct be_fat_conf_params *)(extfat_cmd.va + sizeof(struct be_cmd_resp_hdr)); - for (j = 0; j < cfgs->module[0].num_modes; j++) { + for (j = 0; j < le32_to_cpu(cfgs->module[0].num_modes); j++) { if (cfgs->module[0].trace_lvl[j].mode == MODE_UART) level = cfgs->module[0].trace_lvl[j].dbg_lvl; } From 9fafbd4d5f97dfbabe0650a920f7a33b0b71b5d0 Mon Sep 17 00:00:00 2001 From: "frank.blaschka@de.ibm.com" Date: Tue, 24 Jul 2012 22:34:27 +0000 Subject: [PATCH 09/23] net: wiznet add missing HAS_IOMEM dependency The "WIZnet devices" config option should depend on HAS_IOMEM as all wiznet drivers require it as well. Signed-off-by: Martin Schwidefsky Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- drivers/net/ethernet/wiznet/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/wiznet/Kconfig b/drivers/net/ethernet/wiznet/Kconfig index cb18043f5830..b4d281626fb4 100644 --- a/drivers/net/ethernet/wiznet/Kconfig +++ b/drivers/net/ethernet/wiznet/Kconfig @@ -4,6 +4,7 @@ config NET_VENDOR_WIZNET bool "WIZnet devices" + depends on HAS_IOMEM default y ---help--- If you have a network (Ethernet) card belonging to this class, say Y From 0b945293c4fa89347b295ccb355e5faad1585039 Mon Sep 17 00:00:00 2001 From: "frank.blaschka@de.ibm.com" Date: Tue, 24 Jul 2012 22:34:28 +0000 Subject: [PATCH 10/23] netiucv: cleanup attribute usage Let the driver core handle device attribute creation and removal. This will simplify the code and eliminates races between attribute availability and userspace notification via uevents. Signed-off-by: Sebastian Ott Signed-off-by: Frank Blaschka Acked-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/netiucv.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 8160591913f9..4ffa66c87ea5 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -1854,26 +1854,11 @@ static struct attribute_group netiucv_stat_attr_group = { .attrs = netiucv_stat_attrs, }; -static int netiucv_add_files(struct device *dev) -{ - int ret; - - IUCV_DBF_TEXT(trace, 3, __func__); - ret = sysfs_create_group(&dev->kobj, &netiucv_attr_group); - if (ret) - return ret; - ret = sysfs_create_group(&dev->kobj, &netiucv_stat_attr_group); - if (ret) - sysfs_remove_group(&dev->kobj, &netiucv_attr_group); - return ret; -} - -static void netiucv_remove_files(struct device *dev) -{ - IUCV_DBF_TEXT(trace, 3, __func__); - sysfs_remove_group(&dev->kobj, &netiucv_stat_attr_group); - sysfs_remove_group(&dev->kobj, &netiucv_attr_group); -} +static const struct attribute_group *netiucv_attr_groups[] = { + &netiucv_stat_attr_group, + &netiucv_attr_group, + NULL, +}; static int netiucv_register_device(struct net_device *ndev) { @@ -1887,6 +1872,7 @@ static int netiucv_register_device(struct net_device *ndev) dev_set_name(dev, "net%s", ndev->name); dev->bus = &iucv_bus; dev->parent = iucv_root; + dev->groups = netiucv_attr_groups; /* * The release function could be called after the * module has been unloaded. It's _only_ task is to @@ -1904,22 +1890,14 @@ static int netiucv_register_device(struct net_device *ndev) put_device(dev); return ret; } - ret = netiucv_add_files(dev); - if (ret) - goto out_unreg; priv->dev = dev; dev_set_drvdata(dev, priv); return 0; - -out_unreg: - device_unregister(dev); - return ret; } static void netiucv_unregister_device(struct device *dev) { IUCV_DBF_TEXT(trace, 3, __func__); - netiucv_remove_files(dev); device_unregister(dev); } From eabfbe6230ee7363681e7a561948d362b87169f0 Mon Sep 17 00:00:00 2001 From: "frank.blaschka@de.ibm.com" Date: Tue, 24 Jul 2012 22:34:29 +0000 Subject: [PATCH 11/23] qeth: repair crash in qeth_l3_vlan_rx_kill_vid() Commit efc73f4b "net: Fix memory leak - vlan_info struct" adds deletion of VLAN 0 for devices with feature NETIF_F_HW_VLAN_FILTER. For driver qeth these are the layer 3 devices. Usually there exists no separate vlan net_device for VLAN 0. Thus the qeth functions qeth_l3_free_vlan_addresses4() and qeth_l3_free_vlan_addresses6() require an extra checking if function __vlan_find_dev_deep() returns with a net_device. Cc: stable@vger.kernel.org Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0cf706699a04..c5f03fa70fba 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1758,6 +1758,8 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr4"); netdev = __vlan_find_dev_deep(card->dev, vid); + if (!netdev) + return; in_dev = in_dev_get(netdev); if (!in_dev) return; @@ -1786,6 +1788,8 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr6"); netdev = __vlan_find_dev_deep(card->dev, vid); + if (!netdev) + return; in6_dev = in6_dev_get(netdev); if (!in6_dev) return; From 6d8d2dd8ad92ec89f540e73b7a46c5835a7a06a6 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 25 Jul 2012 02:13:59 +0000 Subject: [PATCH 12/23] net/pch_gpe: Cannot disable ethernet autonegation When attempting to disable ethernet autonegation via ethtool, the pch_gpe driver will set software reset bit of PHY chip, But control register of PHY chip of FRI2 will reenable ethernet autonegation. Signed-off-by: Wei Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c | 1 - drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c index 9dbf38c10a68..24b787be6062 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c @@ -129,7 +129,6 @@ static int pch_gbe_set_settings(struct net_device *netdev, hw->mac.link_duplex = ecmd->duplex; hw->phy.autoneg_advertised = ecmd->advertising; hw->mac.autoneg = ecmd->autoneg; - pch_gbe_hal_phy_sw_reset(hw); /* reset the link */ if (netif_running(adapter->netdev)) { diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index b1006563f736..8e0e5f0fb4fd 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1988,6 +1988,7 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter) void pch_gbe_down(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring; /* signal that we're down so the interrupt handler does not @@ -2004,7 +2005,8 @@ void pch_gbe_down(struct pch_gbe_adapter *adapter) netif_carrier_off(netdev); netif_stop_queue(netdev); - pch_gbe_reset(adapter); + if ((pdev->error_state) && (pdev->error_state != pci_channel_io_normal)) + pch_gbe_reset(adapter); pch_gbe_clean_tx_ring(adapter, adapter->tx_ring); pch_gbe_clean_rx_ring(adapter, adapter->rx_ring); From 4331debc51ee1ce319f4a389484e0e8e05de2aca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Jul 2012 05:11:23 +0000 Subject: [PATCH 13/23] ipv4: rt_cache_valid must check expired routes commit d2d68ba9fe8 (ipv4: Cache input routes in fib_info nexthops.) introduced rt_cache_valid() helper. It unfortunately doesn't check if route is expired before caching it. I noticed sk_setup_caps() was constantly called on a tcp workload. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6bcb8fc71cbc..3f7bb7185c50 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -444,7 +444,7 @@ static inline int ip_rt_proc_init(void) } #endif /* CONFIG_PROC_FS */ -static inline int rt_is_expired(struct rtable *rth) +static inline bool rt_is_expired(const struct rtable *rth) { return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } @@ -1222,9 +1222,11 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) } } -static bool rt_cache_valid(struct rtable *rt) +static bool rt_cache_valid(const struct rtable *rt) { - return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK); + return rt && + rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + !rt_is_expired(rt); } static void rt_set_nexthop(struct rtable *rt, __be32 daddr, From fa85a6c29a493e8a574bf62a6349b38b591bf885 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 25 Jul 2012 08:08:41 +0000 Subject: [PATCH 14/23] hyperv: Add a check for ring_size value It prevents ring_size being set to a too small value. Reported-by: Jason Wang Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 8e23c084c4a7..8c5a1c43c81d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -47,7 +47,7 @@ struct net_device_context { struct work_struct work; }; - +#define RING_SIZE_MIN 64 static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@ -518,6 +518,11 @@ static void __exit netvsc_drv_exit(void) static int __init netvsc_drv_init(void) { + if (ring_size < RING_SIZE_MIN) { + ring_size = RING_SIZE_MIN; + pr_info("Increased ring_size to %d (min allowed)\n", + ring_size); + } return vmbus_driver_register(&netvsc_drv); } From 5243e7bd98b2dfecef3acd82661b541905a86e7f Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 25 Jul 2012 08:08:42 +0000 Subject: [PATCH 15/23] hyperv: Add error handling to rndis_filter_device_add() Reported-by: Jason Wang Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index fbf539468205..e5d6146937fa 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -804,18 +804,15 @@ int rndis_filter_device_add(struct hv_device *dev, /* Send the rndis initialization message */ ret = rndis_filter_init_device(rndis_device); if (ret != 0) { - /* - * TODO: If rndis init failed, we will need to shut down the - * channel - */ + rndis_filter_device_remove(dev); + return ret; } /* Get the mac address */ ret = rndis_filter_query_device_mac(rndis_device); if (ret != 0) { - /* - * TODO: shutdown rndis device and the channel - */ + rndis_filter_device_remove(dev); + return ret; } memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); From ee64c0ee5180b3163b89e0d23b22126b5da088b1 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 25 Jul 2012 21:21:16 +0000 Subject: [PATCH 16/23] net/mlx4_en: Limit the RFS filter IDs to be < RPS_NO_FILTER RFS filter id can't have the special value RPS_NO_FILTER, need to skip it when allocating id's. CC: Ben Hutchings Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 8864d8b53737..edd9cb8d3e1d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -201,7 +201,7 @@ mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, filter->flow_id = flow_id; - filter->id = priv->last_filter_id++; + filter->id = priv->last_filter_id++ % RPS_NO_FILTER; list_add_tail(&filter->next, &priv->filters); hlist_add_head(&filter->filter_chain, From c297977ec18deb36b2c0a5ee57101f7ab736ec00 Mon Sep 17 00:00:00 2001 From: Sarveshwar Bandi Date: Wed, 25 Jul 2012 21:29:50 +0000 Subject: [PATCH 17/23] be2net: Fix to parse RSS hash from Receive completions correctly. Wrong pointer variable is being used to parse the rss hash from receive completions leading to corrupted rss_hash values filled into skb. Signed-off-by: Sarveshwar Bandi Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4e81401259f9..c60de89b6669 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1397,7 +1397,7 @@ static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl, rxcp->pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, cast_enc, compl); rxcp->rss_hash = - AMAP_GET_BITS(struct amap_eth_rx_compl_v1, rsshash, rxcp); + AMAP_GET_BITS(struct amap_eth_rx_compl_v1, rsshash, compl); if (rxcp->vlanf) { rxcp->vtm = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vtm, compl); @@ -1429,7 +1429,7 @@ static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl, rxcp->pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, cast_enc, compl); rxcp->rss_hash = - AMAP_GET_BITS(struct amap_eth_rx_compl_v0, rsshash, rxcp); + AMAP_GET_BITS(struct amap_eth_rx_compl_v0, rsshash, compl); if (rxcp->vlanf) { rxcp->vtm = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vtm, compl); From 9c50c0358f31f81dbcf7fa55089865f1efd9928d Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 26 Jul 2012 01:21:24 +0000 Subject: [PATCH 18/23] ixgbe: fix panic while dumping packets on Tx hang with IOMMU This patch resolves a "BUG: unable to handle kernel paging request at ..." oops while dumping packet data. The issue occurs with IOMMU enabled due to the address provided by phys_to_virt(). This patch makes use of skb->data on Tx and the virtual address of the pages allocated for Rx. Signed-off-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3b6784cf134a..c709eae58c63 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -396,11 +396,10 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_cont("\n"); if (netif_msg_pktdata(adapter) && - dma_unmap_len(tx_buffer, len) != 0) + tx_buffer->skb) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(dma_unmap_addr(tx_buffer, - dma)), + tx_buffer->skb->data, dma_unmap_len(tx_buffer, len), true); } @@ -474,10 +473,12 @@ rx_ring_summary: (u64)rx_buffer_info->dma, rx_buffer_info->skb); - if (netif_msg_pktdata(adapter)) { + if (netif_msg_pktdata(adapter) && + rx_buffer_info->dma) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(rx_buffer_info->dma), + page_address(rx_buffer_info->page) + + rx_buffer_info->page_offset, ixgbe_rx_bufsz(rx_ring), true); } } From 913f53e4c8a464c46a70898c88f2291ade28c196 Mon Sep 17 00:00:00 2001 From: Andy Cress Date: Thu, 26 Jul 2012 05:59:07 +0000 Subject: [PATCH 19/23] pch_gbe: fix transmit watchdog timeout An extended ping test with 6 vlans resulted in a driver oops with a netdev transmit timeout. Fix WATCHDOG_TIMEOUT to be more like e1000e at 5 * HZ, to avoid unnecessary transmit timeouts. Signed-off-by: Andy Cress Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 8e0e5f0fb4fd..99bba29e0553 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -35,7 +35,7 @@ const char pch_driver_version[] = DRV_VERSION; #define DSC_INIT16 0xC000 #define PCH_GBE_DMA_ALIGN 0 #define PCH_GBE_DMA_PADDING 2 -#define PCH_GBE_WATCHDOG_PERIOD (1 * HZ) /* watchdog time */ +#define PCH_GBE_WATCHDOG_PERIOD (5 * HZ) /* watchdog time */ #define PCH_GBE_COPYBREAK_DEFAULT 256 #define PCH_GBE_PCI_BAR 1 #define PCH_GBE_RESERVE_MEMORY 0x200000 /* 2MB */ From f2c31662762b9e82b9891d6b385b17f9e5ef0ed2 Mon Sep 17 00:00:00 2001 From: Andy Cress Date: Thu, 26 Jul 2012 06:00:11 +0000 Subject: [PATCH 20/23] pch_gbe: add extra clean tx This adds extra cleaning to the pch_gbe_clean_tx routine to avoid transmit timeouts on some BCM PHYs that have different timing. Also update the DRV_VERSION to 1.01, and show it. Signed-off-by: Andy Cress Signed-off-by: David S. Miller --- .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 99bba29e0553..1bb4cd14b149 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -26,7 +26,7 @@ #include #endif -#define DRV_VERSION "1.00" +#define DRV_VERSION "1.01" const char pch_driver_version[] = DRV_VERSION; #define PCI_DEVICE_ID_INTEL_IOH1_GBE 0x8802 /* Pci device ID */ @@ -1579,7 +1579,8 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, struct sk_buff *skb; unsigned int i; unsigned int cleaned_count = 0; - bool cleaned = true; + bool cleaned = false; + int unused, thresh; pr_debug("next_to_clean : %d\n", tx_ring->next_to_clean); @@ -1588,10 +1589,36 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, pr_debug("gbec_status:0x%04x dma_status:0x%04x\n", tx_desc->gbec_status, tx_desc->dma_status); + unused = PCH_GBE_DESC_UNUSED(tx_ring); + thresh = tx_ring->count - PCH_GBE_TX_WEIGHT; + if ((tx_desc->gbec_status == DSC_INIT16) && (unused < thresh)) + { /* current marked clean, tx queue filling up, do extra clean */ + int j, k; + if (unused < 8) { /* tx queue nearly full */ + pr_debug("clean_tx: transmit queue warning (%x,%x) unused=%d\n", + tx_ring->next_to_clean,tx_ring->next_to_use,unused); + } + + /* current marked clean, scan for more that need cleaning. */ + k = i; + for (j = 0; j < PCH_GBE_TX_WEIGHT; j++) + { + tx_desc = PCH_GBE_TX_DESC(*tx_ring, k); + if (tx_desc->gbec_status != DSC_INIT16) break; /*found*/ + if (++k >= tx_ring->count) k = 0; /*increment, wrap*/ + } + if (j < PCH_GBE_TX_WEIGHT) { + pr_debug("clean_tx: unused=%d loops=%d found tx_desc[%x,%x:%x].gbec_status=%04x\n", + unused,j, i,k, tx_ring->next_to_use, tx_desc->gbec_status); + i = k; /*found one to clean, usu gbec_status==2000.*/ + } + } + while ((tx_desc->gbec_status & DSC_INIT16) == 0x0000) { pr_debug("gbec_status:0x%04x\n", tx_desc->gbec_status); buffer_info = &tx_ring->buffer_info[i]; skb = buffer_info->skb; + cleaned = true; if ((tx_desc->gbec_status & PCH_GBE_TXD_GMAC_STAT_ABT)) { adapter->stats.tx_aborted_errors++; @@ -1639,18 +1666,21 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, } pr_debug("called pch_gbe_unmap_and_free_tx_resource() %d count\n", cleaned_count); - /* Recover from running out of Tx resources in xmit_frame */ - spin_lock(&tx_ring->tx_lock); - if (unlikely(cleaned && (netif_queue_stopped(adapter->netdev)))) { - netif_wake_queue(adapter->netdev); - adapter->stats.tx_restart_count++; - pr_debug("Tx wake queue\n"); + if (cleaned_count > 0) { /*skip this if nothing cleaned*/ + /* Recover from running out of Tx resources in xmit_frame */ + spin_lock(&tx_ring->tx_lock); + if (unlikely(cleaned && (netif_queue_stopped(adapter->netdev)))) + { + netif_wake_queue(adapter->netdev); + adapter->stats.tx_restart_count++; + pr_debug("Tx wake queue\n"); + } + + tx_ring->next_to_clean = i; + + pr_debug("next_to_clean : %d\n", tx_ring->next_to_clean); + spin_unlock(&tx_ring->tx_lock); } - - tx_ring->next_to_clean = i; - - pr_debug("next_to_clean : %d\n", tx_ring->next_to_clean); - spin_unlock(&tx_ring->tx_lock); return cleaned; } @@ -2389,7 +2419,7 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget); cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); - if (!cleaned) + if (cleaned) work_done = budget; /* If no Tx and not enough Rx work done, * exit the polling mode @@ -2795,6 +2825,7 @@ static int __init pch_gbe_init_module(void) { int ret; + pr_info("EG20T PCH Gigabit Ethernet Driver - version %s\n",DRV_VERSION); ret = pci_register_driver(&pch_gbe_driver); if (copybreak != PCH_GBE_COPYBREAK_DEFAULT) { if (copybreak == 0) { From 4487e64de63b8e42efe5a5543871c42c5a5859d9 Mon Sep 17 00:00:00 2001 From: Andy Cress Date: Thu, 26 Jul 2012 06:01:17 +0000 Subject: [PATCH 21/23] pch_gbe: vlan skb len fix pch_gbe_xmit_frame skb->len verification was incorrect in vlan case causing bogus transfer length errors. One correction could be: offset = skb->protocol == htons(ETH_P_8021Q) ? 0 : 4; if (unlikely(skb->len > (adapter->hw.mac.max_frame_size - offset))) However, this verification is not necessary, so remove it. Signed-off-by: Andy Cress Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 1bb4cd14b149..feb85d56c750 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2159,13 +2159,6 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; unsigned long flags; - if (unlikely(skb->len > (adapter->hw.mac.max_frame_size - 4))) { - pr_err("Transfer length Error: skb len: %d > max: %d\n", - skb->len, adapter->hw.mac.max_frame_size); - dev_kfree_skb_any(skb); - adapter->stats.tx_length_errors++; - return NETDEV_TX_OK; - } if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { /* Collision - tell upper layer to requeue */ return NETDEV_TX_LOCKED; From c6cffba4ffa26a8ffacd0bb9f3144e34f20da7de Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jul 2012 11:14:38 +0000 Subject: [PATCH 22/23] ipv4: Fix input route performance regression. With the routing cache removal we lost the "noref" code paths on input, and this can kill some routing workloads. Reinstate the noref path when we hit a cached route in the FIB nexthops. With help from Eric Dumazet. Reported-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/route.h | 19 ++++++++++++++-- net/ipv4/arp.c | 2 +- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 48 ++++++++++++++++++---------------------- net/ipv4/xfrm4_input.c | 4 ++-- 7 files changed, 48 insertions(+), 37 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index c29ef2733f2d..8c52bc6f1c90 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -157,8 +158,22 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin); +extern int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); + +static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin) +{ + int err; + + rcu_read_lock(); + err = ip_route_input_noref(skb, dst, src, tos, devin); + if (!err) + skb_dst_force(skb); + rcu_read_unlock(); + + return err; +} extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a0124eb7dbea..77e87aff419a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f9..da0cc2e6b250 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -172,9 +172,9 @@ static void free_fib_info_rcu(struct rcu_head *head) if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) - dst_release(&nexthop_nh->nh_rth_output->dst); + dst_free(&nexthop_nh->nh_rth_output->dst); if (nexthop_nh->nh_rth_input) - dst_release(&nexthop_nh->nh_rth_input->dst); + dst_free(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7ad88e5e7110..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) /* skb dst is stale, drop it, and perform route lookup again */ skb_dst_drop(head); iph = ip_hdr(head); - err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93134b0eab0c..bda8cac2ae91 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -339,8 +339,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { - int err = ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f7bb7185c50..fc1a81ca79a7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,10 +1199,9 @@ restart: fnhe->fnhe_stamp = jiffies; } -static inline void rt_release_rcu(struct rcu_head *head) +static inline void rt_free(struct rtable *rt) { - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_release(dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) @@ -1216,9 +1215,15 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) prev = cmpxchg(p, orig, rt); if (prev == orig) { - dst_clone(&rt->dst); if (orig) - call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); + rt_free(orig); + } else { + /* Routes we intend to cache in the FIB nexthop have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } } @@ -1245,7 +1250,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_HOST)) + if (!(rt->dst.flags & DST_NOCACHE)) rt_cache_route(nh, rt); } @@ -1261,7 +1266,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm, bool will_cache) { return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | DST_NOCACHE | + (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1366,8 +1371,7 @@ static void ip_handle_martian_source(struct net_device *dev, static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct rtable **result) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable *rth; int err; @@ -1418,7 +1422,7 @@ static int __mkroute_input(struct sk_buff *skb, if (!itag) { rth = FIB_RES_NH(*res).nh_rth_input; if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); + skb_dst_set_noref(skb, &rth->dst); goto out; } do_cache = true; @@ -1445,8 +1449,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + skb_dst_set(skb, &rth->dst); out: - *result = rth; err = 0; cleanup: return err; @@ -1458,21 +1462,13 @@ static int ip_mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { - struct rtable *rth = NULL; - int err; - #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) fib_select_multipath(res); #endif /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); - if (err) - return err; - - skb_dst_set(skb, &rth->dst); - return 0; + return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); } /* @@ -1588,8 +1584,9 @@ local_input: if (!itag) { rth = FIB_RES_NH(res).nh_rth_input; if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - goto set_and_out; + skb_dst_set_noref(skb, &rth->dst); + err = 0; + goto out; } do_cache = true; } @@ -1620,7 +1617,6 @@ local_input: } if (do_cache) rt_cache_route(&FIB_RES_NH(res), rth); -set_and_out: skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -1658,8 +1654,8 @@ martian_source_keep_err: goto out; } -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) { int res; @@ -1702,7 +1698,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_unlock(); return res; } -EXPORT_SYMBOL(ip_route_input); +EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 58d23a572509..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); From c7109986db3c945f50ceed884a30e0fd8af3b89b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jul 2012 12:18:11 +0000 Subject: [PATCH 23/23] ipv6: Early TCP socket demux This is the IPv6 missing bits for infrastructure added in commit 41063e9dd1195 (ipv4: Early TCP socket demux.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 13 ++++++------ include/net/protocol.h | 2 ++ net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 13 ++++++++++-- net/ipv6/tcp_ipv6.c | 38 ++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 00cbb4384c79..9e34c877a770 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -96,14 +96,15 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - struct sock *sk; + struct sock *sk = skb_steal_sock(skb); - if (unlikely(sk = skb_steal_sock(skb))) + if (sk) return sk; - else return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + + return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); } extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/protocol.h b/include/net/protocol.h index 057f2d315567..929528c73fe8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -52,6 +52,8 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { + void (*early_demux)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index bda8cac2ae91..981ff1eef28c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,6 +314,7 @@ drop: } int sysctl_ip_early_demux __read_mostly = 1; +EXPORT_SYMBOL(sysctl_ip_early_demux); static int ip_rcv_finish(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af3..47975e363fcd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,9 +47,18 @@ -inline int ip6_rcv_finish( struct sk_buff *skb) +int ip6_rcv_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct inet6_protocol *ipprot; + + rcu_read_lock(); + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + if (!skb_dst(skb)) ip6_route_input(skb); return dst_input(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d884..221224e72507 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1674,6 +1674,43 @@ do_time_wait: goto discard_it; } +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); + if (dst) + dst = dst_check(dst, 0); + if (dst && + icsk->rx_dst_ifindex == inet6_iif(skb)) + skb_dst_set_noref(skb, dst); + } + } +} + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1984,6 +2021,7 @@ struct proto tcpv6_prot = { }; static const struct inet6_protocol tcpv6_protocol = { + .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check,