From 8536aa06f7d7d0eaed112b869ea07cba75eb05d8 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 19 Jul 2016 10:56:17 +0300 Subject: [PATCH 01/65] fsl/fman: split lines over 80 characters Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman_muram.c | 3 ++- drivers/net/ethernet/freescale/fman/fman_muram.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.c b/drivers/net/ethernet/freescale/fman/fman_muram.c index 47394c45b6e8..5ec94d243da0 100644 --- a/drivers/net/ethernet/freescale/fman/fman_muram.c +++ b/drivers/net/ethernet/freescale/fman/fman_muram.c @@ -150,7 +150,8 @@ unsigned long fman_muram_alloc(struct muram_info *muram, size_t size) * * Free an allocated memory from FM-MURAM partition. */ -void fman_muram_free_mem(struct muram_info *muram, unsigned long offset, size_t size) +void fman_muram_free_mem(struct muram_info *muram, unsigned long offset, + size_t size) { unsigned long addr = fman_muram_offset_to_vbase(muram, offset); diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.h b/drivers/net/ethernet/freescale/fman/fman_muram.h index 889649ad8931..453bf849eee1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_muram.h +++ b/drivers/net/ethernet/freescale/fman/fman_muram.h @@ -46,6 +46,7 @@ unsigned long fman_muram_offset_to_vbase(struct muram_info *muram, unsigned long fman_muram_alloc(struct muram_info *muram, size_t size); -void fman_muram_free_mem(struct muram_info *muram, unsigned long offset, size_t size); +void fman_muram_free_mem(struct muram_info *muram, unsigned long offset, + size_t size); #endif /* __FM_MURAM_EXT */ From 29c4684e76193bc318305b5d8ebe40a4141f029e Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Sat, 9 Jan 2016 23:16:33 +0200 Subject: [PATCH 02/65] fsl/fman: fix loadable module compilation Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/fman/Makefile | 10 +++--- drivers/net/ethernet/freescale/fman/fman.c | 35 
+++++++++++++++++-- .../net/ethernet/freescale/fman/fman_port.c | 23 +++++++++++- drivers/net/ethernet/freescale/fman/fman_sp.c | 3 ++ 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index 51fd2e6c1b84..60491779e49f 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -1,7 +1,9 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman -obj-y += fsl_fman.o fsl_fman_mac.o fsl_mac.o +obj-$(CONFIG_FSL_FMAN) += fsl_fman.o +obj-$(CONFIG_FSL_FMAN) += fsl_fman_port.o +obj-$(CONFIG_FSL_FMAN) += fsl_mac.o -fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_port.o -fsl_fman_mac-objs := fman_dtsec.o fman_memac.o fman_tgec.o -fsl_mac-objs += mac.o +fsl_fman-objs := fman_muram.o fman.o fman_sp.o +fsl_fman_port-objs := fman_port.o +fsl_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 1de2e1e51c2b..ef5f22862ef3 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2115,6 +2115,7 @@ void fman_register_intr(struct fman *fman, enum fman_event_modules module, fman->intr_mng[event].isr_cb = isr_cb; fman->intr_mng[event].src_handle = src_arg; } +EXPORT_SYMBOL(fman_register_intr); /** * fman_unregister_intr @@ -2138,6 +2139,7 @@ void fman_unregister_intr(struct fman *fman, enum fman_event_modules module, fman->intr_mng[event].isr_cb = NULL; fman->intr_mng[event].src_handle = NULL; } +EXPORT_SYMBOL(fman_unregister_intr); /** * fman_set_port_params @@ -2241,6 +2243,7 @@ return_err: spin_unlock_irqrestore(&fman->spinlock, flags); return err; } +EXPORT_SYMBOL(fman_set_port_params); /** * fman_reset_mac @@ -2310,6 +2313,7 @@ int fman_reset_mac(struct fman *fman, u8 mac_id) return 0; } +EXPORT_SYMBOL(fman_reset_mac); /** * fman_set_mac_max_frame @@ 
-2337,6 +2341,7 @@ int fman_set_mac_max_frame(struct fman *fman, u8 mac_id, u16 mfl) } return 0; } +EXPORT_SYMBOL(fman_set_mac_max_frame); /** * fman_get_clock_freq @@ -2363,6 +2368,7 @@ u32 fman_get_bmi_max_fifo_size(struct fman *fman) { return fman->state->bmi_max_fifo_size; } +EXPORT_SYMBOL(fman_get_bmi_max_fifo_size); /** * fman_get_revision @@ -2384,6 +2390,7 @@ void fman_get_revision(struct fman *fman, struct fman_rev_info *rev_info) FPM_REV1_MAJOR_SHIFT); rev_info->minor = tmp & FPM_REV1_MINOR_MASK; } +EXPORT_SYMBOL(fman_get_revision); /** * fman_get_qman_channel_id @@ -2419,6 +2426,7 @@ u32 fman_get_qman_channel_id(struct fman *fman, u32 port_id) return fman->state->qman_channel_base + i; } +EXPORT_SYMBOL(fman_get_qman_channel_id); /** * fman_get_mem_region @@ -2432,6 +2440,7 @@ struct resource *fman_get_mem_region(struct fman *fman) { return fman->state->res; } +EXPORT_SYMBOL(fman_get_mem_region); /* Bootargs defines */ /* Extra headroom for RX buffers - Default, min and max */ @@ -2538,6 +2547,7 @@ struct fman *fman_bind(struct device *fm_dev) { return (struct fman *)(dev_get_drvdata(get_device(fm_dev))); } +EXPORT_SYMBOL(fman_bind); static irqreturn_t fman_err_irq(int irq, void *handle) { @@ -2930,7 +2940,7 @@ static const struct of_device_id fman_match[] = { {} }; -MODULE_DEVICE_TABLE(of, fm_match); +MODULE_DEVICE_TABLE(of, fman_match); static struct platform_driver fman_driver = { .driver = { @@ -2940,4 +2950,25 @@ static struct platform_driver fman_driver = { .probe = fman_probe, }; -builtin_platform_driver(fman_driver); +static int __init fman_load(void) +{ + int err; + + pr_debug("FSL DPAA FMan driver\n"); + + err = platform_driver_register(&fman_driver); + if (err < 0) + pr_err("Error, platform_driver_register() = %d\n", err); + + return err; +} +module_init(fman_load); + +static void __exit fman_unload(void) +{ + platform_driver_unregister(&fman_driver); +} +module_exit(fman_unload); + +MODULE_LICENSE("Dual BSD/GPL"); 
+MODULE_DESCRIPTION("Freescale DPAA Frame Manager driver"); diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 70c198d072dc..6de808e2593a 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1775,4 +1775,25 @@ static struct platform_driver fman_port_driver = { .probe = fman_port_probe, }; -builtin_platform_driver(fman_port_driver); +static int __init fman_port_load(void) +{ + int err; + + pr_debug("FSL DPAA FMan driver\n"); + + err = platform_driver_register(&fman_port_driver); + if (err < 0) + pr_err("Error, platform_driver_register() = %d\n", err); + + return err; +} +module_init(fman_port_load); + +static void __exit fman_port_unload(void) +{ + platform_driver_unregister(&fman_port_driver); +} +module_exit(fman_port_unload); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Freescale DPAA Frame Manager Port driver"); diff --git a/drivers/net/ethernet/freescale/fman/fman_sp.c b/drivers/net/ethernet/freescale/fman/fman_sp.c index f9e7aa385cba..248f5bcca468 100644 --- a/drivers/net/ethernet/freescale/fman/fman_sp.c +++ b/drivers/net/ethernet/freescale/fman/fman_sp.c @@ -80,6 +80,7 @@ void fman_sp_set_buf_pools_in_asc_order_of_buf_sizes(struct fman_ext_pools } } } +EXPORT_SYMBOL(fman_sp_set_buf_pools_in_asc_order_of_buf_sizes); int fman_sp_build_buffer_struct(struct fman_sp_int_context_data_copy * int_context_data_copy, @@ -164,3 +165,5 @@ int fman_sp_build_buffer_struct(struct fman_sp_int_context_data_copy * return 0; } +EXPORT_SYMBOL(fman_sp_build_buffer_struct); + From 5df6f7fa47e0306bdbb94ac7f3545697ff92d7d2 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 22 Mar 2016 10:27:16 +0200 Subject: [PATCH 03/65] fsl/fman: small fixes Make module params static, proper NULL checks, remove __iomem label when misused. 
Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman.c | 6 +++--- drivers/net/ethernet/freescale/fman/fman_port.c | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index ef5f22862ef3..fb2574878958 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2462,7 +2462,7 @@ EXPORT_SYMBOL(fman_get_mem_region); * particular forwarding scenarios that add extra headers to the * forwarded frame. */ -int fsl_fm_rx_extra_headroom = FSL_FM_RX_EXTRA_HEADROOM; +static int fsl_fm_rx_extra_headroom = FSL_FM_RX_EXTRA_HEADROOM; module_param(fsl_fm_rx_extra_headroom, int, 0); MODULE_PARM_DESC(fsl_fm_rx_extra_headroom, "Extra headroom for Rx buffers"); @@ -2475,7 +2475,7 @@ MODULE_PARM_DESC(fsl_fm_rx_extra_headroom, "Extra headroom for Rx buffers"); * Could be overridden once, at boot-time, via the * fm_set_max_frm() callback. 
*/ -int fsl_fm_max_frm = FSL_FM_MAX_FRAME_SIZE; +static int fsl_fm_max_frm = FSL_FM_MAX_FRAME_SIZE; module_param(fsl_fm_max_frm, int, 0); MODULE_PARM_DESC(fsl_fm_max_frm, "Maximum frame size, across all interfaces"); @@ -2868,7 +2868,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev) fman->dts_params.base_addr = devm_ioremap(&of_dev->dev, phys_base_addr, mem_size); - if (fman->dts_params.base_addr == 0) { + if (!fman->dts_params.base_addr) { dev_err(&of_dev->dev, "%s: devm_ioremap() failed\n", __func__); goto fman_free; } diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 6de808e2593a..8b043e7b070b 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1477,7 +1477,8 @@ EXPORT_SYMBOL(fman_port_cfg_buf_prefix_content); */ int fman_port_disable(struct fman_port *port) { - u32 __iomem *bmi_cfg_reg, *bmi_status_reg, tmp; + u32 __iomem *bmi_cfg_reg, *bmi_status_reg; + u32 tmp; bool rx_port, failure = false; int count; @@ -1553,7 +1554,8 @@ EXPORT_SYMBOL(fman_port_disable); */ int fman_port_enable(struct fman_port *port) { - u32 __iomem *bmi_cfg_reg, tmp; + u32 __iomem *bmi_cfg_reg; + u32 tmp; bool rx_port; if (!is_init_done(port->cfg)) @@ -1743,7 +1745,7 @@ static int fman_port_probe(struct platform_device *of_dev) port->dts_params.base_addr = devm_ioremap(port->dev, res.start, resource_size(&res)); - if (port->dts_params.base_addr == 0) + if (!port->dts_params.base_addr) dev_err(port->dev, "%s: devm_ioremap() failed\n", __func__); dev_set_drvdata(&of_dev->dev, port); From 6fa8519274db638fff104fffeadbffc089499244 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 7 Apr 2016 12:50:16 +0300 Subject: [PATCH 04/65] fsl/fman: use of_get_phy_mode() Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/mac.c | 33 ++--------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git 
a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index e33d9d24c1db..f94fad7884df 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -583,31 +583,6 @@ static void setup_memac(struct mac_device *mac_dev) static DEFINE_MUTEX(eth_lock); -static const char phy_str[][11] = { - [PHY_INTERFACE_MODE_MII] = "mii", - [PHY_INTERFACE_MODE_GMII] = "gmii", - [PHY_INTERFACE_MODE_SGMII] = "sgmii", - [PHY_INTERFACE_MODE_TBI] = "tbi", - [PHY_INTERFACE_MODE_RMII] = "rmii", - [PHY_INTERFACE_MODE_RGMII] = "rgmii", - [PHY_INTERFACE_MODE_RGMII_ID] = "rgmii-id", - [PHY_INTERFACE_MODE_RGMII_RXID] = "rgmii-rxid", - [PHY_INTERFACE_MODE_RGMII_TXID] = "rgmii-txid", - [PHY_INTERFACE_MODE_RTBI] = "rtbi", - [PHY_INTERFACE_MODE_XGMII] = "xgmii" -}; - -static phy_interface_t __pure __attribute__((nonnull)) str2phy(const char *str) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(phy_str); i++) - if (strcmp(str, phy_str[i]) == 0) - return (phy_interface_t)i; - - return PHY_INTERFACE_MODE_MII; -} - static const u16 phy2speed[] = { [PHY_INTERFACE_MODE_MII] = SPEED_100, [PHY_INTERFACE_MODE_GMII] = SPEED_1000, @@ -686,7 +661,6 @@ static int mac_probe(struct platform_device *_of_dev) struct resource res; struct mac_priv_s *priv; const u8 *mac_addr; - const char *char_prop; const u32 *u32_prop; u8 fman_id; @@ -870,15 +844,12 @@ static int mac_probe(struct platform_device *_of_dev) } /* Get the PHY connection type */ - char_prop = (const char *)of_get_property(mac_node, - "phy-connection-type", NULL); - if (!char_prop) { + priv->phy_if = of_get_phy_mode(mac_node); + if (priv->phy_if < 0) { dev_warn(dev, "of_get_property(%s, phy-connection-type) failed. 
Defaulting to MII\n", mac_node->full_name); priv->phy_if = PHY_INTERFACE_MODE_MII; - } else { - priv->phy_if = str2phy(char_prop); } priv->speed = phy2speed[priv->phy_if]; From 537a31658f8a01d635eb628eff5895672ac03981 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 May 2016 16:57:14 +0300 Subject: [PATCH 05/65] fsl/fman: simplify device tree reads Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman.c | 30 ++++++++----------- .../net/ethernet/freescale/fman/fman_port.c | 24 +++++---------- drivers/net/ethernet/freescale/fman/mac.c | 24 +++++++-------- 3 files changed, 30 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index fb2574878958..2278bbd6bdfe 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2737,8 +2737,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev) struct fman *fman; struct device_node *fm_node, *muram_node; struct resource *res; - const u32 *u32_prop; - int lenp, err, irq; + u32 val, range[2]; + int err, irq; struct clk *clk; u32 clk_rate; phys_addr_t phys_base_addr; @@ -2750,16 +2750,13 @@ static struct fman *read_dts_node(struct platform_device *of_dev) fm_node = of_node_get(of_dev->dev.of_node); - u32_prop = (const u32 *)of_get_property(fm_node, "cell-index", &lenp); - if (!u32_prop) { - dev_err(&of_dev->dev, "%s: of_get_property(%s, cell-index) failed\n", + err = of_property_read_u32(fm_node, "cell-index", &val); + if (err) { + dev_err(&of_dev->dev, "%s: failed to read cell-index for %s\n", __func__, fm_node->full_name); goto fman_node_put; } - if (WARN_ON(lenp != sizeof(u32))) - goto fman_node_put; - - fman->dts_params.id = (u8)fdt32_to_cpu(u32_prop[0]); + fman->dts_params.id = (u8)val; /* Get the FM interrupt */ res = platform_get_resource(of_dev, IORESOURCE_IRQ, 0); @@ -2806,18 +2803,15 @@ static struct fman *read_dts_node(struct platform_device 
*of_dev) /* Rounding to MHz */ fman->dts_params.clk_freq = DIV_ROUND_UP(clk_rate, 1000000); - u32_prop = (const u32 *)of_get_property(fm_node, - "fsl,qman-channel-range", - &lenp); - if (!u32_prop) { - dev_err(&of_dev->dev, "%s: of_get_property(%s, fsl,qman-channel-range) failed\n", + err = of_property_read_u32_array(fm_node, "fsl,qman-channel-range", + &range[0], 2); + if (err) { + dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %s\n", __func__, fm_node->full_name); goto fman_node_put; } - if (WARN_ON(lenp != sizeof(u32) * 2)) - goto fman_node_put; - fman->dts_params.qman_channel_base = fdt32_to_cpu(u32_prop[0]); - fman->dts_params.num_of_qman_channels = fdt32_to_cpu(u32_prop[1]); + fman->dts_params.qman_channel_base = range[0]; + fman->dts_params.num_of_qman_channels = range[1]; /* Get the MURAM base address and size */ muram_node = of_find_matching_node(fm_node, fman_muram_match); diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 8b043e7b070b..9f3bb50a2365 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1625,7 +1625,7 @@ static int fman_port_probe(struct platform_device *of_dev) struct device_node *fm_node, *port_node; struct resource res; struct resource *dev_res; - const u32 *u32_prop; + u32 val; int err = 0, lenp; enum fman_port_type port_type; u16 port_speed; @@ -1654,28 +1654,20 @@ static int fman_port_probe(struct platform_device *of_dev) goto return_err; } - u32_prop = (const u32 *)of_get_property(port_node, "cell-index", &lenp); - if (!u32_prop) { - dev_err(port->dev, "%s: of_get_property(%s, cell-index) failed\n", + err = of_property_read_u32(port_node, "cell-index", &val); + if (err) { + dev_err(port->dev, "%s: reading cell-index for %s failed\n", __func__, port_node->full_name); err = -EINVAL; goto return_err; } - if (WARN_ON(lenp != sizeof(u32))) { - err = -EINVAL; - goto return_err; - } - 
port_id = (u8)fdt32_to_cpu(u32_prop[0]); - + port_id = (u8)val; port->dts_params.id = port_id; if (of_device_is_compatible(port_node, "fsl,fman-v3-port-tx")) { port_type = FMAN_PORT_TYPE_TX; port_speed = 1000; - u32_prop = (const u32 *)of_get_property(port_node, - "fsl,fman-10g-port", - &lenp); - if (u32_prop) + if (of_find_property(port_node, "fsl,fman-10g-port", &lenp)) port_speed = 10000; } else if (of_device_is_compatible(port_node, "fsl,fman-v2-port-tx")) { @@ -1688,9 +1680,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else if (of_device_is_compatible(port_node, "fsl,fman-v3-port-rx")) { port_type = FMAN_PORT_TYPE_RX; port_speed = 1000; - u32_prop = (const u32 *)of_get_property(port_node, - "fsl,fman-10g-port", &lenp); - if (u32_prop) + if (of_find_property(port_node, "fsl,fman-10g-port", &lenp)) port_speed = 10000; } else if (of_device_is_compatible(port_node, "fsl,fman-v2-port-rx")) { diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index f94fad7884df..dc04e617af8d 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -653,7 +653,7 @@ MODULE_DEVICE_TABLE(of, mac_match); static int mac_probe(struct platform_device *_of_dev) { - int err, i, lenp, nph; + int err, i, nph; struct device *dev; struct device_node *mac_node, *dev_node; struct mac_device *mac_dev; @@ -661,7 +661,7 @@ static int mac_probe(struct platform_device *_of_dev) struct resource res; struct mac_priv_s *priv; const u8 *mac_addr; - const u32 *u32_prop; + u32 val; u8 fman_id; dev = &_of_dev->dev; @@ -723,16 +723,15 @@ static int mac_probe(struct platform_device *_of_dev) } /* Get the FMan cell-index */ - u32_prop = of_get_property(dev_node, "cell-index", &lenp); - if (!u32_prop) { - dev_err(dev, "of_get_property(%s, cell-index) failed\n", + err = of_property_read_u32(dev_node, "cell-index", &val); + if (err) { + dev_err(dev, "failed to read cell-index for %s\n", 
dev_node->full_name); err = -EINVAL; goto _return_of_node_put; } - WARN_ON(lenp != sizeof(u32)); /* cell-index 0 => FMan id 1 */ - fman_id = (u8)(fdt32_to_cpu(u32_prop[0]) + 1); + fman_id = (u8)(val + 1); priv->fman = fman_bind(&of_dev->dev); if (!priv->fman) { @@ -779,15 +778,14 @@ static int mac_probe(struct platform_device *_of_dev) } /* Get the cell-index */ - u32_prop = of_get_property(mac_node, "cell-index", &lenp); - if (!u32_prop) { - dev_err(dev, "of_get_property(%s, cell-index) failed\n", + err = of_property_read_u32(mac_node, "cell-index", &val); + if (err) { + dev_err(dev, "failed to read cell-index for %s\n", mac_node->full_name); err = -EINVAL; goto _return_dev_set_drvdata; } - WARN_ON(lenp != sizeof(u32)); - priv->cell_index = (u8)fdt32_to_cpu(u32_prop[0]); + priv->cell_index = (u8)val; /* Get the MAC address */ mac_addr = of_get_mac_address(mac_node); @@ -847,7 +845,7 @@ static int mac_probe(struct platform_device *_of_dev) priv->phy_if = of_get_phy_mode(mac_node); if (priv->phy_if < 0) { dev_warn(dev, - "of_get_property(%s, phy-connection-type) failed. Defaulting to MII\n", + "of_get_phy_mode() for %s failed. 
Defaulting to MII\n", mac_node->full_name); priv->phy_if = PHY_INTERFACE_MODE_MII; } From 73c364e110015690f13336544a51aab685bd5f23 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 24 May 2016 16:33:54 +0300 Subject: [PATCH 06/65] fsl/fman: return a phy_dev pointer from init Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/mac.c | 22 +++++++++++----------- drivers/net/ethernet/freescale/fman/mac.h | 3 ++- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index dc04e617af8d..e4378c2c1e6a 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -469,9 +469,9 @@ static void adjust_link_memac(struct net_device *net_dev) /* Initializes driver's PHY state, and attaches to the PHY. * Returns 0 on success. */ -static int init_phy(struct net_device *net_dev, - struct mac_device *mac_dev, - void (*adj_lnk)(struct net_device *)) +static struct phy_device *init_phy(struct net_device *net_dev, + struct mac_device *mac_dev, + void (*adj_lnk)(struct net_device *)) { struct phy_device *phy_dev; struct mac_priv_s *priv = mac_dev->priv; @@ -480,7 +480,7 @@ static int init_phy(struct net_device *net_dev, priv->phy_if); if (!phy_dev) { netdev_err(net_dev, "Could not connect to PHY\n"); - return -ENODEV; + return NULL; } /* Remove any features not supported by the controller */ @@ -493,23 +493,23 @@ static int init_phy(struct net_device *net_dev, mac_dev->phy_dev = phy_dev; - return 0; + return phy_dev; } -static int dtsec_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) +static struct phy_device *dtsec_init_phy(struct net_device *net_dev, + struct mac_device *mac_dev) { return init_phy(net_dev, mac_dev, &adjust_link_dtsec); } -static int tgec_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) +static struct phy_device *tgec_init_phy(struct net_device *net_dev, + struct mac_device 
*mac_dev) { return init_phy(net_dev, mac_dev, adjust_link_void); } -static int memac_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) +static struct phy_device *memac_init_phy(struct net_device *net_dev, + struct mac_device *mac_dev) { return init_phy(net_dev, mac_dev, &adjust_link_memac); } diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index 0211cc9a46d6..d7313f0c5135 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -58,7 +58,8 @@ struct mac_device { bool tx_pause_active; bool promisc; - int (*init_phy)(struct net_device *net_dev, struct mac_device *mac_dev); + struct phy_device *(*init_phy)(struct net_device *net_dev, + struct mac_device *mac_dev); int (*init)(struct mac_device *mac_dev); int (*start)(struct mac_device *mac_dev); int (*stop)(struct mac_device *mac_dev); From 44045e45abbda3e6db7ca0ef3e460e6ed03419c9 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Fri, 24 Jun 2016 12:25:05 +0300 Subject: [PATCH 07/65] fsl/fman: MEMAC may use QSGMII PHY interface mode Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman_memac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 45e98fd8b79e..96dfe7eb4e5f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1151,7 +1151,8 @@ struct fman_mac *memac_config(struct fman_mac_params *params) /* Save FMan revision */ fman_get_revision(memac->fm, &memac->fm_rev_info); - if (memac->phy_if == PHY_INTERFACE_MODE_SGMII) { + if (memac->phy_if == PHY_INTERFACE_MODE_SGMII || + memac->phy_if == PHY_INTERFACE_MODE_QSGMII) { if (!params->internal_phy_node) { pr_err("PCS PHY node is not available\n"); memac_free(memac); From 47256192c65b786ea9dbebf47f45b938a02e3914 Mon Sep 17 00:00:00 2001 From: 
Madalin Bucur Date: Tue, 28 Jun 2016 15:32:44 +0300 Subject: [PATCH 08/65] fsl/fman: check pcsphy pointer before use Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman_memac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 96dfe7eb4e5f..53ef51e3bd9e 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -507,6 +507,9 @@ static void setup_sgmii_internal_phy(struct fman_mac *memac, { u16 tmp_reg16; + if (WARN_ON(!memac->pcsphy)) + return; + /* SGMII mode */ tmp_reg16 = IF_MODE_SGMII_EN; if (!fixed_link) From 604104fc549a32ae928435a48de6761af13836ea Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 30 Jun 2016 16:48:05 +0300 Subject: [PATCH 09/65] fsl/fman: check of_get_phy_mode() return value For unknown compatibles avoid crashing and default to SGMII. Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/mac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index e4378c2c1e6a..8fe6b3e253fa 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -663,6 +663,7 @@ static int mac_probe(struct platform_device *_of_dev) const u8 *mac_addr; u32 val; u8 fman_id; + int phy_if; dev = &_of_dev->dev; mac_node = dev->of_node; @@ -842,13 +843,14 @@ static int mac_probe(struct platform_device *_of_dev) } /* Get the PHY connection type */ - priv->phy_if = of_get_phy_mode(mac_node); - if (priv->phy_if < 0) { + phy_if = of_get_phy_mode(mac_node); + if (phy_if < 0) { dev_warn(dev, - "of_get_phy_mode() for %s failed. Defaulting to MII\n", + "of_get_phy_mode() for %s failed. 
Defaulting to SGMII\n", mac_node->full_name); - priv->phy_if = PHY_INTERFACE_MODE_MII; + phy_if = PHY_INTERFACE_MODE_SGMII; } + priv->phy_if = phy_if; priv->speed = phy2speed[priv->phy_if]; priv->max_speed = priv->speed; From 73912d51d6c80096e95d29118b48c53393890bf2 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 7 Jul 2016 15:25:24 +0300 Subject: [PATCH 10/65] fsl/fman: simplify redundant condition Change suggested by David Binderman, thanks. Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 2278bbd6bdfe..1fc10493a6b1 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2331,8 +2331,7 @@ int fman_set_mac_max_frame(struct fman *fman, u8 mac_id, u16 mfl) * or equal to the port's max */ if ((!fman->state->port_mfl[mac_id]) || - (fman->state->port_mfl[mac_id] && - (mfl <= fman->state->port_mfl[mac_id]))) { + (mfl <= fman->state->port_mfl[mac_id])) { fman->state->mac_mfl[mac_id] = mfl; } else { dev_warn(fman->dev, "%s: MAC max_frame_length is larger than Port max_frame_length\n", From 0af46590d415ee672f9056c7cda5da63e02dbebf Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 12 Jul 2016 18:08:52 +0300 Subject: [PATCH 11/65] fsl/fman: fix return value checking Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 1fc10493a6b1..dafd9e1baba2 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -618,7 +618,7 @@ struct fman { unsigned long cam_offset; size_t cam_size; /* Fifo in MURAM */ - int fifo_offset; + unsigned long fifo_offset; size_t fifo_size; u32 liodn_base[64]; @@ -2036,7 
+2036,7 @@ static int fman_init(struct fman *fman) /* allocate MURAM for FIFO according to total size */ fman->fifo_offset = fman_muram_alloc(fman->muram, fman->state->total_fifo_size); - if (IS_ERR_VALUE(fman->cam_offset)) { + if (IS_ERR_VALUE(fman->fifo_offset)) { free_init_resources(fman); dev_err(fman->dev, "%s: MURAM alloc for BMI FIFO failed\n", __func__); From 07d8aafb3ef833fbc3890b36bf2822ce47209621 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 14 Sep 2016 15:08:43 +0300 Subject: [PATCH 12/65] fsl/fman: remove leftover comment Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/fman/fman_mac.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_mac.h b/drivers/net/ethernet/freescale/fman/fman_mac.h index ddf0260176c9..dd6d0526f6c1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_mac.h +++ b/drivers/net/ethernet/freescale/fman/fman_mac.h @@ -191,10 +191,6 @@ struct fman_mac_params { u16 max_speed; /* A handle to the FM object this port related to */ void *fm; - /* MDIO exceptions interrupt source - not valid for all - * MACs; MUST be set to 0 for MACs that don't have - * mdio-irq, or for polling - */ void *dev_id; /* device cookie used by the exception cbs */ fman_mac_exception_cb *event_cb; /* MDIO Events Callback Routine */ fman_mac_exception_cb *exception_cb;/* Exception Callback Routine */ From 2fbfadb5436a394b0aad68d21de60fe3eb9c14db Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 19 Jul 2016 11:10:37 +0300 Subject: [PATCH 13/65] MAINTAINERS: net: add entry for Freescale QorIQ DPAA FMan driver Add record for Freescale QORIQ DPAA FMan driver adding myself as maintainer. 
Signed-off-by: Madalin Bucur --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 669909ed6f25..3460152e80cd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4980,6 +4980,13 @@ F: drivers/net/ethernet/freescale/fec_ptp.c F: drivers/net/ethernet/freescale/fec.h F: Documentation/devicetree/bindings/net/fsl-fec.txt +FREESCALE QORIQ DPAA FMAN DRIVER +M: Madalin Bucur +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/freescale/fman +F: Documentation/devicetree/bindings/powerpc/fsl/fman.txt + FREESCALE QUICC ENGINE LIBRARY L: linuxppc-dev@lists.ozlabs.org S: Orphan From b63452c11e22382e592d3f7f9ac4966197d3eab6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:48 +0100 Subject: [PATCH 14/65] rxrpc: Accesses of rxrpc_local::service need to be RCU managed struct rxrpc_local->service is marked __rcu - this means that accesses of it need to be managed using RCU wrappers. There are two such places in rxrpc_release_sock() where the value is checked and cleared. Fix this by using the appropriate wrappers. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 44c9c2b0b190..2d59c9be40e1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -678,9 +678,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - if (rx->local && rx->local->service == rx) { + if (rx->local && rcu_access_pointer(rx->local->service) == rx) { write_lock(&rx->local->services_lock); - rx->local->service = NULL; + rcu_assign_pointer(rx->local->service, NULL); write_unlock(&rx->local->services_lock); } From 19c0dbd5406ddc669ef1516c02c6b0f5a4465628 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:48 +0100 Subject: [PATCH 15/65] rxrpc: Fix duplicate const Remove a duplicate const keyword. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/misc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d38dffd78085..4954e6e25819 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -777,7 +777,7 @@ extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; extern const char *const rxrpc_pkts[]; -extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; +extern const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; #include diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 9d1c721bc4e8..804a88e28739 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -96,7 +96,7 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_PING] = 9, }; -const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { +const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL", "-?-" }; From 7212a57e8eaa2572481398532d7be0c2685362b9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: [PATCH 16/65] rxrpc: Fix oops on incoming call to serviceless endpoint If a call comes in to a local endpoint that isn't listening for any incoming calls at the moment, an oops will happen. We need to check that the local endpoint's service pointer isn't NULL before we dereference it. 
Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3cac231d8405..22cd8a18c481 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, /* Get the socket providing the service */ rx = rcu_dereference(local->service); - if (service_id == rx->srx.srx_service) + if (rx && service_id == rx->srx.srx_service) goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, From a9f312d98affab387557e2795d4e11ad82a4e4e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: [PATCH 17/65] rxrpc: Only ping for lost reply in client call When a reply is deemed lost, we send a ping to find out whether the other end received all the request data packets we sent. This should be limited to client calls and we shouldn't do this on service calls. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3ad9f75031e3..103d2b0d4690 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -847,7 +847,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && - summary.nr_acks == call->tx_top - hard_ack) + summary.nr_acks == call->tx_top - hard_ack && + rxrpc_is_client_call(call)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); From 26cb02aa6d3efeb543805ed9ad599dae24f7c6d4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: [PATCH 18/65] rxrpc: Fix warning by splitting rxrpc_send_call_packet() Split rxrpc_send_call_packet() to separate ACK generation (which is more complicated) from ABORT generation. This simplifies the code a bit and fixes the following warning: In file included from ../net/rxrpc/output.c:20:0: net/rxrpc/output.c: In function 'rxrpc_send_call_packet': net/rxrpc/ar-internal.h:1187:27: error: 'top' may be used uninitialized in this function [-Werror=maybe-uninitialized] net/rxrpc/output.c:103:24: note: 'top' was declared here net/rxrpc/output.c:225:25: error: 'hard_ack' may be used uninitialized in this function [-Werror=maybe-uninitialized] Reported-by: Arnd Bergmann Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_event.c | 6 +- net/rxrpc/call_object.c | 2 +- net/rxrpc/output.c | 156 ++++++++++++++++++++++------------------ net/rxrpc/recvmsg.c | 4 +- net/rxrpc/rxkad.c | 6 +- net/rxrpc/sendmsg.c | 7 +- 8 files changed, 102 insertions(+), 84 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4954e6e25819..ef849a12a0f0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ 
-1068,7 +1068,8 @@ extern const s8 rxrpc_ack_priority[]; /* * output.c */ -int rxrpc_send_call_packet(struct rxrpc_call *, u8); +int rxrpc_send_ack_packet(struct rxrpc_call *); +int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 22cd8a18c481..832d854c2d5c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -565,7 +565,7 @@ out_discard: write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); if (abort) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 4f00476630b9..e313099860d5 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -253,7 +253,7 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); goto out; } @@ -328,7 +328,7 @@ void rxrpc_process_call(struct work_struct *work) recheck_state: if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); goto recheck_state; } @@ -347,7 +347,7 @@ recheck_state: if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { call->ack_at = call->expire_at; if (call->ackr_reason) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); goto recheck_state; } } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 364b42dc3dce..07094012ac15 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -498,7 +498,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) 
struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_got); rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0d47db886f6e..2dae877c0876 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -19,24 +19,24 @@ #include #include "ar-internal.h" -struct rxrpc_pkt_buffer { +struct rxrpc_ack_buffer { struct rxrpc_wire_header whdr; - union { - struct { - struct rxrpc_ackpacket ack; - u8 acks[255]; - u8 pad[3]; - }; - __be32 abort_code; - }; + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; struct rxrpc_ackinfo ackinfo; }; +struct rxrpc_abort_buffer { + struct rxrpc_wire_header whdr; + __be32 abort_code; +}; + /* * Fill out an ACK packet. */ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_pkt_buffer *pkt, + struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, rxrpc_seq_t *_top) { @@ -91,22 +91,19 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, } /* - * Send an ACK or ABORT call packet. + * Send an ACK call packet. 
*/ -int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) +int rxrpc_send_ack_packet(struct rxrpc_call *call) { struct rxrpc_connection *conn = NULL; - struct rxrpc_pkt_buffer *pkt; + struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; size_t len, n; bool ping = false; - int ioc, ret; - u32 abort_code; - - _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); + int ret; spin_lock_bh(&call->lock); if (call->conn) @@ -131,65 +128,37 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.cid = htonl(call->cid); pkt->whdr.callNumber = htonl(call->call_id); pkt->whdr.seq = 0; - pkt->whdr.type = type; - pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; + pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; pkt->whdr.userStatus = 0; pkt->whdr.securityIndex = call->security_ix; pkt->whdr._rsvd = 0; pkt->whdr.serviceId = htons(call->service_id); - iov[0].iov_base = pkt; - iov[0].iov_len = sizeof(pkt->whdr); - len = sizeof(pkt->whdr); - - switch (type) { - case RXRPC_PACKET_TYPE_ACK: - spin_lock_bh(&call->lock); - if (!call->ackr_reason) { - spin_unlock_bh(&call->lock); - ret = 0; - goto out; - } - ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); - call->ackr_reason = 0; - + spin_lock_bh(&call->lock); + if (!call->ackr_reason) { spin_unlock_bh(&call->lock); - - - pkt->whdr.flags |= RXRPC_SLOW_START_OK; - - iov[0].iov_len += sizeof(pkt->ack) + n; - iov[1].iov_base = &pkt->ackinfo; - iov[1].iov_len = sizeof(pkt->ackinfo); - len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); - ioc = 2; - break; - - case RXRPC_PACKET_TYPE_ABORT: - abort_code = call->abort_code; - pkt->abort_code = htonl(abort_code); - iov[0].iov_len += sizeof(pkt->abort_code); - len += sizeof(pkt->abort_code); - ioc = 1; - break; - - default: - BUG(); - ret = -ENOANO; + ret = 0; goto out; } + ping = (call->ackr_reason == 
RXRPC_ACK_PING); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); + call->ackr_reason = 0; + + spin_unlock_bh(&call->lock); + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); pkt->whdr.serial = htonl(serial); - switch (type) { - case RXRPC_PACKET_TYPE_ACK: - trace_rxrpc_tx_ack(call, serial, - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.serial), - pkt->ack.reason, pkt->ack.nAcks); - break; - } + trace_rxrpc_tx_ack(call, serial, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); if (ping) { call->ackr_ping = serial; @@ -205,13 +174,12 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) set_bit(RXRPC_CALL_PINGING, &call->flags); trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); } - ret = kernel_sendmsg(conn->params.local->socket, - &msg, iov, ioc, len); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ping) call->ackr_ping_time = ktime_get_real(); - if (type == RXRPC_PACKET_TYPE_ACK && - call->state < RXRPC_CALL_COMPLETE) { + if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, @@ -235,6 +203,56 @@ out: return ret; } +/* + * Send an ABORT call packet. 
+ */ +int rxrpc_send_abort_packet(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_abort_buffer pkt; + struct msghdr msg; + struct kvec iov[1]; + rxrpc_serial_t serial; + int ret; + + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(call->cid); + pkt.whdr.callNumber = htonl(call->call_id); + pkt.whdr.seq = 0; + pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = call->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(call->service_id); + pkt.abort_code = htonl(call->abort_code); + + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt); + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, 1, sizeof(pkt)); + + rxrpc_put_connection(conn); + return ret; +} + /* * send a packet through the transport endpoint */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f05ea0a88076..11723bc1c783 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); } write_lock_bh(&call->state_lock); @@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason) - rxrpc_send_call_packet(call, 
RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); } } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 627abed5f999..4374e7b9c7bf 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -381,7 +381,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO"); return -EPROTO; @@ -471,7 +471,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO"); return -EPROTO; @@ -523,7 +523,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (cksum != expected_cksum) { rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3322543d460a..901b28ceeff4 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -197,7 +197,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. 
*/ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); if (!skb) { size_t size, chunk, max, space; @@ -514,8 +514,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else if (cmd == RXRPC_CMD_SEND_ABORT) { ret = 0; if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) - ret = rxrpc_send_call_packet(call, - RXRPC_PACKET_TYPE_ABORT); + ret = rxrpc_send_abort_packet(call); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && @@ -597,7 +596,7 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, lock_sock(sock->sk); if (rxrpc_abort_call(why, call, 0, abort_code, error)) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); release_sock(sock->sk); _leave(""); From a5af7e1fc69a46f29b977fd4b570e0ac414c2338 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: [PATCH 19/65] rxrpc: Fix loss of PING RESPONSE ACK production due to PING ACKs Separate the output of PING ACKs from the output of other sorts of ACK so that if we receive a PING ACK and schedule transmission of a PING RESPONSE ACK, the response doesn't get cancelled by a PING ACK we happen to be scheduling transmission of at the same time. If a PING RESPONSE gets lost, the other side might just sit there waiting for it and refuse to proceed otherwise. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 12 ++++++++--- net/rxrpc/call_event.c | 48 +++++++++++++++++++++++++++++++++++++---- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 4 ++-- net/rxrpc/misc.c | 2 +- net/rxrpc/output.c | 38 ++++++++++++++++++-------------- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 2 +- 8 files changed, 82 insertions(+), 29 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ef849a12a0f0..b56676be07c7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -398,6 +398,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; @@ -410,6 +411,7 @@ enum rxrpc_call_event { RXRPC_CALL_EV_ABORT, /* need to generate abort */ RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ + RXRPC_CALL_EV_PING, /* Ping send required */ }; /* @@ -466,6 +468,7 @@ struct rxrpc_call { struct rxrpc_sock __rcu *socket; /* socket responsible */ ktime_t ack_at; /* When deferred ACK needs to happen */ ktime_t resend_at; /* When next resend needs to happen */ + ktime_t ping_at; /* When next to send a ping */ ktime_t expire_at; /* When the call times out */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ @@ -558,8 +561,10 @@ struct rxrpc_call { rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ - rxrpc_serial_t ackr_ping; /* Last ping sent */ - ktime_t ackr_ping_time; /* Time last ping sent */ + + /* ping management */ + rxrpc_serial_t ping_serial; /* 
Last ping sent */ + ktime_t ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ @@ -730,6 +735,7 @@ enum rxrpc_timer_trace { rxrpc_timer_init_for_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, rxrpc_timer_set_for_send, rxrpc_timer__nr_trace @@ -1068,7 +1074,7 @@ extern const s8 rxrpc_ack_priority[]; /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e313099860d5..eeea9602cb89 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -54,6 +54,14 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, t = call->ack_at; } + if (!ktime_after(call->ping_at, now)) { + call->ping_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + queue = true; + } else if (ktime_before(call->ping_at, t)) { + t = call->ping_at; + } + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); t_j += jiffies; @@ -77,6 +85,27 @@ out: read_unlock_bh(&call->state_lock); } +/* + * Propose a PING ACK be sent. 
+ */ +static void rxrpc_propose_ping(struct rxrpc_call *call, + bool immediate, bool background) +{ + if (immediate) { + if (background && + !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + rxrpc_queue_call(call); + } else { + ktime_t now = ktime_get_real(); + ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); + + if (ktime_before(ping_at, call->ping_at)) { + call->ping_at = ping_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); + } + } +} + /* * propose an ACK be sent */ @@ -90,6 +119,14 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, ktime_t now, ack_at; s8 prior = rxrpc_ack_priority[ack_reason]; + /* Pings are handled specially because we don't want to accidentally + * lose a ping response by subsuming it into a ping. + */ + if (ack_reason == RXRPC_ACK_PING) { + rxrpc_propose_ping(call, immediate, background); + goto trace; + } + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers, but we don't alter the timeout. 
*/ @@ -125,7 +162,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, expiry = rxrpc_soft_ack_delay; break; - case RXRPC_ACK_PING: case RXRPC_ACK_IDLE: if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; @@ -253,7 +289,7 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, true); goto out; } @@ -345,13 +381,17 @@ recheck_state: } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { - call->ack_at = call->expire_at; if (call->ackr_reason) { - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); goto recheck_state; } } + if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { + rxrpc_send_ack_packet(call, true); + goto recheck_state; + } + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { rxrpc_resend(call, now); goto recheck_state; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 07094012ac15..4353a29f3b57 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -205,6 +205,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); call->expire_at = expire_at; call->ack_at = expire_at; + call->ping_at = expire_at; call->resend_at = expire_at; call->timer.expires = jiffies + LONG_MAX / 2; rxrpc_set_timer(call, rxrpc_timer_begin, now); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 103d2b0d4690..a6da83f036d6 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -625,9 +625,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, rxrpc_serial_t ping_serial; ktime_t ping_time; - ping_time = call->ackr_ping_time; + ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ackr_ping; + ping_serial = call->ping_serial; if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, 
ping_serial)) diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 804a88e28739..1cdcba52f83b 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -93,7 +93,6 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_EXCEEDS_WINDOW] = 6, [RXRPC_ACK_NOSPACE] = 7, [RXRPC_ACK_PING_RESPONSE] = 8, - [RXRPC_ACK_PING] = 9, }; const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { @@ -197,6 +196,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_expired] = "*EXPR*", [rxrpc_timer_init_for_reply] = "IniRpl", [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_ping] = "SetPng", [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2dae877c0876..a12cea0cbc05 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -38,7 +38,8 @@ struct rxrpc_abort_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, - rxrpc_seq_t *_top) + rxrpc_seq_t *_top, + u8 reason) { rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; @@ -58,10 +59,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(serial); - pkt->ack.reason = call->ackr_reason; + pkt->ack.reason = reason; pkt->ack.nAcks = top - hard_ack; - if (pkt->ack.reason == RXRPC_ACK_PING) + if (reason == RXRPC_ACK_PING) pkt->whdr.flags |= RXRPC_REQUEST_ACK; if (after(top, hard_ack)) { @@ -93,7 +94,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, /* * Send an ACK call packet. 
*/ -int rxrpc_send_ack_packet(struct rxrpc_call *call) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) { struct rxrpc_connection *conn = NULL; struct rxrpc_ack_buffer *pkt; @@ -102,8 +103,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; size_t len, n; - bool ping = false; int ret; + u8 reason; spin_lock_bh(&call->lock); if (call->conn) @@ -136,14 +137,18 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) pkt->whdr.serviceId = htons(call->service_id); spin_lock_bh(&call->lock); - if (!call->ackr_reason) { - spin_unlock_bh(&call->lock); - ret = 0; - goto out; + if (ping) { + reason = RXRPC_ACK_PING; + } else { + reason = call->ackr_reason; + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } + call->ackr_reason = 0; } - ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); - call->ackr_reason = 0; + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); @@ -161,7 +166,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) pkt->ack.reason, pkt->ack.nAcks); if (ping) { - call->ackr_ping = serial; + call->ping_serial = serial; smp_wmb(); /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but @@ -170,18 +175,19 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) * the packet transmission is more likely to happen towards the * end of the kernel_sendmsg() call. 
*/ - call->ackr_ping_time = ktime_get_real(); + call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ping) - call->ackr_ping_time = ktime_get_real(); + call->ping_time = ktime_get_real(); if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { - clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (ping) + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 11723bc1c783..3fa7771c2a9d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); } write_lock_bh(&call->state_lock); @@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason) - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 901b28ceeff4..55a2fb2cfc2f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -197,7 +197,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. 
*/ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); if (!skb) { size_t size, chunk, max, space; From b3156274ca01297b861e912175820e78c9ac4d7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: [PATCH 20/65] rxrpc: Partially handle OpenAFS's improper termination of calls OpenAFS doesn't always correctly terminate client calls that it makes - this includes calls the OpenAFS servers make to the cache manager service. It should end the client call with either: (1) An ACK that has firstPacket set to one greater than the seq number of the reply DATA packet with the LAST_PACKET flag set (thereby hard-ACK'ing all packets). nAcks should be 0 and acks[] should be empty (ie. no soft-ACKs). (2) An ACKALL packet. OpenAFS, though, may send an ACK packet with firstPacket set to the last seq number or less and soft-ACKs listed for all packets up to and including the last DATA packet. The transmitter, however, is obliged to keep the call live and the soft-ACK'd DATA packets around until they're hard-ACK'd as the receiver is permitted to drop any merely soft-ACK'd packet and request retransmission by sending an ACK packet with a NACK in it. Further, OpenAFS will also terminate a client call by beginning the next client call on the same connection channel. This implicitly completes the previous call. This patch handles implicit ACK of a call on a channel by the reception of the first packet of the next call on that channel. If another call doesn't come along to implicitly ACK a call, then we have to time the call out. There are some bugs there that will be addressed in subsequent patches. 
Signed-off-by: David Howells --- net/rxrpc/input.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a6da83f036d6..44fb8d893c7d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -938,6 +938,33 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, _leave(""); } +/* + * Handle a new call on a channel implicitly completing the preceding call on + * that channel. + * + * TODO: If callNumber > call_id + 1, renegotiate security. + */ +static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) +{ + switch (call->state) { + case RXRPC_CALL_SERVER_AWAIT_ACK: + rxrpc_call_completed(call); + break; + case RXRPC_CALL_COMPLETE: + break; + default: + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } + break; + } + + __rxrpc_disconnect_call(conn, call); + rxrpc_notify_socket(call); +} + /* * post connection-level events to the connection * - this includes challenges, responses, some aborts and call terminal packet @@ -1146,6 +1173,16 @@ void rxrpc_data_ready(struct sock *udp_sk) } call = rcu_dereference(chan->call); + + if (sp->hdr.callNumber > chan->call_id) { + if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { + rcu_read_unlock(); + goto reject_packet; + } + if (call) + rxrpc_input_implicit_end_call(conn, call); + call = NULL; + } } else { skew = 0; call = NULL; From d7833d00915e1fb5743e94d3c207810b30e9fc38 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: [PATCH 21/65] rxrpc: Queue the call on expiry When a call expires, it must be queued for the background processor to deal with otherwise a service call that is improperly terminated will just sit there awaiting an ACK and won't expire. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index eeea9602cb89..e2a987fd31ce 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -35,8 +35,11 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; - if (!ktime_after(t, now)) + if (!ktime_after(t, now)) { + trace_rxrpc_timer(call, why, now, now_j); + queue = true; goto out; + } if (!ktime_after(call->resend_at, now)) { call->resend_at = call->expire_at; @@ -76,12 +79,11 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, mod_timer(&call->timer, t_j); trace_rxrpc_timer(call, why, now, now_j); } - - if (queue) - rxrpc_queue_call(call); } out: + if (queue) + rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } From 94bc669efa3beb1f6b171f5a3225079bc457d4a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: [PATCH 22/65] rxrpc: Add missing notification The call's background processor work item needs to notify the socket when it completes a call so that recvmsg() or the AFS fs can deal with it. Without this, call expiry isn't handled. 
Signed-off-by: David Howells --- net/rxrpc/call_event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e2a987fd31ce..0f91d329e910 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -372,6 +372,7 @@ recheck_state: if (call->state == RXRPC_CALL_COMPLETE) { del_timer_sync(&call->timer); + rxrpc_notify_socket(call); goto out_put; } From cf69207afa2a750ba78782bb4ff4d72c1efb8e6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: [PATCH 23/65] rxrpc: Return negative error code to kernel service In rxrpc_kernel_recv_data(), when we return the error number incurred by a failed call, we must negate it before returning it as it's stored as positive (that's what we have to pass back to userspace). Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 3fa7771c2a9d..db5b02a47518 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -652,7 +652,7 @@ excess_data: goto out; call_complete: *_abort = call->abort_code; - ret = call->error; + ret = -call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (size > 0) From 9008f998a2e992991a5d60656d4573ba4c516c58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: [PATCH 24/65] afs: Check for fatal error when in waiting for ack state When it's in the waiting-for-ACK state, the AFS filesystem needs to check the result of rxrpc_kernel_recv_data() any time it is notified to see if it is indicating a fatal error. If this is the case, it needs to mark the call completed otherwise the call just sits there and never goes away. 
Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 59bdaa7527b6..477928b25940 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -418,7 +418,7 @@ static void afs_deliver_to_call(struct afs_call *call) &call->abort_code); if (ret == -EINPROGRESS || ret == -EAGAIN) return; - if (ret == 1) { + if (ret == 1 || ret < 0) { call->state = AFS_CALL_COMPLETE; goto done; } From 9749fd2beac42e32cb3e3d85489b52b9cc71a9ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: [PATCH 25/65] rxrpc: Need to produce an ACK for service op if op takes a long time We need to generate a DELAY ACK from the service end of an operation if we start doing the actual operation work and it takes longer than expected. This will hard-ACK the request data and allow the client to release its resources. To make this work: (1) We have to set the ack timer and propose an ACK when the call moves to the RXRPC_CALL_SERVER_ACK_REQUEST and clear the pending ACK and cancel the timer when we start transmitting the reply (the first DATA packet of the reply implicitly ACKs the request phase). (2) It must be possible to set the timer when the caller is holding call->state_lock, so split the lock-getting part of the timer function out. (3) Add trace notes for the ACK we're requesting and the timer we clear. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/call_event.c | 16 ++++++++++++---- net/rxrpc/misc.c | 2 ++ net/rxrpc/recvmsg.c | 8 ++++++-- net/rxrpc/sendmsg.c | 5 +++++ 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b56676be07c7..f60e35576526 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -733,6 +733,7 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; enum rxrpc_timer_trace { rxrpc_timer_begin, rxrpc_timer_init_for_reply, + rxrpc_timer_init_for_send_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, rxrpc_timer_set_for_ping, @@ -749,6 +750,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_processing_op, rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, rxrpc_propose_ack_retry_tx, @@ -811,6 +813,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ +void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 0f91d329e910..97a17ada4431 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,15 +24,13 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) +void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) { unsigned long t_j, now_j = jiffies; ktime_t t; bool queue = false; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; if (!ktime_after(t, now)) { @@ -84,6 +82,16 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, out: if (queue) 
rxrpc_queue_call(call); +} + +/* + * Set the timer + */ +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) +{ + read_lock_bh(&call->state_lock); + __rxrpc_set_timer(call, why, now); read_unlock_bh(&call->state_lock); } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1cdcba52f83b..6dee55fad2d3 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -195,6 +195,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_begin] = "Begin ", [rxrpc_timer_expired] = "*EXPR*", [rxrpc_timer_init_for_reply] = "IniRpl", + [rxrpc_timer_init_for_send_reply] = "SndRpl", [rxrpc_timer_set_for_ack] = "SetAck", [rxrpc_timer_set_for_ping] = "SetPng", [rxrpc_timer_set_for_send] = "SetTx ", @@ -207,6 +208,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_processing_op] = "ProcOp ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", [rxrpc_propose_ack_retry_tx] = "RetryTx", diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index db5b02a47518..c29362d50a92 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -151,17 +151,21 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); break; case RXRPC_CALL_SERVER_RECV_REQUEST: call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + call->ack_at = call->expire_at; + write_unlock_bh(&call->state_lock); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, + rxrpc_propose_ack_processing_op); break; default: + write_unlock_bh(&call->state_lock); break; } - - write_unlock_bh(&call->state_lock); } /* diff --git a/net/rxrpc/sendmsg.c 
b/net/rxrpc/sendmsg.c index 55a2fb2cfc2f..b214a4d4a641 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -130,6 +130,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; + call->ack_at = call->expire_at; + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, + ktime_get_real()); if (!last) break; case RXRPC_CALL_SERVER_SEND_REPLY: From bf7d620abf22c321208a4da4f435e7af52551a21 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:51 +0100 Subject: [PATCH 26/65] rxrpc: Don't request an ACK on the last DATA packet of a call's Tx phase Don't request an ACK on the last DATA packet of a call's Tx phase as for a client there will be a reply packet or some sort of ACK to shift phase. If the ACK is requested, OpenAFS sends a REQUESTED-ACK ACK with soft-ACKs in it and doesn't follow up with a hard-ACK. If we don't set the flag, OpenAFS will send a DELAY ACK that hard-ACKs the reply data, thereby allowing the call to terminate cleanly. Signed-off-by: David Howells --- net/rxrpc/output.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a12cea0cbc05..5dab1ff3a6c2 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -307,11 +307,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. 
*/ - if (retrans || - call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), - ktime_get_real())) + if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { From 1165df0ee470930c2da73def005b1f842c2239cc Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Wed, 28 Sep 2016 16:18:35 +0530 Subject: [PATCH 27/65] Bluetooth: btusb: add entry for Marvell 8997 chipset btusb_set_bdaddr_marvell() configures BD address for Marvell chipsets. This patch adds new chipset 8997 in the list so that BD address feature would be available for it. Signed-off-by: Amitkumar Karwar Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6bd63b84abd0..88962339d4b7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -314,6 +314,7 @@ static const struct usb_device_id blacklist_table[] = { /* Marvell Bluetooth devices */ { USB_DEVICE(0x1286, 0x2044), .driver_info = BTUSB_MARVELL }, { USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL }, + { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, From cecbf3e932c1fa6df45fd6cc4fc8081a4cb45bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 5 Oct 2016 12:28:25 +0200 Subject: [PATCH 28/65] Bluetooth: Fix local name in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use complete name if it fits. If not and there is short name check if it fits. 
If not then use shortened name as prefix of complete name. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 47 +++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c8135680c43e..fd6406df8a07 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -973,25 +973,48 @@ void __hci_req_enable_advertising(struct hci_request *req) static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - size_t name_len; + size_t complete_len; + size_t short_len; int max_len; max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - name_len = strlen(hdev->dev_name); - if (name_len > 0 && max_len > 0) { + complete_len = strlen(hdev->dev_name); + short_len = strlen(hdev->short_name); - if (name_len > max_len) { - name_len = max_len; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; + /* no space left for name */ + if (max_len < 1) + return ad_len; - ptr[0] = name_len + 1; + /* no name set */ + if (!complete_len) + return ad_len; - memcpy(ptr + 2, hdev->dev_name, name_len); + /* complete name fits and is eq to max short name len or smaller */ + if (complete_len <= max_len && + complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { + ptr[0] = complete_len + 1; + ptr[1] = EIR_NAME_COMPLETE; + memcpy(ptr + 2, hdev->dev_name, complete_len); - ad_len += (name_len + 2); - ptr += (name_len + 2); + return ad_len + complete_len + 2; + } + + /* short name set and fits */ + if (short_len && short_len <= max_len) { + ptr[0] = short_len + 1; + ptr[1] = EIR_NAME_SHORT; + memcpy(ptr + 2, hdev->short_name, short_len); + + return ad_len + short_len + 2; + } + + /* no short name set so shorten complete name */ + if (!short_len) { + ptr[0] = max_len + 1; + ptr[1] = EIR_NAME_SHORT; + memcpy(ptr + 2, hdev->dev_name, max_len); + + return ad_len + max_len + 2; } return ad_len; From 7ddb30c7471ed69b75ae4c2601d45cbda5d390ff Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 5 Oct 2016 12:28:26 +0200 Subject: [PATCH 29/65] Bluetooth: Add appearance to default scan rsp data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add appearance value to beginning of scan rsp data for default advertising instance if the value is not 0. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index fd6406df8a07..3c44c54a056f 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1022,7 +1022,16 @@ static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { - return append_local_name(hdev, ptr, 0); + u8 scan_rsp_len = 0; + + if (hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + } + + return append_local_name(hdev, ptr + scan_rsp_len, scan_rsp_len); } static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, From 1b422066658b7cc985fa020066b72d28159d858f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 5 Oct 2016 12:28:27 +0200 Subject: [PATCH 30/65] Bluetooth: Refactor append name and appearance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use eir_append_data to remove code duplication. 
Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 44 +++++++++++++------------------------ net/bluetooth/hci_request.h | 23 +++++++++++++++++++ net/bluetooth/mgmt.c | 21 ------------------ 3 files changed, 38 insertions(+), 50 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3c44c54a056f..e2288421fe6b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,8 +21,6 @@ SOFTWARE IS DISCLAIMED. */ -#include - #include #include #include @@ -992,46 +990,39 @@ static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) /* complete name fits and is eq to max short name len or smaller */ if (complete_len <= max_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { - ptr[0] = complete_len + 1; - ptr[1] = EIR_NAME_COMPLETE; - memcpy(ptr + 2, hdev->dev_name, complete_len); - - return ad_len + complete_len + 2; + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); } /* short name set and fits */ if (short_len && short_len <= max_len) { - ptr[0] = short_len + 1; - ptr[1] = EIR_NAME_SHORT; - memcpy(ptr + 2, hdev->short_name, short_len); - - return ad_len + short_len + 2; + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->short_name, short_len); } /* no short name set so shorten complete name */ if (!short_len) { - ptr[0] = max_len + 1; - ptr[1] = EIR_NAME_SHORT; - memcpy(ptr + 2, hdev->dev_name, max_len); - - return ad_len + max_len + 2; + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->dev_name, max_len); } return ad_len; } +static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); +} + static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { u8 scan_rsp_len = 0; if (hdev->appearance) { - ptr[0] = 3; - ptr[1] = EIR_APPEARANCE; - put_unaligned_le16(hdev->appearance, ptr + 2); - scan_rsp_len += 4; + 
scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); } - return append_local_name(hdev, ptr + scan_rsp_len, scan_rsp_len); + return append_local_name(hdev, ptr, scan_rsp_len); } static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, @@ -1048,18 +1039,13 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { - ptr[0] = 3; - ptr[1] = EIR_APPEARANCE; - put_unaligned_le16(hdev->appearance, ptr + 2); - scan_rsp_len += 4; - ptr += 4; + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); } - memcpy(ptr, adv_instance->scan_rsp_data, + memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); scan_rsp_len += adv_instance->scan_rsp_len; - ptr += adv_instance->scan_rsp_len; if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index ac1e11006f38..6b06629245a8 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,6 +20,8 @@ SOFTWARE IS DISCLAIMED. 
*/ +#include <asm/unaligned.h> + #define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) #define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) @@ -103,3 +105,24 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); + +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, + u8 *data, u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 19b8a5e9420d..736038085feb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -867,27 +867,6 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - -static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) -{ - eir[eir_len++] = sizeof(type) + sizeof(data); - eir[eir_len++] = type; - put_unaligned_le16(data, &eir[eir_len]); - eir_len += sizeof(data); - - return eir_len; -} - static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; From 3254f83694fe519ac18b8334a2f481d80c3a8a3a Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 4 Oct 2016 10:29:12 +0100 Subject: [PATCH 31/65] xen-netback: separate guest side rx code into separate module The netback source module has become very large and somewhat confusing.
This patch simply moves all code related to the backend to frontend (i.e guest side rx) data-path into a separate rx source module. This patch contains no functional change, it is code movement and minimal changes to avoid patch style-check issues. Signed-off-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/Makefile | 2 +- drivers/net/xen-netback/netback.c | 754 ---------------------------- drivers/net/xen-netback/rx.c | 789 ++++++++++++++++++++++++++++++ 3 files changed, 790 insertions(+), 755 deletions(-) create mode 100644 drivers/net/xen-netback/rx.c diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile index 11e02be9db1a..d49798a46b51 100644 --- a/drivers/net/xen-netback/Makefile +++ b/drivers/net/xen-netback/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o -xen-netback-y := netback.o xenbus.o interface.o hash.o +xen-netback-y := netback.o xenbus.o interface.o hash.o rx.o diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3d0c989384b5..47b481095d77 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -106,13 +106,6 @@ static void push_tx_responses(struct xenvif_queue *queue); static inline int tx_work_todo(struct xenvif_queue *queue); -static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue, - u16 id, - s8 st, - u16 offset, - u16 size, - u16 flags); - static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, u16 idx) { @@ -155,571 +148,11 @@ static inline pending_ring_idx_t pending_index(unsigned i) return i & (MAX_PENDING_REQS-1); } -static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) -{ - RING_IDX prod, cons; - struct sk_buff *skb; - int needed; - - skb = skb_peek(&queue->rx_queue); - if (!skb) - return false; - - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; - - do { - prod = 
queue->rx.sring->req_prod; - cons = queue->rx.req_cons; - - if (prod - cons >= needed) - return true; - - queue->rx.sring->req_event = prod + 1; - - /* Make sure event is visible before we check prod - * again. - */ - mb(); - } while (queue->rx.sring->req_prod != prod); - - return false; -} - -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) -{ - unsigned long flags; - - spin_lock_irqsave(&queue->rx_queue.lock, flags); - - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) - netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); - - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); -} - -static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) -{ - struct sk_buff *skb; - - spin_lock_irq(&queue->rx_queue.lock); - - skb = __skb_dequeue(&queue->rx_queue); - if (skb) - queue->rx_queue_len -= skb->len; - - spin_unlock_irq(&queue->rx_queue.lock); - - return skb; -} - -static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) -{ - spin_lock_irq(&queue->rx_queue.lock); - - if (queue->rx_queue_len < queue->rx_queue_max) - netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); - - spin_unlock_irq(&queue->rx_queue.lock); -} - - -static void xenvif_rx_queue_purge(struct xenvif_queue *queue) -{ - struct sk_buff *skb; - while ((skb = xenvif_rx_dequeue(queue)) != NULL) - kfree_skb(skb); -} - -static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) -{ - struct sk_buff *skb; - - for(;;) { - skb = skb_peek(&queue->rx_queue); - if (!skb) - break; - if (time_before(jiffies, XENVIF_RX_CB(skb)->expires)) - break; - xenvif_rx_dequeue(queue); - kfree_skb(skb); - } -} - -struct netrx_pending_operations { - unsigned copy_prod, copy_cons; - unsigned meta_prod, meta_cons; - struct gnttab_copy *copy; - struct xenvif_rx_meta *meta; - int copy_off; - grant_ref_t copy_gref; -}; - -static struct xenvif_rx_meta 
*get_next_rx_buffer(struct xenvif_queue *queue, - struct netrx_pending_operations *npo) -{ - struct xenvif_rx_meta *meta; - struct xen_netif_rx_request req; - - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); - - meta = npo->meta + npo->meta_prod++; - meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; - meta->gso_size = 0; - meta->size = 0; - meta->id = req.id; - - npo->copy_off = 0; - npo->copy_gref = req.gref; - - return meta; -} - -struct gop_frag_copy { - struct xenvif_queue *queue; - struct netrx_pending_operations *npo; - struct xenvif_rx_meta *meta; - int head; - int gso_type; - int protocol; - int hash_present; - - struct page *page; -}; - -static void xenvif_setup_copy_gop(unsigned long gfn, - unsigned int offset, - unsigned int *len, - struct gop_frag_copy *info) -{ - struct gnttab_copy *copy_gop; - struct xen_page_foreign *foreign; - /* Convenient aliases */ - struct xenvif_queue *queue = info->queue; - struct netrx_pending_operations *npo = info->npo; - struct page *page = info->page; - - BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); - - if (npo->copy_off == MAX_BUFFER_OFFSET) - info->meta = get_next_rx_buffer(queue, npo); - - if (npo->copy_off + *len > MAX_BUFFER_OFFSET) - *len = MAX_BUFFER_OFFSET - npo->copy_off; - - copy_gop = npo->copy + npo->copy_prod++; - copy_gop->flags = GNTCOPY_dest_gref; - copy_gop->len = *len; - - foreign = xen_page_foreign(page); - if (foreign) { - copy_gop->source.domid = foreign->domid; - copy_gop->source.u.ref = foreign->gref; - copy_gop->flags |= GNTCOPY_source_gref; - } else { - copy_gop->source.domid = DOMID_SELF; - copy_gop->source.u.gmfn = gfn; - } - copy_gop->source.offset = offset; - - copy_gop->dest.domid = queue->vif->domid; - copy_gop->dest.offset = npo->copy_off; - copy_gop->dest.u.ref = npo->copy_gref; - - npo->copy_off += *len; - info->meta->size += *len; - - if (!info->head) - return; - - /* Leave a gap for the GSO descriptor. 
*/ - if ((1 << info->gso_type) & queue->vif->gso_mask) - queue->rx.req_cons++; - - /* Leave a gap for the hash extra segment. */ - if (info->hash_present) - queue->rx.req_cons++; - - info->head = 0; /* There must be something in this buffer now */ -} - -static void xenvif_gop_frag_copy_grant(unsigned long gfn, - unsigned offset, - unsigned int len, - void *data) -{ - unsigned int bytes; - - while (len) { - bytes = len; - xenvif_setup_copy_gop(gfn, offset, &bytes, data); - offset += bytes; - len -= bytes; - } -} - -/* - * Set up the grant operations for this fragment. If it's a flipping - * interface, we also set up the unmap request from here. - */ -static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb, - struct netrx_pending_operations *npo, - struct page *page, unsigned long size, - unsigned long offset, int *head) -{ - struct gop_frag_copy info = { - .queue = queue, - .npo = npo, - .head = *head, - .gso_type = XEN_NETIF_GSO_TYPE_NONE, - /* xenvif_set_skb_hash() will have either set a s/w - * hash or cleared the hash depending on - * whether the the frontend wants a hash for this skb. - */ - .hash_present = skb->sw_hash, - }; - unsigned long bytes; - - if (skb_is_gso(skb)) { - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - } - - /* Data must not cross a page boundary. 
*/ - BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); - - info.meta = npo->meta + npo->meta_prod - 1; - - /* Skip unused frames from start of page */ - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - - while (size > 0) { - BUG_ON(offset >= PAGE_SIZE); - - bytes = PAGE_SIZE - offset; - if (bytes > size) - bytes = size; - - info.page = page; - gnttab_foreach_grant_in_range(page, offset, bytes, - xenvif_gop_frag_copy_grant, - &info); - size -= bytes; - offset = 0; - - /* Next page */ - if (size) { - BUG_ON(!PageCompound(page)); - page++; - } - } - - *head = info.head; -} - -/* - * Prepare an SKB to be transmitted to the frontend. - * - * This function is responsible for allocating grant operations, meta - * structures, etc. - * - * It returns the number of meta structures consumed. The number of - * ring slots used is always equal to the number of meta slots used - * plus the number of GSO descriptors used. Currently, we use either - * zero GSO descriptors (for non-GSO packets) or one descriptor (for - * frontend-side LRO).
- */ -static int xenvif_gop_skb(struct sk_buff *skb, - struct netrx_pending_operations *npo, - struct xenvif_queue *queue) -{ - struct xenvif *vif = netdev_priv(skb->dev); - int nr_frags = skb_shinfo(skb)->nr_frags; - int i; - struct xen_netif_rx_request req; - struct xenvif_rx_meta *meta; - unsigned char *data; - int head = 1; - int old_meta_prod; - int gso_type; - - old_meta_prod = npo->meta_prod; - - gso_type = XEN_NETIF_GSO_TYPE_NONE; - if (skb_is_gso(skb)) { - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - } - - /* Set up a GSO prefix descriptor, if necessary */ - if ((1 << gso_type) & vif->gso_prefix_mask) { - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); - meta = npo->meta + npo->meta_prod++; - meta->gso_type = gso_type; - meta->gso_size = skb_shinfo(skb)->gso_size; - meta->size = 0; - meta->id = req.id; - } - - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); - meta = npo->meta + npo->meta_prod++; - - if ((1 << gso_type) & vif->gso_mask) { - meta->gso_type = gso_type; - meta->gso_size = skb_shinfo(skb)->gso_size; - } else { - meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; - meta->gso_size = 0; - } - - meta->size = 0; - meta->id = req.id; - npo->copy_off = 0; - npo->copy_gref = req.gref; - - data = skb->data; - while (data < skb_tail_pointer(skb)) { - unsigned int offset = offset_in_page(data); - unsigned int len = PAGE_SIZE - offset; - - if (data + len > skb_tail_pointer(skb)) - len = skb_tail_pointer(skb) - data; - - xenvif_gop_frag_copy(queue, skb, npo, - virt_to_page(data), len, offset, &head); - data += len; - } - - for (i = 0; i < nr_frags; i++) { - xenvif_gop_frag_copy(queue, skb, npo, - skb_frag_page(&skb_shinfo(skb)->frags[i]), - skb_frag_size(&skb_shinfo(skb)->frags[i]), - skb_shinfo(skb)->frags[i].page_offset, - &head); - } - - return npo->meta_prod - old_meta_prod; -} - -/* - * This is a twin to 
xenvif_gop_skb. Assume that xenvif_gop_skb was - * used to set up the operations on the top of - * netrx_pending_operations, which have since been done. Check that - * they didn't give any errors and advance over them. - */ -static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, - struct netrx_pending_operations *npo) -{ - struct gnttab_copy *copy_op; - int status = XEN_NETIF_RSP_OKAY; - int i; - - for (i = 0; i < nr_meta_slots; i++) { - copy_op = npo->copy + npo->copy_cons++; - if (copy_op->status != GNTST_okay) { - netdev_dbg(vif->dev, - "Bad status %d from copy to DOM%d.\n", - copy_op->status, vif->domid); - status = XEN_NETIF_RSP_ERROR; - } - } - - return status; -} - -static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status, - struct xenvif_rx_meta *meta, - int nr_meta_slots) -{ - int i; - unsigned long offset; - - /* No fragments used */ - if (nr_meta_slots <= 1) - return; - - nr_meta_slots--; - - for (i = 0; i < nr_meta_slots; i++) { - int flags; - if (i == nr_meta_slots - 1) - flags = 0; - else - flags = XEN_NETRXF_more_data; - - offset = 0; - make_rx_response(queue, meta[i].id, status, offset, - meta[i].size, flags); - } -} - void xenvif_kick_thread(struct xenvif_queue *queue) { wake_up(&queue->wq); } -static void xenvif_rx_action(struct xenvif_queue *queue) -{ - struct xenvif *vif = queue->vif; - s8 status; - u16 flags; - struct xen_netif_rx_response *resp; - struct sk_buff_head rxq; - struct sk_buff *skb; - LIST_HEAD(notify); - int ret; - unsigned long offset; - bool need_to_notify = false; - - struct netrx_pending_operations npo = { - .copy = queue->grant_copy_op, - .meta = queue->meta, - }; - - skb_queue_head_init(&rxq); - - while (xenvif_rx_ring_slots_available(queue) - && (skb = xenvif_rx_dequeue(queue)) != NULL) { - queue->last_rx_time = jiffies; - - XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue); - - __skb_queue_tail(&rxq, skb); - } - - BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta)); - - if 
(!npo.copy_prod) - goto done; - - BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); - gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod); - - while ((skb = __skb_dequeue(&rxq)) != NULL) { - struct xen_netif_extra_info *extra = NULL; - - if ((1 << queue->meta[npo.meta_cons].gso_type) & - vif->gso_prefix_mask) { - resp = RING_GET_RESPONSE(&queue->rx, - queue->rx.rsp_prod_pvt++); - - resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; - - resp->offset = queue->meta[npo.meta_cons].gso_size; - resp->id = queue->meta[npo.meta_cons].id; - resp->status = XENVIF_RX_CB(skb)->meta_slots_used; - - npo.meta_cons++; - XENVIF_RX_CB(skb)->meta_slots_used--; - } - - - queue->stats.tx_bytes += skb->len; - queue->stats.tx_packets++; - - status = xenvif_check_gop(vif, - XENVIF_RX_CB(skb)->meta_slots_used, - &npo); - - if (XENVIF_RX_CB(skb)->meta_slots_used == 1) - flags = 0; - else - flags = XEN_NETRXF_more_data; - - if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ - flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated; - else if (skb->ip_summed == CHECKSUM_UNNECESSARY) - /* remote but checksummed. 
*/ - flags |= XEN_NETRXF_data_validated; - - offset = 0; - resp = make_rx_response(queue, queue->meta[npo.meta_cons].id, - status, offset, - queue->meta[npo.meta_cons].size, - flags); - - if ((1 << queue->meta[npo.meta_cons].gso_type) & - vif->gso_mask) { - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, - queue->rx.rsp_prod_pvt++); - - resp->flags |= XEN_NETRXF_extra_info; - - extra->u.gso.type = queue->meta[npo.meta_cons].gso_type; - extra->u.gso.size = queue->meta[npo.meta_cons].gso_size; - extra->u.gso.pad = 0; - extra->u.gso.features = 0; - - extra->type = XEN_NETIF_EXTRA_TYPE_GSO; - extra->flags = 0; - } - - if (skb->sw_hash) { - /* Since the skb got here via xenvif_select_queue() - * we know that the hash has been re-calculated - * according to a configuration set by the frontend - * and therefore we know that it is legitimate to - * pass it to the frontend. - */ - if (resp->flags & XEN_NETRXF_extra_info) - extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; - else - resp->flags |= XEN_NETRXF_extra_info; - - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, - queue->rx.rsp_prod_pvt++); - - extra->u.hash.algorithm = - XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ; - - if (skb->l4_hash) - extra->u.hash.type = - skb->protocol == htons(ETH_P_IP) ? - _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP : - _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP; - else - extra->u.hash.type = - skb->protocol == htons(ETH_P_IP) ? 
- _XEN_NETIF_CTRL_HASH_TYPE_IPV4 : - _XEN_NETIF_CTRL_HASH_TYPE_IPV6; - - *(uint32_t *)extra->u.hash.value = - skb_get_hash_raw(skb); - - extra->type = XEN_NETIF_EXTRA_TYPE_HASH; - extra->flags = 0; - } - - xenvif_add_frag_responses(queue, status, - queue->meta + npo.meta_cons + 1, - XENVIF_RX_CB(skb)->meta_slots_used); - - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret); - - need_to_notify |= !!ret; - - npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used; - dev_kfree_skb(skb); - } - -done: - if (need_to_notify) - notify_remote_via_irq(queue->rx_irq); -} - void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue) { int more_to_do; @@ -1951,29 +1384,6 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } -static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue, - u16 id, - s8 st, - u16 offset, - u16 size, - u16 flags) -{ - RING_IDX i = queue->rx.rsp_prod_pvt; - struct xen_netif_rx_response *resp; - - resp = RING_GET_RESPONSE(&queue->rx, i); - resp->offset = offset; - resp->flags = flags; - resp->id = id; - resp->status = (s16)size; - if (st < 0) - resp->status = (s16)st; - - queue->rx.rsp_prod_pvt = ++i; - - return resp; -} - void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; @@ -2055,170 +1465,6 @@ err: return err; } -static void xenvif_queue_carrier_off(struct xenvif_queue *queue) -{ - struct xenvif *vif = queue->vif; - - queue->stalled = true; - - /* At least one queue has stalled? Disable the carrier. */ - spin_lock(&vif->lock); - if (vif->stalled_queues++ == 0) { - netdev_info(vif->dev, "Guest Rx stalled"); - netif_carrier_off(vif->dev); - } - spin_unlock(&vif->lock); -} - -static void xenvif_queue_carrier_on(struct xenvif_queue *queue) -{ - struct xenvif *vif = queue->vif; - - queue->last_rx_time = jiffies; /* Reset Rx stall detection. */ - queue->stalled = false; - - /* All queues are ready? Enable the carrier. 
*/ - spin_lock(&vif->lock); - if (--vif->stalled_queues == 0) { - netdev_info(vif->dev, "Guest Rx ready"); - netif_carrier_on(vif->dev); - } - spin_unlock(&vif->lock); -} - -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) -{ - RING_IDX prod, cons; - - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; - - return !queue->stalled && prod - cons < 1 - && time_after(jiffies, - queue->last_rx_time + queue->vif->stall_timeout); -} - -static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) -{ - RING_IDX prod, cons; - - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; - - return queue->stalled && prod - cons >= 1; -} - -static bool xenvif_have_rx_work(struct xenvif_queue *queue) -{ - return xenvif_rx_ring_slots_available(queue) - || (queue->vif->stall_timeout && - (xenvif_rx_queue_stalled(queue) - || xenvif_rx_queue_ready(queue))) - || kthread_should_stop() - || queue->vif->disabled; -} - -static long xenvif_rx_queue_timeout(struct xenvif_queue *queue) -{ - struct sk_buff *skb; - long timeout; - - skb = skb_peek(&queue->rx_queue); - if (!skb) - return MAX_SCHEDULE_TIMEOUT; - - timeout = XENVIF_RX_CB(skb)->expires - jiffies; - return timeout < 0 ? 0 : timeout; -} - -/* Wait until the guest Rx thread has work. - * - * The timeout needs to be adjusted based on the current head of the - * queue (and not just the head at the beginning). In particular, if - * the queue is initially empty an infinite timeout is used and this - * needs to be reduced when a skb is queued. - * - * This cannot be done with wait_event_timeout() because it only - * calculates the timeout once. 
- */ -static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) -{ - DEFINE_WAIT(wait); - - if (xenvif_have_rx_work(queue)) - return; - - for (;;) { - long ret; - - prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); - if (xenvif_have_rx_work(queue)) - break; - ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); - if (!ret) - break; - } - finish_wait(&queue->wq, &wait); -} - -int xenvif_kthread_guest_rx(void *data) -{ - struct xenvif_queue *queue = data; - struct xenvif *vif = queue->vif; - - if (!vif->stall_timeout) - xenvif_queue_carrier_on(queue); - - for (;;) { - xenvif_wait_for_rx_work(queue); - - if (kthread_should_stop()) - break; - - /* This frontend is found to be rogue, disable it in - * kthread context. Currently this is only set when - * netback finds out frontend sends malformed packet, - * but we cannot disable the interface in softirq - * context so we defer it here, if this thread is - * associated with queue 0. - */ - if (unlikely(vif->disabled && queue->id == 0)) { - xenvif_carrier_off(vif); - break; - } - - if (!skb_queue_empty(&queue->rx_queue)) - xenvif_rx_action(queue); - - /* If the guest hasn't provided any Rx slots for a - * while it's probably not responsive, drop the - * carrier so packets are dropped earlier. - */ - if (vif->stall_timeout) { - if (xenvif_rx_queue_stalled(queue)) - xenvif_queue_carrier_off(queue); - else if (xenvif_rx_queue_ready(queue)) - xenvif_queue_carrier_on(queue); - } - - /* Queued packets may have foreign pages from other - * domains. These cannot be queued indefinitely as - * this would starve guests of grant refs and transmit - * slots. 
- */ - xenvif_rx_queue_drop_expired(queue); - - xenvif_rx_queue_maybe_wake(queue); - - cond_resched(); - } - - /* Bin any remaining skbs */ - xenvif_rx_queue_purge(queue); - - return 0; -} - static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue) { /* Dealloc thread must remain running until all inflight diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c new file mode 100644 index 000000000000..03836aaac1c2 --- /dev/null +++ b/drivers/net/xen-netback/rx.c @@ -0,0 +1,789 @@ +/* + * Copyright (c) 2016 Citrix Systems Inc. + * Copyright (c) 2002-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#include + +#include +#include + +static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + struct sk_buff *skb; + int needed; + + skb = skb_peek(&queue->rx_queue); + if (!skb) + return false; + + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; + + do { + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + if (prod - cons >= needed) + return true; + + queue->rx.sring->req_event = prod + 1; + + /* Make sure event is visible before we check prod + * again. + */ + mb(); + } while (queue->rx.sring->req_prod != prod); + + return false; +} + +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; + if (queue->rx_queue_len > queue->rx_queue_max) { + struct net_device *dev = queue->vif->dev; + + netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + } + + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +} + +static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + spin_lock_irq(&queue->rx_queue.lock); + + skb = __skb_dequeue(&queue->rx_queue); + if (skb) + queue->rx_queue_len -= skb->len; + + spin_unlock_irq(&queue->rx_queue.lock); + + return skb; +} + +static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) +{ + spin_lock_irq(&queue->rx_queue.lock); + + if (queue->rx_queue_len < queue->rx_queue_max) { + struct net_device *dev = queue->vif->dev; + + netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id)); + } + 
+ spin_unlock_irq(&queue->rx_queue.lock); +} + +static void xenvif_rx_queue_purge(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + while ((skb = xenvif_rx_dequeue(queue)) != NULL) + kfree_skb(skb); +} + +static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + for (;;) { + skb = skb_peek(&queue->rx_queue); + if (!skb) + break; + if (time_before(jiffies, XENVIF_RX_CB(skb)->expires)) + break; + xenvif_rx_dequeue(queue); + kfree_skb(skb); + } +} + +struct netrx_pending_operations { + unsigned int copy_prod, copy_cons; + unsigned int meta_prod, meta_cons; + struct gnttab_copy *copy; + struct xenvif_rx_meta *meta; + int copy_off; + grant_ref_t copy_gref; +}; + +static struct xenvif_rx_meta *get_next_rx_buffer( + struct xenvif_queue *queue, + struct netrx_pending_operations *npo) +{ + struct xenvif_rx_meta *meta; + struct xen_netif_rx_request req; + + RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); + + meta = npo->meta + npo->meta_prod++; + meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; + meta->gso_size = 0; + meta->size = 0; + meta->id = req.id; + + npo->copy_off = 0; + npo->copy_gref = req.gref; + + return meta; +} + +struct gop_frag_copy { + struct xenvif_queue *queue; + struct netrx_pending_operations *npo; + struct xenvif_rx_meta *meta; + int head; + int gso_type; + int protocol; + int hash_present; + + struct page *page; +}; + +static void xenvif_setup_copy_gop(unsigned long gfn, + unsigned int offset, + unsigned int *len, + struct gop_frag_copy *info) +{ + struct gnttab_copy *copy_gop; + struct xen_page_foreign *foreign; + /* Convenient aliases */ + struct xenvif_queue *queue = info->queue; + struct netrx_pending_operations *npo = info->npo; + struct page *page = info->page; + + WARN_ON(npo->copy_off > MAX_BUFFER_OFFSET); + + if (npo->copy_off == MAX_BUFFER_OFFSET) + info->meta = get_next_rx_buffer(queue, npo); + + if (npo->copy_off + *len > MAX_BUFFER_OFFSET) + *len = MAX_BUFFER_OFFSET - npo->copy_off; + + 
copy_gop = npo->copy + npo->copy_prod++; + copy_gop->flags = GNTCOPY_dest_gref; + copy_gop->len = *len; + + foreign = xen_page_foreign(page); + if (foreign) { + copy_gop->source.domid = foreign->domid; + copy_gop->source.u.ref = foreign->gref; + copy_gop->flags |= GNTCOPY_source_gref; + } else { + copy_gop->source.domid = DOMID_SELF; + copy_gop->source.u.gmfn = gfn; + } + copy_gop->source.offset = offset; + + copy_gop->dest.domid = queue->vif->domid; + copy_gop->dest.offset = npo->copy_off; + copy_gop->dest.u.ref = npo->copy_gref; + + npo->copy_off += *len; + info->meta->size += *len; + + if (!info->head) + return; + + /* Leave a gap for the GSO descriptor. */ + if ((1 << info->gso_type) & queue->vif->gso_mask) + queue->rx.req_cons++; + + /* Leave a gap for the hash extra segment. */ + if (info->hash_present) + queue->rx.req_cons++; + + info->head = 0; /* There must be something in this buffer now */ +} + +static void xenvif_gop_frag_copy_grant(unsigned long gfn, + unsigned int offset, + unsigned int len, + void *data) +{ + unsigned int bytes; + + while (len) { + bytes = len; + xenvif_setup_copy_gop(gfn, offset, &bytes, data); + offset += bytes; + len -= bytes; + } +} + +/* Set up the grant operations for this fragment. If it's a flipping + * interface, we also set up the unmap request from here. + */ +static void xenvif_gop_frag_copy(struct xenvif_queue *queue, + struct sk_buff *skb, + struct netrx_pending_operations *npo, + struct page *page, unsigned long size, + unsigned long offset, int *head) +{ + struct gop_frag_copy info = { + .queue = queue, + .npo = npo, + .head = *head, + .gso_type = XEN_NETIF_GSO_TYPE_NONE, + /* xenvif_set_skb_hash() will have either set a s/w + * hash or cleared the hash depending on + * whether the the frontend wants a hash for this skb. 
+ */ + .hash_present = skb->sw_hash, + }; + unsigned long bytes; + + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + } + + /* Data must not cross a page boundary. */ + WARN_ON(size + offset > (PAGE_SIZE << compound_order(page))); + + info.meta = npo->meta + npo->meta_prod - 1; + + /* Skip unused frames from start of page */ + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + + while (size > 0) { + WARN_ON(offset >= PAGE_SIZE); + + bytes = PAGE_SIZE - offset; + if (bytes > size) + bytes = size; + + info.page = page; + gnttab_foreach_grant_in_range(page, offset, bytes, + xenvif_gop_frag_copy_grant, + &info); + size -= bytes; + offset = 0; + + /* Next page */ + if (size) { + WARN_ON(!PageCompound(page)); + page++; + } + } + + *head = info.head; +} + +/* Prepare an SKB to be transmitted to the frontend. + * + * This function is responsible for allocating grant operations, meta + * structures, etc. + * + * It returns the number of meta structures consumed. The number of + * ring slots used is always equal to the number of meta slots used + * plus the number of GSO descriptors used. Currently, we use either + * zero GSO descriptors (for non-GSO packets) or one descriptor (for + * frontend-side LRO). 
+ */ +static int xenvif_gop_skb(struct sk_buff *skb, + struct netrx_pending_operations *npo, + struct xenvif_queue *queue) +{ + struct xenvif *vif = netdev_priv(skb->dev); + int nr_frags = skb_shinfo(skb)->nr_frags; + int i; + struct xen_netif_rx_request req; + struct xenvif_rx_meta *meta; + unsigned char *data; + int head = 1; + int old_meta_prod; + int gso_type; + + old_meta_prod = npo->meta_prod; + + gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + } + + /* Set up a GSO prefix descriptor, if necessary */ + if ((1 << gso_type) & vif->gso_prefix_mask) { + RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); + meta = npo->meta + npo->meta_prod++; + meta->gso_type = gso_type; + meta->gso_size = skb_shinfo(skb)->gso_size; + meta->size = 0; + meta->id = req.id; + } + + RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); + meta = npo->meta + npo->meta_prod++; + + if ((1 << gso_type) & vif->gso_mask) { + meta->gso_type = gso_type; + meta->gso_size = skb_shinfo(skb)->gso_size; + } else { + meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; + meta->gso_size = 0; + } + + meta->size = 0; + meta->id = req.id; + npo->copy_off = 0; + npo->copy_gref = req.gref; + + data = skb->data; + while (data < skb_tail_pointer(skb)) { + unsigned int offset = offset_in_page(data); + unsigned int len = PAGE_SIZE - offset; + + if (data + len > skb_tail_pointer(skb)) + len = skb_tail_pointer(skb) - data; + + xenvif_gop_frag_copy(queue, skb, npo, + virt_to_page(data), len, offset, &head); + data += len; + } + + for (i = 0; i < nr_frags; i++) { + xenvif_gop_frag_copy(queue, skb, npo, + skb_frag_page(&skb_shinfo(skb)->frags[i]), + skb_frag_size(&skb_shinfo(skb)->frags[i]), + skb_shinfo(skb)->frags[i].page_offset, + &head); + } + + return npo->meta_prod - old_meta_prod; +} + +/* This is a twin to 
xenvif_gop_skb. Assume that xenvif_gop_skb was + * used to set up the operations on the top of + * netrx_pending_operations, which have since been done. Check that + * they didn't give any errors and advance over them. + */ +static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, + struct netrx_pending_operations *npo) +{ + struct gnttab_copy *copy_op; + int status = XEN_NETIF_RSP_OKAY; + int i; + + for (i = 0; i < nr_meta_slots; i++) { + copy_op = npo->copy + npo->copy_cons++; + if (copy_op->status != GNTST_okay) { + netdev_dbg(vif->dev, + "Bad status %d from copy to DOM%d.\n", + copy_op->status, vif->domid); + status = XEN_NETIF_RSP_ERROR; + } + } + + return status; +} + +static struct xen_netif_rx_response *make_rx_response( + struct xenvif_queue *queue, u16 id, s8 st, u16 offset, u16 size, + u16 flags) +{ + RING_IDX i = queue->rx.rsp_prod_pvt; + struct xen_netif_rx_response *resp; + + resp = RING_GET_RESPONSE(&queue->rx, i); + resp->offset = offset; + resp->flags = flags; + resp->id = id; + resp->status = (s16)size; + if (st < 0) + resp->status = (s16)st; + + queue->rx.rsp_prod_pvt = ++i; + + return resp; +} + +static void xenvif_add_frag_responses(struct xenvif_queue *queue, + int status, + struct xenvif_rx_meta *meta, + int nr_meta_slots) +{ + int i; + unsigned long offset; + + /* No fragments used */ + if (nr_meta_slots <= 1) + return; + + nr_meta_slots--; + + for (i = 0; i < nr_meta_slots; i++) { + int flags; + + if (i == nr_meta_slots - 1) + flags = 0; + else + flags = XEN_NETRXF_more_data; + + offset = 0; + make_rx_response(queue, meta[i].id, status, offset, + meta[i].size, flags); + } +} + +static void xenvif_rx_action(struct xenvif_queue *queue) +{ + struct xenvif *vif = queue->vif; + s8 status; + u16 flags; + struct xen_netif_rx_response *resp; + struct sk_buff_head rxq; + struct sk_buff *skb; + LIST_HEAD(notify); + int ret; + unsigned long offset; + bool need_to_notify = false; + + struct netrx_pending_operations npo = { + .copy = 
queue->grant_copy_op, + .meta = queue->meta, + }; + + skb_queue_head_init(&rxq); + + while (xenvif_rx_ring_slots_available(queue) && + (skb = xenvif_rx_dequeue(queue)) != NULL) { + queue->last_rx_time = jiffies; + + XENVIF_RX_CB(skb)->meta_slots_used = + xenvif_gop_skb(skb, &npo, queue); + + __skb_queue_tail(&rxq, skb); + } + + WARN_ON(npo.meta_prod > ARRAY_SIZE(queue->meta)); + + if (!npo.copy_prod) + goto done; + + WARN_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); + gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod); + + while ((skb = __skb_dequeue(&rxq)) != NULL) { + struct xen_netif_extra_info *extra = NULL; + + if ((1 << queue->meta[npo.meta_cons].gso_type) & + vif->gso_prefix_mask) { + resp = RING_GET_RESPONSE(&queue->rx, + queue->rx.rsp_prod_pvt++); + + resp->flags = XEN_NETRXF_gso_prefix | + XEN_NETRXF_more_data; + + resp->offset = queue->meta[npo.meta_cons].gso_size; + resp->id = queue->meta[npo.meta_cons].id; + resp->status = XENVIF_RX_CB(skb)->meta_slots_used; + + npo.meta_cons++; + XENVIF_RX_CB(skb)->meta_slots_used--; + } + + queue->stats.tx_bytes += skb->len; + queue->stats.tx_packets++; + + status = xenvif_check_gop(vif, + XENVIF_RX_CB(skb)->meta_slots_used, + &npo); + + if (XENVIF_RX_CB(skb)->meta_slots_used == 1) + flags = 0; + else + flags = XEN_NETRXF_more_data; + + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ + flags |= XEN_NETRXF_csum_blank | + XEN_NETRXF_data_validated; + else if (skb->ip_summed == CHECKSUM_UNNECESSARY) + /* remote but checksummed. 
*/ + flags |= XEN_NETRXF_data_validated; + + offset = 0; + resp = make_rx_response(queue, queue->meta[npo.meta_cons].id, + status, offset, + queue->meta[npo.meta_cons].size, + flags); + + if ((1 << queue->meta[npo.meta_cons].gso_type) & + vif->gso_mask) { + extra = (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&queue->rx, + queue->rx.rsp_prod_pvt++); + + resp->flags |= XEN_NETRXF_extra_info; + + extra->u.gso.type = queue->meta[npo.meta_cons].gso_type; + extra->u.gso.size = queue->meta[npo.meta_cons].gso_size; + extra->u.gso.pad = 0; + extra->u.gso.features = 0; + + extra->type = XEN_NETIF_EXTRA_TYPE_GSO; + extra->flags = 0; + } + + if (skb->sw_hash) { + /* Since the skb got here via xenvif_select_queue() + * we know that the hash has been re-calculated + * according to a configuration set by the frontend + * and therefore we know that it is legitimate to + * pass it to the frontend. + */ + if (resp->flags & XEN_NETRXF_extra_info) + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; + else + resp->flags |= XEN_NETRXF_extra_info; + + extra = (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&queue->rx, + queue->rx.rsp_prod_pvt++); + + extra->u.hash.algorithm = + XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ; + + if (skb->l4_hash) + extra->u.hash.type = + skb->protocol == htons(ETH_P_IP) ? + _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP : + _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP; + else + extra->u.hash.type = + skb->protocol == htons(ETH_P_IP) ? 
+ _XEN_NETIF_CTRL_HASH_TYPE_IPV4 : + _XEN_NETIF_CTRL_HASH_TYPE_IPV6; + + *(uint32_t *)extra->u.hash.value = + skb_get_hash_raw(skb); + + extra->type = XEN_NETIF_EXTRA_TYPE_HASH; + extra->flags = 0; + } + + xenvif_add_frag_responses(queue, status, + queue->meta + npo.meta_cons + 1, + XENVIF_RX_CB(skb)->meta_slots_used); + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret); + + need_to_notify |= !!ret; + + npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used; + dev_kfree_skb(skb); + } + +done: + if (need_to_notify) + notify_remote_via_irq(queue->rx_irq); +} + +static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + return !queue->stalled && + prod - cons < 1 && + time_after(jiffies, + queue->last_rx_time + queue->vif->stall_timeout); +} + +static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + return queue->stalled && prod - cons >= 1; +} + +static bool xenvif_have_rx_work(struct xenvif_queue *queue) +{ + return xenvif_rx_ring_slots_available(queue) || + (queue->vif->stall_timeout && + (xenvif_rx_queue_stalled(queue) || + xenvif_rx_queue_ready(queue))) || + kthread_should_stop() || + queue->vif->disabled; +} + +static long xenvif_rx_queue_timeout(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + long timeout; + + skb = skb_peek(&queue->rx_queue); + if (!skb) + return MAX_SCHEDULE_TIMEOUT; + + timeout = XENVIF_RX_CB(skb)->expires - jiffies; + return timeout < 0 ? 0 : timeout; +} + +/* Wait until the guest Rx thread has work. + * + * The timeout needs to be adjusted based on the current head of the + * queue (and not just the head at the beginning). In particular, if + * the queue is initially empty an infinite timeout is used and this + * needs to be reduced when a skb is queued. 
+ * + * This cannot be done with wait_event_timeout() because it only + * calculates the timeout once. + */ +static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) +{ + DEFINE_WAIT(wait); + + if (xenvif_have_rx_work(queue)) + return; + + for (;;) { + long ret; + + prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); + if (xenvif_have_rx_work(queue)) + break; + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); + if (!ret) + break; + } + finish_wait(&queue->wq, &wait); +} + +static void xenvif_queue_carrier_off(struct xenvif_queue *queue) +{ + struct xenvif *vif = queue->vif; + + queue->stalled = true; + + /* At least one queue has stalled? Disable the carrier. */ + spin_lock(&vif->lock); + if (vif->stalled_queues++ == 0) { + netdev_info(vif->dev, "Guest Rx stalled"); + netif_carrier_off(vif->dev); + } + spin_unlock(&vif->lock); +} + +static void xenvif_queue_carrier_on(struct xenvif_queue *queue) +{ + struct xenvif *vif = queue->vif; + + queue->last_rx_time = jiffies; /* Reset Rx stall detection. */ + queue->stalled = false; + + /* All queues are ready? Enable the carrier. */ + spin_lock(&vif->lock); + if (--vif->stalled_queues == 0) { + netdev_info(vif->dev, "Guest Rx ready"); + netif_carrier_on(vif->dev); + } + spin_unlock(&vif->lock); +} + +int xenvif_kthread_guest_rx(void *data) +{ + struct xenvif_queue *queue = data; + struct xenvif *vif = queue->vif; + + if (!vif->stall_timeout) + xenvif_queue_carrier_on(queue); + + for (;;) { + xenvif_wait_for_rx_work(queue); + + if (kthread_should_stop()) + break; + + /* This frontend is found to be rogue, disable it in + * kthread context. Currently this is only set when + * netback finds out frontend sends malformed packet, + * but we cannot disable the interface in softirq + * context so we defer it here, if this thread is + * associated with queue 0. 
+ */ + if (unlikely(vif->disabled && queue->id == 0)) { + xenvif_carrier_off(vif); + break; + } + + if (!skb_queue_empty(&queue->rx_queue)) + xenvif_rx_action(queue); + + /* If the guest hasn't provided any Rx slots for a + * while it's probably not responsive, drop the + * carrier so packets are dropped earlier. + */ + if (vif->stall_timeout) { + if (xenvif_rx_queue_stalled(queue)) + xenvif_queue_carrier_off(queue); + else if (xenvif_rx_queue_ready(queue)) + xenvif_queue_carrier_on(queue); + } + + /* Queued packets may have foreign pages from other + * domains. These cannot be queued indefinitely as + * this would starve guests of grant refs and transmit + * slots. + */ + xenvif_rx_queue_drop_expired(queue); + + xenvif_rx_queue_maybe_wake(queue); + + cond_resched(); + } + + /* Bin any remaining skbs */ + xenvif_rx_queue_purge(queue); + + return 0; +} From fedbc8c132bcf836358103195d8b6df6c03d9daf Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 4 Oct 2016 10:29:13 +0100 Subject: [PATCH 32/65] xen-netback: retire guest rx side prefix GSO feature As far as I am aware only very old Windows network frontends make use of this style of passing GSO packets from backend to frontend. These frontends can easily be replaced by the freely available Xen Project Windows PV network frontend, which uses the 'default' mechanism for passing GSO packets, which is also used by all Linux frontends. NOTE: Removal of this feature will not cause breakage in old Windows frontends. They simply will no longer receive GSO packets - the packets instead being fragmented in the backend. Signed-off-by: Paul Durrant Reviewed-by: David Vrabel Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/common.h | 1 - drivers/net/xen-netback/interface.c | 4 ++-- drivers/net/xen-netback/rx.c | 26 -------------------------- drivers/net/xen-netback/xenbus.c | 21 --------------------- 4 files changed, 2 insertions(+), 50 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index b38fb2cf3364..0ba59106b1a5 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -260,7 +260,6 @@ struct xenvif { /* Frontend feature information. */ int gso_mask; - int gso_prefix_mask; u8 can_sg:1; u8 ip_csum:1; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index fb50c6d5f6c3..211d542a830b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -319,9 +319,9 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev, if (!vif->can_sg) features &= ~NETIF_F_SG; - if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4)) + if (~(vif->gso_mask) & GSO_BIT(TCPV4)) features &= ~NETIF_F_TSO; - if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6)) + if (~(vif->gso_mask) & GSO_BIT(TCPV6)) features &= ~NETIF_F_TSO6; if (!vif->ip_csum) features &= ~NETIF_F_IP_CSUM; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 03836aaac1c2..6bd7d6e84b8e 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -347,16 +347,6 @@ static int xenvif_gop_skb(struct sk_buff *skb, gso_type = XEN_NETIF_GSO_TYPE_TCPV6; } - /* Set up a GSO prefix descriptor, if necessary */ - if ((1 << gso_type) & vif->gso_prefix_mask) { - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); - meta = npo->meta + npo->meta_prod++; - meta->gso_type = gso_type; - meta->gso_size = skb_shinfo(skb)->gso_size; - meta->size = 0; - meta->id = req.id; - } - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); meta = npo->meta + npo->meta_prod++; @@ -511,22 +501,6 @@ static void 
xenvif_rx_action(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&rxq)) != NULL) { struct xen_netif_extra_info *extra = NULL; - if ((1 << queue->meta[npo.meta_cons].gso_type) & - vif->gso_prefix_mask) { - resp = RING_GET_RESPONSE(&queue->rx, - queue->rx.rsp_prod_pvt++); - - resp->flags = XEN_NETRXF_gso_prefix | - XEN_NETRXF_more_data; - - resp->offset = queue->meta[npo.meta_cons].gso_size; - resp->id = queue->meta[npo.meta_cons].id; - resp->status = XENVIF_RX_CB(skb)->meta_slots_used; - - npo.meta_cons++; - XENVIF_RX_CB(skb)->meta_slots_used--; - } - queue->stats.tx_bytes += skb->len; queue->stats.tx_packets++; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index daf4c7867102..7056404e3cb8 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -1135,7 +1135,6 @@ static int read_xenbus_vif_flags(struct backend_info *be) vif->can_sg = !!val; vif->gso_mask = 0; - vif->gso_prefix_mask = 0; if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", &val) < 0) @@ -1143,32 +1142,12 @@ static int read_xenbus_vif_flags(struct backend_info *be) if (val) vif->gso_mask |= GSO_BIT(TCPV4); - if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", - "%d", &val) < 0) - val = 0; - if (val) - vif->gso_prefix_mask |= GSO_BIT(TCPV4); - if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6", "%d", &val) < 0) val = 0; if (val) vif->gso_mask |= GSO_BIT(TCPV6); - if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix", - "%d", &val) < 0) - val = 0; - if (val) - vif->gso_prefix_mask |= GSO_BIT(TCPV6); - - if (vif->gso_mask & vif->gso_prefix_mask) { - xenbus_dev_fatal(dev, err, - "%s: gso and gso prefix flags are not " - "mutually exclusive", - dev->otherend); - return -EOPNOTSUPP; - } - if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", "%d", &val) < 0) val = 0; From eb1723a29b9a75dd787510a39096a68dba6cc200 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: 
Tue, 4 Oct 2016 10:29:14 +0100 Subject: [PATCH 33/65] xen-netback: refactor guest rx Refactor the to-guest (rx) path to: 1. Push responses for completed skbs earlier, reducing latency. 2. Reduce the per-queue memory overhead by greatly reducing the maximum number of grant copy ops in each hypercall (from 4352 to 64). Each struct xenvif_queue is now only 44 kB instead of 220 kB. 3. Make the code more maintainable. Signed-off-by: David Vrabel [re-based] Signed-off-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 23 +- drivers/net/xen-netback/rx.c | 654 ++++++++++++------------------- 2 files changed, 254 insertions(+), 423 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 0ba59106b1a5..7d12a388afc6 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -91,13 +91,6 @@ struct xenvif_rx_meta { */ #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) -/* It's possible for an skb to have a maximal number of frags - * but still be less than MAX_BUFFER_OFFSET in size. Thus the - * worst-case number of copy operations is MAX_XEN_SKB_FRAGS per - * ring slot. 
- */ -#define MAX_GRANT_COPY_OPS (MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) - #define NETBACK_INVALID_HANDLE -1 /* To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating @@ -133,6 +126,14 @@ struct xenvif_stats { unsigned long tx_frag_overflow; }; +#define COPY_BATCH_SIZE 64 + +struct xenvif_copy_state { + struct gnttab_copy op[COPY_BATCH_SIZE]; + RING_IDX idx[COPY_BATCH_SIZE]; + unsigned int num; +}; + struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int id; /* Queue ID, 0-based */ char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ @@ -189,12 +190,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned long last_rx_time; bool stalled; - struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; - - /* We create one meta structure per ring request we consume, so - * the maximum number is the same as the ring size. - */ - struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE]; + struct xenvif_copy_state rx_copy; /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ unsigned long credit_bytes; @@ -358,6 +354,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); +void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 6bd7d6e84b8e..b0ce4c6e9b21 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -26,7 +26,6 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/ - #include "common.h" #include @@ -137,464 +136,299 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) } } -struct netrx_pending_operations { - unsigned int copy_prod, copy_cons; - unsigned int meta_prod, meta_cons; - struct gnttab_copy *copy; - struct xenvif_rx_meta *meta; - int copy_off; - grant_ref_t copy_gref; -}; - -static struct xenvif_rx_meta *get_next_rx_buffer( - struct xenvif_queue *queue, - struct netrx_pending_operations *npo) +static void xenvif_rx_copy_flush(struct xenvif_queue *queue) { - struct xenvif_rx_meta *meta; - struct xen_netif_rx_request req; + unsigned int i; - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); + gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num); - meta = npo->meta + npo->meta_prod++; - meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; - meta->gso_size = 0; - meta->size = 0; - meta->id = req.id; + for (i = 0; i < queue->rx_copy.num; i++) { + struct gnttab_copy *op; - npo->copy_off = 0; - npo->copy_gref = req.gref; + op = &queue->rx_copy.op[i]; - return meta; + /* If the copy failed, overwrite the status field in + * the corresponding response. 
+ */ + if (unlikely(op->status != GNTST_okay)) { + struct xen_netif_rx_response *rsp; + + rsp = RING_GET_RESPONSE(&queue->rx, + queue->rx_copy.idx[i]); + rsp->status = op->status; + } + } + + queue->rx_copy.num = 0; } -struct gop_frag_copy { - struct xenvif_queue *queue; - struct netrx_pending_operations *npo; - struct xenvif_rx_meta *meta; - int head; - int gso_type; - int protocol; - int hash_present; - - struct page *page; -}; - -static void xenvif_setup_copy_gop(unsigned long gfn, - unsigned int offset, - unsigned int *len, - struct gop_frag_copy *info) +static void xenvif_rx_copy_add(struct xenvif_queue *queue, + struct xen_netif_rx_request *req, + unsigned int offset, void *data, size_t len) { - struct gnttab_copy *copy_gop; + struct gnttab_copy *op; + struct page *page; struct xen_page_foreign *foreign; - /* Convenient aliases */ - struct xenvif_queue *queue = info->queue; - struct netrx_pending_operations *npo = info->npo; - struct page *page = info->page; - WARN_ON(npo->copy_off > MAX_BUFFER_OFFSET); + if (queue->rx_copy.num == COPY_BATCH_SIZE) + xenvif_rx_copy_flush(queue); - if (npo->copy_off == MAX_BUFFER_OFFSET) - info->meta = get_next_rx_buffer(queue, npo); + op = &queue->rx_copy.op[queue->rx_copy.num]; - if (npo->copy_off + *len > MAX_BUFFER_OFFSET) - *len = MAX_BUFFER_OFFSET - npo->copy_off; + page = virt_to_page(data); - copy_gop = npo->copy + npo->copy_prod++; - copy_gop->flags = GNTCOPY_dest_gref; - copy_gop->len = *len; + op->flags = GNTCOPY_dest_gref; foreign = xen_page_foreign(page); if (foreign) { - copy_gop->source.domid = foreign->domid; - copy_gop->source.u.ref = foreign->gref; - copy_gop->flags |= GNTCOPY_source_gref; + op->source.domid = foreign->domid; + op->source.u.ref = foreign->gref; + op->flags |= GNTCOPY_source_gref; } else { - copy_gop->source.domid = DOMID_SELF; - copy_gop->source.u.gmfn = gfn; + op->source.u.gmfn = virt_to_gfn(data); + op->source.domid = DOMID_SELF; } - copy_gop->source.offset = offset; - copy_gop->dest.domid = 
queue->vif->domid; - copy_gop->dest.offset = npo->copy_off; - copy_gop->dest.u.ref = npo->copy_gref; + op->source.offset = xen_offset_in_page(data); + op->dest.u.ref = req->gref; + op->dest.domid = queue->vif->domid; + op->dest.offset = offset; + op->len = len; - npo->copy_off += *len; - info->meta->size += *len; - - if (!info->head) - return; - - /* Leave a gap for the GSO descriptor. */ - if ((1 << info->gso_type) & queue->vif->gso_mask) - queue->rx.req_cons++; - - /* Leave a gap for the hash extra segment. */ - if (info->hash_present) - queue->rx.req_cons++; - - info->head = 0; /* There must be something in this buffer now */ + queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons; + queue->rx_copy.num++; } -static void xenvif_gop_frag_copy_grant(unsigned long gfn, - unsigned int offset, - unsigned int len, - void *data) +static unsigned int xenvif_gso_type(struct sk_buff *skb) { - unsigned int bytes; - - while (len) { - bytes = len; - xenvif_setup_copy_gop(gfn, offset, &bytes, data); - offset += bytes; - len -= bytes; - } -} - -/* Set up the grant operations for this fragment. If it's a flipping - * interface, we also set up the unmap request from here. - */ -static void xenvif_gop_frag_copy(struct xenvif_queue *queue, - struct sk_buff *skb, - struct netrx_pending_operations *npo, - struct page *page, unsigned long size, - unsigned long offset, int *head) -{ - struct gop_frag_copy info = { - .queue = queue, - .npo = npo, - .head = *head, - .gso_type = XEN_NETIF_GSO_TYPE_NONE, - /* xenvif_set_skb_hash() will have either set a s/w - * hash or cleared the hash depending on - * whether the the frontend wants a hash for this skb. - */ - .hash_present = skb->sw_hash, - }; - unsigned long bytes; - if (skb_is_gso(skb)) { if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - } - - /* Data must not cross a page boundary. 
*/ - WARN_ON(size + offset > (PAGE_SIZE << compound_order(page))); - - info.meta = npo->meta + npo->meta_prod - 1; - - /* Skip unused frames from start of page */ - page += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - - while (size > 0) { - WARN_ON(offset >= PAGE_SIZE); - - bytes = PAGE_SIZE - offset; - if (bytes > size) - bytes = size; - - info.page = page; - gnttab_foreach_grant_in_range(page, offset, bytes, - xenvif_gop_frag_copy_grant, - &info); - size -= bytes; - offset = 0; - - /* Next page */ - if (size) { - WARN_ON(!PageCompound(page)); - page++; - } - } - - *head = info.head; -} - -/* Prepare an SKB to be transmitted to the frontend. - * - * This function is responsible for allocating grant operations, meta - * structures, etc. - * - * It returns the number of meta structures consumed. The number of - * ring slots used is always equal to the number of meta slots used - * plus the number of GSO descriptors used. Currently, we use either - * zero GSO descriptors (for non-GSO packets) or one descriptor (for - * frontend-side LRO). 
- */ -static int xenvif_gop_skb(struct sk_buff *skb, - struct netrx_pending_operations *npo, - struct xenvif_queue *queue) -{ - struct xenvif *vif = netdev_priv(skb->dev); - int nr_frags = skb_shinfo(skb)->nr_frags; - int i; - struct xen_netif_rx_request req; - struct xenvif_rx_meta *meta; - unsigned char *data; - int head = 1; - int old_meta_prod; - int gso_type; - - old_meta_prod = npo->meta_prod; - - gso_type = XEN_NETIF_GSO_TYPE_NONE; - if (skb_is_gso(skb)) { - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - } - - RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req); - meta = npo->meta + npo->meta_prod++; - - if ((1 << gso_type) & vif->gso_mask) { - meta->gso_type = gso_type; - meta->gso_size = skb_shinfo(skb)->gso_size; - } else { - meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; - meta->gso_size = 0; - } - - meta->size = 0; - meta->id = req.id; - npo->copy_off = 0; - npo->copy_gref = req.gref; - - data = skb->data; - while (data < skb_tail_pointer(skb)) { - unsigned int offset = offset_in_page(data); - unsigned int len = PAGE_SIZE - offset; - - if (data + len > skb_tail_pointer(skb)) - len = skb_tail_pointer(skb) - data; - - xenvif_gop_frag_copy(queue, skb, npo, - virt_to_page(data), len, offset, &head); - data += len; - } - - for (i = 0; i < nr_frags; i++) { - xenvif_gop_frag_copy(queue, skb, npo, - skb_frag_page(&skb_shinfo(skb)->frags[i]), - skb_frag_size(&skb_shinfo(skb)->frags[i]), - skb_shinfo(skb)->frags[i].page_offset, - &head); - } - - return npo->meta_prod - old_meta_prod; -} - -/* This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was - * used to set up the operations on the top of - * netrx_pending_operations, which have since been done. Check that - * they didn't give any errors and advance over them. 
- */ -static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, - struct netrx_pending_operations *npo) -{ - struct gnttab_copy *copy_op; - int status = XEN_NETIF_RSP_OKAY; - int i; - - for (i = 0; i < nr_meta_slots; i++) { - copy_op = npo->copy + npo->copy_cons++; - if (copy_op->status != GNTST_okay) { - netdev_dbg(vif->dev, - "Bad status %d from copy to DOM%d.\n", - copy_op->status, vif->domid); - status = XEN_NETIF_RSP_ERROR; - } - } - - return status; -} - -static struct xen_netif_rx_response *make_rx_response( - struct xenvif_queue *queue, u16 id, s8 st, u16 offset, u16 size, - u16 flags) -{ - RING_IDX i = queue->rx.rsp_prod_pvt; - struct xen_netif_rx_response *resp; - - resp = RING_GET_RESPONSE(&queue->rx, i); - resp->offset = offset; - resp->flags = flags; - resp->id = id; - resp->status = (s16)size; - if (st < 0) - resp->status = (s16)st; - - queue->rx.rsp_prod_pvt = ++i; - - return resp; -} - -static void xenvif_add_frag_responses(struct xenvif_queue *queue, - int status, - struct xenvif_rx_meta *meta, - int nr_meta_slots) -{ - int i; - unsigned long offset; - - /* No fragments used */ - if (nr_meta_slots <= 1) - return; - - nr_meta_slots--; - - for (i = 0; i < nr_meta_slots; i++) { - int flags; - - if (i == nr_meta_slots - 1) - flags = 0; + return XEN_NETIF_GSO_TYPE_TCPV4; else - flags = XEN_NETRXF_more_data; - - offset = 0; - make_rx_response(queue, meta[i].id, status, offset, - meta[i].size, flags); + return XEN_NETIF_GSO_TYPE_TCPV6; } + return XEN_NETIF_GSO_TYPE_NONE; } -static void xenvif_rx_action(struct xenvif_queue *queue) -{ - struct xenvif *vif = queue->vif; - s8 status; - u16 flags; - struct xen_netif_rx_response *resp; - struct sk_buff_head rxq; +struct xenvif_pkt_state { struct sk_buff *skb; - LIST_HEAD(notify); - int ret; - unsigned long offset; - bool need_to_notify = false; + size_t remaining_len; + int frag; /* frag == -1 => skb->head */ + unsigned int frag_offset; + struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 
1]; + unsigned int extra_count; + unsigned int slot; +}; - struct netrx_pending_operations npo = { - .copy = queue->grant_copy_op, - .meta = queue->meta, - }; +static void xenvif_rx_next_skb(struct xenvif_queue *queue, + struct xenvif_pkt_state *pkt) +{ + struct sk_buff *skb; + unsigned int gso_type; - skb_queue_head_init(&rxq); + skb = xenvif_rx_dequeue(queue); - while (xenvif_rx_ring_slots_available(queue) && - (skb = xenvif_rx_dequeue(queue)) != NULL) { - queue->last_rx_time = jiffies; + queue->stats.tx_bytes += skb->len; + queue->stats.tx_packets++; - XENVIF_RX_CB(skb)->meta_slots_used = - xenvif_gop_skb(skb, &npo, queue); + /* Reset packet state. */ + memset(pkt, 0, sizeof(struct xenvif_pkt_state)); - __skb_queue_tail(&rxq, skb); + pkt->skb = skb; + pkt->remaining_len = skb->len; + pkt->frag = -1; + + gso_type = xenvif_gso_type(skb); + if ((1 << gso_type) & queue->vif->gso_mask) { + struct xen_netif_extra_info *extra; + + extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; + + extra->u.gso.type = gso_type; + extra->u.gso.size = skb_shinfo(skb)->gso_size; + extra->u.gso.pad = 0; + extra->u.gso.features = 0; + extra->type = XEN_NETIF_EXTRA_TYPE_GSO; + extra->flags = 0; + + pkt->extra_count++; } - WARN_ON(npo.meta_prod > ARRAY_SIZE(queue->meta)); + if (skb->sw_hash) { + struct xen_netif_extra_info *extra; - if (!npo.copy_prod) - goto done; + extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1]; - WARN_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); - gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod); + extra->u.hash.algorithm = + XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ; - while ((skb = __skb_dequeue(&rxq)) != NULL) { - struct xen_netif_extra_info *extra = NULL; - - queue->stats.tx_bytes += skb->len; - queue->stats.tx_packets++; - - status = xenvif_check_gop(vif, - XENVIF_RX_CB(skb)->meta_slots_used, - &npo); - - if (XENVIF_RX_CB(skb)->meta_slots_used == 1) - flags = 0; + if (skb->l4_hash) + extra->u.hash.type = + skb->protocol == htons(ETH_P_IP) ? 
+ _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP : + _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP; else - flags = XEN_NETRXF_more_data; + extra->u.hash.type = + skb->protocol == htons(ETH_P_IP) ? + _XEN_NETIF_CTRL_HASH_TYPE_IPV4 : + _XEN_NETIF_CTRL_HASH_TYPE_IPV6; - if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ + *(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb); + + extra->type = XEN_NETIF_EXTRA_TYPE_HASH; + extra->flags = 0; + + pkt->extra_count++; + } +} + +static void xenvif_rx_complete(struct xenvif_queue *queue, + struct xenvif_pkt_state *pkt) +{ + int notify; + + /* Complete any outstanding copy ops for this skb. */ + xenvif_rx_copy_flush(queue); + + /* Push responses and notify. */ + queue->rx.rsp_prod_pvt = queue->rx.req_cons; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify); + if (notify) + notify_remote_via_irq(queue->rx_irq); + + dev_kfree_skb(pkt->skb); +} + +static void xenvif_rx_next_chunk(struct xenvif_queue *queue, + struct xenvif_pkt_state *pkt, + unsigned int offset, void **data, + size_t *len) +{ + struct sk_buff *skb = pkt->skb; + void *frag_data; + size_t frag_len, chunk_len; + + if (pkt->frag == -1) { + frag_data = skb->data; + frag_len = skb_headlen(skb); + } else { + skb_frag_t *frag = &skb_shinfo(skb)->frags[pkt->frag]; + + frag_data = skb_frag_address(frag); + frag_len = skb_frag_size(frag); + } + + frag_data += pkt->frag_offset; + frag_len -= pkt->frag_offset; + + chunk_len = min(frag_len, XEN_PAGE_SIZE - offset); + chunk_len = min(chunk_len, + XEN_PAGE_SIZE - xen_offset_in_page(frag_data)); + + pkt->frag_offset += chunk_len; + + /* Advance to next frag? 
*/ + if (frag_len == chunk_len) { + pkt->frag++; + pkt->frag_offset = 0; + } + + *data = frag_data; + *len = chunk_len; +} + +static void xenvif_rx_data_slot(struct xenvif_queue *queue, + struct xenvif_pkt_state *pkt, + struct xen_netif_rx_request *req, + struct xen_netif_rx_response *rsp) +{ + unsigned int offset = 0; + unsigned int flags; + + do { + size_t len; + void *data; + + xenvif_rx_next_chunk(queue, pkt, offset, &data, &len); + xenvif_rx_copy_add(queue, req, offset, data, len); + + offset += len; + pkt->remaining_len -= len; + + } while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0); + + if (pkt->remaining_len > 0) + flags = XEN_NETRXF_more_data; + else + flags = 0; + + if (pkt->slot == 0) { + struct sk_buff *skb = pkt->skb; + + if (skb->ip_summed == CHECKSUM_PARTIAL) flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated; else if (skb->ip_summed == CHECKSUM_UNNECESSARY) - /* remote but checksummed. */ flags |= XEN_NETRXF_data_validated; - offset = 0; - resp = make_rx_response(queue, queue->meta[npo.meta_cons].id, - status, offset, - queue->meta[npo.meta_cons].size, - flags); - - if ((1 << queue->meta[npo.meta_cons].gso_type) & - vif->gso_mask) { - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, - queue->rx.rsp_prod_pvt++); - - resp->flags |= XEN_NETRXF_extra_info; - - extra->u.gso.type = queue->meta[npo.meta_cons].gso_type; - extra->u.gso.size = queue->meta[npo.meta_cons].gso_size; - extra->u.gso.pad = 0; - extra->u.gso.features = 0; - - extra->type = XEN_NETIF_EXTRA_TYPE_GSO; - extra->flags = 0; - } - - if (skb->sw_hash) { - /* Since the skb got here via xenvif_select_queue() - * we know that the hash has been re-calculated - * according to a configuration set by the frontend - * and therefore we know that it is legitimate to - * pass it to the frontend. 
- */ - if (resp->flags & XEN_NETRXF_extra_info) - extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; - else - resp->flags |= XEN_NETRXF_extra_info; - - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, - queue->rx.rsp_prod_pvt++); - - extra->u.hash.algorithm = - XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ; - - if (skb->l4_hash) - extra->u.hash.type = - skb->protocol == htons(ETH_P_IP) ? - _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP : - _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP; - else - extra->u.hash.type = - skb->protocol == htons(ETH_P_IP) ? - _XEN_NETIF_CTRL_HASH_TYPE_IPV4 : - _XEN_NETIF_CTRL_HASH_TYPE_IPV6; - - *(uint32_t *)extra->u.hash.value = - skb_get_hash_raw(skb); - - extra->type = XEN_NETIF_EXTRA_TYPE_HASH; - extra->flags = 0; - } - - xenvif_add_frag_responses(queue, status, - queue->meta + npo.meta_cons + 1, - XENVIF_RX_CB(skb)->meta_slots_used); - - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret); - - need_to_notify |= !!ret; - - npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used; - dev_kfree_skb(skb); + if (pkt->extra_count != 0) + flags |= XEN_NETRXF_extra_info; } -done: - if (need_to_notify) - notify_remote_via_irq(queue->rx_irq); + rsp->offset = 0; + rsp->flags = flags; + rsp->id = req->id; + rsp->status = (s16)offset; +} + +static void xenvif_rx_extra_slot(struct xenvif_queue *queue, + struct xenvif_pkt_state *pkt, + struct xen_netif_rx_request *req, + struct xen_netif_rx_response *rsp) +{ + struct xen_netif_extra_info *extra = (void *)rsp; + unsigned int i; + + pkt->extra_count--; + + for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) { + if (pkt->extras[i].type) { + *extra = pkt->extras[i]; + + if (pkt->extra_count != 0) + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; + + pkt->extras[i].type = 0; + return; + } + } + BUG(); +} + +void xenvif_rx_action(struct xenvif_queue *queue) +{ + struct xenvif_pkt_state pkt; + + xenvif_rx_next_skb(queue, &pkt); + + do { + struct xen_netif_rx_request *req; + struct xen_netif_rx_response *rsp; + + req = 
RING_GET_REQUEST(&queue->rx, queue->rx.req_cons); + rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons); + + /* Extras must go after the first data slot */ + if (pkt.slot != 0 && pkt.extra_count != 0) + xenvif_rx_extra_slot(queue, &pkt, req, rsp); + else + xenvif_rx_data_slot(queue, &pkt, req, rsp); + + queue->rx.req_cons++; + pkt.slot++; + } while (pkt.remaining_len > 0 || pkt.extra_count != 0); + + xenvif_rx_complete(queue, &pkt); } static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) From 7c0b1a23e6f983fe392c8ffa71d05189ae52ebb5 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 4 Oct 2016 10:29:15 +0100 Subject: [PATCH 34/65] xen-netback: immediately wake tx queue when guest rx queue has space When an skb is removed from the guest rx queue, immediately wake the tx queue, instead of after processing them. Signed-off-by: David Vrabel [re-based] Signed-off-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/rx.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index b0ce4c6e9b21..95487091763c 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -92,27 +92,21 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) spin_lock_irq(&queue->rx_queue.lock); skb = __skb_dequeue(&queue->rx_queue); - if (skb) + if (skb) { queue->rx_queue_len -= skb->len; + if (queue->rx_queue_len < queue->rx_queue_max) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(queue->vif->dev, queue->id); + netif_tx_wake_queue(txq); + } + } spin_unlock_irq(&queue->rx_queue.lock); return skb; } -static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) -{ - spin_lock_irq(&queue->rx_queue.lock); - - if (queue->rx_queue_len < queue->rx_queue_max) { - struct net_device *dev = queue->vif->dev; - - netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id)); - } - - 
spin_unlock_irq(&queue->rx_queue.lock); -} - static void xenvif_rx_queue_purge(struct xenvif_queue *queue) { struct sk_buff *skb; @@ -585,8 +579,6 @@ int xenvif_kthread_guest_rx(void *data) */ xenvif_rx_queue_drop_expired(queue); - xenvif_rx_queue_maybe_wake(queue); - cond_resched(); } From 98f6d57ced73b723551568262019f1d6c8771f20 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 4 Oct 2016 10:29:16 +0100 Subject: [PATCH 35/65] xen-netback: process guest rx packets in batches Instead of only placing one skb on the guest rx ring at a time, process a batch of up to 64. This improves performance by ~10% in some tests. Signed-off-by: David Vrabel [re-based] Signed-off-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/rx.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 95487091763c..ae822b8fa76d 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -399,7 +399,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue, BUG(); } -void xenvif_rx_action(struct xenvif_queue *queue) +void xenvif_rx_skb(struct xenvif_queue *queue) { struct xenvif_pkt_state pkt; @@ -425,6 +425,19 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_complete(queue, &pkt); } +#define RX_BATCH_SIZE 64 + +void xenvif_rx_action(struct xenvif_queue *queue) +{ + unsigned int work_done = 0; + + while (xenvif_rx_ring_slots_available(queue) && + work_done < RX_BATCH_SIZE) { + xenvif_rx_skb(queue); + work_done++; + } +} + static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) { RING_IDX prod, cons; From a37f12298c251a48bc74d4012e07bf0d78175f46 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 4 Oct 2016 10:29:17 +0100 Subject: [PATCH 36/65] xen-netback: batch copies for multiple to-guest rx packets Instead of flushing the copy ops when a packet is complete, complete packets when their copy ops are done.
This improves performance by reducing the number of grant copy hypercalls. Latency is still limited by the relatively small size of the copy batch. Signed-off-by: David Vrabel [re-based] Signed-off-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 1 + drivers/net/xen-netback/rx.c | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 7d12a388afc6..cf68149cbb55 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -132,6 +132,7 @@ struct xenvif_copy_state { struct gnttab_copy op[COPY_BATCH_SIZE]; RING_IDX idx[COPY_BATCH_SIZE]; unsigned int num; + struct sk_buff_head *completed; }; struct xenvif_queue { /* Per-queue data for xenvif */ diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index ae822b8fa76d..8c8c5b5883eb 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -133,6 +133,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) static void xenvif_rx_copy_flush(struct xenvif_queue *queue) { unsigned int i; + int notify; gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num); @@ -154,6 +155,13 @@ static void xenvif_rx_copy_flush(struct xenvif_queue *queue) } queue->rx_copy.num = 0; + + /* Push responses for all completed packets. */ + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify); + if (notify) + notify_remote_via_irq(queue->rx_irq); + + __skb_queue_purge(queue->rx_copy.completed); } static void xenvif_rx_copy_add(struct xenvif_queue *queue, @@ -279,18 +287,10 @@ static void xenvif_rx_next_skb(struct xenvif_queue *queue, static void xenvif_rx_complete(struct xenvif_queue *queue, struct xenvif_pkt_state *pkt) { - int notify; - - /* Complete any outstanding copy ops for this skb. */ - xenvif_rx_copy_flush(queue); - - /* Push responses and notify. */ + /* All responses are ready to be pushed. 
*/ queue->rx.rsp_prod_pvt = queue->rx.req_cons; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify); - if (notify) - notify_remote_via_irq(queue->rx_irq); - dev_kfree_skb(pkt->skb); + __skb_queue_tail(queue->rx_copy.completed, pkt->skb); } static void xenvif_rx_next_chunk(struct xenvif_queue *queue, @@ -429,13 +429,20 @@ void xenvif_rx_skb(struct xenvif_queue *queue) void xenvif_rx_action(struct xenvif_queue *queue) { + struct sk_buff_head completed_skbs; unsigned int work_done = 0; + __skb_queue_head_init(&completed_skbs); + queue->rx_copy.completed = &completed_skbs; + while (xenvif_rx_ring_slots_available(queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; } + + /* Flush any pending copies and complete all skbs. */ + xenvif_rx_copy_flush(queue); } static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) From 2167ca029c2449018314fdf8637c1eb3f123036e Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 4 Oct 2016 10:29:18 +0100 Subject: [PATCH 37/65] xen/netback: add fraglist support for to-guest rx This allows full 64K skbuffs (with 1500 mtu ethernet, composed of 45 fragments) to be handled by netback for to-guest rx. Signed-off-by: Ross Lagerwall [re-based] Signed-off-by: Paul Durrant Reviewed-by: David Vrabel Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netback/rx.c | 38 ++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 211d542a830b..4af532a67d95 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -467,7 +467,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->netdev_ops = &xenvif_netdev_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_TSO | NETIF_F_TSO6; + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_FRAGLIST; dev->features = dev->hw_features | NETIF_F_RXCSUM; dev->ethtool_ops = &xenvif_ethtool_ops; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 8c8c5b5883eb..8e9ade6ccf18 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -215,7 +215,8 @@ static unsigned int xenvif_gso_type(struct sk_buff *skb) struct xenvif_pkt_state { struct sk_buff *skb; size_t remaining_len; - int frag; /* frag == -1 => skb->head */ + struct sk_buff *frag_iter; + int frag; /* frag == -1 => frag_iter->head */ unsigned int frag_offset; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; unsigned int extra_count; @@ -237,6 +238,7 @@ static void xenvif_rx_next_skb(struct xenvif_queue *queue, memset(pkt, 0, sizeof(struct xenvif_pkt_state)); pkt->skb = skb; + pkt->frag_iter = skb; pkt->remaining_len = skb->len; pkt->frag = -1; @@ -293,20 +295,40 @@ static void xenvif_rx_complete(struct xenvif_queue *queue, __skb_queue_tail(queue->rx_copy.completed, pkt->skb); } +static void xenvif_rx_next_frag(struct xenvif_pkt_state *pkt) +{ + struct sk_buff *frag_iter = pkt->frag_iter; + unsigned int nr_frags = skb_shinfo(frag_iter)->nr_frags; + + pkt->frag++; + pkt->frag_offset = 0; + + if (pkt->frag >= nr_frags) { + if (frag_iter == pkt->skb) + pkt->frag_iter = skb_shinfo(frag_iter)->frag_list; + else + 
pkt->frag_iter = frag_iter->next; + + pkt->frag = -1; + } +} + static void xenvif_rx_next_chunk(struct xenvif_queue *queue, struct xenvif_pkt_state *pkt, unsigned int offset, void **data, size_t *len) { - struct sk_buff *skb = pkt->skb; + struct sk_buff *frag_iter = pkt->frag_iter; void *frag_data; size_t frag_len, chunk_len; + BUG_ON(!frag_iter); + if (pkt->frag == -1) { - frag_data = skb->data; - frag_len = skb_headlen(skb); + frag_data = frag_iter->data; + frag_len = skb_headlen(frag_iter); } else { - skb_frag_t *frag = &skb_shinfo(skb)->frags[pkt->frag]; + skb_frag_t *frag = &skb_shinfo(frag_iter)->frags[pkt->frag]; frag_data = skb_frag_address(frag); frag_len = skb_frag_size(frag); @@ -322,10 +344,8 @@ static void xenvif_rx_next_chunk(struct xenvif_queue *queue, pkt->frag_offset += chunk_len; /* Advance to next frag? */ - if (frag_len == chunk_len) { - pkt->frag++; - pkt->frag_offset = 0; - } + if (frag_len == chunk_len) + xenvif_rx_next_frag(pkt); *data = frag_data; *len = chunk_len; From 68c8182bedb138dda9b67f68a928f7ef25b169ff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2016 12:11:41 +0100 Subject: [PATCH 38/65] net: axienet: Add missing \n to end of dev_err messages Trivial fix, dev_err messages are missing a \n, so add it. Signed-off-by: Colin Ian King Signed-off-by: David S.
Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 69e2a833a84f..35f9f9742a48 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -818,7 +818,7 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) - dev_err(&ndev->dev, "No interrupts asserted in Tx path"); + dev_err(&ndev->dev, "No interrupts asserted in Tx path\n"); if (status & XAXIDMA_IRQ_ERROR_MASK) { dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status); dev_err(&ndev->dev, "Current BD is at: 0x%x\n", @@ -867,7 +867,7 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) - dev_err(&ndev->dev, "No interrupts asserted in Rx path"); + dev_err(&ndev->dev, "No interrupts asserted in Rx path\n"); if (status & XAXIDMA_IRQ_ERROR_MASK) { dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status); dev_err(&ndev->dev, "Current BD is at: 0x%x\n", From 87089dd76815b153892ed4c9f4006d66c087ae28 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2016 12:15:54 +0100 Subject: [PATCH 39/65] net: ps3_gelic: Add missing \n to end of dev_dbg message Trivial fix, dev_dbg message is missing a \n, so add it. Signed-off-by: Colin Ian King Signed-off-by: David S.
Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index bc258d7e41df..272f2b1cb7ad 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1769,7 +1769,7 @@ static int ps3_gelic_driver_probe(struct ps3_system_bus_device *dev) gelic_ether_setup_netdev_ops(netdev, &card->napi); result = gelic_net_setup_netdev(netdev, card); if (result) { - dev_dbg(&dev->core, "%s: setup_netdev failed %d", + dev_dbg(&dev->core, "%s: setup_netdev failed %d\n", __func__, result); goto fail_setup_netdev; } From 451e856ef70907caec288d56c71b9409f29311d6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2016 13:57:01 +0100 Subject: [PATCH 40/65] net: hns: Add missing \n to end of dev_err messages, tidy up text Trivial fix, dev_err messages are missing a \n, so add it. Also fix grammar, spelling mistake and add white spaces to various error messages. Signed-off-by: Colin Ian King Signed-off-by: David S.
Miller --- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index a834774fdb02..751c1264c811 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -126,7 +126,7 @@ void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex) (enum mac_speed)speed, duplex); if (ret) { dev_err(mac_cb->dev, - "adjust_link failed,%s mac%d ret = %#x!\n", + "adjust_link failed, %s mac%d ret = %#x!\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, ret); return; @@ -149,7 +149,7 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, if (mac_cb->dsaf_dev->dsaf_mode <= DSAF_MODE_ENABLE) { if (mac_cb->mac_id != DSAF_MAX_PORT_NUM) { dev_err(mac_cb->dev, - "input invalid,%s mac%d vmid%d !\n", + "input invalid, %s mac%d vmid%d !\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vmid); return -EINVAL; @@ -157,19 +157,19 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, } else if (mac_cb->dsaf_dev->dsaf_mode < DSAF_MODE_MAX) { if (mac_cb->mac_id >= DSAF_MAX_PORT_NUM) { dev_err(mac_cb->dev, - "input invalid,%s mac%d vmid%d!\n", + "input invalid, %s mac%d vmid%d!\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vmid); return -EINVAL; } } else { - dev_err(mac_cb->dev, "dsaf mode invalid,%s mac%d!\n", + dev_err(mac_cb->dev, "dsaf mode invalid, %s mac%d!\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id); return -EINVAL; } if (vmid >= mac_cb->dsaf_dev->rcb_common[0]->max_vfn) { - dev_err(mac_cb->dev, "input invalid,%s mac%d vmid%d !\n", + dev_err(mac_cb->dev, "input invalid, %s mac%d vmid%d !\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vmid); return -EINVAL; } @@ -196,7 +196,7 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, tmp_port = vmid; break; default: - 
dev_err(mac_cb->dev, "dsaf mode invalid,%s mac%d!\n", + dev_err(mac_cb->dev, "dsaf mode invalid, %s mac%d!\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id); return -EINVAL; } @@ -275,7 +275,7 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb, ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); if (ret) { dev_err(dsaf_dev->dev, - "set mac mc port failed,%s mac%d ret = %#x!\n", + "set mac mc port failed, %s mac%d ret = %#x!\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, ret); return ret; @@ -305,7 +305,7 @@ int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac) old_mac = &mac_cb->addr_entry_idx[vfn]; } else { dev_err(mac_cb->dev, - "vf queue is too large,%s mac%d queue = %#x!\n", + "vf queue is too large, %s mac%d queue = %#x!\n", mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vfn); return -EINVAL; } @@ -547,7 +547,7 @@ int hns_mac_set_autoneg(struct hns_mac_cb *mac_cb, u8 enable) struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); if (mac_cb->phy_if == PHY_INTERFACE_MODE_XGMII && enable) { - dev_err(mac_cb->dev, "enable autoneg is not allowed!"); + dev_err(mac_cb->dev, "enabling autoneg is not allowed!\n"); return -ENOTSUPP; } @@ -571,7 +571,7 @@ int hns_mac_set_pauseparam(struct hns_mac_cb *mac_cb, u32 rx_en, u32 tx_en) if (mac_cb->mac_type == HNAE_PORT_DEBUG) { if (is_ver1 && (tx_en || rx_en)) { - dev_err(mac_cb->dev, "macv1 cann't enable tx/rx_pause!"); + dev_err(mac_cb->dev, "macv1 can't enable tx/rx_pause!\n"); return -EINVAL; } } @@ -941,7 +941,7 @@ int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, struct hns_mac_cb *mac_cb) ret = hns_mac_get_mode(mac_cb->phy_if); if (ret < 0) { dev_err(dsaf_dev->dev, - "hns_mac_get_mode failed,mac%d ret = %#x!\n", + "hns_mac_get_mode failed, mac%d ret = %#x!\n", mac_cb->mac_id, ret); return ret; } From 0fb26c3063ea7095fcdd1cf1dfd39e57130bc80c Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 4 Oct 2016 19:07:29 +0530 Subject: [PATCH 41/65] drivers: net: cpsw-phy-sel: add support to configure 
rgmii internal delay Add support to enable CPSW RGMII internal delay (id mode) bits when rgmii internal delay is configured in phy. Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-phy-sel.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index c3e85acfdc70..054a8dd23dae 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -30,6 +30,8 @@ #define AM33XX_GMII_SEL_RMII2_IO_CLK_EN BIT(7) #define AM33XX_GMII_SEL_RMII1_IO_CLK_EN BIT(6) +#define AM33XX_GMII_SEL_RGMII2_IDMODE BIT(5) +#define AM33XX_GMII_SEL_RGMII1_IDMODE BIT(4) #define GMII_SEL_MODE_MASK 0x3 @@ -48,6 +50,7 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv, u32 reg; u32 mask; u32 mode = 0; + bool rgmii_id = false; reg = readl(priv->gmii_sel); @@ -57,10 +60,14 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv, break; case PHY_INTERFACE_MODE_RGMII: + mode = AM33XX_GMII_SEL_MODE_RGMII; + break; + case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: mode = AM33XX_GMII_SEL_MODE_RGMII; + rgmii_id = true; break; default: @@ -83,6 +90,13 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv, mode |= AM33XX_GMII_SEL_RMII2_IO_CLK_EN; } + if (rgmii_id) { + if (slave == 0) + mode |= AM33XX_GMII_SEL_RGMII1_IDMODE; + else + mode |= AM33XX_GMII_SEL_RGMII2_IDMODE; + } + reg &= ~mask; reg |= mode; From ed2eb0fb873e792798258eb00071d68f7278fa78 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 4 Oct 2016 19:45:46 +0300 Subject: [PATCH 42/65] dt-bindings: net: renesas-ravb: Add support for R8A7796 RAVB Add a new compatible string for the R8A7796 (M3-W) RAVB. Signed-off-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index c8ac222eac67..b519503be51a 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -10,6 +10,7 @@ Required properties: "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC. "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC. "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC. + "renesas,etheravb-r8a7796" if the device is a part of R8A7796 SoC. "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface. "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface. @@ -33,7 +34,7 @@ Optional properties: - interrupt-parent: the phandle for the interrupt controller that services interrupts for this device. - interrupt-names: A list of interrupt names. - For the R8A7795 SoC this property is mandatory; + For the R8A779[56] SoCs this property is mandatory; it should include one entry per channel, named "ch%u", where %u is the channel number ranging from 0 to 24. For other SoCs this property is optional; if present From 0a55c12f9734105c004e464b5eebb79f08634d7a Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Wed, 5 Oct 2016 14:19:27 +0530 Subject: [PATCH 43/65] net: phy: Add Wake-on-LAN driver for Microsemi PHYs. Wake-on-LAN (WoL) is an Ethernet networking standard that allows a computer/device to be turned on or awakened by a network message. VSC8531 PHY can support this feature configure by driver set function. WoL status get by driver get function. Tested on Beaglebone Black with VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. 
Miller --- drivers/net/phy/mscc.c | 128 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index a17573e3bd8a..77a6671d572e 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -13,6 +13,7 @@ #include #include #include +#include enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_0_2_NS = 0, @@ -37,6 +38,7 @@ enum rgmii_rx_clock_delay { #define MII_VSC85XX_INT_MASK 25 #define MII_VSC85XX_INT_MASK_MASK 0xa000 +#define MII_VSC85XX_INT_MASK_WOL 0x0040 #define MII_VSC85XX_INT_STATUS 26 #define MSCC_PHY_WOL_MAC_CONTROL 27 @@ -52,6 +54,17 @@ enum rgmii_rx_clock_delay { #define RGMII_RX_CLK_DELAY_MASK 0x0070 #define RGMII_RX_CLK_DELAY_POS 4 +#define MSCC_PHY_WOL_LOWER_MAC_ADDR 21 +#define MSCC_PHY_WOL_MID_MAC_ADDR 22 +#define MSCC_PHY_WOL_UPPER_MAC_ADDR 23 +#define MSCC_PHY_WOL_LOWER_PASSWD 24 +#define MSCC_PHY_WOL_MID_PASSWD 25 +#define MSCC_PHY_WOL_UPPER_PASSWD 26 + +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define SECURE_ON_ENABLE 0x8000 +#define SECURE_ON_PASSWD_LEN_4 0x4000 + /* Microsemi PHY ID's */ #define PHY_ID_VSC8531 0x00070570 #define PHY_ID_VSC8541 0x00070770 @@ -81,6 +94,117 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static int vsc85xx_wol_set(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int rc; + u16 reg_val; + u8 i; + u16 pwd[3] = {0, 0, 0}; + struct ethtool_wolinfo *wol_conf = wol; + u8 *mac_addr = phydev->attached_dev->dev_addr; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + + if (wol->wolopts & WAKE_MAGIC) { + /* Store the device address for the magic packet */ + for (i = 0; i < ARRAY_SIZE(pwd); i++) + pwd[i] = mac_addr[5 - (i * 2 + 1)] << 8 | + mac_addr[5 - i * 2]; + phy_write(phydev, MSCC_PHY_WOL_LOWER_MAC_ADDR, pwd[0]); + phy_write(phydev, MSCC_PHY_WOL_MID_MAC_ADDR, pwd[1]); + phy_write(phydev, 
MSCC_PHY_WOL_UPPER_MAC_ADDR, pwd[2]); + } else { + phy_write(phydev, MSCC_PHY_WOL_LOWER_MAC_ADDR, 0); + phy_write(phydev, MSCC_PHY_WOL_MID_MAC_ADDR, 0); + phy_write(phydev, MSCC_PHY_WOL_UPPER_MAC_ADDR, 0); + } + + if (wol_conf->wolopts & WAKE_MAGICSECURE) { + for (i = 0; i < ARRAY_SIZE(pwd); i++) + pwd[i] = wol_conf->sopass[5 - (i * 2 + 1)] << 8 | + wol_conf->sopass[5 - i * 2]; + phy_write(phydev, MSCC_PHY_WOL_LOWER_PASSWD, pwd[0]); + phy_write(phydev, MSCC_PHY_WOL_MID_PASSWD, pwd[1]); + phy_write(phydev, MSCC_PHY_WOL_UPPER_PASSWD, pwd[2]); + } else { + phy_write(phydev, MSCC_PHY_WOL_LOWER_PASSWD, 0); + phy_write(phydev, MSCC_PHY_WOL_MID_PASSWD, 0); + phy_write(phydev, MSCC_PHY_WOL_UPPER_PASSWD, 0); + } + + reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); + if (wol_conf->wolopts & WAKE_MAGICSECURE) + reg_val |= SECURE_ON_ENABLE; + else + reg_val &= ~SECURE_ON_ENABLE; + phy_write(phydev, MSCC_PHY_WOL_MAC_CONTROL, reg_val); + + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + if (rc != 0) + goto out_unlock; + + if (wol->wolopts & WAKE_MAGIC) { + /* Enable the WOL interrupt */ + reg_val = phy_read(phydev, MII_VSC85XX_INT_MASK); + reg_val |= MII_VSC85XX_INT_MASK_WOL; + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, reg_val); + if (rc != 0) + goto out_unlock; + } else { + /* Disable the WOL interrupt */ + reg_val = phy_read(phydev, MII_VSC85XX_INT_MASK); + reg_val &= (~MII_VSC85XX_INT_MASK_WOL); + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, reg_val); + if (rc != 0) + goto out_unlock; + } + /* Clear WOL iterrupt status */ + reg_val = phy_read(phydev, MII_VSC85XX_INT_STATUS); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + +static void vsc85xx_wol_get(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int rc; + u16 reg_val; + u8 i; + u16 pwd[3] = {0, 0, 0}; + struct ethtool_wolinfo *wol_conf = wol; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + 
+ reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); + if (reg_val & SECURE_ON_ENABLE) + wol_conf->wolopts |= WAKE_MAGICSECURE; + if (wol_conf->wolopts & WAKE_MAGICSECURE) { + pwd[0] = phy_read(phydev, MSCC_PHY_WOL_LOWER_PASSWD); + pwd[1] = phy_read(phydev, MSCC_PHY_WOL_MID_PASSWD); + pwd[2] = phy_read(phydev, MSCC_PHY_WOL_UPPER_PASSWD); + for (i = 0; i < ARRAY_SIZE(pwd); i++) { + wol_conf->sopass[5 - i * 2] = pwd[i] & 0x00ff; + wol_conf->sopass[5 - (i * 2 + 1)] = (pwd[i] & 0xff00) + >> 8; + } + } + + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); +} + static u8 edge_rate_magic_get(u16 vddmac, int slowdown) { @@ -301,6 +425,8 @@ static struct phy_driver vsc85xx_driver[] = { .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, }, { .phy_id = PHY_ID_VSC8541, @@ -318,6 +444,8 @@ static struct phy_driver vsc85xx_driver[] = { .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc85xx_probe, + .set_wol = &vsc85xx_wol_set, + .get_wol = &vsc85xx_wol_get, } }; From 3d9e133fe611a4efc9be3ff04a0dcdf705fa6d84 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 5 Oct 2016 16:03:08 +0200 Subject: [PATCH 44/65] devicetree: net: micrel-ksz90x1.txt: Properly explain skew settings The KSZ9031 skew registers contain an offset, the chip's default value is "neutral" which does not add any skew. Programming a 0 into a skew property will actually set it the maximal negative adjustment and not to a neutral position as one would expect. Explain this situation in the devicetree binding documentation and list the settings that the chip considers neutral. Changing the implementation to accept negative values would have been a better solution, but would break existing configurations. Signed-off-by: Mike Looijmans Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/micrel-ksz90x1.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index f9c32adab5c6..c35b5b428a7f 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -34,16 +34,17 @@ KSZ9031: All skew control options are specified in picoseconds. The minimum value is 0, and the maximum is property-dependent. The increment - step is 60ps. + step is 60ps. The default value is the neutral setting, so setting + rxc-skew-ps=<0> actually results in -900 picoseconds adjustment. Optional properties: - Maximum value of 1860: + Maximum value of 1860, default value 900: - rxc-skew-ps : Skew control of RX clock pad - txc-skew-ps : Skew control of TX clock pad - Maximum value of 900: + Maximum value of 900, default value 420: - rxdv-skew-ps : Skew control of RX CTL pad - txen-skew-ps : Skew control of TX CTL pad From 6664498280cf17a59c3e7cf1a931444c02633ed1 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 5 Oct 2016 15:12:54 +0100 Subject: [PATCH 45/65] packet: call fanout_release, while UNREGISTERING a netdev If a socket has FANOUT sockopt set, a new proto_hook is registered as part of fanout_add(). When processing a NETDEV_UNREGISTER event in af_packet, __fanout_unlink is called for all sockets, but prot_hook which was registered as part of fanout_add is not removed. Call fanout_release, on a NETDEV_UNREGISTER, which removes prot_hook and removes fanout from the fanout_list. This fixes BUG_ON(!list_empty(&dev->ptype_specific)) in netdev_run_todo() Signed-off-by: Anoob Soman Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 33a4697d5539..11db0d619c00 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3952,6 +3952,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); + fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); From d35c99ff77ecb2eb239731b799386f3b3637a31e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Oct 2016 04:13:18 +0900 Subject: [PATCH 46/65] netlink: do not enter direct reclaim from netlink_dump() Since linux-3.15, netlink_dump() can use up to 16384 bytes skb allocations. Due to struct skb_shared_info ~320 bytes overhead, we end up using order-3 (on x86) page allocations, that might trigger direct reclaim and add stress. The intent was really to attempt a large allocation but immediately fallback to a smaller one (order-1 on x86) in case of memory stress. On recent kernels (linux-4.4), we can remove __GFP_DIRECT_RECLAIM to meet the goal. Old kernels would need to remove __GFP_WAIT While we are at it, since we do an order-3 allocation, allow to use all the allocated bytes instead of 16384 to reduce syscalls during large dumps. iproute2 already uses 32KB recvmsg() buffer sizes. Alexei provided an initial patch downsizing to SKB_WITH_OVERHEAD(16384) Fixes: 9063e21fb026 ("netlink: autosize skb lengthes") Signed-off-by: Eric Dumazet Reported-by: Alexei Starovoitov Cc: Greg Thelen Reviewed-by: Greg Rose Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 627f898c05b9..62bea4591054 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1832,7 +1832,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* Record the max length of recvmsg() calls for future allocations */ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - 16384); + SKB_WITH_OVERHEAD(32768)); copied = data_skb->len; if (len < copied) { @@ -2083,8 +2083,9 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; - skb = alloc_skb(alloc_size, GFP_KERNEL | - __GFP_NOWARN | __GFP_NORETRY); + skb = alloc_skb(alloc_size, + (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; From 4af1474e6198b10fee7bb20e81f7e033ad1b586c Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 5 Oct 2016 15:36:49 -0400 Subject: [PATCH 47/65] net: bgmac: Fix errant feature flag check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the conversion to the feature flags, a check against ci->id != BCMA_CHIP_ID_BCM47162 became bgmac->feature_flags & BGMAC_FEAT_CLKCTLS instead of !(bgmac->feature_flags & BGMAC_FEAT_CLKCTLS) Reported-by: Rafał Miłecki Signed-off-by: Jon Mason Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bgmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 6ea0e5ff1e44..856379cbb402 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1046,7 +1046,7 @@ static void bgmac_enable(struct bgmac *bgmac) mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT; - if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0) + if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) || mode != 0) bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT); if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2) bgmac_cco_ctl_maskset(bgmac, 1, ~0, From b95b6d99ce3673ca8d9f7c824d82ca7912d96b34 Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Thu, 6 Oct 2016 19:44:01 +0800 Subject: [PATCH 48/65] net: ethernet: mediatek: get the chip id by ETHDMASYS registers The driver gets the chip id by ETHSYS_CHIPID0_3/ETHSYS_CHIPID4_7 registers in mtk_probe(). Signed-off-by: Nelson Chang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 +++++++++++++++++++++ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 5 ++++ 2 files changed, 34 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ad4ab979507b..0c67ab1fb4d8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2323,6 +2323,31 @@ free_netdev: return err; } +static int mtk_get_chip_id(struct mtk_eth *eth, u32 *chip_id) +{ + u32 val[2], id[4]; + + regmap_read(eth->ethsys, ETHSYS_CHIPID0_3, &val[0]); + regmap_read(eth->ethsys, ETHSYS_CHIPID4_7, &val[1]); + + id[3] = ((val[0] >> 16) & 0xff) - '0'; + id[2] = ((val[0] >> 24) & 0xff) - '0'; + id[1] = (val[1] & 0xff) - '0'; + id[0] = ((val[1] >> 8) & 0xff) - '0'; + + *chip_id = (id[3] * 1000) + (id[2] * 100) + + (id[1] * 10) + id[0]; + + if (!(*chip_id)) { + dev_err(eth->dev, "failed to get chip id\n"); + return -ENODEV; + } + + dev_info(eth->dev, "chip id = %d\n", *chip_id); + + return 0; +} + static int mtk_probe(struct platform_device *pdev) { struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -2388,6 +2413,10 @@ static int mtk_probe(struct platform_device *pdev) if (err) return err; + err = mtk_get_chip_id(eth, &eth->chip_id); + if (err) + return err; + for_each_child_of_node(pdev->dev.of_node, mac_np) { if (!of_device_is_compatible(mac_np, "mediatek,eth-mac")) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 30031959d6de..a5b422b37f80 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -342,6 +342,10 @@ #define GPIO_BIAS_CTRL 0xed0 #define GPIO_DRV_SEL10 0xf00 +/* ethernet subsystem chip id register */ +#define ETHSYS_CHIPID0_3 0x0 +#define ETHSYS_CHIPID4_7 0x4 + /* ethernet subsystem config register */ #define ETHSYS_SYSCFG0 0x14 #define SYSCFG0_GE_MASK 0x3 @@ -534,6 +538,7 @@ 
struct mtk_eth { unsigned long sysclk; struct regmap *ethsys; struct regmap *pctl; + u32 chip_id; bool hwlro; atomic_t dma_refcnt; struct mtk_tx_ring tx_ring; From 983e1a6c95abf8058d26149a928578b720c77bce Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Thu, 6 Oct 2016 19:44:02 +0800 Subject: [PATCH 49/65] net: ethernet: mediatek: get hw lro capability by the chip id instead of by the dtsi Because hw lro started to be supported from MT7623, the proper way to check if the feature is capable is to judge by the chip id instead of by the dtsi. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 ++++++++++++-- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0c67ab1fb4d8..4a62ffd7729d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2348,6 +2348,16 @@ static int mtk_get_chip_id(struct mtk_eth *eth, u32 *chip_id) return 0; } +static bool mtk_is_hwlro_supported(struct mtk_eth *eth) +{ + switch (eth->chip_id) { + case MT7623_ETH: + return true; + } + + return false; +} + static int mtk_probe(struct platform_device *pdev) { struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -2387,8 +2397,6 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } - eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro"); - for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { @@ -2417,6 +2425,8 @@ static int mtk_probe(struct platform_device *pdev) if (err) return err; + eth->hwlro = mtk_is_hwlro_supported(eth); + for_each_child_of_node(pdev->dev.of_node, mac_np) { if (!of_device_is_compatible(mac_np, "mediatek,eth-mac")) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h 
b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index a5b422b37f80..99b1c8e9f16f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -345,6 +345,7 @@ /* ethernet subsystem chip id register */ #define ETHSYS_CHIPID0_3 0x0 #define ETHSYS_CHIPID4_7 0x4 +#define MT7623_ETH 7623 /* ethernet subsystem config register */ #define ETHSYS_SYSCFG0 0x14 From 3a09f18ef6f685c714f1c5a22df9b4da58dde355 Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Thu, 6 Oct 2016 19:44:03 +0800 Subject: [PATCH 50/65] net: ethernet: mediatek: remove hwlro property in the device tree Since the proper way to check the hw lro capability is by the chip id, hwlro property in the device tree should be removed. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index f09525772369..c010fafc66a8 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -24,7 +24,6 @@ Required properties: Optional properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device -- mediatek,hwlro: the capability if the hardware supports LRO functions * Ethernet MAC node @@ -54,7 +53,6 @@ eth: ethernet@1b100000 { reset-names = "eth"; mediatek,ethsys = <&ethsys>; mediatek,pctl = <&syscfg_pctl_a>; - mediatek,hwlro; #address-cells = <1>; #size-cells = <0>; From e5e0fbfc4e3b343ff985dd800f1ee31564793563 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Oct 2016 16:44:53 +0200 Subject: [PATCH 51/65] ethernet: qualcomm: QCOM_EMAC should depend on HAS_DMA and HAS_IOMEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If NO_DMA=y: drivers/built-in.o: In function 
`emac_probe': emac.c:(.text+0x3780b8): undefined reference to `bad_dma_ops' emac.c:(.text+0x3780e2): undefined reference to `bad_dma_ops' emac.c:(.text+0x378112): undefined reference to `bad_dma_ops' emac.c:(.text+0x378146): undefined reference to `bad_dma_ops' emac.c:(.text+0x37816e): undefined reference to `bad_dma_ops' drivers/built-in.o:emac.c:(.text+0x37819a): more undefined references to `bad_dma_ops' follow If NO_IOMEM=y: drivers/net/ethernet/qualcomm/emac/emac.c: In function ‘emac_remove’: drivers/net/ethernet/qualcomm/emac/emac.c:736:3: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] iounmap(adpt->phy.digital); ^ Add dependencies on HAS_DMA and HAS_IOMEM to fix this. Signed-off-by: Geert Uytterhoeven Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index 9ba568db576f..d7720bf92d49 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -26,6 +26,7 @@ config QCA7000 config QCOM_EMAC tristate "Qualcomm Technologies, Inc. EMAC Gigabit Ethernet support" + depends on HAS_DMA && HAS_IOMEM select CRC32 select PHYLIB ---help--- From 7aa6ec2296614f2da8546bbfe8fca54bbbd8ae12 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 6 Oct 2016 11:22:51 -0700 Subject: [PATCH 52/65] drivers: net: phy: Correct duplicate MDIO_XGENE entry An extra entry for MDIO_XGENE got added during merging. Delete it. Reviewed-by: Andrew Lunn Signed-off-by: Laura Abbott Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 5078a0d0db64..2651c8d8de2f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -142,6 +142,7 @@ config MDIO_THUNDER config MDIO_XGENE tristate "APM X-Gene SoC MDIO bus controller" + depends on ARCH_XGENE || COMPILE_TEST help This module provides a driver for the MDIO busses found in the APM X-Gene SoC's. @@ -320,13 +321,6 @@ config XILINX_GMII2RGMII the Reduced Gigabit Media Independent Interface(RGMII) between Ethernet physical media devices and the Gigabit Ethernet controller. -config MDIO_XGENE - tristate "APM X-Gene SoC MDIO bus controller" - depends on ARCH_XGENE || COMPILE_TEST - help - This module provides a driver for the MDIO busses found in the - APM X-Gene SoC's. - endif # PHYLIB config MICREL_KS8995MA From c7e163fe282f34efdf77fb09b333529de97a6a10 Mon Sep 17 00:00:00 2001 From: Ethan Hsieh Date: Fri, 7 Oct 2016 12:06:42 +0800 Subject: [PATCH 53/65] Bluetooth: btusb: Fix atheros firmware download error Move usb_autopm_get_interface() ahead of setup_on_usb() to prevent device from sending usb control message in usb suspend mode. 
The error message is as below: [ 83.944103] btusb 1-2:1.1: usb_suspend_interface: status 0 [ 83.944107] btusb 1-2:1.0: usb_suspend_interface: status 0 [ 83.960132] usb 1-2: usb auto-suspend, wakeup 0 [ 83.976156] usb 1-2: usb_suspend_device: status 0 [ 83.976162] usb 1-2: usb_suspend_both: status 0 [ 298.689106] Bluetooth: hci0 [ 298.689399] Bluetooth: hci0: Failed to access otp area (-113) Signed-off-by: Ethan Hsieh Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 88962339d4b7..2f633df9f4e6 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1043,6 +1043,10 @@ static int btusb_open(struct hci_dev *hdev) BT_DBG("%s", hdev->name); + err = usb_autopm_get_interface(data->intf); + if (err < 0) + return err; + /* Patching USB firmware files prior to starting any URBs of HCI path * It is more safe to use USB bulk channel for downloading USB patch */ @@ -1052,10 +1056,6 @@ static int btusb_open(struct hci_dev *hdev) return err; } - err = usb_autopm_get_interface(data->intf); - if (err < 0) - return err; - data->intf->needs_remote_wakeup = 1; if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) From cb4a4c691e8631089759fc5d1faf8d6ccf581497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Fri, 7 Oct 2016 01:00:49 -0700 Subject: [PATCH 54/65] ipv6 addrconf: disallow rtr_solicits < -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This disallows setting /proc/sys/net/ipv6/conf/*/router_solicitations to values below -1. -1 continues to mean an unlimited number of retransmits. Note: this depends on 'ipv6 addrconf: remove addrconf_sysctl_hop_limit()' Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cbd9343751a2..d8983e15f859 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5729,6 +5729,7 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static int minus_one = -1; static const int one = 1; static const int two_five_five = 255; @@ -5789,7 +5790,8 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &minus_one, }, { .procname = "router_solicitation_interval", From 7d3cfc36ec3aae1d37a13fbe257f31141eb772b4 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Fri, 7 Oct 2016 11:33:37 +0100 Subject: [PATCH 55/65] MAINTAINERS: add myself as a maintainer of xen-netback Signed-off-by: Paul Durrant Cc: Wei Liu Acked-by: Wei Liu Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 93f718da0de2..274d2a020a1a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13068,6 +13068,7 @@ F: arch/arm64/include/asm/xen/ XEN NETWORK BACKEND DRIVER M: Wei Liu +M: Paul Durrant L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: netdev@vger.kernel.org S: Supported From 21d9629a7abd87512d062fbe57b04a1baedf93d2 Mon Sep 17 00:00:00 2001 From: Alex Sidorenko Date: Fri, 7 Oct 2016 09:02:33 -0400 Subject: [PATCH 56/65] Fixing a bug in team driver due to incorrect 'unsigned int' to 'int' conversion Roundrobin runner of team driver uses 'unsigned int' variable to count the number of sent_packets. Later it is passed to a subroutine team_num_to_port_index(struct team *team, int num) as 'num' and when we reach MAXINT (2**31-1), 'num' becomes negative. This leads to using incorrect hash-bucket for port lookup and as a result, packets are dropped. 
The fix consists of changing 'int num' to 'unsigned int num'. Testing of a fixed kernel shows that there is no packet drop anymore. Signed-off-by: Alex Sidorenko Signed-off-by: David S. Miller --- include/linux/if_team.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 174f43f43aff..c05216a8fbac 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -245,7 +245,7 @@ static inline struct team_port *team_get_port_by_index(struct team *team, return NULL; } -static inline int team_num_to_port_index(struct team *team, int num) +static inline int team_num_to_port_index(struct team *team, unsigned int num) { int en_port_count = ACCESS_ONCE(team->en_port_count); From 912e27e85e070596ed4964ebde29fa9781390f2a Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Fri, 7 Oct 2016 09:32:31 +0100 Subject: [PATCH 57/65] xen-netback: make sure that hashes are not send to unaware frontends In the case when a frontend only negotiates a single queue with xen- netback it is possible for a skbuff with a s/w hash to result in a hash extra_info segment being sent to the frontend even when no hash algorithm has been configured. (The ndo_select_queue() entry point makes sure the hash is not set if no algorithm is configured, but this entry point is not called when there is only a single queue). This can result in a frontend that is unable to handle extra_info segments being given such a segment, causing it to crash. This patch fixes the problem by clearing the hash in ndo_start_xmit() instead, which is clearly guaranteed to be called irrespective of the number of queues. Signed-off-by: Paul Durrant Cc: Wei Liu Acked-by: Wei Liu Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/interface.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 4af532a67d95..74dc2bf71428 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -149,17 +149,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, struct xenvif *vif = netdev_priv(dev); unsigned int size = vif->hash.size; - if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) { - u16 index = fallback(dev, skb) % dev->real_num_tx_queues; - - /* Make sure there is no hash information in the socket - * buffer otherwise it would be incorrectly forwarded - * to the frontend. - */ - skb_clear_hash(skb); - - return index; - } + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + return fallback(dev, skb) % dev->real_num_tx_queues; xenvif_set_skb_hash(vif, skb); @@ -208,6 +199,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) cb = XENVIF_RX_CB(skb); cb->expires = jiffies + vif->drain_timeout; + /* If there is no hash algorithm configured then make sure there + * is no hash information in the socket buffer otherwise it + * would be incorrectly forwarded to the frontend. + */ + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + skb_clear_hash(skb); + xenvif_rx_queue_tail(queue, skb); xenvif_kick_thread(queue); From fa6114d4bde70152765ba1c35fed4fcd8481faf6 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Fri, 7 Oct 2016 10:13:22 -0500 Subject: [PATCH 58/65] net: macb: NULL out phydev after removing mdio bus To ensure the dev->phydev pointer is not used after becoming invalid in mdiobus_unregister, set it to NULL. This happens when removing the macb driver without first taking its interface down, since unregister_netdev will end up calling macb_close. 
Signed-off-by: Xander Huff Signed-off-by: Nathan Sullivan Signed-off-by: Brad Mouring Reviewed-by: Moritz Fischer Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 63144bb413d1..b32444a3ed79 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -3117,6 +3117,7 @@ static int macb_remove(struct platform_device *pdev) if (dev->phydev) phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + dev->phydev = NULL; mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ From 776482cd8d8af063878ed22a1a0d44e4c6238a94 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 7 Oct 2016 22:58:47 +0200 Subject: [PATCH 59/65] wan/fsl_ucc_hdlc: Fix size used in dma_free_coherent() Size used with 'dma_alloc_coherent()' and 'dma_free_coherent()' should be consistent. Here, the size of a pointer is used in dma_alloc... and the size of the pointed structure is used in dma_free... This has been spotted with coccinelle, using the following script: //////////////////// @r@ expression x0, x1, y0, y1, z0, z1, t0, t1, ret; @@ * ret = dma_alloc_coherent(x0, y0, z0, t0); ... * dma_free_coherent(x1, y1, ret, t1); @script:python@ y0 << r.y0; y1 << r.y1; @@ if y1.find(y0) == -1: print "WARNING: sizes look different: '%s' vs '%s'" % (y0, y1) //////////////////// Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 5fbf83d5aa57..65647533b401 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -295,11 +295,11 @@ free_ucc_pram: qe_muram_free(priv->ucc_pram_offset); free_tx_bd: dma_free_coherent(priv->dev, - TX_BD_RING_LEN * sizeof(struct qe_bd), + TX_BD_RING_LEN * sizeof(struct qe_bd *), priv->tx_bd_base, priv->dma_tx_bd); free_rx_bd: dma_free_coherent(priv->dev, - RX_BD_RING_LEN * sizeof(struct qe_bd), + RX_BD_RING_LEN * sizeof(struct qe_bd *), priv->rx_bd_base, priv->dma_rx_bd); free_uccf: ucc_fast_free(priv->uccf); @@ -688,7 +688,7 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv) if (priv->rx_bd_base) { dma_free_coherent(priv->dev, - RX_BD_RING_LEN * sizeof(struct qe_bd), + RX_BD_RING_LEN * sizeof(struct qe_bd *), priv->rx_bd_base, priv->dma_rx_bd); priv->rx_bd_base = NULL; @@ -697,7 +697,7 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv) if (priv->tx_bd_base) { dma_free_coherent(priv->dev, - TX_BD_RING_LEN * sizeof(struct qe_bd), + TX_BD_RING_LEN * sizeof(struct qe_bd *), priv->tx_bd_base, priv->dma_tx_bd); priv->tx_bd_base = NULL; From 6ee080bb09889dc0195a9c659288d17999237fb6 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Sun, 9 Oct 2016 09:58:49 +0530 Subject: [PATCH 60/65] be2net: Provide an alternate way to read pf_num for BEx chips The driver gets the pf_num for Skyhawk and Lancer using GET_FUNC_CONFIG FW command. But since that command is not supported in BEx, we need to get it from some other command. Otherwise TPE recovery would fail since all NIC PFs would end up with a func num of 0. There's a pci function number field in the response of GET_CNTL_ATTRIBUTES command that can be read to get the same info for BEx adapters. Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 5 +++++ drivers/net/ethernet/emulex/benet/be_cmds.h | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 9cffe48be156..45d174262d32 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -3527,6 +3527,11 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter) for (i = 0; i < CNTL_SERIAL_NUM_WORDS; i++) adapter->serial_num[i] = le32_to_cpu(serial_num[i]) & (BIT_MASK(16) - 1); + /* For BEx, since GET_FUNC_CONFIG command is not + * supported, we read funcnum here as a workaround. + */ + if (BEx_chip(adapter)) + adapter->pf_num = attribs->hba_attribs.pci_funcnum; } err: diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 1bd82bcb3be5..09da2d82c2f0 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1720,7 +1720,11 @@ struct mgmt_hba_attribs { u32 rsvd2[55]; u8 rsvd3[3]; u8 phy_port; - u32 rsvd4[13]; + u32 rsvd4[15]; + u8 rsvd5[2]; + u8 pci_funcnum; + u8 rsvd6; + u32 rsvd7[6]; } __packed; struct mgmt_controller_attrib { From f5ef017e1195d0a8c69a82bf95fea9c776b93ff0 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Sun, 9 Oct 2016 09:58:50 +0530 Subject: [PATCH 61/65] be2net: NCSI FW section should be properly updated with ethtool for BE3 The driver has a check to ensure that NCSI FW section is updated only if the current FW version in the card supports it. This FW version check is done using memcmp() which obviously fails in some cases. Fix this by breaking up the version string into integer version components and comparing them. Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 24 ++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 45d174262d32..7e9be9f4236a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2728,6 +2728,26 @@ static int be_flash(struct be_adapter *adapter, const u8 *img, return 0; } +#define NCSI_UPDATE_LOG "NCSI section update is not supported in FW ver %s\n" +static bool be_fw_ncsi_supported(char *ver) +{ + int v1[4] = {3, 102, 148, 0}; /* Min ver that supports NCSI FW */ + int v2[4]; + int i; + + if (sscanf(ver, "%d.%d.%d.%d", &v2[0], &v2[1], &v2[2], &v2[3]) != 4) + return false; + + for (i = 0; i < 4; i++) { + if (v1[i] < v2[i]) + return true; + else if (v1[i] > v2[i]) + return false; + } + + return true; +} + /* For BE2, BE3 and BE3-R */ static int be_flash_BEx(struct be_adapter *adapter, const struct firmware *fw, @@ -2805,8 +2825,10 @@ static int be_flash_BEx(struct be_adapter *adapter, continue; if ((pflashcomp[i].optype == OPTYPE_NCSI_FW) && - memcmp(adapter->fw_ver, "3.102.148.0", 11) < 0) + !be_fw_ncsi_supported(adapter->fw_ver)) { + dev_info(dev, NCSI_UPDATE_LOG, adapter->fw_ver); continue; + } if (pflashcomp[i].optype == OPTYPE_PHY_FW && !phy_flashing_required(adapter)) From 77b696cba961bb6e88aeba36253849443f9a4186 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Sun, 9 Oct 2016 09:58:51 +0530 Subject: [PATCH 62/65] be2net: Update Copyright string in be_hw.h This patch updates the year and company name in the copyright string in be_hw.h. Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 92942c84d329..36e4232ed6b8 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2015 Emulex + * Copyright (C) 2005-2016 Broadcom. * All rights reserved. * * This program is free software; you can redistribute it and/or From f3d6ad84807254954fc69bdebb6123e5a2883baf Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Sun, 9 Oct 2016 09:58:52 +0530 Subject: [PATCH 63/65] be2net: Fix TX stats for TSO packets TX stats update does not take into account headers which get duplicated when the TSO packet is split into segments by HW. Fix this for both tunneled (vxlan) and non-tunneled TSO packets. Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index dcb930a52613..cece8a08edca 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -724,14 +724,24 @@ void be_link_status_update(struct be_adapter *adapter, u8 link_status) netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down"); } +static int be_gso_hdr_len(struct sk_buff *skb) +{ + if (skb->encapsulation) + return skb_inner_transport_offset(skb) + + inner_tcp_hdrlen(skb); + return skb_transport_offset(skb) + tcp_hdrlen(skb); +} + static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb) { struct be_tx_stats *stats = tx_stats(txo); - u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1; + u32 tx_pkts = skb_shinfo(skb)->gso_segs ? 
: 1; + /* Account for headers which get duplicated in TSO pkt */ + u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0; u64_stats_update_begin(&stats->sync); stats->tx_reqs++; - stats->tx_bytes += skb->len; + stats->tx_bytes += skb->len + dup_hdr_len; stats->tx_pkts += tx_pkts; if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) stats->tx_vxlan_offload_pkts += tx_pkts; From dc6e8511ff7141141578bac559565c55a1e14ad8 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Sun, 9 Oct 2016 09:58:53 +0530 Subject: [PATCH 64/65] be2net: Enable VF link state setting for BE3 The VF link state setting feature now works on BE3 chips too from FW ver 11.1.192.0 onwards. Signed-off-by: Suresh Reddy Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 7e9be9f4236a..1fb5d7239254 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4977,7 +4977,7 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, { int status; - if (BEx_chip(adapter)) + if (BE2_chip(adapter)) return -EOPNOTSUPP; status = __be_cmd_set_logical_link_config(adapter, link_state, From bd3769bfedb2b65af61744e9b40b1863e0870e2b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 10 Oct 2016 22:39:04 -0700 Subject: [PATCH 65/65] netfilter: Fix slab corruption. Use the correct pattern for singly linked list insertion and deletion. We can also calculate the list head outside of the mutex. Fixes: e3b37f11e6e4 ("netfilter: replace list_head with single linked list") Signed-off-by: Linus Torvalds Reviewed-by: Aaron Conole Signed-off-by: David S. 
Miller net/netfilter/core.c | 108 ++++++++++++++++----------------------------------- 1 file changed, 33 insertions(+), 75 deletions(-) --- net/netfilter/core.c | 110 +++++++++++++------------------------------ 1 file changed, 34 insertions(+), 76 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index c9d90eb64046..fcb5d1df11e9 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -65,49 +65,24 @@ static DEFINE_MUTEX(nf_hook_mutex); #define nf_entry_dereference(e) \ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct nf_hook_entry *nf_hook_entry_head(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hook_head = NULL; - if (reg->pf != NFPROTO_NETDEV) - hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] - [reg->hooknum]); - else if (reg->hooknum == NF_NETDEV_INGRESS) { -#ifdef CONFIG_NETFILTER_INGRESS - if (reg->dev && dev_net(reg->dev) == net) - hook_head = - nf_entry_dereference( - reg->dev->nf_hooks_ingress); -#endif - } - return hook_head; -} + return net->nf.hooks[reg->pf]+reg->hooknum; -/* must hold nf_hook_mutex */ -static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, - struct nf_hook_entry *entry) -{ - switch (reg->pf) { - case NFPROTO_NETDEV: #ifdef CONFIG_NETFILTER_INGRESS - /* We already checked in nf_register_net_hook() that this is - * used from ingress. 
- */ - rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); -#endif - break; - default: - rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], - entry); - break; + if (reg->hooknum == NF_NETDEV_INGRESS) { + if (reg->dev && dev_net(reg->dev) == net) + return &reg->dev->nf_hooks_ingress; } +#endif + return NULL; } int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hooks_entry; - struct nf_hook_entry *entry; + struct nf_hook_entry __rcu **pp; + struct nf_hook_entry *entry, *p; if (reg->pf == NFPROTO_NETDEV) { #ifndef CONFIG_NETFILTER_INGRESS @@ -119,6 +94,10 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) return -EINVAL; } + pp = nf_hook_entry_head(net, reg); + if (!pp) + return -EINVAL; + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; @@ -128,26 +107,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->next = NULL; mutex_lock(&nf_hook_mutex); - hooks_entry = nf_hook_entry_head(net, reg); - if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { - /* This is the case where we need to insert at the head */ - entry->next = hooks_entry; - hooks_entry = NULL; - } - - while (hooks_entry && - reg->priority >= hooks_entry->orig_ops->priority && - nf_entry_dereference(hooks_entry->next)) { - hooks_entry = nf_entry_dereference(hooks_entry->next); - } - - if (hooks_entry) { - entry->next = nf_entry_dereference(hooks_entry->next); - rcu_assign_pointer(hooks_entry->next, entry); - } else { - nf_set_hooks_head(net, reg, entry); + /* Find the spot in the list */ + while ((p = nf_entry_dereference(*pp)) != NULL) { + if (reg->priority < p->orig_ops->priority) + break; + pp = &p->next; } + rcu_assign_pointer(entry->next, p); + rcu_assign_pointer(*pp, entry); mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS @@ -163,33 +131,23 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct
nf_hook_ops *reg) { - struct nf_hook_entry *hooks_entry; + struct nf_hook_entry __rcu **pp; + struct nf_hook_entry *p; + + pp = nf_hook_entry_head(net, reg); + if (WARN_ON_ONCE(!pp)) + return; mutex_lock(&nf_hook_mutex); - hooks_entry = nf_hook_entry_head(net, reg); - if (hooks_entry && hooks_entry->orig_ops == reg) { - nf_set_hooks_head(net, reg, - nf_entry_dereference(hooks_entry->next)); - goto unlock; - } - while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { - struct nf_hook_entry *next = - nf_entry_dereference(hooks_entry->next); - struct nf_hook_entry *nnext; - - if (next->orig_ops != reg) { - hooks_entry = next; - continue; + while ((p = nf_entry_dereference(*pp)) != NULL) { + if (p->orig_ops == reg) { + rcu_assign_pointer(*pp, p->next); + break; } - nnext = nf_entry_dereference(next->next); - rcu_assign_pointer(hooks_entry->next, nnext); - hooks_entry = next; - break; + pp = &p->next; } - -unlock: mutex_unlock(&nf_hook_mutex); - if (!hooks_entry) { + if (!p) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -201,10 +159,10 @@ unlock: static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, hooks_entry); + nf_queue_nf_hook_drop(net, p); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(hooks_entry); + kfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook);