From 955f4d3bf0a454bc76c6393d74d844556d61b520 Mon Sep 17 00:00:00 2001 From: John Fraker <jfraker@google.com> Date: Mon, 27 Nov 2023 16:26:44 -0800 Subject: [PATCH 1/5] gve: Perform adminq allocations through a dma_pool. This allows the adminq to be smaller than a page, paving the way for non-4k page support. This is to support platforms where PAGE_SIZE is not 4k, such as some ARM platforms. Signed-off-by: Jordan Kimbrough <jrkim@google.com> Signed-off-by: John Fraker <jfraker@google.com> Reviewed-by: Willem de Bruijn <willemb@google.com> Link: https://lore.kernel.org/r/20231128002648.320892-2-jfraker@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/google/gve/gve.h | 4 +++ drivers/net/ethernet/google/gve/gve_adminq.c | 28 +++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 0d1e681be250..abc0c708b47a 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -8,6 +8,7 @@ #define _GVE_H_ #include <linux/dma-mapping.h> +#include <linux/dmapool.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/u64_stats_sync.h> @@ -41,6 +42,8 @@ #define NIC_TX_STATS_REPORT_NUM 0 #define NIC_RX_STATS_REPORT_NUM 4 +#define GVE_ADMINQ_BUFFER_SIZE 4096 + #define GVE_DATA_SLOT_ADDR_PAGE_MASK (~(PAGE_SIZE - 1)) /* PTYPEs are always 10 bits. 
*/ @@ -672,6 +675,7 @@ struct gve_priv { /* Admin queue - see gve_adminq.h*/ union gve_adminq_command *adminq; dma_addr_t adminq_bus_addr; + struct dma_pool *adminq_pool; u32 adminq_mask; /* masks prod_cnt to adminq size */ u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 79db7a6d42bc..d3f3a015238d 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -194,12 +194,19 @@ gve_process_device_options(struct gve_priv *priv, int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) { - priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE, - &priv->adminq_bus_addr, GFP_KERNEL); - if (unlikely(!priv->adminq)) + priv->adminq_pool = dma_pool_create("adminq_pool", dev, + GVE_ADMINQ_BUFFER_SIZE, 0, 0); + if (unlikely(!priv->adminq_pool)) return -ENOMEM; + priv->adminq = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, + &priv->adminq_bus_addr); + if (unlikely(!priv->adminq)) { + dma_pool_destroy(priv->adminq_pool); + return -ENOMEM; + } - priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1; + priv->adminq_mask = + (GVE_ADMINQ_BUFFER_SIZE / sizeof(union gve_adminq_command)) - 1; priv->adminq_prod_cnt = 0; priv->adminq_cmd_fail = 0; priv->adminq_timeouts = 0; @@ -251,7 +258,8 @@ void gve_adminq_free(struct device *dev, struct gve_priv *priv) if (!gve_get_admin_queue_ok(priv)) return; gve_adminq_release(priv); - dma_free_coherent(dev, PAGE_SIZE, priv->adminq, priv->adminq_bus_addr); + dma_pool_free(priv->adminq_pool, priv->adminq, priv->adminq_bus_addr); + dma_pool_destroy(priv->adminq_pool); gve_clear_admin_queue_ok(priv); } @@ -778,8 +786,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) u16 mtu; memset(&cmd, 0, sizeof(cmd)); - descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE, - &descriptor_bus, 
GFP_KERNEL); + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, + &descriptor_bus); if (!descriptor) return -ENOMEM; cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE); @@ -787,7 +795,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) cpu_to_be64(descriptor_bus); cmd.describe_device.device_descriptor_version = cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION); - cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE); + cmd.describe_device.available_length = + cpu_to_be32(GVE_ADMINQ_BUFFER_SIZE); err = gve_adminq_execute_cmd(priv, &cmd); if (err) @@ -868,8 +877,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) dev_op_jumbo_frames, dev_op_dqo_qpl); free_device_descriptor: - dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor, - descriptor_bus); + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); return err; } From 8ae980d24195f25d639e9f05421fcf80c5c64b3f Mon Sep 17 00:00:00 2001 From: John Fraker <jfraker@google.com> Date: Mon, 27 Nov 2023 16:26:45 -0800 Subject: [PATCH 2/5] gve: Deprecate adminq_pfn for pci revision 0x1. adminq_pfn assumes a page size of 4k, causing this mechanism to break in kernels compiled with different page sizes. A new PCI device revision was needed for the device to be able to communicate with the driver how to set up the admin queue prior to having access to the admin queue. 
Signed-off-by: Jordan Kimbrough <jrkim@google.com> Signed-off-by: John Fraker <jfraker@google.com> Reviewed-by: Willem de Bruijn <willemb@google.com> Link: https://lore.kernel.org/r/20231128002648.320892-3-jfraker@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/google/gve/gve_adminq.c | 48 ++++++++++++++----- .../net/ethernet/google/gve/gve_register.h | 9 ++++ 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index d3f3a015238d..f81ed6f6296a 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -225,9 +225,20 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_get_ptype_map_cnt = 0; /* Setup Admin queue with the device */ - iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, - &priv->reg_bar0->adminq_pfn); - + if (priv->pdev->revision < 0x1) { + iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, + &priv->reg_bar0->adminq_pfn); + } else { + iowrite16be(GVE_ADMINQ_BUFFER_SIZE, + &priv->reg_bar0->adminq_length); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + iowrite32be(priv->adminq_bus_addr >> 32, + &priv->reg_bar0->adminq_base_address_hi); +#endif + iowrite32be(priv->adminq_bus_addr, + &priv->reg_bar0->adminq_base_address_lo); + iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status); + } gve_set_admin_queue_ok(priv); return 0; } @@ -237,16 +248,27 @@ void gve_adminq_release(struct gve_priv *priv) int i = 0; /* Tell the device the adminq is leaving */ - iowrite32be(0x0, &priv->reg_bar0->adminq_pfn); - while (ioread32be(&priv->reg_bar0->adminq_pfn)) { - /* If this is reached the device is unrecoverable and still - * holding memory. Continue looping to avoid memory corruption, - * but WARN so it is visible what is going on. 
- */ - if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) - WARN(1, "Unrecoverable platform error!"); - i++; - msleep(GVE_ADMINQ_SLEEP_LEN); + if (priv->pdev->revision < 0x1) { + iowrite32be(0x0, &priv->reg_bar0->adminq_pfn); + while (ioread32be(&priv->reg_bar0->adminq_pfn)) { + /* If this is reached the device is unrecoverable and still + * holding memory. Continue looping to avoid memory corruption, + * but WARN so it is visible what is going on. + */ + if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) + WARN(1, "Unrecoverable platform error!"); + i++; + msleep(GVE_ADMINQ_SLEEP_LEN); + } + } else { + iowrite32be(GVE_DRIVER_STATUS_RESET_MASK, &priv->reg_bar0->driver_status); + while (!(ioread32be(&priv->reg_bar0->device_status) + & GVE_DEVICE_STATUS_DEVICE_IS_RESET)) { + if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) + WARN(1, "Unrecoverable platform error!"); + i++; + msleep(GVE_ADMINQ_SLEEP_LEN); + } } gve_clear_device_rings_ok(priv); gve_clear_device_resources_ok(priv); diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h index fb655463c357..8e72b97008d6 100644 --- a/drivers/net/ethernet/google/gve/gve_register.h +++ b/drivers/net/ethernet/google/gve/gve_register.h @@ -18,11 +18,20 @@ struct gve_registers { __be32 adminq_event_counter; u8 reserved[3]; u8 driver_version; + __be32 adminq_base_address_hi; + __be32 adminq_base_address_lo; + __be16 adminq_length; }; enum gve_device_status_flags { GVE_DEVICE_STATUS_RESET_MASK = BIT(1), GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2), GVE_DEVICE_STATUS_REPORT_STATS_MASK = BIT(3), + GVE_DEVICE_STATUS_DEVICE_IS_RESET = BIT(4), +}; + +enum gve_driver_status_flags { + GVE_DRIVER_STATUS_RUN_MASK = BIT(0), + GVE_DRIVER_STATUS_RESET_MASK = BIT(1), }; #endif /* _GVE_REGISTER_H_ */ From ce260cb114bbf65d53834c712729429b2233f5fd Mon Sep 17 00:00:00 2001 From: John Fraker <jfraker@google.com> Date: Mon, 27 Nov 2023 16:26:46 -0800 Subject: [PATCH 3/5] gve: Remove obsolete checks that rely on page size. 
These checks are safe to remove as they are no longer enforced by the backend. Retaining them would require updating them to work differently with page sizes larger than 4k. Signed-off-by: Jordan Kimbrough <jrkim@google.com> Signed-off-by: John Fraker <jfraker@google.com> Reviewed-by: Willem de Bruijn <willemb@google.com> Link: https://lore.kernel.org/r/20231128002648.320892-4-jfraker@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/google/gve/gve_adminq.c | 11 ----------- drivers/net/ethernet/google/gve/gve_rx.c | 8 +------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index f81ed6f6296a..bebb7ed113d3 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -727,18 +727,7 @@ static int gve_set_desc_cnt(struct gve_priv *priv, struct gve_device_descriptor *descriptor) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); - if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) { - dev_err(&priv->pdev->dev, "Tx desc count %d too low\n", - priv->tx_desc_cnt); - return -EINVAL; - } priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0]) - < PAGE_SIZE) { - dev_err(&priv->pdev->dev, "Rx desc count %d too low\n", - priv->rx_desc_cnt); - return -EINVAL; - } return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 73655347902d..82aa18588049 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -211,9 +211,9 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) { struct gve_rx_ring *rx = &priv->rx[idx]; struct device *hdev = &priv->pdev->dev; - u32 slots, npages; int filled_pages; size_t bytes; + u32 slots; int err; netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); @@ 
-270,12 +270,6 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) /* alloc rx desc ring */ bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; - npages = bytes / PAGE_SIZE; - if (npages * PAGE_SIZE != bytes) { - err = -EIO; - goto abort_with_q_resources; - } - rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, GFP_KERNEL); if (!rx->desc.desc_ring) { From 513072fb4bf816686473eec897194ce6a28e53db Mon Sep 17 00:00:00 2001 From: John Fraker <jfraker@google.com> Date: Mon, 27 Nov 2023 16:26:47 -0800 Subject: [PATCH 4/5] gve: Add page size register to the register_page_list command. This register is required on platforms with page sizes greater than 4k. This is because the tx side of the driver vmaps all the pages of a queue page list into a single flat address space, then uses the entire space. Without communicating the guest page size to the backend, the backend will only access the first 4k of each page in the queue page list. Signed-off-by: Jordan Kimbrough <jrkim@google.com> Signed-off-by: John Fraker <jfraker@google.com> Reviewed-by: Willem de Bruijn <willemb@google.com> Link: https://lore.kernel.org/r/20231128002648.320892-5-jfraker@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/google/gve/gve_adminq.c | 1 + drivers/net/ethernet/google/gve/gve_adminq.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index bebb7ed113d3..12fbd723ecc6 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -917,6 +917,7 @@ int gve_adminq_register_page_list(struct gve_priv *priv, .page_list_id = cpu_to_be32(qpl->id), .num_pages = cpu_to_be32(num_entries), .page_address_list_addr = cpu_to_be64(page_list_bus), + .page_size = cpu_to_be64(PAGE_SIZE), }; err = gve_adminq_execute_cmd(priv, &cmd); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h 
b/drivers/net/ethernet/google/gve/gve_adminq.h index 38a22279e863..5865ccdccbd0 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -219,9 +219,10 @@ struct gve_adminq_register_page_list { __be32 page_list_id; __be32 num_pages; __be64 page_address_list_addr; + __be64 page_size; }; -static_assert(sizeof(struct gve_adminq_register_page_list) == 16); +static_assert(sizeof(struct gve_adminq_register_page_list) == 24); struct gve_adminq_unregister_page_list { __be32 page_list_id; From da7d4b42caf1b4d6ba3447bfd9ed185479fb0fe4 Mon Sep 17 00:00:00 2001 From: John Fraker <jfraker@google.com> Date: Mon, 27 Nov 2023 16:26:48 -0800 Subject: [PATCH 5/5] gve: Remove dependency on 4k page size. Prior to this change, gve crashes when attempting to run in kernels with page sizes other than 4k. This change removes unnecessary references to PAGE_SIZE and replaces them with more meaningful constants. Signed-off-by: Jordan Kimbrough <jrkim@google.com> Signed-off-by: John Fraker <jfraker@google.com> Reviewed-by: Willem de Bruijn <willemb@google.com> Link: https://lore.kernel.org/r/20231128002648.320892-6-jfraker@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/google/gve/gve.h | 4 +++- drivers/net/ethernet/google/gve/gve_ethtool.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 4 ++-- drivers/net/ethernet/google/gve/gve_rx.c | 9 ++++----- drivers/net/ethernet/google/gve/gve_tx.c | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index abc0c708b47a..b80349154604 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -49,7 +49,9 @@ /* PTYPEs are always 10 bits. 
*/ #define GVE_NUM_PTYPES 1024 -#define GVE_RX_BUFFER_SIZE_DQO 2048 +#define GVE_DEFAULT_RX_BUFFER_SIZE 2048 + +#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 #define GVE_XDP_ACTIONS 5 diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 233e5946905e..e5397aa1e48f 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -519,7 +519,7 @@ static int gve_set_tunable(struct net_device *netdev, case ETHTOOL_RX_COPYBREAK: { u32 max_copybreak = gve_is_gqi(priv) ? - (PAGE_SIZE / 2) : priv->data_buffer_size_dqo; + GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo; len = *(u32 *)value; if (len > max_copybreak) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index cc169748ffa2..619bf63ec935 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1328,7 +1328,7 @@ static int gve_open(struct net_device *dev) /* Hard code this for now. This may be tuned in the future for * performance. 
*/ - priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; + priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; } err = gve_create_rings(priv); if (err) @@ -1664,7 +1664,7 @@ static int verify_xdp_configuration(struct net_device *dev) return -EOPNOTSUPP; } - if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) { + if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { netdev_warn(dev, "XDP is not supported for mtu %d.\n", dev->mtu); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 82aa18588049..7a8dc5386fff 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -283,7 +283,7 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) /* Allocating half-page buffers allows page-flipping which is faster * than copying or allocating new pages. */ - rx->packet_buffer_size = PAGE_SIZE / 2; + rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_rx_ctx_clear(&rx->ctx); gve_rx_add_to_block(priv, idx); @@ -399,10 +399,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr) { - const __be64 offset = cpu_to_be64(PAGE_SIZE / 2); + const __be64 offset = cpu_to_be64(GVE_DEFAULT_RX_BUFFER_OFFSET); /* "flip" to other packet buffer on this page */ - page_info->page_offset ^= PAGE_SIZE / 2; + page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET; *(slot_addr) ^= offset; } @@ -507,8 +507,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, return NULL; gve_dec_pagecnt_bias(copy_page_info); - copy_page_info->page_offset += rx->packet_buffer_size; - copy_page_info->page_offset &= (PAGE_SIZE - 1); + copy_page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET; if (copy_page_info->can_flip) { /* We have used both halves of this copy page, it diff --git 
a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 9f6ffc4a54f0..07ba124780df 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -819,7 +819,7 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, return 0; } -#define GVE_TX_START_THRESH PAGE_SIZE +#define GVE_TX_START_THRESH 4096 static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do, bool try_to_wake)