diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 4727c067e2ba..6d8ae3d9a680 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -165,6 +165,12 @@ attribute-sets: - name: rx-push type: u8 + - + name: tx-push-buf-len + type: u32 + - + name: tx-push-buf-len-max + type: u32 - name: mm-stat @@ -311,6 +317,8 @@ operations: - cqe-size - tx-push - rx-push + - tx-push-buf-len + - tx-push-buf-len-max dump: *ring-get-op - name: rings-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index e1bc6186d7ea..cd0973d4ba01 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -860,22 +860,24 @@ Request contents: Kernel response contents: - ==================================== ====== =========================== - ``ETHTOOL_A_RINGS_HEADER`` nested reply header - ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring - ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size of RX mini ring - ``ETHTOOL_A_RINGS_RX_JUMBO_MAX`` u32 max size of RX jumbo ring - ``ETHTOOL_A_RINGS_TX_MAX`` u32 max size of TX ring - ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring - ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring - ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring - ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring - ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring - ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split - ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE - ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode - ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode - ==================================== ====== =========================== + ======================================= ====== =========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size 
of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO_MAX`` u32 max size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX_MAX`` u32 max size of TX ring + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split + ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE + ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode + ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode + ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer + ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX`` u32 max size of TX push buffer + ======================================= ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with page-flipping TCP zero-copy receive (``getsockopt(TCP_ZEROCOPY_RECEIVE)``). @@ -891,6 +893,18 @@ through MMIO writes, thus reducing the latency. However, enabling this feature may increase the CPU cost. Drivers may enforce additional per-packet eligibility checks (e.g. on packet size). +``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` specifies the maximum number of bytes of a +transmitted packet a driver can push directly to the underlying device +('push' mode). Pushing some of the payload bytes to the device has the +advantages of reducing latency for small packets by avoiding DMA mapping (same +as ``ETHTOOL_A_RINGS_TX_PUSH`` parameter) as well as allowing the underlying +device to process packet headers ahead of fetching its payload. +This can help the device to make fast actions based on the packet's headers. +This is similar to the "tx-copybreak" parameter, which copies the packet to a +preallocated DMA memory area instead of mapping new memory. 
However, the +tx-push-buf-len parameter copies the packet directly to the device to allow the +device to take faster actions on the packet. + RINGS_SET ========= @@ -908,6 +922,7 @@ Request contents: ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode + ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer ==================================== ====== =========================== Kernel checks that requested ring sizes do not exceed limits reported by diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 689313ee25a8..372b259279ec 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -10,6 +10,10 @@ /* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */ #define ENA_COMP_HEAD_THRESH 4 +/* we allow 2 DMA descriptors per LLQ entry */ +#define ENA_LLQ_ENTRY_DESC_CHUNK_SIZE (2 * sizeof(struct ena_eth_io_tx_desc)) +#define ENA_LLQ_HEADER (128UL - ENA_LLQ_ENTRY_DESC_CHUNK_SIZE) +#define ENA_LLQ_LARGE_HEADER (256UL - ENA_LLQ_ENTRY_DESC_CHUNK_SIZE) struct ena_com_tx_ctx { struct ena_com_tx_meta ena_meta; diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 1d4f2f4d10f2..d671df4b76bc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -476,6 +476,21 @@ static void ena_get_ringparam(struct net_device *netdev, ring->tx_max_pending = adapter->max_tx_ring_size; ring->rx_max_pending = adapter->max_rx_ring_size; + if (adapter->ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + bool large_llq_supported = adapter->large_llq_header_supported; + + kernel_ring->tx_push = true; + kernel_ring->tx_push_buf_len = adapter->ena_dev->tx_max_header_size; + if (large_llq_supported) + kernel_ring->tx_push_buf_max_len = 
ENA_LLQ_LARGE_HEADER; + else + kernel_ring->tx_push_buf_max_len = ENA_LLQ_HEADER; + } else { + kernel_ring->tx_push = false; + kernel_ring->tx_push_buf_max_len = 0; + kernel_ring->tx_push_buf_len = 0; + } + ring->tx_pending = adapter->tx_ring[0].ring_size; ring->rx_pending = adapter->rx_ring[0].ring_size; } @@ -486,7 +501,8 @@ static int ena_set_ringparam(struct net_device *netdev, struct netlink_ext_ack *extack) { struct ena_adapter *adapter = netdev_priv(netdev); - u32 new_tx_size, new_rx_size; + u32 new_tx_size, new_rx_size, new_tx_push_buf_len; + bool changed = false; new_tx_size = ring->tx_pending < ENA_MIN_RING_SIZE ? ENA_MIN_RING_SIZE : ring->tx_pending; @@ -496,11 +512,51 @@ static int ena_set_ringparam(struct net_device *netdev, ENA_MIN_RING_SIZE : ring->rx_pending; new_rx_size = rounddown_pow_of_two(new_rx_size); - if (new_tx_size == adapter->requested_tx_ring_size && - new_rx_size == adapter->requested_rx_ring_size) + changed |= new_tx_size != adapter->requested_tx_ring_size || + new_rx_size != adapter->requested_rx_ring_size; + + /* This value is ignored if LLQ is not supported */ + new_tx_push_buf_len = adapter->ena_dev->tx_max_header_size; + + if ((adapter->ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) != + kernel_ring->tx_push) { + NL_SET_ERR_MSG_MOD(extack, "Push mode state cannot be modified"); + return -EINVAL; + } + + /* Validate that the push buffer is supported on the underlying device */ + if (kernel_ring->tx_push_buf_len) { + enum ena_admin_placement_policy_type placement; + + new_tx_push_buf_len = kernel_ring->tx_push_buf_len; + + placement = adapter->ena_dev->tx_mem_queue_type; + if (placement == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return -EOPNOTSUPP; + + if (new_tx_push_buf_len != ENA_LLQ_HEADER && + new_tx_push_buf_len != ENA_LLQ_LARGE_HEADER) { + bool large_llq_sup = adapter->large_llq_header_supported; + char large_llq_size_str[40]; + + snprintf(large_llq_size_str, 40, ", %lu", ENA_LLQ_LARGE_HEADER); + + 
NL_SET_ERR_MSG_FMT_MOD(extack, + "Supported tx push buff values: [%lu%s]", + ENA_LLQ_HEADER, + large_llq_sup ? large_llq_size_str : ""); + + return -EINVAL; + } + + changed |= new_tx_push_buf_len != adapter->ena_dev->tx_max_header_size; + } + + if (!changed) return 0; - return ena_update_queue_sizes(adapter, new_tx_size, new_rx_size); + return ena_update_queue_params(adapter, new_tx_size, new_rx_size, + new_tx_push_buf_len); } static u32 ena_flow_hash_to_flow_type(u16 hash_fields) @@ -909,6 +965,8 @@ static int ena_set_tunable(struct net_device *netdev, static const struct ethtool_ops ena_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, + .supported_ring_params = ETHTOOL_RING_USE_TX_PUSH_BUF_LEN | + ETHTOOL_RING_USE_TX_PUSH, .get_link_ksettings = ena_get_link_ksettings, .get_drvinfo = ena_get_drvinfo, .get_msglevel = ena_get_msglevel, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index cbfe7f977270..e0588a82c8e5 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2809,11 +2809,13 @@ static int ena_close(struct net_device *netdev) return 0; } -int ena_update_queue_sizes(struct ena_adapter *adapter, - u32 new_tx_size, - u32 new_rx_size) +int ena_update_queue_params(struct ena_adapter *adapter, + u32 new_tx_size, + u32 new_rx_size, + u32 new_llq_header_len) { - bool dev_was_up; + bool dev_was_up, large_llq_changed = false; + int rc = 0; dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); @@ -2823,7 +2825,21 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, 0, adapter->xdp_num_queues + adapter->num_io_queues); - return dev_was_up ? 
ena_up(adapter) : 0; + + large_llq_changed = adapter->ena_dev->tx_mem_queue_type == + ENA_ADMIN_PLACEMENT_POLICY_DEV; + large_llq_changed &= + new_llq_header_len != adapter->ena_dev->tx_max_header_size; + + /* a check that the configuration is valid is done by caller */ + if (large_llq_changed) { + adapter->large_llq_header_enabled = !adapter->large_llq_header_enabled; + + ena_destroy_device(adapter, false); + rc = ena_restore_device(adapter); + } + + return dev_was_up && !rc ? ena_up(adapter) : rc; } int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak) @@ -3364,6 +3380,98 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_xdp_xmit = ena_xdp_xmit, }; +static void ena_calc_io_queue_size(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq; + struct ena_com_dev *ena_dev = adapter->ena_dev; + u32 tx_queue_size = ENA_DEFAULT_RING_SIZE; + u32 rx_queue_size = ENA_DEFAULT_RING_SIZE; + u32 max_tx_queue_size; + u32 max_rx_queue_size; + + /* If this function is called after driver load, the ring sizes have already + * been configured. Take it into account when recalculating ring size. 
+ */ + if (adapter->tx_ring->ring_size) + tx_queue_size = adapter->tx_ring->ring_size; + + if (adapter->rx_ring->ring_size) + rx_queue_size = adapter->rx_ring->ring_size; + + if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + struct ena_admin_queue_ext_feature_fields *max_queue_ext = + &get_feat_ctx->max_queue_ext.max_queue_ext; + max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, + max_queue_ext->max_rx_sq_depth); + max_tx_queue_size = max_queue_ext->max_tx_cq_depth; + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + max_tx_queue_size = min_t(u32, max_tx_queue_size, + llq->max_llq_depth); + else + max_tx_queue_size = min_t(u32, max_tx_queue_size, + max_queue_ext->max_tx_sq_depth); + + adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queue_ext->max_per_packet_tx_descs); + adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queue_ext->max_per_packet_rx_descs); + } else { + struct ena_admin_queue_feature_desc *max_queues = + &get_feat_ctx->max_queues; + max_rx_queue_size = min_t(u32, max_queues->max_cq_depth, + max_queues->max_sq_depth); + max_tx_queue_size = max_queues->max_cq_depth; + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + max_tx_queue_size = min_t(u32, max_tx_queue_size, + llq->max_llq_depth); + else + max_tx_queue_size = min_t(u32, max_tx_queue_size, + max_queues->max_sq_depth); + + adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queues->max_packet_tx_descs); + adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queues->max_packet_rx_descs); + } + + max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); + max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + + /* When forcing large headers, we multiply the entry size by 2, and therefore divide + * the queue size by 2, leaving the amount of memory used by the queues unchanged. 
+ */ + if (adapter->large_llq_header_enabled) { + if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) && + ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + max_tx_queue_size /= 2; + dev_info(&adapter->pdev->dev, + "Forcing large headers and decreasing maximum TX queue size to %d\n", + max_tx_queue_size); + } else { + dev_err(&adapter->pdev->dev, + "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); + + adapter->large_llq_header_enabled = false; + } + } + + tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, + max_tx_queue_size); + rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, + max_rx_queue_size); + + tx_queue_size = rounddown_pow_of_two(tx_queue_size); + rx_queue_size = rounddown_pow_of_two(rx_queue_size); + + adapter->max_tx_ring_size = max_tx_queue_size; + adapter->max_rx_ring_size = max_rx_queue_size; + adapter->requested_tx_ring_size = tx_queue_size; + adapter->requested_rx_ring_size = rx_queue_size; +} + static int ena_device_validate_params(struct ena_adapter *adapter, struct ena_com_dev_get_features_ctx *get_feat_ctx) { @@ -3387,13 +3495,30 @@ static int ena_device_validate_params(struct ena_adapter *adapter, return 0; } -static void set_default_llq_configurations(struct ena_llq_configurations *llq_config) +static void set_default_llq_configurations(struct ena_adapter *adapter, + struct ena_llq_configurations *llq_config, + struct ena_admin_feature_llq_desc *llq) { + struct ena_com_dev *ena_dev = adapter->ena_dev; + llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; - llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; - llq_config->llq_ring_entry_size_value = 128; + + adapter->large_llq_header_supported = + !!(ena_dev->supported_features & BIT(ENA_ADMIN_LLQ)); + 
adapter->large_llq_header_supported &= + !!(llq->entry_size_ctrl_supported & + ENA_ADMIN_LIST_ENTRY_SIZE_256B); + + if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) && + adapter->large_llq_header_enabled) { + llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_256B; + llq_config->llq_ring_entry_size_value = 256; + } else { + llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; + llq_config->llq_ring_entry_size_value = 128; + } } static int ena_set_queues_placement_policy(struct pci_dev *pdev, @@ -3412,6 +3537,13 @@ static int ena_set_queues_placement_policy(struct pci_dev *pdev, return 0; } + if (!ena_dev->mem_bar) { + netdev_err(ena_dev->net_device, + "LLQ is advertised as supported but device doesn't expose mem bar\n"); + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + return 0; + } + rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); if (unlikely(rc)) { dev_err(&pdev->dev, @@ -3427,15 +3559,8 @@ static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev { bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR)); - if (!has_mem_bar) { - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - dev_err(&pdev->dev, - "ENA device does not expose LLQ bar. 
Fallback to host mode policy.\n"); - ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; - } - + if (!has_mem_bar) return 0; - } ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, pci_resource_start(pdev, ENA_MEM_BAR), @@ -3447,10 +3572,11 @@ static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev return 0; } -static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, +static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev, struct ena_com_dev_get_features_ctx *get_feat_ctx, bool *wd_state) { + struct ena_com_dev *ena_dev = adapter->ena_dev; struct ena_llq_configurations llq_config; struct device *dev = &pdev->dev; bool readless_supported; @@ -3535,7 +3661,7 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); - set_default_llq_configurations(&llq_config); + set_default_llq_configurations(adapter, &llq_config, &get_feat_ctx->llq); rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, &llq_config); @@ -3544,6 +3670,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, goto err_admin_init; } + ena_calc_io_queue_size(adapter, get_feat_ctx); + return 0; err_admin_init: @@ -3638,17 +3766,25 @@ static int ena_restore_device(struct ena_adapter *adapter) struct ena_com_dev_get_features_ctx get_feat_ctx; struct ena_com_dev *ena_dev = adapter->ena_dev; struct pci_dev *pdev = adapter->pdev; + struct ena_ring *txr; + int rc, count, i; bool wd_state; - int rc; set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); - rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); + rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx, &wd_state); if (rc) { dev_err(&pdev->dev, "Can not initialize device\n"); goto err; } adapter->wd_state = wd_state; + count = adapter->xdp_num_queues + adapter->num_io_queues; + for (i = 0 ; i < count; i++) { + txr = &adapter->tx_ring[i]; + 
txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + } + rc = ena_device_validate_params(adapter, &get_feat_ctx); if (rc) { dev_err(&pdev->dev, "Validation of device parameters failed\n"); @@ -4162,72 +4298,6 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) pci_release_selected_regions(pdev, release_bars); } - -static void ena_calc_io_queue_size(struct ena_adapter *adapter, - struct ena_com_dev_get_features_ctx *get_feat_ctx) -{ - struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq; - struct ena_com_dev *ena_dev = adapter->ena_dev; - u32 tx_queue_size = ENA_DEFAULT_RING_SIZE; - u32 rx_queue_size = ENA_DEFAULT_RING_SIZE; - u32 max_tx_queue_size; - u32 max_rx_queue_size; - - if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { - struct ena_admin_queue_ext_feature_fields *max_queue_ext = - &get_feat_ctx->max_queue_ext.max_queue_ext; - max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, - max_queue_ext->max_rx_sq_depth); - max_tx_queue_size = max_queue_ext->max_tx_cq_depth; - - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) - max_tx_queue_size = min_t(u32, max_tx_queue_size, - llq->max_llq_depth); - else - max_tx_queue_size = min_t(u32, max_tx_queue_size, - max_queue_ext->max_tx_sq_depth); - - adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - max_queue_ext->max_per_packet_tx_descs); - adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - max_queue_ext->max_per_packet_rx_descs); - } else { - struct ena_admin_queue_feature_desc *max_queues = - &get_feat_ctx->max_queues; - max_rx_queue_size = min_t(u32, max_queues->max_cq_depth, - max_queues->max_sq_depth); - max_tx_queue_size = max_queues->max_cq_depth; - - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) - max_tx_queue_size = min_t(u32, max_tx_queue_size, - llq->max_llq_depth); - else - max_tx_queue_size = min_t(u32, max_tx_queue_size, - 
max_queues->max_sq_depth); - - adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - max_queues->max_packet_tx_descs); - adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - max_queues->max_packet_rx_descs); - } - - max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); - max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); - - tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, - max_tx_queue_size); - rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, - max_rx_queue_size); - - tx_queue_size = rounddown_pow_of_two(tx_queue_size); - rx_queue_size = rounddown_pow_of_two(rx_queue_size); - - adapter->max_tx_ring_size = max_tx_queue_size; - adapter->max_rx_ring_size = max_rx_queue_size; - adapter->requested_tx_ring_size = tx_queue_size; - adapter->requested_rx_ring_size = rx_queue_size; -} - /* ena_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in ena_pci_tbl @@ -4310,7 +4380,13 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, adapter); - rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); + rc = ena_map_llq_mem_bar(pdev, ena_dev, bars); + if (rc) { + dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n"); + goto err_netdev_destroy; + } + + rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state); if (rc) { dev_err(&pdev->dev, "ENA device init failed\n"); if (rc == -ETIME) @@ -4318,12 +4394,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_netdev_destroy; } - rc = ena_map_llq_mem_bar(pdev, ena_dev, bars); - if (rc) { - dev_err(&pdev->dev, "ENA llq bar mapping failed\n"); - goto err_device_destroy; - } - /* Initial TX and RX interrupt delay. Assumes 1 usec granularity. 
* Updated during device initialization with the real granularity */ @@ -4331,7 +4401,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS; ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx); - ena_calc_io_queue_size(adapter, &get_feat_ctx); if (unlikely(!max_num_io_queues)) { rc = -EFAULT; goto err_device_destroy; @@ -4364,6 +4433,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Failed to query interrupt moderation feature\n"); goto err_device_destroy; } + ena_init_io_rings(adapter, 0, adapter->xdp_num_queues + @@ -4488,6 +4558,7 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) rtnl_lock(); /* lock released inside the below if-else block */ adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN; ena_destroy_device(adapter, true); + if (shutdown) { netif_device_detach(netdev); dev_close(netdev); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 2cb141079474..5a0d4ee76172 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -334,6 +334,14 @@ struct ena_adapter { u32 msg_enable; + /* large_llq_header_enabled is used for two purposes: + * 1. Indicates that large LLQ has been requested. + * 2. Indicates whether large LLQ is set or not after device + * initialization / configuration. 
+ */ + bool large_llq_header_enabled; + bool large_llq_header_supported; + u16 max_tx_sgl_size; u16 max_rx_sgl_size; @@ -388,9 +396,10 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_hw_stats(struct ena_adapter *adapter); -int ena_update_queue_sizes(struct ena_adapter *adapter, - u32 new_tx_size, - u32 new_rx_size); +int ena_update_queue_params(struct ena_adapter *adapter, + u32 new_tx_size, + u32 new_rx_size, + u32 new_llq_header_len); int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 2792185dda22..798d35890118 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -75,6 +75,8 @@ enum { * @tx_push: The flag of tx push mode * @rx_push: The flag of rx push mode * @cqe_size: Size of TX/RX completion queue event + * @tx_push_buf_len: Size of TX push buffer + * @tx_push_buf_max_len: Maximum allowed size of TX push buffer */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -82,6 +84,8 @@ struct kernel_ethtool_ringparam { u8 tx_push; u8 rx_push; u32 cqe_size; + u32 tx_push_buf_len; + u32 tx_push_buf_max_len; }; /** @@ -90,12 +94,14 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push + * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len */ enum ethtool_supported_ring_param { - ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), - ETHTOOL_RING_USE_CQE_SIZE = BIT(1), - ETHTOOL_RING_USE_TX_PUSH = BIT(2), - ETHTOOL_RING_USE_RX_PUSH = BIT(3), + ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), + ETHTOOL_RING_USE_CQE_SIZE = BIT(1), + ETHTOOL_RING_USE_TX_PUSH = BIT(2), + ETHTOOL_RING_USE_RX_PUSH = BIT(3), + ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 
3e8743252167..19c0791ed9d5 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -161,9 +161,31 @@ struct netlink_ext_ack { } \ } while (0) +#define NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, pol, fmt, args...) do { \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (!__extack) \ + break; \ + \ + if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ + "%s" fmt "%s", "", ##args, "") >= \ + NETLINK_MAX_FMTMSG_LEN) \ + net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ + ##args, "\n"); \ + \ + do_trace_netlink_extack(__extack->_msg_buf); \ + \ + __extack->_msg = __extack->_msg_buf; \ + __extack->bad_attr = (attr); \ + __extack->policy = (pol); \ +} while (0) + #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) +#define NL_SET_ERR_MSG_ATTR_FMT(extack, attr, msg, args...) \ + NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, NULL, msg, ##args) + #define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \ struct netlink_ext_ack *__extack = (extack); \ \ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d39ce21381c5..1ebf8d455f07 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -357,6 +357,8 @@ enum { ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index f7b189ed96b2..79424b34b553 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -413,7 +413,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct 
nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_PUSH + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index f358cd57d094..1c4972526142 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -11,6 +11,7 @@ struct rings_reply_data { struct ethnl_reply_data base; struct ethtool_ringparam ringparam; struct kernel_ethtool_ringparam kernel_ringparam; + u32 supported_ring_params; }; #define RINGS_REPDATA(__reply_base) \ @@ -32,6 +33,8 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base, if (!dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; + + data->supported_ring_params = dev->ethtool_ops->supported_ring_params; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; @@ -57,7 +60,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ - nla_total_size(sizeof(u8))); /* _RINGS_RX_PUSH */ + nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ + nla_total_size(sizeof(u32)); /* _RINGS_TX_PUSH_BUF_LEN_MAX */ } static int rings_fill_reply(struct sk_buff *skb, @@ -67,6 +72,7 @@ static int rings_fill_reply(struct sk_buff *skb, const struct rings_reply_data *data = RINGS_REPDATA(reply_base); const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; + u32 supported_ring_params = 
data->supported_ring_params; WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED); @@ -98,7 +104,12 @@ static int rings_fill_reply(struct sk_buff *skb, (kr->cqe_size && (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) || - nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push)) + nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push) || + ((supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN) && + (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + kr->tx_push_buf_max_len) || + nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, + kr->tx_push_buf_len)))) return -EMSGSIZE; return 0; @@ -117,6 +128,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), + [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, }; static int @@ -158,6 +170,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], + "setting tx push buf len is not supported"); + return -EOPNOTSUPP; + } + return ops->get_ringparam && ops->set_ringparam ? 
1 : -EOPNOTSUPP; } @@ -189,6 +209,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ethnl_update_u8(&kernel_ringparam.rx_push, tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); + ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, + tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); if (!mod) return 0; @@ -209,6 +231,14 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) return -EINVAL; } + if (kernel_ringparam.tx_push_buf_len > kernel_ringparam.tx_push_buf_max_len) { + NL_SET_ERR_MSG_ATTR_FMT(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], + "Requested TX push buffer exceeds the maximum of %u", + kernel_ringparam.tx_push_buf_max_len); + + return -EINVAL; + } + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); return ret < 0 ? ret : 1;