Merge tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
 "Core:

   - BPF:
      - add syscall program type and libbpf support for generating instructions and bindings for in-kernel BPF loaders (BPF loaders for BPF), this is a stepping stone for signed BPF programs
      - infrastructure to migrate TCP child sockets from one listener to another in the same reuseport group/map to improve flexibility of service hand-off/restart
      - add broadcast support to XDP redirect
      - allow bypass of the lockless qdisc to improve performance (for pktgen: +23% with one thread, +44% with 2 threads)
      - add a simpler version of "DO_ONCE()" which does not require jump labels, intended for slow-path usage
   - virtio/vsock: introduce SOCK_SEQPACKET support
   - add getsockopt to retrieve netns cookie
   - ip: treat lowest address of an IPv4 subnet as ordinary unicast address, allowing reclaiming of precious IPv4 addresses
   - ipv6: use prandom_u32() for ID generation
   - ip: add support for more flexible field selection for hashing across multi-path routes (w/ offload to mlxsw)
   - icmp: add support for extended RFC 8335 PROBE (ping)
   - seg6: add support for SRv6 End.DT46 behavior
   - mptcp:
      - DSS checksum support (RFC 8684) to detect middlebox meddling
      - support Connection-time 'C' flag
      - time stamping support
   - sctp: Packetization Layer Path MTU Discovery (RFC 8899)
   - xfrm: speed up state addition with seq set
   - WiFi:
      - hidden AP discovery on 6 GHz and other HE 6 GHz improvements
      - aggregation handling improvements for some drivers
      - minstrel improvements for no-ack frames
      - deferred rate control for TXQs to improve reaction times
      - switch from round robin to virtual time-based airtime scheduler
   - add trace points:
      - tcp checksum errors
      - openvswitch - action execution, upcalls
      - socket errors via sk_error_report

  Device APIs:

   - devlink: add rate API for hierarchical control of max egress rate of virtual devices (VFs, SFs etc.)
   - don't require RCU read lock to be held around BPF hooks in NAPI context
   - page_pool: generic buffer recycling

  New hardware/drivers:

   - mobile:
      - iosm: PCIe Driver for Intel M.2 Modem
      - support for Qualcomm MSM8998 (ipa)
   - WiFi: Qualcomm QCN9074 and WCN6855 PCI devices
   - sparx5: Microchip SparX-5 family of Enterprise Ethernet switches
   - Mellanox BlueField Gigabit Ethernet (control NIC of the DPU)
   - NXP SJA1110 Automotive Ethernet 10-port switch
   - Qualcomm QCA8327 switch support (qca8k)
   - Mikrotik 10/25G NIC (atl1c)

  Driver changes:

   - ACPI support for some MDIO, MAC and PHY devices from Marvell and NXP (our first foray into MAC/PHY description via ACPI)
   - HW timestamping (PTP) support: bnxt_en, ice, sja1105, hns3, tja11xx
   - Mellanox/Nvidia NIC (mlx5):
      - NIC VF offload of L2 bridging
      - support IRQ distribution to Sub-functions
   - Marvell (prestera):
      - add flower and match all
      - devlink trap
      - link aggregation
   - Netronome (nfp): connection tracking offload
   - Intel 1GE (igc): add AF_XDP support
   - Marvell DPU (octeontx2): ingress ratelimit offload
   - Google vNIC (gve): new ring/descriptor format support
   - Qualcomm mobile (rmnet & ipa): inline checksum offload support
   - MediaTek WiFi (mt76):
      - mt7915 MSI support
      - mt7915 Tx status reporting
      - mt7915 thermal sensors support
      - mt7921 decapsulation offload
      - mt7921 enable runtime pm and deep sleep
   - Realtek WiFi (rtw88):
      - beacon filter support
      - Tx antenna path diversity support
      - firmware crash information via devcoredump
   - Qualcomm WiFi (wcn36xx): Wake-on-WLAN support with magic packets and GTK rekeying
   - Micrel PHY (ksz886x/ksz8081): add cable test support"

* tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2168 commits)
  tcp: change ICSK_CA_PRIV_SIZE definition
  tcp_yeah: check struct yeah size at compile time
  gve: DQO: Fix off by one in gve_rx_dqo()
  stmmac: intel: set PCI_D3hot in suspend
  stmmac: intel: Enable PHY WOL option in EHL
  net: stmmac: option to enable PHY WOL with PMT enabled
  net: say "local" instead of "static" addresses in ndo_dflt_fdb_{add,del}
  net: use netdev_info in ndo_dflt_fdb_{add,del}
  ptp: Set lookup cookie when creating a PTP PPS source.
  net: sock: add trace for socket errors
  net: sock: introduce sk_error_report
  net: dsa: replay the local bridge FDB entries pointing to the bridge dev too
  net: dsa: ensure during dsa_fdb_offload_notify that dev_hold and dev_put are on the same dev
  net: dsa: include fdb entries pointing to bridge in the host fdb list
  net: dsa: include bridge addresses which are local in the host fdb list
  net: dsa: sync static FDB entries on foreign interfaces to hardware
  net: dsa: install the host MDB and FDB entries in the master's RX filter
  net: dsa: reference count the FDB addresses at the cross-chip notifier level
  net: dsa: introduce a separate cross-chip notifier type for host FDBs
  net: dsa: reference count the MDB entries at the cross-chip notifier level
  ...
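The "simpler version of DO_ONCE()" mentioned above is the DO_ONCE_LITE() helper from <linux/once_lite.h>, which the net/core/dev.c hunk further down uses for hardware checksum fault reporting. A minimal sketch of the pattern only; report_bad_frame()/handle_bad_frame() are made-up names, not part of this series:

    #include <linux/netdevice.h>
    #include <linux/once_lite.h>
    #include <linux/skbuff.h>

    /* Hypothetical slow-path reporter; DO_ONCE_LITE() runs it at most once. */
    static void report_bad_frame(struct net_device *dev, struct sk_buff *skb)
    {
            pr_err("%s: malformed frame\n", dev ? dev->name : "<unknown>");
            skb_dump(KERN_ERR, skb, true);
    }

    static void handle_bad_frame(struct net_device *dev, struct sk_buff *skb)
    {
            /* Plain static-flag test, no jump labels required. */
            DO_ONCE_LITE(report_bad_frame, dev, skb);
    }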
@@ -524,8 +524,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
                 nr_maps++;
         }
 
-        diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps,
-                       GFP_KERNEL);
+        diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
         if (!diag)
                 return ERR_PTR(-ENOMEM);
 
@@ -148,6 +148,7 @@
 #include <net/devlink.h>
 #include <linux/pm_runtime.h>
 #include <linux/prandom.h>
+#include <linux/once_lite.h>
 
 #include "net-sysfs.h"
 
@@ -3487,13 +3488,16 @@ EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
+static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
+{
+        pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+        skb_dump(KERN_ERR, skb, true);
+        dump_stack();
+}
+
 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
 {
-        if (net_ratelimit()) {
-                pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
-                skb_dump(KERN_ERR, skb, true);
-                dump_stack();
-        }
+        DO_ONCE_LITE(do_netdev_rx_csum_fault, dev, skb);
 }
 EXPORT_SYMBOL(netdev_rx_csum_fault);
 #endif
@@ -3852,10 +3856,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
qdisc_calculate_pkt_len(skb, q);
|
||||
|
||||
if (q->flags & TCQ_F_NOLOCK) {
|
||||
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
|
||||
if (likely(!netif_xmit_frozen_or_stopped(txq)))
|
||||
qdisc_run(q);
|
||||
if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
|
||||
qdisc_run_begin(q)) {
|
||||
/* Retest nolock_qdisc_is_empty() within the protection
|
||||
* of q->seqlock to protect from racing with requeuing.
|
||||
*/
|
||||
if (unlikely(!nolock_qdisc_is_empty(q))) {
|
||||
rc = q->enqueue(skb, q, &to_free) &
|
||||
NET_XMIT_MASK;
|
||||
__qdisc_run(q);
|
||||
qdisc_run_end(q);
|
||||
|
||||
goto no_lock_out;
|
||||
}
|
||||
|
||||
qdisc_bstats_cpu_update(q, skb);
|
||||
if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
|
||||
!nolock_qdisc_is_empty(q))
|
||||
__qdisc_run(q);
|
||||
|
||||
qdisc_run_end(q);
|
||||
return NET_XMIT_SUCCESS;
|
||||
}
|
||||
|
||||
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
|
||||
qdisc_run(q);
|
||||
|
||||
no_lock_out:
|
||||
if (unlikely(to_free))
|
||||
kfree_skb_list(to_free);
|
||||
return rc;
|
||||
@@ -5277,9 +5304,9 @@ another_round:
|
||||
if (static_branch_unlikely(&generic_xdp_needed_key)) {
|
||||
int ret2;
|
||||
|
||||
preempt_disable();
|
||||
migrate_disable();
|
||||
ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
|
||||
preempt_enable();
|
||||
migrate_enable();
|
||||
|
||||
if (ret2 != XDP_PASS) {
|
||||
ret = NET_RX_DROP;
|
||||
@@ -6520,11 +6547,18 @@ EXPORT_SYMBOL(napi_schedule_prep);
|
||||
* __napi_schedule_irqoff - schedule for receive
|
||||
* @n: entry to schedule
|
||||
*
|
||||
* Variant of __napi_schedule() assuming hard irqs are masked
|
||||
* Variant of __napi_schedule() assuming hard irqs are masked.
|
||||
*
|
||||
* On PREEMPT_RT enabled kernels this maps to __napi_schedule()
|
||||
* because the interrupt disabled assumption might not be true
|
||||
* due to force-threaded interrupts and spinlock substitution.
|
||||
*/
|
||||
void __napi_schedule_irqoff(struct napi_struct *n)
|
||||
{
|
||||
____napi_schedule(this_cpu_ptr(&softnet_data), n);
|
||||
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
____napi_schedule(this_cpu_ptr(&softnet_data), n);
|
||||
else
|
||||
__napi_schedule(n);
|
||||
}
|
||||
EXPORT_SYMBOL(__napi_schedule_irqoff);
|
||||
|
||||
|
@@ -190,6 +190,80 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
|
||||
return devlink_port_get_from_attrs(devlink, info->attrs);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
|
||||
{
|
||||
return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
devlink_rate_is_node(struct devlink_rate *devlink_rate)
|
||||
{
|
||||
return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
|
||||
}
|
||||
|
||||
static struct devlink_rate *
|
||||
devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
|
||||
{
|
||||
struct devlink_rate *devlink_rate;
|
||||
struct devlink_port *devlink_port;
|
||||
|
||||
devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
|
||||
if (IS_ERR(devlink_port))
|
||||
return ERR_CAST(devlink_port);
|
||||
devlink_rate = devlink_port->devlink_rate;
|
||||
return devlink_rate ?: ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
static struct devlink_rate *
|
||||
devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
|
||||
{
|
||||
static struct devlink_rate *devlink_rate;
|
||||
|
||||
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
|
||||
if (devlink_rate_is_node(devlink_rate) &&
|
||||
!strcmp(node_name, devlink_rate->name))
|
||||
return devlink_rate;
|
||||
}
|
||||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
static struct devlink_rate *
|
||||
devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
|
||||
{
|
||||
const char *rate_node_name;
|
||||
size_t len;
|
||||
|
||||
if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
|
||||
return ERR_PTR(-EINVAL);
|
||||
rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
|
||||
len = strlen(rate_node_name);
|
||||
/* Name cannot be empty or decimal number */
|
||||
if (!len || strspn(rate_node_name, "0123456789") == len)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
return devlink_rate_node_get_by_name(devlink, rate_node_name);
|
||||
}
|
||||
|
||||
static struct devlink_rate *
|
||||
devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
|
||||
{
|
||||
return devlink_rate_node_get_from_attrs(devlink, info->attrs);
|
||||
}
|
||||
|
||||
static struct devlink_rate *
|
||||
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
|
||||
{
|
||||
struct nlattr **attrs = info->attrs;
|
||||
|
||||
if (attrs[DEVLINK_ATTR_PORT_INDEX])
|
||||
return devlink_rate_leaf_get_from_info(devlink, info);
|
||||
else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
|
||||
return devlink_rate_node_get_from_info(devlink, info);
|
||||
else
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
struct devlink_sb {
|
||||
struct list_head list;
|
||||
unsigned int index;
|
||||
@@ -408,12 +482,14 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
|
||||
|
||||
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
|
||||
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
|
||||
#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
|
||||
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
|
||||
|
||||
/* The per devlink instance lock is taken by default in the pre-doit
|
||||
* operation, yet several commands do not require this. The global
|
||||
* devlink lock is taken and protects from disruption by user-calls.
|
||||
*/
|
||||
#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
|
||||
#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
|
||||
|
||||
static int devlink_nl_pre_doit(const struct genl_ops *ops,
|
||||
struct sk_buff *skb, struct genl_info *info)
|
||||
@@ -442,6 +518,24 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
|
||||
devlink_port = devlink_port_get_from_info(devlink, info);
|
||||
if (!IS_ERR(devlink_port))
|
||||
info->user_ptr[1] = devlink_port;
|
||||
} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
|
||||
struct devlink_rate *devlink_rate;
|
||||
|
||||
devlink_rate = devlink_rate_get_from_info(devlink, info);
|
||||
if (IS_ERR(devlink_rate)) {
|
||||
err = PTR_ERR(devlink_rate);
|
||||
goto unlock;
|
||||
}
|
||||
info->user_ptr[1] = devlink_rate;
|
||||
} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
|
||||
struct devlink_rate *rate_node;
|
||||
|
||||
rate_node = devlink_rate_node_get_from_info(devlink, info);
|
||||
if (IS_ERR(rate_node)) {
|
||||
err = PTR_ERR(rate_node);
|
||||
goto unlock;
|
||||
}
|
||||
info->user_ptr[1] = rate_node;
|
||||
}
|
||||
return 0;
|
||||
|
||||
@@ -748,6 +842,56 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int devlink_nl_rate_fill(struct sk_buff *msg,
|
||||
struct devlink *devlink,
|
||||
struct devlink_rate *devlink_rate,
|
||||
enum devlink_command cmd, u32 portid,
|
||||
u32 seq, int flags,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
void *hdr;
|
||||
|
||||
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
|
||||
if (!hdr)
|
||||
return -EMSGSIZE;
|
||||
|
||||
if (devlink_nl_put_handle(msg, devlink))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (devlink_rate_is_leaf(devlink_rate)) {
|
||||
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
|
||||
devlink_rate->devlink_port->index))
|
||||
goto nla_put_failure;
|
||||
} else if (devlink_rate_is_node(devlink_rate)) {
|
||||
if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
|
||||
devlink_rate->name))
|
||||
goto nla_put_failure;
|
||||
}
|
||||
|
||||
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
|
||||
devlink_rate->tx_share, DEVLINK_ATTR_PAD))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
|
||||
devlink_rate->tx_max, DEVLINK_ATTR_PAD))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (devlink_rate->parent)
|
||||
if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
|
||||
devlink_rate->parent->name))
|
||||
goto nla_put_failure;
|
||||
|
||||
genlmsg_end(msg, hdr);
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
genlmsg_cancel(msg, hdr);
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static bool
|
||||
devlink_port_fn_state_valid(enum devlink_port_fn_state state)
|
||||
{
|
||||
@@ -919,6 +1063,111 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
|
||||
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void devlink_rate_notify(struct devlink_rate *devlink_rate,
|
||||
enum devlink_command cmd)
|
||||
{
|
||||
struct devlink *devlink = devlink_rate->devlink;
|
||||
struct sk_buff *msg;
|
||||
int err;
|
||||
|
||||
WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
|
||||
cmd != DEVLINK_CMD_RATE_DEL);
|
||||
|
||||
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
||||
if (!msg)
|
||||
return;
|
||||
|
||||
err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
|
||||
cmd, 0, 0, 0, NULL);
|
||||
if (err) {
|
||||
nlmsg_free(msg);
|
||||
return;
|
||||
}
|
||||
|
||||
genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
|
||||
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
|
||||
struct netlink_callback *cb)
|
||||
{
|
||||
struct devlink_rate *devlink_rate;
|
||||
struct devlink *devlink;
|
||||
int start = cb->args[0];
|
||||
int idx = 0;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&devlink_mutex);
|
||||
list_for_each_entry(devlink, &devlink_list, list) {
|
||||
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
|
||||
continue;
|
||||
mutex_lock(&devlink->lock);
|
||||
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
|
||||
enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
|
||||
u32 id = NETLINK_CB(cb->skb).portid;
|
||||
|
||||
if (idx < start) {
|
||||
idx++;
|
||||
continue;
|
||||
}
|
||||
err = devlink_nl_rate_fill(msg, devlink,
|
||||
devlink_rate,
|
||||
cmd, id,
|
||||
cb->nlh->nlmsg_seq,
|
||||
NLM_F_MULTI, NULL);
|
||||
if (err) {
|
||||
mutex_unlock(&devlink->lock);
|
||||
goto out;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
mutex_unlock(&devlink->lock);
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&devlink_mutex);
|
||||
if (err != -EMSGSIZE)
|
||||
return err;
|
||||
|
||||
cb->args[0] = idx;
|
||||
return msg->len;
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
|
||||
struct genl_info *info)
|
||||
{
|
||||
struct devlink_rate *devlink_rate = info->user_ptr[1];
|
||||
struct devlink *devlink = devlink_rate->devlink;
|
||||
struct sk_buff *msg;
|
||||
int err;
|
||||
|
||||
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
||||
if (!msg)
|
||||
return -ENOMEM;
|
||||
|
||||
err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
|
||||
DEVLINK_CMD_RATE_NEW,
|
||||
info->snd_portid, info->snd_seq, 0,
|
||||
info->extack);
|
||||
if (err) {
|
||||
nlmsg_free(msg);
|
||||
return err;
|
||||
}
|
||||
|
||||
return genlmsg_reply(msg, info);
|
||||
}
|
||||
|
||||
static bool
|
||||
devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
|
||||
struct devlink_rate *parent)
|
||||
{
|
||||
while (parent) {
|
||||
if (parent == devlink_rate)
|
||||
return true;
|
||||
parent = parent->parent;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
struct devlink *devlink = info->user_ptr[0];
|
||||
@@ -1339,6 +1588,255 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
|
||||
return devlink->ops->port_del(devlink, port_index, extack);
|
||||
}
|
||||
|
||||
static int
|
||||
devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
|
||||
struct genl_info *info,
|
||||
struct nlattr *nla_parent)
|
||||
{
|
||||
struct devlink *devlink = devlink_rate->devlink;
|
||||
const char *parent_name = nla_data(nla_parent);
|
||||
const struct devlink_ops *ops = devlink->ops;
|
||||
size_t len = strlen(parent_name);
|
||||
struct devlink_rate *parent;
|
||||
int err = -EOPNOTSUPP;
|
||||
|
||||
parent = devlink_rate->parent;
|
||||
if (parent && len) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent.");
|
||||
return -EBUSY;
|
||||
} else if (parent && !len) {
|
||||
if (devlink_rate_is_leaf(devlink_rate))
|
||||
err = ops->rate_leaf_parent_set(devlink_rate, NULL,
|
||||
devlink_rate->priv, NULL,
|
||||
info->extack);
|
||||
else if (devlink_rate_is_node(devlink_rate))
|
||||
err = ops->rate_node_parent_set(devlink_rate, NULL,
|
||||
devlink_rate->priv, NULL,
|
||||
info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
refcount_dec(&parent->refcnt);
|
||||
devlink_rate->parent = NULL;
|
||||
} else if (!parent && len) {
|
||||
parent = devlink_rate_node_get_by_name(devlink, parent_name);
|
||||
if (IS_ERR(parent))
|
||||
return -ENODEV;
|
||||
|
||||
if (parent == devlink_rate) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (devlink_rate_is_node(devlink_rate) &&
|
||||
devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node.");
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
if (devlink_rate_is_leaf(devlink_rate))
|
||||
err = ops->rate_leaf_parent_set(devlink_rate, parent,
|
||||
devlink_rate->priv, parent->priv,
|
||||
info->extack);
|
||||
else if (devlink_rate_is_node(devlink_rate))
|
||||
err = ops->rate_node_parent_set(devlink_rate, parent,
|
||||
devlink_rate->priv, parent->priv,
|
||||
info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
refcount_inc(&parent->refcnt);
|
||||
devlink_rate->parent = parent;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
|
||||
const struct devlink_ops *ops,
|
||||
struct genl_info *info)
|
||||
{
|
||||
struct nlattr *nla_parent, **attrs = info->attrs;
|
||||
int err = -EOPNOTSUPP;
|
||||
u64 rate;
|
||||
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
|
||||
rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
|
||||
if (devlink_rate_is_leaf(devlink_rate))
|
||||
err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
|
||||
rate, info->extack);
|
||||
else if (devlink_rate_is_node(devlink_rate))
|
||||
err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
|
||||
rate, info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
devlink_rate->tx_share = rate;
|
||||
}
|
||||
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
|
||||
rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
|
||||
if (devlink_rate_is_leaf(devlink_rate))
|
||||
err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
|
||||
rate, info->extack);
|
||||
else if (devlink_rate_is_node(devlink_rate))
|
||||
err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
|
||||
rate, info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
devlink_rate->tx_max = rate;
|
||||
}
|
||||
|
||||
nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
|
||||
if (nla_parent) {
|
||||
err = devlink_nl_rate_parent_node_set(devlink_rate, info,
|
||||
nla_parent);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
|
||||
struct genl_info *info,
|
||||
enum devlink_rate_type type)
|
||||
{
|
||||
struct nlattr **attrs = info->attrs;
|
||||
|
||||
if (type == DEVLINK_RATE_TYPE_LEAF) {
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs");
|
||||
return false;
|
||||
}
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
|
||||
return false;
|
||||
}
|
||||
if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
|
||||
!ops->rate_leaf_parent_set) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs");
|
||||
return false;
|
||||
}
|
||||
} else if (type == DEVLINK_RATE_TYPE_NODE) {
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
|
||||
return false;
|
||||
}
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
|
||||
return false;
|
||||
}
|
||||
if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
|
||||
!ops->rate_node_parent_set) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
WARN(1, "Unknown type of rate object");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
|
||||
struct genl_info *info)
|
||||
{
|
||||
struct devlink_rate *devlink_rate = info->user_ptr[1];
|
||||
struct devlink *devlink = devlink_rate->devlink;
|
||||
const struct devlink_ops *ops = devlink->ops;
|
||||
int err;
|
||||
|
||||
if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = devlink_nl_rate_set(devlink_rate, ops, info);
|
||||
|
||||
if (!err)
|
||||
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
|
||||
struct genl_info *info)
|
||||
{
|
||||
struct devlink *devlink = info->user_ptr[0];
|
||||
struct devlink_rate *rate_node;
|
||||
const struct devlink_ops *ops;
|
||||
int err;
|
||||
|
||||
ops = devlink->ops;
|
||||
if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
|
||||
if (!IS_ERR(rate_node))
|
||||
return -EEXIST;
|
||||
else if (rate_node == ERR_PTR(-EINVAL))
|
||||
return -EINVAL;
|
||||
|
||||
rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
|
||||
if (!rate_node)
|
||||
return -ENOMEM;
|
||||
|
||||
rate_node->devlink = devlink;
|
||||
rate_node->type = DEVLINK_RATE_TYPE_NODE;
|
||||
rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
|
||||
if (!rate_node->name) {
|
||||
err = -ENOMEM;
|
||||
goto err_strdup;
|
||||
}
|
||||
|
||||
err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
|
||||
if (err)
|
||||
goto err_node_new;
|
||||
|
||||
err = devlink_nl_rate_set(rate_node, ops, info);
|
||||
if (err)
|
||||
goto err_rate_set;
|
||||
|
||||
refcount_set(&rate_node->refcnt, 1);
|
||||
list_add(&rate_node->list, &devlink->rate_list);
|
||||
devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
|
||||
return 0;
|
||||
|
||||
err_rate_set:
|
||||
ops->rate_node_del(rate_node, rate_node->priv, info->extack);
|
||||
err_node_new:
|
||||
kfree(rate_node->name);
|
||||
err_strdup:
|
||||
kfree(rate_node);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
|
||||
struct genl_info *info)
|
||||
{
|
||||
struct devlink_rate *rate_node = info->user_ptr[1];
|
||||
struct devlink *devlink = rate_node->devlink;
|
||||
const struct devlink_ops *ops = devlink->ops;
|
||||
int err;
|
||||
|
||||
if (refcount_read(&rate_node->refcnt) > 1) {
|
||||
NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node.");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
|
||||
err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
|
||||
if (rate_node->parent)
|
||||
refcount_dec(&rate_node->parent->refcnt);
|
||||
list_del(&rate_node->list);
|
||||
kfree(rate_node->name);
|
||||
kfree(rate_node);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
|
||||
struct devlink_sb *devlink_sb,
|
||||
enum devlink_command cmd, u32 portid,
|
||||
@@ -2207,6 +2705,23 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
|
||||
return genlmsg_reply(msg, info);
|
||||
}
|
||||
|
||||
static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct devlink_rate *devlink_rate;
|
||||
|
||||
/* Take the lock to sync with devlink_rate_nodes_destroy() */
|
||||
mutex_lock(&devlink->lock);
|
||||
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
|
||||
if (devlink_rate_is_node(devlink_rate)) {
|
||||
mutex_unlock(&devlink->lock);
|
||||
NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
|
||||
return -EBUSY;
|
||||
}
|
||||
mutex_unlock(&devlink->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
|
||||
struct genl_info *info)
|
||||
{
|
||||
@@ -2221,6 +2736,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
|
||||
if (!ops->eswitch_mode_set)
|
||||
return -EOPNOTSUPP;
|
||||
mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
|
||||
err = devlink_rate_nodes_check(devlink, mode, info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
err = ops->eswitch_mode_set(devlink, mode, info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -6994,8 +7512,9 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
|
||||
}
|
||||
}
|
||||
|
||||
static int devlink_trap_stats_put(struct sk_buff *msg,
|
||||
struct devlink_stats __percpu *trap_stats)
|
||||
static int
|
||||
devlink_trap_group_stats_put(struct sk_buff *msg,
|
||||
struct devlink_stats __percpu *trap_stats)
|
||||
{
|
||||
struct devlink_stats stats;
|
||||
struct nlattr *attr;
|
||||
@@ -7023,6 +7542,50 @@ nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
|
||||
const struct devlink_trap_item *trap_item)
|
||||
{
|
||||
struct devlink_stats stats;
|
||||
struct nlattr *attr;
|
||||
u64 drops = 0;
|
||||
int err;
|
||||
|
||||
if (devlink->ops->trap_drop_counter_get) {
|
||||
err = devlink->ops->trap_drop_counter_get(devlink,
|
||||
trap_item->trap,
|
||||
&drops);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
devlink_trap_stats_read(trap_item->stats, &stats);
|
||||
|
||||
attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
|
||||
if (!attr)
|
||||
return -EMSGSIZE;
|
||||
|
||||
if (devlink->ops->trap_drop_counter_get &&
|
||||
nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
|
||||
DEVLINK_ATTR_PAD))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
|
||||
stats.rx_packets, DEVLINK_ATTR_PAD))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
|
||||
stats.rx_bytes, DEVLINK_ATTR_PAD))
|
||||
goto nla_put_failure;
|
||||
|
||||
nla_nest_end(msg, attr);
|
||||
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
nla_nest_cancel(msg, attr);
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
|
||||
const struct devlink_trap_item *trap_item,
|
||||
enum devlink_command cmd, u32 portid, u32 seq,
|
||||
@@ -7060,7 +7623,7 @@ static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
|
||||
if (err)
|
||||
goto nla_put_failure;
|
||||
|
||||
err = devlink_trap_stats_put(msg, trap_item->stats);
|
||||
err = devlink_trap_stats_put(msg, devlink, trap_item);
|
||||
if (err)
|
||||
goto nla_put_failure;
|
||||
|
||||
@@ -7277,7 +7840,7 @@ devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
|
||||
group_item->policer_item->policer->id))
|
||||
goto nla_put_failure;
|
||||
|
||||
err = devlink_trap_stats_put(msg, group_item->stats);
|
||||
err = devlink_trap_group_stats_put(msg, group_item->stats);
|
||||
if (err)
|
||||
goto nla_put_failure;
|
||||
|
||||
@@ -7801,6 +8364,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
|
||||
[DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
|
||||
[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
|
||||
[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
|
||||
[DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
|
||||
[DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
|
||||
[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
|
||||
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
|
||||
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
|
||||
};
|
||||
|
||||
static const struct genl_small_ops devlink_nl_ops[] = {
|
||||
@@ -7826,6 +8394,30 @@ static const struct genl_small_ops devlink_nl_ops[] = {
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
|
||||
},
|
||||
{
|
||||
.cmd = DEVLINK_CMD_RATE_GET,
|
||||
.doit = devlink_nl_cmd_rate_get_doit,
|
||||
.dumpit = devlink_nl_cmd_rate_get_dumpit,
|
||||
.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
|
||||
/* can be retrieved by unprivileged users */
|
||||
},
|
||||
{
|
||||
.cmd = DEVLINK_CMD_RATE_SET,
|
||||
.doit = devlink_nl_cmd_rate_set_doit,
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
|
||||
},
|
||||
{
|
||||
.cmd = DEVLINK_CMD_RATE_NEW,
|
||||
.doit = devlink_nl_cmd_rate_new_doit,
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
},
|
||||
{
|
||||
.cmd = DEVLINK_CMD_RATE_DEL,
|
||||
.doit = devlink_nl_cmd_rate_del_doit,
|
||||
.flags = GENL_ADMIN_PERM,
|
||||
.internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
|
||||
},
|
||||
{
|
||||
.cmd = DEVLINK_CMD_PORT_SPLIT,
|
||||
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
|
||||
@@ -8201,6 +8793,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
|
||||
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
|
||||
__devlink_net_set(devlink, &init_net);
|
||||
INIT_LIST_HEAD(&devlink->port_list);
|
||||
INIT_LIST_HEAD(&devlink->rate_list);
|
||||
INIT_LIST_HEAD(&devlink->sb_list);
|
||||
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
|
||||
INIT_LIST_HEAD(&devlink->resource_list);
|
||||
@@ -8303,6 +8896,7 @@ void devlink_free(struct devlink *devlink)
|
||||
WARN_ON(!list_empty(&devlink->resource_list));
|
||||
WARN_ON(!list_empty(&devlink->dpipe_table_list));
|
||||
WARN_ON(!list_empty(&devlink->sb_list));
|
||||
WARN_ON(!list_empty(&devlink->rate_list));
|
||||
WARN_ON(!list_empty(&devlink->port_list));
|
||||
|
||||
xa_destroy(&devlink->snapshot_ids);
|
||||
@@ -8619,6 +9213,110 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
|
||||
|
||||
/**
|
||||
* devlink_rate_leaf_create - create devlink rate leaf
|
||||
*
|
||||
* @devlink_port: devlink port object to create rate object on
|
||||
* @priv: driver private data
|
||||
*
|
||||
* Create devlink rate object of type leaf on provided @devlink_port.
|
||||
* Throws call trace if @devlink_port already has a devlink rate object.
|
||||
*
|
||||
* Context: Takes and release devlink->lock <mutex>.
|
||||
*
|
||||
* Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
|
||||
*/
|
||||
int
|
||||
devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
|
||||
{
|
||||
struct devlink *devlink = devlink_port->devlink;
|
||||
struct devlink_rate *devlink_rate;
|
||||
|
||||
devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
|
||||
if (!devlink_rate)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&devlink->lock);
|
||||
WARN_ON(devlink_port->devlink_rate);
|
||||
devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
|
||||
devlink_rate->devlink = devlink;
|
||||
devlink_rate->devlink_port = devlink_port;
|
||||
devlink_rate->priv = priv;
|
||||
list_add_tail(&devlink_rate->list, &devlink->rate_list);
|
||||
devlink_port->devlink_rate = devlink_rate;
|
||||
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
|
||||
mutex_unlock(&devlink->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
|
||||
|
||||
/**
|
||||
* devlink_rate_leaf_destroy - destroy devlink rate leaf
|
||||
*
|
||||
* @devlink_port: devlink port linked to the rate object
|
||||
*
|
||||
* Context: Takes and release devlink->lock <mutex>.
|
||||
*/
|
||||
void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
|
||||
{
|
||||
struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
|
||||
struct devlink *devlink = devlink_port->devlink;
|
||||
|
||||
if (!devlink_rate)
|
||||
return;
|
||||
|
||||
mutex_lock(&devlink->lock);
|
||||
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
|
||||
if (devlink_rate->parent)
|
||||
refcount_dec(&devlink_rate->parent->refcnt);
|
||||
list_del(&devlink_rate->list);
|
||||
devlink_port->devlink_rate = NULL;
|
||||
mutex_unlock(&devlink->lock);
|
||||
kfree(devlink_rate);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
|
||||
|
||||
/**
|
||||
* devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
|
||||
*
|
||||
* @devlink: devlink instance
|
||||
*
|
||||
* Unset parent for all rate objects and destroy all rate nodes
|
||||
* on specified device.
|
||||
*
|
||||
* Context: Takes and release devlink->lock <mutex>.
|
||||
*/
|
||||
void devlink_rate_nodes_destroy(struct devlink *devlink)
|
||||
{
|
||||
static struct devlink_rate *devlink_rate, *tmp;
|
||||
const struct devlink_ops *ops = devlink->ops;
|
||||
|
||||
mutex_lock(&devlink->lock);
|
||||
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
|
||||
if (!devlink_rate->parent)
|
||||
continue;
|
||||
|
||||
refcount_dec(&devlink_rate->parent->refcnt);
|
||||
if (devlink_rate_is_leaf(devlink_rate))
|
||||
ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
|
||||
NULL, NULL);
|
||||
else if (devlink_rate_is_node(devlink_rate))
|
||||
ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
|
||||
NULL, NULL);
|
||||
}
|
||||
list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
|
||||
if (devlink_rate_is_node(devlink_rate)) {
|
||||
ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
|
||||
list_del(&devlink_rate->list);
|
||||
kfree(devlink_rate->name);
|
||||
kfree(devlink_rate);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&devlink->lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
|
||||
|
||||
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
|
||||
char *name, size_t len)
|
||||
{
|
||||
@@ -8630,12 +9328,18 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
|
||||
|
||||
switch (attrs->flavour) {
|
||||
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
|
||||
case DEVLINK_PORT_FLAVOUR_VIRTUAL:
|
||||
n = snprintf(name, len, "p%u", attrs->phys.port_number);
|
||||
if (n < len && attrs->split)
|
||||
n += snprintf(name + n, len - n, "s%u",
|
||||
attrs->phys.split_subport_number);
|
||||
if (!attrs->split)
|
||||
n = snprintf(name, len, "p%u", attrs->phys.port_number);
|
||||
else
|
||||
n = snprintf(name, len, "p%us%u",
|
||||
attrs->phys.port_number,
|
||||
attrs->phys.split_subport_number);
|
||||
|
||||
break;
|
||||
case DEVLINK_PORT_FLAVOUR_CPU:
|
||||
case DEVLINK_PORT_FLAVOUR_DSA:
|
||||
@@ -8677,8 +9381,6 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
|
||||
n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
|
||||
attrs->pci_sf.sf);
|
||||
break;
|
||||
case DEVLINK_PORT_FLAVOUR_VIRTUAL:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (n >= len)
|
||||
|
@@ -3241,9 +3241,6 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
|
||||
u32 off = skb_mac_header_len(skb);
|
||||
int ret;
|
||||
|
||||
if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
|
||||
return -ENOTSUPP;
|
||||
|
||||
ret = skb_cow(skb, len_diff);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
@@ -3255,19 +3252,11 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
|
||||
if (skb_is_gso(skb)) {
|
||||
struct skb_shared_info *shinfo = skb_shinfo(skb);
|
||||
|
||||
/* SKB_GSO_TCPV4 needs to be changed into
|
||||
* SKB_GSO_TCPV6.
|
||||
*/
|
||||
/* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
|
||||
if (shinfo->gso_type & SKB_GSO_TCPV4) {
|
||||
shinfo->gso_type &= ~SKB_GSO_TCPV4;
|
||||
shinfo->gso_type |= SKB_GSO_TCPV6;
|
||||
}
|
||||
|
||||
/* Due to IPv6 header, MSS needs to be downgraded. */
|
||||
skb_decrease_gso_size(shinfo, len_diff);
|
||||
/* Header must be checked, and gso_segs recomputed. */
|
||||
shinfo->gso_type |= SKB_GSO_DODGY;
|
||||
shinfo->gso_segs = 0;
|
||||
}
|
||||
|
||||
skb->protocol = htons(ETH_P_IPV6);
|
||||
@@ -3282,9 +3271,6 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
|
||||
u32 off = skb_mac_header_len(skb);
|
||||
int ret;
|
||||
|
||||
if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
|
||||
return -ENOTSUPP;
|
||||
|
||||
ret = skb_unclone(skb, GFP_ATOMIC);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
@@ -3296,19 +3282,11 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
|
||||
if (skb_is_gso(skb)) {
|
||||
struct skb_shared_info *shinfo = skb_shinfo(skb);
|
||||
|
||||
/* SKB_GSO_TCPV6 needs to be changed into
|
||||
* SKB_GSO_TCPV4.
|
||||
*/
|
||||
/* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
|
||||
if (shinfo->gso_type & SKB_GSO_TCPV6) {
|
||||
shinfo->gso_type &= ~SKB_GSO_TCPV6;
|
||||
shinfo->gso_type |= SKB_GSO_TCPV4;
|
||||
}
|
||||
|
||||
/* Due to IPv4 header, MSS can be upgraded. */
|
||||
skb_increase_gso_size(shinfo, len_diff);
|
||||
/* Header must be checked, and gso_segs recomputed. */
|
||||
shinfo->gso_type |= SKB_GSO_DODGY;
|
||||
shinfo->gso_segs = 0;
|
||||
}
|
||||
|
||||
skb->protocol = htons(ETH_P_IP);
|
||||
@@ -3919,6 +3897,34 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
/* XDP_REDIRECT works by a three-step process, implemented in the functions
|
||||
* below:
|
||||
*
|
||||
* 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
|
||||
* of the redirect and store it (along with some other metadata) in a per-CPU
|
||||
* struct bpf_redirect_info.
|
||||
*
|
||||
* 2. When the program returns the XDP_REDIRECT return code, the driver will
|
||||
* call xdp_do_redirect() which will use the information in struct
|
||||
* bpf_redirect_info to actually enqueue the frame into a map type-specific
|
||||
* bulk queue structure.
|
||||
*
|
||||
* 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
|
||||
* which will flush all the different bulk queues, thus completing the
|
||||
* redirect.
|
||||
*
|
||||
* Pointers to the map entries will be kept around for this whole sequence of
|
||||
* steps, protected by RCU. However, there is no top-level rcu_read_lock() in
|
||||
* the core code; instead, the RCU protection relies on everything happening
|
||||
* inside a single NAPI poll sequence, which means it's between a pair of calls
|
||||
* to local_bh_disable()/local_bh_enable().
|
||||
*
|
||||
* The map entries are marked as __rcu and the map code makes sure to
|
||||
* dereference those pointers with rcu_dereference_check() in a way that works
|
||||
* for both sections that to hold an rcu_read_lock() and sections that are
|
||||
* called from NAPI without a separate rcu_read_lock(). The code below does not
|
||||
* use RCU annotations, but relies on those in the map code.
|
||||
*/
|
||||
void xdp_do_flush(void)
|
||||
{
|
||||
__dev_flush();
|
||||
@@ -3927,6 +3933,23 @@ void xdp_do_flush(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_do_flush);
|
||||
|
||||
void bpf_clear_redirect_map(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_redirect_info *ri;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
ri = per_cpu_ptr(&bpf_redirect_info, cpu);
|
||||
/* Avoid polluting remote cacheline due to writes if
|
||||
* not needed. Once we pass this test, we need the
|
||||
* cmpxchg() to make sure it hasn't been changed in
|
||||
* the meantime by remote CPU.
|
||||
*/
|
||||
if (unlikely(READ_ONCE(ri->map) == map))
|
||||
cmpxchg(&ri->map, map, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
@@ -3934,6 +3957,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
void *fwd = ri->tgt_value;
|
||||
u32 map_id = ri->map_id;
|
||||
struct bpf_map *map;
|
||||
int err;
|
||||
|
||||
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
|
||||
@@ -3943,7 +3967,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
fallthrough;
|
||||
case BPF_MAP_TYPE_DEVMAP_HASH:
|
||||
err = dev_map_enqueue(fwd, xdp, dev);
|
||||
map = READ_ONCE(ri->map);
|
||||
if (unlikely(map)) {
|
||||
WRITE_ONCE(ri->map, NULL);
|
||||
err = dev_map_enqueue_multi(xdp, dev, map,
|
||||
ri->flags & BPF_F_EXCLUDE_INGRESS);
|
||||
} else {
|
||||
err = dev_map_enqueue(fwd, xdp, dev);
|
||||
}
|
||||
break;
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
err = cpu_map_enqueue(fwd, xdp, dev);
|
||||
@@ -3985,13 +4016,21 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
|
||||
enum bpf_map_type map_type, u32 map_id)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct bpf_map *map;
|
||||
int err;
|
||||
|
||||
switch (map_type) {
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
fallthrough;
|
||||
case BPF_MAP_TYPE_DEVMAP_HASH:
|
||||
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
|
||||
map = READ_ONCE(ri->map);
|
||||
if (unlikely(map)) {
|
||||
WRITE_ONCE(ri->map, NULL);
|
||||
err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
|
||||
ri->flags & BPF_F_EXCLUDE_INGRESS);
|
||||
} else {
|
||||
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
|
||||
}
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
break;
|
||||
@@ -10008,11 +10047,13 @@ out:
|
||||
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
|
||||
struct sock_reuseport *reuse,
|
||||
struct sock *sk, struct sk_buff *skb,
|
||||
struct sock *migrating_sk,
|
||||
u32 hash)
|
||||
{
|
||||
reuse_kern->skb = skb;
|
||||
reuse_kern->sk = sk;
|
||||
reuse_kern->selected_sk = NULL;
|
||||
reuse_kern->migrating_sk = migrating_sk;
|
||||
reuse_kern->data_end = skb->data + skb_headlen(skb);
|
||||
reuse_kern->hash = hash;
|
||||
reuse_kern->reuseport_id = reuse->reuseport_id;
|
||||
@@ -10021,12 +10062,13 @@ static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
|
||||
|
||||
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
|
||||
struct bpf_prog *prog, struct sk_buff *skb,
|
||||
struct sock *migrating_sk,
|
||||
u32 hash)
|
||||
{
|
||||
struct sk_reuseport_kern reuse_kern;
|
||||
enum sk_action action;
|
||||
|
||||
bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
|
||||
bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
|
||||
action = BPF_PROG_RUN(prog, &reuse_kern);
|
||||
|
||||
if (action == SK_PASS)
|
||||
@@ -10136,6 +10178,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
|
||||
return &sk_reuseport_load_bytes_proto;
|
||||
case BPF_FUNC_skb_load_bytes_relative:
|
||||
return &sk_reuseport_load_bytes_relative_proto;
|
||||
case BPF_FUNC_get_socket_cookie:
|
||||
return &bpf_get_socket_ptr_cookie_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
@@ -10165,6 +10209,14 @@ sk_reuseport_is_valid_access(int off, int size,
|
||||
case offsetof(struct sk_reuseport_md, hash):
|
||||
return size == size_default;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, sk):
|
||||
info->reg_type = PTR_TO_SOCKET;
|
||||
return size == sizeof(__u64);
|
||||
|
||||
case offsetof(struct sk_reuseport_md, migrating_sk):
|
||||
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
|
||||
return size == sizeof(__u64);
|
||||
|
||||
/* Fields that allow narrowing */
|
||||
case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
|
||||
if (size < sizeof_field(struct sk_buff, protocol))
|
||||
@@ -10237,6 +10289,14 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
|
||||
case offsetof(struct sk_reuseport_md, bind_inany):
|
||||
SK_REUSEPORT_LOAD_FIELD(bind_inany);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, sk):
|
||||
SK_REUSEPORT_LOAD_FIELD(sk);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, migrating_sk):
|
||||
SK_REUSEPORT_LOAD_FIELD(migrating_sk);
|
||||
break;
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
|
@@ -943,8 +943,8 @@ bool __skb_flow_dissect(const struct net *net,
                         int offset = 0;
 
                         ops = skb->dev->dsa_ptr->tag_ops;
-                        /* Tail taggers don't break flow dissection */
-                        if (!ops->tail_tag) {
+                        /* Only DSA header taggers break flow dissection */
+                        if (ops->needed_headroom) {
                                 if (ops->flow_dissect)
                                         ops->flow_dissect(skb, &proto, &offset);
                                 else
@@ -3142,7 +3142,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
         struct net *net = seq_file_net(seq);
         struct neigh_table *tbl = state->tbl;
         struct pneigh_entry *pn = NULL;
-        int bucket = state->bucket;
+        int bucket;
 
         state->flags |= NEIGH_SEQ_IS_PNEIGH;
         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
@@ -60,3 +60,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
@@ -36,6 +36,7 @@
 #include <net/ip6_checksum.h>
 #include <asm/unaligned.h>
 #include <trace/events/napi.h>
+#include <linux/kconfig.h>
 
 /*
  * We maintain a small pool of fully-sized skbs, to make sure the
@@ -389,7 +390,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
         static atomic_t ip_ident;
         struct ipv6hdr *ip6h;
 
-        WARN_ON_ONCE(!irqs_disabled());
+        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                WARN_ON_ONCE(!irqs_disabled());
 
         udp_len = len + sizeof(*udph);
         if (np->ipv6)
@@ -17,6 +17,7 @@
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/page-flags.h>
|
||||
#include <linux/mm.h> /* for __put_page() */
|
||||
#include <linux/poison.h>
|
||||
|
||||
#include <trace/events/page_pool.h>
|
||||
|
||||
@@ -221,6 +222,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
page->pp_magic |= PP_SIGNATURE;
|
||||
|
||||
/* Track how many pages are held 'in-flight' */
|
||||
pool->pages_state_hold_cnt++;
|
||||
trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
|
||||
@@ -263,6 +266,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
|
||||
put_page(page);
|
||||
continue;
|
||||
}
|
||||
page->pp_magic |= PP_SIGNATURE;
|
||||
pool->alloc.cache[pool->alloc.count++] = page;
|
||||
/* Track how many pages are held 'in-flight' */
|
||||
pool->pages_state_hold_cnt++;
|
||||
@@ -341,6 +345,8 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
page_pool_set_dma_addr(page, 0);
|
||||
skip_dma_unmap:
|
||||
page->pp_magic = 0;
|
||||
|
||||
/* This may be the last page returned, releasing the pool, so
|
||||
* it is not safe to reference pool afterwards.
|
||||
*/
|
||||
@@ -622,3 +628,25 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(page_pool_update_nid);
|
||||
|
||||
bool page_pool_return_skb_page(struct page *page)
|
||||
{
|
||||
struct page_pool *pp;
|
||||
|
||||
page = compound_head(page);
|
||||
if (unlikely(page->pp_magic != PP_SIGNATURE))
|
||||
return false;
|
||||
|
||||
pp = page->pp;
|
||||
|
||||
/* Driver set this to memory recycling info. Reset it on recycle.
|
||||
* This will *not* work for NIC using a split-page memory model.
|
||||
* The page will be returned to the pool here regardless of the
|
||||
* 'flipped' fragment being in use or not.
|
||||
*/
|
||||
page->pp = NULL;
|
||||
page_pool_put_full_page(pp, page, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(page_pool_return_skb_page);
|
||||
|
@@ -467,7 +467,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
|
||||
static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
|
||||
static void pktgen_run_all_threads(struct pktgen_net *pn);
|
||||
static void pktgen_reset_all_threads(struct pktgen_net *pn);
|
||||
static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
|
||||
static void pktgen_stop_all_threads(struct pktgen_net *pn);
|
||||
|
||||
static void pktgen_stop(struct pktgen_thread *t);
|
||||
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
|
||||
@@ -516,14 +516,11 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
|
||||
data[count - 1] = 0; /* Strip trailing '\n' and terminate string */
|
||||
|
||||
if (!strcmp(data, "stop"))
|
||||
pktgen_stop_all_threads_ifs(pn);
|
||||
|
||||
pktgen_stop_all_threads(pn);
|
||||
else if (!strcmp(data, "start"))
|
||||
pktgen_run_all_threads(pn);
|
||||
|
||||
else if (!strcmp(data, "reset"))
|
||||
pktgen_reset_all_threads(pn);
|
||||
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
@@ -3027,20 +3024,25 @@ static void pktgen_run(struct pktgen_thread *t)
|
||||
t->control &= ~(T_STOP);
|
||||
}
|
||||
|
||||
static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
|
||||
static void pktgen_handle_all_threads(struct pktgen_net *pn, u32 flags)
|
||||
{
|
||||
struct pktgen_thread *t;
|
||||
|
||||
func_enter();
|
||||
|
||||
mutex_lock(&pktgen_thread_lock);
|
||||
|
||||
list_for_each_entry(t, &pn->pktgen_threads, th_list)
|
||||
t->control |= T_STOP;
|
||||
t->control |= (flags);
|
||||
|
||||
mutex_unlock(&pktgen_thread_lock);
|
||||
}
|
||||
|
||||
static void pktgen_stop_all_threads(struct pktgen_net *pn)
|
||||
{
|
||||
func_enter();
|
||||
|
||||
pktgen_handle_all_threads(pn, T_STOP);
|
||||
}
|
||||
|
||||
static int thread_is_running(const struct pktgen_thread *t)
|
||||
{
|
||||
const struct pktgen_dev *pkt_dev;
|
||||
@@ -3103,16 +3105,9 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
|
||||
|
||||
static void pktgen_run_all_threads(struct pktgen_net *pn)
|
||||
{
|
||||
struct pktgen_thread *t;
|
||||
|
||||
func_enter();
|
||||
|
||||
mutex_lock(&pktgen_thread_lock);
|
||||
|
||||
list_for_each_entry(t, &pn->pktgen_threads, th_list)
|
||||
t->control |= (T_RUN);
|
||||
|
||||
mutex_unlock(&pktgen_thread_lock);
|
||||
pktgen_handle_all_threads(pn, T_RUN);
|
||||
|
||||
/* Propagate thread->control */
|
||||
schedule_timeout_interruptible(msecs_to_jiffies(125));
|
||||
@@ -3122,16 +3117,9 @@ static void pktgen_run_all_threads(struct pktgen_net *pn)
|
||||
|
||||
static void pktgen_reset_all_threads(struct pktgen_net *pn)
|
||||
{
|
||||
struct pktgen_thread *t;
|
||||
|
||||
func_enter();
|
||||
|
||||
mutex_lock(&pktgen_thread_lock);
|
||||
|
||||
list_for_each_entry(t, &pn->pktgen_threads, th_list)
|
||||
t->control |= (T_REMDEVALL);
|
||||
|
||||
mutex_unlock(&pktgen_thread_lock);
|
||||
pktgen_handle_all_threads(pn, T_REMDEVALL);
|
||||
|
||||
/* Propagate thread->control */
|
||||
schedule_timeout_interruptible(msecs_to_jiffies(125));
|
||||
|
@@ -9,7 +9,7 @@
  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *      Fixes:
- *      Vitaly E. Lavrov                RTA_OK arithmetics was wrong.
+ *      Vitaly E. Lavrov                RTA_OK arithmetic was wrong.
  */
 
 #include <linux/bitops.h>
@@ -234,7 +234,7 @@ unlock:
|
||||
* @msgtype: rtnetlink message type
|
||||
* @doit: Function pointer called for each request message
|
||||
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
|
||||
* @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
|
||||
* @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
|
||||
*
|
||||
* Like rtnl_register, but for use by removable modules.
|
||||
*/
|
||||
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(rtnl_register_module);
|
||||
* @msgtype: rtnetlink message type
|
||||
* @doit: Function pointer called for each request message
|
||||
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
|
||||
* @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
|
||||
* @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
|
||||
*
|
||||
* Registers the specified function pointers (at least one of them has
|
||||
* to be non-NULL) to be called whenever a request message for the
|
||||
@@ -376,12 +376,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
|
||||
if (rtnl_link_ops_get(ops->kind))
|
||||
return -EEXIST;
|
||||
|
||||
/* The check for setup is here because if ops
|
||||
/* The check for alloc/setup is here because if ops
|
||||
* does not have that filled up, it is not possible
|
||||
* to use the ops for creating device. So do not
|
||||
* fill up dellink as well. That disables rtnl_dellink.
|
||||
*/
|
||||
if (ops->setup && !ops->dellink)
|
||||
if ((ops->alloc || ops->setup) && !ops->dellink)
|
||||
ops->dellink = unregister_netdevice_queue;
|
||||
|
||||
list_add_tail(&ops->list, &link_ops);
|
||||
@@ -543,7 +543,9 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
|
||||
{
|
||||
const struct rtnl_af_ops *ops;
|
||||
|
||||
list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
|
||||
ASSERT_RTNL();
|
||||
|
||||
list_for_each_entry(ops, &rtnl_af_ops, list) {
|
||||
if (ops->family == family)
|
||||
return ops;
|
||||
}
|
||||
@@ -1819,6 +1821,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
|
||||
if (rtnl_fill_prop_list(skb, dev))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (dev->dev.parent &&
|
||||
nla_put_string(skb, IFLA_PARENT_DEV_NAME,
|
||||
dev_name(dev->dev.parent)))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (dev->dev.parent && dev->dev.parent->bus &&
|
||||
nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME,
|
||||
dev->dev.parent->bus->name))
|
||||
goto nla_put_failure;
|
||||
|
||||
nlmsg_end(skb, nlh);
|
||||
return 0;
|
||||
|
||||
@@ -1878,6 +1890,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
|
||||
[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
|
||||
[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
|
||||
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
|
||||
[IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
|
||||
};
|
||||
|
||||
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
|
||||
@@ -2274,27 +2287,18 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
|
||||
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
|
||||
const struct rtnl_af_ops *af_ops;
|
||||
|
||||
rcu_read_lock();
|
||||
af_ops = rtnl_af_lookup(nla_type(af));
|
||||
if (!af_ops) {
|
||||
rcu_read_unlock();
|
||||
if (!af_ops)
|
||||
return -EAFNOSUPPORT;
|
||||
}
|
||||
|
||||
if (!af_ops->set_link_af) {
|
||||
rcu_read_unlock();
|
||||
if (!af_ops->set_link_af)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (af_ops->validate_link_af) {
|
||||
err = af_ops->validate_link_af(dev, af);
|
||||
if (err < 0) {
|
||||
rcu_read_unlock();
|
||||
if (err < 0)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2574,7 +2578,7 @@ static int do_set_proto_down(struct net_device *dev,
|
||||
if (nl_proto_down) {
|
||||
proto_down = nla_get_u8(nl_proto_down);
|
||||
|
||||
/* Dont turn off protodown if there are active reasons */
|
||||
/* Don't turn off protodown if there are active reasons */
|
||||
if (!proto_down && dev->proto_down_reason) {
|
||||
NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
|
||||
return -EBUSY;
|
||||
@@ -2868,17 +2872,12 @@ static int do_setlink(const struct sk_buff *skb,
|
||||
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
|
||||
const struct rtnl_af_ops *af_ops;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
|
||||
|
||||
err = af_ops->set_link_af(dev, af, extack);
|
||||
if (err < 0) {
|
||||
rcu_read_unlock();
|
||||
if (err < 0)
|
||||
goto errout;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
status |= DO_SETLINK_NOTIFY;
|
||||
}
|
||||
}
|
||||
@@ -3177,8 +3176,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
|
||||
ops->setup, num_tx_queues, num_rx_queues);
|
||||
if (ops->alloc) {
|
||||
dev = ops->alloc(tb, ifname, name_assign_type,
|
||||
num_tx_queues, num_rx_queues);
|
||||
if (IS_ERR(dev))
|
||||
return dev;
|
||||
} else {
|
||||
dev = alloc_netdev_mqs(ops->priv_size, ifname,
|
||||
name_assign_type, ops->setup,
|
||||
num_tx_queues, num_rx_queues);
|
||||
}
|
||||
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@@ -3411,7 +3419,7 @@ replay:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (!ops->setup)
|
||||
if (!ops->alloc && !ops->setup)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!ifname[0]) {
|
||||
@@ -3939,12 +3947,12 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
|
||||
* implement its own handler for this.
|
||||
*/
|
||||
if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
|
||||
pr_info("%s: FDB only supports static addresses\n", dev->name);
|
||||
netdev_info(dev, "default FDB implementation only supports local addresses\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
if (vid) {
|
||||
pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
|
||||
netdev_info(dev, "vlans aren't supported yet for dev_uc|mc_add()\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -4078,7 +4086,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
|
||||
* implement its own handler for this.
|
||||
*/
|
||||
if (!(ndm->ndm_state & NUD_PERMANENT)) {
|
||||
pr_info("%s: FDB only supports static addresses\n", dev->name);
|
||||
netdev_info(dev, "default FDB implementation only supports local addresses\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
|
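The rtnl_create_link() and __rtnl_link_register() hunks above add an optional ->alloc() hook next to ->setup(). A hypothetical link type (the "foo" names below are placeholders, not from this series) might use it roughly like this to take over netdev allocation while keeping the ERR_PTR/NULL contract the core now expects:

/* Hypothetical sketch; struct foo_priv and foo_setup() are placeholders. */
static struct net_device *foo_alloc(struct nlattr *tb[], const char *ifname,
				    unsigned char name_assign_type,
				    unsigned int num_tx_queues,
				    unsigned int num_rx_queues)
{
	/* A driver can inspect tb[] here and pick its own queue counts or
	 * sizes before allocating, which ->setup()/->priv_size alone cannot do.
	 */
	return alloc_netdev_mqs(sizeof(struct foo_priv), ifname,
				name_assign_type, foo_setup,
				num_tx_queues, num_rx_queues);
}

static struct rtnl_link_ops foo_link_ops __read_mostly = {
	.kind	= "foo",
	.alloc	= foo_alloc,
	.setup	= foo_setup,
	/* ->dellink may stay NULL; __rtnl_link_register() now fills it in
	 * whenever either ->alloc or ->setup is provided.
	 */
};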
net/core/skbuff.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
#include <net/page_pool.h>

#include <linux/uaccess.h>
#include <trace/events/skb.h>

@@ -645,10 +646,13 @@ static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;

if (skb->head_frag)
if (skb->head_frag) {
if (skb_pp_recycle(skb, head))
return;
skb_free_frag(head);
else
} else {
kfree(head);
}
}

static void skb_release_data(struct sk_buff *skb)

@@ -664,7 +668,7 @@ static void skb_release_data(struct sk_buff *skb)
skb_zcopy_clear(skb, true);

for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i]);
__skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);

if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);

@@ -1046,6 +1050,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->nohdr = 0;
n->peeked = 0;
C(pfmemalloc);
C(pp_recycle);
n->destructor = NULL;
C(tail);
C(end);

@@ -1289,7 +1294,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
}
spin_unlock_irqrestore(&q->lock, flags);

sk->sk_error_report(sk);
sk_error_report(sk);

release:
consume_skb(skb);

@@ -3497,7 +3502,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];

skb_frag_size_add(fragto, skb_frag_size(fragfrom));
__skb_frag_unref(fragfrom);
__skb_frag_unref(fragfrom, skb->pp_recycle);
}

/* Reposition in the original skb */

@@ -4680,7 +4685,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)

skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_error_report(sk);
sk_error_report(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);

@@ -4711,7 +4716,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
sk->sk_err = 0;

if (skb_next)
sk->sk_error_report(sk);
sk_error_report(sk);

return skb;
}

@@ -5287,6 +5292,13 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;

/* The page pool signature of struct page will eventually figure out
* which pages can be recycled or not but for now let's prohibit slab
* allocated and page_pool allocated SKBs from being coalesced.
*/
if (to->pp_recycle != from->pp_recycle)
return false;

if (len <= skb_tailroom(to)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
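The skb_free_head() hunk above relies on skb_pp_recycle(), added elsewhere in this series (include/linux/skbuff.h) and not shown in this excerpt. Roughly, as a sketch rather than a verbatim quote, it gates recycling on the new skb->pp_recycle bit before handing the head page back to its page pool:

static inline bool skb_pp_recycle(struct sk_buff *skb, void *data)
{
	/* Only skbs explicitly marked by the driver as page_pool backed
	 * may return their pages to the pool; everything else frees as before.
	 */
	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
		return false;
	return page_pool_return_skb_page(virt_to_page(data));
}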
net/core/skmsg.c
@@ -399,29 +399,6 @@ out:
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);

int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
long timeo, int *err)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
int ret = 0;

if (sk->sk_shutdown & RCV_SHUTDOWN)
return 1;

if (!timeo)
return ret;

add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
ret = sk_wait_event(sk, &timeo,
!list_empty(&psock->ingress_msg) ||
!skb_queue_empty(&sk->sk_receive_queue), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_wait_data);

/* Receive sk_msg from psock->ingress_msg to @msg. */
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags)

@@ -601,6 +578,12 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
return sk_psock_skb_ingress(psock, skb);
}

static void sock_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
kfree_skb(skb);
}

static void sk_psock_backlog(struct work_struct *work)
{
struct sk_psock *psock = container_of(work, struct sk_psock, work);

@@ -640,7 +623,7 @@ start:
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
kfree_skb(skb);
sock_drop(psock->sk, skb);
goto end;
}
off += ret;

@@ -731,7 +714,7 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)

while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
skb_bpf_redirect_clear(skb);
kfree_skb(skb);
sock_drop(psock->sk, skb);
}
__sk_psock_purge_ingress_msg(psock);
}

@@ -847,7 +830,7 @@ out:
}
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);

static void sk_psock_skb_redirect(struct sk_buff *skb)
static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;

@@ -857,8 +840,8 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
* return code, but then didn't set a redirect interface.
*/
if (unlikely(!sk_other)) {
kfree_skb(skb);
return;
sock_drop(from->sk, skb);
return -EIO;
}
psock_other = sk_psock(sk_other);
/* This error indicates the socket is being torn down or had another

@@ -866,26 +849,30 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
* a socket that is in this state so we drop the skb.
*/
if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
kfree_skb(skb);
return;
skb_bpf_redirect_clear(skb);
sock_drop(from->sk, skb);
return -EIO;
}
spin_lock_bh(&psock_other->ingress_lock);
if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
spin_unlock_bh(&psock_other->ingress_lock);
kfree_skb(skb);
return;
skb_bpf_redirect_clear(skb);
sock_drop(from->sk, skb);
return -EIO;
}

skb_queue_tail(&psock_other->ingress_skb, skb);
schedule_work(&psock_other->work);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}

static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
static void sk_psock_tls_verdict_apply(struct sk_buff *skb,
struct sk_psock *from, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
sk_psock_skb_redirect(skb);
sk_psock_skb_redirect(from, skb);
break;
case __SK_PASS:
case __SK_DROP:

@@ -909,20 +896,21 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
sk_psock_tls_verdict_apply(skb, psock->sk, ret);
sk_psock_tls_verdict_apply(skb, psock, ret);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);

static void sk_psock_verdict_apply(struct sk_psock *psock,
struct sk_buff *skb, int verdict)
static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
int verdict)
{
struct sock *sk_other;
int err = -EIO;
int err = 0;

switch (verdict) {
case __SK_PASS:
err = -EIO;
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {

@@ -945,18 +933,25 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_work(&psock->work);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
if (err < 0) {
skb_bpf_redirect_clear(skb);
goto out_free;
}
}
break;
case __SK_REDIRECT:
sk_psock_skb_redirect(skb);
err = sk_psock_skb_redirect(psock, skb);
break;
case __SK_DROP:
default:
out_free:
kfree_skb(skb);
sock_drop(psock->sk, skb);
}

return err;
}

static void sk_psock_write_space(struct sock *sk)

@@ -988,7 +983,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
sk = strp->sk;
psock = sk_psock(sk);
if (unlikely(!psock)) {
kfree_skb(skb);
sock_drop(sk, skb);
goto out;
}
prog = READ_ONCE(psock->progs.stream_verdict);

@@ -1109,7 +1104,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
psock = sk_psock(sk);
if (unlikely(!psock)) {
len = 0;
kfree_skb(skb);
sock_drop(sk, skb);
goto out;
}
prog = READ_ONCE(psock->progs.stream_verdict);

@@ -1123,7 +1118,8 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
sk_psock_verdict_apply(psock, skb, ret);
if (sk_psock_verdict_apply(psock, skb, ret) < 0)
len = 0;
out:
rcu_read_unlock();
return len;
net/core/sock.c
@@ -331,6 +331,22 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sk_backlog_rcv);

void sk_error_report(struct sock *sk)
{
sk->sk_error_report(sk);

switch (sk->sk_family) {
case AF_INET:
fallthrough;
case AF_INET6:
trace_inet_sk_error_report(sk);
break;
default:
break;
}
}
EXPORT_SYMBOL(sk_error_report);

static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;

@@ -776,6 +792,58 @@ void sock_enable_timestamps(struct sock *sk)
}
EXPORT_SYMBOL(sock_enable_timestamps);

void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
{
switch (optname) {
case SO_TIMESTAMP_OLD:
__sock_set_timestamps(sk, valbool, false, false);
break;
case SO_TIMESTAMP_NEW:
__sock_set_timestamps(sk, valbool, true, false);
break;
case SO_TIMESTAMPNS_OLD:
__sock_set_timestamps(sk, valbool, false, true);
break;
case SO_TIMESTAMPNS_NEW:
__sock_set_timestamps(sk, valbool, true, true);
break;
}
}

int sock_set_timestamping(struct sock *sk, int optname, int val)
{
if (val & ~SOF_TIMESTAMPING_MASK)
return -EINVAL;

if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP &&
sk->sk_type == SOCK_STREAM) {
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
sk->sk_tskey = tcp_sk(sk)->snd_una;
} else {
sk->sk_tskey = 0;
}
}

if (val & SOF_TIMESTAMPING_OPT_STATS &&
!(val & SOF_TIMESTAMPING_OPT_TSONLY))
return -EINVAL;

sk->sk_tsflags = val;
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);

if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
return 0;
}

void sock_set_keepalive(struct sock *sk)
{
lock_sock(sk);

@@ -997,54 +1065,15 @@ set_sndbuf:
break;

case SO_TIMESTAMP_OLD:
__sock_set_timestamps(sk, valbool, false, false);
break;
case SO_TIMESTAMP_NEW:
__sock_set_timestamps(sk, valbool, true, false);
break;
case SO_TIMESTAMPNS_OLD:
__sock_set_timestamps(sk, valbool, false, true);
break;
case SO_TIMESTAMPNS_NEW:
__sock_set_timestamps(sk, valbool, true, true);
sock_set_timestamp(sk, valbool, optname);
break;

case SO_TIMESTAMPING_NEW:
case SO_TIMESTAMPING_OLD:
if (val & ~SOF_TIMESTAMPING_MASK) {
ret = -EINVAL;
break;
}

if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP &&
sk->sk_type == SOCK_STREAM) {
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN)) {
ret = -EINVAL;
break;
}
sk->sk_tskey = tcp_sk(sk)->snd_una;
} else {
sk->sk_tskey = 0;
}
}

if (val & SOF_TIMESTAMPING_OPT_STATS &&
!(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
ret = -EINVAL;
break;
}

sk->sk_tsflags = val;
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);

if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
ret = sock_set_timestamping(sk, optname, val);
break;

case SO_RCVLOWAT:

@@ -1622,6 +1651,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_bound_dev_if;
break;

case SO_NETNS_COOKIE:
lv = sizeof(u64);
if (len != lv)
return -EINVAL;
v.val64 = sock_net(sk)->net_cookie;
break;

default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
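The sock_getsockopt() hunk above rejects any option length other than sizeof(u64). A minimal userspace usage sketch, assuming SO_NETNS_COOKIE carries the value 71 from the 5.14 uapi headers (the fallback define is only for older toolchains):

#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_NETNS_COOKIE
#define SO_NETNS_COOKIE 71	/* include/uapi/asm-generic/socket.h */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	uint64_t cookie = 0;
	socklen_t len = sizeof(cookie);	/* must be exactly 8 bytes */

	if (fd < 0)
		return 1;
	if (getsockopt(fd, SOL_SOCKET, SO_NETNS_COOKIE, &cookie, &len) == 0)
		printf("netns cookie: %llu\n", (unsigned long long)cookie);
	return 0;
}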
net/core/sock_map.c
@@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);

stab->sks = bpf_map_area_alloc(stab->map.max_entries *
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
stab->map.numa_node);
if (!stab->sks) {
net/core/sock_reuseport.c
@@ -6,6 +6,7 @@
* selecting the socket index from the array of available sockets.
*/

#include <net/ip.h>
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>

@@ -17,6 +18,74 @@
DEFINE_SPINLOCK(reuseport_lock);

static DEFINE_IDA(reuseport_ida);
static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
struct sock_reuseport *reuse, bool bind_inany);

static int reuseport_sock_index(struct sock *sk,
const struct sock_reuseport *reuse,
bool closed)
{
int left, right;

if (!closed) {
left = 0;
right = reuse->num_socks;
} else {
left = reuse->max_socks - reuse->num_closed_socks;
right = reuse->max_socks;
}

for (; left < right; left++)
if (reuse->socks[left] == sk)
return left;
return -1;
}

static void __reuseport_add_sock(struct sock *sk,
struct sock_reuseport *reuse)
{
reuse->socks[reuse->num_socks] = sk;
/* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
smp_wmb();
reuse->num_socks++;
}

static bool __reuseport_detach_sock(struct sock *sk,
struct sock_reuseport *reuse)
{
int i = reuseport_sock_index(sk, reuse, false);

if (i == -1)
return false;

reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
reuse->num_socks--;

return true;
}

static void __reuseport_add_closed_sock(struct sock *sk,
struct sock_reuseport *reuse)
{
reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
/* paired with READ_ONCE() in inet_csk_bind_conflict() */
WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
}

static bool __reuseport_detach_closed_sock(struct sock *sk,
struct sock_reuseport *reuse)
{
int i = reuseport_sock_index(sk, reuse, true);

if (i == -1)
return false;

reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
/* paired with READ_ONCE() in inet_csk_bind_conflict() */
WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);

return true;
}

static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{

@@ -49,6 +118,12 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
if (reuse) {
if (reuse->num_closed_socks) {
/* sk was shutdown()ed before */
ret = reuseport_resurrect(sk, reuse, NULL, bind_inany);
goto out;
}

/* Only set reuse->bind_inany if the bind_inany is true.
* Otherwise, it will overwrite the reuse->bind_inany
* which was set by the bind/hash path.

@@ -72,9 +147,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
}

reuse->reuseport_id = id;
reuse->bind_inany = bind_inany;
reuse->socks[0] = sk;
reuse->num_socks = 1;
reuse->bind_inany = bind_inany;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

out:

@@ -90,14 +165,30 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
u32 more_socks_size, i;

more_socks_size = reuse->max_socks * 2U;
if (more_socks_size > U16_MAX)
if (more_socks_size > U16_MAX) {
if (reuse->num_closed_socks) {
/* Make room by removing a closed sk.
* The child has already been migrated.
* Only reqsk left at this point.
*/
struct sock *sk;

sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL);
__reuseport_detach_closed_sock(sk, reuse);

return reuse;
}

return NULL;
}

more_reuse = __reuseport_alloc(more_socks_size);
if (!more_reuse)
return NULL;

more_reuse->num_socks = reuse->num_socks;
more_reuse->num_closed_socks = reuse->num_closed_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
more_reuse->bind_inany = reuse->bind_inany;

@@ -105,9 +196,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)

memcpy(more_reuse->socks, reuse->socks,
reuse->num_socks * sizeof(struct sock *));
memcpy(more_reuse->socks +
(more_reuse->max_socks - more_reuse->num_closed_socks),
reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
reuse->num_closed_socks * sizeof(struct sock *));
more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);

for (i = 0; i < reuse->num_socks; ++i)
for (i = 0; i < reuse->max_socks; ++i)
rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
more_reuse);

@@ -152,13 +247,21 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
lockdep_is_held(&reuseport_lock));
if (old_reuse && old_reuse->num_closed_socks) {
/* sk was shutdown()ed before */
int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany);

spin_unlock_bh(&reuseport_lock);
return err;
}

if (old_reuse && old_reuse->num_socks != 1) {
spin_unlock_bh(&reuseport_lock);
return -EBUSY;
}

if (reuse->num_socks == reuse->max_socks) {
if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);

@@ -166,10 +269,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
}
}

reuse->socks[reuse->num_socks] = sk;
/* paired with smp_rmb() in reuseport_select_sock() */
smp_wmb();
reuse->num_socks++;
__reuseport_add_sock(sk, reuse);
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

spin_unlock_bh(&reuseport_lock);

@@ -180,15 +280,77 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
}
EXPORT_SYMBOL(reuseport_add_sock);
static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
struct sock_reuseport *reuse, bool bind_inany)
{
if (old_reuse == reuse) {
/* If sk was in the same reuseport group, just pop sk out of
* the closed section and push sk into the listening section.
*/
__reuseport_detach_closed_sock(sk, old_reuse);
__reuseport_add_sock(sk, old_reuse);
return 0;
}

if (!reuse) {
/* In bind()/listen() path, we cannot carry over the eBPF prog
* for the shutdown()ed socket. In setsockopt() path, we should
* not change the eBPF prog of listening sockets by attaching a
* prog to the shutdown()ed socket. Thus, we will allocate a new
* reuseport group and detach sk from the old group.
*/
int id;

reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse)
return -ENOMEM;

id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
if (id < 0) {
kfree(reuse);
return id;
}

reuse->reuseport_id = id;
reuse->bind_inany = bind_inany;
} else {
/* Move sk from the old group to the new one if
* - all the other listeners in the old group were close()d or
* shutdown()ed, and then sk2 has listen()ed on the same port
* OR
* - sk listen()ed without bind() (or with autobind), was
* shutdown()ed, and then listen()s on another port which
* sk2 listen()s on.
*/
if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
if (!reuse)
return -ENOMEM;
}
}

__reuseport_detach_closed_sock(sk, old_reuse);
__reuseport_add_sock(sk, reuse);
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

if (old_reuse->num_socks + old_reuse->num_closed_socks == 0)
call_rcu(&old_reuse->rcu, reuseport_free_rcu);

return 0;
}

void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
int i;

spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));

/* reuseport_grow() has detached a closed sk */
if (!reuse)
goto out;

/* Notify the bpf side. The sk may be added to a sockarray
* map. If so, sockarray logic will remove it from the map.
*

@@ -201,19 +363,52 @@ void reuseport_detach_sock(struct sock *sk)

rcu_assign_pointer(sk->sk_reuseport_cb, NULL);

for (i = 0; i < reuse->num_socks; i++) {
if (reuse->socks[i] == sk) {
reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
reuse->num_socks--;
if (reuse->num_socks == 0)
call_rcu(&reuse->rcu, reuseport_free_rcu);
break;
}
}
if (!__reuseport_detach_closed_sock(sk, reuse))
__reuseport_detach_sock(sk, reuse);

if (reuse->num_socks + reuse->num_closed_socks == 0)
call_rcu(&reuse->rcu, reuseport_free_rcu);

out:
spin_unlock_bh(&reuseport_lock);
}
EXPORT_SYMBOL(reuseport_detach_sock);

void reuseport_stop_listen_sock(struct sock *sk)
{
if (sk->sk_protocol == IPPROTO_TCP) {
struct sock_reuseport *reuse;
struct bpf_prog *prog;

spin_lock_bh(&reuseport_lock);

reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
prog = rcu_dereference_protected(reuse->prog,
lockdep_is_held(&reuseport_lock));

if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
(prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* Migration capable, move sk from the listening section
* to the closed section.
*/
bpf_sk_reuseport_detach(sk);

__reuseport_detach_sock(sk, reuse);
__reuseport_add_closed_sock(sk, reuse);

spin_unlock_bh(&reuseport_lock);
return;
}

spin_unlock_bh(&reuseport_lock);
}

/* Not capable to do migration, detach immediately */
reuseport_detach_sock(sk);
}
EXPORT_SYMBOL(reuseport_stop_listen_sock);

static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
struct bpf_prog *prog, struct sk_buff *skb,
int hdr_len)

@@ -244,6 +439,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
return reuse->socks[index];
}

static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
u32 hash, u16 num_socks)
{
int i, j;

i = j = reciprocal_scale(hash, num_socks);
while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
i++;
if (i >= num_socks)
i = 0;
if (i == j)
return NULL;
}

return reuse->socks[i];
}

/**
* reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
* @sk: First socket in the group.

@@ -274,32 +486,21 @@ struct sock *reuseport_select_sock(struct sock *sk,
prog = rcu_dereference(reuse->prog);
socks = READ_ONCE(reuse->num_socks);
if (likely(socks)) {
/* paired with smp_wmb() in reuseport_add_sock() */
/* paired with smp_wmb() in __reuseport_add_sock() */
smp_rmb();

if (!prog || !skb)
goto select_by_hash;

if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash);
else
sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);

select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
if (!sk2) {
int i, j;

i = j = reciprocal_scale(hash, socks);
while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
i++;
if (i >= socks)
i = 0;
if (i == j)
goto out;
}
sk2 = reuse->socks[i];
}
if (!sk2)
sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
}

out:

@@ -308,14 +509,90 @@ out:
}
EXPORT_SYMBOL(reuseport_select_sock);
/**
* reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
* @sk: close()ed or shutdown()ed socket in the group.
* @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
* NEW_SYN_RECV request socket during 3WHS.
* @skb: skb to run through BPF filter.
* Returns a socket (with sk_refcnt +1) that should accept the child socket
* (or NULL on error).
*/
struct sock *reuseport_migrate_sock(struct sock *sk,
struct sock *migrating_sk,
struct sk_buff *skb)
{
struct sock_reuseport *reuse;
struct sock *nsk = NULL;
bool allocated = false;
struct bpf_prog *prog;
u16 socks;
u32 hash;

rcu_read_lock();

reuse = rcu_dereference(sk->sk_reuseport_cb);
if (!reuse)
goto out;

socks = READ_ONCE(reuse->num_socks);
if (unlikely(!socks))
goto failure;

/* paired with smp_wmb() in __reuseport_add_sock() */
smp_rmb();

hash = migrating_sk->sk_hash;
prog = rcu_dereference(reuse->prog);
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
goto select_by_hash;
goto failure;
}

if (!skb) {
skb = alloc_skb(0, GFP_ATOMIC);
if (!skb)
goto failure;
allocated = true;
}

nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash);

if (allocated)
kfree_skb(skb);

select_by_hash:
if (!nsk)
nsk = reuseport_select_sock_by_hash(reuse, hash, socks);

if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
nsk = NULL;
goto failure;
}

out:
rcu_read_unlock();
return nsk;

failure:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
goto out;
}
EXPORT_SYMBOL(reuseport_migrate_sock);

int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;

if (sk_unhashed(sk) && sk->sk_reuseport) {
int err = reuseport_alloc(sk, false);
if (sk_unhashed(sk)) {
int err;

if (!sk->sk_reuseport)
return -EINVAL;

err = reuseport_alloc(sk, false);
if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {

@@ -341,13 +618,24 @@ int reuseport_detach_prog(struct sock *sk)
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;

if (!rcu_access_pointer(sk->sk_reuseport_cb))
return sk->sk_reuseport ? -ENOENT : -EINVAL;

old_prog = NULL;
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));

/* reuse must be checked after acquiring the reuseport_lock
* because reuseport_grow() can detach a closed sk.
*/
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
return sk->sk_reuseport ? -ENOENT : -EINVAL;
}

if (sk_unhashed(sk) && reuse->num_closed_socks) {
spin_unlock_bh(&reuseport_lock);
return -ENOENT;
}

old_prog = rcu_replace_pointer(reuse->prog, old_prog,
lockdep_is_held(&reuseport_lock));
spin_unlock_bh(&reuseport_lock);
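reuseport_migrate_sock() above only consults a BPF program whose expected_attach_type is BPF_SK_REUSEPORT_SELECT_OR_MIGRATE. A hypothetical libbpf-style program sketch (the map, section and function names are placeholders, not from this series) that handles both normal selection and the new migration case, where reuse_md->migrating_sk is non-NULL:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
	__uint(max_entries, 8);
	__type(key, __u32);
	__type(value, __u64);
} reuseport_map SEC(".maps");

SEC("sk_reuseport/migrate")
int select_or_migrate(struct sk_reuseport_md *reuse_md)
{
	__u32 index = 0;

	/* reuse_md->migrating_sk is NULL for ordinary SYN lookups and points
	 * to the child/request socket when a closed listener's queued
	 * connections are being migrated to another listener.
	 */
	if (bpf_sk_select_reuseport(reuse_md, &reuseport_map, &index, 0) == 0)
		return SK_PASS;

	return reuse_md->migrating_sk ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";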
net/core/xdp.c
@@ -113,8 +113,13 @@ static void mem_allocator_disconnect(void *allocator)
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
int type = xdp_rxq->mem.type;
int id = xdp_rxq->mem.id;

/* Reset mem info to defaults */
xdp_rxq->mem.id = 0;
xdp_rxq->mem.type = 0;

if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
WARN(1, "Missing register, driver bug");
return;

@@ -123,7 +128,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;

if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
if (type == MEM_TYPE_PAGE_POOL) {
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
page_pool_destroy(xa->page_pool);

@@ -144,10 +149,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)

xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;

/* Reset mem info to defaults */
xdp_rxq->mem.id = 0;
xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

@@ -584,3 +585,31 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
return __xdp_build_skb_from_frame(xdpf, skb, dev);
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);

struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
{
unsigned int headroom, totalsize;
struct xdp_frame *nxdpf;
struct page *page;
void *addr;

headroom = xdpf->headroom + sizeof(*xdpf);
totalsize = headroom + xdpf->len;

if (unlikely(totalsize > PAGE_SIZE))
return NULL;
page = dev_alloc_page();
if (!page)
return NULL;
addr = page_to_virt(page);

memcpy(addr, xdpf, totalsize);

nxdpf = addr;
nxdpf->data = addr + headroom;
nxdpf->frame_sz = PAGE_SIZE;
nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
nxdpf->mem.id = 0;

return nxdpf;
}
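xdpf_clone() above exists so a frame can be duplicated when it is redirected to several devices at once; its consumer is the broadcast mode of bpf_redirect_map() added in this series. A hypothetical XDP program sketch (the map name is a placeholder) that floods every device in a devmap except the ingress interface:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 32);
	__type(key, __u32);
	__type(value, __u32);
} flood_ports SEC(".maps");

SEC("xdp")
int xdp_flood(struct xdp_md *ctx)
{
	/* With BPF_F_BROADCAST the key is ignored and the frame is cloned to
	 * every entry in the map; BPF_F_EXCLUDE_INGRESS skips the interface
	 * the packet arrived on.
	 */
	return bpf_redirect_map(&flood_ports, 0,
				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}

char _license[] SEC("license") = "GPL";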