From 26943aefa8704ca7871c34a2d1b2b2a418372666 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:16 -0500 Subject: [PATCH 1/7] sctp: verify the bind address with the tb_id from l3mdev After binding to a l3mdev, it should use the route table from the corresponding VRF to verify the addr when binding to an address. Note ipv6 doesn't need it, as binding to ipv6 address does not verify the addr with route lookup. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/protocol.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bcd3384ab07a..dbfe7d1000c2 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -351,10 +351,13 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, /* Should this be available for binding? */ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - struct net *net = sock_net(&sp->inet.sk); - int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr); - + struct sock *sk = &sp->inet.sk; + struct net *net = sock_net(sk); + int tb_id = RT_TABLE_LOCAL; + int ret; + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ?: tb_id; + ret = inet_addr_type_table(net, addr->v4.sin_addr.s_addr, tb_id); if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && From 6fe1e52490a91cb23f6b3aafc93e7c5beb99f862 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:17 -0500 Subject: [PATCH 2/7] sctp: check ipv6 addr with sk_bound_dev if set When binding to an ipv6 address, it calls ipv6_chk_addr() to check if this address is on any dev. If a socket binds to a l3mdev but no dev is passed to do this check, all l3mdev and slaves will be skipped and the check will fail. This patch is to pass the bound_dev to make sure the devices under the same l3mdev can be returned in ipv6_chk_addr(). When the bound_dev is not a l3mdev or l3slave, l3mdev_master_dev_rcu() will return NULL in __ipv6_chk_addr_and_flags(), it will keep compitable with before when NULL dev was passed. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index d081858c2d07..e6274cdbdf6c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -680,9 +680,11 @@ static int sctp_v6_is_any(const union sctp_addr *addr) /* Should this be available for binding? */ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) { - int type; - struct net *net = sock_net(&sp->inet.sk); const struct in6_addr *in6 = (const struct in6_addr *)&addr->v6.sin6_addr; + struct sock *sk = &sp->inet.sk; + struct net *net = sock_net(sk); + struct net_device *dev = NULL; + int type; type = ipv6_addr_type(in6); if (IPV6_ADDR_ANY == type) @@ -696,8 +698,14 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; + if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) + return 0; + } + return ipv6_can_nonlocal_bind(net, &sp->inet) || - ipv6_chk_addr(net, in6, NULL, 0); + ipv6_chk_addr(net, in6, dev, 0); } /* This function checks if the address is a valid address to be used for From f87b1ac06c887210782eab9f105ffd9045be74cc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:18 -0500 Subject: [PATCH 3/7] sctp: check sk_bound_dev_if when matching ep in get_port In sctp_get_port_local(), when binding to IP and PORT, it should also check sk_bound_dev_if to match listening sk if it's set by SO_BINDTOIFINDEX, so that multiple sockets with the same IP and PORT, but different sk_bound_dev_if can be listened at the same time. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e83963d1b8a..4306164238ef 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8398,6 +8398,7 @@ pp_found: * in an endpoint. */ sk_for_each_bound(sk2, &pp->owner) { + int bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); struct sctp_sock *sp2 = sctp_sk(sk2); struct sctp_endpoint *ep2 = sp2->ep; @@ -8408,7 +8409,9 @@ pp_found: uid_eq(uid, sock_i_uid(sk2)))) continue; - if (sctp_bind_addr_conflict(&ep2->base.bind_addr, + if ((!sk->sk_bound_dev_if || !bound_dev_if2 || + sk->sk_bound_dev_if == bound_dev_if2) && + sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, sp2, sp)) { ret = 1; goto fail_unlock; From 33e93ed2209d5971043bed41dd194bc583b57ef3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:19 -0500 Subject: [PATCH 4/7] sctp: add skb_sdif in struct sctp_af Add skb_sdif function in struct sctp_af to get the enslaved device for both ipv4 and ipv6 when adding SCTP VRF support in sctp_rcv in the next patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/ipv6.c | 8 +++++++- net/sctp/protocol.c | 6 ++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 350f250b0dc7..7b4884c63b26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -477,6 +477,7 @@ struct sctp_af { int (*available) (union sctp_addr *, struct sctp_sock *); int (*skb_iif) (const struct sk_buff *sk); + int (*skb_sdif)(const struct sk_buff *sk); int (*is_ce) (const struct sk_buff *sk); void (*seq_dump_addr)(struct seq_file *seq, union sctp_addr *addr); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e6274cdbdf6c..097bd60ce964 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -842,7 +842,12 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) /* Where did this skb come from? */ static int sctp_v6_skb_iif(const struct sk_buff *skb) { - return IP6CB(skb)->iif; + return inet6_iif(skb); +} + +static int sctp_v6_skb_sdif(const struct sk_buff *skb) +{ + return inet6_sdif(skb); } /* Was this packet marked by Explicit Congestion Notification? */ @@ -1142,6 +1147,7 @@ static struct sctp_af sctp_af_inet6 = { .is_any = sctp_v6_is_any, .available = sctp_v6_available, .skb_iif = sctp_v6_skb_iif, + .skb_sdif = sctp_v6_skb_sdif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, .ecn_capable = sctp_v6_ecn_capable, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index dbfe7d1000c2..a18cf0471a8d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -567,6 +567,11 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb) return inet_iif(skb); } +static int sctp_v4_skb_sdif(const struct sk_buff *skb) +{ + return inet_sdif(skb); +} + /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v4_is_ce(const struct sk_buff *skb) { @@ -1185,6 +1190,7 @@ static struct sctp_af sctp_af_inet = { .available = sctp_v4_available, .scope = sctp_v4_scope, .skb_iif = sctp_v4_skb_iif, + .skb_sdif = sctp_v4_skb_sdif, .is_ce = sctp_v4_is_ce, .seq_dump_addr = sctp_v4_seq_dump_addr, .ecn_capable = sctp_v4_ecn_capable, From 0af03170637f47fb5cc6501d4b2dcbf1c14772a9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:20 -0500 Subject: [PATCH 5/7] sctp: add dif and sdif check in asoc and ep lookup This patch at first adds a pernet global l3mdev_accept to decide if it accepts the packets from a l3mdev when a SCTP socket doesn't bind to any interface. It's set to 1 to avoid any possible incompatible issue, and in next patch, a sysctl will be introduced to allow to change it. Then similar to inet/udp_sk_bound_dev_eq(), sctp_sk_bound_dev_eq() is added to check either dif or sdif is equal to sk_bound_dev_if, and to check sid is 0 or l3mdev_accept is 1 if sk_bound_dev_if is not set. This function is used to match a association or a endpoint, namely called by sctp_addrs_lookup_transport() and sctp_endpoint_is_match(). All functions that needs updating are: sctp_rcv(): asoc: __sctp_rcv_lookup() __sctp_lookup_association() -> sctp_addrs_lookup_transport() __sctp_rcv_lookup_harder() __sctp_rcv_init_lookup() __sctp_lookup_association() -> sctp_addrs_lookup_transport() __sctp_rcv_walk_lookup() __sctp_rcv_asconf_lookup() __sctp_lookup_association() -> sctp_addrs_lookup_transport() ep: __sctp_rcv_lookup_endpoint() -> sctp_endpoint_is_match() sctp_connect(): sctp_endpoint_is_peeled_off() __sctp_lookup_association() sctp_has_association() sctp_lookup_association() __sctp_lookup_association() -> sctp_addrs_lookup_transport() sctp_diag_dump_one(): sctp_transport_lookup_process() -> sctp_addrs_lookup_transport() Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 4 ++ include/net/sctp/sctp.h | 6 ++- include/net/sctp/structs.h | 8 +-- net/sctp/diag.c | 3 +- net/sctp/endpointola.c | 13 +++-- net/sctp/input.c | 108 ++++++++++++++++++++----------------- net/sctp/protocol.c | 4 ++ net/sctp/socket.c | 4 +- 8 files changed, 89 insertions(+), 61 deletions(-) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index a681147aecd8..7eff3d981b89 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -175,6 +175,10 @@ struct netns_sctp { /* Threshold for autoclose timeout, in seconds. */ unsigned long max_autoclose; + +#ifdef CONFIG_NET_L3_MASTER_DEV + int l3mdev_accept; +#endif }; #endif /* __NETNS_SCTP_H__ */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 01d904b34cf0..c335dd01a597 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,7 +109,7 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p); + const union sctp_addr *paddr, void *p, int dif); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); @@ -157,10 +157,12 @@ void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); + const union sctp_addr *paddr, + int dif, int sdif); struct sctp_transport *sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr); +bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif); /* * sctp/proc.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 7b4884c63b26..afa3781e3ca2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1379,10 +1379,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc( struct sctp_transport **); bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr); -struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *, - struct net *, const union sctp_addr *); +struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, + struct net *net, + const union sctp_addr *laddr, + int dif, int sdif); bool sctp_has_association(struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); + const union sctp_addr *paddr, int dif, int sdif); int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index d9c6d8f30f09..a557009e9832 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -426,6 +426,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, struct net *net = sock_net(skb->sk); const struct nlmsghdr *nlh = cb->nlh; union sctp_addr laddr, paddr; + int dif = req->id.idiag_if; struct sctp_comm_param commp = { .skb = skb, .r = req, @@ -454,7 +455,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, } return sctp_transport_lookup_process(sctp_sock_dump_one, - net, &laddr, &paddr, &commp); + net, &laddr, &paddr, &commp, dif); } static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index efffde7f2328..7e77b450697c 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -246,12 +246,15 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) /* Is this the endpoint we are looking for? */ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, struct net *net, - const union sctp_addr *laddr) + const union sctp_addr *laddr, + int dif, int sdif) { + int bound_dev_if = READ_ONCE(ep->base.sk->sk_bound_dev_if); struct sctp_endpoint *retval = NULL; - if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && - net_eq(ep->base.net, net)) { + if (net_eq(ep->base.net, net) && + sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) && + (htons(ep->base.bind_addr.port) == laddr->v4.sin_port)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; @@ -298,6 +301,7 @@ out: bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { + int bound_dev_if = READ_ONCE(ep->base.sk->sk_bound_dev_if); struct sctp_sockaddr_entry *addr; struct net *net = ep->base.net; struct sctp_bind_addr *bp; @@ -307,7 +311,8 @@ bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, * so the address_list can not change. */ list_for_each_entry(addr, &bp->address_list, list) { - if (sctp_has_association(net, &addr->a, paddr)) + if (sctp_has_association(net, &addr->a, paddr, + bound_dev_if, bound_dev_if)) return true; } diff --git a/net/sctp/input.c b/net/sctp/input.c index 4f43afa8678f..bf70371301ff 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -50,16 +50,19 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, - struct sctp_transport **transportp); + struct sctp_transport **transportp, + int dif, int sdif); static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - const union sctp_addr *daddr); + const union sctp_addr *daddr, + int dif, int sdif); static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, const union sctp_addr *peer, - struct sctp_transport **pt); + struct sctp_transport **pt, + int dif, int sdif); static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); @@ -92,11 +95,11 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; - int bound_dev_if; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb); + int dif, sdif; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -141,6 +144,8 @@ int sctp_rcv(struct sk_buff *skb) /* Initialize local addresses for lookups. */ af->from_skb(&src, skb, 1); af->from_skb(&dest, skb, 0); + dif = af->skb_iif(skb); + sdif = af->skb_sdif(skb); /* If the packet is to or from a non-unicast address, * silently discard the packet. @@ -157,35 +162,15 @@ int sctp_rcv(struct sk_buff *skb) !af->addr_valid(&dest, NULL, skb)) goto discard_it; - asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport); + asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport, dif, sdif); if (!asoc) - ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src); + ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src, dif, sdif); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; sk = rcvr->sk; - /* - * If a frame arrives on an interface and the receiving socket is - * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB - */ - bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) { - if (transport) { - sctp_transport_put(transport); - asoc = NULL; - transport = NULL; - } else { - sctp_endpoint_put(ep); - ep = NULL; - } - sk = net->sctp.ctl_sock; - ep = sctp_sk(sk)->ep; - sctp_endpoint_hold(ep); - rcvr = &ep->base; - } - /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * An SCTP packet is called an "out of the blue" (OOTB) @@ -485,6 +470,8 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association *asoc; struct sctp_transport *transport = NULL; __u32 vtag = ntohl(sctphdr->vtag); + int sdif = inet_sdif(skb); + int dif = inet_iif(skb); *app = NULL; *tpp = NULL; @@ -500,7 +487,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, /* Look for an association that matches the incoming ICMP error * packet. */ - asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport); + asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport, dif, sdif); if (!asoc) return NULL; @@ -850,7 +837,8 @@ static inline __u32 sctp_hashfn(const struct net *net, __be16 lport, static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - const union sctp_addr *paddr) + const union sctp_addr *paddr, + int dif, int sdif) { struct sctp_hashbucket *head; struct sctp_endpoint *ep; @@ -863,7 +851,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(ep, &head->chain) { - if (sctp_endpoint_is_match(ep, net, laddr)) + if (sctp_endpoint_is_match(ep, net, laddr, dif, sdif)) goto hit; } @@ -990,14 +978,26 @@ void sctp_unhash_transport(struct sctp_transport *t) sctp_hash_params); } +bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) +{ + bool l3mdev_accept = true; + +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + l3mdev_accept = !!READ_ONCE(net->sctp.l3mdev_accept); +#endif + return inet_bound_dev_eq(l3mdev_accept, bound_dev_if, dif, sdif); +} + /* return a transport with holding it */ struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr) + const union sctp_addr *paddr, + int dif, int sdif) { struct rhlist_head *tmp, *list; struct sctp_transport *t; + int bound_dev_if; struct sctp_hash_cmp_arg arg = { .paddr = paddr, .net = net, @@ -1011,7 +1011,9 @@ struct sctp_transport *sctp_addrs_lookup_transport( if (!sctp_transport_hold(t)) continue; - if (sctp_bind_addr_match(&t->asoc->base.bind_addr, + bound_dev_if = READ_ONCE(t->asoc->base.sk->sk_bound_dev_if); + if (sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) && + sctp_bind_addr_match(&t->asoc->base.bind_addr, laddr, sctp_sk(t->asoc->base.sk))) return t; sctp_transport_put(t); @@ -1048,12 +1050,13 @@ static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, const union sctp_addr *peer, - struct sctp_transport **pt) + struct sctp_transport **pt, + int dif, int sdif) { struct sctp_transport *t; struct sctp_association *asoc = NULL; - t = sctp_addrs_lookup_transport(net, local, peer); + t = sctp_addrs_lookup_transport(net, local, peer, dif, sdif); if (!t) goto out; @@ -1069,12 +1072,13 @@ static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc; rcu_read_lock(); - asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); rcu_read_unlock(); return asoc; @@ -1083,11 +1087,12 @@ struct sctp_association *sctp_lookup_association(struct net *net, /* Is there an association matching the given local and peer addresses? */ bool sctp_has_association(struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr) + const union sctp_addr *paddr, + int dif, int sdif) { struct sctp_transport *transport; - if (sctp_lookup_association(net, laddr, paddr, &transport)) { + if (sctp_lookup_association(net, laddr, paddr, &transport, dif, sdif)) { sctp_transport_put(transport); return true; } @@ -1115,7 +1120,8 @@ bool sctp_has_association(struct net *net, */ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sk_buff *skb, - const union sctp_addr *laddr, struct sctp_transport **transportp) + const union sctp_addr *laddr, struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc; union sctp_addr addr; @@ -1154,7 +1160,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, if (!af->from_addr_param(paddr, params.addr, sh->source, 0)) continue; - asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); if (asoc) return asoc; } @@ -1181,7 +1187,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( struct sctp_chunkhdr *ch, const union sctp_addr *laddr, __be16 peer_port, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch; struct sctp_af *af; @@ -1201,7 +1208,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; - return __sctp_lookup_association(net, laddr, &paddr, transportp); + return __sctp_lookup_association(net, laddr, &paddr, transportp, dif, sdif); } @@ -1217,7 +1224,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc = NULL; struct sctp_chunkhdr *ch; @@ -1260,7 +1268,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, asoc = __sctp_rcv_asconf_lookup( net, ch, laddr, sctp_hdr(skb)->source, - transportp); + transportp, dif, sdif); break; default: break; @@ -1285,7 +1293,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_chunkhdr *ch; @@ -1309,9 +1318,9 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, /* If this is INIT/INIT-ACK look inside the chunk too. */ if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK) - return __sctp_rcv_init_lookup(net, skb, laddr, transportp); + return __sctp_rcv_init_lookup(net, skb, laddr, transportp, dif, sdif); - return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp, dif, sdif); } /* Lookup an association for an inbound skb. */ @@ -1319,11 +1328,12 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc; - asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); if (asoc) goto out; @@ -1331,7 +1341,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. */ - asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp, dif, sdif); if (asoc) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a18cf0471a8d..909a89a1cff4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1394,6 +1394,10 @@ static int __net_init sctp_defaults_init(struct net *net) /* Initialize maximum autoclose timeout. */ net->sctp.max_autoclose = INT_MAX / HZ; +#ifdef CONFIG_NET_L3_MASTER_DEV + net->sctp.l3mdev_accept = 1; +#endif + status = sctp_sysctl_net_register(net); if (status) goto err_sysctl_register; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4306164238ef..5acbdf0d38f3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5315,14 +5315,14 @@ EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p) + const union sctp_addr *paddr, void *p, int dif) { struct sctp_transport *transport; struct sctp_endpoint *ep; int err = -ENOENT; rcu_read_lock(); - transport = sctp_addrs_lookup_transport(net, laddr, paddr); + transport = sctp_addrs_lookup_transport(net, laddr, paddr, dif, dif); if (!transport) { rcu_read_unlock(); return err; From b712d0328c2c3ab456847f29f711e785f70cd8a5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:21 -0500 Subject: [PATCH 6/7] sctp: add sysctl net.sctp.l3mdev_accept This patch is to add sysctl net.sctp.l3mdev_accept to allow users to change the pernet global l3mdev_accept. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 9 +++++++++ net/sctp/sysctl.c | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 727b25cc7ec4..7fbd060d6047 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -3127,6 +3127,15 @@ ecn_enable - BOOLEAN Default: 1 +l3mdev_accept - BOOLEAN + Enabling this option allows a "global" bound socket to work + across L3 master domains (e.g., VRFs) with packets capable of + being received regardless of the L3 domain in which they + originated. Only valid when the kernel was compiled with + CONFIG_NET_L3_MASTER_DEV. + + Default: 1 (enabled) + ``/proc/sys/net/core/*`` ======================== diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index b46a416787ec..7f40ed117fc7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -347,6 +347,17 @@ static struct ctl_table sctp_net_table[] = { .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "l3mdev_accept", + .data = &init_net.sctp.l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif { .procname = "pf_enable", .data = &init_net.sctp.pf_enable, From a61bd7b9fef34484a3fe144a62f4ec78cc42e20e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:22 -0500 Subject: [PATCH 7/7] selftests: add a selftest for sctp vrf This patch adds 12 small test cases: 01-04 test for the sysctl net.sctp.l3mdev_accept. 05-10 test for only binding to a right l3mdev device, the connection can be created. 11-12 test for two socks binding to different l3mdev devices at the same time, each of them can process the packets from the corresponding peer. The tests run for both IPv4 and IPv6 SCTP. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 + tools/testing/selftests/net/sctp_hello.c | 137 +++++++++++++++++ tools/testing/selftests/net/sctp_vrf.sh | 178 +++++++++++++++++++++++ 3 files changed, 317 insertions(+) create mode 100644 tools/testing/selftests/net/sctp_hello.c create mode 100755 tools/testing/selftests/net/sctp_vrf.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cec4800cb017..880e6ded6ed5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -72,6 +72,8 @@ TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu +TEST_PROGS += sctp_vrf.sh +TEST_GEN_FILES += sctp_hello TEST_FILES := settings diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c new file mode 100644 index 000000000000..f02f1f95d227 --- /dev/null +++ b/tools/testing/selftests/net/sctp_hello.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include + +static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len) +{ + if (ss->ss_family == AF_INET) { + struct sockaddr_in *a = (struct sockaddr_in *)ss; + + a->sin_addr.s_addr = inet_addr(ip); + a->sin_port = htons(atoi(port)); + *len = sizeof(*a); + } else { + struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss; + + a->sin6_family = AF_INET6; + inet_pton(AF_INET6, ip, &a->sin6_addr); + a->sin6_port = htons(atoi(port)); + *len = sizeof(*a); + } +} + +static int do_client(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + char buf[] = "hello"; + int csk, ret, len; + + if (argc < 5) { + printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]); + return -1; + } + + bzero((void *)&ss, sizeof(ss)); + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (csk < 0) { + printf("failed to create socket\n"); + return -1; + } + + if (argc >= 7) { + set_addr(&ss, argv[5], argv[6], &len); + ret = bind(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = connect(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to connect to peer\n"); + return -1; + } + + ret = send(csk, buf, strlen(buf) + 1, 0); + if (ret < 0) { + printf("failed to send msg %d\n", ret); + return -1; + } + close(csk); + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + int lsk, csk, ret, len; + char buf[20]; + + if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s server|client ...\n", argv[0]); + return -1; + } + + if (!strcmp(argv[1], "client")) + return do_client(argc, argv); + + if (argc < 5) { + printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]); + return -1; + } + + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (lsk < 0) { + printf("failed to create lsk\n"); + return -1; + } + + if (argc >= 6) { + ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE, + argv[5], strlen(argv[5]) + 1); + if (ret < 0) { + printf("failed to bind to device\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = bind(lsk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + + ret = listen(lsk, 5); + if (ret < 0) { + printf("failed to listen on port\n"); + return -1; + } + + csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL); + if (csk < 0) { + printf("failed to accept new client\n"); + return -1; + } + + ret = recv(csk, buf, sizeof(buf), 0); + if (ret <= 0) { + printf("failed to recv msg %d\n", ret); + return -1; + } + close(csk); + close(lsk); + + return 0; +} diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh new file mode 100755 index 000000000000..c721e952e5f3 --- /dev/null +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP VRF. +# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1 +# SERVER_NS +# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 + +CLIENT_NS1="client-ns1" +CLIENT_NS2="client-ns2" +CLIENT_IP4="10.0.0.1" +CLIENT_IP6="2000::1" +CLIENT_PORT=1234 + +SERVER_NS="server-ns" +SERVER_IP4="10.0.0.2" +SERVER_IP6="2000::2" +SERVER_PORT=1234 + +setup() { + modprobe sctp + modprobe sctp_diag + ip netns add $CLIENT_NS1 + ip netns add $CLIENT_NS2 + ip netns add $SERVER_NS + + ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + + ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 + ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 + + ip -n $CLIENT_NS1 link set veth1 up + ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $CLIENT_NS2 link set veth1 up + ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $SERVER_NS link add dummy1 type dummy + ip -n $SERVER_NS link set dummy1 up + ip -n $SERVER_NS link add vrf-1 type vrf table 10 + ip -n $SERVER_NS link add vrf-2 type vrf table 20 + ip -n $SERVER_NS link set vrf-1 up + ip -n $SERVER_NS link set vrf-2 up + ip -n $SERVER_NS link set veth1 master vrf-1 + ip -n $SERVER_NS link set veth2 master vrf-2 + + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2 + + ip -n $SERVER_NS link set veth1 up + ip -n $SERVER_NS link set veth2 up + ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4 + ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6 + ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6 + ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6 +} + +cleanup() { + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns del "$CLIENT_NS1" + ip netns del "$CLIENT_NS2" + ip netns del "$SERVER_NS" +} + +wait_server() { + local IFACE=$1 + local CNT=0 + + until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ + grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do + [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +do_test() { + local CLIENT_NS=$1 + local IFACE=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE 2>&1 >/dev/null & + disown + wait_server $IFACE || return $RET + timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +do_testx() { + local IFACE1=$1 + local IFACE2=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE1 2>&1 >/dev/null & + disown + wait_server $IFACE1 || return $RET + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE2 2>&1 >/dev/null & + disown + wait_server $IFACE2 || return $RET + timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +testup() { + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " + do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " + do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 05: bind veth2 in server, connect from client 1, N " + do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 06: bind veth1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N " + do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y " + do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N " + do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y " + do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" +} + +trap cleanup EXIT +setup && echo "Testing For SCTP VRF:" && \ +CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" && +CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***" +exit $?