Merge branch 'mptcp-more-socket-options'

Mat Martineau says:

====================
mptcp: More socket option support

These patches add MPTCP socket support for a few additional socket
options: IP_TOS, IP_FREEBIND, IP_TRANSPARENT, IPV6_FREEBIND, and
IPV6_TRANSPARENT.

Patch 1 exposes __ip_sock_set_tos() for use in patch 2.

Patch 2 adds IP_TOS support.

Patches 3 and 4 add the freebind and transparent support, with a
selftest for the latter.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-11-20 14:11:00 +00:00
commit 89f9711824
7 changed files with 245 additions and 6 deletions

View File

@ -783,5 +783,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val);
void ip_sock_set_pktinfo(struct sock *sk);
void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
void __ip_sock_set_tos(struct sock *sk, int val);
#endif /* _IP_H */

View File

@ -576,7 +576,7 @@ out:
return err;
}
static void __ip_sock_set_tos(struct sock *sk, int val)
void __ip_sock_set_tos(struct sock *sk, int val)
{
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;

View File

@ -390,6 +390,8 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
switch (optname) {
case IPV6_V6ONLY:
case IPV6_TRANSPARENT:
case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
@ -398,8 +400,24 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
if (ret == 0)
if (ret != 0) {
release_sock(sk);
return ret;
}
sockopt_seq_inc(msk);
switch (optname) {
case IPV6_V6ONLY:
sk->sk_ipv6only = ssock->sk->sk_ipv6only;
break;
case IPV6_TRANSPARENT:
inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
break;
case IPV6_FREEBIND:
inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
break;
}
release_sock(sk);
break;
@ -598,6 +616,85 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
return ret;
}
/*
 * Handle IP_FREEBIND / IP_TRANSPARENT on an MPTCP socket.
 *
 * The option is first applied to the MPTCP-level socket through
 * ip_setsockopt(); the resulting flag is then copied onto the socket
 * returned by __mptcp_nmpc_socket() while the msk lock is held.
 *
 * Returns the ip_setsockopt() error if that call fails, -EINVAL when
 * __mptcp_nmpc_socket() yields no socket, -EOPNOTSUPP (with a one-time
 * warning) for any other optname, and 0 on success.
 */
static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
						   sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct inet_sock *first_inet;
	struct socket *first;
	int ret;

	ret = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
	if (ret)
		return ret;

	lock_sock(sk);

	first = __mptcp_nmpc_socket(msk);
	if (!first) {
		ret = -EINVAL;
		goto unlock;
	}

	first_inet = inet_sk(first->sk);

	if (optname == IP_FREEBIND) {
		first_inet->freebind = inet_sk(sk)->freebind;
	} else if (optname == IP_TRANSPARENT) {
		first_inet->transparent = inet_sk(sk)->transparent;
	} else {
		/* Callers only dispatch the two options above; drop the
		 * lock before complaining.
		 */
		release_sock(sk);
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}

	/* Only a successful propagation bumps the sockopt sequence. */
	sockopt_seq_inc(msk);

unlock:
	release_sock(sk);
	return ret;
}
/*
 * Handle IP_TOS on an MPTCP socket.
 *
 * The value is first applied to the MPTCP-level socket through
 * ip_setsockopt(); the resulting inet_sk(sk)->tos is then pushed to every
 * subflow currently attached to the msk.
 *
 * Returns the ip_setsockopt() error if that call fails, 0 otherwise.
 */
static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	int err, val;

	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
	if (err != 0)
		return err;

	lock_sock(sk);
	sockopt_seq_inc(msk);
	val = inet_sk(sk)->tos;
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		/* __ip_sock_set_tos() is the helper variant that does not
		 * lock the socket it modifies, and the msk lock taken above
		 * does not cover the subflow socket: hold the subflow's own
		 * lock while its TOS is rewritten.
		 */
		slow = lock_sock_fast(ssk);
		__ip_sock_set_tos(ssk, val);
		unlock_sock_fast(ssk, slow);
	}
	release_sock(sk);

	return 0;
}
/*
 * Dispatch a SOL_IP option set on an MPTCP socket to its handler.
 *
 * IP_TOS goes to mptcp_setsockopt_v4_set_tos(); IP_FREEBIND and
 * IP_TRANSPARENT share mptcp_setsockopt_sol_ip_set_transparent().
 * Every other option name is rejected with -EOPNOTSUPP.
 */
static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	if (optname == IP_TOS)
		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);

	if (optname == IP_FREEBIND || optname == IP_TRANSPARENT)
		return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);

	return -EOPNOTSUPP;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@ -637,6 +734,9 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
if (ssk)
return tcp_setsockopt(ssk, level, optname, optval, optlen);
if (level == SOL_IP)
return mptcp_setsockopt_v4(msk, optname, optval, optlen);
if (level == SOL_IPV6)
return mptcp_setsockopt_v6(msk, optname, optval, optlen);
@ -1003,6 +1103,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
@ -1028,6 +1129,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)

View File

@ -1425,6 +1425,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
mptcp_sockopt_sync(msk, ssk);
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@ -1441,7 +1443,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);
mptcp_sockopt_sync(msk, ssk);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;

View File

@ -13,5 +13,9 @@ CONFIG_NFT_COUNTER=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IPV6_MULTIPLE_TABLES=y

View File

@ -75,7 +75,12 @@ struct cfg_cmsg_types {
unsigned int timestampns:1;
};
/* Socket options requested on the command line via '-o'. */
struct cfg_sockopt_types {
/* set by "-o TRANSPARENT"; makes the listener call set_transparent() before bind() */
unsigned int transparent:1;
};
/* Program-wide configuration filled in while parsing the '-c' and '-o' arguments. */
static struct cfg_cmsg_types cfg_cmsg_types;
static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
@ -93,6 +98,7 @@ static void die_usage(void)
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
@ -185,6 +191,22 @@ static void set_mark(int fd, uint32_t mark)
}
}
/*
 * Enable the "transparent" option on @fd for address family @pf:
 * IP_TRANSPARENT for AF_INET, IPV6_TRANSPARENT for AF_INET6.
 *
 * Best effort: a setsockopt() failure is only reported with perror(),
 * and any other family is silently ignored.
 */
static void set_transparent(int fd, int pf)
{
	int enable = 1;
	int rc;

	if (pf == AF_INET) {
		rc = setsockopt(fd, SOL_IP, IP_TRANSPARENT, &enable, sizeof(enable));
		if (rc == -1)
			perror("IP_TRANSPARENT");
	} else if (pf == AF_INET6) {
		rc = setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &enable, sizeof(enable));
		if (rc == -1)
			perror("IPV6_TRANSPARENT");
	}
}
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
@ -212,6 +234,9 @@ static int sock_listen_mptcp(const char * const listenaddr,
sizeof(one)))
perror("setsockopt");
if (cfg_sockopt_types.transparent)
set_transparent(sock, pf);
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@ -944,6 +969,27 @@ static void parse_cmsg_types(const char *type)
exit(1);
}
/*
 * Parse the argument of '-o': a comma-separated list of socket option
 * names. Each recognised name sets the matching flag in cfg_sockopt_types.
 *
 * The list is walked recursively, so the entries after the first comma
 * are handled before the leading one.
 *
 * A name must match in full. An empty entry (empty argument, leading,
 * trailing or doubled comma) or a mere prefix such as "TRANS" is reported
 * as unrecognised and terminates the program with exit status 1.
 */
static void parse_setsock_options(const char *name)
{
	static const char transparent[] = "TRANSPARENT";
	const char *next = strchr(name, ',');
	size_t len;

	if (next) {
		parse_setsock_options(next + 1);
		len = (size_t)(next - name);
	} else {
		len = strlen(name);
	}

	/* strncmp() alone returns 0 for len == 0 and for any prefix of the
	 * keyword, so the length has to be compared as well.
	 */
	if (len == sizeof(transparent) - 1 &&
	    strncmp(name, transparent, len) == 0) {
		cfg_sockopt_types.transparent = 1;
		return;
	}

	/* Print only the offending entry, not the rest of the list. */
	fprintf(stderr, "Unrecognized setsockopt option %.*s\n", (int)len, name);
	exit(1);
}
int main_loop(void)
{
int fd;
@ -1047,7 +1093,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) {
while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:o:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@ -1108,6 +1154,9 @@ static void parse_opts(int argc, char **argv)
case 'c':
parse_cmsg_types(optarg);
break;
case 'o':
parse_setsock_options(optarg);
break;
}
}

View File

@ -671,6 +671,82 @@ run_tests()
run_tests_lo $1 $2 $3 0
}
# Run one MPTCP transfer whose listener uses the TRANSPARENT socket option
# ("-o TRANSPARENT") while an nft tproxy rule in the listener namespace
# redirects TCP traffic for port 20000 to it.
#   $1 - address the connector connects to
#   $2 - label used in the INFO/SKIP/PASS/FAIL messages
# Returns 0 on PASS and when an IPv6 address is given but IPv6 testing is
# off; returns 1 on failure, after copying the transfer status into the
# global $ret. The SKIP paths use a bare "return" after printing the message.
run_test_transparent()
{
local connect_addr="$1"
local msg="$2"
# the connector runs in ns1, the transparent listener in ns2
local connector_ns="$ns1"
local listener_ns="$ns2"
local lret=0
local r6flag=""
# skip if we don't want v6
if ! $ipv6 && is_v6 "${connect_addr}"; then
return 0
fi
# Load the ruleset in the listener namespace: packets matching an existing
# transparent socket, or TCP packets to port 20000 (sent to the tproxy on
# :20000), get mark 1. The quoted "EOF" keeps the heredoc free of shell expansion.
ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
chain divert {
type filter hook prerouting priority -150;
meta l4proto tcp socket transparent 1 meta mark set 1 accept
tcp dport 20000 tproxy to :20000 meta mark set 1 accept
}
}
EOF
if [ $? -ne 0 ]; then
echo "SKIP: $msg, could not load nft ruleset"
return
fi
# The listener binds to the wildcard address of the family under test;
# for IPv6 the ip rule commands below additionally need "-6".
local local_addr
if is_v6 "${connect_addr}"; then
local_addr="::"
r6flag="-6"
else
local_addr="0.0.0.0"
fi
# Policy routing: packets carrying mark 1 are looked up in table 100 ...
ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
if [ $? -ne 0 ]; then
# undo the ruleset loaded above before skipping
ip netns exec "$listener_ns" nft flush ruleset
echo "SKIP: $msg, ip $r6flag rule failed"
return
fi
# ... and table 100 treats every destination as local, via lo.
ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
if [ $? -ne 0 ]; then
# undo both the ruleset and the ip rule before skipping
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
echo "SKIP: $msg, ip route add local $local_addr failed"
return
fi
echo "INFO: test $msg"
# NOTE(review): TEST_COUNT is not declared local, so this assignment is
# visible outside the function - presumably read by do_transfer; confirm.
TEST_COUNT=10000
local extra_args="-o TRANSPARENT"
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
${connect_addr} ${local_addr} "${extra_args}"
# keep the transfer status; the cleanup commands below overwrite $?
lret=$?
# tear down, whatever the outcome, everything that was set up above
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
if [ $lret -ne 0 ]; then
echo "FAIL: $msg, mptcp connection error" 1>&2
ret=$lret
return 1
fi
echo "PASS: $msg"
return 0
}
run_tests_peekmode()
{
local peekmode="$1"
@ -794,5 +870,9 @@ run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
stop_if_error "Tests with peek mode have failed"
# connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6"
display_time
exit $ret