Merge branch 'mptcp-more-socket-options'
Mat Martineau says: ==================== mptcp: More socket option support These patches add MPTCP socket support for a few additional socket options: IP_TOS, IP_FREEBIND, IP_TRANSPARENT, IPV6_FREEBIND, and IPV6_TRANSPARENT. Patch 1 exposes __ip_sock_set_tos() for use in patch 2. Patch 2 adds IP_TOS support. Patches 3 and 4 add the freebind and transparent support, with a selftest for the latter. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
89f9711824
@ -783,5 +783,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val);
|
||||
void ip_sock_set_pktinfo(struct sock *sk);
|
||||
void ip_sock_set_recverr(struct sock *sk);
|
||||
void ip_sock_set_tos(struct sock *sk, int val);
|
||||
void __ip_sock_set_tos(struct sock *sk, int val);
|
||||
|
||||
#endif /* _IP_H */
|
||||
|
@ -576,7 +576,7 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __ip_sock_set_tos(struct sock *sk, int val)
|
||||
void __ip_sock_set_tos(struct sock *sk, int val)
|
||||
{
|
||||
if (sk->sk_type == SOCK_STREAM) {
|
||||
val &= ~INET_ECN_MASK;
|
||||
|
@ -390,6 +390,8 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
|
||||
|
||||
switch (optname) {
|
||||
case IPV6_V6ONLY:
|
||||
case IPV6_TRANSPARENT:
|
||||
case IPV6_FREEBIND:
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (!ssock) {
|
||||
@ -398,8 +400,24 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
|
||||
}
|
||||
|
||||
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
|
||||
if (ret == 0)
|
||||
if (ret != 0) {
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
sockopt_seq_inc(msk);
|
||||
|
||||
switch (optname) {
|
||||
case IPV6_V6ONLY:
|
||||
sk->sk_ipv6only = ssock->sk->sk_ipv6only;
|
||||
break;
|
||||
case IPV6_TRANSPARENT:
|
||||
inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
|
||||
break;
|
||||
case IPV6_FREEBIND:
|
||||
inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
|
||||
break;
|
||||
}
|
||||
|
||||
release_sock(sk);
|
||||
break;
|
||||
@ -598,6 +616,85 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct inet_sock *issk;
|
||||
struct socket *ssock;
|
||||
int err;
|
||||
|
||||
err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
|
||||
if (err != 0)
|
||||
return err;
|
||||
|
||||
lock_sock(sk);
|
||||
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (!ssock) {
|
||||
release_sock(sk);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
issk = inet_sk(ssock->sk);
|
||||
|
||||
switch (optname) {
|
||||
case IP_FREEBIND:
|
||||
issk->freebind = inet_sk(sk)->freebind;
|
||||
break;
|
||||
case IP_TRANSPARENT:
|
||||
issk->transparent = inet_sk(sk)->transparent;
|
||||
break;
|
||||
default:
|
||||
release_sock(sk);
|
||||
WARN_ON_ONCE(1);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
sockopt_seq_inc(msk);
|
||||
release_sock(sk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
int err, val;
|
||||
|
||||
err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
|
||||
|
||||
if (err != 0)
|
||||
return err;
|
||||
|
||||
lock_sock(sk);
|
||||
sockopt_seq_inc(msk);
|
||||
val = inet_sk(sk)->tos;
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
__ip_sock_set_tos(ssk, val);
|
||||
}
|
||||
release_sock(sk);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
switch (optname) {
|
||||
case IP_FREEBIND:
|
||||
case IP_TRANSPARENT:
|
||||
return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
|
||||
case IP_TOS:
|
||||
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
|
||||
sockptr_t optval, unsigned int optlen)
|
||||
{
|
||||
@ -637,6 +734,9 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
|
||||
if (ssk)
|
||||
return tcp_setsockopt(ssk, level, optname, optval, optlen);
|
||||
|
||||
if (level == SOL_IP)
|
||||
return mptcp_setsockopt_v4(msk, optname, optval, optlen);
|
||||
|
||||
if (level == SOL_IPV6)
|
||||
return mptcp_setsockopt_v6(msk, optname, optval, optlen);
|
||||
|
||||
@ -1003,6 +1103,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
|
||||
ssk->sk_priority = sk->sk_priority;
|
||||
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
|
||||
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
|
||||
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
|
||||
|
||||
if (sk->sk_userlocks & tx_rx_locks) {
|
||||
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
|
||||
@ -1028,6 +1129,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
|
||||
|
||||
if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
|
||||
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
|
||||
|
||||
inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
|
||||
inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
|
||||
}
|
||||
|
||||
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
|
||||
|
@ -1425,6 +1425,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
|
||||
if (addr.ss_family == AF_INET6)
|
||||
addrlen = sizeof(struct sockaddr_in6);
|
||||
#endif
|
||||
mptcp_sockopt_sync(msk, ssk);
|
||||
|
||||
ssk->sk_bound_dev_if = ifindex;
|
||||
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
|
||||
if (err)
|
||||
@ -1441,7 +1443,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
|
||||
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
|
||||
|
||||
mptcp_add_pending_subflow(msk, subflow);
|
||||
mptcp_sockopt_sync(msk, ssk);
|
||||
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
|
||||
if (err && err != -EINPROGRESS)
|
||||
goto failed_unlink;
|
||||
|
@ -13,5 +13,9 @@ CONFIG_NFT_COUNTER=m
|
||||
CONFIG_NFT_COMPAT=m
|
||||
CONFIG_NETFILTER_XTABLES=m
|
||||
CONFIG_NETFILTER_XT_MATCH_BPF=m
|
||||
CONFIG_NF_TABLES_IPV4=y
|
||||
CONFIG_NF_TABLES_IPV6=y
|
||||
CONFIG_NF_TABLES_INET=y
|
||||
CONFIG_NFT_TPROXY=m
|
||||
CONFIG_NFT_SOCKET=m
|
||||
CONFIG_IP_ADVANCED_ROUTER=y
|
||||
CONFIG_IP_MULTIPLE_TABLES=y
|
||||
CONFIG_IPV6_MULTIPLE_TABLES=y
|
||||
|
@ -75,7 +75,12 @@ struct cfg_cmsg_types {
|
||||
unsigned int timestampns:1;
|
||||
};
|
||||
|
||||
/* Socket options selected with the -o command line switch, one bit each. */
struct cfg_sockopt_types {
	unsigned int transparent : 1;	/* "TRANSPARENT" was requested */
};
|
||||
|
||||
static struct cfg_cmsg_types cfg_cmsg_types;
|
||||
static struct cfg_sockopt_types cfg_sockopt_types;
|
||||
|
||||
static void die_usage(void)
|
||||
{
|
||||
@ -93,6 +98,7 @@ static void die_usage(void)
|
||||
fprintf(stderr, "\t-u -- check mptcp ulp\n");
|
||||
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
|
||||
fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
|
||||
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
|
||||
fprintf(stderr,
|
||||
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
|
||||
exit(1);
|
||||
@ -185,6 +191,22 @@ static void set_mark(int fd, uint32_t mark)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Enable the "transparent" socket option on fd for address family pf:
 * IP_TRANSPARENT for AF_INET, IPV6_TRANSPARENT for AF_INET6.
 *
 * Best effort: a setsockopt() failure is only reported with perror(), and
 * any other address family is silently ignored.
 */
static void set_transparent(int fd, int pf)
{
	const char *label;
	int enable = 1;
	int level;
	int opt;

	if (pf == AF_INET) {
		level = SOL_IP;
		opt = IP_TRANSPARENT;
		label = "IP_TRANSPARENT";
	} else if (pf == AF_INET6) {
		level = IPPROTO_IPV6;
		opt = IPV6_TRANSPARENT;
		label = "IPV6_TRANSPARENT";
	} else {
		return;
	}

	if (setsockopt(fd, level, opt, &enable, sizeof(enable)) == -1)
		perror(label);
}
|
||||
|
||||
static int sock_listen_mptcp(const char * const listenaddr,
|
||||
const char * const port)
|
||||
{
|
||||
@ -212,6 +234,9 @@ static int sock_listen_mptcp(const char * const listenaddr,
|
||||
sizeof(one)))
|
||||
perror("setsockopt");
|
||||
|
||||
if (cfg_sockopt_types.transparent)
|
||||
set_transparent(sock, pf);
|
||||
|
||||
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
|
||||
break; /* success */
|
||||
|
||||
@ -944,6 +969,27 @@ static void parse_cmsg_types(const char *type)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void parse_setsock_options(const char *name)
|
||||
{
|
||||
char *next = strchr(name, ',');
|
||||
unsigned int len = 0;
|
||||
|
||||
if (next) {
|
||||
parse_setsock_options(next + 1);
|
||||
len = next - name;
|
||||
} else {
|
||||
len = strlen(name);
|
||||
}
|
||||
|
||||
if (strncmp(name, "TRANSPARENT", len) == 0) {
|
||||
cfg_sockopt_types.transparent = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main_loop(void)
|
||||
{
|
||||
int fd;
|
||||
@ -1047,7 +1093,7 @@ static void parse_opts(int argc, char **argv)
|
||||
{
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) {
|
||||
while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:o:")) != -1) {
|
||||
switch (c) {
|
||||
case 'j':
|
||||
cfg_join = true;
|
||||
@ -1108,6 +1154,9 @@ static void parse_opts(int argc, char **argv)
|
||||
case 'c':
|
||||
parse_cmsg_types(optarg);
|
||||
break;
|
||||
case 'o':
|
||||
parse_setsock_options(optarg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -671,6 +671,82 @@ run_tests()
|
||||
run_tests_lo $1 $2 $3 0
|
||||
}
|
||||
|
||||
# Run one MPTCP transfer through an nft tproxy redirect, with the listener
# started with "-o TRANSPARENT".
#   $1: address the connector connects to
#   $2: label used in the INFO/SKIP/PASS/FAIL messages
# An nft ruleset, a fwmark policy rule and a local route in table 100 are
# installed in the listener namespace before the transfer and removed
# afterwards. The test is reported as SKIP when any of these setup steps
# fails. On transfer failure the global 'ret' is set to the do_transfer
# status and 1 is returned.
run_test_transparent()
{
	local connect_addr="$1"
	local msg="$2"

	local connector_ns="$ns1"
	local listener_ns="$ns2"
	local lret=0
	local r6flag=""
	local local_addr
	local extra_args

	# skip if we don't want v6
	if ! $ipv6 && is_v6 "${connect_addr}"; then
		return 0
	fi

	if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
	chain divert {
		type filter hook prerouting priority -150;

		meta l4proto tcp socket transparent 1 meta mark set 1 accept
		tcp dport 20000 tproxy to :20000 meta mark set 1 accept
	}
}
EOF
	then
		echo "SKIP: $msg, could not load nft ruleset"
		return
	fi

	# wildcard listen address (and ip family flag) matching the target
	local_addr="0.0.0.0"
	if is_v6 "${connect_addr}"; then
		local_addr="::"
		r6flag="-6"
	fi

	if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
		ip netns exec "$listener_ns" nft flush ruleset
		echo "SKIP: $msg, ip $r6flag rule failed"
		return
	fi

	if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
		ip netns exec "$listener_ns" nft flush ruleset
		ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
		echo "SKIP: $msg, ip route add local $local_addr failed"
		return
	fi

	echo "INFO: test $msg"

	TEST_COUNT=10000
	extra_args="-o TRANSPARENT"
	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
		${connect_addr} ${local_addr} "${extra_args}"
	lret=$?

	# undo the setup regardless of the transfer result
	ip netns exec "$listener_ns" nft flush ruleset
	ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
	ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100

	if [ $lret -eq 0 ]; then
		echo "PASS: $msg"
		return 0
	fi

	echo "FAIL: $msg, mptcp connection error" 1>&2
	ret=$lret
	return 1
}
|
||||
|
||||
run_tests_peekmode()
|
||||
{
|
||||
local peekmode="$1"
|
||||
@ -794,5 +870,9 @@ run_tests_peekmode "saveWithPeek"
|
||||
run_tests_peekmode "saveAfterPeek"
|
||||
stop_if_error "Tests with peek mode have failed"
|
||||
|
||||
# connect to ns4 ip address, ns2 should intercept/proxy
|
||||
run_test_transparent 10.0.3.1 "tproxy ipv4"
|
||||
run_test_transparent dead:beef:3::1 "tproxy ipv6"
|
||||
|
||||
display_time
|
||||
exit $ret
|
||||
|
Loading…
Reference in New Issue
Block a user