Merge branch 'mptcp-path-manager-mode-selection'
Mat Martineau says: ==================== mptcp: Path manager mode selection MPTCP already has an in-kernel path manager (PM) to add and remove TCP subflows associated with a given MPTCP connection. This in-kernel PM has been designed to handle typical server-side use cases, but is not very flexible or configurable for client devices that may have more complicated policies to implement. This patch series from the MPTCP tree is the first step toward adding a generic-netlink-based API for MPTCP path management, which a privileged userspace daemon will be able to use to control subflow establishment. These patches add a per-namespace sysctl to select the default PM type (in-kernel or userspace) for new MPTCP sockets. New self-tests confirm expected behavior when userspace PM is selected but there is no daemon available to handle existing MPTCP PM events. Subsequent patch series (already staged in the MPTCP tree) will add the generic netlink path management API. ==================== Link: https://lore.kernel.org/r/20220427225002.231996-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
4994d4fa99
@ -46,6 +46,24 @@ allow_join_initial_addr_port - BOOLEAN
|
||||
|
||||
Default: 1
|
||||
|
||||
pm_type - INTEGER
|
||||
|
||||
Set the default path manager type to use for each new MPTCP
|
||||
socket. In-kernel path management will control subflow
|
||||
connections and address advertisements according to
|
||||
per-namespace values configured over the MPTCP netlink
|
||||
API. Userspace path management puts per-MPTCP-connection subflow
|
||||
connection decisions and address advertisements under control of
|
||||
a privileged userspace program, at the cost of more netlink
|
||||
traffic to propagate all of the related events and commands.
|
||||
|
||||
This is a per-namespace sysctl.
|
||||
|
||||
* 0 - In-kernel path manager
|
||||
* 1 - Userspace path manager
|
||||
|
||||
Default: 0
|
||||
|
||||
stale_loss_cnt - INTEGER
|
||||
The number of MPTCP-level retransmission intervals with no traffic and
|
||||
pending outstanding data on a given subflow required to declare it stale.
|
||||
|
@ -16,6 +16,11 @@
|
||||
#define MPTCP_SYSCTL_PATH "net/mptcp"
|
||||
|
||||
static int mptcp_pernet_id;
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
|
||||
#endif
|
||||
|
||||
struct mptcp_pernet {
|
||||
#ifdef CONFIG_SYSCTL
|
||||
struct ctl_table_header *ctl_table_hdr;
|
||||
@ -26,6 +31,7 @@ struct mptcp_pernet {
|
||||
u8 mptcp_enabled;
|
||||
u8 checksum_enabled;
|
||||
u8 allow_join_initial_addr_port;
|
||||
u8 pm_type;
|
||||
};
|
||||
|
||||
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
|
||||
@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
|
||||
return mptcp_get_pernet(net)->stale_loss_cnt;
|
||||
}
|
||||
|
||||
int mptcp_get_pm_type(const struct net *net)
|
||||
{
|
||||
return mptcp_get_pernet(net)->pm_type;
|
||||
}
|
||||
|
||||
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
|
||||
{
|
||||
pernet->mptcp_enabled = 1;
|
||||
@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
|
||||
pernet->checksum_enabled = 0;
|
||||
pernet->allow_join_initial_addr_port = 1;
|
||||
pernet->stale_loss_cnt = 4;
|
||||
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_douintvec_minmax,
|
||||
},
|
||||
{
|
||||
.procname = "pm_type",
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = &mptcp_pm_type_max
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
|
||||
table[2].data = &pernet->checksum_enabled;
|
||||
table[3].data = &pernet->allow_join_initial_addr_port;
|
||||
table[4].data = &pernet->stale_loss_cnt;
|
||||
table[5].data = &pernet->pm_type;
|
||||
|
||||
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
|
||||
if (!hdr)
|
||||
|
@ -208,7 +208,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
|
||||
|
||||
spin_lock_bh(&pm->lock);
|
||||
|
||||
if (!READ_ONCE(pm->accept_addr)) {
|
||||
if (!READ_ONCE(pm->accept_addr) || mptcp_pm_is_userspace(msk)) {
|
||||
mptcp_pm_announce_addr(msk, addr, true);
|
||||
mptcp_pm_add_addr_send_ack(msk);
|
||||
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
|
||||
@ -415,21 +415,41 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
|
||||
|
||||
void mptcp_pm_data_reset(struct mptcp_sock *msk)
|
||||
{
|
||||
msk->pm.add_addr_signaled = 0;
|
||||
msk->pm.add_addr_accepted = 0;
|
||||
msk->pm.local_addr_used = 0;
|
||||
msk->pm.subflows = 0;
|
||||
msk->pm.rm_list_tx.nr = 0;
|
||||
msk->pm.rm_list_rx.nr = 0;
|
||||
WRITE_ONCE(msk->pm.work_pending, false);
|
||||
WRITE_ONCE(msk->pm.addr_signal, 0);
|
||||
WRITE_ONCE(msk->pm.accept_addr, false);
|
||||
WRITE_ONCE(msk->pm.accept_subflow, false);
|
||||
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
|
||||
msk->pm.status = 0;
|
||||
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
|
||||
u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
|
||||
struct mptcp_pm_data *pm = &msk->pm;
|
||||
|
||||
mptcp_pm_nl_data_init(msk);
|
||||
pm->add_addr_signaled = 0;
|
||||
pm->add_addr_accepted = 0;
|
||||
pm->local_addr_used = 0;
|
||||
pm->subflows = 0;
|
||||
pm->rm_list_tx.nr = 0;
|
||||
pm->rm_list_rx.nr = 0;
|
||||
WRITE_ONCE(pm->pm_type, pm_type);
|
||||
|
||||
if (pm_type == MPTCP_PM_TYPE_KERNEL) {
|
||||
bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);
|
||||
|
||||
/* pm->work_pending must be only be set to 'true' when
|
||||
* pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
|
||||
*/
|
||||
WRITE_ONCE(pm->work_pending,
|
||||
(!!mptcp_pm_get_local_addr_max(msk) &&
|
||||
subflows_allowed) ||
|
||||
!!mptcp_pm_get_add_addr_signal_max(msk));
|
||||
WRITE_ONCE(pm->accept_addr,
|
||||
!!mptcp_pm_get_add_addr_accept_max(msk) &&
|
||||
subflows_allowed);
|
||||
WRITE_ONCE(pm->accept_subflow, subflows_allowed);
|
||||
} else {
|
||||
WRITE_ONCE(pm->work_pending, 0);
|
||||
WRITE_ONCE(pm->accept_addr, 0);
|
||||
WRITE_ONCE(pm->accept_subflow, 0);
|
||||
}
|
||||
|
||||
WRITE_ONCE(pm->addr_signal, 0);
|
||||
WRITE_ONCE(pm->remote_deny_join_id0, false);
|
||||
pm->status = 0;
|
||||
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
|
||||
}
|
||||
|
||||
void mptcp_pm_data_init(struct mptcp_sock *msk)
|
||||
|
@ -1061,18 +1061,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_pm_data *pm = &msk->pm;
|
||||
bool subflows;
|
||||
|
||||
subflows = !!mptcp_pm_get_subflows_max(msk);
|
||||
WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) ||
|
||||
!!mptcp_pm_get_add_addr_signal_max(msk));
|
||||
WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows);
|
||||
WRITE_ONCE(pm->accept_subflow, subflows);
|
||||
}
|
||||
|
||||
#define MPTCP_PM_CMD_GRP_OFFSET 0
|
||||
#define MPTCP_PM_EV_GRP_OFFSET 1
|
||||
|
||||
@ -1232,7 +1220,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
|
||||
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
if (!READ_ONCE(msk->fully_established))
|
||||
if (!READ_ONCE(msk->fully_established) ||
|
||||
mptcp_pm_is_userspace(msk))
|
||||
goto next;
|
||||
|
||||
lock_sock(sk);
|
||||
@ -1375,6 +1364,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
bool remove_subflow;
|
||||
|
||||
if (mptcp_pm_is_userspace(msk))
|
||||
goto next;
|
||||
|
||||
if (list_empty(&msk->conn_list)) {
|
||||
mptcp_pm_remove_anno_addr(msk, addr, false);
|
||||
goto next;
|
||||
@ -1409,7 +1401,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct mptcp_addr_info msk_local;
|
||||
|
||||
if (list_empty(&msk->conn_list))
|
||||
if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
|
||||
goto next;
|
||||
|
||||
local_address((struct sock_common *)msk, &msk_local);
|
||||
@ -1516,9 +1508,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
|
||||
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
lock_sock(sk);
|
||||
mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
|
||||
release_sock(sk);
|
||||
if (!mptcp_pm_is_userspace(msk)) {
|
||||
lock_sock(sk);
|
||||
mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
|
||||
release_sock(sk);
|
||||
}
|
||||
|
||||
sock_put(sk);
|
||||
cond_resched();
|
||||
@ -1791,7 +1785,7 @@ static int mptcp_nl_set_flags(struct net *net,
|
||||
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
if (list_empty(&msk->conn_list))
|
||||
if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
|
||||
goto next;
|
||||
|
||||
lock_sock(sk);
|
||||
|
@ -184,6 +184,14 @@ enum mptcp_pm_status {
|
||||
*/
|
||||
};
|
||||
|
||||
enum mptcp_pm_type {
|
||||
MPTCP_PM_TYPE_KERNEL = 0,
|
||||
MPTCP_PM_TYPE_USERSPACE,
|
||||
|
||||
__MPTCP_PM_TYPE_NR,
|
||||
__MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
|
||||
};
|
||||
|
||||
/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
|
||||
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
|
||||
|
||||
@ -212,6 +220,7 @@ struct mptcp_pm_data {
|
||||
u8 add_addr_signaled;
|
||||
u8 add_addr_accepted;
|
||||
u8 local_addr_used;
|
||||
u8 pm_type;
|
||||
u8 subflows;
|
||||
u8 status;
|
||||
DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
|
||||
@ -576,6 +585,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net);
|
||||
int mptcp_is_checksum_enabled(const struct net *net);
|
||||
int mptcp_allow_join_id0(const struct net *net);
|
||||
unsigned int mptcp_stale_loss_cnt(const struct net *net);
|
||||
int mptcp_get_pm_type(const struct net *net);
|
||||
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
|
||||
struct mptcp_options_received *mp_opt);
|
||||
bool __mptcp_retransmit_pending_data(struct sock *sk);
|
||||
@ -796,6 +806,11 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
|
||||
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
|
||||
}
|
||||
|
||||
static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
|
||||
{
|
||||
return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
|
||||
}
|
||||
|
||||
static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
|
||||
{
|
||||
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
|
||||
@ -828,7 +843,6 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
|
||||
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
|
||||
|
||||
void __init mptcp_pm_nl_init(void);
|
||||
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
|
||||
void mptcp_pm_nl_work(struct mptcp_sock *msk);
|
||||
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
|
||||
const struct mptcp_rm_list *rm_list);
|
||||
|
@ -70,6 +70,7 @@ init_partial()
|
||||
ip netns add $netns || exit $ksft_skip
|
||||
ip -net $netns link set lo up
|
||||
ip netns exec $netns sysctl -q net.mptcp.enabled=1
|
||||
ip netns exec $netns sysctl -q net.mptcp.pm_type=0
|
||||
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
|
||||
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
|
||||
if [ $checksum -eq 1 ]; then
|
||||
@ -1611,6 +1612,13 @@ wait_attempt_fail()
|
||||
return 1
|
||||
}
|
||||
|
||||
set_userspace_pm()
|
||||
{
|
||||
local ns=$1
|
||||
|
||||
ip netns exec $ns sysctl -q net.mptcp.pm_type=1
|
||||
}
|
||||
|
||||
subflows_tests()
|
||||
{
|
||||
if reset "no JOIN"; then
|
||||
@ -2698,6 +2706,63 @@ fail_tests()
|
||||
fi
|
||||
}
|
||||
|
||||
userspace_tests()
|
||||
{
|
||||
# userspace pm type prevents add_addr
|
||||
if reset "userspace pm type prevents add_addr"; then
|
||||
set_userspace_pm $ns1
|
||||
pm_nl_set_limits $ns1 0 2
|
||||
pm_nl_set_limits $ns2 0 2
|
||||
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
|
||||
run_tests $ns1 $ns2 10.0.1.1
|
||||
chk_join_nr 0 0 0
|
||||
chk_add_nr 0 0
|
||||
fi
|
||||
|
||||
# userspace pm type rejects join
|
||||
if reset "userspace pm type rejects join"; then
|
||||
set_userspace_pm $ns1
|
||||
pm_nl_set_limits $ns1 1 1
|
||||
pm_nl_set_limits $ns2 1 1
|
||||
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
|
||||
run_tests $ns1 $ns2 10.0.1.1
|
||||
chk_join_nr 1 1 0
|
||||
fi
|
||||
|
||||
# userspace pm type does not send join
|
||||
if reset "userspace pm type does not send join"; then
|
||||
set_userspace_pm $ns2
|
||||
pm_nl_set_limits $ns1 1 1
|
||||
pm_nl_set_limits $ns2 1 1
|
||||
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
|
||||
run_tests $ns1 $ns2 10.0.1.1
|
||||
chk_join_nr 0 0 0
|
||||
fi
|
||||
|
||||
# userspace pm type prevents mp_prio
|
||||
if reset "userspace pm type prevents mp_prio"; then
|
||||
set_userspace_pm $ns1
|
||||
pm_nl_set_limits $ns1 1 1
|
||||
pm_nl_set_limits $ns2 1 1
|
||||
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
|
||||
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
|
||||
chk_join_nr 1 1 0
|
||||
chk_prio_nr 0 0
|
||||
fi
|
||||
|
||||
# userspace pm type prevents rm_addr
|
||||
if reset "userspace pm type prevents rm_addr"; then
|
||||
set_userspace_pm $ns1
|
||||
set_userspace_pm $ns2
|
||||
pm_nl_set_limits $ns1 0 1
|
||||
pm_nl_set_limits $ns2 0 1
|
||||
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
|
||||
run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
|
||||
chk_join_nr 0 0 0
|
||||
chk_rm_nr 0 0
|
||||
fi
|
||||
}
|
||||
|
||||
implicit_tests()
|
||||
{
|
||||
# userspace pm type prevents add_addr
|
||||
@ -2767,6 +2832,7 @@ all_tests_sorted=(
|
||||
m@fullmesh_tests
|
||||
z@fastclose_tests
|
||||
F@fail_tests
|
||||
u@userspace_tests
|
||||
I@implicit_tests
|
||||
)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user