Merge branch 'mptcp-optimizations'
Mat Martineau says: ==================== mptcp: A few optimizations Here is a set of patches that we've accumulated and tested in the MPTCP tree. Patch 1 removes the MPTCP-level tx skb cache that added complexity but did not provide a meaningful benefit. Patch 2 uses the fast socket lock in more places. Patch 3 improves handling of a data-ready flag. Patch 4 deletes an unnecessary and racy connection state check. Patch 5 adds a MIB counter for one type of invalid MPTCP header. Patch 6 improves self test failure output. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
1a77de09b7
@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
|
||||
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
|
||||
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
|
||||
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
|
||||
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
|
||||
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
|
||||
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
|
||||
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
|
||||
|
@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
|
||||
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
|
||||
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
|
||||
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
|
||||
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
|
||||
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
|
||||
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
|
||||
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
|
||||
|
@ -540,6 +540,7 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
|
||||
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
|
||||
if (subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
bool slow;
|
||||
|
||||
spin_unlock_bh(&msk->pm.lock);
|
||||
pr_debug("send ack for %s%s%s",
|
||||
@ -547,9 +548,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
|
||||
mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
|
||||
mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
|
||||
|
||||
lock_sock(ssk);
|
||||
slow = lock_sock_fast(ssk);
|
||||
tcp_send_ack(ssk);
|
||||
release_sock(ssk);
|
||||
unlock_sock_fast(ssk, slow);
|
||||
spin_lock_bh(&msk->pm.lock);
|
||||
}
|
||||
}
|
||||
@ -566,6 +567,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct mptcp_addr_info local;
|
||||
bool slow;
|
||||
|
||||
local_address((struct sock_common *)ssk, &local);
|
||||
if (!addresses_equal(&local, addr, addr->port))
|
||||
@ -578,9 +580,9 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
|
||||
|
||||
spin_unlock_bh(&msk->pm.lock);
|
||||
pr_debug("send ack for mp_prio");
|
||||
lock_sock(ssk);
|
||||
slow = lock_sock_fast(ssk);
|
||||
tcp_send_ack(ssk);
|
||||
release_sock(ssk);
|
||||
unlock_sock_fast(ssk, slow);
|
||||
spin_lock_bh(&msk->pm.lock);
|
||||
|
||||
return 0;
|
||||
|
@ -433,23 +433,25 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
bool slow;
|
||||
|
||||
lock_sock(ssk);
|
||||
slow = lock_sock_fast(ssk);
|
||||
if (tcp_can_send_ack(ssk))
|
||||
tcp_send_ack(ssk);
|
||||
release_sock(ssk);
|
||||
unlock_sock_fast(ssk, slow);
|
||||
}
|
||||
}
|
||||
|
||||
static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
|
||||
{
|
||||
bool slow;
|
||||
int ret;
|
||||
|
||||
lock_sock(ssk);
|
||||
slow = lock_sock_fast(ssk);
|
||||
ret = tcp_can_send_ack(ssk);
|
||||
if (ret)
|
||||
tcp_cleanup_rbuf(ssk, 1);
|
||||
release_sock(ssk);
|
||||
unlock_sock_fast(ssk, slow);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -684,9 +686,6 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
unsigned int moved = 0;
|
||||
|
||||
if (inet_sk_state_load(sk) == TCP_CLOSE)
|
||||
return false;
|
||||
|
||||
__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
|
||||
__mptcp_ofo_queue(msk);
|
||||
if (unlikely(ssk->sk_err)) {
|
||||
@ -902,22 +901,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
|
||||
df->data_seq + df->data_len == msk->write_seq;
|
||||
}
|
||||
|
||||
static int mptcp_wmem_with_overhead(struct sock *sk, int size)
|
||||
static int mptcp_wmem_with_overhead(int size)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
int ret, skbs;
|
||||
|
||||
ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
|
||||
skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
|
||||
if (skbs < msk->skb_tx_cache.qlen)
|
||||
return ret;
|
||||
|
||||
return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
|
||||
return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void __mptcp_wmem_reserve(struct sock *sk, int size)
|
||||
{
|
||||
int amount = mptcp_wmem_with_overhead(sk, size);
|
||||
int amount = mptcp_wmem_with_overhead(size);
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
WARN_ON_ONCE(msk->wmem_reserved);
|
||||
@ -1212,49 +1203,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool mptcp_tx_cache_refill(struct sock *sk, int size,
|
||||
struct sk_buff_head *skbs, int *total_ts)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
int space_needed;
|
||||
|
||||
if (unlikely(tcp_under_memory_pressure(sk))) {
|
||||
mptcp_mem_reclaim_partial(sk);
|
||||
|
||||
/* under pressure pre-allocate at most a single skb */
|
||||
if (msk->skb_tx_cache.qlen)
|
||||
return true;
|
||||
space_needed = msk->size_goal_cache;
|
||||
} else {
|
||||
space_needed = msk->tx_pending_data + size -
|
||||
msk->skb_tx_cache.qlen * msk->size_goal_cache;
|
||||
}
|
||||
|
||||
while (space_needed > 0) {
|
||||
skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
|
||||
if (unlikely(!skb)) {
|
||||
/* under memory pressure, try to pass the caller a
|
||||
* single skb to allow forward progress
|
||||
*/
|
||||
while (skbs->qlen > 1) {
|
||||
skb = __skb_dequeue_tail(skbs);
|
||||
*total_ts -= skb->truesize;
|
||||
__kfree_skb(skb);
|
||||
}
|
||||
return skbs->qlen > 0;
|
||||
}
|
||||
|
||||
*total_ts += skb->truesize;
|
||||
__skb_queue_tail(skbs, skb);
|
||||
space_needed -= msk->size_goal_cache;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
|
||||
if (ssk->sk_tx_skb_cache) {
|
||||
@ -1265,22 +1215,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
|
||||
return true;
|
||||
}
|
||||
|
||||
skb = skb_peek(&msk->skb_tx_cache);
|
||||
if (skb) {
|
||||
if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
|
||||
skb = __skb_dequeue(&msk->skb_tx_cache);
|
||||
if (WARN_ON_ONCE(!skb))
|
||||
return false;
|
||||
|
||||
mptcp_wmem_uncharge(sk, skb->truesize);
|
||||
ssk->sk_tx_skb_cache = skb;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* over memory limit, no point to try to allocate a new skb */
|
||||
return false;
|
||||
}
|
||||
|
||||
skb = __mptcp_do_alloc_tx_skb(sk, gfp);
|
||||
if (!skb)
|
||||
return false;
|
||||
@ -1296,7 +1230,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
|
||||
static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
|
||||
{
|
||||
return !ssk->sk_tx_skb_cache &&
|
||||
!skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
|
||||
tcp_under_memory_pressure(sk);
|
||||
}
|
||||
|
||||
@ -1339,7 +1272,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
/* compute send limit */
|
||||
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
|
||||
avail_size = info->size_goal;
|
||||
msk->size_goal_cache = info->size_goal;
|
||||
skb = tcp_write_queue_tail(ssk);
|
||||
if (skb) {
|
||||
/* Limit the write to the size available in the
|
||||
@ -1688,7 +1620,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
while (msg_data_left(msg)) {
|
||||
int total_ts, frag_truesize = 0;
|
||||
struct mptcp_data_frag *dfrag;
|
||||
struct sk_buff_head skbs;
|
||||
bool dfrag_collapsed;
|
||||
size_t psize, offset;
|
||||
|
||||
@ -1721,16 +1652,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
psize = pfrag->size - offset;
|
||||
psize = min_t(size_t, psize, msg_data_left(msg));
|
||||
total_ts = psize + frag_truesize;
|
||||
__skb_queue_head_init(&skbs);
|
||||
if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
|
||||
|
||||
if (!mptcp_wmem_alloc(sk, total_ts))
|
||||
goto wait_for_memory;
|
||||
|
||||
if (!mptcp_wmem_alloc(sk, total_ts)) {
|
||||
__skb_queue_purge(&skbs);
|
||||
goto wait_for_memory;
|
||||
}
|
||||
|
||||
skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
|
||||
if (copy_page_from_iter(dfrag->page, offset, psize,
|
||||
&msg->msg_iter) != psize) {
|
||||
mptcp_wmem_uncharge(sk, psize + frag_truesize);
|
||||
@ -1787,7 +1712,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
|
||||
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
|
||||
sk_wait_event(sk, timeo,
|
||||
test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
|
||||
test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
|
||||
|
||||
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
remove_wait_queue(sk_sleep(sk), &wait);
|
||||
@ -2111,10 +2036,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
*/
|
||||
if (unlikely(__mptcp_move_skbs(msk)))
|
||||
set_bit(MPTCP_DATA_READY, &msk->flags);
|
||||
} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
|
||||
/* data to read but mptcp_wait_data() cleared DATA_READY */
|
||||
set_bit(MPTCP_DATA_READY, &msk->flags);
|
||||
}
|
||||
|
||||
out_err:
|
||||
if (cmsg_flags && copied >= 0) {
|
||||
if (cmsg_flags & MPTCP_CMSG_TS)
|
||||
@ -2326,13 +2249,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
|
||||
|
||||
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
|
||||
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
|
||||
bool slow;
|
||||
|
||||
lock_sock(tcp_sk);
|
||||
slow = lock_sock_fast(tcp_sk);
|
||||
if (tcp_sk->sk_state != TCP_CLOSE) {
|
||||
tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
|
||||
tcp_set_state(tcp_sk, TCP_CLOSE);
|
||||
}
|
||||
release_sock(tcp_sk);
|
||||
unlock_sock_fast(tcp_sk, slow);
|
||||
}
|
||||
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
@ -2462,13 +2386,11 @@ static int __mptcp_init_sock(struct sock *sk)
|
||||
INIT_LIST_HEAD(&msk->rtx_queue);
|
||||
INIT_WORK(&msk->work, mptcp_worker);
|
||||
__skb_queue_head_init(&msk->receive_queue);
|
||||
__skb_queue_head_init(&msk->skb_tx_cache);
|
||||
msk->out_of_order_queue = RB_ROOT;
|
||||
msk->first_pending = NULL;
|
||||
msk->wmem_reserved = 0;
|
||||
msk->rmem_released = 0;
|
||||
msk->tx_pending_data = 0;
|
||||
msk->size_goal_cache = TCP_BASE_MSS;
|
||||
|
||||
msk->ack_hint = NULL;
|
||||
msk->first = NULL;
|
||||
@ -2525,15 +2447,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct mptcp_data_frag *dtmp, *dfrag;
|
||||
struct sk_buff *skb;
|
||||
|
||||
WRITE_ONCE(msk->first_pending, NULL);
|
||||
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
|
||||
dfrag_clear(sk, dfrag);
|
||||
while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
|
||||
sk->sk_forward_alloc += skb->truesize;
|
||||
kfree_skb(skb);
|
||||
}
|
||||
}
|
||||
|
||||
static void mptcp_cancel_work(struct sock *sk)
|
||||
|
@ -245,9 +245,7 @@ struct mptcp_sock {
|
||||
struct sk_buff *ooo_last_skb;
|
||||
struct rb_root out_of_order_queue;
|
||||
struct sk_buff_head receive_queue;
|
||||
struct sk_buff_head skb_tx_cache; /* this is wmem accounted */
|
||||
int tx_pending_data;
|
||||
int size_goal_cache;
|
||||
struct list_head conn_list;
|
||||
struct list_head rtx_queue;
|
||||
struct mptcp_data_frag *first_pending;
|
||||
|
@ -1046,8 +1046,10 @@ validate_seq:
|
||||
/* we revalidate valid mapping on new skb, because we must ensure
|
||||
* the current skb is completely covered by the available mapping
|
||||
*/
|
||||
if (!validate_mapping(ssk, skb))
|
||||
if (!validate_mapping(ssk, skb)) {
|
||||
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
|
||||
return MAPPING_INVALID;
|
||||
}
|
||||
|
||||
skb_ext_del(skb, SKB_EXT_MPTCP);
|
||||
|
||||
|
@ -680,6 +680,25 @@ run_tests_peekmode()
|
||||
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
|
||||
}
|
||||
|
||||
display_time()
|
||||
{
|
||||
time_end=$(date +%s)
|
||||
time_run=$((time_end-time_start))
|
||||
|
||||
echo "Time: ${time_run} seconds"
|
||||
}
|
||||
|
||||
stop_if_error()
|
||||
{
|
||||
local msg="$1"
|
||||
|
||||
if [ ${ret} -ne 0 ]; then
|
||||
echo "FAIL: ${msg}" 1>&2
|
||||
display_time
|
||||
exit ${ret}
|
||||
fi
|
||||
}
|
||||
|
||||
make_file "$cin" "client"
|
||||
make_file "$sin" "server"
|
||||
|
||||
@ -687,6 +706,8 @@ check_mptcp_disabled
|
||||
|
||||
check_mptcp_ulp_setsockopt
|
||||
|
||||
stop_if_error "The kernel configuration is not valid for MPTCP"
|
||||
|
||||
echo "INFO: validating network environment with pings"
|
||||
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
|
||||
do_ping "$ns1" $sender 10.0.1.1
|
||||
@ -706,6 +727,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
|
||||
do_ping "$ns4" $sender dead:beef:3::1
|
||||
done
|
||||
|
||||
stop_if_error "Could not even run ping tests"
|
||||
|
||||
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
|
||||
echo -n "INFO: Using loss of $tc_loss "
|
||||
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
|
||||
@ -733,18 +756,13 @@ echo "on ns3eth4"
|
||||
|
||||
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
|
||||
|
||||
for sender in $ns1 $ns2 $ns3 $ns4;do
|
||||
run_tests_lo "$ns1" "$sender" 10.0.1.1 1
|
||||
if [ $ret -ne 0 ] ;then
|
||||
echo "FAIL: Could not even run loopback test" 1>&2
|
||||
exit $ret
|
||||
fi
|
||||
run_tests_lo "$ns1" $sender dead:beef:1::1 1
|
||||
if [ $ret -ne 0 ] ;then
|
||||
echo "FAIL: Could not even run loopback v6 test" 2>&1
|
||||
exit $ret
|
||||
fi
|
||||
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
|
||||
stop_if_error "Could not even run loopback test"
|
||||
|
||||
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
|
||||
stop_if_error "Could not even run loopback v6 test"
|
||||
|
||||
for sender in $ns1 $ns2 $ns3 $ns4;do
|
||||
# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
|
||||
# mptcp syncookie support.
|
||||
if [ $sender = $ns1 ]; then
|
||||
@ -753,6 +771,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
|
||||
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
|
||||
fi
|
||||
|
||||
run_tests "$ns1" $sender 10.0.1.1
|
||||
run_tests "$ns1" $sender dead:beef:1::1
|
||||
|
||||
run_tests "$ns2" $sender 10.0.1.2
|
||||
run_tests "$ns2" $sender dead:beef:1::2
|
||||
run_tests "$ns2" $sender 10.0.2.1
|
||||
@ -765,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
|
||||
|
||||
run_tests "$ns4" $sender 10.0.3.1
|
||||
run_tests "$ns4" $sender dead:beef:3::1
|
||||
|
||||
stop_if_error "Tests with $sender as a sender have failed"
|
||||
done
|
||||
|
||||
run_tests_peekmode "saveWithPeek"
|
||||
run_tests_peekmode "saveAfterPeek"
|
||||
stop_if_error "Tests with peek mode have failed"
|
||||
|
||||
time_end=$(date +%s)
|
||||
time_run=$((time_end-time_start))
|
||||
|
||||
echo "Time: ${time_run} seconds"
|
||||
|
||||
display_time
|
||||
exit $ret
|
||||
|
Loading…
x
Reference in New Issue
Block a user