diff --git a/include/trace/events/net_probe_common.h b/include/trace/events/net_probe_common.h new file mode 100644 index 000000000000..3930119cab08 --- /dev/null +++ b/include/trace/events/net_probe_common.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#if !defined(_TRACE_NET_PROBE_COMMON_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_PROBE_COMMON_H + +#define TP_STORE_ADDR_PORTS_V4(__entry, inet, sk) \ + do { \ + struct sockaddr_in *v4 = (void *)__entry->saddr; \ + \ + v4->sin_family = AF_INET; \ + v4->sin_port = inet->inet_sport; \ + v4->sin_addr.s_addr = inet->inet_saddr; \ + v4 = (void *)__entry->daddr; \ + v4->sin_family = AF_INET; \ + v4->sin_port = inet->inet_dport; \ + v4->sin_addr.s_addr = inet->inet_daddr; \ + } while (0) + +#if IS_ENABLED(CONFIG_IPV6) + +#define TP_STORE_ADDR_PORTS(__entry, inet, sk) \ + do { \ + if (sk->sk_family == AF_INET6) { \ + struct sockaddr_in6 *v6 = (void *)__entry->saddr; \ + \ + v6->sin6_family = AF_INET6; \ + v6->sin6_port = inet->inet_sport; \ + v6->sin6_addr = inet6_sk(sk)->saddr; \ + v6 = (void *)__entry->daddr; \ + v6->sin6_family = AF_INET6; \ + v6->sin6_port = inet->inet_dport; \ + v6->sin6_addr = sk->sk_v6_daddr; \ + } else \ + TP_STORE_ADDR_PORTS_V4(__entry, inet, sk); \ + } while (0) + +#else + +#define TP_STORE_ADDR_PORTS(__entry, inet, sk) \ + TP_STORE_ADDR_PORTS_V4(__entry, inet, sk); + +#endif + +#endif diff --git a/include/trace/events/sctp.h b/include/trace/events/sctp.h new file mode 100644 index 000000000000..7475c7be165a --- /dev/null +++ b/include/trace/events/sctp.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sctp + +#if !defined(_TRACE_SCTP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCTP_H + +#include +#include + +TRACE_EVENT(sctp_probe_path, + + TP_PROTO(struct sctp_transport *sp, + const struct sctp_association *asoc), + + TP_ARGS(sp, asoc), + + TP_STRUCT__entry( + __field(__u64, asoc) + __field(__u32, primary) + __array(__u8, ipaddr, sizeof(union sctp_addr)) + __field(__u32, state) + __field(__u32, cwnd) + __field(__u32, ssthresh) + __field(__u32, flight_size) + __field(__u32, partial_bytes_acked) + __field(__u32, pathmtu) + ), + + TP_fast_assign( + __entry->asoc = (unsigned long)asoc; + __entry->primary = (sp == asoc->peer.primary_path); + memcpy(__entry->ipaddr, &sp->ipaddr, sizeof(union sctp_addr)); + __entry->state = sp->state; + __entry->cwnd = sp->cwnd; + __entry->ssthresh = sp->ssthresh; + __entry->flight_size = sp->flight_size; + __entry->partial_bytes_acked = sp->partial_bytes_acked; + __entry->pathmtu = sp->pathmtu; + ), + + TP_printk("asoc=%#llx%s ipaddr=%pISpc state=%u cwnd=%u ssthresh=%u " + "flight_size=%u partial_bytes_acked=%u pathmtu=%u", + __entry->asoc, __entry->primary ? "(*)" : "", + __entry->ipaddr, __entry->state, __entry->cwnd, + __entry->ssthresh, __entry->flight_size, + __entry->partial_bytes_acked, __entry->pathmtu) +); + +TRACE_EVENT(sctp_probe, + + TP_PROTO(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk), + + TP_ARGS(ep, asoc, chunk), + + TP_STRUCT__entry( + __field(__u64, asoc) + __field(__u32, mark) + __field(__u16, bind_port) + __field(__u16, peer_port) + __field(__u32, pathmtu) + __field(__u32, rwnd) + __field(__u16, unack_data) + ), + + TP_fast_assign( + struct sk_buff *skb = chunk->skb; + + __entry->asoc = (unsigned long)asoc; + __entry->mark = skb->mark; + __entry->bind_port = ep->base.bind_addr.port; + __entry->peer_port = asoc->peer.port; + __entry->pathmtu = asoc->pathmtu; + __entry->rwnd = asoc->peer.rwnd; + __entry->unack_data = asoc->unack_data; + + if (trace_sctp_probe_path_enabled()) { + struct sctp_transport *sp; + + list_for_each_entry(sp, &asoc->peer.transport_addr_list, + transports) { + trace_sctp_probe_path(sp, asoc); + } + } + ), + + TP_printk("asoc=%#llx mark=%#x bind_port=%d peer_port=%d pathmtu=%d " + "rwnd=%u unack_data=%d", + __entry->asoc, __entry->mark, __entry->bind_port, + __entry->peer_port, __entry->pathmtu, __entry->rwnd, + __entry->unack_data) +); + +#endif /* _TRACE_SCTP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index bb00459d2d4d..878b2be7ce77 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM tcp @@ -8,6 +9,7 @@ #include #include #include +#include #define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ do { \ @@ -254,6 +256,64 @@ TRACE_EVENT(tcp_retransmit_synack, __entry->saddr_v6, __entry->daddr_v6) ); +#include + +TRACE_EVENT(tcp_probe, + + TP_PROTO(struct sock *sk, struct sk_buff *skb), + + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(__u16, sport) + __field(__u16, dport) + __field(__u32, mark) + __field(__u16, length) + __field(__u32, snd_nxt) + __field(__u32, snd_una) + __field(__u32, snd_cwnd) + __field(__u32, ssthresh) + __field(__u32, snd_wnd) + __field(__u32, srtt) + __field(__u32, rcv_wnd) + ), + + TP_fast_assign( + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->mark = skb->mark; + + __entry->length = skb->len; + __entry->snd_nxt = tp->snd_nxt; + __entry->snd_una = tp->snd_una; + __entry->snd_cwnd = tp->snd_cwnd; + __entry->snd_wnd = tp->snd_wnd; + __entry->rcv_wnd = tp->rcv_wnd; + __entry->ssthresh = tcp_current_ssthresh(sk); + __entry->srtt = tp->srtt_us >> 3; + ), + + TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x " + "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u " + "rcv_wnd=%u", + __entry->saddr, __entry->daddr, __entry->mark, + __entry->length, __entry->snd_nxt, __entry->snd_una, + __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, + __entry->srtt, __entry->rcv_wnd) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/Kconfig b/net/Kconfig index 9dba2715919d..efe930db3c08 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -336,23 +336,6 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. -config NET_TCPPROBE - tristate "TCP connection probing" - depends on INET && PROC_FS && KPROBES - ---help--- - This module allows for capturing the changes to TCP connection - state in response to incoming packets. It is used for debugging - TCP congestion avoidance modules. If you don't understand - what was just said, you don't need it: say N. - - Documentation on how to use TCP connection probing can be found - at: - - http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe - - To compile this code as a module, choose M here: the - module will be called tcp_probe. - config NET_DROP_MONITOR tristate "Network packet drop alerting service" depends on INET && TRACEPOINTS diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 8c0ef71bed2f..b270e84d9c13 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -39,23 +39,6 @@ config IP_DCCP_DEBUG Just say N. -config NET_DCCPPROBE - tristate "DCCP connection probing" - depends on PROC_FS && KPROBES - ---help--- - This module allows for capturing the changes to DCCP connection - state in response to incoming packets. It is used for debugging - DCCP congestion avoidance modules. If you don't understand - what was just said, you don't need it: say N. - - Documentation on how to use DCCP connection probing can be found - at: - - http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe - - To compile this code as a module, choose M here: the - module will be called dccp_probe. - endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2e7b56097bc4..5b4ff37bc806 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -21,9 +21,10 @@ obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o dccp_ipv6-y := ipv6.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o -obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o -dccp_probe-y := probe.o + +# build with local directory for trace.h +CFLAGS_proto.o := -I$(src) diff --git a/net/dccp/probe.c b/net/dccp/probe.c deleted file mode 100644 index 3d3fda05b32d..000000000000 --- a/net/dccp/probe.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * dccp_probe - Observe the DCCP flow with kprobes. - * - * The idea for this came from Werner Almesberger's umlsim - * Copyright (C) 2004, Stephen Hemminger - * - * Modified for DCCP from Stephen Hemminger's code - * Copyright (C) 2006, Ian McDonald - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dccp.h" -#include "ccid.h" -#include "ccids/ccid3.h" - -static int port; - -static int bufsize = 64 * 1024; - -static const char procname[] = "dccpprobe"; - -static struct { - struct kfifo fifo; - spinlock_t lock; - wait_queue_head_t wait; - struct timespec64 tstart; -} dccpw; - -static void printl(const char *fmt, ...) -{ - va_list args; - int len; - struct timespec64 now; - char tbuf[256]; - - va_start(args, fmt); - getnstimeofday64(&now); - - now = timespec64_sub(now, dccpw.tstart); - - len = sprintf(tbuf, "%lu.%06lu ", - (unsigned long) now.tv_sec, - (unsigned long) now.tv_nsec / NSEC_PER_USEC); - len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); - va_end(args); - - kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); - wake_up(&dccpw.wait); -} - -static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) -{ - const struct inet_sock *inet = inet_sk(sk); - struct ccid3_hc_tx_sock *hc = NULL; - - if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) - hc = ccid3_hc_tx_sk(sk); - - if (port == 0 || ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port) { - if (hc) - printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n", - &inet->inet_saddr, ntohs(inet->inet_sport), - &inet->inet_daddr, ntohs(inet->inet_dport), size, - hc->tx_s, hc->tx_rtt, hc->tx_p, - hc->tx_x_calc, hc->tx_x_recv >> 6, - hc->tx_x >> 6, hc->tx_t_ipi); - else - printl("%pI4:%u %pI4:%u %d\n", - &inet->inet_saddr, ntohs(inet->inet_sport), - &inet->inet_daddr, ntohs(inet->inet_dport), - size); - } - - jprobe_return(); - return 0; -} - -static struct jprobe dccp_send_probe = { - .kp = { - .symbol_name = "dccp_sendmsg", - }, - .entry = jdccp_sendmsg, -}; - -static int dccpprobe_open(struct inode *inode, struct file *file) -{ - kfifo_reset(&dccpw.fifo); - getnstimeofday64(&dccpw.tstart); - return 0; -} - -static ssize_t dccpprobe_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - int error = 0, cnt = 0; - unsigned char *tbuf; - - if (!buf) - return -EINVAL; - - if (len == 0) - return 0; - - tbuf = vmalloc(len); - if (!tbuf) - return -ENOMEM; - - error = wait_event_interruptible(dccpw.wait, - kfifo_len(&dccpw.fifo) != 0); - if (error) - goto out_free; - - cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); - error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; - -out_free: - vfree(tbuf); - - return error ? error : cnt; -} - -static const struct file_operations dccpprobe_fops = { - .owner = THIS_MODULE, - .open = dccpprobe_open, - .read = dccpprobe_read, - .llseek = noop_llseek, -}; - -static __init int dccpprobe_init(void) -{ - int ret = -ENOMEM; - - init_waitqueue_head(&dccpw.wait); - spin_lock_init(&dccpw.lock); - if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) - return ret; - if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) - goto err0; - - ret = register_jprobe(&dccp_send_probe); - if (ret) { - ret = request_module("dccp"); - if (!ret) - ret = register_jprobe(&dccp_send_probe); - } - - if (ret) - goto err1; - - pr_info("DCCP watch registered (port=%d)\n", port); - return 0; -err1: - remove_proc_entry(procname, init_net.proc_net); -err0: - kfifo_free(&dccpw.fifo); - return ret; -} -module_init(dccpprobe_init); - -static __exit void dccpprobe_exit(void) -{ - kfifo_free(&dccpw.fifo); - remove_proc_entry(procname, init_net.proc_net); - unregister_jprobe(&dccp_send_probe); - -} -module_exit(dccpprobe_exit); - -MODULE_PARM_DESC(port, "Port to match (0=all)"); -module_param(port, int, 0); - -MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); -module_param(bufsize, int, 0); - -MODULE_AUTHOR("Ian McDonald "); -MODULE_DESCRIPTION("DCCP snooper"); -MODULE_LICENSE("GPL"); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7a75a1d3568b..fa7e92e08920 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -38,6 +38,9 @@ #include "dccp.h" #include "feat.h" +#define CREATE_TRACE_POINTS +#include "trace.h" + DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); @@ -761,6 +764,8 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int rc, size; long timeo; + trace_dccp_probe(sk, len); + if (len > dp->dccps_mss_cache) return -EMSGSIZE; diff --git a/net/dccp/trace.h b/net/dccp/trace.h new file mode 100644 index 000000000000..5062421beee9 --- /dev/null +++ b/net/dccp/trace.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dccp + +#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DCCP_H + +#include +#include "dccp.h" +#include "ccids/ccid3.h" +#include +#include + +TRACE_EVENT(dccp_probe, + + TP_PROTO(struct sock *sk, size_t size), + + TP_ARGS(sk, size), + + TP_STRUCT__entry( + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, size) + __field(__u16, tx_s) + __field(__u32, tx_rtt) + __field(__u32, tx_p) + __field(__u32, tx_x_calc) + __field(__u64, tx_x_recv) + __field(__u64, tx_x) + __field(__u32, tx_t_ipi) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + struct ccid3_hc_tx_sock *hc = NULL; + + if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) + hc = ccid3_hc_tx_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + __entry->size = size; + if (hc) { + __entry->tx_s = hc->tx_s; + __entry->tx_rtt = hc->tx_rtt; + __entry->tx_p = hc->tx_p; + __entry->tx_x_calc = hc->tx_x_calc; + __entry->tx_x_recv = hc->tx_x_recv >> 6; + __entry->tx_x = hc->tx_x >> 6; + __entry->tx_t_ipi = hc->tx_t_ipi; + } else { + __entry->tx_s = 0; + memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi - + (void *)&__entry->tx_rtt + + sizeof(__entry->tx_t_ipi)); + } + ), + + TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d " + "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d", + __entry->saddr, __entry->daddr, __entry->size, + __entry->tx_s, __entry->tx_rtt, __entry->tx_p, + __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x, + __entry->tx_t_ipi) +); + +#endif /* _TRACE_TCP_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace +#include diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index c6c8ad1d4b6d..47a0a6649a9d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o -obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4d55c4b338ee..ff71b18d9682 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5299,6 +5299,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); + /* TCP congestion window tracking */ + trace_tcp_probe(sk, skb); + tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c deleted file mode 100644 index 697f4c67b2e3..000000000000 --- a/net/ipv4/tcp_probe.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * tcpprobe - Observe the TCP flow with kprobes. - * - * The idea for this came from Werner Almesberger's umlsim - * Copyright (C) 2004, Stephen Hemminger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -MODULE_AUTHOR("Stephen Hemminger "); -MODULE_DESCRIPTION("TCP cwnd snooper"); -MODULE_LICENSE("GPL"); -MODULE_VERSION("1.1"); - -static int port __read_mostly; -MODULE_PARM_DESC(port, "Port to match (0=all)"); -module_param(port, int, 0); - -static unsigned int bufsize __read_mostly = 4096; -MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); -module_param(bufsize, uint, 0); - -static unsigned int fwmark __read_mostly; -MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); -module_param(fwmark, uint, 0); - -static int full __read_mostly; -MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); -module_param(full, int, 0); - -static const char procname[] = "tcpprobe"; - -struct tcp_log { - ktime_t tstamp; - union { - struct sockaddr raw; - struct sockaddr_in v4; - struct sockaddr_in6 v6; - } src, dst; - u16 length; - u32 snd_nxt; - u32 snd_una; - u32 snd_wnd; - u32 rcv_wnd; - u32 snd_cwnd; - u32 ssthresh; - u32 srtt; -}; - -static struct { - spinlock_t lock; - wait_queue_head_t wait; - ktime_t start; - u32 lastcwnd; - - unsigned long head, tail; - struct tcp_log *log; -} tcp_probe; - -static inline int tcp_probe_used(void) -{ - return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); -} - -static inline int tcp_probe_avail(void) -{ - return bufsize - tcp_probe_used() - 1; -} - -#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \ - do { \ - si4.sin_family = AF_INET; \ - si4.sin_port = inet->inet_##mem##port; \ - si4.sin_addr.s_addr = inet->inet_##mem##addr; \ - } while (0) \ - -/* - * Hook inserted to be called before each receive packet. - * Note: arguments must match tcp_rcv_established()! - */ -static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th) -{ - unsigned int len = skb->len; - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_sock *inet = inet_sk(sk); - - /* Only update if port or skb mark matches */ - if (((port == 0 && fwmark == 0) || - ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port || - (fwmark > 0 && skb->mark == fwmark)) && - (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - - spin_lock(&tcp_probe.lock); - /* If log fills, just silently drop */ - if (tcp_probe_avail() > 1) { - struct tcp_log *p = tcp_probe.log + tcp_probe.head; - - p->tstamp = ktime_get(); - switch (sk->sk_family) { - case AF_INET: - tcp_probe_copy_fl_to_si4(inet, p->src.v4, s); - tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); - break; - case AF_INET6: - memset(&p->src.v6, 0, sizeof(p->src.v6)); - memset(&p->dst.v6, 0, sizeof(p->dst.v6)); -#if IS_ENABLED(CONFIG_IPV6) - p->src.v6.sin6_family = AF_INET6; - p->src.v6.sin6_port = inet->inet_sport; - p->src.v6.sin6_addr = inet6_sk(sk)->saddr; - - p->dst.v6.sin6_family = AF_INET6; - p->dst.v6.sin6_port = inet->inet_dport; - p->dst.v6.sin6_addr = sk->sk_v6_daddr; -#endif - break; - default: - BUG(); - } - - p->length = len; - p->snd_nxt = tp->snd_nxt; - p->snd_una = tp->snd_una; - p->snd_cwnd = tp->snd_cwnd; - p->snd_wnd = tp->snd_wnd; - p->rcv_wnd = tp->rcv_wnd; - p->ssthresh = tcp_current_ssthresh(sk); - p->srtt = tp->srtt_us >> 3; - - tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); - } - tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); - - wake_up(&tcp_probe.wait); - } - - jprobe_return(); -} - -static struct jprobe tcp_jprobe = { - .kp = { - .symbol_name = "tcp_rcv_established", - }, - .entry = jtcp_rcv_established, -}; - -static int tcpprobe_open(struct inode *inode, struct file *file) -{ - /* Reset (empty) log */ - spin_lock_bh(&tcp_probe.lock); - tcp_probe.head = tcp_probe.tail = 0; - tcp_probe.start = ktime_get(); - spin_unlock_bh(&tcp_probe.lock); - - return 0; -} - -static int tcpprobe_sprint(char *tbuf, int n) -{ - const struct tcp_log *p - = tcp_probe.log + tcp_probe.tail; - struct timespec64 ts - = ktime_to_timespec64(ktime_sub(p->tstamp, tcp_probe.start)); - - return scnprintf(tbuf, n, - "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", - (unsigned long)ts.tv_sec, - (unsigned long)ts.tv_nsec, - &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, - p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); -} - -static ssize_t tcpprobe_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - int error = 0; - size_t cnt = 0; - - if (!buf) - return -EINVAL; - - while (cnt < len) { - char tbuf[256]; - int width; - - /* Wait for data in buffer */ - error = wait_event_interruptible(tcp_probe.wait, - tcp_probe_used() > 0); - if (error) - break; - - spin_lock_bh(&tcp_probe.lock); - if (tcp_probe.head == tcp_probe.tail) { - /* multiple readers race? */ - spin_unlock_bh(&tcp_probe.lock); - continue; - } - - width = tcpprobe_sprint(tbuf, sizeof(tbuf)); - - if (cnt + width < len) - tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1); - - spin_unlock_bh(&tcp_probe.lock); - - /* if record greater than space available - return partial buffer (so far) */ - if (cnt + width >= len) - break; - - if (copy_to_user(buf + cnt, tbuf, width)) - return -EFAULT; - cnt += width; - } - - return cnt == 0 ? error : cnt; -} - -static const struct file_operations tcpprobe_fops = { - .owner = THIS_MODULE, - .open = tcpprobe_open, - .read = tcpprobe_read, - .llseek = noop_llseek, -}; - -static __init int tcpprobe_init(void) -{ - int ret = -ENOMEM; - - /* Warning: if the function signature of tcp_rcv_established, - * has been changed, you also have to change the signature of - * jtcp_rcv_established, otherwise you end up right here! - */ - BUILD_BUG_ON(__same_type(tcp_rcv_established, - jtcp_rcv_established) == 0); - - init_waitqueue_head(&tcp_probe.wait); - spin_lock_init(&tcp_probe.lock); - - if (bufsize == 0) - return -EINVAL; - - bufsize = roundup_pow_of_two(bufsize); - tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); - if (!tcp_probe.log) - goto err0; - - if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops)) - goto err0; - - ret = register_jprobe(&tcp_jprobe); - if (ret) - goto err1; - - pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", - port, fwmark, bufsize); - return 0; - err1: - remove_proc_entry(procname, init_net.proc_net); - err0: - kfree(tcp_probe.log); - return ret; -} -module_init(tcpprobe_init); - -static __exit void tcpprobe_exit(void) -{ - remove_proc_entry(procname, init_net.proc_net); - unregister_jprobe(&tcp_jprobe); - kfree(tcp_probe.log); -} -module_exit(tcpprobe_exit); diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index d9c04dc1b3f3..c740b189d4ba 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -37,18 +37,6 @@ menuconfig IP_SCTP if IP_SCTP -config NET_SCTPPROBE - tristate "SCTP: Association probing" - depends on PROC_FS && KPROBES - ---help--- - This module allows for capturing the changes to SCTP association - state in response to incoming packets. It is used for debugging - SCTP congestion control algorithms. If you don't understand - what was just said, you don't need it: say N. - - To compile this code as a module, choose M here: the - module will be called sctp_probe. - config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" depends on PROC_FS diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 54bd9c1a8aa1..6776582ec449 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -4,7 +4,6 @@ # obj-$(CONFIG_IP_SCTP) += sctp.o -obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ @@ -16,8 +15,6 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ offload.o stream_sched.o stream_sched_prio.o \ stream_sched_rr.o stream_interleave.o -sctp_probe-y := probe.o - sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/probe.c b/net/sctp/probe.c deleted file mode 100644 index 1280f85a598d..000000000000 --- a/net/sctp/probe.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * sctp_probe - Observe the SCTP flow with kprobes. - * - * The idea for this came from Werner Almesberger's umlsim - * Copyright (C) 2004, Stephen Hemminger - * - * Modified for SCTP from Stephen Hemminger's code - * Copyright (C) 2010, Wei Yongjun - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_SOFTDEP("pre: sctp"); -MODULE_AUTHOR("Wei Yongjun "); -MODULE_DESCRIPTION("SCTP snooper"); -MODULE_LICENSE("GPL"); - -static int port __read_mostly = 0; -MODULE_PARM_DESC(port, "Port to match (0=all)"); -module_param(port, int, 0); - -static unsigned int fwmark __read_mostly = 0; -MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); -module_param(fwmark, uint, 0); - -static int bufsize __read_mostly = 64 * 1024; -MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); -module_param(bufsize, int, 0); - -static int full __read_mostly = 1; -MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); -module_param(full, int, 0); - -static const char procname[] = "sctpprobe"; - -static struct { - struct kfifo fifo; - spinlock_t lock; - wait_queue_head_t wait; - struct timespec64 tstart; -} sctpw; - -static __printf(1, 2) void printl(const char *fmt, ...) -{ - va_list args; - int len; - char tbuf[256]; - - va_start(args, fmt); - len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); - va_end(args); - - kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); - wake_up(&sctpw.wait); -} - -static int sctpprobe_open(struct inode *inode, struct file *file) -{ - kfifo_reset(&sctpw.fifo); - ktime_get_ts64(&sctpw.tstart); - - return 0; -} - -static ssize_t sctpprobe_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - int error = 0, cnt = 0; - unsigned char *tbuf; - - if (!buf) - return -EINVAL; - - if (len == 0) - return 0; - - tbuf = vmalloc(len); - if (!tbuf) - return -ENOMEM; - - error = wait_event_interruptible(sctpw.wait, - kfifo_len(&sctpw.fifo) != 0); - if (error) - goto out_free; - - cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); - error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; - -out_free: - vfree(tbuf); - - return error ? error : cnt; -} - -static const struct file_operations sctpprobe_fops = { - .owner = THIS_MODULE, - .open = sctpprobe_open, - .read = sctpprobe_read, - .llseek = noop_llseek, -}; - -static enum sctp_disposition jsctp_sf_eat_sack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) -{ - struct sctp_chunk *chunk = arg; - struct sk_buff *skb = chunk->skb; - struct sctp_transport *sp; - static __u32 lcwnd = 0; - struct timespec64 now; - - sp = asoc->peer.primary_path; - - if (((port == 0 && fwmark == 0) || - asoc->peer.port == port || - ep->base.bind_addr.port == port || - (fwmark > 0 && skb->mark == fwmark)) && - (full || sp->cwnd != lcwnd)) { - lcwnd = sp->cwnd; - - ktime_get_ts64(&now); - now = timespec64_sub(now, sctpw.tstart); - - printl("%lu.%06lu ", (unsigned long) now.tv_sec, - (unsigned long) now.tv_nsec / NSEC_PER_USEC); - - printl("%p %5d %5d %5d %8d %5d ", asoc, - ep->base.bind_addr.port, asoc->peer.port, - asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); - - list_for_each_entry(sp, &asoc->peer.transport_addr_list, - transports) { - if (sp == asoc->peer.primary_path) - printl("*"); - - printl("%pISc %2u %8u %8u %8u %8u %8u ", - &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh, - sp->flight_size, sp->partial_bytes_acked, - sp->pathmtu); - } - printl("\n"); - } - - jprobe_return(); - return 0; -} - -static struct jprobe sctp_recv_probe = { - .kp = { - .symbol_name = "sctp_sf_eat_sack_6_2", - }, - .entry = jsctp_sf_eat_sack, -}; - -static __init int sctp_setup_jprobe(void) -{ - int ret = register_jprobe(&sctp_recv_probe); - - if (ret) { - if (request_module("sctp")) - goto out; - ret = register_jprobe(&sctp_recv_probe); - } - -out: - return ret; -} - -static __init int sctpprobe_init(void) -{ - int ret = -ENOMEM; - - /* Warning: if the function signature of sctp_sf_eat_sack_6_2, - * has been changed, you also have to change the signature of - * jsctp_sf_eat_sack, otherwise you end up right here! - */ - BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, - jsctp_sf_eat_sack) == 0); - - init_waitqueue_head(&sctpw.wait); - spin_lock_init(&sctpw.lock); - if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) - return ret; - - if (!proc_create(procname, S_IRUSR, init_net.proc_net, - &sctpprobe_fops)) - goto free_kfifo; - - ret = sctp_setup_jprobe(); - if (ret) - goto remove_proc; - - pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", - port, fwmark, bufsize); - return 0; - -remove_proc: - remove_proc_entry(procname, init_net.proc_net); -free_kfifo: - kfifo_free(&sctpw.fifo); - return ret; -} - -static __exit void sctpprobe_exit(void) -{ - kfifo_free(&sctpw.fifo); - remove_proc_entry(procname, init_net.proc_net); - unregister_jprobe(&sctp_recv_probe); -} - -module_init(sctpprobe_init); -module_exit(sctpprobe_exit); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 541f34735346..eb7905ffe5f2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -59,6 +59,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static struct sctp_packet *sctp_abort_pkt_new( struct net *net, const struct sctp_endpoint *ep, @@ -3219,6 +3222,8 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net, struct sctp_sackhdr *sackh; __u32 ctsn; + trace_sctp_probe(ep, asoc, chunk); + if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);