2005-04-17 02:20:36 +04:00
/*
 * tcp_diag.c	Module for monitoring TCP sockets.
 *
 * Version:	$Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
# include <linux/config.h>
# include <linux/module.h>
# include <linux/types.h>
# include <linux/fcntl.h>
# include <linux/random.h>
# include <linux/cache.h>
# include <linux/init.h>
# include <linux/time.h>
# include <net/icmp.h>
# include <net/tcp.h>
# include <net/ipv6.h>
# include <net/inet_common.h>
# include <linux/inet.h>
# include <linux/stddef.h>
# include <linux/tcp_diag.h>
/*
 * Snapshot of one socket (or pending connection request) in the form the
 * filter interpreter tcpdiag_bc_run() compares against.
 */
struct tcpdiag_entry
{
u32 * saddr ; /* local address words: one IPv4 word or an IPv6 s6_addr32 array */
u32 * daddr ; /* peer address words, same layout as saddr */
u16 sport ; /* local port, filled from inet->num */
u16 dport ; /* peer port after ntohs() */
u16 family ; /* copied from sk->sk_family */
u16 userlocks ; /* copied from sk->sk_userlocks; tested against SOCK_BINDPORT_LOCK */
} ;
/* Socket handed to netlink_unicast() when tcpdiag_get_exact() sends its reply. */
static struct sock * tcpnl ;
/*
 * Reserve an attribute of @attrlen payload bytes and type @attrtype in @skb
 * and evaluate to a pointer to that payload.  The continuation line must
 * directly follow the backslash, otherwise the expansion is lost.
 * NOTE(review): __RTA_PUT is defined elsewhere; tcpdiag_fill() provides an
 * rtattr_failure label, presumably because __RTA_PUT jumps there - confirm.
 */
#define TCPDIAG_PUT(skb, attrtype, attrlen) \
	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
2005-04-17 02:20:36 +04:00
2005-08-10 12:54:28 +04:00
/*
 * Hash tables used in place of tcp_hashinfo when a request carries the
 * DCCPDIAG_GETSOCK message type.  Only declared when DCCP is configured.
 */
# if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
extern struct inet_hashinfo dccp_hashinfo ;
# endif
2005-04-17 02:20:36 +04:00
/*
 * tcpdiag_fill - append one diagnostic record describing @sk to @skb.
 * @skb:         reply buffer being built
 * @sk:          socket to describe; when its state is TCP_TIME_WAIT it is
 *               read through inet_twsk()/tcp6_twsk() instead
 * @ext:         bitmask of requested extensions, bit (TCPDIAG_x - 1) each
 * @pid:         passed to NLMSG_PUT for the netlink header
 * @seq:         passed to NLMSG_PUT for the netlink header
 * @nlmsg_flags: stored in the header's nlmsg_flags
 * @unlh:        request header; its nlmsg_type is reused for the record
 *
 * tcpdiag_timer is reported as 1 (retransmit), 4 (zero-window probe),
 * 2 (sk_timer pending), 3 (time-wait) or 0 (none).
 *
 * Returns skb->len on success.  On the failure path the buffer is trimmed
 * back to where this record started and -1 is returned.
 */
static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
			int ext, u32 pid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcpdiagmsg *r;
	struct nlmsghdr *nlh;
	struct tcp_info *info = NULL;
	struct tcpdiag_meminfo *minfo = NULL;
	unsigned char *b = skb->tail;	/* record start: used for length and rollback */

	nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
	nlh->nlmsg_flags = nlmsg_flags;
	r = NLMSG_DATA(nlh);

	/* Optional attributes are only reserved for non-TIME_WAIT sockets. */
	if (sk->sk_state != TCP_TIME_WAIT) {
		if (ext & (1 << (TCPDIAG_MEMINFO - 1)))
			minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));

		if (ext & (1 << (TCPDIAG_INFO - 1)))
			info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));

		if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) {
			size_t len = strlen(icsk->icsk_ca_ops->name);

			/* Attribute holds the algorithm name including its NUL. */
			strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len + 1),
			       icsk->icsk_ca_ops->name);
		}
	}

	r->tcpdiag_family = sk->sk_family;
	r->tcpdiag_state = sk->sk_state;
	r->tcpdiag_timer = 0;
	r->tcpdiag_retrans = 0;

	r->id.tcpdiag_if = sk->sk_bound_dev_if;
	/* The cookie is the socket address split into two 32-bit halves. */
	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk;
	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);

	if (r->tcpdiag_state == TCP_TIME_WAIT) {
		const struct inet_timewait_sock *tw = inet_twsk(sk);
		long tmo = tw->tw_ttd - jiffies;

		if (tmo < 0)
			tmo = 0;

		r->id.tcpdiag_sport = tw->tw_sport;
		r->id.tcpdiag_dport = tw->tw_dport;
		r->id.tcpdiag_src[0] = tw->tw_rcv_saddr;
		r->id.tcpdiag_dst[0] = tw->tw_daddr;
		r->tcpdiag_state = tw->tw_substate;
		r->tcpdiag_timer = 3;
		/* Remaining time in milliseconds, rounded up. */
		r->tcpdiag_expires = (tmo * 1000 + HZ - 1) / HZ;
		r->tcpdiag_rqueue = 0;
		r->tcpdiag_wqueue = 0;
		r->tcpdiag_uid = 0;
		r->tcpdiag_inode = 0;
#ifdef CONFIG_IP_TCPDIAG_IPV6
		if (r->tcpdiag_family == AF_INET6) {
			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);

			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
				       &tcp6tw->tw_v6_rcv_saddr);
			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
				       &tcp6tw->tw_v6_daddr);
		}
#endif
		nlh->nlmsg_len = skb->tail - b;
		return skb->len;
	}

	r->id.tcpdiag_sport = inet->sport;
	r->id.tcpdiag_dport = inet->dport;
	r->id.tcpdiag_src[0] = inet->rcv_saddr;
	r->id.tcpdiag_dst[0] = inet->daddr;

#ifdef CONFIG_IP_TCPDIAG_IPV6
	if (r->tcpdiag_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
			       &np->rcv_saddr);
		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
			       &np->daddr);
	}
#endif

/* Jiffies deadline -> milliseconds from now, rounded up. */
#define EXPIRES_IN_MS(tmo)	((((tmo) - jiffies) * 1000 + HZ - 1) / HZ)

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		r->tcpdiag_timer = 1;
		r->tcpdiag_retrans = icsk->icsk_retransmits;
		r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->tcpdiag_timer = 4;
		r->tcpdiag_retrans = icsk->icsk_probes_out;
		r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
	} else if (timer_pending(&sk->sk_timer)) {
		r->tcpdiag_timer = 2;
		r->tcpdiag_retrans = icsk->icsk_probes_out;
		r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
	} else {
		r->tcpdiag_timer = 0;
		r->tcpdiag_expires = 0;
	}
#undef EXPIRES_IN_MS

	/*
	 * Ahem... for now we'll have some knowledge about TCP -acme
	 * But this is just one of two small exceptions, both in this
	 * function, so lets close our eyes for some 15 lines or so... 8)
	 * -acme
	 */
	if (sk->sk_protocol == IPPROTO_TCP) {
		const struct tcp_sock *tp = tcp_sk(sk);

		r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
		r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
	} else
		r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0;

	r->tcpdiag_uid = sock_i_uid(sk);
	r->tcpdiag_inode = sock_i_ino(sk);

	if (minfo) {
		minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc);
		minfo->tcpdiag_wmem = sk->sk_wmem_queued;
		minfo->tcpdiag_fmem = sk->sk_forward_alloc;
		minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc);
	}

	/* Ahem... for now we'll have some knowledge about TCP -acme */
	if (info) {
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_get_info(sk, info);
		else
			memset(info, 0, sizeof(*info));
	}

	/* Let the congestion-control module add its own attributes, if any. */
	if (sk->sk_state < TCP_TIME_WAIT &&
	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
		icsk->icsk_ca_ops->get_info(sk, ext, skb);

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

	/*
	 * No goto in this function names these labels; presumably they are
	 * jumped to from inside TCPDIAG_PUT / NLMSG_PUT (macros not visible
	 * here) - confirm.  Drop whatever was added for this record.
	 */
rtattr_failure:
nlmsg_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
/*
 * IPv6 socket lookup used by tcpdiag_get_exact().  With
 * CONFIG_IP_TCPDIAG_IPV6 the function is only declared here and defined
 * elsewhere; without it a local stub reports "no socket" (NULL).
 */
# ifdef CONFIG_IP_TCPDIAG_IPV6
extern struct sock * tcp_v6_lookup ( struct in6_addr * saddr , u16 sport ,
struct in6_addr * daddr , u16 dport ,
int dif ) ;
# else
static inline struct sock * tcp_v6_lookup ( struct in6_addr * saddr , u16 sport ,
struct in6_addr * daddr , u16 dport ,
int dif )
{
return NULL ;
}
# endif
static int tcpdiag_get_exact ( struct sk_buff * in_skb , const struct nlmsghdr * nlh )
{
int err ;
struct sock * sk ;
struct tcpdiagreq * req = NLMSG_DATA ( nlh ) ;
struct sk_buff * rep ;
2005-08-10 12:54:28 +04:00
struct inet_hashinfo * hashinfo = & tcp_hashinfo ;
# if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
if ( nlh - > nlmsg_type = = DCCPDIAG_GETSOCK )
hashinfo = & dccp_hashinfo ;
# endif
2005-04-17 02:20:36 +04:00
if ( req - > tcpdiag_family = = AF_INET ) {
2005-08-10 12:54:28 +04:00
sk = inet_lookup ( hashinfo , req - > id . tcpdiag_dst [ 0 ] ,
2005-08-10 07:09:46 +04:00
req - > id . tcpdiag_dport , req - > id . tcpdiag_src [ 0 ] ,
req - > id . tcpdiag_sport , req - > id . tcpdiag_if ) ;
2005-04-17 02:20:36 +04:00
}
# ifdef CONFIG_IP_TCPDIAG_IPV6
else if ( req - > tcpdiag_family = = AF_INET6 ) {
sk = tcp_v6_lookup ( ( struct in6_addr * ) req - > id . tcpdiag_dst , req - > id . tcpdiag_dport ,
( struct in6_addr * ) req - > id . tcpdiag_src , req - > id . tcpdiag_sport ,
req - > id . tcpdiag_if ) ;
}
# endif
else {
return - EINVAL ;
}
if ( sk = = NULL )
return - ENOENT ;
err = - ESTALE ;
if ( ( req - > id . tcpdiag_cookie [ 0 ] ! = TCPDIAG_NOCOOKIE | |
req - > id . tcpdiag_cookie [ 1 ] ! = TCPDIAG_NOCOOKIE ) & &
( ( u32 ) ( unsigned long ) sk ! = req - > id . tcpdiag_cookie [ 0 ] | |
( u32 ) ( ( ( ( unsigned long ) sk ) > > 31 ) > > 1 ) ! = req - > id . tcpdiag_cookie [ 1 ] ) )
goto out ;
err = - ENOMEM ;
rep = alloc_skb ( NLMSG_SPACE ( sizeof ( struct tcpdiagmsg ) +
sizeof ( struct tcpdiag_meminfo ) +
sizeof ( struct tcp_info ) + 64 ) , GFP_KERNEL ) ;
if ( ! rep )
goto out ;
if ( tcpdiag_fill ( rep , sk , req - > tcpdiag_ext ,
NETLINK_CB ( in_skb ) . pid ,
2005-08-10 12:54:28 +04:00
nlh - > nlmsg_seq , 0 , nlh ) < = 0 )
2005-04-17 02:20:36 +04:00
BUG ( ) ;
err = netlink_unicast ( tcpnl , rep , NETLINK_CB ( in_skb ) . pid , MSG_DONTWAIT ) ;
if ( err > 0 )
err = 0 ;
out :
if ( sk ) {
if ( sk - > sk_state = = TCP_TIME_WAIT )
2005-08-10 07:09:30 +04:00
inet_twsk_put ( ( struct inet_timewait_sock * ) sk ) ;
2005-04-17 02:20:36 +04:00
else
sock_put ( sk ) ;
}
return err ;
}
/*
 * bitstring_match - compare the leading @bits bits of two word arrays.
 *
 * Whole 32-bit words are compared with memcmp(); any remaining bits are
 * compared under a mask built with htonl(), i.e. counted from the first
 * byte of the next word.
 *
 * Returns 1 when the prefixes are equal, 0 otherwise.
 */
static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
{
	const int full_words = bits >> 5;
	const int tail_bits = bits & 0x1f;

	if (full_words && memcmp(a1, a2, full_words << 2))
		return 0;

	if (tail_bits) {
		__u32 mask = htonl((0xffffffff) << (32 - tail_bits));
		__u32 diff = a1[full_words] ^ a2[full_words];

		if (diff & mask)
			return 0;
	}

	return 1;
}
static int tcpdiag_bc_run ( const void * bc , int len ,
const struct tcpdiag_entry * entry )
{
while ( len > 0 ) {
int yes = 1 ;
const struct tcpdiag_bc_op * op = bc ;
switch ( op - > code ) {
case TCPDIAG_BC_NOP :
break ;
case TCPDIAG_BC_JMP :
yes = 0 ;
break ;
case TCPDIAG_BC_S_GE :
yes = entry - > sport > = op [ 1 ] . no ;
break ;
case TCPDIAG_BC_S_LE :
yes = entry - > dport < = op [ 1 ] . no ;
break ;
case TCPDIAG_BC_D_GE :
yes = entry - > dport > = op [ 1 ] . no ;
break ;
case TCPDIAG_BC_D_LE :
yes = entry - > dport < = op [ 1 ] . no ;
break ;
case TCPDIAG_BC_AUTO :
yes = ! ( entry - > userlocks & SOCK_BINDPORT_LOCK ) ;
break ;
case TCPDIAG_BC_S_COND :
case TCPDIAG_BC_D_COND :
{
struct tcpdiag_hostcond * cond = ( struct tcpdiag_hostcond * ) ( op + 1 ) ;
u32 * addr ;
if ( cond - > port ! = - 1 & &
cond - > port ! = ( op - > code = = TCPDIAG_BC_S_COND ?
entry - > sport : entry - > dport ) ) {
yes = 0 ;
break ;
}
if ( cond - > prefix_len = = 0 )
break ;
if ( op - > code = = TCPDIAG_BC_S_COND )
addr = entry - > saddr ;
else
addr = entry - > daddr ;
if ( bitstring_match ( addr , cond - > addr , cond - > prefix_len ) )
break ;
if ( entry - > family = = AF_INET6 & &
cond - > family = = AF_INET ) {
if ( addr [ 0 ] = = 0 & & addr [ 1 ] = = 0 & &
addr [ 2 ] = = htonl ( 0xffff ) & &
bitstring_match ( addr + 3 , cond - > addr , cond - > prefix_len ) )
break ;
}
yes = 0 ;
break ;
}
}
if ( yes ) {
len - = op - > yes ;
bc + = op - > yes ;
} else {
len - = op - > no ;
bc + = op - > no ;
}
}
return ( len = = 0 ) ;
}
static int valid_cc ( const void * bc , int len , int cc )
{
while ( len > = 0 ) {
const struct tcpdiag_bc_op * op = bc ;
if ( cc > len )
return 0 ;
if ( cc = = len )
return 1 ;
if ( op - > yes < 4 )
return 0 ;
len - = op - > yes ;
bc + = op - > yes ;
}
return 0 ;
}
static int tcpdiag_bc_audit ( const void * bytecode , int bytecode_len )
{
const unsigned char * bc = bytecode ;
int len = bytecode_len ;
while ( len > 0 ) {
struct tcpdiag_bc_op * op = ( struct tcpdiag_bc_op * ) bc ;
//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
switch ( op - > code ) {
case TCPDIAG_BC_AUTO :
case TCPDIAG_BC_S_COND :
case TCPDIAG_BC_D_COND :
case TCPDIAG_BC_S_GE :
case TCPDIAG_BC_S_LE :
case TCPDIAG_BC_D_GE :
case TCPDIAG_BC_D_LE :
if ( op - > yes < 4 | | op - > yes > len + 4 )
return - EINVAL ;
case TCPDIAG_BC_JMP :
if ( op - > no < 4 | | op - > no > len + 4 )
return - EINVAL ;
if ( op - > no < len & &
! valid_cc ( bytecode , bytecode_len , len - op - > no ) )
return - EINVAL ;
break ;
case TCPDIAG_BC_NOP :
if ( op - > yes < 4 | | op - > yes > len + 4 )
return - EINVAL ;
break ;
default :
return - EINVAL ;
}
bc + = op - > yes ;
len - = op - > yes ;
}
return len = = 0 ? 0 : - EINVAL ;
}
static int tcpdiag_dump_sock ( struct sk_buff * skb , struct sock * sk ,
struct netlink_callback * cb )
{
struct tcpdiagreq * r = NLMSG_DATA ( cb - > nlh ) ;
if ( cb - > nlh - > nlmsg_len > 4 + NLMSG_SPACE ( sizeof ( * r ) ) ) {
struct tcpdiag_entry entry ;
struct rtattr * bc = ( struct rtattr * ) ( r + 1 ) ;
struct inet_sock * inet = inet_sk ( sk ) ;
entry . family = sk - > sk_family ;
# ifdef CONFIG_IP_TCPDIAG_IPV6
if ( entry . family = = AF_INET6 ) {
struct ipv6_pinfo * np = inet6_sk ( sk ) ;
entry . saddr = np - > rcv_saddr . s6_addr32 ;
entry . daddr = np - > daddr . s6_addr32 ;
} else
# endif
{
entry . saddr = & inet - > rcv_saddr ;
entry . daddr = & inet - > daddr ;
}
entry . sport = inet - > num ;
entry . dport = ntohs ( inet - > dport ) ;
entry . userlocks = sk - > sk_userlocks ;
if ( ! tcpdiag_bc_run ( RTA_DATA ( bc ) , RTA_PAYLOAD ( bc ) , & entry ) )
return 0 ;
}
return tcpdiag_fill ( skb , sk , r - > tcpdiag_ext , NETLINK_CB ( cb - > skb ) . pid ,
2005-08-10 12:54:28 +04:00
cb - > nlh - > nlmsg_seq , NLM_F_MULTI , cb - > nlh ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * tcpdiag_fill_req - append a record for one pending connection request.
 * @skb:  dump buffer being filled
 * @sk:   listening socket that owns @req
 * @req:  the pending request
 * @pid:  passed to NLMSG_PUT for the netlink header
 * @seq:  passed to NLMSG_PUT for the netlink header
 * @unlh: request header; its nlmsg_type is reused for the record
 *
 * The record always reports state TCP_SYN_RECV and timer type 1, and its
 * cookie is derived from the address of @req rather than of @sk.
 *
 * Returns skb->len on success.  On the failure path the buffer is trimmed
 * back to where this record started and -1 is returned.
 */
static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
			    struct request_sock *req,
			    u32 pid, u32 seq,
			    const struct nlmsghdr *unlh)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct inet_sock *inet = inet_sk(sk);
	unsigned char *b = skb->tail;	/* record start: used for length and rollback */
	struct tcpdiagmsg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
	nlh->nlmsg_flags = NLM_F_MULTI;
	r = NLMSG_DATA(nlh);

	r->tcpdiag_family = sk->sk_family;
	r->tcpdiag_state = TCP_SYN_RECV;
	r->tcpdiag_timer = 1;
	r->tcpdiag_retrans = req->retrans;

	r->id.tcpdiag_if = sk->sk_bound_dev_if;
	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);

	/* Time left until the request expires, never negative. */
	tmo = req->expires - jiffies;
	if (tmo < 0)
		tmo = 0;

	r->id.tcpdiag_sport = inet->sport;
	r->id.tcpdiag_dport = ireq->rmt_port;
	r->id.tcpdiag_src[0] = ireq->loc_addr;
	r->id.tcpdiag_dst[0] = ireq->rmt_addr;
	r->tcpdiag_expires = jiffies_to_msecs(tmo);
	r->tcpdiag_rqueue = 0;
	r->tcpdiag_wqueue = 0;
	r->tcpdiag_uid = sock_i_uid(sk);
	r->tcpdiag_inode = 0;
#ifdef CONFIG_IP_TCPDIAG_IPV6
	if (r->tcpdiag_family == AF_INET6) {
		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
			       &tcp6_rsk(req)->loc_addr);
		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
			       &tcp6_rsk(req)->rmt_addr);
	}
#endif
	nlh->nlmsg_len = skb->tail - b;

	return skb->len;

	/*
	 * No goto in this function names this label; presumably NLMSG_PUT
	 * (macro not visible here) jumps to it - confirm.
	 */
nlmsg_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int tcpdiag_dump_reqs ( struct sk_buff * skb , struct sock * sk ,
struct netlink_callback * cb )
{
struct tcpdiag_entry entry ;
struct tcpdiagreq * r = NLMSG_DATA ( cb - > nlh ) ;
2005-08-10 07:10:42 +04:00
struct inet_connection_sock * icsk = inet_csk ( sk ) ;
2005-06-19 09:48:55 +04:00
struct listen_sock * lopt ;
2005-04-17 02:20:36 +04:00
struct rtattr * bc = NULL ;
struct inet_sock * inet = inet_sk ( sk ) ;
int j , s_j ;
int reqnum , s_reqnum ;
int err = 0 ;
s_j = cb - > args [ 3 ] ;
s_reqnum = cb - > args [ 4 ] ;
if ( s_j > 0 )
s_j - - ;
entry . family = sk - > sk_family ;
2005-08-10 07:10:42 +04:00
read_lock_bh ( & icsk - > icsk_accept_queue . syn_wait_lock ) ;
2005-04-17 02:20:36 +04:00
2005-08-10 07:10:42 +04:00
lopt = icsk - > icsk_accept_queue . listen_opt ;
2005-04-17 02:20:36 +04:00
if ( ! lopt | | ! lopt - > qlen )
goto out ;
if ( cb - > nlh - > nlmsg_len > 4 + NLMSG_SPACE ( sizeof ( * r ) ) ) {
bc = ( struct rtattr * ) ( r + 1 ) ;
entry . sport = inet - > num ;
entry . userlocks = sk - > sk_userlocks ;
}
2005-08-10 12:54:28 +04:00
for ( j = s_j ; j < lopt - > nr_table_entries ; j + + ) {
2005-06-19 09:47:21 +04:00
struct request_sock * req , * head = lopt - > syn_table [ j ] ;
2005-04-17 02:20:36 +04:00
reqnum = 0 ;
for ( req = head ; req ; reqnum + + , req = req - > dl_next ) {
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 09:46:52 +04:00
struct inet_request_sock * ireq = inet_rsk ( req ) ;
2005-04-17 02:20:36 +04:00
if ( reqnum < s_reqnum )
continue ;
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 09:46:52 +04:00
if ( r - > id . tcpdiag_dport ! = ireq - > rmt_port & &
2005-04-17 02:20:36 +04:00
r - > id . tcpdiag_dport )
continue ;
if ( bc ) {
entry . saddr =
# ifdef CONFIG_IP_TCPDIAG_IPV6
( entry . family = = AF_INET6 ) ?
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 09:46:52 +04:00
tcp6_rsk ( req ) - > loc_addr . s6_addr32 :
2005-04-17 02:20:36 +04:00
# endif
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 09:46:52 +04:00
& ireq - > loc_addr ;
2005-04-17 02:20:36 +04:00
entry . daddr =
# ifdef CONFIG_IP_TCPDIAG_IPV6
( entry . family = = AF_INET6 ) ?
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 09:46:52 +04:00
tcp6_rsk ( req ) - > rmt_addr . s6_addr32 :
2005-04-17 02:20:36 +04:00
# endif
[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-19 09:46:52 +04:00
& ireq - > rmt_addr ;
entry . dport = ntohs ( ireq - > rmt_port ) ;
2005-04-17 02:20:36 +04:00
if ( ! tcpdiag_bc_run ( RTA_DATA ( bc ) ,
RTA_PAYLOAD ( bc ) , & entry ) )
continue ;
}
err = tcpdiag_fill_req ( skb , sk , req ,
NETLINK_CB ( cb - > skb ) . pid ,
2005-08-10 12:54:28 +04:00
cb - > nlh - > nlmsg_seq , cb - > nlh ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 ) {
cb - > args [ 3 ] = j + 1 ;
cb - > args [ 4 ] = reqnum ;
goto out ;
}
}
s_reqnum = 0 ;
}
out :
2005-08-10 07:10:42 +04:00
read_unlock_bh ( & icsk - > icsk_accept_queue . syn_wait_lock ) ;
2005-04-17 02:20:36 +04:00
return err ;
}
/*
 * tcpdiag_dump - netlink dump callback that emits matching sockets into skb.
 *
 * The request (struct tcpdiagreq) is read from cb->nlh.  Sockets are taken
 * from tcp_hashinfo, or from dccp_hashinfo when DCCP support is configured
 * and the request type is DCCPDIAG_GETSOCK.
 *
 * The walk is resumable; its position is kept in cb->args:
 *   args[0]  0 while the listening table still has to be walked, 1 afterwards
 *   args[1]  index of the hash bucket to resume from
 *   args[2]  number of entries of that bucket that were already handled
 *   args[3], args[4]  cleared here after every listening socket; a non-zero
 *            args[3] makes the walk go straight to tcpdiag_dump_reqs()
 *
 * Always returns skb->len.
 */
static int tcpdiag_dump ( struct sk_buff * skb , struct netlink_callback * cb )
{
int i , num ;
int s_i , s_num ;
struct tcpdiagreq * r = NLMSG_DATA ( cb - > nlh ) ;
2005-08-10 12:54:28 +04:00
struct inet_hashinfo * hashinfo ;
2005-04-17 02:20:36 +04:00
/* Resume position saved at the "done" label by an earlier invocation. */
s_i = cb - > args [ 1 ] ;
s_num = num = cb - > args [ 2 ] ;
2005-08-10 12:54:28 +04:00
/* TCP tables by default; DCCP tables for DCCPDIAG_GETSOCK requests. */
hashinfo = & tcp_hashinfo ;
# if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
if ( cb - > nlh - > nlmsg_type = = DCCPDIAG_GETSOCK )
hashinfo = & dccp_hashinfo ;
# endif
2005-04-17 02:20:36 +04:00
/* Phase 1: listening sockets and their pending connection requests. */
if ( cb - > args [ 0 ] = = 0 ) {
/* Neither LISTEN nor SYN_RECV was requested: skip the listening table. */
if ( ! ( r - > tcpdiag_states & ( TCPF_LISTEN | TCPF_SYN_RECV ) ) )
goto skip_listen_ht ;
2005-08-10 12:54:28 +04:00
inet_listen_lock ( hashinfo ) ;
2005-08-10 06:59:44 +04:00
for ( i = s_i ; i < INET_LHTABLE_SIZE ; i + + ) {
2005-04-17 02:20:36 +04:00
struct sock * sk ;
struct hlist_node * node ;
/* num counts the entries of bucket i visited so far. */
num = 0 ;
2005-08-10 12:54:28 +04:00
sk_for_each ( sk , node , & hashinfo - > listening_hash [ i ] ) {
2005-04-17 02:20:36 +04:00
struct inet_sock * inet = inet_sk ( sk ) ;
/* Skip entries that were already handled before the dump was suspended. */
if ( num < s_num ) {
num + + ;
continue ;
}
/* A non-zero source port in the request must match the socket. */
if ( r - > id . tcpdiag_sport ! = inet - > sport & &
r - > id . tcpdiag_sport )
goto next_listen ;
/*
 * Do not emit the listener itself when LISTEN was not requested, when a
 * destination port filter is set, or when args[3] is non-zero (presumably
 * a resume inside this socket's request queue; confirm against
 * tcpdiag_dump_reqs()).
 */
if ( ! ( r - > tcpdiag_states & TCPF_LISTEN ) | |
r - > id . tcpdiag_dport | |
cb - > args [ 3 ] > 0 )
goto syn_recv ;
/* Negative return: release the listen lock and save the position. */
if ( tcpdiag_dump_sock ( skb , sk , cb ) < 0 ) {
2005-08-10 12:54:28 +04:00
inet_listen_unlock ( hashinfo ) ;
2005-04-17 02:20:36 +04:00
goto done ;
}
syn_recv :
/* Pending requests of this listener are dumped only for SYN_RECV. */
if ( ! ( r - > tcpdiag_states & TCPF_SYN_RECV ) )
goto next_listen ;
if ( tcpdiag_dump_reqs ( skb , sk , cb ) < 0 ) {
2005-08-10 12:54:28 +04:00
inet_listen_unlock ( hashinfo ) ;
2005-04-17 02:20:36 +04:00
goto done ;
}
next_listen :
/* Finished with this socket: clear args[3]/args[4] and count it. */
cb - > args [ 3 ] = 0 ;
cb - > args [ 4 ] = 0 ;
+ + num ;
}
/* Only the bucket the walk resumed in starts at a non-zero offset. */
s_num = 0 ;
cb - > args [ 3 ] = 0 ;
cb - > args [ 4 ] = 0 ;
}
2005-08-10 12:54:28 +04:00
inet_listen_unlock ( hashinfo ) ;
2005-04-17 02:20:36 +04:00
skip_listen_ht :
/* Mark phase 1 as complete and restart the cursor for the ehash walk. */
cb - > args [ 0 ] = 1 ;
s_i = num = s_num = 0 ;
}
/* No state other than LISTEN/SYN_RECV requested: nothing left to walk. */
if ( ! ( r - > tcpdiag_states & ~ ( TCPF_LISTEN | TCPF_SYN_RECV ) ) )
return skb - > len ;
2005-08-10 12:54:28 +04:00
/* Phase 2: walk hashinfo->ehash, each bucket under its own read lock. */
for ( i = s_i ; i < hashinfo - > ehash_size ; i + + ) {
struct inet_ehash_bucket * head = & hashinfo - > ehash [ i ] ;
2005-04-17 02:20:36 +04:00
struct sock * sk ;
struct hlist_node * node ;
/* The saved offset only applies to the bucket the walk resumed in. */
if ( i > s_i )
s_num = 0 ;
read_lock_bh ( & head - > lock ) ;
num = 0 ;
sk_for_each ( sk , node , & head - > chain ) {
struct inet_sock * inet = inet_sk ( sk ) ;
if ( num < s_num )
goto next_normal ;
/* Emit only sockets whose state bit is set in the request mask. */
if ( ! ( r - > tcpdiag_states & ( 1 < < sk - > sk_state ) ) )
goto next_normal ;
/* Non-zero port filters in the request must match the socket. */
if ( r - > id . tcpdiag_sport ! = inet - > sport & &
r - > id . tcpdiag_sport )
goto next_normal ;
if ( r - > id . tcpdiag_dport ! = inet - > dport & & r - > id . tcpdiag_dport )
goto next_normal ;
if ( tcpdiag_dump_sock ( skb , sk , cb ) < 0 ) {
read_unlock_bh ( & head - > lock ) ;
goto done ;
}
next_normal :
+ + num ;
}
/*
 * The chain at index i + ehash_size is walked only for TCPF_TIME_WAIT
 * requests, while head->lock is still held; num keeps counting across
 * both chains so the saved offset covers them together.
 */
if ( r - > tcpdiag_states & TCPF_TIME_WAIT ) {
sk_for_each ( sk , node ,
2005-08-10 12:54:28 +04:00
& hashinfo - > ehash [ i + hashinfo - > ehash_size ] . chain ) {
2005-04-17 02:20:36 +04:00
struct inet_sock * inet = inet_sk ( sk ) ;
if ( num < s_num )
goto next_dying ;
if ( r - > id . tcpdiag_sport ! = inet - > sport & &
r - > id . tcpdiag_sport )
goto next_dying ;
if ( r - > id . tcpdiag_dport ! = inet - > dport & &
r - > id . tcpdiag_dport )
goto next_dying ;
if ( tcpdiag_dump_sock ( skb , sk , cb ) < 0 ) {
read_unlock_bh ( & head - > lock ) ;
goto done ;
}
next_dying :
+ + num ;
}
}
read_unlock_bh ( & head - > lock ) ;
}
done :
/* Save the bucket index and in-bucket offset for the next invocation. */
cb - > args [ 1 ] = i ;
cb - > args [ 2 ] = num ;
return skb - > len ;
}
/*
 * tcpdiag_dump_done - completion callback handed to netlink_dump_start().
 *
 * There is nothing to clean up, so @cb is ignored and 0 is returned.
 */
static int tcpdiag_dump_done(struct netlink_callback *cb)
{
	return 0;
}
static __inline__ int
tcpdiag_rcv_msg ( struct sk_buff * skb , struct nlmsghdr * nlh )
{
if ( ! ( nlh - > nlmsg_flags & NLM_F_REQUEST ) )
return 0 ;
2005-08-10 12:54:28 +04:00
if ( nlh - > nlmsg_type ! = TCPDIAG_GETSOCK
# if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
& & nlh - > nlmsg_type ! = DCCPDIAG_GETSOCK
# endif
)
2005-04-17 02:20:36 +04:00
goto err_inval ;
if ( NLMSG_LENGTH ( sizeof ( struct tcpdiagreq ) ) > skb - > len )
goto err_inval ;
if ( nlh - > nlmsg_flags & NLM_F_DUMP ) {
if ( nlh - > nlmsg_len > 4 + NLMSG_SPACE ( sizeof ( struct tcpdiagreq ) ) ) {
struct rtattr * rta = ( struct rtattr * ) ( NLMSG_DATA ( nlh ) + sizeof ( struct tcpdiagreq ) ) ;
if ( rta - > rta_type ! = TCPDIAG_REQ_BYTECODE | |
rta - > rta_len < 8 | |
rta - > rta_len > nlh - > nlmsg_len - NLMSG_SPACE ( sizeof ( struct tcpdiagreq ) ) )
goto err_inval ;
if ( tcpdiag_bc_audit ( RTA_DATA ( rta ) , RTA_PAYLOAD ( rta ) ) )
goto err_inval ;
}
return netlink_dump_start ( tcpnl , skb , nlh ,
tcpdiag_dump ,
tcpdiag_dump_done ) ;
} else {
return tcpdiag_get_exact ( skb , nlh ) ;
}
err_inval :
return - EINVAL ;
}
static inline void tcpdiag_rcv_skb ( struct sk_buff * skb )
{
int err ;
struct nlmsghdr * nlh ;
if ( skb - > len > = NLMSG_SPACE ( 0 ) ) {
nlh = ( struct nlmsghdr * ) skb - > data ;
if ( nlh - > nlmsg_len < sizeof ( * nlh ) | | skb - > len < nlh - > nlmsg_len )
return ;
err = tcpdiag_rcv_msg ( skb , nlh ) ;
if ( err | | nlh - > nlmsg_flags & NLM_F_ACK )
netlink_ack ( skb , nlh , err ) ;
}
}
static void tcpdiag_rcv ( struct sock * sk , int len )
{
struct sk_buff * skb ;
2005-05-04 01:55:09 +04:00
unsigned int qlen = skb_queue_len ( & sk - > sk_receive_queue ) ;
2005-04-17 02:20:36 +04:00
2005-05-04 01:55:09 +04:00
while ( qlen - - & & ( skb = skb_dequeue ( & sk - > sk_receive_queue ) ) ) {
2005-04-17 02:20:36 +04:00
tcpdiag_rcv_skb ( skb ) ;
kfree_skb ( skb ) ;
}
}
/*
 * tcpdiag_init - create the NETLINK_TCPDIAG kernel socket.
 *
 * Stores the socket in tcpnl with tcpdiag_rcv() as its input handler.
 * Returns 0 on success, -ENOMEM when the socket could not be created.
 */
static int __init tcpdiag_init(void)
{
	tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv,
				      THIS_MODULE);

	return tcpnl != NULL ? 0 : -ENOMEM;
}
/*
 * tcpdiag_exit - release the socket that tcpdiag_init() stored in tcpnl.
 */
static void __exit tcpdiag_exit(void)
{
	struct socket *diag_sock = tcpnl->sk_socket;

	sock_release(diag_sock);
}
/* Module entry and exit points, and the module's license tag. */
module_init ( tcpdiag_init ) ;
module_exit ( tcpdiag_exit ) ;
MODULE_LICENSE ( " GPL " ) ;